[INET]: Move tcp_port_rover to inet_hashinfo
net/ipv6/tcp_ipv6.c
/*
 *      TCP over IPv6
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on:
 *      linux/net/ipv4/tcp.c
 *      linux/net/ipv4/tcp_input.c
 *      linux/net/ipv4/tcp_output.c
 *
 *      Fixes:
 *      Hideaki YOSHIFUJI       :       sin6_scope_id support
 *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 *                                      a single port at the same time.
 *      YOSHIFUJI Hideaki @USAGI:       convert /proc/net/tcp6 to seq_file.
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static void     tcp_v6_send_reset(struct sk_buff *skb);
static void     tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void     tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                                  struct sk_buff *skb);

static int      tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int      tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;

/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
                                    struct in6_addr *faddr, u16 fport)
{
        int hashent = (lport ^ fport);

        hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
        hashent ^= hashent>>16;
        hashent ^= hashent>>8;
        return (hashent & (tcp_hashinfo.ehash_size - 1));
}

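/*
 * Reading the fold above: the port pair is XORed with the low 32 bits
 * of each address, then the upper bits are folded down; the final mask
 * only yields a valid index because ehash_size is a power of two.
 */
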
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *laddr = &np->rcv_saddr;
        struct in6_addr *faddr = &np->daddr;
        __u16 lport = inet->num;
        __u16 fport = inet->dport;
        return tcp_v6_hashfn(laddr, lport, faddr, fport);
}

static inline int tcp_v6_bind_conflict(const struct sock *sk,
                                       const struct inet_bind_bucket *tb)
{
        const struct sock *sk2;
        const struct hlist_node *node;

        /* We must walk the whole port owner list in this case. -DaveM */
        sk_for_each_bound(sk2, node, &tb->owners) {
                if (sk != sk2 &&
                    (!sk->sk_bound_dev_if ||
                     !sk2->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
                    (!sk->sk_reuse || !sk2->sk_reuse ||
                     sk2->sk_state == TCP_LISTEN) &&
                     ipv6_rcv_saddr_equal(sk, sk2))
                        break;
        }

        return node != NULL;
}

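/*
 * A conflict thus needs another owner on a compatible device (either
 * socket unbound, or both bound to the same one), no mutual
 * SO_REUSEADDR escape (sk2 listening defeats it), and an equal
 * rcv_saddr.  So, for example, two sockets that both did (userspace
 * sketch, illustrative only):
 *
 *      int on = 1;
 *      setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 *      bind(fd, (struct sockaddr *)&addr6, sizeof(addr6));
 *
 * may share the port while neither is in TCP_LISTEN.
 */
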
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
        struct inet_bind_hashbucket *head;
        struct inet_bind_bucket *tb;
        struct hlist_node *node;
        int ret;

        local_bh_disable();
        if (snum == 0) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int remaining = (high - low) + 1;
                int rover;

                spin_lock(&tcp_hashinfo.portalloc_lock);
                if (tcp_hashinfo.port_rover < low)
                        rover = low;
                else
                        rover = tcp_hashinfo.port_rover;
                do {    rover++;
                        if (rover > high)
                                rover = low;
                        head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
                        spin_lock(&head->lock);
                        inet_bind_bucket_for_each(tb, node, &head->chain)
                                if (tb->port == rover)
                                        goto next;
                        break;
                next:
                        spin_unlock(&head->lock);
                } while (--remaining > 0);
                tcp_hashinfo.port_rover = rover;
                spin_unlock(&tcp_hashinfo.portalloc_lock);

                /* Exhausted local port range during search?  It is not
                 * possible for us to be holding one of the bind hash
                 * locks if this test triggers, because if 'remaining'
                 * drops to zero, we broke out of the do/while loop at
                 * the top level, not from the 'break;' statement.
                 */
                ret = 1;
                if (unlikely(remaining <= 0))
                        goto fail;

                /* OK, here is the one we will use. */
                snum = rover;
        } else {
                head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
                spin_lock(&head->lock);
                inet_bind_bucket_for_each(tb, node, &head->chain)
                        if (tb->port == snum)
                                goto tb_found;
        }
        tb = NULL;
        goto tb_not_found;
tb_found:
        if (tb && !hlist_empty(&tb->owners)) {
                if (tb->fastreuse > 0 && sk->sk_reuse &&
                    sk->sk_state != TCP_LISTEN) {
                        goto success;
                } else {
                        ret = 1;
                        if (tcp_v6_bind_conflict(sk, tb))
                                goto fail_unlock;
                }
        }
tb_not_found:
        ret = 1;
        if (tb == NULL) {
                tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
                if (tb == NULL)
                        goto fail_unlock;
        }
        if (hlist_empty(&tb->owners)) {
                if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
                        tb->fastreuse = 1;
                else
                        tb->fastreuse = 0;
        } else if (tb->fastreuse &&
                   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
                tb->fastreuse = 0;

success:
        if (!inet_sk(sk)->bind_hash)
                inet_bind_hash(sk, tb, snum);
        BUG_TRAP(inet_sk(sk)->bind_hash == tb);
        ret = 0;

fail_unlock:
        spin_unlock(&head->lock);
fail:
        local_bh_enable();
        return ret;
}

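/*
 * Note on the snum == 0 path above: the search resumes at
 * tcp_hashinfo.port_rover (the field this patch moves into
 * inet_hashinfo so IPv4 and IPv6 share one rover) and wraps once
 * through [low, high] taken from sysctl_local_port_range, i.e. the
 * net.ipv4.ip_local_port_range sysctl.
 */
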
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
        struct hlist_head *list;
        rwlock_t *lock;

        BUG_TRAP(sk_unhashed(sk));

        if (sk->sk_state == TCP_LISTEN) {
                list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
                lock = &tcp_hashinfo.lhash_lock;
                tcp_listen_wlock();
        } else {
                sk->sk_hashent = tcp_v6_sk_hashfn(sk);
                list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
                lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
                write_lock(lock);
        }

        __sk_add_node(sk, list);
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(lock);
}

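/*
 * Two different tables are in play above: listeners go into
 * tcp_hashinfo.listening_hash under the global lhash_lock (written
 * via tcp_listen_wlock()), everything else into the per-bucket
 * locked ehash chain picked by tcp_v6_sk_hashfn().
 */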

static void tcp_v6_hash(struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                struct tcp_sock *tp = tcp_sk(sk);

                if (tp->af_specific == &ipv6_mapped) {
                        tcp_prot.hash(sk);
                        return;
                }
                local_bh_disable();
                __tcp_v6_hash(sk);
                local_bh_enable();
        }
}

static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
        struct sock *sk;
        struct hlist_node *node;
        struct sock *result = NULL;
        int score, hiscore;

        hiscore = 0;
        read_lock(&tcp_hashinfo.lhash_lock);
        sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
                if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        score = 1;
                        if (!ipv6_addr_any(&np->rcv_saddr)) {
                                if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
                                        continue;
                                score++;
                        }
                        if (sk->sk_bound_dev_if) {
                                if (sk->sk_bound_dev_if != dif)
                                        continue;
                                score++;
                        }
                        if (score == 3) {
                                result = sk;
                                break;
                        }
                        if (score > hiscore) {
                                hiscore = score;
                                result = sk;
                        }
                }
        }
        if (result)
                sock_hold(result);
        read_unlock(&tcp_hashinfo.lhash_lock);
        return result;
}

/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
                                                       struct in6_addr *daddr, u16 hnum,
                                                       int dif)
{
        struct inet_ehash_bucket *head;
        struct sock *sk;
        struct hlist_node *node;
        __u32 ports = TCP_COMBINED_PORTS(sport, hnum);
        int hash;

        /* Optimize here for direct hit, only listening connections can
         * have wildcards anyways.
         */
        hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
        head = &tcp_hashinfo.ehash[hash];
        read_lock(&head->lock);
        sk_for_each(sk, node, &head->chain) {
                /* For IPV6 do the cheaper port and family tests first. */
                if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
                        goto hit; /* You sunk my battleship! */
        }
        /* Must check for a TIME_WAIT'er before going to listener hash. */
        sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
                /* FIXME: acme: check this... */
                struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk->sk_family                == PF_INET6) {
                        if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
                           ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
                           (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
                                goto hit;
                }
        }
        read_unlock(&head->lock);
        return NULL;

hit:
        sock_hold(sk);
        read_unlock(&head->lock);
        return sk;
}


static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                           struct in6_addr *daddr, u16 hnum,
                                           int dif)
{
        struct sock *sk;

        sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);

        if (sk)
                return sk;

        return tcp_v6_lookup_listener(daddr, hnum, dif);
}

inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
                                  struct in6_addr *daddr, u16 dport,
                                  int dif)
{
        struct sock *sk;

        local_bh_disable();
        sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
        local_bh_enable();

        return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);

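/*
 * A minimal caller sketch (hypothetical): tcp_v6_lookup() handles
 * bottom halves itself and returns the socket with a reference held,
 * so callers drop it with sock_put() when done, as tcp_v6_err() below
 * does:
 *
 *      struct sock *sk = tcp_v6_lookup(&saddr, th->source,
 *                                      &daddr, th->dest, dif);
 *      if (sk) {
 *              ...
 *              sock_put(sk);
 *      }
 */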

/*
 * Open request hash tables.
 */
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
        u32 a, b, c;

        a = raddr->s6_addr32[0];
        b = raddr->s6_addr32[1];
        c = raddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += rnd;
        __jhash_mix(a, b, c);

        a += raddr->s6_addr32[3];
        b += (u32) rport;
        __jhash_mix(a, b, c);

        return c & (TCP_SYNQ_HSIZE - 1);
}

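/*
 * This is an open-coded Jenkins mix (the same primitive behind
 * <linux/jhash.h>) over the remote address and port, salted with the
 * per-listener hash_rnd; the mask again relies on TCP_SYNQ_HSIZE
 * being a power of two.
 */
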
static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
                                              struct request_sock ***prevp,
                                              __u16 rport,
                                              struct in6_addr *raddr,
                                              struct in6_addr *laddr,
                                              int iif)
{
        struct listen_sock *lopt = tp->accept_queue.listen_opt;
        struct request_sock *req, **prev;

        for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
             (req = *prev) != NULL;
             prev = &req->dl_next) {
                const struct tcp6_request_sock *treq = tcp6_rsk(req);

                if (inet_rsk(req)->rmt_port == rport &&
                    req->rsk_ops->family == AF_INET6 &&
                    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
                    ipv6_addr_equal(&treq->loc_addr, laddr) &&
                    (!treq->iif || treq->iif == iif)) {
                        BUG_TRAP(req->sk == NULL);
                        *prevp = prev;
                        return req;
                }
        }

        return NULL;
}

static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
                                   struct in6_addr *saddr,
                                   struct in6_addr *daddr,
                                   unsigned long base)
{
        return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
        if (skb->protocol == htons(ETH_P_IPV6)) {
                return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
                                                    skb->nh.ipv6h->saddr.s6_addr32,
                                                    skb->h.th->dest,
                                                    skb->h.th->source);
        } else {
                return secure_tcp_sequence_number(skb->nh.iph->daddr,
                                                  skb->nh.iph->saddr,
                                                  skb->h.th->dest,
                                                  skb->h.th->source);
        }
}

static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
                                      struct tcp_tw_bucket **twp)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *daddr = &np->rcv_saddr;
        struct in6_addr *saddr = &np->daddr;
        int dif = sk->sk_bound_dev_if;
        u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
        int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
        struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
        struct sock *sk2;
        struct hlist_node *node;
        struct tcp_tw_bucket *tw;

        write_lock(&head->lock);

        /* Check TIME-WAIT sockets first. */
        sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
                tw = (struct tcp_tw_bucket*)sk2;

                if(*((__u32 *)&(tw->tw_dport))  == ports        &&
                   sk2->sk_family               == PF_INET6     &&
                   ipv6_addr_equal(&tw->tw_v6_daddr, saddr)     &&
                   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
                   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
                        struct tcp_sock *tp = tcp_sk(sk);

                        if (tw->tw_ts_recent_stamp &&
                            (!twp || (sysctl_tcp_tw_reuse &&
                                      xtime.tv_sec -
                                      tw->tw_ts_recent_stamp > 1))) {
                                /* See comment in tcp_ipv4.c */
                                tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
                                if (!tp->write_seq)
                                        tp->write_seq = 1;
                                tp->rx_opt.ts_recent = tw->tw_ts_recent;
                                tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
                                sock_hold(sk2);
                                goto unique;
                        } else
                                goto not_unique;
                }
        }
        tw = NULL;

        /* And established part... */
        sk_for_each(sk2, node, &head->chain) {
                if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
                        goto not_unique;
        }

unique:
        BUG_TRAP(sk_unhashed(sk));
        __sk_add_node(sk, &head->chain);
        sk->sk_hashent = hash;
        sock_prot_inc_use(sk->sk_prot);
        write_unlock(&head->lock);

        if (twp) {
                *twp = tw;
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
                tcp_tw_deschedule(tw);
                NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

                tcp_tw_put(tw);
        }
        return 0;

not_unique:
        write_unlock(&head->lock);
        return -EADDRNOTAVAIL;
}

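/*
 * The TIME-WAIT branch above recycles a timestamped 2MSL socket for a
 * new outgoing connection: unconditionally when the caller passed no
 * twp to take ownership, otherwise only if sysctl_tcp_tw_reuse
 * (net.ipv4.tcp_tw_reuse) is set and the bucket's last timestamp is
 * over a second old.  write_seq starts 65535 + 2 past tw_snd_nxt so
 * the new incarnation cannot collide with old segments.
 */
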
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
        const struct inet_sock *inet = inet_sk(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);

        return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
                                           np->daddr.s6_addr32,
                                           inet->dport);
}

static int tcp_v6_hash_connect(struct sock *sk)
{
        unsigned short snum = inet_sk(sk)->num;
        struct inet_bind_hashbucket *head;
        struct inet_bind_bucket *tb;
        int ret;

        if (!snum) {
                int low = sysctl_local_port_range[0];
                int high = sysctl_local_port_range[1];
                int range = high - low;
                int i;
                int port;
                static u32 hint;
                u32 offset = hint + tcpv6_port_offset(sk);
                struct hlist_node *node;
                struct tcp_tw_bucket *tw = NULL;

                local_bh_disable();
                for (i = 1; i <= range; i++) {
                        port = low + (i + offset) % range;
                        head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
                        spin_lock(&head->lock);

                        /* Does not bother with rcv_saddr checks,
                         * because the established check is already
                         * unique enough.
                         */
                        inet_bind_bucket_for_each(tb, node, &head->chain) {
                                if (tb->port == port) {
                                        BUG_TRAP(!hlist_empty(&tb->owners));
                                        if (tb->fastreuse >= 0)
                                                goto next_port;
                                        if (!__tcp_v6_check_established(sk,
                                                                        port,
                                                                        &tw))
                                                goto ok;
                                        goto next_port;
                                }
                        }

                        tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
                        if (!tb) {
                                spin_unlock(&head->lock);
                                break;
                        }
                        tb->fastreuse = -1;
                        goto ok;

                next_port:
                        spin_unlock(&head->lock);
                }
                local_bh_enable();

                return -EADDRNOTAVAIL;

ok:
                hint += i;

                /* Head lock still held and bh's disabled */
                inet_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->sport = htons(port);
                        __tcp_v6_hash(sk);
                }
                spin_unlock(&head->lock);

                if (tw) {
                        tcp_tw_deschedule(tw);
                        tcp_tw_put(tw);
                }

                ret = 0;
                goto out;
        }

        head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
        tb   = inet_sk(sk)->bind_hash;
        spin_lock_bh(&head->lock);

        if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
                __tcp_v6_hash(sk);
                spin_unlock_bh(&head->lock);
                return 0;
        } else {
                spin_unlock(&head->lock);
                /* No definite answer... Walk to established hash table */
                ret = __tcp_v6_check_established(sk, snum, NULL);
out:
                local_bh_enable();
                return ret;
        }
}

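/*
 * The ephemeral walk above starts at a per-destination offset from
 * secure_tcpv6_port_ephemeral() plus the static 'hint', which
 * remembers how far earlier connects got.  Buckets created here are
 * marked fastreuse = -1, and the tb->fastreuse >= 0 test skips
 * bind()-owned ports, so connect()-allocated ports are only reused
 * after passing __tcp_v6_check_established().
 */
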
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
        return IP6CB(skb)->iif;
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
                          int addr_len)
{
        struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
        struct in6_addr *saddr = NULL, *final_p = NULL, final;
        struct flowi fl;
        struct dst_entry *dst;
        int addr_type;
        int err;

        if (addr_len < SIN6_LEN_RFC2133)
                return -EINVAL;

        if (usin->sin6_family != AF_INET6)
                return(-EAFNOSUPPORT);

        memset(&fl, 0, sizeof(fl));

        if (np->sndflow) {
                fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
                IP6_ECN_flow_init(fl.fl6_flowlabel);
                if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
                        struct ip6_flowlabel *flowlabel;
                        flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
                        if (flowlabel == NULL)
                                return -EINVAL;
                        ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
                        fl6_sock_release(flowlabel);
                }
        }

        /*
         *      connect() to INADDR_ANY means loopback (BSD'ism).
         */

        if(ipv6_addr_any(&usin->sin6_addr))
                usin->sin6_addr.s6_addr[15] = 0x1;

        addr_type = ipv6_addr_type(&usin->sin6_addr);

        if(addr_type & IPV6_ADDR_MULTICAST)
                return -ENETUNREACH;

        if (addr_type&IPV6_ADDR_LINKLOCAL) {
                if (addr_len >= sizeof(struct sockaddr_in6) &&
                    usin->sin6_scope_id) {
                        /* If interface is set while binding, indices
                         * must coincide.
                         */
                        if (sk->sk_bound_dev_if &&
                            sk->sk_bound_dev_if != usin->sin6_scope_id)
                                return -EINVAL;

                        sk->sk_bound_dev_if = usin->sin6_scope_id;
                }

                /* Connect to link-local address requires an interface */
                if (!sk->sk_bound_dev_if)
                        return -EINVAL;
        }

        if (tp->rx_opt.ts_recent_stamp &&
            !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
                tp->rx_opt.ts_recent = 0;
                tp->rx_opt.ts_recent_stamp = 0;
                tp->write_seq = 0;
        }

        ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
        np->flow_label = fl.fl6_flowlabel;

        /*
         *      TCP over IPv4
         */

        if (addr_type == IPV6_ADDR_MAPPED) {
                u32 exthdrlen = tp->ext_header_len;
                struct sockaddr_in sin;

                SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

                if (__ipv6_only_sock(sk))
                        return -ENETUNREACH;

                sin.sin_family = AF_INET;
                sin.sin_port = usin->sin6_port;
                sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

                tp->af_specific = &ipv6_mapped;
                sk->sk_backlog_rcv = tcp_v4_do_rcv;

                err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

                if (err) {
                        tp->ext_header_len = exthdrlen;
                        tp->af_specific = &ipv6_specific;
                        sk->sk_backlog_rcv = tcp_v6_do_rcv;
                        goto failure;
                } else {
                        ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->saddr);
                        ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
                                      inet->rcv_saddr);
                }

                return err;
        }

        if (!ipv6_addr_any(&np->rcv_saddr))
                saddr = &np->rcv_saddr;

        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
        ipv6_addr_copy(&fl.fl6_src,
                       (saddr ? saddr : &np->saddr));
        fl.oif = sk->sk_bound_dev_if;
        fl.fl_ip_dport = usin->sin6_port;
        fl.fl_ip_sport = inet->sport;

        if (np->opt && np->opt->srcrt) {
                struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
                ipv6_addr_copy(&final, &fl.fl6_dst);
                ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                final_p = &final;
        }

        err = ip6_dst_lookup(sk, &dst, &fl);
        if (err)
                goto failure;
        if (final_p)
                ipv6_addr_copy(&fl.fl6_dst, final_p);

        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                dst_release(dst);
                goto failure;
        }

        if (saddr == NULL) {
                saddr = &fl.fl6_src;
                ipv6_addr_copy(&np->rcv_saddr, saddr);
        }

        /* set the source address */
        ipv6_addr_copy(&np->saddr, saddr);
        inet->rcv_saddr = LOOPBACK4_IPV6;

        ip6_dst_store(sk, dst, NULL);
        sk->sk_route_caps = dst->dev->features &
                ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

        tp->ext_header_len = 0;
        if (np->opt)
                tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

        tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

        inet->dport = usin->sin6_port;

        tcp_set_state(sk, TCP_SYN_SENT);
        err = tcp_v6_hash_connect(sk);
        if (err)
                goto late_failure;

        if (!tp->write_seq)
                tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
                                                             np->daddr.s6_addr32,
                                                             inet->sport,
                                                             inet->dport);

        err = tcp_connect(sk);
        if (err)
                goto late_failure;

        return 0;

late_failure:
        tcp_set_state(sk, TCP_CLOSE);
        __sk_dst_reset(sk);
failure:
        inet->dport = 0;
        sk->sk_route_caps = 0;
        return err;
}

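/*
 * The IPV6_ADDR_MAPPED branch above lets an AF_INET6 socket reach an
 * IPv4 peer through a mapped address; a userspace sketch (addresses
 * are examples only):
 *
 *      struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6,
 *                                   .sin6_port = htons(80) };
 *      inet_pton(AF_INET6, "::ffff:192.0.2.1", &sin6.sin6_addr);
 *      connect(fd, (struct sockaddr *)&sin6, sizeof(sin6));
 *
 * With IPV6_V6ONLY set the same connect fails with ENETUNREACH, per
 * the __ipv6_only_sock() check.
 */
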
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                int type, int code, int offset, __u32 info)
{
        struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
        struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
        struct ipv6_pinfo *np;
        struct sock *sk;
        int err;
        struct tcp_sock *tp;
        __u32 seq;

        sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

        if (sk == NULL) {
                ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
                return;
        }

        if (sk->sk_state == TCP_TIME_WAIT) {
                tcp_tw_put((struct tcp_tw_bucket*)sk);
                return;
        }

        bh_lock_sock(sk);
        if (sock_owned_by_user(sk))
                NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

        if (sk->sk_state == TCP_CLOSE)
                goto out;

        tp = tcp_sk(sk);
        seq = ntohl(th->seq);
        if (sk->sk_state != TCP_LISTEN &&
            !between(seq, tp->snd_una, tp->snd_nxt)) {
                NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }

        np = inet6_sk(sk);

        if (type == ICMPV6_PKT_TOOBIG) {
                struct dst_entry *dst = NULL;

                if (sock_owned_by_user(sk))
                        goto out;
                if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
                        goto out;

                /* icmp should have updated the destination cache entry */
                dst = __sk_dst_check(sk, np->dst_cookie);

                if (dst == NULL) {
                        struct inet_sock *inet = inet_sk(sk);
                        struct flowi fl;

                        /* BUGGG_FUTURE: Again, it is not clear how
                           to handle rthdr case. Ignore this complexity
                           for now.
                         */
                        memset(&fl, 0, sizeof(fl));
                        fl.proto = IPPROTO_TCP;
                        ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
                        ipv6_addr_copy(&fl.fl6_src, &np->saddr);
                        fl.oif = sk->sk_bound_dev_if;
                        fl.fl_ip_dport = inet->dport;
                        fl.fl_ip_sport = inet->sport;

                        if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                        if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
                                sk->sk_err_soft = -err;
                                goto out;
                        }

                } else
                        dst_hold(dst);

                if (tp->pmtu_cookie > dst_mtu(dst)) {
                        tcp_sync_mss(sk, dst_mtu(dst));
                        tcp_simple_retransmit(sk);
                } /* else let the usual retransmit timer handle it */
                dst_release(dst);
                goto out;
        }

        icmpv6_err_convert(type, code, &err);

        /* Might be for a request_sock */
        switch (sk->sk_state) {
                struct request_sock *req, **prev;
        case TCP_LISTEN:
                if (sock_owned_by_user(sk))
                        goto out;

                req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
                                        &hdr->saddr, tcp_v6_iif(skb));
                if (!req)
                        goto out;

                /* ICMPs are not backlogged, hence we cannot get
                 * an established socket here.
                 */
                BUG_TRAP(req->sk == NULL);

                if (seq != tcp_rsk(req)->snt_isn) {
                        NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                        goto out;
                }

                tcp_synq_drop(sk, req, prev);
                goto out;

        case TCP_SYN_SENT:
        case TCP_SYN_RECV:  /* Cannot happen.
                               It can, if SYNs are crossed. --ANK */
                if (!sock_owned_by_user(sk)) {
                        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
                        sk->sk_err = err;
                        sk->sk_error_report(sk);        /* Wake people up to see the error (see connect in sock.c) */

                        tcp_done(sk);
                } else
                        sk->sk_err_soft = err;
                goto out;
        }

        if (!sock_owned_by_user(sk) && np->recverr) {
                sk->sk_err = err;
                sk->sk_error_report(sk);
        } else
                sk->sk_err_soft = err;

out:
        bh_unlock_sock(sk);
        sock_put(sk);
}


static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
                              struct dst_entry *dst)
{
        struct tcp6_request_sock *treq = tcp6_rsk(req);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff * skb;
        struct ipv6_txoptions *opt = NULL;
        struct in6_addr * final_p = NULL, final;
        struct flowi fl;
        int err = -1;

        memset(&fl, 0, sizeof(fl));
        fl.proto = IPPROTO_TCP;
        ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
        ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
        fl.fl6_flowlabel = 0;
        fl.oif = treq->iif;
        fl.fl_ip_dport = inet_rsk(req)->rmt_port;
        fl.fl_ip_sport = inet_sk(sk)->sport;

        if (dst == NULL) {
                opt = np->opt;
                if (opt == NULL &&
                    np->rxopt.bits.srcrt == 2 &&
                    treq->pktopts) {
                        struct sk_buff *pktopts = treq->pktopts;
                        struct inet6_skb_parm *rxopt = IP6CB(pktopts);
                        if (rxopt->srcrt)
                                opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
                }

                if (opt && opt->srcrt) {
                        struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
                        ipv6_addr_copy(&final, &fl.fl6_dst);
                        ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
                        final_p = &final;
                }

                err = ip6_dst_lookup(sk, &dst, &fl);
                if (err)
                        goto done;
                if (final_p)
                        ipv6_addr_copy(&fl.fl6_dst, final_p);
                if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
                        goto done;
        }

        skb = tcp_make_synack(sk, dst, req);
        if (skb) {
                struct tcphdr *th = skb->h.th;

                th->check = tcp_v6_check(th, skb->len,
                                         &treq->loc_addr, &treq->rmt_addr,
                                         csum_partial((char *)th, skb->len, skb->csum));

                ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
                err = ip6_xmit(sk, skb, &fl, opt, 0);
                if (err == NET_XMIT_CN)
                        err = 0;
        }

done:
        dst_release(dst);
        if (opt && opt != np->opt)
                sock_kfree_s(sk, opt, opt->tot_len);
        return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
        if (tcp6_rsk(req)->pktopts)
                kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
        .family         =       AF_INET6,
        .obj_size       =       sizeof(struct tcp6_request_sock),
        .rtx_syn_ack    =       tcp_v6_send_synack,
        .send_ack       =       tcp_v6_reqsk_send_ack,
        .destructor     =       tcp_v6_reqsk_destructor,
        .send_reset     =       tcp_v6_send_reset
};

static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (np->rxopt.all) {
                if ((opt->hop && np->rxopt.bits.hopopts) ||
                    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
                     np->rxopt.bits.rxflow) ||
                    (opt->srcrt && np->rxopt.bits.srcrt) ||
                    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
                        return 1;
        }
        return 0;
}


static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
                              struct sk_buff *skb)
{
        struct ipv6_pinfo *np = inet6_sk(sk);

        if (skb->ip_summed == CHECKSUM_HW) {
                th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
                skb->csum = offsetof(struct tcphdr, check);
        } else {
                th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
                                            csum_partial((char *)th, th->doff<<2,
                                                         skb->csum));
        }
}


static void tcp_v6_send_reset(struct sk_buff *skb)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;

        if (th->rst)
                return;

        if (!ipv6_unicast_destination(skb))
                return;

        /*
         * We need to grab some memory, and put together an RST,
         * and then put it into the queue to be sent.
         */

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

        t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = sizeof(*t1)/4;
        t1->rst = 1;

        if(th->ack) {
                t1->seq = th->ack_seq;
        } else {
                t1->ack = 1;
                t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
                                    + skb->len - (th->doff<<2));
        }

        buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    sizeof(*t1), IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        /* sk = NULL, but it is safe for now. RST socket required. */
        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }

                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
                return;
        }

        kfree_skb(buff);
}

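/*
 * The sequence numbers above follow RFC 793 reset generation: if the
 * offending segment carried an ACK, the RST reuses its ack_seq as its
 * own seq; otherwise the RST ACKs everything the segment occupied,
 * i.e. the payload length plus one for each of SYN and FIN.
 */
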
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
        struct tcphdr *th = skb->h.th, *t1;
        struct sk_buff *buff;
        struct flowi fl;
        int tot_len = sizeof(struct tcphdr);

        if (ts)
                tot_len += 3*4;

        buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
                         GFP_ATOMIC);
        if (buff == NULL)
                return;

        skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

        t1 = (struct tcphdr *) skb_push(buff,tot_len);

        /* Swap the send and the receive. */
        memset(t1, 0, sizeof(*t1));
        t1->dest = th->source;
        t1->source = th->dest;
        t1->doff = tot_len/4;
        t1->seq = htonl(seq);
        t1->ack_seq = htonl(ack);
        t1->ack = 1;
        t1->window = htons(win);

        if (ts) {
                u32 *ptr = (u32*)(t1 + 1);
                *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                               (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
                *ptr++ = htonl(tcp_time_stamp);
                *ptr = htonl(ts);
        }

        buff->csum = csum_partial((char *)t1, tot_len, 0);

        memset(&fl, 0, sizeof(fl));
        ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

        t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
                                    tot_len, IPPROTO_TCP,
                                    buff->csum);

        fl.proto = IPPROTO_TCP;
        fl.oif = tcp_v6_iif(skb);
        fl.fl_ip_dport = t1->dest;
        fl.fl_ip_sport = t1->source;

        if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
                if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
                        dst_release(buff->dst);
                        return;
                }
                ip6_xmit(NULL, buff, &fl, NULL, 0);
                TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
                return;
        }

        kfree_skb(buff);
}

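/*
 * The 3*4 bytes appended when 'ts' is set are the aligned timestamp
 * option from RFC 1323: two NOPs, the TCPOPT_TIMESTAMP kind/length
 * word, then the TSval (tcp_time_stamp) and TSecr ('ts') pair.
 */
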
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

        tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
                        tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

        tcp_tw_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
        tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}


static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
        struct request_sock *req, **prev;
        struct tcphdr *th = skb->h.th;
        struct tcp_sock *tp = tcp_sk(sk);
        struct sock *nsk;

        /* Find possible connection requests. */
        req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
                                &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
        if (req)
                return tcp_check_req(sk, skb, req, prev);

        nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
                                          th->source,
                                          &skb->nh.ipv6h->daddr,
                                          ntohs(th->dest),
                                          tcp_v6_iif(skb));

        if (nsk) {
                if (nsk->sk_state != TCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
                tcp_tw_put((struct tcp_tw_bucket*)nsk);
                return NULL;
        }

#if 0 /*def CONFIG_SYN_COOKIES*/
        if (!th->rst && !th->syn && th->ack)
                sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
        return sk;
}

static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
        struct tcp_sock *tp = tcp_sk(sk);
        struct listen_sock *lopt = tp->accept_queue.listen_opt;
        u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

        reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
        tcp_synq_added(sk);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
        struct tcp6_request_sock *treq;
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct tcp_options_received tmp_opt;
        struct tcp_sock *tp = tcp_sk(sk);
        struct request_sock *req = NULL;
        __u32 isn = TCP_SKB_CB(skb)->when;

        if (skb->protocol == htons(ETH_P_IP))
                return tcp_v4_conn_request(sk, skb);

        if (!ipv6_unicast_destination(skb))
                goto drop;

        /*
         *      There are no SYN attacks on IPv6, yet...
         */
        if (tcp_synq_is_full(sk) && !isn) {
                if (net_ratelimit())
                        printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
                goto drop;
        }

        if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
                goto drop;

        req = reqsk_alloc(&tcp6_request_sock_ops);
        if (req == NULL)
                goto drop;

        tcp_clear_options(&tmp_opt);
        tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
        tmp_opt.user_mss = tp->rx_opt.user_mss;

        tcp_parse_options(skb, &tmp_opt, 0);

        tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
        tcp_openreq_init(req, &tmp_opt, skb);

        treq = tcp6_rsk(req);
        ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
        ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
        TCP_ECN_create_request(req, skb->h.th);
        treq->pktopts = NULL;
        if (ipv6_opt_accepted(sk, skb) ||
            np->rxopt.bits.rxinfo ||
            np->rxopt.bits.rxhlim) {
                atomic_inc(&skb->users);
                treq->pktopts = skb;
        }
        treq->iif = sk->sk_bound_dev_if;

        /* So that link locals have meaning */
        if (!sk->sk_bound_dev_if &&
            ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
                treq->iif = tcp_v6_iif(skb);

        if (isn == 0)
                isn = tcp_v6_init_sequence(sk, skb);

        tcp_rsk(req)->snt_isn = isn;

        if (tcp_v6_send_synack(sk, req, NULL))
                goto drop;

        tcp_v6_synq_add(sk, req);

        return 0;

drop:
        if (req)
                reqsk_free(req);

        TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
        return 0; /* don't send reset */
}

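/*
 * Two independent limits gate new requests above: tcp_synq_is_full()
 * bounds embryonic SYN_RECV entries (the synflood warning path),
 * while sk_acceptq_is_full() bounds connections waiting in accept();
 * the latter only drops once more than one pending request is still
 * young enough to be retransmitting anyway.
 */
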
1367 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1368                                           struct request_sock *req,
1369                                           struct dst_entry *dst)
1370 {
1371         struct tcp6_request_sock *treq = tcp6_rsk(req);
1372         struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1373         struct tcp6_sock *newtcp6sk;
1374         struct inet_sock *newinet;
1375         struct tcp_sock *newtp;
1376         struct sock *newsk;
1377         struct ipv6_txoptions *opt;
1378
1379         if (skb->protocol == htons(ETH_P_IP)) {
1380                 /*
1381                  *      v6 mapped
1382                  */
1383
1384                 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1385
1386                 if (newsk == NULL) 
1387                         return NULL;
1388
1389                 newtcp6sk = (struct tcp6_sock *)newsk;
1390                 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1391
1392                 newinet = inet_sk(newsk);
1393                 newnp = inet6_sk(newsk);
1394                 newtp = tcp_sk(newsk);
1395
1396                 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1397
1398                 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1399                               newinet->daddr);
1400
1401                 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1402                               newinet->saddr);
1403
1404                 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1405
1406                 newtp->af_specific = &ipv6_mapped;
1407                 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1408                 newnp->pktoptions  = NULL;
1409                 newnp->opt         = NULL;
1410                 newnp->mcast_oif   = tcp_v6_iif(skb);
1411                 newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;
1412
1413                 /*
1414                  * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1415                  * here, tcp_create_openreq_child now does this for us, see the comment in
1416                  * that function for the gory details. -acme
1417                  */
1418
1419                 /* It is tricky place. Until this moment IPv4 tcp
1420                    worked with IPv6 af_tcp.af_specific.
1421                    Sync it now.
1422                  */
1423                 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1424
1425                 return newsk;
1426         }
1427
1428         opt = np->opt;
1429
1430         if (sk_acceptq_is_full(sk))
1431                 goto out_overflow;
1432
1433         if (np->rxopt.bits.srcrt == 2 &&
1434             opt == NULL && treq->pktopts) {
1435                 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1436                 if (rxopt->srcrt)
1437                         opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1438         }
1439
1440         if (dst == NULL) {
1441                 struct in6_addr *final_p = NULL, final;
1442                 struct flowi fl;
1443
1444                 memset(&fl, 0, sizeof(fl));
1445                 fl.proto = IPPROTO_TCP;
1446                 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1447                 if (opt && opt->srcrt) {
1448                         struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1449                         ipv6_addr_copy(&final, &fl.fl6_dst);
1450                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1451                         final_p = &final;
1452                 }
1453                 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1454                 fl.oif = sk->sk_bound_dev_if;
1455                 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1456                 fl.fl_ip_sport = inet_sk(sk)->sport;
1457
1458                 if (ip6_dst_lookup(sk, &dst, &fl))
1459                         goto out;
1460
1461                 if (final_p)
1462                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1463
1464                 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1465                         goto out;
1466         } 
1467
1468         newsk = tcp_create_openreq_child(sk, req, skb);
1469         if (newsk == NULL)
1470                 goto out;
1471
1472         /*
1473          * No need to charge this sock to the relevant IPv6 refcnt debug socks
1474          * count here, tcp_create_openreq_child now does this for us, see the
1475          * comment in that function for the gory details. -acme
1476          */
1477
1478         ip6_dst_store(newsk, dst, NULL);
1479         newsk->sk_route_caps = dst->dev->features &
1480                 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1481
1482         newtcp6sk = (struct tcp6_sock *)newsk;
1483         inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1484
1485         newtp = tcp_sk(newsk);
1486         newinet = inet_sk(newsk);
1487         newnp = inet6_sk(newsk);
1488
1489         memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1490
1491         ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1492         ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1493         ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1494         newsk->sk_bound_dev_if = treq->iif;
1495
1496         /* Now IPv6 options... 
1497
1498            First: no IPv4 options.
1499          */
1500         newinet->opt = NULL;
1501
1502         /* Clone RX bits */
1503         newnp->rxopt.all = np->rxopt.all;
1504
1505         /* Clone pktoptions received with SYN */
1506         newnp->pktoptions = NULL;
1507         if (treq->pktopts != NULL) {
1508                 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1509                 kfree_skb(treq->pktopts);
1510                 treq->pktopts = NULL;
1511                 if (newnp->pktoptions)
1512                         skb_set_owner_r(newnp->pktoptions, newsk);
1513         }
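             /* skb_set_owner_r() above ties the cloned skb to newsk, charging
              * it against newsk's receive-buffer accounting so that the memory
              * is released together with the child socket.
              */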
1514         newnp->opt        = NULL;
1515         newnp->mcast_oif  = tcp_v6_iif(skb);
1516         newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1517
1518         /* Clone native IPv6 options from the listening socket (if any).
1519
1520            Yes, keeping a reference count would be much more clever,
1521            but we do one more thing here: we reattach the optmem
1522            charge to newsk.
1523          */
1524         if (opt) {
1525                 newnp->opt = ipv6_dup_options(newsk, opt);
1526                 if (opt != np->opt)
1527                         sock_kfree_s(sk, opt, opt->tot_len);
1528         }
1529
1530         newtp->ext_header_len = 0;
1531         if (newnp->opt)
1532                 newtp->ext_header_len = newnp->opt->opt_nflen +
1533                                         newnp->opt->opt_flen;
1534
1535         tcp_sync_mss(newsk, dst_mtu(dst));
1536         newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1537         tcp_initialize_rcv_mss(newsk);
1538
1539         newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1540
1541         __tcp_v6_hash(newsk);
1542         inet_inherit_port(&tcp_hashinfo, sk, newsk);
1543
1544         return newsk;
1545
1546 out_overflow:
1547         NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1548 out:
1549         NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1550         if (opt && opt != np->opt)
1551                 sock_kfree_s(sk, opt, opt->tot_len);
1552         dst_release(dst);
1553         return NULL;
1554 }
1555
1556 static int tcp_v6_checksum_init(struct sk_buff *skb)
1557 {
1558         if (skb->ip_summed == CHECKSUM_HW) {
1559                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1560                 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1561                                   &skb->nh.ipv6h->daddr,skb->csum))
1562                         return 0;
1563                 LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
1564         }
1565         if (skb->len <= 76) {
1566                 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1567                                  &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1568                         return -1;
1569                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570         } else {
1571                 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1572                                           &skb->nh.ipv6h->daddr,0);
1573         }
1574         return 0;
1575 }
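/* Summary of tcp_v6_checksum_init(): if the NIC already verified the sum
 * (CHECKSUM_HW), we only need the pseudo-header fold to confirm it; short
 * packets (<= 76 bytes) are verified in full right away; anything else gets
 * skb->csum seeded with the complemented pseudo-header sum so the later
 * copy-and-checksum path can finish the verification.
 *
 * For reference, a minimal sketch of the full software check, assuming
 * tcp_v6_check() is the usual csum_ipv6_magic() wrapper:
 *
 *	unsigned int sum = skb_checksum(skb, 0, skb->len, 0);
 *	if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr,
 *			    skb->len, IPPROTO_TCP, sum) != 0)
 *		goto bad_checksum;	-- non-zero fold => corrupt segment
 */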
1576
1577 /* The socket must have its spinlock held when we get
1578  * here.
1579  *
1580  * We have a potential double-lock case here, so even when
1581  * doing backlog processing we use the BH locking scheme.
1582  * This is because we cannot sleep with the original spinlock
1583  * held.
1584  */
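/* The caller's pattern (see tcp_v6_rcv() below) is, in essence:
 *
 *	bh_lock_sock(sk);
 *	if (!sock_owned_by_user(sk))
 *		tcp_v6_do_rcv(sk, skb);		-- or via the prequeue
 *	else
 *		sk_add_backlog(sk, skb);	-- replayed at release_sock()
 *	bh_unlock_sock(sk);
 */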
1585 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1586 {
1587         struct ipv6_pinfo *np = inet6_sk(sk);
1588         struct tcp_sock *tp;
1589         struct sk_buff *opt_skb = NULL;
1590
1591         /* Imagine: the socket is IPv6, but an IPv4 packet arrives,
1592            goes to the IPv4 receive handler and is backlogged.
1593            From the backlog it always ends up here. Kerboom...
1594            Fortunately, tcp_rcv_established and rcv_established
1595            handle them correctly, but that is not the case with
1596            tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
1597          */
1598
1599         if (skb->protocol == htons(ETH_P_IP))
1600                 return tcp_v4_do_rcv(sk, skb);
1601
1602         if (sk_filter(sk, skb, 0))
1603                 goto discard;
1604
1605         /*
1606          *      socket locking is here for SMP purposes as backlog rcv
1607          *      is currently called with bh processing disabled.
1608          */
1609
1610         /* Do Stevens' IPV6_PKTOPTIONS.
1611
1612            Yes, guys, this is the only place in our code where we
1613            can implement it without affecting IPv4.
1614            The rest of the code is protocol independent,
1615            and I do not like the idea of uglifying IPv4.
1616
1617            Actually, the whole idea behind IPV6_PKTOPTIONS does not
1618            look very well thought out. For now we latch the options
1619            received in the last packet enqueued by TCP. Feel free
1620            to propose a better solution.
1621                                               --ANK (980728)
1622          */
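         /* From user space this surfaces (roughly -- the exact API contract
          * is an assumption here, not something this file defines) as:
          *
          *	char cbuf[256];
          *	socklen_t clen = sizeof(cbuf);
          *	getsockopt(fd, IPPROTO_IPV6, IPV6_PKTOPTIONS, cbuf, &clen);
          *
          * returning the latched ancillary data of the most recent segment,
          * to be walked with CMSG_FIRSTHDR()/CMSG_NXTHDR().
          */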
1623         if (np->rxopt.all)
1624                 opt_skb = skb_clone(skb, GFP_ATOMIC);
1625
1626         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1627                 TCP_CHECK_TIMER(sk);
1628                 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1629                         goto reset;
1630                 TCP_CHECK_TIMER(sk);
1631                 if (opt_skb)
1632                         goto ipv6_pktoptions;
1633                 return 0;
1634         }
1635
1636         if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1637                 goto csum_err;
1638
1639         if (sk->sk_state == TCP_LISTEN) { 
1640                 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1641                 if (!nsk)
1642                         goto discard;
1643
1644                 /*
1645                  * Queue it on the new socket if the new socket is active,
1646                  * otherwise we just short-circuit this and continue with
1647                  * the new socket.
1648                  */
1649                 if (nsk != sk) {
1650                         if (tcp_child_process(sk, nsk, skb))
1651                                 goto reset;
1652                         if (opt_skb)
1653                                 __kfree_skb(opt_skb);
1654                         return 0;
1655                 }
1656         }
1657
1658         TCP_CHECK_TIMER(sk);
1659         if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1660                 goto reset;
1661         TCP_CHECK_TIMER(sk);
1662         if (opt_skb)
1663                 goto ipv6_pktoptions;
1664         return 0;
1665
1666 reset:
1667         tcp_v6_send_reset(skb);
1668 discard:
1669         if (opt_skb)
1670                 __kfree_skb(opt_skb);
1671         kfree_skb(skb);
1672         return 0;
1673 csum_err:
1674         TCP_INC_STATS_BH(TCP_MIB_INERRS);
1675         goto discard;
1676
1677
1678 ipv6_pktoptions:
1679         /* You may ask, what is this?  We latch the skb only when:
1680
1681            1. the skb was enqueued by TCP;
1682            2. the skb was added to the tail of the read queue, not out of order;
1683            3. the socket is not in a passive state;
1684            4. finally, it really contains options the user wants to receive.
1685          */
1686         tp = tcp_sk(sk);
1687         if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1688             !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1689                 if (np->rxopt.bits.rxinfo)
1690                         np->mcast_oif = tcp_v6_iif(opt_skb);
1691                 if (np->rxopt.bits.rxhlim)
1692                         np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1693                 if (ipv6_opt_accepted(sk, opt_skb)) {
1694                         skb_set_owner_r(opt_skb, sk);
1695                         opt_skb = xchg(&np->pktoptions, opt_skb);
1696                 } else {
1697                         __kfree_skb(opt_skb);
1698                         opt_skb = xchg(&np->pktoptions, NULL);
1699                 }
1700         }
1701
1702         if (opt_skb)
1703                 kfree_skb(opt_skb);
1704         return 0;
1705 }
1706
1707 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1708 {
1709         struct sk_buff *skb = *pskb;
1710         struct tcphdr *th;      
1711         struct sock *sk;
1712         int ret;
1713
1714         if (skb->pkt_type != PACKET_HOST)
1715                 goto discard_it;
1716
1717         /*
1718          *      Count it even if it's bad.
1719          */
1720         TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1721
1722         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1723                 goto discard_it;
1724
1725         th = skb->h.th;
1726
1727         if (th->doff < sizeof(struct tcphdr)/4)
1728                 goto bad_packet;
1729         if (!pskb_may_pull(skb, th->doff*4))
1730                 goto discard_it;
1731
1732         if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1733              tcp_v6_checksum_init(skb) < 0))
1734                 goto bad_packet;
1735
1736         th = skb->h.th;
1737         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1738         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1739                                     skb->len - th->doff*4);
1740         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1741         TCP_SKB_CB(skb)->when = 0;
1742         TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1743         TCP_SKB_CB(skb)->sacked = 0;
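         /* Note: end_seq adds th->syn and th->fin because SYN and FIN each
          * occupy one unit of sequence space on top of the payload bytes.
          */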
1744
1745         sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1746                              &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1747
1748         if (!sk)
1749                 goto no_tcp_socket;
1750
1751 process:
1752         if (sk->sk_state == TCP_TIME_WAIT)
1753                 goto do_time_wait;
1754
1755         if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1756                 goto discard_and_relse;
1757
1758         if (sk_filter(sk, skb, 0))
1759                 goto discard_and_relse;
1760
1761         skb->dev = NULL;
1762
1763         bh_lock_sock(sk);
1764         ret = 0;
1765         if (!sock_owned_by_user(sk)) {
1766                 if (!tcp_prequeue(sk, skb))
1767                         ret = tcp_v6_do_rcv(sk, skb);
1768         } else
1769                 sk_add_backlog(sk, skb);
1770         bh_unlock_sock(sk);
1771
1772         sock_put(sk);
1773         return ret ? -1 : 0;
1774
1775 no_tcp_socket:
1776         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1777                 goto discard_it;
1778
1779         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1780 bad_packet:
1781                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1782         } else {
1783                 tcp_v6_send_reset(skb);
1784         }
1785
1786 discard_it:
1787
1788         /*
1789          *      Discard frame
1790          */
1791
1792         kfree_skb(skb);
1793         return 0;
1794
1795 discard_and_relse:
1796         sock_put(sk);
1797         goto discard_it;
1798
1799 do_time_wait:
1800         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1801                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1802                 goto discard_it;
1803         }
1804
1805         if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1806                 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1807                 tcp_tw_put((struct tcp_tw_bucket *) sk);
1808                 goto discard_it;
1809         }
1810
1811         switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
1812                                           skb, th, skb->len)) {
1813         case TCP_TW_SYN:
1814         {
1815                 struct sock *sk2;
1816
1817                 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1818                 if (sk2 != NULL) {
1819                         tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
1820                         tcp_tw_put((struct tcp_tw_bucket *)sk);
1821                         sk = sk2;
1822                         goto process;
1823                 }
1824                 /* Fall through to ACK */
1825         }
1826         case TCP_TW_ACK:
1827                 tcp_v6_timewait_ack(sk, skb);
1828                 break;
1829         case TCP_TW_RST:
1830                 goto no_tcp_socket;
1831         case TCP_TW_SUCCESS:;
1832         }
1833         goto discard_it;
1834 }
1835
1836 static int tcp_v6_rebuild_header(struct sock *sk)
1837 {
1838         int err;
1839         struct dst_entry *dst;
1840         struct ipv6_pinfo *np = inet6_sk(sk);
1841
1842         dst = __sk_dst_check(sk, np->dst_cookie);
1843
1844         if (dst == NULL) {
1845                 struct inet_sock *inet = inet_sk(sk);
1846                 struct in6_addr *final_p = NULL, final;
1847                 struct flowi fl;
1848
1849                 memset(&fl, 0, sizeof(fl));
1850                 fl.proto = IPPROTO_TCP;
1851                 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1852                 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1853                 fl.fl6_flowlabel = np->flow_label;
1854                 fl.oif = sk->sk_bound_dev_if;
1855                 fl.fl_ip_dport = inet->dport;
1856                 fl.fl_ip_sport = inet->sport;
1857
1858                 if (np->opt && np->opt->srcrt) {
1859                         struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1860                         ipv6_addr_copy(&final, &fl.fl6_dst);
1861                         ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1862                         final_p = &final;
1863                 }
1864
1865                 err = ip6_dst_lookup(sk, &dst, &fl);
1866                 if (err) {
1867                         sk->sk_route_caps = 0;
1868                         return err;
1869                 }
1870                 if (final_p)
1871                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1872
1873                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1874                         sk->sk_err_soft = -err;
1875                         dst_release(dst);
1876                         return err;
1877                 }
1878
1879                 ip6_dst_store(sk, dst, NULL);
1880                 sk->sk_route_caps = dst->dev->features &
1881                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1882         }
1883
1884         return 0;
1885 }
1886
1887 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1888 {
1889         struct sock *sk = skb->sk;
1890         struct inet_sock *inet = inet_sk(sk);
1891         struct ipv6_pinfo *np = inet6_sk(sk);
1892         struct flowi fl;
1893         struct dst_entry *dst;
1894         struct in6_addr *final_p = NULL, final;
1895
1896         memset(&fl, 0, sizeof(fl));
1897         fl.proto = IPPROTO_TCP;
1898         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1899         ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1900         fl.fl6_flowlabel = np->flow_label;
1901         IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1902         fl.oif = sk->sk_bound_dev_if;
1903         fl.fl_ip_sport = inet->sport;
1904         fl.fl_ip_dport = inet->dport;
1905
1906         if (np->opt && np->opt->srcrt) {
1907                 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1908                 ipv6_addr_copy(&final, &fl.fl6_dst);
1909                 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1910                 final_p = &final;
1911         }
1912
1913         dst = __sk_dst_check(sk, np->dst_cookie);
1914
1915         if (dst == NULL) {
1916                 int err = ip6_dst_lookup(sk, &dst, &fl);
1917
1918                 if (err) {
1919                         sk->sk_err_soft = -err;
1920                         return err;
1921                 }
1922
1923                 if (final_p)
1924                         ipv6_addr_copy(&fl.fl6_dst, final_p);
1925
1926                 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1927                         sk->sk_route_caps = 0;
1928                         dst_release(dst);
1929                         return err;
1930                 }
1931
1932                 ip6_dst_store(sk, dst, NULL);
1933                 sk->sk_route_caps = dst->dev->features &
1934                         ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1935         }
1936
1937         skb->dst = dst_clone(dst);
1938
1939         /* Restore the final destination after routing is done */
1940         ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1941
1942         return ip6_xmit(sk, skb, &fl, np->opt, 0);
1943 }
1944
1945 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1946 {
1947         struct ipv6_pinfo *np = inet6_sk(sk);
1948         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1949
1950         sin6->sin6_family = AF_INET6;
1951         ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1952         sin6->sin6_port = inet_sk(sk)->dport;
1953         /* We do not store received flowlabel for TCP */
1954         sin6->sin6_flowinfo = 0;
1955         sin6->sin6_scope_id = 0;
1956         if (sk->sk_bound_dev_if &&
1957             ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1958                 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1959 }
1960
1961 static int tcp_v6_remember_stamp(struct sock *sk)
1962 {
1963         /* Alas, not yet... */
1964         return 0;
1965 }
1966
1967 static struct tcp_func ipv6_specific = {
1968         .queue_xmit     =       tcp_v6_xmit,
1969         .send_check     =       tcp_v6_send_check,
1970         .rebuild_header =       tcp_v6_rebuild_header,
1971         .conn_request   =       tcp_v6_conn_request,
1972         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1973         .remember_stamp =       tcp_v6_remember_stamp,
1974         .net_header_len =       sizeof(struct ipv6hdr),
1975
1976         .setsockopt     =       ipv6_setsockopt,
1977         .getsockopt     =       ipv6_getsockopt,
1978         .addr2sockaddr  =       v6_addr2sockaddr,
1979         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1980 };
1981
1982 /*
1983  *      TCP over IPv4 via INET6 API
1984  */
1985
1986 static struct tcp_func ipv6_mapped = {
1987         .queue_xmit     =       ip_queue_xmit,
1988         .send_check     =       tcp_v4_send_check,
1989         .rebuild_header =       inet_sk_rebuild_header,
1990         .conn_request   =       tcp_v6_conn_request,
1991         .syn_recv_sock  =       tcp_v6_syn_recv_sock,
1992         .remember_stamp =       tcp_v4_remember_stamp,
1993         .net_header_len =       sizeof(struct iphdr),
1994
1995         .setsockopt     =       ipv6_setsockopt,
1996         .getsockopt     =       ipv6_getsockopt,
1997         .addr2sockaddr  =       v6_addr2sockaddr,
1998         .sockaddr_len   =       sizeof(struct sockaddr_in6)
1999 };
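/* Which of the two tcp_func tables a socket uses is decided at run time:
 * a plain IPv6 connection keeps ipv6_specific, while a connection to an
 * IPv4-mapped peer is switched to ipv6_mapped (see the mapped branch of
 * tcp_v6_syn_recv_sock() above), so every transmit and header rebuild goes
 * through the IPv4 routines underneath the INET6 API.
 */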
2000
2001
2002
2003 /* NOTE: A lot of things are set to zero explicitly by the call to
2004  *       sk_alloc(), so they need not be done here.
2005  */
2006 static int tcp_v6_init_sock(struct sock *sk)
2007 {
2008         struct tcp_sock *tp = tcp_sk(sk);
2009
2010         skb_queue_head_init(&tp->out_of_order_queue);
2011         tcp_init_xmit_timers(sk);
2012         tcp_prequeue_init(tp);
2013
2014         tp->rto  = TCP_TIMEOUT_INIT;
2015         tp->mdev = TCP_TIMEOUT_INIT;
2016
2017         /* So many TCP implementations out there (incorrectly) count the
2018          * initial SYN frame in their delayed-ACK and congestion control
2019          * algorithms that we must have the following bandaid to talk
2020          * efficiently to them.  -DaveM
2021          */
2022         tp->snd_cwnd = 2;
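         /* In other words: with an initial window of a single segment the
          * first data segment could sit out such a peer's delayed-ACK timer;
          * starting at two keeps the initial exchange moving.
          */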
2023
2024         /* See draft-stevens-tcpca-spec-01 for discussion of the
2025          * initialization of these values.
2026          */
2027         tp->snd_ssthresh = 0x7fffffff;
2028         tp->snd_cwnd_clamp = ~0;
2029         tp->mss_cache = 536;
2030
2031         tp->reordering = sysctl_tcp_reordering;
2032
2033         sk->sk_state = TCP_CLOSE;
2034
2035         tp->af_specific = &ipv6_specific;
2036         tp->ca_ops = &tcp_init_congestion_ops;
2037         sk->sk_write_space = sk_stream_write_space;
2038         sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2039
2040         sk->sk_sndbuf = sysctl_tcp_wmem[1];
2041         sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2042
2043         atomic_inc(&tcp_sockets_allocated);
2044
2045         return 0;
2046 }
2047
2048 static int tcp_v6_destroy_sock(struct sock *sk)
2049 {
2050         extern int tcp_v4_destroy_sock(struct sock *sk);
2051
2052         tcp_v4_destroy_sock(sk);
2053         return inet6_destroy_sock(sk);
2054 }
2055
2056 /* Proc filesystem TCPv6 sock list dumping. */
2057 static void get_openreq6(struct seq_file *seq, 
2058                          struct sock *sk, struct request_sock *req, int i, int uid)
2059 {
2060         struct in6_addr *dest, *src;
2061         int ttd = req->expires - jiffies;
2062
2063         if (ttd < 0)
2064                 ttd = 0;
2065
2066         src = &tcp6_rsk(req)->loc_addr;
2067         dest = &tcp6_rsk(req)->rmt_addr;
2068         seq_printf(seq,
2069                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2070                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2071                    i,
2072                    src->s6_addr32[0], src->s6_addr32[1],
2073                    src->s6_addr32[2], src->s6_addr32[3],
2074                    ntohs(inet_sk(sk)->sport),
2075                    dest->s6_addr32[0], dest->s6_addr32[1],
2076                    dest->s6_addr32[2], dest->s6_addr32[3],
2077                    ntohs(inet_rsk(req)->rmt_port),
2078                    TCP_SYN_RECV,
2079                    0,0, /* could print option size, but that is af dependent. */
2080                    1,   /* timers active (only the expire timer) */  
2081                    jiffies_to_clock_t(ttd), 
2082                    req->retrans,
2083                    uid,
2084                    0,  /* non standard timer */  
2085                    0, /* open_requests have no inode */
2086                    0, req);
2087 }
2088
2089 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2090 {
2091         struct in6_addr *dest, *src;
2092         __u16 destp, srcp;
2093         int timer_active;
2094         unsigned long timer_expires;
2095         struct inet_sock *inet = inet_sk(sp);
2096         struct tcp_sock *tp = tcp_sk(sp);
2097         struct ipv6_pinfo *np = inet6_sk(sp);
2098
2099         dest  = &np->daddr;
2100         src   = &np->rcv_saddr;
2101         destp = ntohs(inet->dport);
2102         srcp  = ntohs(inet->sport);
2103         if (tp->pending == TCP_TIME_RETRANS) {
2104                 timer_active    = 1;
2105                 timer_expires   = tp->timeout;
2106         } else if (tp->pending == TCP_TIME_PROBE0) {
2107                 timer_active    = 4;
2108                 timer_expires   = tp->timeout;
2109         } else if (timer_pending(&sp->sk_timer)) {
2110                 timer_active    = 2;
2111                 timer_expires   = sp->sk_timer.expires;
2112         } else {
2113                 timer_active    = 0;
2114                 timer_expires = jiffies;
2115         }
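         /* timer_active codes emitted in the "tr" column: 0 none,
          * 1 retransmit, 2 sk_timer pending (typically keepalive),
          * 4 zero-window probe; 3 is used for TIME_WAIT sockets by
          * get_timewait6_sock() below.
          */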
2116
2117         seq_printf(seq,
2118                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2119                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2120                    i,
2121                    src->s6_addr32[0], src->s6_addr32[1],
2122                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2123                    dest->s6_addr32[0], dest->s6_addr32[1],
2124                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2125                    sp->sk_state, 
2126                    tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2127                    timer_active,
2128                    jiffies_to_clock_t(timer_expires - jiffies),
2129                    tp->retransmits,
2130                    sock_i_uid(sp),
2131                    tp->probes_out,
2132                    sock_i_ino(sp),
2133                    atomic_read(&sp->sk_refcnt), sp,
2134                    tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
2135                    tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2136                    );
2137 }
2138
2139 static void get_timewait6_sock(struct seq_file *seq, 
2140                                struct tcp_tw_bucket *tw, int i)
2141 {
2142         struct in6_addr *dest, *src;
2143         __u16 destp, srcp;
2144         int ttd = tw->tw_ttd - jiffies;
2145
2146         if (ttd < 0)
2147                 ttd = 0;
2148
2149         dest  = &tw->tw_v6_daddr;
2150         src   = &tw->tw_v6_rcv_saddr;
2151         destp = ntohs(tw->tw_dport);
2152         srcp  = ntohs(tw->tw_sport);
2153
2154         seq_printf(seq,
2155                    "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2156                    "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2157                    i,
2158                    src->s6_addr32[0], src->s6_addr32[1],
2159                    src->s6_addr32[2], src->s6_addr32[3], srcp,
2160                    dest->s6_addr32[0], dest->s6_addr32[1],
2161                    dest->s6_addr32[2], dest->s6_addr32[3], destp,
2162                    tw->tw_substate, 0, 0,
2163                    3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2164                    atomic_read(&tw->tw_refcnt), tw);
2165 }
2166
2167 #ifdef CONFIG_PROC_FS
2168 static int tcp6_seq_show(struct seq_file *seq, void *v)
2169 {
2170         struct tcp_iter_state *st;
2171
2172         if (v == SEQ_START_TOKEN) {
2173                 seq_puts(seq,
2174                          "  sl  "
2175                          "local_address                         "
2176                          "remote_address                        "
2177                          "st tx_queue rx_queue tr tm->when retrnsmt"
2178                          "   uid  timeout inode\n");
2179                 goto out;
2180         }
2181         st = seq->private;
2182
2183         switch (st->state) {
2184         case TCP_SEQ_STATE_LISTENING:
2185         case TCP_SEQ_STATE_ESTABLISHED:
2186                 get_tcp6_sock(seq, v, st->num);
2187                 break;
2188         case TCP_SEQ_STATE_OPENREQ:
2189                 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2190                 break;
2191         case TCP_SEQ_STATE_TIME_WAIT:
2192                 get_timewait6_sock(seq, v, st->num);
2193                 break;
2194         }
2195 out:
2196         return 0;
2197 }
2198
2199 static struct file_operations tcp6_seq_fops;
2200 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2201         .owner          = THIS_MODULE,
2202         .name           = "tcp6",
2203         .family         = AF_INET6,
2204         .seq_show       = tcp6_seq_show,
2205         .seq_fops       = &tcp6_seq_fops,
2206 };
2207
2208 int __init tcp6_proc_init(void)
2209 {
2210         return tcp_proc_register(&tcp6_seq_afinfo);
2211 }
2212
2213 void tcp6_proc_exit(void)
2214 {
2215         tcp_proc_unregister(&tcp6_seq_afinfo);
2216 }
2217 #endif
2218
2219 struct proto tcpv6_prot = {
2220         .name                   = "TCPv6",
2221         .owner                  = THIS_MODULE,
2222         .close                  = tcp_close,
2223         .connect                = tcp_v6_connect,
2224         .disconnect             = tcp_disconnect,
2225         .accept                 = tcp_accept,
2226         .ioctl                  = tcp_ioctl,
2227         .init                   = tcp_v6_init_sock,
2228         .destroy                = tcp_v6_destroy_sock,
2229         .shutdown               = tcp_shutdown,
2230         .setsockopt             = tcp_setsockopt,
2231         .getsockopt             = tcp_getsockopt,
2232         .sendmsg                = tcp_sendmsg,
2233         .recvmsg                = tcp_recvmsg,
2234         .backlog_rcv            = tcp_v6_do_rcv,
2235         .hash                   = tcp_v6_hash,
2236         .unhash                 = tcp_unhash,
2237         .get_port               = tcp_v6_get_port,
2238         .enter_memory_pressure  = tcp_enter_memory_pressure,
2239         .sockets_allocated      = &tcp_sockets_allocated,
2240         .memory_allocated       = &tcp_memory_allocated,
2241         .memory_pressure        = &tcp_memory_pressure,
2242         .sysctl_mem             = sysctl_tcp_mem,
2243         .sysctl_wmem            = sysctl_tcp_wmem,
2244         .sysctl_rmem            = sysctl_tcp_rmem,
2245         .max_header             = MAX_TCP_HEADER,
2246         .obj_size               = sizeof(struct tcp6_sock),
2247         .rsk_prot               = &tcp6_request_sock_ops,
2248 };
2249
2250 static struct inet6_protocol tcpv6_protocol = {
2251         .handler        =       tcp_v6_rcv,
2252         .err_handler    =       tcp_v6_err,
2253         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2254 };
2255
2256 extern struct proto_ops inet6_stream_ops;
2257
2258 static struct inet_protosw tcpv6_protosw = {
2259         .type           =       SOCK_STREAM,
2260         .protocol       =       IPPROTO_TCP,
2261         .prot           =       &tcpv6_prot,
2262         .ops            =       &inet6_stream_ops,
2263         .capability     =       -1,
2264         .no_check       =       0,
2265         .flags          =       INET_PROTOSW_PERMANENT,
2266 };
2267
2268 void __init tcpv6_init(void)
2269 {
2270         /* register inet6 protocol */
2271         if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2272                 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2273         inet6_register_protosw(&tcpv6_protosw);
2274 }