3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
58 #include <net/addrconf.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
69 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
/*
 * Fold an IPv6 4-tuple into an index in [0, ehash_size).  Only the low
 * 32 bits of each address (s6_addr32[3]) are mixed with the ports, then
 * the result is folded down and masked to the (power-of-two) table size.
 * NOTE(review): this extract is missing interior lines of the definition.
 */
79 static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
80 const struct in6_addr *faddr, const u16 fport,
83 int hashent = (lport ^ fport);
85 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
86 hashent ^= hashent>>16;
87 hashent ^= hashent>>8;
88 return (hashent & (ehash_size - 1));
/*
 * Compute the established-hash bucket for @sk from its own addressing
 * state: local (rcv_saddr, num) and foreign (daddr, dport) taken from
 * the inet/ipv6 private areas of the socket.
 */
91 static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
93 const struct inet_sock *inet = inet_sk(sk);
94 const struct ipv6_pinfo *np = inet6_sk(sk);
95 const struct in6_addr *laddr = &np->rcv_saddr;
96 const struct in6_addr *faddr = &np->daddr;
/* inet->num is host order, inet->dport network order — matches inet6_ehashfn's use */
97 const __u16 lport = inet->num;
98 const __u16 fport = inet->dport;
99 return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
/*
 * Return whether binding @sk to the port owned by bucket @tb would
 * conflict with an existing owner: a conflict needs overlapping bound
 * devices, SO_REUSEADDR not permitting sharing (or the peer listening),
 * and equal/wildcard-overlapping receive addresses.
 * NOTE(review): the loop-exit/return lines are missing from this view.
 */
102 static inline int tcp_v6_bind_conflict(const struct sock *sk,
103 const struct inet_bind_bucket *tb)
105 const struct sock *sk2;
106 const struct hlist_node *node;
108 /* We must walk the whole port owner list in this case. -DaveM */
109 sk_for_each_bound(sk2, node, &tb->owners) {
111 (!sk->sk_bound_dev_if ||
112 !sk2->sk_bound_dev_if ||
113 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
114 (!sk->sk_reuse || !sk2->sk_reuse ||
115 sk2->sk_state == TCP_LISTEN) &&
116 ipv6_rcv_saddr_equal(sk, sk2))
123 /* Grrr, addr_type already calculated by caller, but I don't want
124 * to add some silly "cookie" argument to this method just for that.
125 * But it doesn't matter, the recalculation is in the rarest path
126 * this function ever takes.
/*
 * bind(2) backend: claim local port @snum for @sk.  When snum == 0 an
 * ephemeral port is allocated by walking the rover through the sysctl
 * local port range; otherwise the requested port's bind bucket is
 * checked for conflicts via tcp_v6_bind_conflict().
 * NOTE(review): several lines (rover wrap, success/fail labels, returns)
 * are missing from this extract.
 */
128 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
130 struct inet_bind_hashbucket *head;
131 struct inet_bind_bucket *tb;
132 struct hlist_node *node;
137 int low = sysctl_local_port_range[0];
138 int high = sysctl_local_port_range[1];
139 int remaining = (high - low) + 1;
/* ephemeral search: rover protected by portalloc_lock, buckets by head->lock */
142 spin_lock(&tcp_hashinfo.portalloc_lock);
143 if (tcp_hashinfo.port_rover < low)
146 rover = tcp_hashinfo.port_rover;
150 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
151 spin_lock(&head->lock);
152 inet_bind_bucket_for_each(tb, node, &head->chain)
153 if (tb->port == rover)
157 spin_unlock(&head->lock);
158 } while (--remaining > 0);
159 tcp_hashinfo.port_rover = rover;
160 spin_unlock(&tcp_hashinfo.portalloc_lock);
162 /* Exhausted local port range during search? It is not
163 * possible for us to be holding one of the bind hash
164 * locks if this test triggers, because if 'remaining'
165 * drops to zero, we broke out of the do/while loop at
166 * the top level, not from the 'break;' statement.
169 if (unlikely(remaining <= 0))
172 /* OK, here is the one we will use. */
/* explicit-port path: look the requested port up in its bind bucket */
175 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
176 spin_lock(&head->lock);
177 inet_bind_bucket_for_each(tb, node, &head->chain)
178 if (tb->port == snum)
184 if (tb && !hlist_empty(&tb->owners)) {
/* fastreuse short-circuits the full conflict walk for SO_REUSEADDR peers */
185 if (tb->fastreuse > 0 && sk->sk_reuse &&
186 sk->sk_state != TCP_LISTEN) {
190 if (tcp_v6_bind_conflict(sk, tb))
197 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
201 if (hlist_empty(&tb->owners)) {
202 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
206 } else if (tb->fastreuse &&
207 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
211 if (!inet_csk(sk)->icsk_bind_hash)
212 inet_bind_hash(sk, tb, snum);
213 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
217 spin_unlock(&head->lock);
/*
 * Insert an unhashed socket into the proper TCP hash table: the
 * listening hash (under lhash_lock) for TCP_LISTEN sockets, else the
 * established hash bucket selected by inet6_sk_ehashfn().
 * NOTE(review): the else/brace and unlock lines are missing here.
 */
223 static __inline__ void __tcp_v6_hash(struct sock *sk)
225 struct hlist_head *list;
228 BUG_TRAP(sk_unhashed(sk));
230 if (sk->sk_state == TCP_LISTEN) {
231 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
232 lock = &tcp_hashinfo.lhash_lock;
233 inet_listen_wlock(&tcp_hashinfo);
/* established path: cache the bucket index in sk_hashent for later unhash */
235 sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
236 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
237 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
241 __sk_add_node(sk, list);
242 sock_prot_inc_use(sk->sk_prot);
/*
 * Public hash entry point: only non-CLOSED sockets are hashed.
 * v4-mapped sockets (af_specific == &ipv6_mapped) are presumably
 * diverted to the IPv4 hash path — the branch body is missing here.
 */
247 static void tcp_v6_hash(struct sock *sk)
249 if (sk->sk_state != TCP_CLOSE) {
250 struct tcp_sock *tp = tcp_sk(sk);
252 if (tp->af_specific == &ipv6_mapped) {
/*
 * Best-match scan of the listening hash for (daddr, hnum, dif):
 * candidates must match the local port and be PF_INET6; a bound
 * rcv_saddr and a bound device each act as filters and (per the
 * score comparison) raise the match score so the most specific
 * listener wins.  Caller-visible locking: takes lhash_lock as reader.
 * NOTE(review): score setup and the return statement are missing.
 */
262 static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
263 const struct in6_addr *daddr,
264 const unsigned short hnum,
268 struct hlist_node *node;
269 struct sock *result = NULL;
273 read_lock(&hashinfo->lhash_lock);
274 sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
275 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
276 struct ipv6_pinfo *np = inet6_sk(sk);
279 if (!ipv6_addr_any(&np->rcv_saddr)) {
280 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
284 if (sk->sk_bound_dev_if) {
285 if (sk->sk_bound_dev_if != dif)
293 if (score > hiscore) {
301 read_unlock(&hashinfo->lhash_lock);
305 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
306 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
308 * The sockhash lock must be held as a reader here.
/*
 * Exact-match lookup in the established hash: first the live-socket
 * chain of the computed bucket, then the TIME_WAIT chain which lives
 * ehash_size buckets further on in the same table.
 */
311 static inline struct sock *
312 __inet6_lookup_established(struct inet_hashinfo *hashinfo,
313 const struct in6_addr *saddr,
315 const struct in6_addr *daddr,
320 const struct hlist_node *node;
321 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
322 /* Optimize here for direct hit, only listening connections can
323 * have wildcards anyways.
325 const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
326 hashinfo->ehash_size);
327 struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
329 read_lock(&head->lock);
330 sk_for_each(sk, node, &head->chain) {
331 /* For IPV6 do the cheaper port and family tests first. */
332 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
333 goto hit; /* You sunk my battleship! */
335 /* Must check for a TIME_WAIT'er before going to listener hash. */
336 sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
337 const struct inet_timewait_sock *tw = inet_twsk(sk);
/* compare both ports at once via the combined 32-bit value */
339 if(*((__u32 *)&(tw->tw_dport)) == ports &&
340 sk->sk_family == PF_INET6) {
341 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
343 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
344 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
345 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
349 read_unlock(&head->lock);
354 read_unlock(&head->lock);
/*
 * Combined lookup: try the established (and TIME_WAIT) hash first,
 * and fall back to the best-match listener scan on a miss.
 */
359 static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
360 const struct in6_addr *saddr,
362 const struct in6_addr *daddr,
366 struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
371 return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
/*
 * Exported wrapper around __inet6_lookup(); note @dport arrives in
 * network byte order and is converted with ntohs() here.
 */
374 inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
375 const struct in6_addr *saddr, const u16 sport,
376 const struct in6_addr *daddr, const u16 dport,
382 sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
388 EXPORT_SYMBOL_GPL(inet6_lookup);
392 * Open request hash tables.
/*
 * Hash a SYN's remote address/port into the listen socket's SYN queue
 * using an unrolled jhash over the four address words, keyed by @rnd.
 */
395 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
399 a = raddr->s6_addr32[0];
400 b = raddr->s6_addr32[1];
401 c = raddr->s6_addr32[2];
403 a += JHASH_GOLDEN_RATIO;
404 b += JHASH_GOLDEN_RATIO;
406 __jhash_mix(a, b, c);
408 a += raddr->s6_addr32[3];
410 __jhash_mix(a, b, c);
/* SYN queue size is a power of two, so masking selects the bucket */
412 return c & (TCP_SYNQ_HSIZE - 1);
/*
 * Find a pending connection request on listener @sk matching the given
 * remote port/addresses and (if the request is bound) interface.
 * *prevp is set to the predecessor slot so the caller can unlink the
 * request from the singly linked syn_table chain.
 */
415 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
416 struct request_sock ***prevp,
418 struct in6_addr *raddr,
419 struct in6_addr *laddr,
422 const struct inet_connection_sock *icsk = inet_csk(sk);
423 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
424 struct request_sock *req, **prev;
426 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
427 (req = *prev) != NULL;
428 prev = &req->dl_next) {
429 const struct tcp6_request_sock *treq = tcp6_rsk(req);
431 if (inet_rsk(req)->rmt_port == rport &&
432 req->rsk_ops->family == AF_INET6 &&
433 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
434 ipv6_addr_equal(&treq->loc_addr, laddr) &&
435 (!treq->iif || treq->iif == iif)) {
/* still embryonic: no full socket may be attached yet */
436 BUG_TRAP(req->sk == NULL);
/*
 * TCP-over-IPv6 checksum: thin wrapper folding the pseudo-header
 * (saddr, daddr, len, IPPROTO_TCP) with the partial sum @base.
 */
445 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
446 struct in6_addr *saddr,
447 struct in6_addr *daddr,
450 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
/*
 * Choose an initial sequence number for a new connection from @skb's
 * addresses: native IPv6 packets use the secure v6 ISN generator,
 * anything else (v4-mapped) falls through to the IPv4 one.
 */
453 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
455 if (skb->protocol == htons(ETH_P_IPV6)) {
456 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
457 skb->nh.ipv6h->saddr.s6_addr32,
461 return secure_tcp_sequence_number(skb->nh.iph->daddr,
/*
 * Check that the 4-tuple @sk is about to use with local port @lport is
 * unique in the established hash, and insert @sk if so.  A matching
 * TIME_WAIT socket may be recycled (per tcp_tw_reuse / its timestamp
 * age), inheriting its sequence/timestamp state; a matching live
 * socket means -EADDRNOTAVAIL.  When @twp is non-NULL the displaced
 * timewait sock is handed back to the caller instead of being
 * descheduled here.
 * NOTE(review): several labels/braces and the tw assignment line are
 * missing from this extract.
 */
468 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
469 struct inet_timewait_sock **twp)
471 struct inet_sock *inet = inet_sk(sk);
472 const struct ipv6_pinfo *np = inet6_sk(sk);
473 const struct in6_addr *daddr = &np->rcv_saddr;
474 const struct in6_addr *saddr = &np->daddr;
475 const int dif = sk->sk_bound_dev_if;
476 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
477 const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
478 tcp_hashinfo.ehash_size);
479 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
481 const struct hlist_node *node;
482 struct inet_timewait_sock *tw;
/* writer lock: we may insert into this bucket below */
484 write_lock(&head->lock);
486 /* Check TIME-WAIT sockets first. */
487 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
488 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
492 if(*((__u32 *)&(tw->tw_dport)) == ports &&
493 sk2->sk_family == PF_INET6 &&
494 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
495 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
496 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
497 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
498 struct tcp_sock *tp = tcp_sk(sk);
500 if (tcptw->tw_ts_recent_stamp &&
502 (sysctl_tcp_tw_reuse &&
503 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
504 /* See comment in tcp_ipv4.c */
/* jump write_seq well past the old connection's sequence space */
505 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
508 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
509 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
518 /* And established part... */
519 sk_for_each(sk2, node, &head->chain) {
520 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
525 BUG_TRAP(sk_unhashed(sk));
526 __sk_add_node(sk, &head->chain);
527 sk->sk_hashent = hash;
528 sock_prot_inc_use(sk->sk_prot);
529 write_unlock(&head->lock);
533 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
535 /* Silly. Should hash-dance instead... */
536 inet_twsk_deschedule(tw, &tcp_death_row);
537 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
544 write_unlock(&head->lock);
545 return -EADDRNOTAVAIL;
/*
 * Per-destination offset for ephemeral port selection, derived from
 * the socket's v6 addresses via the secure hash so different peers
 * start their port walk at different points.
 */
548 static inline u32 tcpv6_port_offset(const struct sock *sk)
550 const struct inet_sock *inet = inet_sk(sk);
551 const struct ipv6_pinfo *np = inet6_sk(sk);
553 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
/*
 * connect()-time port binding: when the socket has no local port yet
 * (snum == 0), walk the local port range from a per-destination offset
 * looking for a port whose bind bucket either is absent or passes
 * __tcp_v6_check_established(); a conflicting TIME_WAIT sock found on
 * the way is descheduled after the new socket is installed.  When a
 * port is already bound, the single-owner fast path skips straight to
 * the established-hash uniqueness check.
 * NOTE(review): branch bodies, labels and returns are partially
 * missing from this extract.
 */
558 static int tcp_v6_hash_connect(struct sock *sk)
560 unsigned short snum = inet_sk(sk)->num;
561 struct inet_bind_hashbucket *head;
562 struct inet_bind_bucket *tb;
566 int low = sysctl_local_port_range[0];
567 int high = sysctl_local_port_range[1];
568 int range = high - low;
572 u32 offset = hint + tcpv6_port_offset(sk);
573 struct hlist_node *node;
574 struct inet_timewait_sock *tw = NULL;
577 for (i = 1; i <= range; i++) {
578 port = low + (i + offset) % range;
579 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
580 spin_lock(&head->lock);
582 /* Does not bother with rcv_saddr checks,
583 * because the established check is already
586 inet_bind_bucket_for_each(tb, node, &head->chain) {
587 if (tb->port == port) {
588 BUG_TRAP(!hlist_empty(&tb->owners));
/* fastreuse buckets were created by bind(); never steal those here */
589 if (tb->fastreuse >= 0)
591 if (!__tcp_v6_check_established(sk,
599 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
601 spin_unlock(&head->lock);
608 spin_unlock(&head->lock);
612 return -EADDRNOTAVAIL;
617 /* Head lock still held and bh's disabled */
618 inet_bind_hash(sk, tb, port);
619 if (sk_unhashed(sk)) {
620 inet_sk(sk)->sport = htons(port);
623 spin_unlock(&head->lock);
626 inet_twsk_deschedule(tw, &tcp_death_row);
634 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
635 tb = inet_csk(sk)->icsk_bind_hash;
636 spin_lock_bh(&head->lock);
/* sole owner of the bucket: port cannot clash, hash directly */
638 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
640 spin_unlock_bh(&head->lock);
643 spin_unlock(&head->lock);
644 /* No definite answer... Walk to established hash table */
645 ret = __tcp_v6_check_established(sk, snum, NULL);
/*
 * Active open for an AF_INET6 TCP socket: validate the sockaddr,
 * resolve flow labels and link-local scope, divert v4-mapped
 * destinations to tcp_v4_connect(), route the flow (honouring a
 * type-0 routing header), bind an ephemeral port, then send the SYN.
 * On failure the socket is returned to TCP_CLOSE.
 * NOTE(review): numerous lines (error gotos, closing braces, some
 * assignments) are missing from this extract.
 */
652 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
655 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
656 struct inet_sock *inet = inet_sk(sk);
657 struct ipv6_pinfo *np = inet6_sk(sk);
658 struct tcp_sock *tp = tcp_sk(sk);
659 struct in6_addr *saddr = NULL, *final_p = NULL, final;
661 struct dst_entry *dst;
665 if (addr_len < SIN6_LEN_RFC2133)
668 if (usin->sin6_family != AF_INET6)
669 return(-EAFNOSUPPORT);
671 memset(&fl, 0, sizeof(fl));
/* a non-zero flow label must refer to an existing flow owned by this sk */
674 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
675 IP6_ECN_flow_init(fl.fl6_flowlabel);
676 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
677 struct ip6_flowlabel *flowlabel;
678 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
679 if (flowlabel == NULL)
681 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
682 fl6_sock_release(flowlabel);
687 * connect() to INADDR_ANY means loopback (BSD'ism).
690 if(ipv6_addr_any(&usin->sin6_addr))
691 usin->sin6_addr.s6_addr[15] = 0x1;
693 addr_type = ipv6_addr_type(&usin->sin6_addr);
695 if(addr_type & IPV6_ADDR_MULTICAST)
698 if (addr_type&IPV6_ADDR_LINKLOCAL) {
699 if (addr_len >= sizeof(struct sockaddr_in6) &&
700 usin->sin6_scope_id) {
701 /* If interface is set while binding, indices
704 if (sk->sk_bound_dev_if &&
705 sk->sk_bound_dev_if != usin->sin6_scope_id)
708 sk->sk_bound_dev_if = usin->sin6_scope_id;
711 /* Connect to link-local address requires an interface */
712 if (!sk->sk_bound_dev_if)
/* reconnecting to a different peer: stale TS state must not leak over */
716 if (tp->rx_opt.ts_recent_stamp &&
717 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
718 tp->rx_opt.ts_recent = 0;
719 tp->rx_opt.ts_recent_stamp = 0;
723 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
724 np->flow_label = fl.fl6_flowlabel;
/* v4-mapped destination: run the IPv4 connect path on this socket */
730 if (addr_type == IPV6_ADDR_MAPPED) {
731 u32 exthdrlen = tp->ext_header_len;
732 struct sockaddr_in sin;
734 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
736 if (__ipv6_only_sock(sk))
739 sin.sin_family = AF_INET;
740 sin.sin_port = usin->sin6_port;
741 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
743 tp->af_specific = &ipv6_mapped;
744 sk->sk_backlog_rcv = tcp_v4_do_rcv;
746 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
/* on failure restore the native v6 operations — TODO confirm the
 * surrounding (missing) control flow in the original */
749 tp->ext_header_len = exthdrlen;
750 tp->af_specific = &ipv6_specific;
751 sk->sk_backlog_rcv = tcp_v6_do_rcv;
754 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
756 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
763 if (!ipv6_addr_any(&np->rcv_saddr))
764 saddr = &np->rcv_saddr;
766 fl.proto = IPPROTO_TCP;
767 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
768 ipv6_addr_copy(&fl.fl6_src,
769 (saddr ? saddr : &np->saddr));
770 fl.oif = sk->sk_bound_dev_if;
771 fl.fl_ip_dport = usin->sin6_port;
772 fl.fl_ip_sport = inet->sport;
/* type-0 routing header: route via first hop, remember the real dest */
774 if (np->opt && np->opt->srcrt) {
775 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
776 ipv6_addr_copy(&final, &fl.fl6_dst);
777 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
781 err = ip6_dst_lookup(sk, &dst, &fl);
785 ipv6_addr_copy(&fl.fl6_dst, final_p);
787 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
794 ipv6_addr_copy(&np->rcv_saddr, saddr);
797 /* set the source address */
798 ipv6_addr_copy(&np->saddr, saddr);
799 inet->rcv_saddr = LOOPBACK4_IPV6;
801 ip6_dst_store(sk, dst, NULL);
802 sk->sk_route_caps = dst->dev->features &
803 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
805 tp->ext_header_len = 0;
807 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
809 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
811 inet->dport = usin->sin6_port;
813 tcp_set_state(sk, TCP_SYN_SENT);
814 err = tcp_v6_hash_connect(sk);
819 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
824 err = tcp_connect(sk);
/* failure path: tear the socket back down to CLOSE */
831 tcp_set_state(sk, TCP_CLOSE);
835 sk->sk_route_caps = 0;
/*
 * ICMPv6 error handler for TCP: locate the socket for the offending
 * segment, validate the echoed sequence number, handle PMTU discovery
 * (ICMPV6_PKT_TOOBIG) by re-routing and shrinking the MSS, and route
 * other errors either to a pending request_sock, a SYN_SENT socket,
 * or the socket's soft-error field depending on state and whether the
 * socket is user-locked.
 * NOTE(review): lock/unlock, sock_put and several case labels are
 * missing from this extract.
 */
839 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
840 int type, int code, int offset, __u32 info)
842 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
843 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
844 struct ipv6_pinfo *np;
850 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
851 th->source, skb->dev->ifindex);
854 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
858 if (sk->sk_state == TCP_TIME_WAIT) {
859 inet_twsk_put((struct inet_timewait_sock *)sk);
/* user holds the socket: ICMPs are not backlogged, only counted */
864 if (sock_owned_by_user(sk))
865 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
867 if (sk->sk_state == TCP_CLOSE)
871 seq = ntohl(th->seq);
872 if (sk->sk_state != TCP_LISTEN &&
873 !between(seq, tp->snd_una, tp->snd_nxt)) {
874 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
880 if (type == ICMPV6_PKT_TOOBIG) {
881 struct dst_entry *dst = NULL;
883 if (sock_owned_by_user(sk))
885 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
888 /* icmp should have updated the destination cache entry */
889 dst = __sk_dst_check(sk, np->dst_cookie);
892 struct inet_sock *inet = inet_sk(sk);
895 /* BUGGG_FUTURE: Again, it is not clear how
896 to handle rthdr case. Ignore this complexity
899 memset(&fl, 0, sizeof(fl));
900 fl.proto = IPPROTO_TCP;
901 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
902 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
903 fl.oif = sk->sk_bound_dev_if;
904 fl.fl_ip_dport = inet->dport;
905 fl.fl_ip_sport = inet->sport;
907 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
908 sk->sk_err_soft = -err;
912 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
913 sk->sk_err_soft = -err;
/* new path MTU is smaller than the cached one: shrink and retransmit */
920 if (tp->pmtu_cookie > dst_mtu(dst)) {
921 tcp_sync_mss(sk, dst_mtu(dst));
922 tcp_simple_retransmit(sk);
923 } /* else let the usual retransmit timer handle it */
928 icmpv6_err_convert(type, code, &err);
930 /* Might be for an request_sock */
931 switch (sk->sk_state) {
932 struct request_sock *req, **prev;
934 if (sock_owned_by_user(sk))
937 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
938 &hdr->saddr, inet6_iif(skb));
942 /* ICMPs are not backlogged, hence we cannot get
943 * an established socket here.
945 BUG_TRAP(req->sk == NULL);
947 if (seq != tcp_rsk(req)->snt_isn) {
948 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
952 inet_csk_reqsk_queue_drop(sk, req, prev);
956 case TCP_SYN_RECV: /* Cannot happen.
957 It can, it SYNs are crossed. --ANK */
958 if (!sock_owned_by_user(sk)) {
959 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
961 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
965 sk->sk_err_soft = err;
969 if (!sock_owned_by_user(sk) && np->recverr) {
971 sk->sk_error_report(sk);
973 sk->sk_err_soft = err;
/*
 * Build and transmit a SYN-ACK for pending request @req: construct the
 * flow (possibly inverting a received routing header into @opt), route
 * it, let tcp_make_synack() build the segment, checksum it against the
 * request's addresses and hand it to ip6_xmit().  A locally allocated
 * @opt (one not shared with np->opt) is freed before returning.
 * NOTE(review): some error gotos/returns are missing from this view.
 */
981 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
982 struct dst_entry *dst)
984 struct tcp6_request_sock *treq = tcp6_rsk(req);
985 struct ipv6_pinfo *np = inet6_sk(sk);
986 struct sk_buff * skb;
987 struct ipv6_txoptions *opt = NULL;
988 struct in6_addr * final_p = NULL, final;
992 memset(&fl, 0, sizeof(fl));
993 fl.proto = IPPROTO_TCP;
994 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
995 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
996 fl.fl6_flowlabel = 0;
998 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
999 fl.fl_ip_sport = inet_sk(sk)->sport;
1004 np->rxopt.bits.srcrt == 2 &&
1006 struct sk_buff *pktopts = treq->pktopts;
1007 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
/* reverse the routing header from the SYN so the SYN-ACK retraces it */
1009 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
1012 if (opt && opt->srcrt) {
1013 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1014 ipv6_addr_copy(&final, &fl.fl6_dst);
1015 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1019 err = ip6_dst_lookup(sk, &dst, &fl);
1023 ipv6_addr_copy(&fl.fl6_dst, final_p);
1024 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1028 skb = tcp_make_synack(sk, dst, req);
1030 struct tcphdr *th = skb->h.th;
1032 th->check = tcp_v6_check(th, skb->len,
1033 &treq->loc_addr, &treq->rmt_addr,
1034 csum_partial((char *)th, skb->len, skb->csum));
1036 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1037 err = ip6_xmit(sk, skb, &fl, opt, 0);
/* congestion notification from the stack is not an error to the caller */
1038 if (err == NET_XMIT_CN)
1044 if (opt && opt != np->opt)
1045 sock_kfree_s(sk, opt, opt->tot_len);
/* Release the pktoptions skb pinned by a v6 request sock, if any. */
1049 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1051 if (tcp6_rsk(req)->pktopts)
1052 kfree_skb(tcp6_rsk(req)->pktopts);
/* Operations table wiring the v6 request-sock callbacks defined above. */
1055 static struct request_sock_ops tcp6_request_sock_ops = {
1057 .obj_size = sizeof(struct tcp6_request_sock),
1058 .rtx_syn_ack = tcp_v6_send_synack,
1059 .send_ack = tcp_v6_reqsk_send_ack,
1060 .destructor = tcp_v6_reqsk_destructor,
1061 .send_reset = tcp_v6_send_reset
/*
 * Return whether @sk has asked (via IPV6_RECV* sockopts) for any of the
 * ancillary data actually present on @skb — hop-by-hop options, flow
 * info, routing header, or destination options.
 */
1064 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1066 struct ipv6_pinfo *np = inet6_sk(sk);
1067 struct inet6_skb_parm *opt = IP6CB(skb);
1069 if (np->rxopt.all) {
1070 if ((opt->hop && np->rxopt.bits.hopopts) ||
1071 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1072 np->rxopt.bits.rxflow) ||
1073 (opt->srcrt && np->rxopt.bits.srcrt) ||
1074 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
/*
 * Fill in the TCP checksum for an outgoing segment on connected socket
 * @sk: with hardware checksum offload only the pseudo-header sum is
 * stored (the device finishes it, starting at the offset recorded in
 * skb->csum); otherwise the full checksum is computed in software.
 */
1081 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1082 struct sk_buff *skb)
1084 struct ipv6_pinfo *np = inet6_sk(sk);
1086 if (skb->ip_summed == CHECKSUM_HW) {
1087 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1088 skb->csum = offsetof(struct tcphdr, check);
1090 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
1091 csum_partial((char *)th, th->doff<<2,
/*
 * Send a RST in response to @skb without any owning socket: a fresh
 * skb is built with source/destination swapped, the sequence/ack
 * fields derived from the offending segment, and transmitted via
 * ip6_xmit() after an anonymous route/xfrm lookup.  RSTs are only
 * sent for unicast destinations.
 * NOTE(review): some branch lines (RST check, ack handling) are
 * missing from this extract.
 */
1097 static void tcp_v6_send_reset(struct sk_buff *skb)
1099 struct tcphdr *th = skb->h.th, *t1;
1100 struct sk_buff *buff;
1106 if (!ipv6_unicast_destination(skb))
1110 * We need to grab some memory, and put together an RST,
1111 * and then put it into the queue to be sent.
1114 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1119 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1121 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1123 /* Swap the send and the receive. */
1124 memset(t1, 0, sizeof(*t1));
1125 t1->dest = th->source;
1126 t1->source = th->dest;
1127 t1->doff = sizeof(*t1)/4;
1131 t1->seq = th->ack_seq;
/* ack everything the peer sent, counting SYN/FIN as one byte each */
1134 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1135 + skb->len - (th->doff<<2));
1138 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1140 memset(&fl, 0, sizeof(fl));
1141 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1142 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1144 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1145 sizeof(*t1), IPPROTO_TCP,
1148 fl.proto = IPPROTO_TCP;
1149 fl.oif = inet6_iif(skb);
1150 fl.fl_ip_dport = t1->dest;
1151 fl.fl_ip_sport = t1->source;
1153 /* sk = NULL, but it is safe for now. RST socket required. */
1154 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1156 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1157 dst_release(buff->dst);
1161 ip6_xmit(NULL, buff, &fl, NULL, 0);
1162 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1163 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
/*
 * Send a bare ACK (no owning socket) in reply to @skb carrying the
 * given seq/ack/window and, when @ts is non-zero, a TCP timestamp
 * option (NOP,NOP,TIMESTAMP) — used for TIME_WAIT and request-sock
 * acknowledgements.
 * NOTE(review): the tot_len adjustment for the timestamp case and a
 * few other lines are missing from this extract.
 */
1170 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1172 struct tcphdr *th = skb->h.th, *t1;
1173 struct sk_buff *buff;
1175 int tot_len = sizeof(struct tcphdr);
1180 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1185 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1187 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1189 /* Swap the send and the receive. */
1190 memset(t1, 0, sizeof(*t1));
1191 t1->dest = th->source;
1192 t1->source = th->dest;
1193 t1->doff = tot_len/4;
1194 t1->seq = htonl(seq);
1195 t1->ack_seq = htonl(ack);
1197 t1->window = htons(win);
/* append the timestamp option directly after the fixed header */
1200 u32 *ptr = (u32*)(t1 + 1);
1201 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1202 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1203 *ptr++ = htonl(tcp_time_stamp);
1207 buff->csum = csum_partial((char *)t1, tot_len, 0);
1209 memset(&fl, 0, sizeof(fl));
1210 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1211 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1213 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1214 tot_len, IPPROTO_TCP,
1217 fl.proto = IPPROTO_TCP;
1218 fl.oif = inet6_iif(skb);
1219 fl.fl_ip_dport = t1->dest;
1220 fl.fl_ip_sport = t1->source;
1222 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1223 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1224 dst_release(buff->dst);
1227 ip6_xmit(NULL, buff, &fl, NULL, 0);
1228 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
/*
 * ACK on behalf of a TIME_WAIT socket, replaying its recorded
 * snd_nxt/rcv_nxt, scaled receive window and last timestamp.
 */
1235 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1237 struct inet_timewait_sock *tw = inet_twsk(sk);
1238 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1240 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1241 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1242 tcptw->tw_ts_recent);
/* ACK for an embryonic connection: ISN+1 / rcv_isn+1 per the request. */
1247 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1249 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
/*
 * Demultiplex an incoming segment on listening socket @sk: a pending
 * request_sock is completed via tcp_check_req(); otherwise a matching
 * established socket (possibly TIME_WAIT) is looked up.  The SYN
 * cookie path is compiled out (#if 0).
 * NOTE(review): several return statements are missing from this view.
 */
1253 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1255 struct request_sock *req, **prev;
1256 const struct tcphdr *th = skb->h.th;
1259 /* Find possible connection requests. */
1260 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1261 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1263 return tcp_check_req(sk, skb, req, prev);
1265 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1266 th->source, &skb->nh.ipv6h->daddr,
1267 ntohs(th->dest), inet6_iif(skb));
1270 if (nsk->sk_state != TCP_TIME_WAIT) {
1274 inet_twsk_put((struct inet_timewait_sock *)nsk);
1278 #if 0 /*def CONFIG_SYN_COOKIES*/
1279 if (!th->rst && !th->syn && th->ack)
1280 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
/*
 * Queue a new connection request on @sk's SYN table (bucket chosen by
 * tcp_v6_synq_hash) with the initial SYN-ACK retransmit timeout, and
 * bump the listener's pending-request accounting.
 */
1285 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1287 struct inet_connection_sock *icsk = inet_csk(sk);
1288 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1289 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1291 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1292 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1296 /* FIXME: this is substantially similar to the ipv4 code.
1297 * Can some kind of merge be done? -- erics
/*
 * Handle an incoming SYN on listening socket @sk: drop under SYN-queue
 * or accept-queue pressure, allocate and initialize a request_sock
 * (addresses, parsed TCP options, ECN, pinned pktoptions, interface),
 * pick an ISN, send the SYN-ACK and queue the request.  v4-mapped
 * traffic is handed to tcp_v4_conn_request().  Returns 0 so no RST is
 * sent even on drop.
 * NOTE(review): drop labels and some cleanup lines are missing from
 * this extract.
 */
1299 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1301 struct tcp6_request_sock *treq;
1302 struct ipv6_pinfo *np = inet6_sk(sk);
1303 struct tcp_options_received tmp_opt;
1304 struct tcp_sock *tp = tcp_sk(sk);
1305 struct request_sock *req = NULL;
1306 __u32 isn = TCP_SKB_CB(skb)->when;
1308 if (skb->protocol == htons(ETH_P_IP))
1309 return tcp_v4_conn_request(sk, skb);
1311 if (!ipv6_unicast_destination(skb))
1315 * There are no SYN attacks on IPv6, yet...
1317 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1318 if (net_ratelimit())
1319 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1323 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1326 req = reqsk_alloc(&tcp6_request_sock_ops);
1330 tcp_clear_options(&tmp_opt);
1331 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1332 tmp_opt.user_mss = tp->rx_opt.user_mss;
1334 tcp_parse_options(skb, &tmp_opt, 0);
1336 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1337 tcp_openreq_init(req, &tmp_opt, skb);
1339 treq = tcp6_rsk(req);
1340 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1341 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1342 TCP_ECN_create_request(req, skb->h.th);
1343 treq->pktopts = NULL;
/* keep the SYN skb alive if the socket wants its ancillary data later */
1344 if (ipv6_opt_accepted(sk, skb) ||
1345 np->rxopt.bits.rxinfo ||
1346 np->rxopt.bits.rxhlim) {
1347 atomic_inc(&skb->users);
1348 treq->pktopts = skb;
1350 treq->iif = sk->sk_bound_dev_if;
1352 /* So that link locals have meaning */
1353 if (!sk->sk_bound_dev_if &&
1354 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1355 treq->iif = inet6_iif(skb);
1358 isn = tcp_v6_init_sequence(sk,skb);
1360 tcp_rsk(req)->snt_isn = isn;
1362 if (tcp_v6_send_synack(sk, req, NULL))
1365 tcp_v6_synq_add(sk, req);
1373 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1374 return 0; /* don't send reset */
1377 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1378 struct request_sock *req,
1379 struct dst_entry *dst)
1381 struct tcp6_request_sock *treq = tcp6_rsk(req);
1382 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1383 struct tcp6_sock *newtcp6sk;
1384 struct inet_sock *newinet;
1385 struct tcp_sock *newtp;
1387 struct ipv6_txoptions *opt;
1389 if (skb->protocol == htons(ETH_P_IP)) {
1394 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1399 newtcp6sk = (struct tcp6_sock *)newsk;
1400 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1402 newinet = inet_sk(newsk);
1403 newnp = inet6_sk(newsk);
1404 newtp = tcp_sk(newsk);
1406 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1408 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1411 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1414 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1416 newtp->af_specific = &ipv6_mapped;
1417 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1418 newnp->pktoptions = NULL;
1420 newnp->mcast_oif = inet6_iif(skb);
1421 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1424 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1425 * here, tcp_create_openreq_child now does this for us, see the comment in
1426 * that function for the gory details. -acme
1429 /* It is tricky place. Until this moment IPv4 tcp
1430 worked with IPv6 af_tcp.af_specific.
1433 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1440 if (sk_acceptq_is_full(sk))
1443 if (np->rxopt.bits.srcrt == 2 &&
1444 opt == NULL && treq->pktopts) {
1445 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1447 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1451 struct in6_addr *final_p = NULL, final;
1454 memset(&fl, 0, sizeof(fl));
1455 fl.proto = IPPROTO_TCP;
1456 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1457 if (opt && opt->srcrt) {
1458 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1459 ipv6_addr_copy(&final, &fl.fl6_dst);
1460 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1463 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1464 fl.oif = sk->sk_bound_dev_if;
1465 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1466 fl.fl_ip_sport = inet_sk(sk)->sport;
1468 if (ip6_dst_lookup(sk, &dst, &fl))
1472 ipv6_addr_copy(&fl.fl6_dst, final_p);
1474 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1478 newsk = tcp_create_openreq_child(sk, req, skb);
1483 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1484 * count here, tcp_create_openreq_child now does this for us, see the
1485 * comment in that function for the gory details. -acme
1488 ip6_dst_store(newsk, dst, NULL);
1489 newsk->sk_route_caps = dst->dev->features &
1490 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1492 newtcp6sk = (struct tcp6_sock *)newsk;
1493 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1495 newtp = tcp_sk(newsk);
1496 newinet = inet_sk(newsk);
1497 newnp = inet6_sk(newsk);
1499 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1501 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1502 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1503 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1504 newsk->sk_bound_dev_if = treq->iif;
1506 /* Now IPv6 options...
1508 First: no IPv4 options.
1510 newinet->opt = NULL;
1513 newnp->rxopt.all = np->rxopt.all;
1515 /* Clone pktoptions received with SYN */
1516 newnp->pktoptions = NULL;
1517 if (treq->pktopts != NULL) {
1518 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1519 kfree_skb(treq->pktopts);
1520 treq->pktopts = NULL;
1521 if (newnp->pktoptions)
1522 skb_set_owner_r(newnp->pktoptions, newsk);
1525 newnp->mcast_oif = inet6_iif(skb);
1526 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1528 /* Clone native IPv6 options from listening socket (if any)
1530 Yes, keeping reference count would be much more clever,
1531 but we make one more one thing there: reattach optmem
1535 newnp->opt = ipv6_dup_options(newsk, opt);
1537 sock_kfree_s(sk, opt, opt->tot_len);
1540 newtp->ext_header_len = 0;
1542 newtp->ext_header_len = newnp->opt->opt_nflen +
1543 newnp->opt->opt_flen;
1545 tcp_sync_mss(newsk, dst_mtu(dst));
1546 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1547 tcp_initialize_rcv_mss(newsk);
1549 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1551 __tcp_v6_hash(newsk);
1552 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1557 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1559 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1560 if (opt && opt != np->opt)
1561 sock_kfree_s(sk, opt, opt->tot_len);
/*
 * Validate or defer the TCP checksum of an incoming IPv6 segment.
 * - CHECKSUM_HW: hardware summed the payload; fold it with the
 *   pseudo-header now and mark UNNECESSARY if it verifies.
 * - Short packets (<= 76 bytes): cheap to verify immediately in
 *   software via skb_checksum().
 * - Otherwise: stash the pseudo-header sum in skb->csum so the full
 *   verification can happen later (e.g. during copy to user).
 * Return values are on elided lines; presumably 0 on success and
 * negative on a bad hardware checksum — TODO confirm.
 */
1566 static int tcp_v6_checksum_init(struct sk_buff *skb)
1568 if (skb->ip_summed == CHECKSUM_HW) {
1569 skb->ip_summed = CHECKSUM_UNNECESSARY;
1570 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571 &skb->nh.ipv6h->daddr,skb->csum))
/* Hardware sum did not verify: log (rate-limited) and fall through. */
1573 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1575 if (skb->len <= 76) {
1576 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1577 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1579 skb->ip_summed = CHECKSUM_UNNECESSARY;
/* Defer: seed skb->csum with the (complemented) pseudo-header sum. */
1581 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1582 &skb->nh.ipv6h->daddr,0);
1587 /* The socket must have it's spinlock held when we get
1590 * We have a potential double-lock case here, so even when
1591 * doing backlog processing we use the BH locking scheme.
1592 * This is because we cannot sleep with the original spinlock
/*
 * Per-socket receive path for IPv6 TCP: dispatches on socket state
 * (ESTABLISHED fast path, LISTEN handshake handling, or the generic
 * state machine) and implements Stevens' IPV6_PKTOPTIONS latching.
 * Returns 0 on success; error paths (labels elided in this extract)
 * send a reset and/or drop the skb.
 */
1595 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1597 struct ipv6_pinfo *np = inet6_sk(sk);
1598 struct tcp_sock *tp;
1599 struct sk_buff *opt_skb = NULL;
1601 /* Imagine: socket is IPv6. IPv4 packet arrives,
1602 goes to IPv4 receive handler and backlogged.
1603 From backlog it always goes here. Kerboom...
1604 Fortunately, tcp_rcv_established and rcv_established
1605 handle them correctly, but it is not case with
1606 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1609 if (skb->protocol == htons(ETH_P_IP))
1610 return tcp_v4_do_rcv(sk, skb);
/* Socket filter may reject the packet (drop path elided). */
1612 if (sk_filter(sk, skb, 0))
1616 * socket locking is here for SMP purposes as backlog rcv
1617 * is currently called with bh processing disabled.
1620 /* Do Stevens' IPV6_PKTOPTIONS.
1622 Yes, guys, it is the only place in our code, where we
1623 may make it not affecting IPv4.
1624 The rest of code is protocol independent,
1625 and I do not like idea to uglify IPv4.
1627 Actually, all the idea behind IPV6_PKTOPTIONS
1628 looks not very well thought. For now we latch
1629 options, received in the last packet, enqueued
1630 by tcp. Feel free to propose better solution.
/* Clone now so options survive even if tcp consumes the skb. */
1634 opt_skb = skb_clone(skb, GFP_ATOMIC);
1636 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1637 TCP_CHECK_TIMER(sk);
1638 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1640 TCP_CHECK_TIMER(sk);
1642 goto ipv6_pktoptions;
/* Slow path: sanity-check header length and checksum first. */
1646 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1649 if (sk->sk_state == TCP_LISTEN) {
1650 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1655 * Queue it on the new socket if the new socket is active,
1656 * otherwise we just shortcircuit this and continue with
/* Hand the segment to the freshly created child socket. */
1660 if (tcp_child_process(sk, nsk, skb))
1663 __kfree_skb(opt_skb);
/* All other states go through the generic TCP state machine. */
1668 TCP_CHECK_TIMER(sk);
1669 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1671 TCP_CHECK_TIMER(sk);
1673 goto ipv6_pktoptions;
/* reset / discard error paths (labels elided). */
1677 tcp_v6_send_reset(skb);
1680 __kfree_skb(opt_skb);
1684 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1689 /* Do you ask, what is it?
1691 1. skb was enqueued by tcp.
1692 2. skb is added to tail of read queue, rather than out of order.
1693 3. socket is not in passive state.
1694 4. Finally, it really contains options, which user wants to receive.
1697 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1698 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1699 if (np->rxopt.bits.rxinfo)
1700 np->mcast_oif = inet6_iif(opt_skb);
1701 if (np->rxopt.bits.rxhlim)
1702 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
/* Latch the options skb; xchg returns the previously latched one so
 * it can be freed (free calls on elided lines). */
1703 if (ipv6_opt_accepted(sk, opt_skb)) {
1704 skb_set_owner_r(opt_skb, sk);
1705 opt_skb = xchg(&np->pktoptions, opt_skb);
1707 __kfree_skb(opt_skb);
1708 opt_skb = xchg(&np->pktoptions, NULL);
/*
 * Protocol entry point for incoming IPv6 TCP segments (registered via
 * tcpv6_protocol).  Validates the header, fills the skb control block,
 * looks up the owning socket, and either processes the segment (direct,
 * prequeue, or backlog) or handles the no-socket / TIME_WAIT cases.
 */
1717 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1719 struct sk_buff *skb = *pskb;
/* Only packets addressed to this host are of interest. */
1724 if (skb->pkt_type != PACKET_HOST)
1728 * Count it even if it's bad.
1730 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
/* Make sure the basic header, then the full header (doff words), is
 * linear and sane before touching any field. */
1732 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1737 if (th->doff < sizeof(struct tcphdr)/4)
1739 if (!pskb_may_pull(skb, th->doff*4))
1742 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1743 tcp_v6_checksum_init(skb) < 0))
/* Cache sequence numbers and flags in the skb control block. */
1747 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1748 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1749 skb->len - th->doff*4);
1750 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1751 TCP_SKB_CB(skb)->when = 0;
1752 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1753 TCP_SKB_CB(skb)->sacked = 0;
1754 /* Find the socket owning this 4-tuple (established or listener). */
1755 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1756 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1763 if (sk->sk_state == TCP_TIME_WAIT)
1766 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1767 goto discard_and_relse;
1769 if (sk_filter(sk, skb, 0))
1770 goto discard_and_relse;
/* Process now if the user isn't holding the socket; try the prequeue
 * first, otherwise queue on the backlog for later. */
1776 if (!sock_owned_by_user(sk)) {
1777 if (!tcp_prequeue(sk, skb))
1778 ret = tcp_v6_do_rcv(sk, skb);
1780 sk_add_backlog(sk, skb);
1784 return ret ? -1 : 0;
/* No owning socket: verify policy/checksum, then answer with RST. */
1787 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1790 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1792 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1794 tcp_v6_send_reset(skb);
/* TIME_WAIT handling (label elided above). */
1811 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1812 inet_twsk_put((struct inet_timewait_sock *)sk);
1816 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1817 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1818 inet_twsk_put((struct inet_timewait_sock *)sk);
1822 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
/* TCP_TW_SYN: a new SYN may legitimately reuse the pair — look for a
 * listener and, if found, retire the timewait sock early. */
1828 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1829 &skb->nh.ipv6h->daddr,
1830 ntohs(th->dest), inet6_iif(skb));
1832 struct inet_timewait_sock *tw = inet_twsk(sk);
1833 inet_twsk_deschedule(tw, &tcp_death_row);
1838 /* Fall through to ACK */
1841 tcp_v6_timewait_ack(sk, skb);
1845 case TCP_TW_SUCCESS:;
/*
 * Re-validate (and if necessary re-create) the cached route for an
 * IPv6 TCP socket, e.g. after a routing change invalidated it.
 * Returns 0 on success (return statements are on elided lines —
 * TODO confirm); on failure clears sk_route_caps / sets sk_err_soft.
 */
1850 static int tcp_v6_rebuild_header(struct sock *sk)
1853 struct dst_entry *dst;
1854 struct ipv6_pinfo *np = inet6_sk(sk);
/* Fast path: the cached dst is still valid for our cookie. */
1856 dst = __sk_dst_check(sk, np->dst_cookie);
1859 struct inet_sock *inet = inet_sk(sk);
1860 struct in6_addr *final_p = NULL, final;
/* Rebuild the flow from socket state and do a fresh route lookup. */
1863 memset(&fl, 0, sizeof(fl));
1864 fl.proto = IPPROTO_TCP;
1865 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1866 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1867 fl.fl6_flowlabel = np->flow_label;
1868 fl.oif = sk->sk_bound_dev_if;
1869 fl.fl_ip_dport = inet->dport;
1870 fl.fl_ip_sport = inet->sport;
/* Source route: aim at the first hop, remember the real destination
 * ('final_p = &final' presumably on an elided line). */
1872 if (np->opt && np->opt->srcrt) {
1873 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1874 ipv6_addr_copy(&final, &fl.fl6_dst);
1875 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1879 err = ip6_dst_lookup(sk, &dst, &fl);
1881 sk->sk_route_caps = 0;
1885 ipv6_addr_copy(&fl.fl6_dst, final_p);
1887 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1888 sk->sk_err_soft = -err;
/* Cache the new route and recompute offload capabilities (no v4
 * checksum offload or TSO for IPv6 here). */
1893 ip6_dst_store(sk, dst, NULL);
1894 sk->sk_route_caps = dst->dev->features &
1895 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
/*
 * Transmit one TCP segment over IPv6 (queue_xmit hook in
 * ipv6_specific).  Builds the flow, reuses or refreshes the cached
 * route, then hands the skb to ip6_xmit with the socket's txoptions.
 */
1901 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1903 struct sock *sk = skb->sk;
1904 struct inet_sock *inet = inet_sk(sk);
1905 struct ipv6_pinfo *np = inet6_sk(sk);
1907 struct dst_entry *dst;
1908 struct in6_addr *final_p = NULL, final;
/* Describe the flow from connected-socket state. */
1910 memset(&fl, 0, sizeof(fl));
1911 fl.proto = IPPROTO_TCP;
1912 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1913 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1914 fl.fl6_flowlabel = np->flow_label;
1915 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1916 fl.oif = sk->sk_bound_dev_if;
1917 fl.fl_ip_sport = inet->sport;
1918 fl.fl_ip_dport = inet->dport;
/* Source route: route via first hop, keep real destination in
 * 'final' ('final_p = &final' presumably on an elided line). */
1920 if (np->opt && np->opt->srcrt) {
1921 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1922 ipv6_addr_copy(&final, &fl.fl6_dst);
1923 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
/* Use the cached route when still valid, otherwise look up anew. */
1927 dst = __sk_dst_check(sk, np->dst_cookie);
1930 int err = ip6_dst_lookup(sk, &dst, &fl);
1933 sk->sk_err_soft = -err;
1938 ipv6_addr_copy(&fl.fl6_dst, final_p);
1940 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1941 sk->sk_route_caps = 0;
1946 ip6_dst_store(sk, dst, NULL);
1947 sk->sk_route_caps = dst->dev->features &
1948 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1951 skb->dst = dst_clone(dst);
1953 /* Restore final destination back after routing done */
1954 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1956 return ip6_xmit(sk, skb, &fl, np->opt, 0);
/*
 * Fill a sockaddr_in6 with the peer's address for getpeername()-style
 * callers (addr2sockaddr hook).  Flow label is intentionally zeroed;
 * scope id is reported only for link-local peers on a bound device.
 */
1959 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1961 struct ipv6_pinfo *np = inet6_sk(sk);
1962 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1964 sin6->sin6_family = AF_INET6;
1965 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
/* dport is already in network byte order, as sockaddr expects. */
1966 sin6->sin6_port = inet_sk(sk)->dport;
1967 /* We do not store received flowlabel for TCP */
1968 sin6->sin6_flowinfo = 0;
1969 sin6->sin6_scope_id = 0;
1970 if (sk->sk_bound_dev_if &&
1971 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1972 sin6->sin6_scope_id = sk->sk_bound_dev_if;
/*
 * remember_stamp hook: timestamp caching (a la tcp_v4_remember_stamp)
 * is not implemented for IPv6; the (elided) body presumably just
 * returns 0 — TODO confirm.
 */
1975 static int tcp_v6_remember_stamp(struct sock *sk)
1977 /* Alas, not yet... */
/* Address-family operations for native IPv6 TCP sockets. */
1981 static struct tcp_func ipv6_specific = {
1982 .queue_xmit = tcp_v6_xmit,
1983 .send_check = tcp_v6_send_check,
1984 .rebuild_header = tcp_v6_rebuild_header,
1985 .conn_request = tcp_v6_conn_request,
1986 .syn_recv_sock = tcp_v6_syn_recv_sock,
1987 .remember_stamp = tcp_v6_remember_stamp,
1988 .net_header_len = sizeof(struct ipv6hdr),
1990 .setsockopt = ipv6_setsockopt,
1991 .getsockopt = ipv6_getsockopt,
1992 .addr2sockaddr = v6_addr2sockaddr,
1993 .sockaddr_len = sizeof(struct sockaddr_in6)
1997 * TCP over IPv4 via INET6 API
/* Operations for v4-mapped sockets: transmit/checksum/rebuild use the
 * IPv4 implementations, but connection setup and sockopts stay on the
 * IPv6 side so the INET6 API semantics are preserved. */
2000 static struct tcp_func ipv6_mapped = {
2001 .queue_xmit = ip_queue_xmit,
2002 .send_check = tcp_v4_send_check,
2003 .rebuild_header = inet_sk_rebuild_header,
2004 .conn_request = tcp_v6_conn_request,
2005 .syn_recv_sock = tcp_v6_syn_recv_sock,
2006 .remember_stamp = tcp_v4_remember_stamp,
2007 .net_header_len = sizeof(struct iphdr),
2009 .setsockopt = ipv6_setsockopt,
2010 .getsockopt = ipv6_getsockopt,
2011 .addr2sockaddr = v6_addr2sockaddr,
2012 .sockaddr_len = sizeof(struct sockaddr_in6)
2017 /* NOTE: A lot of things set to zero explicitly by call to
2018 * sk_alloc() so need not be done here.
/*
 * proto->init hook: initialize a freshly allocated IPv6 TCP socket
 * (timers, queues, congestion-control defaults, buffer sizes).
 */
2020 static int tcp_v6_init_sock(struct sock *sk)
2022 struct inet_connection_sock *icsk = inet_csk(sk);
2023 struct tcp_sock *tp = tcp_sk(sk);
2025 skb_queue_head_init(&tp->out_of_order_queue);
2026 tcp_init_xmit_timers(sk);
2027 tcp_prequeue_init(tp);
2029 icsk->icsk_rto = TCP_TIMEOUT_INIT;
2030 tp->mdev = TCP_TIMEOUT_INIT;
2032 /* So many TCP implementations out there (incorrectly) count the
2033 * initial SYN frame in their delayed-ACK and congestion control
2034 * algorithms that we must have the following bandaid to talk
2035 * efficiently to them. -DaveM
2039 /* See draft-stevens-tcpca-spec-01 for discussion of the
2040 * initialization of these values.
/* "Infinite" ssthresh / cwnd clamp; 536 is the classic default MSS. */
2042 tp->snd_ssthresh = 0x7fffffff;
2043 tp->snd_cwnd_clamp = ~0;
2044 tp->mss_cache = 536;
2046 tp->reordering = sysctl_tcp_reordering;
2048 sk->sk_state = TCP_CLOSE;
2050 tp->af_specific = &ipv6_specific;
2051 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
2052 sk->sk_write_space = sk_stream_write_space;
2053 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2055 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2056 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
/* Global accounting of live TCP sockets. */
2058 atomic_inc(&tcp_sockets_allocated);
/*
 * proto->destroy hook: run the shared IPv4/TCP teardown, then release
 * the IPv6-specific state (options, cached dst) via inet6_destroy_sock.
 */
2063 static int tcp_v6_destroy_sock(struct sock *sk)
2065 extern int tcp_v4_destroy_sock(struct sock *sk);
2067 tcp_v4_destroy_sock(sk);
2068 return inet6_destroy_sock(sk);
2071 /* Proc filesystem TCPv6 sock list dumping. */
/*
 * Emit one /proc/net/tcp6 line for a pending open request (SYN_RECV)
 * on listener 'sk'; 'i' is the row index, 'uid' the owner shown.
 */
2072 static void get_openreq6(struct seq_file *seq,
2073 struct sock *sk, struct request_sock *req, int i, int uid)
2075 struct in6_addr *dest, *src;
/* Remaining time until the request expires, in jiffies. */
2076 int ttd = req->expires - jiffies;
2081 src = &tcp6_rsk(req)->loc_addr;
2082 dest = &tcp6_rsk(req)->rmt_addr;
2084 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2085 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2087 src->s6_addr32[0], src->s6_addr32[1],
2088 src->s6_addr32[2], src->s6_addr32[3],
2089 ntohs(inet_sk(sk)->sport),
2090 dest->s6_addr32[0], dest->s6_addr32[1],
2091 dest->s6_addr32[2], dest->s6_addr32[3],
2092 ntohs(inet_rsk(req)->rmt_port),
2094 0,0, /* could print option size, but that is af dependent. */
2095 1, /* timers active (only the expire timer) */
2096 jiffies_to_clock_t(ttd),
2099 0, /* non standard timer */
2100 0, /* open_requests have no inode */
/*
 * Emit one /proc/net/tcp6 line for a full socket (listening or
 * established); 'i' is the row index.
 */
2104 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2106 struct in6_addr *dest, *src;
2109 unsigned long timer_expires;
2110 struct inet_sock *inet = inet_sk(sp);
2111 struct tcp_sock *tp = tcp_sk(sp);
2112 const struct inet_connection_sock *icsk = inet_csk(sp);
2113 struct ipv6_pinfo *np = inet6_sk(sp);
2116 src = &np->rcv_saddr;
2117 destp = ntohs(inet->dport);
2118 srcp = ntohs(inet->sport);
/* Pick whichever timer is pending (retransmit, zero-window probe, or
 * keepalive) so the tm->when column reflects it. */
2120 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2122 timer_expires = icsk->icsk_timeout;
2123 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2125 timer_expires = icsk->icsk_timeout;
2126 } else if (timer_pending(&sp->sk_timer)) {
2128 timer_expires = sp->sk_timer.expires;
2131 timer_expires = jiffies;
2135 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2136 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2138 src->s6_addr32[0], src->s6_addr32[1],
2139 src->s6_addr32[2], src->s6_addr32[3], srcp,
2140 dest->s6_addr32[0], dest->s6_addr32[1],
2141 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2143 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2145 jiffies_to_clock_t(timer_expires - jiffies),
2146 icsk->icsk_retransmits,
2148 icsk->icsk_probes_out,
2150 atomic_read(&sp->sk_refcnt), sp,
2153 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
/* Print -1 for an effectively-unset ssthresh (>= 0xFFFF). */
2154 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
/*
 * Emit one /proc/net/tcp6 line for a TIME_WAIT socket; 'i' is the
 * row index.  Most columns are fixed zeros since a timewait sock
 * carries no queues or counters.
 */
2158 static void get_timewait6_sock(struct seq_file *seq,
2159 struct inet_timewait_sock *tw, int i)
2161 struct in6_addr *dest, *src;
2163 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
/* Remaining time-wait duration, in jiffies. */
2164 int ttd = tw->tw_ttd - jiffies;
2169 dest = &tcp6tw->tw_v6_daddr;
2170 src = &tcp6tw->tw_v6_rcv_saddr;
2171 destp = ntohs(tw->tw_dport);
2172 srcp = ntohs(tw->tw_sport);
2175 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2176 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2178 src->s6_addr32[0], src->s6_addr32[1],
2179 src->s6_addr32[2], src->s6_addr32[3], srcp,
2180 dest->s6_addr32[0], dest->s6_addr32[1],
2181 dest->s6_addr32[2], dest->s6_addr32[3], destp,
/* timer column "3" marks the time-wait timer. */
2182 tw->tw_substate, 0, 0,
2183 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2184 atomic_read(&tw->tw_refcnt), tw);
2187 #ifdef CONFIG_PROC_FS
/*
 * seq_file show callback for /proc/net/tcp6: print the header row for
 * the start token, otherwise dispatch on iterator state to the
 * appropriate per-entry formatter.
 */
2188 static int tcp6_seq_show(struct seq_file *seq, void *v)
2190 struct tcp_iter_state *st;
2192 if (v == SEQ_START_TOKEN) {
2197 "st tx_queue rx_queue tr tm->when retrnsmt"
2198 " uid timeout inode\n");
2203 switch (st->state) {
2204 case TCP_SEQ_STATE_LISTENING:
2205 case TCP_SEQ_STATE_ESTABLISHED:
2206 get_tcp6_sock(seq, v, st->num);
2208 case TCP_SEQ_STATE_OPENREQ:
2209 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2211 case TCP_SEQ_STATE_TIME_WAIT:
2212 get_timewait6_sock(seq, v, st->num);
2219 static struct file_operations tcp6_seq_fops;
/* /proc/net/tcp6 registration data handed to tcp_proc_register();
 * tcp6_seq_fops is filled in by the shared TCP proc code. */
2220 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2221 .owner = THIS_MODULE,
2224 .seq_show = tcp6_seq_show,
2225 .seq_fops = &tcp6_seq_fops,
/* Register /proc/net/tcp6; returns the tcp_proc_register() result. */
2228 int __init tcp6_proc_init(void)
2230 return tcp_proc_register(&tcp6_seq_afinfo);
/* Unregister /proc/net/tcp6. */
2233 void tcp6_proc_exit(void)
2235 tcp_proc_unregister(&tcp6_seq_afinfo);
/*
 * The IPv6 TCP protocol descriptor.  Most operations are the shared,
 * address-family-independent TCP implementations; only init/destroy,
 * backlog receive, hashing, and port allocation are IPv6-specific.
 */
2239 struct proto tcpv6_prot = {
2241 .owner = THIS_MODULE,
2243 .connect = tcp_v6_connect,
2244 .disconnect = tcp_disconnect,
2245 .accept = inet_csk_accept,
2247 .init = tcp_v6_init_sock,
2248 .destroy = tcp_v6_destroy_sock,
2249 .shutdown = tcp_shutdown,
2250 .setsockopt = tcp_setsockopt,
2251 .getsockopt = tcp_getsockopt,
2252 .sendmsg = tcp_sendmsg,
2253 .recvmsg = tcp_recvmsg,
2254 .backlog_rcv = tcp_v6_do_rcv,
2255 .hash = tcp_v6_hash,
2256 .unhash = tcp_unhash,
2257 .get_port = tcp_v6_get_port,
2258 .enter_memory_pressure = tcp_enter_memory_pressure,
2259 .sockets_allocated = &tcp_sockets_allocated,
2260 .memory_allocated = &tcp_memory_allocated,
2261 .memory_pressure = &tcp_memory_pressure,
2262 .orphan_count = &tcp_orphan_count,
2263 .sysctl_mem = sysctl_tcp_mem,
2264 .sysctl_wmem = sysctl_tcp_wmem,
2265 .sysctl_rmem = sysctl_tcp_rmem,
2266 .max_header = MAX_TCP_HEADER,
/* Per-socket object sizes for the slab caches. */
2267 .obj_size = sizeof(struct tcp6_sock),
2268 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2269 .rsk_prot = &tcp6_request_sock_ops,
/* inet6 layer hookup: NOPOLICY because tcp_v6_rcv does its own
 * xfrm6_policy_check per socket; FINAL marks a terminal protocol. */
2272 static struct inet6_protocol tcpv6_protocol = {
2273 .handler = tcp_v6_rcv,
2274 .err_handler = tcp_v6_err,
2275 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2278 extern struct proto_ops inet6_stream_ops;
/* socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) switch entry; PERMANENT
 * prevents unregistration. */
2280 static struct inet_protosw tcpv6_protosw = {
2281 .type = SOCK_STREAM,
2282 .protocol = IPPROTO_TCP,
2283 .prot = &tcpv6_prot,
2284 .ops = &inet6_stream_ops,
2287 .flags = INET_PROTOSW_PERMANENT,
/*
 * Boot-time initialization: hook TCP into the inet6 protocol table and
 * the socket-creation switch.  A failed protocol registration is only
 * logged, not propagated.
 */
2290 void __init tcpv6_init(void)
2292 /* register inet6 protocol */
2293 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2294 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2295 inet6_register_protosw(&tcpv6_protosw);