2 * Copyright (c) 2013 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
60 #if IS_ENABLED(CONFIG_IPV6)
62 #include <net/ip6_fib.h>
63 #include <net/ip6_route.h>
/* Hash a (key, remote IPv4 address) pair into a bucket index for the
 * per-netns tunnel hash table.
 * NOTE(review): the visible text is truncated here (function braces and
 * the second hash_32() argument are missing) -- verify against full source.
 */
66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
68 return hash_32((__force u32)key ^ (__force u32)remote,
/* Install @dst as the cached route in one per-CPU cache slot @idst,
 * atomically swapping out the previous entry with xchg().
 * NOTE(review): the saddr store and the release of old_dst are not
 * visible in this extraction -- confirm against full source.
 */
72 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
73 struct dst_entry *dst, __be32 saddr)
75 struct dst_entry *old_dst;
78 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
/* Update the current CPU's cached route slot of tunnel @t. */
83 static noinline void tunnel_dst_set(struct ip_tunnel *t,
84 struct dst_entry *dst, __be32 saddr)
86 __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
/* Drop the current CPU's cached route for @t (NULL dst, zero saddr). */
89 static void tunnel_dst_reset(struct ip_tunnel *t)
91 tunnel_dst_set(t, NULL, 0);
/* Invalidate the cached route of @t on every possible CPU; called when
 * tunnel parameters change and cached routes may be stale.
 */
94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
98 for_each_possible_cpu(i)
99 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
/* Return this CPU's cached route for @t if still usable, taking a
 * reference with atomic_inc_not_zero() and re-validating an obsolete
 * dst through dst->ops->check(dst, cookie).  On success *saddr is
 * filled from the cache slot.
 * NOTE(review): the failure/release paths between these lines are not
 * visible in this extraction -- confirm against full source.
 */
103 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
104 u32 cookie, __be32 *saddr)
106 struct ip_tunnel_dst *idst;
107 struct dst_entry *dst;
110 idst = raw_cpu_ptr(t->dst_cache);
111 dst = rcu_dereference(idst->dst);
/* dst refcount may already have dropped to zero; skip it in that case */
112 if (dst && !atomic_inc_not_zero(&dst->__refcnt))
115 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
116 *saddr = idst->saddr;
124 return (struct rtable *)dst;
/* Match packet key bits (@flags, @key) against tunnel config @p:
 * a tunnel that expects a key (TUNNEL_KEY) only matches packets that
 * carry the same key; a keyless tunnel only matches keyless packets.
 */
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128 __be16 flags, __be32 key)
130 if (p->i_flags & TUNNEL_KEY) {
131 if (flags & TUNNEL_KEY)
132 return key == p->i_key;
134 /* key expected, none present */
137 return !(flags & TUNNEL_KEY);
140 /* Fallback tunnel: no source, no destination, no key, no options
143    We require an exact key match, i.e. if a key is present in the packet
144    it will match only a tunnel with the same key; if it is not present,
145    it will match only a keyless tunnel.
147    All keyless packets, if not matched by configured keyless tunnels,
148    will match the fallback tunnel.
149    Given src, dst and key, find the appropriate input tunnel.
/* Find the receiving tunnel for a packet, best match first, under RCU:
 *   pass 1: exact (saddr, daddr) match;
 *   pass 2: daddr-only match (wildcard source);
 *   pass 3: saddr-only match, or multicast daddr equal to our local;
 *   pass 4: fully wildcard tunnels matched by key alone.
 * Each pass prefers an exact parms.link match; falls back to the
 * per-netns fallback device when it is up, else no tunnel.
 * NOTE(review): the "continue"/candidate ("cand") bookkeeping and the
 * final returns of each loop are missing from the visible text --
 * extraction is incomplete; confirm against full source.
 */
151 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
152 int link, __be16 flags,
153 __be32 remote, __be32 local,
157 struct ip_tunnel *t, *cand = NULL;
158 struct hlist_head *head;
160 hash = ip_tunnel_hash(key, remote);
161 head = &itn->tunnels[hash];
/* Pass 1: exact local/remote endpoint match */
163 hlist_for_each_entry_rcu(t, head, hash_node) {
164 if (local != t->parms.iph.saddr ||
165 remote != t->parms.iph.daddr ||
166 !(t->dev->flags & IFF_UP))
169 if (!ip_tunnel_key_match(&t->parms, flags, key))
172 if (t->parms.link == link)
/* Pass 2: remote matches, tunnel has wildcard (zero) source */
178 hlist_for_each_entry_rcu(t, head, hash_node) {
179 if (remote != t->parms.iph.daddr ||
180 t->parms.iph.saddr != 0 ||
181 !(t->dev->flags & IFF_UP))
184 if (!ip_tunnel_key_match(&t->parms, flags, key))
187 if (t->parms.link == link)
/* Passes 3/4 use the wildcard-remote bucket */
193 hash = ip_tunnel_hash(key, 0);
194 head = &itn->tunnels[hash];
/* Pass 3: our local address matches tunnel source, or packet was
 * sent to a multicast group the tunnel listens on */
196 hlist_for_each_entry_rcu(t, head, hash_node) {
197 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
198 (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
201 if (!(t->dev->flags & IFF_UP))
204 if (!ip_tunnel_key_match(&t->parms, flags, key))
207 if (t->parms.link == link)
/* Pass 4: key-only wildcard tunnels (skipped for keyless packets) */
213 if (flags & TUNNEL_NO_KEY)
214 goto skip_key_lookup;
216 hlist_for_each_entry_rcu(t, head, hash_node) {
217 if (t->parms.i_key != key ||
218 t->parms.iph.saddr != 0 ||
219 t->parms.iph.daddr != 0 ||
220 !(t->dev->flags & IFF_UP))
223 if (t->parms.link == link)
/* Last resort: the per-netns fallback device, if up */
233 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
234 return netdev_priv(itn->fb_tunnel_dev)
239 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
/* Map tunnel parameters to their hash-table bucket.  A multicast remote
 * is treated as wildcard (remote 0).  For VTI tunnels without TUNNEL_KEY
 * the key is special-cased (the adjusted i_key line is not visible in
 * this extraction -- confirm against full source).
 */
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242 struct ip_tunnel_parm *parms)
246 __be32 i_key = parms->i_key;
248 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249 remote = parms->iph.daddr;
253 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
256 h = ip_tunnel_hash(i_key, remote);
257 return &itn->tunnels[h];
/* Insert @t at the head of its hash bucket (RCU-safe list add). */
260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
262 struct hlist_head *head = ip_bucket(itn, &t->parms);
264 hlist_add_head_rcu(&t->hash_node, head);
/* Remove @t from the tunnel hash table (RCU-safe removal). */
267 static void ip_tunnel_del(struct ip_tunnel *t)
269 hlist_del_init_rcu(&t->hash_node);
/* Find a tunnel whose endpoints, link, device type and key all match
 * @parms exactly; used by the configuration paths (ioctl/netlink).
 * Returns the tunnel or NULL (the break/return after the match is not
 * visible in this extraction).
 */
272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273 struct ip_tunnel_parm *parms,
276 __be32 remote = parms->iph.daddr;
277 __be32 local = parms->iph.saddr;
278 __be32 key = parms->i_key;
279 __be16 flags = parms->i_flags;
280 int link = parms->link;
281 struct ip_tunnel *t = NULL;
282 struct hlist_head *head = ip_bucket(itn, parms);
284 hlist_for_each_entry_rcu(t, head, hash_node) {
285 if (local == t->parms.iph.saddr &&
286 remote == t->parms.iph.daddr &&
287 link == t->parms.link &&
288 type == t->dev->type &&
289 ip_tunnel_key_match(&t->parms, flags, key))
/* Allocate and register a tunnel net_device.  The device is named after
 * parms->name when given, otherwise "<ops->kind>%d" (kernel expands the
 * %d).  Copies @parms into the new tunnel's private area.
 * NOTE(review): the error paths (name-too-long, alloc failure, the
 * register_netdevice failure unwind) and the final return are missing
 * from the visible text -- extraction is incomplete.
 */
295 static struct net_device *__ip_tunnel_create(struct net *net,
296 const struct rtnl_link_ops *ops,
297 struct ip_tunnel_parm *parms)
300 struct ip_tunnel *tunnel;
301 struct net_device *dev;
305 strlcpy(name, parms->name, IFNAMSIZ);
/* need room for "%d" plus NUL after the kind string */
307 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
311 strlcpy(name, ops->kind, IFNAMSIZ);
312 strncat(name, "%d", 2);
316 dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
321 dev_net_set(dev, net);
323 dev->rtnl_link_ops = ops;
325 tunnel = netdev_priv(dev);
326 tunnel->parms = *parms;
329 err = register_netdevice(dev);
/* Initialize an IPv4 flow key used for tunnel route lookups.
 * NOTE(review): the daddr/saddr member assignments are not visible in
 * this extraction; only oif/tos/proto/GRE-key stores are shown.
 */
341 static inline void init_tunnel_flow(struct flowi4 *fl4,
343 __be32 daddr, __be32 saddr,
344 __be32 key, __u8 tos, int oif)
346 memset(fl4, 0, sizeof(*fl4));
347 fl4->flowi4_oif = oif;
350 fl4->flowi4_tos = tos;
351 fl4->flowi4_proto = proto;
352 fl4->fl4_gre_key = key;
/* Bind the tunnel to an underlying device: route to the remote endpoint
 * (caching the result), derive needed_headroom from the output device,
 * and compute a reasonable MTU.  Returns the suggested MTU.
 * NOTE(review): several lines (fl4/rt declarations, route error check,
 * tdev assignment from the route, the final return) are missing from
 * the visible text -- extraction is incomplete.
 */
355 static int ip_tunnel_bind_dev(struct net_device *dev)
357 struct net_device *tdev = NULL;
358 struct ip_tunnel *tunnel = netdev_priv(dev);
359 const struct iphdr *iph;
360 int hlen = LL_MAX_HEADER;
361 int mtu = ETH_DATA_LEN;
362 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
364 iph = &tunnel->parms.iph;
366 /* Guess output device to choose reasonable mtu and needed_headroom */
371 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
372 iph->saddr, tunnel->parms.o_key,
373 RT_TOS(iph->tos), tunnel->parms.link);
374 rt = ip_route_output_key(tunnel->net, &fl4);
378 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
381 if (dev->type != ARPHRD_ETHER)
382 dev->flags |= IFF_POINTOPOINT;
/* no route-derived device: fall back to the explicitly bound link */
385 if (!tdev && tunnel->parms.link)
386 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
389 hlen = tdev->hard_header_len + tdev->needed_headroom;
392 dev->iflink = tunnel->parms.link;
394 dev->needed_headroom = t_hlen + hlen;
395 mtu -= (dev->hard_header_len + t_hlen);
/* Create a new tunnel device with @parms, using the fallback device's
 * rtnl_link_ops for the netns @itn, bind its MTU and add it to the hash
 * table.  Returns the new tunnel or an ERR_PTR on failure.
 */
403 static struct ip_tunnel *ip_tunnel_create(struct net *net,
404 struct ip_tunnel_net *itn,
405 struct ip_tunnel_parm *parms)
407 struct ip_tunnel *nt;
408 struct net_device *dev;
410 BUG_ON(!itn->fb_tunnel_dev);
411 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
413 return ERR_CAST(dev);
415 dev->mtu = ip_tunnel_bind_dev(dev);
417 nt = netdev_priv(dev);
418 ip_tunnel_add(itn, nt);
/* Generic tunnel receive path.  Validates the checksum-present and
 * sequence-number expectations against the tunnel's i_flags, updates
 * per-device error counters on mismatch, performs ECN decapsulation
 * (optionally logging non-ECT senders), bumps per-CPU RX stats, scrubs
 * the skb on netns crossing, and hands the packet to GRO cells.
 * NOTE(review): the drop paths (kfree_skb/goto) between the error
 * counters are missing from the visible text -- extraction incomplete.
 */
422 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
423 const struct tnl_ptk_info *tpi, bool log_ecn_error)
425 struct pcpu_sw_netstats *tstats;
426 const struct iphdr *iph = ip_hdr(skb);
429 #ifdef CONFIG_NET_IPGRE_BROADCAST
430 if (ipv4_is_multicast(iph->daddr)) {
431 tunnel->dev->stats.multicast++;
432 skb->pkt_type = PACKET_BROADCAST;
/* checksum flag in packet must agree with tunnel configuration */
436 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
437 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
438 tunnel->dev->stats.rx_crc_errors++;
439 tunnel->dev->stats.rx_errors++;
/* enforce in-order delivery when TUNNEL_SEQ is configured */
443 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
444 if (!(tpi->flags&TUNNEL_SEQ) ||
445 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
446 tunnel->dev->stats.rx_fifo_errors++;
447 tunnel->dev->stats.rx_errors++;
450 tunnel->i_seqno = ntohl(tpi->seq) + 1;
453 skb_reset_network_header(skb);
455 err = IP_ECN_decapsulate(iph, skb);
458 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
459 &iph->saddr, iph->tos);
461 ++tunnel->dev->stats.rx_frame_errors;
462 ++tunnel->dev->stats.rx_errors;
/* account the packet in lockless per-CPU stats */
467 tstats = this_cpu_ptr(tunnel->dev->tstats);
468 u64_stats_update_begin(&tstats->syncp);
469 tstats->rx_packets++;
470 tstats->rx_bytes += skb->len;
471 u64_stats_update_end(&tstats->syncp);
/* drop state (marks, cb, ...) when crossing network namespaces */
473 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
475 if (tunnel->dev->type == ARPHRD_ETHER) {
476 skb->protocol = eth_type_trans(skb, tunnel->dev);
477 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
479 skb->dev = tunnel->dev;
482 gro_cells_receive(&tunnel->gro_cells, skb);
489 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
/* Extra outer-header length added by the configured encapsulation type
 * (a UDP header for FOU).  NOTE(review): the TUNNEL_ENCAP_NONE return
 * and the default case are not visible in this extraction.
 */
491 static int ip_encap_hlen(struct ip_tunnel_encap *e)
494 case TUNNEL_ENCAP_NONE:
496 case TUNNEL_ENCAP_FOU:
497 return sizeof(struct udphdr);
/* Apply encapsulation settings @ipencap to tunnel @t and recompute the
 * cached header lengths (encap_hlen and total hlen).
 * NOTE(review): the negative-hlen error check after ip_encap_hlen() is
 * not visible in this extraction.
 */
503 int ip_tunnel_encap_setup(struct ip_tunnel *t,
504 struct ip_tunnel_encap *ipencap)
508 memset(&t->encap, 0, sizeof(t->encap));
510 hlen = ip_encap_hlen(ipencap);
514 t->encap.type = ipencap->type;
515 t->encap.sport = ipencap->sport;
516 t->encap.dport = ipencap->dport;
517 t->encap.flags = ipencap->flags;
519 t->encap_hlen = hlen;
520 t->hlen = t->encap_hlen + t->tun_hlen;
524 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
/* Push a foo-over-UDP (FOU) outer UDP header.  Prepares GSO offload
 * state (with or without UDP checksum), picks the source port from the
 * configured value or a flow hash, fills the UDP header, and tells the
 * caller the outer protocol is now UDP.
 * NOTE(review): the uh declaration/field stores between these lines are
 * partially missing from the visible text -- extraction incomplete.
 */
526 static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
527 size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
531 bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
532 int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
534 skb = iptunnel_handle_offloads(skb, csum, type);
539 /* Get length and hash before making space in skb */
541 sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
544 skb_push(skb, hdr_len);
546 skb_reset_transport_header(skb);
551 uh->len = htons(skb->len);
553 udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
554 fl4->saddr, fl4->daddr, skb->len);
556 *protocol = IPPROTO_UDP;
/* Dispatch on the tunnel's configured encapsulation: nothing for NONE,
 * FOU header construction otherwise.  NOTE(review): the NONE return
 * and default error case are not visible in this extraction.
 */
561 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
562 u8 *protocol, struct flowi4 *fl4)
564 switch (t->encap.type) {
565 case TUNNEL_ENCAP_NONE:
567 case TUNNEL_ENCAP_FOU:
568 return fou_build_header(skb, &t->encap, t->encap_hlen,
574 EXPORT_SYMBOL(ip_tunnel_encap);
/* Path-MTU handling on transmit: compute the tunnel path MTU, update
 * the inner route's PMTU, and for oversized packets send ICMP
 * fragmentation-needed (IPv4, when DF set) or ICMPv6 packet-too-big.
 * NOTE(review): several lines (mtu declaration, return statements,
 * part of the IPv6 size check) are missing from the visible text.
 */
576 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
577 struct rtable *rt, __be16 df)
579 struct ip_tunnel *tunnel = netdev_priv(dev);
580 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
584 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
585 - sizeof(struct iphdr) - tunnel->hlen;
587 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
590 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
592 if (skb->protocol == htons(ETH_P_IP)) {
593 if (!skb_is_gso(skb) &&
594 (df & htons(IP_DF)) && mtu < pkt_size) {
595 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
596 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
600 #if IS_ENABLED(CONFIG_IPV6)
601 else if (skb->protocol == htons(ETH_P_IPV6)) {
602 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
604 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
605 mtu >= IPV6_MIN_MTU) {
/* host route or fixed unicast endpoint: pin the lowered MTU */
606 if ((tunnel->parms.iph.daddr &&
607 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
608 rt6->rt6i_dst.plen == 128) {
609 rt6->rt6i_flags |= RTF_MODIFIED;
610 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
614 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
616 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
/* Generic tunnel transmit path.  Resolves the outer destination (from
 * tunnel config, the inner IPv4 route, or an IPv6 neighbour for
 * 6-over-4 style NBMA tunnels), computes outer TOS/TTL/DF from tunnel
 * parameters or the inner header, looks up (or reuses a cached) route,
 * runs PMTU handling, ensures headroom, and emits via iptunnel_xmit().
 * NOTE(review): many short lines (gotos, declarations, returns, error
 * labels) are missing from the visible text -- extraction incomplete.
 */
624 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
625 const struct iphdr *tnl_params, u8 protocol)
627 struct ip_tunnel *tunnel = netdev_priv(dev);
628 const struct iphdr *inner_iph;
632 struct rtable *rt; /* Route to the other host */
633 unsigned int max_headroom; /* The extra header space needed */
638 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
/* a zero daddr means NBMA: destination must come from the inner packet */
639 connected = (tunnel->parms.iph.daddr != 0);
641 dst = tnl_params->daddr;
645 if (skb_dst(skb) == NULL) {
646 dev->stats.tx_fifo_errors++;
650 if (skb->protocol == htons(ETH_P_IP)) {
651 rt = skb_rtable(skb);
652 dst = rt_nexthop(rt, inner_iph->daddr);
654 #if IS_ENABLED(CONFIG_IPV6)
655 else if (skb->protocol == htons(ETH_P_IPV6)) {
656 const struct in6_addr *addr6;
657 struct neighbour *neigh;
658 bool do_tx_error_icmp;
661 neigh = dst_neigh_lookup(skb_dst(skb),
662 &ipv6_hdr(skb)->daddr);
666 addr6 = (const struct in6_addr *)&neigh->primary_key;
667 addr_type = ipv6_addr_type(addr6);
669 if (addr_type == IPV6_ADDR_ANY) {
670 addr6 = &ipv6_hdr(skb)->daddr;
671 addr_type = ipv6_addr_type(addr6);
/* only IPv4-compatible IPv6 addresses embed a usable v4 dest */
674 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
675 do_tx_error_icmp = true;
677 do_tx_error_icmp = false;
678 dst = addr6->s6_addr32[3];
680 neigh_release(neigh);
681 if (do_tx_error_icmp)
/* outer TOS: fixed from config, or inherited from the inner header */
691 tos = tnl_params->tos;
694 if (skb->protocol == htons(ETH_P_IP)) {
695 tos = inner_iph->tos;
697 } else if (skb->protocol == htons(ETH_P_IPV6)) {
698 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
703 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
704 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
706 if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
/* cached route only makes sense for fixed-destination tunnels */
709 rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
712 rt = ip_route_output_key(tunnel->net, &fl4);
715 dev->stats.tx_carrier_errors++;
719 tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
/* routing loop: the route points back at this tunnel device */
722 if (rt->dst.dev == dev) {
724 dev->stats.collisions++;
728 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
/* rate-limited link-failure reporting window */
733 if (tunnel->err_count > 0) {
734 if (time_before(jiffies,
735 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
738 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
739 dst_link_failure(skb);
741 tunnel->err_count = 0;
744 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
/* outer TTL: fixed from config, else inherited, else route default */
745 ttl = tnl_params->ttl;
747 if (skb->protocol == htons(ETH_P_IP))
748 ttl = inner_iph->ttl;
749 #if IS_ENABLED(CONFIG_IPV6)
750 else if (skb->protocol == htons(ETH_P_IPV6))
751 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
754 ttl = ip4_dst_hoplimit(&rt->dst);
757 df = tnl_params->frag_off;
758 if (skb->protocol == htons(ETH_P_IP))
759 df |= (inner_iph->frag_off&htons(IP_DF));
761 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
762 + rt->dst.header_len;
763 if (max_headroom > dev->needed_headroom)
764 dev->needed_headroom = max_headroom;
766 if (skb_cow_head(skb, dev->needed_headroom)) {
768 dev->stats.tx_dropped++;
773 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
774 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
775 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
779 #if IS_ENABLED(CONFIG_IPV6)
781 dst_link_failure(skb);
784 dev->stats.tx_errors++;
787 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
/* Apply new parameters @p to tunnel @t: endpoints, keys, device
 * addresses (for non-Ethernet tunnels), TTL/TOS/frag_off, and — when
 * the bound link changed — rebind and recompute the MTU.  Re-inserts
 * the tunnel in its (possibly new) hash bucket, flushes cached routes
 * and sends a netdev change notification.
 * NOTE(review): the ip_tunnel_del() before re-add and the conditional
 * dev->mtu update are not visible in this extraction.
 */
789 static void ip_tunnel_update(struct ip_tunnel_net *itn,
791 struct net_device *dev,
792 struct ip_tunnel_parm *p,
796 t->parms.iph.saddr = p->iph.saddr;
797 t->parms.iph.daddr = p->iph.daddr;
798 t->parms.i_key = p->i_key;
799 t->parms.o_key = p->o_key;
800 if (dev->type != ARPHRD_ETHER) {
801 memcpy(dev->dev_addr, &p->iph.saddr, 4);
802 memcpy(dev->broadcast, &p->iph.daddr, 4);
804 ip_tunnel_add(itn, t);
806 t->parms.iph.ttl = p->iph.ttl;
807 t->parms.iph.tos = p->iph.tos;
808 t->parms.iph.frag_off = p->iph.frag_off;
810 if (t->parms.link != p->link) {
813 t->parms.link = p->link;
814 mtu = ip_tunnel_bind_dev(dev);
818 ip_tunnel_dst_reset_all(t);
819 netdev_state_change(dev);
/* Legacy ioctl interface for tunnel management:
 *   SIOCGETTUNNEL - read parameters (fallback dev resolves by lookup);
 *   SIOCADDTUNNEL/SIOCCHGTUNNEL - create or update (CAP_NET_ADMIN),
 *     normalising DF and key flags for non-VTI tunnels, rejecting
 *     changes that would flip broadcast/pointopoint mode;
 *   SIOCDELTUNNEL - unregister (CAP_NET_ADMIN), refusing to delete the
 *     per-netns fallback device.
 * NOTE(review): the switch/case labels, error assignments and returns
 * are missing from the visible text -- extraction incomplete.
 */
822 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
825 struct ip_tunnel *t = netdev_priv(dev);
826 struct net *net = t->net;
827 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
829 BUG_ON(!itn->fb_tunnel_dev);
832 if (dev == itn->fb_tunnel_dev) {
833 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
835 t = netdev_priv(dev);
837 memcpy(p, &t->parms, sizeof(*p));
843 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
846 p->iph.frag_off |= htons(IP_DF);
847 if (!(p->i_flags & VTI_ISVTI)) {
848 if (!(p->i_flags & TUNNEL_KEY))
850 if (!(p->o_flags & TUNNEL_KEY))
854 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
856 if (cmd == SIOCADDTUNNEL) {
858 t = ip_tunnel_create(net, itn, p);
859 err = PTR_ERR_OR_ZERO(t);
866 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
873 unsigned int nflags = 0;
875 if (ipv4_is_multicast(p->iph.daddr))
876 nflags = IFF_BROADCAST;
877 else if (p->iph.daddr)
878 nflags = IFF_POINTOPOINT;
/* refuse mode flips between broadcast and point-to-point */
880 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
885 t = netdev_priv(dev);
891 ip_tunnel_update(itn, t, dev, p, true);
899 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
902 if (dev == itn->fb_tunnel_dev) {
904 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
/* never delete the per-netns fallback tunnel */
908 if (t == netdev_priv(itn->fb_tunnel_dev))
912 unregister_netdevice(dev);
923 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
/* Range-check and apply a new MTU, accounting for link-layer and tunnel
 * header overhead (0xFFF8 is the largest representable payload bound).
 * NOTE(review): the lower-bound check and the dev->mtu assignment are
 * not visible in this extraction.
 */
925 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
927 struct ip_tunnel *tunnel = netdev_priv(dev);
928 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
931 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
936 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
/* net_device destructor: release GRO cells, the per-CPU dst cache and
 * the per-CPU stats (the final free_netdev is not visible here).
 */
938 static void ip_tunnel_dev_free(struct net_device *dev)
940 struct ip_tunnel *tunnel = netdev_priv(dev);
942 gro_cells_destroy(&tunnel->gro_cells);
943 free_percpu(tunnel->dst_cache);
944 free_percpu(dev->tstats);
/* rtnl dellink handler: remove the tunnel from the hash table and queue
 * it for unregistration — except the per-netns fallback device, which
 * is only torn down at netns exit.
 */
948 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
950 struct ip_tunnel *tunnel = netdev_priv(dev);
951 struct ip_tunnel_net *itn;
953 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
955 if (itn->fb_tunnel_dev != dev) {
956 ip_tunnel_del(netdev_priv(dev));
957 unregister_netdevice_queue(dev, head);
960 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
/* Per-netns init: empty the hash table and, when @devname is given,
 * create the fallback tunnel device (pinned to its netns via
 * NETIF_F_NETNS_LOCAL), bind its MTU and insert it into the table.
 * Returns 0 or the fallback-device creation error.
 * NOTE(review): the rtnl_lock/unlock around creation is not visible in
 * this extraction -- confirm against full source.
 */
962 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
963 struct rtnl_link_ops *ops, char *devname)
965 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
966 struct ip_tunnel_parm parms;
969 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
970 INIT_HLIST_HEAD(&itn->tunnels[i]);
973 itn->fb_tunnel_dev = NULL;
977 memset(&parms, 0, sizeof(parms));
979 strlcpy(parms.name, devname, IFNAMSIZ);
982 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
983 /* FB netdevice is special: we have one, and only one per netns.
984 * Allowing to move it to another netns is clearly unsafe.
986 if (!IS_ERR(itn->fb_tunnel_dev)) {
987 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
988 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
989 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
993 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
995 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
/* Queue every device of this tunnel type for unregistration: all
 * devices in @itn's netns owned by @ops, plus hashed tunnels whose
 * device lives in a different netns (and so was not caught by the
 * first loop).
 */
997 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
998 struct rtnl_link_ops *ops)
1000 struct net *net = dev_net(itn->fb_tunnel_dev);
1001 struct net_device *dev, *aux;
1004 for_each_netdev_safe(net, dev, aux)
1005 if (dev->rtnl_link_ops == ops)
1006 unregister_netdevice_queue(dev, head);
1008 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1009 struct ip_tunnel *t;
1010 struct hlist_node *n;
1011 struct hlist_head *thead = &itn->tunnels[h];
1013 hlist_for_each_entry_safe(t, n, thead, hash_node)
1014 /* If dev is in the same netns, it has already
1015 * been added to the list by the previous loop.
1017 if (!net_eq(dev_net(t->dev), net))
1018 unregister_netdevice_queue(t->dev, head);
/* Tear down all tunnels of @ops in this netns in one batch.
 * NOTE(review): the LIST_HEAD declaration and rtnl_lock/unlock around
 * this sequence are not visible in this extraction.
 */
1022 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1027 ip_tunnel_destroy(itn, &list, ops);
1028 unregister_netdevice_many(&list);
1031 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
/* rtnl newlink handler: refuse duplicates of an existing tunnel,
 * register the device, randomise the MAC for Ethernet-type tunnels
 * without an explicit address, bind the MTU and insert into the hash
 * table.  NOTE(review): the EEXIST error path, the MTU application
 * (respecting tb[IFLA_MTU]) and the return are not visible here.
 */
1033 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1034 struct ip_tunnel_parm *p)
1036 struct ip_tunnel *nt;
1037 struct net *net = dev_net(dev);
1038 struct ip_tunnel_net *itn;
1042 nt = netdev_priv(dev);
1043 itn = net_generic(net, nt->ip_tnl_net_id);
1045 if (ip_tunnel_find(itn, p, dev->type))
1050 err = register_netdevice(dev);
1054 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1055 eth_hw_addr_random(dev);
1057 mtu = ip_tunnel_bind_dev(dev);
1061 ip_tunnel_add(itn, nt);
1066 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
/* rtnl changelink handler: disallow changing the fallback device,
 * ensure the new parameters don't clash with another existing tunnel,
 * refuse broadcast/pointopoint mode flips for non-Ethernet tunnels,
 * then apply the update (MTU kept only when userspace set IFLA_MTU).
 * NOTE(review): error returns and the t==tunnel reconciliation between
 * these lines are not visible in this extraction.
 */
1068 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1069 struct ip_tunnel_parm *p)
1071 struct ip_tunnel *t;
1072 struct ip_tunnel *tunnel = netdev_priv(dev);
1073 struct net *net = tunnel->net;
1074 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1076 if (dev == itn->fb_tunnel_dev)
1079 t = ip_tunnel_find(itn, p, dev->type);
1087 if (dev->type != ARPHRD_ETHER) {
1088 unsigned int nflags = 0;
1090 if (ipv4_is_multicast(p->iph.daddr))
1091 nflags = IFF_BROADCAST;
1092 else if (p->iph.daddr)
1093 nflags = IFF_POINTOPOINT;
1095 if ((dev->flags ^ nflags) &
1096 (IFF_POINTOPOINT | IFF_BROADCAST))
1101 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1104 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
/* ndo_init: allocate per-CPU stats, the per-CPU route cache and GRO
 * cells (unwinding earlier allocations on failure), record the owning
 * netns and the device name in the tunnel parameters.
 * NOTE(review): the ENOMEM returns and the trailing iph/dev setup lines
 * are not visible in this extraction.
 */
1106 int ip_tunnel_init(struct net_device *dev)
1108 struct ip_tunnel *tunnel = netdev_priv(dev);
1109 struct iphdr *iph = &tunnel->parms.iph;
1112 dev->destructor = ip_tunnel_dev_free;
1113 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1117 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1118 if (!tunnel->dst_cache) {
1119 free_percpu(dev->tstats);
1123 err = gro_cells_init(&tunnel->gro_cells, dev);
1125 free_percpu(tunnel->dst_cache);
1126 free_percpu(dev->tstats);
1131 tunnel->net = dev_net(dev);
1132 strcpy(tunnel->parms.name, dev->name);
1138 EXPORT_SYMBOL_GPL(ip_tunnel_init);
/* ndo_uninit: unhash the tunnel (the fallback device is left hashed —
 * it is torn down at netns exit) and drop all cached routes.
 */
1140 void ip_tunnel_uninit(struct net_device *dev)
1142 struct ip_tunnel *tunnel = netdev_priv(dev);
1143 struct net *net = tunnel->net;
1144 struct ip_tunnel_net *itn;
1146 itn = net_generic(net, tunnel->ip_tnl_net_id);
1147 /* fb_tunnel_dev will be unregistered in the net-exit call. */
1148 if (itn->fb_tunnel_dev != dev)
1149 ip_tunnel_del(netdev_priv(dev));
1151 ip_tunnel_dst_reset_all(tunnel);
1153 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1155 /* Do least required initialization, rest of init is done in tunnel_init call */
/* Record which per-netns generic slot this tunnel type uses. */
1156 void ip_tunnel_setup(struct net_device *dev, int net_id)
1158 struct ip_tunnel *tunnel = netdev_priv(dev);
1159 tunnel->ip_tnl_net_id = net_id;
1161 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1163 MODULE_LICENSE("GPL");