Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
[pandora-kernel.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58 #include <net/udp.h>
59 #include <net/gue.h>
60
61 #if IS_ENABLED(CONFIG_IPV6)
62 #include <net/ipv6.h>
63 #include <net/ip6_fib.h>
64 #include <net/ip6_route.h>
65 #endif
66
/* Hash a (key, remote address) pair into an IP_TNL_HASH_BITS-wide
 * bucket index for the per-netns tunnel hash table.
 */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}
72
/* Install @dst (plus its pre-resolved source address @saddr) as the
 * cached route in @idst, dropping the reference held by whatever entry
 * was cached before.  The xchg() makes the pointer swap atomic with
 * respect to concurrent updates of the same per-cpu slot.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	/* Take a reference for the cache; dst_clone(NULL) is a no-op. */
	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	/* Release the reference the previous cache entry was holding. */
	dst_release(old_dst);
	idst->saddr = saddr;
}
83
/* Update the route cached for the current CPU.
 * NOTE(review): noinline presumably keeps the raw_cpu_ptr() access from
 * being inlined into many call sites -- confirm against commit history.
 */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
89
/* Invalidate the route cached on the current CPU only. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
94
/* Invalidate the cached route on every possible CPU, e.g. after tunnel
 * parameters change and all cached routes may be stale.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
103
/* Return the route cached on the current CPU (as a struct rtable *)
 * with a reference held, or NULL if nothing usable is cached.  On
 * success *saddr is set to the cached source address.  @cookie is
 * passed to dst->ops->check() to revalidate an obsolete entry; if the
 * check fails the per-cpu cache slot is reset.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Only take the entry if its refcount has not already hit zero;
	 * a zero refcount means the dst is being torn down.
	 */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: flush the cache and drop our ref. */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
127
128 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
129                                 __be16 flags, __be32 key)
130 {
131         if (p->i_flags & TUNNEL_KEY) {
132                 if (flags & TUNNEL_KEY)
133                         return key == p->i_key;
134                 else
135                         /* key expected, none present */
136                         return false;
137         } else
138                 return !(flags & TUNNEL_KEY);
139 }
140
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if no key is present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
/* Look up the receiving tunnel for a packet with the given incoming
 * @link, tunnel @flags, outer @remote/@local addresses and @key.
 * Tries progressively looser matches; a tunnel matching everything but
 * the link is remembered as a candidate and preferred over the
 * fallback device.  Returns NULL if nothing matches at all.
 * Caller is expected to hold rcu_read_lock (hlist_for_each_entry_rcu).
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (saddr, daddr) match on an up device. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: daddr match with a wildcard (zero) tunnel source. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 search the bucket for tunnels hashed with a
	 * zero remote address.
	 */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: our local address matches the tunnel source, or the
	 * packet is multicast addressed to the tunnel's daddr.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Pass 4: fully wildcard tunnels matched purely by key. */
	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	/* A tunnel that matched everything but the link beats the
	 * fallback device.
	 */
	if (cand)
		return cand;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
241
242 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
243                                     struct ip_tunnel_parm *parms)
244 {
245         unsigned int h;
246         __be32 remote;
247         __be32 i_key = parms->i_key;
248
249         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
250                 remote = parms->iph.daddr;
251         else
252                 remote = 0;
253
254         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
255                 i_key = 0;
256
257         h = ip_tunnel_hash(i_key, remote);
258         return &itn->tunnels[h];
259 }
260
/* Insert tunnel @t into its hash bucket in the per-netns table. */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
267
/* Remove tunnel @t from the hash table (RCU-safe for readers). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
272
/* Find a tunnel whose addresses, link, device @type and key parameters
 * all exactly match @parms.  Returns NULL when no such tunnel exists.
 * Used by the ioctl paths to detect duplicates and locate targets.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}
295
/* Allocate and register a tunnel net_device for @parms.  If the user
 * supplied no name, derive one from the rtnl_link_ops kind plus a "%d"
 * template that register_netdevice() expands to a free index.
 * Returns the device or an ERR_PTR; caller must hold RTNL.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Reserve room for the "%d" suffix plus the NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
341
/* Initialize an IPv4 flow key for tunnel route lookups.  The memset
 * also clears fields (and padding) not explicitly set below.
 */
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	/* GRE key participates in route lookup for keyed tunnels. */
	fl4->fl4_gre_key = key;
}
355
/* Bind the tunnel to an underlying output device to derive a sensible
 * MTU and needed_headroom.  Routes toward the configured remote (when
 * there is one) to guess the egress device; otherwise falls back to
 * the device named by parms.link.  Returns the MTU to use.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Warm the per-cpu route cache while we're here. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* 68 is the minimum IPv4 MTU required by RFC 791. */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
403
/* Create a new tunnel device with the same link ops as the fallback
 * device, bind it to compute its MTU, and insert it into the hash
 * table.  Returns the new tunnel or an ERR_PTR from device creation.
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
422
/* Receive path common to IP tunnels.  Validates the checksum and
 * sequence-number expectations against the tunnel's configured flags,
 * decapsulates ECN, updates per-cpu stats and hands the inner packet
 * to GRO.  Always consumes @skb; returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Drop if the packet's TUNNEL_CSUM flag disagrees with ours. */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Enforce monotonically increasing sequence numbers when
	 * TUNNEL_SEQ is configured (signed diff handles wraparound).
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* Propagate outer ECN into the inner header; err > 1 means the
	 * combination is invalid and the packet must be dropped.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub packet state when crossing a netns boundary. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
491
492 static int ip_encap_hlen(struct ip_tunnel_encap *e)
493 {
494         switch (e->type) {
495         case TUNNEL_ENCAP_NONE:
496                 return 0;
497         case TUNNEL_ENCAP_FOU:
498                 return sizeof(struct udphdr);
499         case TUNNEL_ENCAP_GUE:
500                 return sizeof(struct udphdr) + sizeof(struct guehdr);
501         default:
502                 return -EINVAL;
503         }
504 }
505
/* Configure tunnel @t's encapsulation from @ipencap and recompute the
 * total header length.  On an unknown encapsulation type the negative
 * errno from ip_encap_hlen() is returned and t->encap stays zeroed
 * (the memset runs before the validity check).
 */
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
528
/* Push a FOU/GUE UDP encapsulation header of @hdr_len bytes in front
 * of the current headers and fill it in.  On success *protocol is
 * rewritten to IPPROTO_UDP so the outer IP header carries UDP.
 * Returns 0 or a negative errno from the offload setup.
 */
static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
			    size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
{
	struct udphdr *uh;
	__be16 sport;
	bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
	int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;

	skb = iptunnel_handle_offloads(skb, csum, type);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	/* Get length and hash before making space in skb */

	/* Configured source port wins; otherwise derive one from the
	 * inner flow hash for ECMP-friendly entropy.
	 */
	sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
					       skb, 0, 0, false);

	skb_push(skb, hdr_len);

	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	if (e->type == TUNNEL_ENCAP_GUE) {
		/* GUE header sits immediately after the UDP header. */
		struct guehdr *guehdr = (struct guehdr *)&uh[1];

		guehdr->version = 0;
		guehdr->hlen = 0;
		guehdr->flags = 0;
		guehdr->next_hdr = *protocol;
	}

	uh->dest = e->dport;
	uh->source = sport;
	uh->len = htons(skb->len);
	uh->check = 0;
	udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
		     fl4->saddr, fl4->daddr, skb->len);

	*protocol = IPPROTO_UDP;

	return 0;
}
572
573 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
574                     u8 *protocol, struct flowi4 *fl4)
575 {
576         switch (t->encap.type) {
577         case TUNNEL_ENCAP_NONE:
578                 return 0;
579         case TUNNEL_ENCAP_FOU:
580         case TUNNEL_ENCAP_GUE:
581                 return fou_build_header(skb, &t->encap, t->encap_hlen,
582                                         protocol, fl4);
583         default:
584                 return -EINVAL;
585         }
586 }
587 EXPORT_SYMBOL(ip_tunnel_encap);
588
/* Path-MTU handling for the transmit path.  Computes the MTU available
 * to the inner packet, updates the inner route's PMTU, and, when the
 * packet would not fit and fragmentation is forbidden, emits the
 * appropriate ICMP/ICMPv6 "too big" error and returns -E2BIG.
 * Returns 0 when the packet may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: the outer route's MTU minus all tunnel overhead. */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or explicitly
		 * configured point-to-point tunnels) so future packets
		 * see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
636
/* Common transmit path for IP tunnels: resolve the outer destination
 * (including NBMA tunnels with no configured remote), apply optional
 * encapsulation, route the outer packet (using the per-cpu route cache
 * for "connected" tunnels), enforce PMTU, compute TOS/TTL/DF for the
 * outer header and hand off to iptunnel_xmit().  Consumes @skb on all
 * paths.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;			/* route cache usable only if true */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's routing information.
		 */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a
			 * usable IPv4 next hop in their last 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Bit 0 set means "inherit TOS from the inner packet",
		 * which also makes the route per-packet (not cacheable).
		 */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back out of ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Recent ICMP errors from the far end: signal link failure to
	 * the sender for a while instead of transmitting into a hole.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
801
/* Apply new parameters @p to an existing tunnel @t.  The tunnel is
 * unhashed and re-added because the address/key change can move it to
 * a different hash bucket.  Rebinding (and an optional MTU update)
 * happens only when the underlying link changed.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels expose the endpoints as their
		 * device / broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* Cached routes may point at the old endpoints/link. */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
834
/* Legacy ioctl interface for IP tunnels: SIOCGETTUNNEL reads the
 * parameters, SIOCADDTUNNEL creates, SIOCCHGTUNNEL updates and
 * SIOCDELTUNNEL destroys.  Add/change/delete require CAP_NET_ADMIN in
 * the tunnel's user namespace.  Returns 0 or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, look up by the supplied parms;
		 * fall back to the fallback device's own parameters.
		 */
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* An explicit TTL implies a single path, so forbid
		 * fragmentation of the outer packet.
		 */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Ignore keys the user set without the KEY flag. */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters may not collide with a
				 * different existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* Refuse changes that would flip the
				 * broadcast/point-to-point nature of
				 * the device.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			/* The fallback device itself may not be deleted. */
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
937
938 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
939 {
940         struct ip_tunnel *tunnel = netdev_priv(dev);
941         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
942
943         if (new_mtu < 68 ||
944             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
945                 return -EINVAL;
946         dev->mtu = new_mtu;
947         return 0;
948 }
949 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
950
/* netdev destructor: release per-tunnel resources, then the netdev itself.
 * Teardown order matters here: the GRO cells and the percpu dst cache live
 * in state reached through the netdev, so free_netdev() must come last.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
960
961 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
962 {
963         struct ip_tunnel *tunnel = netdev_priv(dev);
964         struct ip_tunnel_net *itn;
965
966         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
967
968         if (itn->fb_tunnel_dev != dev) {
969                 ip_tunnel_del(netdev_priv(dev));
970                 unregister_netdevice_queue(dev, head);
971         }
972 }
973 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
974
/* Per-netns setup for an ip_tunnel based protocol (ipip, gre, vti, ...).
 *
 * Initializes the tunnel hash table and, when @ops is provided, creates
 * this netns's fallback tunnel device named @devname under RTNL.
 *
 * Returns 0 on success, or the error from fallback device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	/* No link ops means this protocol uses no fallback device. */
	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1009
/* Queue every tunnel belonging to @itn for unregistration on @head.
 *
 * Two passes are needed: the first walks all netdevs in the fallback
 * device's netns and queues those created through @ops; the second walks
 * the tunnel hash table to catch tunnels whose netdev was moved into a
 * different netns and was therefore missed by the first pass.
 * Caller holds RTNL.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
1034
/* Tear down all tunnels of a netns (net-exit path): collect them under
 * RTNL and unregister the whole batch in one call.
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1045
1046 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1047                       struct ip_tunnel_parm *p)
1048 {
1049         struct ip_tunnel *nt;
1050         struct net *net = dev_net(dev);
1051         struct ip_tunnel_net *itn;
1052         int mtu;
1053         int err;
1054
1055         nt = netdev_priv(dev);
1056         itn = net_generic(net, nt->ip_tnl_net_id);
1057
1058         if (ip_tunnel_find(itn, p, dev->type))
1059                 return -EEXIST;
1060
1061         nt->net = net;
1062         nt->parms = *p;
1063         err = register_netdevice(dev);
1064         if (err)
1065                 goto out;
1066
1067         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1068                 eth_hw_addr_random(dev);
1069
1070         mtu = ip_tunnel_bind_dev(dev);
1071         if (!tb[IFLA_MTU])
1072                 dev->mtu = mtu;
1073
1074         ip_tunnel_add(itn, nt);
1075
1076 out:
1077         return err;
1078 }
1079 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1080
1081 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1082                          struct ip_tunnel_parm *p)
1083 {
1084         struct ip_tunnel *t;
1085         struct ip_tunnel *tunnel = netdev_priv(dev);
1086         struct net *net = tunnel->net;
1087         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1088
1089         if (dev == itn->fb_tunnel_dev)
1090                 return -EINVAL;
1091
1092         t = ip_tunnel_find(itn, p, dev->type);
1093
1094         if (t) {
1095                 if (t->dev != dev)
1096                         return -EEXIST;
1097         } else {
1098                 t = tunnel;
1099
1100                 if (dev->type != ARPHRD_ETHER) {
1101                         unsigned int nflags = 0;
1102
1103                         if (ipv4_is_multicast(p->iph.daddr))
1104                                 nflags = IFF_BROADCAST;
1105                         else if (p->iph.daddr)
1106                                 nflags = IFF_POINTOPOINT;
1107
1108                         if ((dev->flags ^ nflags) &
1109                             (IFF_POINTOPOINT | IFF_BROADCAST))
1110                                 return -EINVAL;
1111                 }
1112         }
1113
1114         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1115         return 0;
1116 }
1117 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1118
1119 int ip_tunnel_init(struct net_device *dev)
1120 {
1121         struct ip_tunnel *tunnel = netdev_priv(dev);
1122         struct iphdr *iph = &tunnel->parms.iph;
1123         int err;
1124
1125         dev->destructor = ip_tunnel_dev_free;
1126         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1127         if (!dev->tstats)
1128                 return -ENOMEM;
1129
1130         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1131         if (!tunnel->dst_cache) {
1132                 free_percpu(dev->tstats);
1133                 return -ENOMEM;
1134         }
1135
1136         err = gro_cells_init(&tunnel->gro_cells, dev);
1137         if (err) {
1138                 free_percpu(tunnel->dst_cache);
1139                 free_percpu(dev->tstats);
1140                 return err;
1141         }
1142
1143         tunnel->dev = dev;
1144         tunnel->net = dev_net(dev);
1145         strcpy(tunnel->parms.name, dev->name);
1146         iph->version            = 4;
1147         iph->ihl                = 5;
1148
1149         return 0;
1150 }
1151 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1152
/* Common ndo_uninit for ip_tunnel based devices: unhash the tunnel from
 * the per-netns table and drop any cached output routes.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));

	ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1167
/* Do the least required initialization: record which per-netns id this
 * tunnel type uses. The rest of the setup happens in ip_tunnel_init()
 * when the device's ndo_init is called.
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1175
1176 MODULE_LICENSE("GPL");