Merge remote-tracking branch 'regmap/fix/core' into regmap-linus
[pandora-kernel.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
/* Fold a (key, remote address) pair into an index of the tunnel hash
 * table.  The __force casts strip the __be32 endianness annotation so
 * the raw bits can be fed to hash_32().
 */
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
			 IP_TNL_HASH_BITS);
}
70
/* Replace the dst cached in one per-cpu slot with @dst (may be NULL),
 * recording @saddr as the matching local source address.
 *
 * A reference is taken on the new dst before it is published with
 * xchg(), and the displaced entry's reference is dropped afterwards, so
 * a concurrent reader never sees an unreferenced dst.  NOTE(review):
 * idst->saddr is written after the pointer swap, so a racing reader may
 * transiently pair the new dst with the old saddr — confirm callers
 * tolerate that.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	dst_clone(dst);
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
81
/* Cache @dst/@saddr in the calling CPU's slot of the tunnel dst cache.
 * Marked noinline — presumably to keep the callers' fast paths compact;
 * TODO confirm the original motivation.
 */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
87
/* Invalidate the cached route in the calling CPU's cache slot only. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
92
/* Invalidate the cached route in every possible CPU's cache slot. */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
/* Fetch the route cached for the current CPU, taking a reference on it.
 *
 * Returns NULL when the slot is empty, when the dst is already being
 * torn down (its refcount has hit zero), or when the dst has gone stale
 * according to its ops->check() hook — in the stale case this CPU's
 * cache slot is also reset.  On success *saddr is set to the cached
 * local source address that goes with the route.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* Only use the entry if we can still obtain a reference on it */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: drop it from the cache and release
			 * the reference we just took.
			 */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127                                 __be16 flags, __be32 key)
128 {
129         if (p->i_flags & TUNNEL_KEY) {
130                 if (flags & TUNNEL_KEY)
131                         return key == p->i_key;
132                 else
133                         /* key expected, none present */
134                         return false;
135         } else
136                 return !(flags & TUNNEL_KEY);
137 }
138
139 /* Fallback tunnel: no source, no destination, no key, no options
140
141    Tunnel hash table:
142    We require exact key match i.e. if a key is present in packet
143    it will match only tunnel with the same key; if it is not present,
144    it will match only keyless tunnel.
145
146    All keyless packets, if not matched against configured keyless tunnels,
147    will match the fallback tunnel.
148    Given src, dst and key, find appropriate for input tunnel.
149 */
/* Packet-path lookup: four passes over the hash table, from most to
 * least specific.  Within each pass, a tunnel on the matching link wins
 * outright; a tunnel matching on everything but link is remembered as a
 * candidate.  If no pass matches, the per-netns fallback device is used
 * as a last resort.  Runs under RCU.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: exact (local, remote) address match */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	/* Pass 2: remote matches, tunnel's local address is a wildcard */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 look in the wildcard-remote bucket */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: our local address equals the tunnel's configured local
	 * address (with wildcard remote), or the packet was sent to a
	 * multicast group the tunnel uses as its destination.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: fully-wildcard tunnels matched by key alone */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Last resort: the per-netns fallback device, if up */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
239
240 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241                                     struct ip_tunnel_parm *parms)
242 {
243         unsigned int h;
244         __be32 remote;
245         __be32 i_key = parms->i_key;
246
247         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
248                 remote = parms->iph.daddr;
249         else
250                 remote = 0;
251
252         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
253                 i_key = 0;
254
255         h = ip_tunnel_hash(i_key, remote);
256         return &itn->tunnels[h];
257 }
258
259 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
260 {
261         struct hlist_head *head = ip_bucket(itn, &t->parms);
262
263         hlist_add_head_rcu(&t->hash_node, head);
264 }
265
/* Unlink tunnel @t from its hash bucket (RCU-safe removal). */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
270
/* Configuration-time exact match: find the tunnel whose local/remote
 * addresses, link, device type and key settings all equal @parms.
 * Unlike the packet-path ip_tunnel_lookup(), there is no best-effort
 * fallback.  Returns NULL when no such tunnel exists.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}
293
/* Allocate and register a tunnel net_device.
 *
 * The device name comes from @parms->name when given; otherwise it is
 * built from the link ops' kind plus a "%d" template that the netdev
 * core expands to a unique index.  Must be called under RTNL.
 * Returns the registered device or an ERR_PTR().
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room in name[] for the kind, "%d" and the NUL */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Seed the tunnel private area before registration so the
	 * device's setup/init callbacks see the final parameters.
	 */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
339
340 static inline void init_tunnel_flow(struct flowi4 *fl4,
341                                     int proto,
342                                     __be32 daddr, __be32 saddr,
343                                     __be32 key, __u8 tos, int oif)
344 {
345         memset(fl4, 0, sizeof(*fl4));
346         fl4->flowi4_oif = oif;
347         fl4->daddr = daddr;
348         fl4->saddr = saddr;
349         fl4->flowi4_tos = tos;
350         fl4->flowi4_proto = proto;
351         fl4->fl4_gre_key = key;
352 }
353
/* Guess the underlay device for this tunnel and derive an appropriate
 * MTU and needed_headroom from it.  Returns the suggested MTU for the
 * tunnel device, clamped to at least 68 (the IPv4 minimum MTU).
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Prime the per-cpu dst cache with this route */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly configured link */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve space for the underlay headers plus our encapsulation */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
401
/* Create, register and hash a new tunnel device, reusing the fallback
 * device's rtnl_link_ops.  Returns the new tunnel or an ERR_PTR().
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	/* Bind to the underlay and size the device MTU accordingly */
	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
420
/* Common receive path for IP tunnels.
 *
 * Checks the parsed packet info (@tpi) against the tunnel's expected
 * checksum and sequence-number flags, decapsulates ECN, updates the
 * per-cpu rx statistics and hands the skb to the tunnel's GRO cell.
 * The skb is consumed on every path; the return value is always 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet must carry a checksum iff the tunnel expects one */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Sequence-checked tunnels drop unsequenced or backwards packets */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		/* err > 1 means the frame must be dropped, not just logged */
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub skb state when the packet crosses netns boundaries */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
/* Enforce the tunnel path MTU for @skb and propagate the PMTU to the
 * inner route.  Returns 0 when the packet may be transmitted, or -E2BIG
 * after sending an ICMP/ICMPv6 "packet too big" back to the sender.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set the limit is the outer route MTU minus our overhead;
	 * otherwise fragmentation is allowed and the inner MTU applies.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes, or when the tunnel
		 * has a fixed unicast endpoint, so later packets see it.
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
537
/* Common transmit path for IP tunnels.
 *
 * Resolves the outer destination (including the NBMA case where daddr
 * is taken from the inner route or neighbour entry), picks tos/ttl/df
 * for the outer header, routes the packet — using the per-cpu dst cache
 * when the tunnel is "connected", i.e. has a fixed remote — enforces
 * PMTU and finally pushes the outer IP header via iptunnel_xmit().
 * The skb is consumed on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, const u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination per packet */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only IPv4-compatible IPv6 addresses embed a usable
			 * IPv4 destination in their low 32 bits.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Per-packet destination: the dst cache cannot be used */
		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit of the configured tos means "inherit from the
		 * inner packet" — which also defeats the dst cache.
		 */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back to ourselves would loop the packet forever */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* While recent ICMP errors are pending (err_count set by the
	 * tunnel's error handler), report link failure to the sender.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* ttl 0 means "inherit": copy from the inner header, or
		 * use the route's hop limit for non-IP payloads.
		 */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
699
/* Apply new parameters @p to existing tunnel @t.
 *
 * The tunnel must be unhashed and re-added because addresses and keys
 * determine its hash bucket.  A link change re-binds the underlay and,
 * when @set_mtu, refreshes the device MTU.  The per-cpu dst cache is
 * always flushed since any cached route may now be wrong.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels mirror the IPv4 endpoints into the
		 * device's hardware and broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
732
/* Legacy ioctl-based tunnel configuration (SIOC{GET,ADD,CHG,DEL}TUNNEL).
 *
 * Operating on the per-netns fallback device addresses tunnels by the
 * parameters in @p; operating on a specific tunnel device acts on that
 * device.  Returns 0 on success or a negative errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			/* Query via the fallback device: look the tunnel up
			 * by parameters, defaulting to the fallback itself.
			 */
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL forces DF so path MTU discovery can work */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			/* Ignore key values when keying was not requested */
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL)) {
			t = ip_tunnel_create(net, itn, p);
			err = PTR_ERR_OR_ZERO(t);
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters may not clash with a different
				 * existing tunnel.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				/* The new addressing must not change the
				 * device's broadcast/point-to-point nature.
				 */
				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			/* The fallback device itself may never be deleted */
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
830
831 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
832 {
833         struct ip_tunnel *tunnel = netdev_priv(dev);
834         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
835
836         if (new_mtu < 68 ||
837             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
838                 return -EINVAL;
839         dev->mtu = new_mtu;
840         return 0;
841 }
842 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
843
/* net_device destructor: release GRO cells and per-cpu state before the
 * device memory itself.  free_netdev() must come last because the
 * tunnel private data lives inside the net_device allocation.
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(tunnel->dst_cache);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
853
854 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
855 {
856         struct ip_tunnel *tunnel = netdev_priv(dev);
857         struct ip_tunnel_net *itn;
858
859         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
860
861         if (itn->fb_tunnel_dev != dev) {
862                 ip_tunnel_del(netdev_priv(dev));
863                 unregister_netdevice_queue(dev, head);
864         }
865 }
866 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
867
/* Per-netns initialization for a tunnel type.
 *
 * Initializes the hash table and, when @ops is non-NULL, creates the
 * netns-local fallback device named @devname.  Returns 0 on success or
 * the error from fallback device creation.
 */
int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
				  struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		/* Caller does not want a fallback device for this type */
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* FB netdevice is special: we have one, and only one per netns.
	 * Allowing to move it to another netns is clearly unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
902
/* Queue every device of this tunnel type for unregistration on @head:
 * first all devices with matching @ops in the fb device's own netns,
 * then any hashed tunnels whose dev lives in a *different* netns
 * (possible because a tunnel's link netns can differ from its dev's).
 *
 * Caller holds rtnl and later flushes @head via
 * unregister_netdevice_many().
 *
 * NOTE(review): dev_net(itn->fb_tunnel_dev) dereferences fb_tunnel_dev
 * unconditionally, but ip_tunnel_init_net() sets it to NULL when called
 * with ops == NULL — confirm no caller reaches here on such a netns.
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}
927
/* Per-netns exit for one tunnel type: under rtnl, collect every device
 * of this type onto a local list and unregister them in one batch
 * (cheaper than per-device unregistration).
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list, ops);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
938
939 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
940                       struct ip_tunnel_parm *p)
941 {
942         struct ip_tunnel *nt;
943         struct net *net = dev_net(dev);
944         struct ip_tunnel_net *itn;
945         int mtu;
946         int err;
947
948         nt = netdev_priv(dev);
949         itn = net_generic(net, nt->ip_tnl_net_id);
950
951         if (ip_tunnel_find(itn, p, dev->type))
952                 return -EEXIST;
953
954         nt->net = net;
955         nt->parms = *p;
956         err = register_netdevice(dev);
957         if (err)
958                 goto out;
959
960         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
961                 eth_hw_addr_random(dev);
962
963         mtu = ip_tunnel_bind_dev(dev);
964         if (!tb[IFLA_MTU])
965                 dev->mtu = mtu;
966
967         ip_tunnel_add(itn, nt);
968
969 out:
970         return err;
971 }
972 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
973
/* rtnl_link_ops->changelink helper: update an existing tunnel to the
 * new parameters @p.
 *
 * Returns 0 on success; -EINVAL when @dev is the fallback device (its
 * parameters are fixed) or when the new addresses would require a
 * different broadcast/point-to-point flag mix; -EEXIST when another
 * device already uses the requested parameters.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	/* The per-netns fallback device cannot be reconfigured. */
	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		/* Parameters already in use — only OK if it's this dev. */
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			/* Work out which flags the new daddr implies... */
			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			/* ...and reject the change if the device's current
			 * broadcast/p2p mode would have to flip, since those
			 * flags were fixed at setup time.
			 */
			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
1011
1012 int ip_tunnel_init(struct net_device *dev)
1013 {
1014         struct ip_tunnel *tunnel = netdev_priv(dev);
1015         struct iphdr *iph = &tunnel->parms.iph;
1016         int err;
1017
1018         dev->destructor = ip_tunnel_dev_free;
1019         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1020         if (!dev->tstats)
1021                 return -ENOMEM;
1022
1023         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1024         if (!tunnel->dst_cache) {
1025                 free_percpu(dev->tstats);
1026                 return -ENOMEM;
1027         }
1028
1029         err = gro_cells_init(&tunnel->gro_cells, dev);
1030         if (err) {
1031                 free_percpu(tunnel->dst_cache);
1032                 free_percpu(dev->tstats);
1033                 return err;
1034         }
1035
1036         tunnel->dev = dev;
1037         tunnel->net = dev_net(dev);
1038         strcpy(tunnel->parms.name, dev->name);
1039         iph->version            = 4;
1040         iph->ihl                = 5;
1041
1042         return 0;
1043 }
1044 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1045
1046 void ip_tunnel_uninit(struct net_device *dev)
1047 {
1048         struct ip_tunnel *tunnel = netdev_priv(dev);
1049         struct net *net = tunnel->net;
1050         struct ip_tunnel_net *itn;
1051
1052         itn = net_generic(net, tunnel->ip_tnl_net_id);
1053         /* fb_tunnel_dev will be unregisted in net-exit call. */
1054         if (itn->fb_tunnel_dev != dev)
1055                 ip_tunnel_del(netdev_priv(dev));
1056
1057         ip_tunnel_dst_reset_all(tunnel);
1058 }
1059 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1060
/* Do least required initialization, rest of init is done in tunnel_init call.
 * Stashes the caller's pernet id so later helpers can locate the
 * matching ip_tunnel_net via net_generic().
 */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	tunnel->ip_tnl_net_id = net_id;
}
1068
1069 MODULE_LICENSE("GPL");