Merge tag 'trace-fixes-v3.17-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git...
[pandora-kernel.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43 #include <linux/err.h>
44
45 #include <net/sock.h>
46 #include <net/ip.h>
47 #include <net/icmp.h>
48 #include <net/protocol.h>
49 #include <net/ip_tunnels.h>
50 #include <net/arp.h>
51 #include <net/checksum.h>
52 #include <net/dsfield.h>
53 #include <net/inet_ecn.h>
54 #include <net/xfrm.h>
55 #include <net/net_namespace.h>
56 #include <net/netns/generic.h>
57 #include <net/rtnetlink.h>
58
59 #if IS_ENABLED(CONFIG_IPV6)
60 #include <net/ipv6.h>
61 #include <net/ip6_fib.h>
62 #include <net/ip6_route.h>
63 #endif
64
65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
/* Publish @dst (may be NULL) into the per-cpu cache slot @idst and drop
 * the reference held by the previously cached entry.  The xchg() swaps
 * the pointer atomically with respect to RCU readers in
 * tunnel_rtable_get().
 */
71 static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72                              struct dst_entry *dst, __be32 saddr)
73 {
74         struct dst_entry *old_dst;
75
76         dst_clone(dst);		/* take the reference the cache will hold */
77         old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
78         dst_release(old_dst);
            /* NOTE(review): saddr is stored after the new dst is already
             * visible, so a concurrent reader may briefly pair the new dst
             * with the old saddr — confirm callers tolerate this. */
79         idst->saddr = saddr;
80 }
81
/* Cache @dst/@saddr in the slot belonging to the *current* CPU only. */
82 static noinline void tunnel_dst_set(struct ip_tunnel *t,
83                            struct dst_entry *dst, __be32 saddr)
84 {
85         __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
86 }
87
/* Invalidate the current CPU's cached route (used on revalidation
 * failure in tunnel_rtable_get()). */
88 static void tunnel_dst_reset(struct ip_tunnel *t)
89 {
90         tunnel_dst_set(t, NULL, 0);
91 }
92
/* Invalidate the cached route on every possible CPU, e.g. after the
 * tunnel's addressing parameters change (see ip_tunnel_update()). */
93 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
94 {
95         int i;
96
97         for_each_possible_cpu(i)
98                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
99 }
100 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
101
/* Fetch the route cached for the current CPU, or NULL on a miss.
 *
 * On success the returned rtable carries a reference (caller must
 * release it) and *saddr is set to the cached source address.  A dst
 * whose refcount has already dropped to zero, or that fails its
 * ops->check() revalidation, counts as a miss; the latter also resets
 * this CPU's cache slot.
 */
102 static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
103                                         u32 cookie, __be32 *saddr)
104 {
105         struct ip_tunnel_dst *idst;
106         struct dst_entry *dst;
107
108         rcu_read_lock();
109         idst = raw_cpu_ptr(t->dst_cache);
110         dst = rcu_dereference(idst->dst);
            /* Only take the dst if it is still live; refcount zero means it
             * is already on its way to destruction. */
111         if (dst && !atomic_inc_not_zero(&dst->__refcnt))
112                 dst = NULL;
113         if (dst) {
114                 if (!dst->obsolete || dst->ops->check(dst, cookie)) {
115                         *saddr = idst->saddr;
116                 } else {
117                         tunnel_dst_reset(t);
118                         dst_release(dst);
119                         dst = NULL;
120                 }
121         }
122         rcu_read_unlock();
123         return (struct rtable *)dst;
124 }
125
126 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
127                                 __be16 flags, __be32 key)
128 {
129         if (p->i_flags & TUNNEL_KEY) {
130                 if (flags & TUNNEL_KEY)
131                         return key == p->i_key;
132                 else
133                         /* key expected, none present */
134                         return false;
135         } else
136                 return !(flags & TUNNEL_KEY);
137 }
138
139 /* Fallback tunnel: no source, no destination, no key, no options
140
141    Tunnel hash table:
142    We require an exact key match, i.e. if a key is present in the packet
143    it will match only a tunnel with the same key; if it is not present,
144    it will match only a keyless tunnel.
145
146    All keyless packets, if not matching any configured keyless tunnel,
147    will match the fallback tunnel.
148    Given src, dst and key, find the appropriate tunnel for an input packet.
149 */
150 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
151                                    int link, __be16 flags,
152                                    __be32 remote, __be32 local,
153                                    __be32 key)
154 {
155         unsigned int hash;
156         struct ip_tunnel *t, *cand = NULL;
157         struct hlist_head *head;
158
159         hash = ip_tunnel_hash(key, remote);
160         head = &itn->tunnels[hash];
161
            /* Pass 1: fully specified tunnels — both endpoints match.  A
             * tunnel on a different link is only remembered as a candidate. */
162         hlist_for_each_entry_rcu(t, head, hash_node) {
163                 if (local != t->parms.iph.saddr ||
164                     remote != t->parms.iph.daddr ||
165                     !(t->dev->flags & IFF_UP))
166                         continue;
167
168                 if (!ip_tunnel_key_match(&t->parms, flags, key))
169                         continue;
170
171                 if (t->parms.link == link)
172                         return t;
173                 else
174                         cand = t;
175         }
176
            /* Pass 2: remote matches and the tunnel's source is wildcard. */
177         hlist_for_each_entry_rcu(t, head, hash_node) {
178                 if (remote != t->parms.iph.daddr ||
179                     t->parms.iph.saddr != 0 ||
180                     !(t->dev->flags & IFF_UP))
181                         continue;
182
183                 if (!ip_tunnel_key_match(&t->parms, flags, key))
184                         continue;
185
186                 if (t->parms.link == link)
187                         return t;
188                 else if (!cand)
189                         cand = t;
190         }
191
            /* Passes 3 and 4 search the bucket for wildcard-remote tunnels. */
192         hash = ip_tunnel_hash(key, 0);
193         head = &itn->tunnels[hash];
194
            /* Pass 3: our local address is the tunnel's source, or the packet
             * was sent to a multicast group the tunnel is bound to. */
195         hlist_for_each_entry_rcu(t, head, hash_node) {
196                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
197                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
198                         continue;
199
200                 if (!(t->dev->flags & IFF_UP))
201                         continue;
202
203                 if (!ip_tunnel_key_match(&t->parms, flags, key))
204                         continue;
205
206                 if (t->parms.link == link)
207                         return t;
208                 else if (!cand)
209                         cand = t;
210         }
211
212         if (flags & TUNNEL_NO_KEY)
213                 goto skip_key_lookup;
214
            /* Pass 4: key-only match against fully wildcarded tunnels. */
215         hlist_for_each_entry_rcu(t, head, hash_node) {
216                 if (t->parms.i_key != key ||
217                     t->parms.iph.saddr != 0 ||
218                     t->parms.iph.daddr != 0 ||
219                     !(t->dev->flags & IFF_UP))
220                         continue;
221
222                 if (t->parms.link == link)
223                         return t;
224                 else if (!cand)
225                         cand = t;
226         }
227
228 skip_key_lookup:
229         if (cand)
230                 return cand;
231
            /* Last resort: the per-netns fallback device, if it is up. */
232         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
233                 return netdev_priv(itn->fb_tunnel_dev);
234
235
236         return NULL;
237 }
238 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
239
240 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
241                                     struct ip_tunnel_parm *parms)
242 {
243         unsigned int h;
244         __be32 remote;
245         __be32 i_key = parms->i_key;
246
247         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
248                 remote = parms->iph.daddr;
249         else
250                 remote = 0;
251
252         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
253                 i_key = 0;
254
255         h = ip_tunnel_hash(i_key, remote);
256         return &itn->tunnels[h];
257 }
258
/* Link @t into the bucket chosen by its current parms.  RCU-safe for
 * concurrent readers; writers are presumably serialized by RTNL —
 * confirm against the callers. */
259 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
260 {
261         struct hlist_head *head = ip_bucket(itn, &t->parms);
262
263         hlist_add_head_rcu(&t->hash_node, head);
264 }
265
/* Unlink @t from its hash bucket.  The _init variant reinitializes the
 * node so the tunnel can be re-added later (see ip_tunnel_update()). */
266 static void ip_tunnel_del(struct ip_tunnel *t)
267 {
268         hlist_del_init_rcu(&t->hash_node);
269 }
270
/* Find a tunnel whose *configuration* matches @parms exactly (endpoints,
 * link, device type and key), or return NULL.  Unlike ip_tunnel_lookup()
 * this compares configuration rather than packet headers — it backs the
 * ioctl path (see ip_tunnel_ioctl()).
 */
271 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
272                                         struct ip_tunnel_parm *parms,
273                                         int type)
274 {
275         __be32 remote = parms->iph.daddr;
276         __be32 local = parms->iph.saddr;
277         __be32 key = parms->i_key;
278         __be16 flags = parms->i_flags;
279         int link = parms->link;
280         struct ip_tunnel *t = NULL;
281         struct hlist_head *head = ip_bucket(itn, parms);
282
283         hlist_for_each_entry_rcu(t, head, hash_node) {
284                 if (local == t->parms.iph.saddr &&
285                     remote == t->parms.iph.daddr &&
286                     link == t->parms.link &&
287                     type == t->dev->type &&
288                     ip_tunnel_key_match(&t->parms, flags, key))
289                         break;
290         }
291         return t;
292 }
293
/* Allocate and register a new tunnel net_device.
 *
 * The device name comes from @parms->name, or "<kind>%d" when unset
 * ("%d" is expanded during registration).  The new device is placed in
 * @net and seeded with @parms.  Returns the device or an ERR_PTR on
 * failure.  Caller must hold RTNL (asserted below).
 */
294 static struct net_device *__ip_tunnel_create(struct net *net,
295                                              const struct rtnl_link_ops *ops,
296                                              struct ip_tunnel_parm *parms)
297 {
298         int err;
299         struct ip_tunnel *tunnel;
300         struct net_device *dev;
301         char name[IFNAMSIZ];
302
303         if (parms->name[0])
304                 strlcpy(name, parms->name, IFNAMSIZ);
305         else {
306                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
307                         err = -E2BIG;
308                         goto failed;
309                 }
310                 strlcpy(name, ops->kind, IFNAMSIZ);
311                 strncat(name, "%d", 2);	/* room for "%d" checked above */
312         }
313
314         ASSERT_RTNL();
315         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
316         if (!dev) {
317                 err = -ENOMEM;
318                 goto failed;
319         }
320         dev_net_set(dev, net);
321
322         dev->rtnl_link_ops = ops;
323
324         tunnel = netdev_priv(dev);
325         tunnel->parms = *parms;
326         tunnel->net = net;
327
328         err = register_netdevice(dev);
329         if (err)
330                 goto failed_free;
331
332         return dev;
333
334 failed_free:
335         free_netdev(dev);
336 failed:
337         return ERR_PTR(err);
338 }
339
340 static inline void init_tunnel_flow(struct flowi4 *fl4,
341                                     int proto,
342                                     __be32 daddr, __be32 saddr,
343                                     __be32 key, __u8 tos, int oif)
344 {
345         memset(fl4, 0, sizeof(*fl4));
346         fl4->flowi4_oif = oif;
347         fl4->daddr = daddr;
348         fl4->saddr = saddr;
349         fl4->flowi4_tos = tos;
350         fl4->flowi4_proto = proto;
351         fl4->fl4_gre_key = key;
352 }
353
/* Bind the tunnel to its underlying device and size it accordingly.
 *
 * Routes toward the configured destination (caching the result in the
 * per-cpu dst cache) or, failing that, uses the device identified by
 * parms.link.  Derives needed_headroom from the lower device and
 * returns the usable MTU, clamped to at least 68 (the IPv4 minimum).
 */
354 static int ip_tunnel_bind_dev(struct net_device *dev)
355 {
356         struct net_device *tdev = NULL;
357         struct ip_tunnel *tunnel = netdev_priv(dev);
358         const struct iphdr *iph;
359         int hlen = LL_MAX_HEADER;
360         int mtu = ETH_DATA_LEN;
361         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
362
363         iph = &tunnel->parms.iph;
364
365         /* Guess output device to choose reasonable mtu and needed_headroom */
366         if (iph->daddr) {
367                 struct flowi4 fl4;
368                 struct rtable *rt;
369
370                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
371                                  iph->saddr, tunnel->parms.o_key,
372                                  RT_TOS(iph->tos), tunnel->parms.link);
373                 rt = ip_route_output_key(tunnel->net, &fl4);
374
375                 if (!IS_ERR(rt)) {
376                         tdev = rt->dst.dev;
377                         tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
378                         ip_rt_put(rt);
379                 }
380                 if (dev->type != ARPHRD_ETHER)
381                         dev->flags |= IFF_POINTOPOINT;
382         }
383
384         if (!tdev && tunnel->parms.link)
385                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
386
387         if (tdev) {
388                 hlen = tdev->hard_header_len + tdev->needed_headroom;
389                 mtu = tdev->mtu;
390         }
391         dev->iflink = tunnel->parms.link;
392
393         dev->needed_headroom = t_hlen + hlen;
394         mtu -= (dev->hard_header_len + t_hlen);
395
396         if (mtu < 68)
397                 mtu = 68;
398
399         return mtu;
400 }
401
/* Create a new tunnel from @parms, bind it to its underlying device to
 * set the MTU, and hash it into @itn.  Uses the fallback device's
 * rtnl_link_ops so the new device is of the same kind.  Returns the
 * tunnel or an ERR_PTR propagated from device creation.
 */
402 static struct ip_tunnel *ip_tunnel_create(struct net *net,
403                                           struct ip_tunnel_net *itn,
404                                           struct ip_tunnel_parm *parms)
405 {
406         struct ip_tunnel *nt;
407         struct net_device *dev;
408
409         BUG_ON(!itn->fb_tunnel_dev);
410         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
411         if (IS_ERR(dev))
412                 return ERR_CAST(dev);
413
414         dev->mtu = ip_tunnel_bind_dev(dev);
415
416         nt = netdev_priv(dev);
417         ip_tunnel_add(itn, nt);
418         return nt;
419 }
420
/* Common receive path for decapsulated IP tunnel packets.
 *
 * Validates the packet's checksum/sequence flags in @tpi against the
 * tunnel's configuration, undoes ECN encapsulation, updates rx stats,
 * and hands the inner packet to GRO.  Consumes @skb on every path and
 * always returns 0; failures are accounted in the device stats.
 */
421 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
422                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
423 {
424         struct pcpu_sw_netstats *tstats;
425         const struct iphdr *iph = ip_hdr(skb);
426         int err;
427
428 #ifdef CONFIG_NET_IPGRE_BROADCAST
429         if (ipv4_is_multicast(iph->daddr)) {
430                 tunnel->dev->stats.multicast++;
431                 skb->pkt_type = PACKET_BROADCAST;
432         }
433 #endif
434
            /* The packet's TUNNEL_CSUM flag must agree with the tunnel's
             * configuration in both directions. */
435         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
436              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
437                 tunnel->dev->stats.rx_crc_errors++;
438                 tunnel->dev->stats.rx_errors++;
439                 goto drop;
440         }
441
            /* On sequenced tunnels, drop packets without a sequence number
             * or with one that went backwards. */
442         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
443                 if (!(tpi->flags&TUNNEL_SEQ) ||
444                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
445                         tunnel->dev->stats.rx_fifo_errors++;
446                         tunnel->dev->stats.rx_errors++;
447                         goto drop;
448                 }
449                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
450         }
451
452         skb_reset_network_header(skb);
453
454         err = IP_ECN_decapsulate(iph, skb);
455         if (unlikely(err)) {
456                 if (log_ecn_error)
457                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
458                                         &iph->saddr, iph->tos);
459                 if (err > 1) {
460                         ++tunnel->dev->stats.rx_frame_errors;
461                         ++tunnel->dev->stats.rx_errors;
462                         goto drop;
463                 }
464         }
465
466         tstats = this_cpu_ptr(tunnel->dev->tstats);
467         u64_stats_update_begin(&tstats->syncp);
468         tstats->rx_packets++;
469         tstats->rx_bytes += skb->len;
470         u64_stats_update_end(&tstats->syncp);
471
            /* Scrub state when the packet crosses a netns boundary. */
472         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
473
474         if (tunnel->dev->type == ARPHRD_ETHER) {
475                 skb->protocol = eth_type_trans(skb, tunnel->dev);
476                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
477         } else {
478                 skb->dev = tunnel->dev;
479         }
480
481         gro_cells_receive(&tunnel->gro_cells, skb);
482         return 0;
483
484 drop:
485         kfree_skb(skb);
486         return 0;
487 }
488 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489
490 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
491                             struct rtable *rt, __be16 df)
492 {
493         struct ip_tunnel *tunnel = netdev_priv(dev);
494         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
495         int mtu;
496
497         if (df)
498                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
499                                         - sizeof(struct iphdr) - tunnel->hlen;
500         else
501                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
502
503         if (skb_dst(skb))
504                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
505
506         if (skb->protocol == htons(ETH_P_IP)) {
507                 if (!skb_is_gso(skb) &&
508                     (df & htons(IP_DF)) && mtu < pkt_size) {
509                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
510                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
511                         return -E2BIG;
512                 }
513         }
514 #if IS_ENABLED(CONFIG_IPV6)
515         else if (skb->protocol == htons(ETH_P_IPV6)) {
516                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
517
518                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
519                            mtu >= IPV6_MIN_MTU) {
520                         if ((tunnel->parms.iph.daddr &&
521                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
522                             rt6->rt6i_dst.plen == 128) {
523                                 rt6->rt6i_flags |= RTF_MODIFIED;
524                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
525                         }
526                 }
527
528                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
529                                         mtu < pkt_size) {
530                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
531                         return -E2BIG;
532                 }
533         }
534 #endif
535         return 0;
536 }
537
/* Encapsulate @skb in an IPv4 header built from @tnl_params and send it.
 *
 * Resolves the outer destination — for NBMA tunnels (daddr == 0) it is
 * derived from the inner route or neighbour — inherits TOS/TTL/DF from
 * the inner packet where configured, uses/refreshes the per-cpu cached
 * route for connected tunnels, enforces PMTU, and hands the packet to
 * iptunnel_xmit().  Frees the skb itself on every error path.
 */
538 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
539                     const struct iphdr *tnl_params, const u8 protocol)
540 {
541         struct ip_tunnel *tunnel = netdev_priv(dev);
542         const struct iphdr *inner_iph;
543         struct flowi4 fl4;
544         u8     tos, ttl;
545         __be16 df;
546         struct rtable *rt;              /* Route to the other host */
547         unsigned int max_headroom;      /* The extra header space needed */
548         __be32 dst;
549         int err;
550         bool connected;
551
552         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
553         connected = (tunnel->parms.iph.daddr != 0);
554
555         dst = tnl_params->daddr;
556         if (dst == 0) {
557                 /* NBMA tunnel */
558
559                 if (skb_dst(skb) == NULL) {
560                         dev->stats.tx_fifo_errors++;
561                         goto tx_error;
562                 }
563
                    /* Take the outer destination from the inner route's
                     * next hop (IPv4) or from the neighbour entry (IPv6). */
564                 if (skb->protocol == htons(ETH_P_IP)) {
565                         rt = skb_rtable(skb);
566                         dst = rt_nexthop(rt, inner_iph->daddr);
567                 }
568 #if IS_ENABLED(CONFIG_IPV6)
569                 else if (skb->protocol == htons(ETH_P_IPV6)) {
570                         const struct in6_addr *addr6;
571                         struct neighbour *neigh;
572                         bool do_tx_error_icmp;
573                         int addr_type;
574
575                         neigh = dst_neigh_lookup(skb_dst(skb),
576                                                  &ipv6_hdr(skb)->daddr);
577                         if (neigh == NULL)
578                                 goto tx_error;
579
580                         addr6 = (const struct in6_addr *)&neigh->primary_key;
581                         addr_type = ipv6_addr_type(addr6);
582
583                         if (addr_type == IPV6_ADDR_ANY) {
584                                 addr6 = &ipv6_hdr(skb)->daddr;
585                                 addr_type = ipv6_addr_type(addr6);
586                         }
587
                            /* Only IPv4-compatible IPv6 addresses carry an
                             * extractable IPv4 destination. */
588                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
589                                 do_tx_error_icmp = true;
590                         else {
591                                 do_tx_error_icmp = false;
592                                 dst = addr6->s6_addr32[3];
593                         }
594                         neigh_release(neigh);
595                         if (do_tx_error_icmp)
596                                 goto tx_error_icmp;
597                 }
598 #endif
599                 else
600                         goto tx_error;
601
602                 connected = false;
603         }
604
605         tos = tnl_params->tos;
606         if (tos & 0x1) {
                    /* Low bit set: inherit TOS from the inner packet, which
                     * makes the cached (connected) route unusable. */
607                 tos &= ~0x1;
608                 if (skb->protocol == htons(ETH_P_IP)) {
609                         tos = inner_iph->tos;
610                         connected = false;
611                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
612                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
613                         connected = false;
614                 }
615         }
616
617         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
618                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
619
620         rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
621
622         if (!rt) {
623                 rt = ip_route_output_key(tunnel->net, &fl4);
624
625                 if (IS_ERR(rt)) {
626                         dev->stats.tx_carrier_errors++;
627                         goto tx_error;
628                 }
629                 if (connected)
630                         tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
631         }
632
            /* Routing back out of the tunnel device itself would loop. */
633         if (rt->dst.dev == dev) {
634                 ip_rt_put(rt);
635                 dev->stats.collisions++;
636                 goto tx_error;
637         }
638
639         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
640                 ip_rt_put(rt);
641                 goto tx_error;
642         }
643
            /* While recent tunnel ICMP errors are pending (err_count decays
             * over IPTUNNEL_ERR_TIMEO), report link failure to the sender. */
644         if (tunnel->err_count > 0) {
645                 if (time_before(jiffies,
646                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
647                         tunnel->err_count--;
648
649                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
650                         dst_link_failure(skb);
651                 } else
652                         tunnel->err_count = 0;
653         }
654
655         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
656         ttl = tnl_params->ttl;
657         if (ttl == 0) {
                    /* TTL 0 in the config means inherit from the inner packet
                     * (or the route, for non-IP payloads). */
658                 if (skb->protocol == htons(ETH_P_IP))
659                         ttl = inner_iph->ttl;
660 #if IS_ENABLED(CONFIG_IPV6)
661                 else if (skb->protocol == htons(ETH_P_IPV6))
662                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
663 #endif
664                 else
665                         ttl = ip4_dst_hoplimit(&rt->dst);
666         }
667
668         df = tnl_params->frag_off;
669         if (skb->protocol == htons(ETH_P_IP))
670                 df |= (inner_iph->frag_off&htons(IP_DF));
671
672         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
673                         + rt->dst.header_len;
674         if (max_headroom > dev->needed_headroom)
675                 dev->needed_headroom = max_headroom;
676
677         if (skb_cow_head(skb, dev->needed_headroom)) {
678                 ip_rt_put(rt);
679                 dev->stats.tx_dropped++;
680                 kfree_skb(skb);
681                 return;
682         }
683
684         err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
685                             tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
686         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
687
688         return;
689
690 #if IS_ENABLED(CONFIG_IPV6)
691 tx_error_icmp:
692         dst_link_failure(skb);
693 #endif
694 tx_error:
695         dev->stats.tx_errors++;
696         kfree_skb(skb);
697 }
698 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
699
/* Apply new parameters @p to the existing tunnel @t.
 *
 * The tunnel is unhashed and re-added because the endpoints/keys decide
 * its hash bucket.  A link change re-binds to the underlying device and
 * (when @set_mtu) adopts the recomputed MTU.  The per-cpu route cache is
 * always invalidated and a netdev state change is signalled.
 */
700 static void ip_tunnel_update(struct ip_tunnel_net *itn,
701                              struct ip_tunnel *t,
702                              struct net_device *dev,
703                              struct ip_tunnel_parm *p,
704                              bool set_mtu)
705 {
706         ip_tunnel_del(t);
707         t->parms.iph.saddr = p->iph.saddr;
708         t->parms.iph.daddr = p->iph.daddr;
709         t->parms.i_key = p->i_key;
710         t->parms.o_key = p->o_key;
711         if (dev->type != ARPHRD_ETHER) {
712                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
713                 memcpy(dev->broadcast, &p->iph.daddr, 4);
714         }
715         ip_tunnel_add(itn, t);
716
717         t->parms.iph.ttl = p->iph.ttl;
718         t->parms.iph.tos = p->iph.tos;
719         t->parms.iph.frag_off = p->iph.frag_off;
720
721         if (t->parms.link != p->link) {
722                 int mtu;
723
724                 t->parms.link = p->link;
725                 mtu = ip_tunnel_bind_dev(dev);
726                 if (set_mtu)
727                         dev->mtu = mtu;
728         }
729         ip_tunnel_dst_reset_all(t);
730         netdev_state_change(dev);
731 }
732
/* Legacy ioctl configuration entry point (SIOC{GET,ADD,CHG,DEL}TUNNEL).
 *
 * ADD/CHG/DEL require CAP_NET_ADMIN in the tunnel's user namespace.
 * Issued on the per-netns fallback device, GET/DEL operate on the
 * tunnel described by @p rather than the fallback itself.  Returns 0 or
 * a negative errno; GET also copies the tunnel's parms back into @p.
 */
733 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
734 {
735         int err = 0;
736         struct ip_tunnel *t = netdev_priv(dev);
737         struct net *net = t->net;
738         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
739
740         BUG_ON(!itn->fb_tunnel_dev);
741         switch (cmd) {
742         case SIOCGETTUNNEL:
743                 if (dev == itn->fb_tunnel_dev) {
744                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
745                         if (t == NULL)
746                                 t = netdev_priv(dev);
747                 }
748                 memcpy(p, &t->parms, sizeof(*p));
749                 break;
750
751         case SIOCADDTUNNEL:
752         case SIOCCHGTUNNEL:
753                 err = -EPERM;
754                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
755                         goto done;
756                 if (p->iph.ttl)
757                         p->iph.frag_off |= htons(IP_DF);
                    /* Non-VTI tunnels: ignore keys unless TUNNEL_KEY is set. */
758                 if (!(p->i_flags & VTI_ISVTI)) {
759                         if (!(p->i_flags & TUNNEL_KEY))
760                                 p->i_key = 0;
761                         if (!(p->o_flags & TUNNEL_KEY))
762                                 p->o_key = 0;
763                 }
764
765                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
766
767                 if (cmd == SIOCADDTUNNEL) {
768                         if (!t) {
769                                 t = ip_tunnel_create(net, itn, p);
770                                 err = PTR_ERR_OR_ZERO(t);
771                                 break;
772                         }
773
774                         err = -EEXIST;
775                         break;
776                 }
777                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
778                         if (t != NULL) {
                                    /* @p describes a different existing tunnel. */
779                                 if (t->dev != dev) {
780                                         err = -EEXIST;
781                                         break;
782                                 }
783                         } else {
                                    /* Changing this device in place: the new
                                     * parms must not flip its P2P/broadcast
                                     * nature. */
784                                 unsigned int nflags = 0;
785
786                                 if (ipv4_is_multicast(p->iph.daddr))
787                                         nflags = IFF_BROADCAST;
788                                 else if (p->iph.daddr)
789                                         nflags = IFF_POINTOPOINT;
790
791                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
792                                         err = -EINVAL;
793                                         break;
794                                 }
795
796                                 t = netdev_priv(dev);
797                         }
798                 }
799
800                 if (t) {
801                         err = 0;
802                         ip_tunnel_update(itn, t, dev, p, true);
803                 } else {
804                         err = -ENOENT;
805                 }
806                 break;
807
808         case SIOCDELTUNNEL:
809                 err = -EPERM;
810                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
811                         goto done;
812
813                 if (dev == itn->fb_tunnel_dev) {
                            /* Select the victim by @p; the fallback device
                             * itself may never be deleted this way. */
814                         err = -ENOENT;
815                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
816                         if (t == NULL)
817                                 goto done;
818                         err = -EPERM;
819                         if (t == netdev_priv(itn->fb_tunnel_dev))
820                                 goto done;
821                         dev = t->dev;
822                 }
823                 unregister_netdevice(dev);
824                 err = 0;
825                 break;
826
827         default:
828                 err = -EINVAL;
829         }
830
831 done:
832         return err;
833 }
834 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
835
836 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
837 {
838         struct ip_tunnel *tunnel = netdev_priv(dev);
839         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
840
841         if (new_mtu < 68 ||
842             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
843                 return -EINVAL;
844         dev->mtu = new_mtu;
845         return 0;
846 }
847 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
848
/* Device destructor: tear down the GRO cells and free the per-cpu
 * route cache and stats before releasing the netdev itself. */
849 static void ip_tunnel_dev_free(struct net_device *dev)
850 {
851         struct ip_tunnel *tunnel = netdev_priv(dev);
852
853         gro_cells_destroy(&tunnel->gro_cells);
854         free_percpu(tunnel->dst_cache);
855         free_percpu(dev->tstats);
856         free_netdev(dev);
857 }
858
/* rtnl dellink handler: unhash the tunnel and queue its device for
 * unregistration on @head.  The per-netns fallback device is never
 * removed this way. */
859 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
860 {
861         struct ip_tunnel *tunnel = netdev_priv(dev);
862         struct ip_tunnel_net *itn;
863
864         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
865
866         if (itn->fb_tunnel_dev != dev) {
867                 ip_tunnel_del(netdev_priv(dev));
868                 unregister_netdevice_queue(dev, head);
869         }
870 }
871 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
872
/* Per-netns initialization for a tunnel type.
 *
 * Empties the hash table and, when @ops is given, creates the netns
 * fallback device named @devname.  The fallback device is locked to its
 * netns (NETIF_F_NETNS_LOCAL) since there must be exactly one per
 * netns.  Returns 0 or a negative errno from device creation.
 */
873 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
874                                   struct rtnl_link_ops *ops, char *devname)
875 {
876         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
877         struct ip_tunnel_parm parms;
878         unsigned int i;
879
880         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
881                 INIT_HLIST_HEAD(&itn->tunnels[i]);
882
            /* No ops: this tunnel type keeps no fallback device. */
883         if (!ops) {
884                 itn->fb_tunnel_dev = NULL;
885                 return 0;
886         }
887
888         memset(&parms, 0, sizeof(parms));
889         if (devname)
890                 strlcpy(parms.name, devname, IFNAMSIZ);
891
892         rtnl_lock();
893         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
894         /* FB netdevice is special: we have one, and only one per netns.
895          * Allowing to move it to another netns is clearly unsafe.
896          */
897         if (!IS_ERR(itn->fb_tunnel_dev)) {
898                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
899                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
900                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
901         }
902         rtnl_unlock();
903
904         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
905 }
906 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
907
908 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
909                               struct rtnl_link_ops *ops)
910 {
911         struct net *net = dev_net(itn->fb_tunnel_dev);
912         struct net_device *dev, *aux;
913         int h;
914
915         for_each_netdev_safe(net, dev, aux)
916                 if (dev->rtnl_link_ops == ops)
917                         unregister_netdevice_queue(dev, head);
918
919         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
920                 struct ip_tunnel *t;
921                 struct hlist_node *n;
922                 struct hlist_head *thead = &itn->tunnels[h];
923
924                 hlist_for_each_entry_safe(t, n, thead, hash_node)
925                         /* If dev is in the same netns, it has already
926                          * been added to the list by the previous loop.
927                          */
928                         if (!net_eq(dev_net(t->dev), net))
929                                 unregister_netdevice_queue(t->dev, head);
930         }
931 }
932
933 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
934 {
935         LIST_HEAD(list);
936
937         rtnl_lock();
938         ip_tunnel_destroy(itn, &list, ops);
939         unregister_netdevice_many(&list);
940         rtnl_unlock();
941 }
942 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
943
944 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
945                       struct ip_tunnel_parm *p)
946 {
947         struct ip_tunnel *nt;
948         struct net *net = dev_net(dev);
949         struct ip_tunnel_net *itn;
950         int mtu;
951         int err;
952
953         nt = netdev_priv(dev);
954         itn = net_generic(net, nt->ip_tnl_net_id);
955
956         if (ip_tunnel_find(itn, p, dev->type))
957                 return -EEXIST;
958
959         nt->net = net;
960         nt->parms = *p;
961         err = register_netdevice(dev);
962         if (err)
963                 goto out;
964
965         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
966                 eth_hw_addr_random(dev);
967
968         mtu = ip_tunnel_bind_dev(dev);
969         if (!tb[IFLA_MTU])
970                 dev->mtu = mtu;
971
972         ip_tunnel_add(itn, nt);
973
974 out:
975         return err;
976 }
977 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
978
979 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
980                          struct ip_tunnel_parm *p)
981 {
982         struct ip_tunnel *t;
983         struct ip_tunnel *tunnel = netdev_priv(dev);
984         struct net *net = tunnel->net;
985         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
986
987         if (dev == itn->fb_tunnel_dev)
988                 return -EINVAL;
989
990         t = ip_tunnel_find(itn, p, dev->type);
991
992         if (t) {
993                 if (t->dev != dev)
994                         return -EEXIST;
995         } else {
996                 t = tunnel;
997
998                 if (dev->type != ARPHRD_ETHER) {
999                         unsigned int nflags = 0;
1000
1001                         if (ipv4_is_multicast(p->iph.daddr))
1002                                 nflags = IFF_BROADCAST;
1003                         else if (p->iph.daddr)
1004                                 nflags = IFF_POINTOPOINT;
1005
1006                         if ((dev->flags ^ nflags) &
1007                             (IFF_POINTOPOINT | IFF_BROADCAST))
1008                                 return -EINVAL;
1009                 }
1010         }
1011
1012         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1013         return 0;
1014 }
1015 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1016
1017 int ip_tunnel_init(struct net_device *dev)
1018 {
1019         struct ip_tunnel *tunnel = netdev_priv(dev);
1020         struct iphdr *iph = &tunnel->parms.iph;
1021         int err;
1022
1023         dev->destructor = ip_tunnel_dev_free;
1024         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1025         if (!dev->tstats)
1026                 return -ENOMEM;
1027
1028         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1029         if (!tunnel->dst_cache) {
1030                 free_percpu(dev->tstats);
1031                 return -ENOMEM;
1032         }
1033
1034         err = gro_cells_init(&tunnel->gro_cells, dev);
1035         if (err) {
1036                 free_percpu(tunnel->dst_cache);
1037                 free_percpu(dev->tstats);
1038                 return err;
1039         }
1040
1041         tunnel->dev = dev;
1042         tunnel->net = dev_net(dev);
1043         strcpy(tunnel->parms.name, dev->name);
1044         iph->version            = 4;
1045         iph->ihl                = 5;
1046
1047         return 0;
1048 }
1049 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1050
1051 void ip_tunnel_uninit(struct net_device *dev)
1052 {
1053         struct ip_tunnel *tunnel = netdev_priv(dev);
1054         struct net *net = tunnel->net;
1055         struct ip_tunnel_net *itn;
1056
1057         itn = net_generic(net, tunnel->ip_tnl_net_id);
1058         /* fb_tunnel_dev will be unregisted in net-exit call. */
1059         if (itn->fb_tunnel_dev != dev)
1060                 ip_tunnel_del(netdev_priv(dev));
1061
1062         ip_tunnel_dst_reset_all(tunnel);
1063 }
1064 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1065
/* Do least required initialization, rest of init is done in tunnel_init call */
void ip_tunnel_setup(struct net_device *dev, int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Record the pernet subsystem id so later callers (e.g.
	 * ip_tunnel_uninit, ip_tunnel_newlink) can find the per-netns
	 * state via net_generic(net, tunnel->ip_tnl_net_id).
	 */
	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1073
1074 MODULE_LICENSE("GPL");