Merge tag 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jgarzik...
[pandora-kernel.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
/*
   This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.

   For comments, look at net/ipv4/ip_gre.c --ANK
 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each hashed tunnel table; must stay a power of two. */
#define HASH_SIZE  16
/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 * The argument is parenthesized so that an expression argument is cast and
 * shifted as a whole.  Note that it is still evaluated twice.
 */
#define HASH(addr) (((__force u32)(addr) ^ ((__force u32)(addr) >> 4)) & (HASH_SIZE - 1))
122
/*
 * When true, ipip_rcv() emits a rate-limited log line for packets whose
 * ECN decapsulation reports an error.
 */
static bool log_ecn_error = true;
/* Exposed as a module parameter with permission bits 0644. */
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
126
/* Key passed to net_generic() to fetch this module's per-namespace state. */
static int ipip_net_id __read_mostly;
/*
 * Per network namespace tunnel tables.  Each bucket is the head of a list
 * chained through ip_tunnel->next.
 */
struct ipip_net {
        /* Tunnels with both remote and local set; bucket is HASH(remote) ^ HASH(local). */
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        /* Searched by HASH(remote) in ipip_tunnel_lookup(). */
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        /* Searched by HASH(local) in ipip_tunnel_lookup(). */
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        /* Single wildcard slot, used as the last resort by ipip_tunnel_lookup(). */
        struct ip_tunnel __rcu *tunnels_wc[1];
        /*
         * Table selector indexed by the "prio" value computed in
         * __ipip_bucket() (bit 1: remote set, bit 0: local set).
         * NOTE(review): initialization is not visible in this part of the
         * file; presumably it points at the four arrays above - confirm.
         */
        struct ip_tunnel __rcu **tunnels[4];

        /* Fallback device; its tunnel occupies tunnels_wc[0] (see ipip_fb_tunnel_init()). */
        struct net_device *fb_tunnel_dev;
};
137
/* Forward declarations for helpers and link ops that are used before their definitions. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
142
143 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
144                                                   struct rtnl_link_stats64 *tot)
145 {
146         int i;
147
148         for_each_possible_cpu(i) {
149                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
150                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
151                 unsigned int start;
152
153                 do {
154                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
155                         rx_packets = tstats->rx_packets;
156                         tx_packets = tstats->tx_packets;
157                         rx_bytes = tstats->rx_bytes;
158                         tx_bytes = tstats->tx_bytes;
159                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
160
161                 tot->rx_packets += rx_packets;
162                 tot->tx_packets += tx_packets;
163                 tot->rx_bytes   += rx_bytes;
164                 tot->tx_bytes   += tx_bytes;
165         }
166
167         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
168         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
169         tot->tx_dropped = dev->stats.tx_dropped;
170         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
171         tot->tx_errors = dev->stats.tx_errors;
172         tot->collisions = dev->stats.collisions;
173
174         return tot;
175 }
176
177 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
178                 __be32 remote, __be32 local)
179 {
180         unsigned int h0 = HASH(remote);
181         unsigned int h1 = HASH(local);
182         struct ip_tunnel *t;
183         struct ipip_net *ipn = net_generic(net, ipip_net_id);
184
185         for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
186                 if (local == t->parms.iph.saddr &&
187                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
191                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
195                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
196                         return t;
197
198         t = rcu_dereference(ipn->tunnels_wc[0]);
199         if (t && (t->dev->flags&IFF_UP))
200                 return t;
201         return NULL;
202 }
203
204 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
205                 struct ip_tunnel_parm *parms)
206 {
207         __be32 remote = parms->iph.daddr;
208         __be32 local = parms->iph.saddr;
209         unsigned int h = 0;
210         int prio = 0;
211
212         if (remote) {
213                 prio |= 2;
214                 h ^= HASH(remote);
215         }
216         if (local) {
217                 prio |= 1;
218                 h ^= HASH(local);
219         }
220         return &ipn->tunnels[prio][h];
221 }
222
/* Convenience wrapper: bucket lookup keyed by an existing tunnel's own parameters. */
static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
                struct ip_tunnel *t)
{
        return __ipip_bucket(ipn, &t->parms);
}
228
229 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
230 {
231         struct ip_tunnel __rcu **tp;
232         struct ip_tunnel *iter;
233
234         for (tp = ipip_bucket(ipn, t);
235              (iter = rtnl_dereference(*tp)) != NULL;
236              tp = &iter->next) {
237                 if (t == iter) {
238                         rcu_assign_pointer(*tp, t->next);
239                         break;
240                 }
241         }
242 }
243
244 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
245 {
246         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
247
248         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
249         rcu_assign_pointer(*tp, t);
250 }
251
252 static int ipip_tunnel_create(struct net_device *dev)
253 {
254         struct ip_tunnel *t = netdev_priv(dev);
255         struct net *net = dev_net(dev);
256         struct ipip_net *ipn = net_generic(net, ipip_net_id);
257         int err;
258
259         err = ipip_tunnel_init(dev);
260         if (err < 0)
261                 goto out;
262
263         err = register_netdevice(dev);
264         if (err < 0)
265                 goto out;
266
267         strcpy(t->parms.name, dev->name);
268         dev->rtnl_link_ops = &ipip_link_ops;
269
270         dev_hold(dev);
271         ipip_tunnel_link(ipn, t);
272         return 0;
273
274 out:
275         return err;
276 }
277
278 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
279                 struct ip_tunnel_parm *parms, int create)
280 {
281         __be32 remote = parms->iph.daddr;
282         __be32 local = parms->iph.saddr;
283         struct ip_tunnel *t, *nt;
284         struct ip_tunnel __rcu **tp;
285         struct net_device *dev;
286         char name[IFNAMSIZ];
287         struct ipip_net *ipn = net_generic(net, ipip_net_id);
288
289         for (tp = __ipip_bucket(ipn, parms);
290                  (t = rtnl_dereference(*tp)) != NULL;
291                  tp = &t->next) {
292                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
293                         return t;
294         }
295         if (!create)
296                 return NULL;
297
298         if (parms->name[0])
299                 strlcpy(name, parms->name, IFNAMSIZ);
300         else
301                 strcpy(name, "tunl%d");
302
303         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
304         if (dev == NULL)
305                 return NULL;
306
307         dev_net_set(dev, net);
308
309         nt = netdev_priv(dev);
310         nt->parms = *parms;
311
312         if (ipip_tunnel_create(dev) < 0)
313                 goto failed_free;
314
315         return nt;
316
317 failed_free:
318         ipip_dev_free(dev);
319         return NULL;
320 }
321
322 /* called with RTNL */
323 static void ipip_tunnel_uninit(struct net_device *dev)
324 {
325         struct net *net = dev_net(dev);
326         struct ipip_net *ipn = net_generic(net, ipip_net_id);
327
328         if (dev == ipn->fb_tunnel_dev)
329                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
330         else
331                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
332         dev_put(dev);
333 }
334
335 static int ipip_err(struct sk_buff *skb, u32 info)
336 {
337
338 /* All the routers (except for Linux) return only
339    8 bytes of packet payload. It means, that precise relaying of
340    ICMP in the real Internet is absolutely infeasible.
341  */
342         const struct iphdr *iph = (const struct iphdr *)skb->data;
343         const int type = icmp_hdr(skb)->type;
344         const int code = icmp_hdr(skb)->code;
345         struct ip_tunnel *t;
346         int err;
347
348         switch (type) {
349         default:
350         case ICMP_PARAMETERPROB:
351                 return 0;
352
353         case ICMP_DEST_UNREACH:
354                 switch (code) {
355                 case ICMP_SR_FAILED:
356                 case ICMP_PORT_UNREACH:
357                         /* Impossible event. */
358                         return 0;
359                 default:
360                         /* All others are translated to HOST_UNREACH.
361                            rfc2003 contains "deep thoughts" about NET_UNREACH,
362                            I believe they are just ether pollution. --ANK
363                          */
364                         break;
365                 }
366                 break;
367         case ICMP_TIME_EXCEEDED:
368                 if (code != ICMP_EXC_TTL)
369                         return 0;
370                 break;
371         case ICMP_REDIRECT:
372                 break;
373         }
374
375         err = -ENOENT;
376         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
377         if (t == NULL)
378                 goto out;
379
380         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
381                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
382                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
383                 err = 0;
384                 goto out;
385         }
386
387         if (type == ICMP_REDIRECT) {
388                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
389                               IPPROTO_IPIP, 0);
390                 err = 0;
391                 goto out;
392         }
393
394         if (t->parms.iph.daddr == 0)
395                 goto out;
396
397         err = 0;
398         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
399                 goto out;
400
401         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
402                 t->err_count++;
403         else
404                 t->err_count = 1;
405         t->err_time = jiffies;
406 out:
407
408         return err;
409 }
410
411 static int ipip_rcv(struct sk_buff *skb)
412 {
413         struct ip_tunnel *tunnel;
414         const struct iphdr *iph = ip_hdr(skb);
415         int err;
416
417         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
418         if (tunnel != NULL) {
419                 struct pcpu_tstats *tstats;
420
421                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
422                         goto drop;
423
424                 secpath_reset(skb);
425
426                 skb->mac_header = skb->network_header;
427                 skb_reset_network_header(skb);
428                 skb->protocol = htons(ETH_P_IP);
429                 skb->pkt_type = PACKET_HOST;
430
431                 __skb_tunnel_rx(skb, tunnel->dev);
432
433                 err = IP_ECN_decapsulate(iph, skb);
434                 if (unlikely(err)) {
435                         if (log_ecn_error)
436                                 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
437                                                      &iph->saddr, iph->tos);
438                         if (err > 1) {
439                                 ++tunnel->dev->stats.rx_frame_errors;
440                                 ++tunnel->dev->stats.rx_errors;
441                                 goto drop;
442                         }
443                 }
444
445                 tstats = this_cpu_ptr(tunnel->dev->tstats);
446                 u64_stats_update_begin(&tstats->syncp);
447                 tstats->rx_packets++;
448                 tstats->rx_bytes += skb->len;
449                 u64_stats_update_end(&tstats->syncp);
450
451                 netif_rx(skb);
452                 return 0;
453         }
454
455         return -1;
456
457 drop:
458         kfree_skb(skb);
459         return 0;
460 }
461
/*
 *      Transmit handler: encapsulate @skb in a new outer IPv4 header and
 *      send it towards the tunnel destination.
 *
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      Always returns NETDEV_TX_OK; on every error path the skb is freed
 *      here and a device error counter is incremented.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        const struct iphdr  *old_iph;
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;

        /* Only IPv4 payloads are accepted. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* A pending partial checksum must be handled by skb_checksum_help(), else drop. */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            skb_checksum_help(skb))
                goto tx_error;

        old_iph = ip_hdr(skb);

        /* Low bit of the configured TOS set: take the TOS from the inner header. */
        if (tos & 1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No configured destination: use the next hop of the skb's own route. */
                if ((rt = skb_rtable(skb)) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
                dst = rt_nexthop(rt, old_iph->daddr);
        }

        /* Route the outer packet; fl4 receives the addresses used for the new header. */
        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                   dst, tiph->saddr,
                                   0, 0,
                                   IPPROTO_IPIP, RT_TOS(tos),
                                   tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        /* The route leads back into this tunnel device: refuse to loop. */
        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        /* DF is set if configured on the tunnel or present on the inner packet. */
        df |= old_iph->frag_off & htons(IP_DF);

        if (df) {
                /* Path MTU available to the inner packet after adding the outer header. */
                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

                if (mtu < 68) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                if (skb_dst(skb))
                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

                /* Inner packet has DF and does not fit: report back and drop. */
                if ((old_iph->frag_off & htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        /*
         * Errors recorded by ipip_err(): while still within the timeout,
         * consume one count and signal a link failure for this packet;
         * otherwise forget the stale count.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The old skb is gone: re-fetch the inner header from the new one. */
                old_iph = ip_hdr(skb);
        }

        /* The inner IP header becomes the transport header of the outer packet. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        /* Replace the skb's route with the one found for the outer packet. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;

        /* A configured TTL of 0 means: copy the TTL of the inner packet. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        iptunnel_xmit(skb, dev);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
608
609 static void ipip_tunnel_bind_dev(struct net_device *dev)
610 {
611         struct net_device *tdev = NULL;
612         struct ip_tunnel *tunnel;
613         const struct iphdr *iph;
614
615         tunnel = netdev_priv(dev);
616         iph = &tunnel->parms.iph;
617
618         if (iph->daddr) {
619                 struct rtable *rt;
620                 struct flowi4 fl4;
621
622                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
623                                            iph->daddr, iph->saddr,
624                                            0, 0,
625                                            IPPROTO_IPIP,
626                                            RT_TOS(iph->tos),
627                                            tunnel->parms.link);
628                 if (!IS_ERR(rt)) {
629                         tdev = rt->dst.dev;
630                         ip_rt_put(rt);
631                 }
632                 dev->flags |= IFF_POINTOPOINT;
633         }
634
635         if (!tdev && tunnel->parms.link)
636                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
637
638         if (tdev) {
639                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
640                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
641         }
642         dev->iflink = tunnel->parms.link;
643 }
644
645 static void ipip_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
646 {
647         struct net *net = dev_net(t->dev);
648         struct ipip_net *ipn = net_generic(net, ipip_net_id);
649
650         ipip_tunnel_unlink(ipn, t);
651         synchronize_net();
652         t->parms.iph.saddr = p->iph.saddr;
653         t->parms.iph.daddr = p->iph.daddr;
654         memcpy(t->dev->dev_addr, &p->iph.saddr, 4);
655         memcpy(t->dev->broadcast, &p->iph.daddr, 4);
656         ipip_tunnel_link(ipn, t);
657         t->parms.iph.ttl = p->iph.ttl;
658         t->parms.iph.tos = p->iph.tos;
659         t->parms.iph.frag_off = p->iph.frag_off;
660         if (t->parms.link != p->link) {
661                 t->parms.link = p->link;
662                 ipip_tunnel_bind_dev(t->dev);
663         }
664         netdev_state_change(t->dev);
665 }
666
/*
 * Tunnel configuration ioctls (SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL, SIOCDELTUNNEL).
 *
 * The user buffer at ifr->ifr_ifru.ifru_data holds a struct
 * ip_tunnel_parm.  Returns 0 on success or a negative errno; any other
 * command yields -EINVAL.  ADD/CHG/DEL require CAP_NET_ADMIN in the
 * namespace's user namespace.
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                /*
                 * On the fallback device the request names the tunnel by
                 * its addresses; on any other device (or if nothing
                 * matches) the device's own tunnel is reported.
                 */
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* Only a plain IPv4/IPIP header template with no flag other than DF is valid. */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed TTL forces DF on the outer header. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* Creates the tunnel only for SIOCADDTUNNEL; otherwise a pure lookup. */
                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested addresses already belong to another device. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* IFF_POINTOPOINT must stay consistent with having a destination. */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                        }

                        ipip_tunnel_update(t, &p);
                }

                /* Report the resulting parameters back to the caller. */
                if (t) {
                        err = 0;
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                /*
                 * Via the fallback device the tunnel to delete is named by
                 * the user parameters; the fallback device itself cannot
                 * be deleted this way.
                 */
                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
766
767 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
768 {
769         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
770                 return -EINVAL;
771         dev->mtu = new_mtu;
772         return 0;
773 }
774
/* Device operations installed on every IPIP tunnel device by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ipip_tunnel_change_mtu,
        .ndo_get_stats64 = ipip_get_stats64,
};
782
/*
 * Release a tunnel device: free its per-CPU statistics, then the
 * net_device itself.  Installed as dev->destructor by ipip_tunnel_setup()
 * and also called directly on the creation failure path.
 */
static void ipip_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}
788
/* Feature flags set in both dev->features and dev->hw_features by ipip_tunnel_setup(). */
#define IPIP_FEATURES (NETIF_F_SG |             \
                       NETIF_F_FRAGLIST |       \
                       NETIF_F_HIGHDMA |        \
                       NETIF_F_HW_CSUM)
793
794 static void ipip_tunnel_setup(struct net_device *dev)
795 {
796         dev->netdev_ops         = &ipip_netdev_ops;
797         dev->destructor         = ipip_dev_free;
798
799         dev->type               = ARPHRD_TUNNEL;
800         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
801         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
802         dev->flags              = IFF_NOARP;
803         dev->iflink             = 0;
804         dev->addr_len           = 4;
805         dev->features           |= NETIF_F_NETNS_LOCAL;
806         dev->features           |= NETIF_F_LLTX;
807         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
808
809         dev->features           |= IPIP_FEATURES;
810         dev->hw_features        |= IPIP_FEATURES;
811 }
812
813 static int ipip_tunnel_init(struct net_device *dev)
814 {
815         struct ip_tunnel *tunnel = netdev_priv(dev);
816
817         tunnel->dev = dev;
818
819         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
820         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
821
822         ipip_tunnel_bind_dev(dev);
823
824         dev->tstats = alloc_percpu(struct pcpu_tstats);
825         if (!dev->tstats)
826                 return -ENOMEM;
827
828         return 0;
829 }
830
831 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
832 {
833         struct ip_tunnel *tunnel = netdev_priv(dev);
834         struct iphdr *iph = &tunnel->parms.iph;
835         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
836
837         tunnel->dev = dev;
838         strcpy(tunnel->parms.name, dev->name);
839
840         iph->version            = 4;
841         iph->protocol           = IPPROTO_IPIP;
842         iph->ihl                = 5;
843
844         dev->tstats = alloc_percpu(struct pcpu_tstats);
845         if (!dev->tstats)
846                 return -ENOMEM;
847
848         dev_hold(dev);
849         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
850         return 0;
851 }
852
853 static void ipip_netlink_parms(struct nlattr *data[],
854                                struct ip_tunnel_parm *parms)
855 {
856         memset(parms, 0, sizeof(*parms));
857
858         parms->iph.version = 4;
859         parms->iph.protocol = IPPROTO_IPIP;
860         parms->iph.ihl = 5;
861
862         if (!data)
863                 return;
864
865         if (data[IFLA_IPTUN_LINK])
866                 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
867
868         if (data[IFLA_IPTUN_LOCAL])
869                 parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]);
870
871         if (data[IFLA_IPTUN_REMOTE])
872                 parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]);
873
874         if (data[IFLA_IPTUN_TTL]) {
875                 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
876                 if (parms->iph.ttl)
877                         parms->iph.frag_off = htons(IP_DF);
878         }
879
880         if (data[IFLA_IPTUN_TOS])
881                 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
882
883         if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
884                 parms->iph.frag_off = htons(IP_DF);
885 }
886
887 static int ipip_newlink(struct net *src_net, struct net_device *dev,
888                         struct nlattr *tb[], struct nlattr *data[])
889 {
890         struct net *net = dev_net(dev);
891         struct ip_tunnel *nt;
892
893         nt = netdev_priv(dev);
894         ipip_netlink_parms(data, &nt->parms);
895
896         if (ipip_tunnel_locate(net, &nt->parms, 0))
897                 return -EEXIST;
898
899         return ipip_tunnel_create(dev);
900 }
901
902 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
903                            struct nlattr *data[])
904 {
905         struct ip_tunnel *t;
906         struct ip_tunnel_parm p;
907         struct net *net = dev_net(dev);
908         struct ipip_net *ipn = net_generic(net, ipip_net_id);
909
910         if (dev == ipn->fb_tunnel_dev)
911                 return -EINVAL;
912
913         ipip_netlink_parms(data, &p);
914
915         if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
916             (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
917                 return -EINVAL;
918
919         t = ipip_tunnel_locate(net, &p, 0);
920
921         if (t) {
922                 if (t->dev != dev)
923                         return -EEXIST;
924         } else
925                 t = netdev_priv(dev);
926
927         ipip_tunnel_update(t, &p);
928         return 0;
929 }
930
931 static size_t ipip_get_size(const struct net_device *dev)
932 {
933         return
934                 /* IFLA_IPTUN_LINK */
935                 nla_total_size(4) +
936                 /* IFLA_IPTUN_LOCAL */
937                 nla_total_size(4) +
938                 /* IFLA_IPTUN_REMOTE */
939                 nla_total_size(4) +
940                 /* IFLA_IPTUN_TTL */
941                 nla_total_size(1) +
942                 /* IFLA_IPTUN_TOS */
943                 nla_total_size(1) +
944                 /* IFLA_IPTUN_PMTUDISC */
945                 nla_total_size(1) +
946                 0;
947 }
948
949 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
950 {
951         struct ip_tunnel *tunnel = netdev_priv(dev);
952         struct ip_tunnel_parm *parm = &tunnel->parms;
953
954         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
955             nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
956             nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
957             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
958             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
959             nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
960                        !!(parm->iph.frag_off & htons(IP_DF))))
961                 goto nla_put_failure;
962         return 0;
963
964 nla_put_failure:
965         return -EMSGSIZE;
966 }
967
968 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
969         [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
970         [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
971         [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
972         [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
973         [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
974         [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
975 };
976
977 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
978         .kind           = "ipip",
979         .maxtype        = IFLA_IPTUN_MAX,
980         .policy         = ipip_policy,
981         .priv_size      = sizeof(struct ip_tunnel),
982         .setup          = ipip_tunnel_setup,
983         .newlink        = ipip_newlink,
984         .changelink     = ipip_changelink,
985         .get_size       = ipip_get_size,
986         .fill_info      = ipip_fill_info,
987 };
988
989 static struct xfrm_tunnel ipip_handler __read_mostly = {
990         .handler        =       ipip_rcv,
991         .err_handler    =       ipip_err,
992         .priority       =       1,
993 };
994
995 static const char banner[] __initconst =
996         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
997
998 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
999 {
1000         int prio;
1001
1002         for (prio = 1; prio < 4; prio++) {
1003                 int h;
1004                 for (h = 0; h < HASH_SIZE; h++) {
1005                         struct ip_tunnel *t;
1006
1007                         t = rtnl_dereference(ipn->tunnels[prio][h]);
1008                         while (t != NULL) {
1009                                 unregister_netdevice_queue(t->dev, head);
1010                                 t = rtnl_dereference(t->next);
1011                         }
1012                 }
1013         }
1014 }
1015
1016 static int __net_init ipip_init_net(struct net *net)
1017 {
1018         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1019         struct ip_tunnel *t;
1020         int err;
1021
1022         ipn->tunnels[0] = ipn->tunnels_wc;
1023         ipn->tunnels[1] = ipn->tunnels_l;
1024         ipn->tunnels[2] = ipn->tunnels_r;
1025         ipn->tunnels[3] = ipn->tunnels_r_l;
1026
1027         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
1028                                            "tunl0",
1029                                            ipip_tunnel_setup);
1030         if (!ipn->fb_tunnel_dev) {
1031                 err = -ENOMEM;
1032                 goto err_alloc_dev;
1033         }
1034         dev_net_set(ipn->fb_tunnel_dev, net);
1035
1036         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
1037         if (err)
1038                 goto err_reg_dev;
1039
1040         if ((err = register_netdev(ipn->fb_tunnel_dev)))
1041                 goto err_reg_dev;
1042
1043         t = netdev_priv(ipn->fb_tunnel_dev);
1044
1045         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
1046         return 0;
1047
1048 err_reg_dev:
1049         ipip_dev_free(ipn->fb_tunnel_dev);
1050 err_alloc_dev:
1051         /* nothing */
1052         return err;
1053 }
1054
1055 static void __net_exit ipip_exit_net(struct net *net)
1056 {
1057         struct ipip_net *ipn = net_generic(net, ipip_net_id);
1058         LIST_HEAD(list);
1059
1060         rtnl_lock();
1061         ipip_destroy_tunnels(ipn, &list);
1062         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
1063         unregister_netdevice_many(&list);
1064         rtnl_unlock();
1065 }
1066
1067 static struct pernet_operations ipip_net_ops = {
1068         .init = ipip_init_net,
1069         .exit = ipip_exit_net,
1070         .id   = &ipip_net_id,
1071         .size = sizeof(struct ipip_net),
1072 };
1073
1074 static int __init ipip_init(void)
1075 {
1076         int err;
1077
1078         printk(banner);
1079
1080         err = register_pernet_device(&ipip_net_ops);
1081         if (err < 0)
1082                 return err;
1083         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
1084         if (err < 0) {
1085                 pr_info("%s: can't register tunnel\n", __func__);
1086                 goto xfrm_tunnel_failed;
1087         }
1088         err = rtnl_link_register(&ipip_link_ops);
1089         if (err < 0)
1090                 goto rtnl_link_failed;
1091
1092 out:
1093         return err;
1094
1095 rtnl_link_failed:
1096         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1097 xfrm_tunnel_failed:
1098         unregister_pernet_device(&ipip_net_ops);
1099         goto out;
1100 }
1101
1102 static void __exit ipip_fini(void)
1103 {
1104         rtnl_link_unregister(&ipip_link_ops);
1105         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1106                 pr_info("%s: can't deregister tunnel\n", __func__);
1107
1108         unregister_pernet_device(&ipip_net_ops);
1109 }
1110
1111 module_init(ipip_init);
1112 module_exit(ipip_fini);
1113 MODULE_LICENSE("GPL");
1114 MODULE_ALIAS_NETDEV("tunl0");