Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[pandora-kernel.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
/*
 * This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c.
 *
 * For comments, look at net/ipv4/ip_gre.c. --ANK
 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesized so that any expression may be passed
 * without the cast or the shift binding to only part of it, and the mask is
 * derived from HASH_SIZE so the two cannot drift apart.
 */
#define HASH(addr) \
	((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
122
123 static int ipip_net_id __read_mostly;
124 struct ipip_net {
125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128         struct ip_tunnel __rcu *tunnels_wc[1];
129         struct ip_tunnel __rcu **tunnels[4];
130
131         struct net_device *fb_tunnel_dev;
132 };
133
/* Defined further down; needed earlier by ipip_tunnel_locate(). */
static void ipip_tunnel_setup(struct net_device *dev);
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
137
138 /*
139  * Locking : hash tables are protected by RCU and RTNL
140  */
141
/*
 * Walk one hash chain under RCU.  The iterator is NOT a macro parameter:
 * the expansion uses a variable named "t" that must exist in the caller.
 */
#define for_each_ip_tunnel_rcu(start)			\
	for (t = rcu_dereference(start);		\
	     t;						\
	     t = rcu_dereference(t->next))
144
/*
 * Frequently updated counters are kept per CPU; the remaining ones are
 * shared and live in netdev->stats.
 */
struct pcpu_tstats {
	unsigned long	rx_packets;	/* packets received on this CPU */
	unsigned long	rx_bytes;	/* bytes received on this CPU */
	unsigned long	tx_packets;	/* packets sent on this CPU */
	unsigned long	tx_bytes;	/* bytes sent on this CPU */
};
152
153 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154 {
155         struct pcpu_tstats sum = { 0 };
156         int i;
157
158         for_each_possible_cpu(i) {
159                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161                 sum.rx_packets += tstats->rx_packets;
162                 sum.rx_bytes   += tstats->rx_bytes;
163                 sum.tx_packets += tstats->tx_packets;
164                 sum.tx_bytes   += tstats->tx_bytes;
165         }
166         dev->stats.rx_packets = sum.rx_packets;
167         dev->stats.rx_bytes   = sum.rx_bytes;
168         dev->stats.tx_packets = sum.tx_packets;
169         dev->stats.tx_bytes   = sum.tx_bytes;
170         return &dev->stats;
171 }
172
173 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
174                 __be32 remote, __be32 local)
175 {
176         unsigned int h0 = HASH(remote);
177         unsigned int h1 = HASH(local);
178         struct ip_tunnel *t;
179         struct ipip_net *ipn = net_generic(net, ipip_net_id);
180
181         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
182                 if (local == t->parms.iph.saddr &&
183                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
184                         return t;
185
186         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
187                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
191                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         t = rcu_dereference(ipn->tunnels_wc[0]);
195         if (t && (t->dev->flags&IFF_UP))
196                 return t;
197         return NULL;
198 }
199
200 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
201                 struct ip_tunnel_parm *parms)
202 {
203         __be32 remote = parms->iph.daddr;
204         __be32 local = parms->iph.saddr;
205         unsigned int h = 0;
206         int prio = 0;
207
208         if (remote) {
209                 prio |= 2;
210                 h ^= HASH(remote);
211         }
212         if (local) {
213                 prio |= 1;
214                 h ^= HASH(local);
215         }
216         return &ipn->tunnels[prio][h];
217 }
218
219 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
220                 struct ip_tunnel *t)
221 {
222         return __ipip_bucket(ipn, &t->parms);
223 }
224
225 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
226 {
227         struct ip_tunnel __rcu **tp;
228         struct ip_tunnel *iter;
229
230         for (tp = ipip_bucket(ipn, t);
231              (iter = rtnl_dereference(*tp)) != NULL;
232              tp = &iter->next) {
233                 if (t == iter) {
234                         RCU_INIT_POINTER(*tp, t->next);
235                         break;
236                 }
237         }
238 }
239
240 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
243
244         RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
245         RCU_INIT_POINTER(*tp, t);
246 }
247
248 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
249                 struct ip_tunnel_parm *parms, int create)
250 {
251         __be32 remote = parms->iph.daddr;
252         __be32 local = parms->iph.saddr;
253         struct ip_tunnel *t, *nt;
254         struct ip_tunnel __rcu **tp;
255         struct net_device *dev;
256         char name[IFNAMSIZ];
257         struct ipip_net *ipn = net_generic(net, ipip_net_id);
258
259         for (tp = __ipip_bucket(ipn, parms);
260                  (t = rtnl_dereference(*tp)) != NULL;
261                  tp = &t->next) {
262                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
263                         return t;
264         }
265         if (!create)
266                 return NULL;
267
268         if (parms->name[0])
269                 strlcpy(name, parms->name, IFNAMSIZ);
270         else
271                 strcpy(name, "tunl%d");
272
273         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
274         if (dev == NULL)
275                 return NULL;
276
277         dev_net_set(dev, net);
278
279         nt = netdev_priv(dev);
280         nt->parms = *parms;
281
282         if (ipip_tunnel_init(dev) < 0)
283                 goto failed_free;
284
285         if (register_netdevice(dev) < 0)
286                 goto failed_free;
287
288         dev_hold(dev);
289         ipip_tunnel_link(ipn, nt);
290         return nt;
291
292 failed_free:
293         ipip_dev_free(dev);
294         return NULL;
295 }
296
297 /* called with RTNL */
298 static void ipip_tunnel_uninit(struct net_device *dev)
299 {
300         struct net *net = dev_net(dev);
301         struct ipip_net *ipn = net_generic(net, ipip_net_id);
302
303         if (dev == ipn->fb_tunnel_dev)
304                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
305         else
306                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
307         dev_put(dev);
308 }
309
310 static int ipip_err(struct sk_buff *skb, u32 info)
311 {
312
313 /* All the routers (except for Linux) return only
314    8 bytes of packet payload. It means, that precise relaying of
315    ICMP in the real Internet is absolutely infeasible.
316  */
317         const struct iphdr *iph = (const struct iphdr *)skb->data;
318         const int type = icmp_hdr(skb)->type;
319         const int code = icmp_hdr(skb)->code;
320         struct ip_tunnel *t;
321         int err;
322
323         switch (type) {
324         default:
325         case ICMP_PARAMETERPROB:
326                 return 0;
327
328         case ICMP_DEST_UNREACH:
329                 switch (code) {
330                 case ICMP_SR_FAILED:
331                 case ICMP_PORT_UNREACH:
332                         /* Impossible event. */
333                         return 0;
334                 case ICMP_FRAG_NEEDED:
335                         /* Soft state for pmtu is maintained by IP core. */
336                         return 0;
337                 default:
338                         /* All others are translated to HOST_UNREACH.
339                            rfc2003 contains "deep thoughts" about NET_UNREACH,
340                            I believe they are just ether pollution. --ANK
341                          */
342                         break;
343                 }
344                 break;
345         case ICMP_TIME_EXCEEDED:
346                 if (code != ICMP_EXC_TTL)
347                         return 0;
348                 break;
349         }
350
351         err = -ENOENT;
352
353         rcu_read_lock();
354         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
355         if (t == NULL || t->parms.iph.daddr == 0)
356                 goto out;
357
358         err = 0;
359         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
360                 goto out;
361
362         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
363                 t->err_count++;
364         else
365                 t->err_count = 1;
366         t->err_time = jiffies;
367 out:
368         rcu_read_unlock();
369         return err;
370 }
371
372 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
373                                         struct sk_buff *skb)
374 {
375         struct iphdr *inner_iph = ip_hdr(skb);
376
377         if (INET_ECN_is_ce(outer_iph->tos))
378                 IP_ECN_set_ce(inner_iph);
379 }
380
381 static int ipip_rcv(struct sk_buff *skb)
382 {
383         struct ip_tunnel *tunnel;
384         const struct iphdr *iph = ip_hdr(skb);
385
386         rcu_read_lock();
387         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
388         if (tunnel != NULL) {
389                 struct pcpu_tstats *tstats;
390
391                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
392                         rcu_read_unlock();
393                         kfree_skb(skb);
394                         return 0;
395                 }
396
397                 secpath_reset(skb);
398
399                 skb->mac_header = skb->network_header;
400                 skb_reset_network_header(skb);
401                 skb->protocol = htons(ETH_P_IP);
402                 skb->pkt_type = PACKET_HOST;
403
404                 tstats = this_cpu_ptr(tunnel->dev->tstats);
405                 tstats->rx_packets++;
406                 tstats->rx_bytes += skb->len;
407
408                 __skb_tunnel_rx(skb, tunnel->dev);
409
410                 ipip_ecn_decapsulate(iph, skb);
411
412                 netif_rx(skb);
413
414                 rcu_read_unlock();
415                 return 0;
416         }
417         rcu_read_unlock();
418
419         return -1;
420 }
421
422 /*
423  *      This function assumes it is being called from dev_queue_xmit()
424  *      and that skb is filled properly by that function.
425  */
426
427 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
428 {
429         struct ip_tunnel *tunnel = netdev_priv(dev);
430         struct pcpu_tstats *tstats;
431         const struct iphdr  *tiph = &tunnel->parms.iph;
432         u8     tos = tunnel->parms.iph.tos;
433         __be16 df = tiph->frag_off;
434         struct rtable *rt;                      /* Route to the other host */
435         struct net_device *tdev;                /* Device to other host */
436         const struct iphdr  *old_iph = ip_hdr(skb);
437         struct iphdr  *iph;                     /* Our new IP header */
438         unsigned int max_headroom;              /* The extra header space needed */
439         __be32 dst = tiph->daddr;
440         struct flowi4 fl4;
441         int    mtu;
442
443         if (skb->protocol != htons(ETH_P_IP))
444                 goto tx_error;
445
446         if (tos & 1)
447                 tos = old_iph->tos;
448
449         if (!dst) {
450                 /* NBMA tunnel */
451                 if ((rt = skb_rtable(skb)) == NULL) {
452                         dev->stats.tx_fifo_errors++;
453                         goto tx_error;
454                 }
455                 if ((dst = rt->rt_gateway) == 0)
456                         goto tx_error_icmp;
457         }
458
459         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
460                                    dst, tiph->saddr,
461                                    0, 0,
462                                    IPPROTO_IPIP, RT_TOS(tos),
463                                    tunnel->parms.link);
464         if (IS_ERR(rt)) {
465                 dev->stats.tx_carrier_errors++;
466                 goto tx_error_icmp;
467         }
468         tdev = rt->dst.dev;
469
470         if (tdev == dev) {
471                 ip_rt_put(rt);
472                 dev->stats.collisions++;
473                 goto tx_error;
474         }
475
476         df |= old_iph->frag_off & htons(IP_DF);
477
478         if (df) {
479                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
480
481                 if (mtu < 68) {
482                         dev->stats.collisions++;
483                         ip_rt_put(rt);
484                         goto tx_error;
485                 }
486
487                 if (skb_dst(skb))
488                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
489
490                 if ((old_iph->frag_off & htons(IP_DF)) &&
491                     mtu < ntohs(old_iph->tot_len)) {
492                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
493                                   htonl(mtu));
494                         ip_rt_put(rt);
495                         goto tx_error;
496                 }
497         }
498
499         if (tunnel->err_count > 0) {
500                 if (time_before(jiffies,
501                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
502                         tunnel->err_count--;
503                         dst_link_failure(skb);
504                 } else
505                         tunnel->err_count = 0;
506         }
507
508         /*
509          * Okay, now see if we can stuff it in the buffer as-is.
510          */
511         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
512
513         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
514             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
515                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
516                 if (!new_skb) {
517                         ip_rt_put(rt);
518                         dev->stats.tx_dropped++;
519                         dev_kfree_skb(skb);
520                         return NETDEV_TX_OK;
521                 }
522                 if (skb->sk)
523                         skb_set_owner_w(new_skb, skb->sk);
524                 dev_kfree_skb(skb);
525                 skb = new_skb;
526                 old_iph = ip_hdr(skb);
527         }
528
529         skb->transport_header = skb->network_header;
530         skb_push(skb, sizeof(struct iphdr));
531         skb_reset_network_header(skb);
532         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
533         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
534                               IPSKB_REROUTED);
535         skb_dst_drop(skb);
536         skb_dst_set(skb, &rt->dst);
537
538         /*
539          *      Push down and install the IPIP header.
540          */
541
542         iph                     =       ip_hdr(skb);
543         iph->version            =       4;
544         iph->ihl                =       sizeof(struct iphdr)>>2;
545         iph->frag_off           =       df;
546         iph->protocol           =       IPPROTO_IPIP;
547         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
548         iph->daddr              =       fl4.daddr;
549         iph->saddr              =       fl4.saddr;
550
551         if ((iph->ttl = tiph->ttl) == 0)
552                 iph->ttl        =       old_iph->ttl;
553
554         nf_reset(skb);
555         tstats = this_cpu_ptr(dev->tstats);
556         __IPTUNNEL_XMIT(tstats, &dev->stats);
557         return NETDEV_TX_OK;
558
559 tx_error_icmp:
560         dst_link_failure(skb);
561 tx_error:
562         dev->stats.tx_errors++;
563         dev_kfree_skb(skb);
564         return NETDEV_TX_OK;
565 }
566
567 static void ipip_tunnel_bind_dev(struct net_device *dev)
568 {
569         struct net_device *tdev = NULL;
570         struct ip_tunnel *tunnel;
571         const struct iphdr *iph;
572
573         tunnel = netdev_priv(dev);
574         iph = &tunnel->parms.iph;
575
576         if (iph->daddr) {
577                 struct rtable *rt;
578                 struct flowi4 fl4;
579
580                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
581                                            iph->daddr, iph->saddr,
582                                            0, 0,
583                                            IPPROTO_IPIP,
584                                            RT_TOS(iph->tos),
585                                            tunnel->parms.link);
586                 if (!IS_ERR(rt)) {
587                         tdev = rt->dst.dev;
588                         ip_rt_put(rt);
589                 }
590                 dev->flags |= IFF_POINTOPOINT;
591         }
592
593         if (!tdev && tunnel->parms.link)
594                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
595
596         if (tdev) {
597                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
598                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
599         }
600         dev->iflink = tunnel->parms.link;
601 }
602
603 static int
604 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
605 {
606         int err = 0;
607         struct ip_tunnel_parm p;
608         struct ip_tunnel *t;
609         struct net *net = dev_net(dev);
610         struct ipip_net *ipn = net_generic(net, ipip_net_id);
611
612         switch (cmd) {
613         case SIOCGETTUNNEL:
614                 t = NULL;
615                 if (dev == ipn->fb_tunnel_dev) {
616                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
617                                 err = -EFAULT;
618                                 break;
619                         }
620                         t = ipip_tunnel_locate(net, &p, 0);
621                 }
622                 if (t == NULL)
623                         t = netdev_priv(dev);
624                 memcpy(&p, &t->parms, sizeof(p));
625                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
626                         err = -EFAULT;
627                 break;
628
629         case SIOCADDTUNNEL:
630         case SIOCCHGTUNNEL:
631                 err = -EPERM;
632                 if (!capable(CAP_NET_ADMIN))
633                         goto done;
634
635                 err = -EFAULT;
636                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
637                         goto done;
638
639                 err = -EINVAL;
640                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
641                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
642                         goto done;
643                 if (p.iph.ttl)
644                         p.iph.frag_off |= htons(IP_DF);
645
646                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
647
648                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
649                         if (t != NULL) {
650                                 if (t->dev != dev) {
651                                         err = -EEXIST;
652                                         break;
653                                 }
654                         } else {
655                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
656                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
657                                         err = -EINVAL;
658                                         break;
659                                 }
660                                 t = netdev_priv(dev);
661                                 ipip_tunnel_unlink(ipn, t);
662                                 synchronize_net();
663                                 t->parms.iph.saddr = p.iph.saddr;
664                                 t->parms.iph.daddr = p.iph.daddr;
665                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
666                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
667                                 ipip_tunnel_link(ipn, t);
668                                 netdev_state_change(dev);
669                         }
670                 }
671
672                 if (t) {
673                         err = 0;
674                         if (cmd == SIOCCHGTUNNEL) {
675                                 t->parms.iph.ttl = p.iph.ttl;
676                                 t->parms.iph.tos = p.iph.tos;
677                                 t->parms.iph.frag_off = p.iph.frag_off;
678                                 if (t->parms.link != p.link) {
679                                         t->parms.link = p.link;
680                                         ipip_tunnel_bind_dev(dev);
681                                         netdev_state_change(dev);
682                                 }
683                         }
684                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
685                                 err = -EFAULT;
686                 } else
687                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
688                 break;
689
690         case SIOCDELTUNNEL:
691                 err = -EPERM;
692                 if (!capable(CAP_NET_ADMIN))
693                         goto done;
694
695                 if (dev == ipn->fb_tunnel_dev) {
696                         err = -EFAULT;
697                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
698                                 goto done;
699                         err = -ENOENT;
700                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
701                                 goto done;
702                         err = -EPERM;
703                         if (t->dev == ipn->fb_tunnel_dev)
704                                 goto done;
705                         dev = t->dev;
706                 }
707                 unregister_netdevice(dev);
708                 err = 0;
709                 break;
710
711         default:
712                 err = -EINVAL;
713         }
714
715 done:
716         return err;
717 }
718
719 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
720 {
721         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
722                 return -EINVAL;
723         dev->mtu = new_mtu;
724         return 0;
725 }
726
727 static const struct net_device_ops ipip_netdev_ops = {
728         .ndo_uninit     = ipip_tunnel_uninit,
729         .ndo_start_xmit = ipip_tunnel_xmit,
730         .ndo_do_ioctl   = ipip_tunnel_ioctl,
731         .ndo_change_mtu = ipip_tunnel_change_mtu,
732         .ndo_get_stats  = ipip_get_stats,
733 };
734
735 static void ipip_dev_free(struct net_device *dev)
736 {
737         free_percpu(dev->tstats);
738         free_netdev(dev);
739 }
740
741 static void ipip_tunnel_setup(struct net_device *dev)
742 {
743         dev->netdev_ops         = &ipip_netdev_ops;
744         dev->destructor         = ipip_dev_free;
745
746         dev->type               = ARPHRD_TUNNEL;
747         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
748         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
749         dev->flags              = IFF_NOARP;
750         dev->iflink             = 0;
751         dev->addr_len           = 4;
752         dev->features           |= NETIF_F_NETNS_LOCAL;
753         dev->features           |= NETIF_F_LLTX;
754         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
755 }
756
757 static int ipip_tunnel_init(struct net_device *dev)
758 {
759         struct ip_tunnel *tunnel = netdev_priv(dev);
760
761         tunnel->dev = dev;
762         strcpy(tunnel->parms.name, dev->name);
763
764         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
765         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
766
767         ipip_tunnel_bind_dev(dev);
768
769         dev->tstats = alloc_percpu(struct pcpu_tstats);
770         if (!dev->tstats)
771                 return -ENOMEM;
772
773         return 0;
774 }
775
776 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
777 {
778         struct ip_tunnel *tunnel = netdev_priv(dev);
779         struct iphdr *iph = &tunnel->parms.iph;
780         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
781
782         tunnel->dev = dev;
783         strcpy(tunnel->parms.name, dev->name);
784
785         iph->version            = 4;
786         iph->protocol           = IPPROTO_IPIP;
787         iph->ihl                = 5;
788
789         dev->tstats = alloc_percpu(struct pcpu_tstats);
790         if (!dev->tstats)
791                 return -ENOMEM;
792
793         dev_hold(dev);
794         RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
795         return 0;
796 }
797
798 static struct xfrm_tunnel ipip_handler __read_mostly = {
799         .handler        =       ipip_rcv,
800         .err_handler    =       ipip_err,
801         .priority       =       1,
802 };
803
804 static const char banner[] __initconst =
805         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
806
807 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
808 {
809         int prio;
810
811         for (prio = 1; prio < 4; prio++) {
812                 int h;
813                 for (h = 0; h < HASH_SIZE; h++) {
814                         struct ip_tunnel *t;
815
816                         t = rtnl_dereference(ipn->tunnels[prio][h]);
817                         while (t != NULL) {
818                                 unregister_netdevice_queue(t->dev, head);
819                                 t = rtnl_dereference(t->next);
820                         }
821                 }
822         }
823 }
824
825 static int __net_init ipip_init_net(struct net *net)
826 {
827         struct ipip_net *ipn = net_generic(net, ipip_net_id);
828         int err;
829
830         ipn->tunnels[0] = ipn->tunnels_wc;
831         ipn->tunnels[1] = ipn->tunnels_l;
832         ipn->tunnels[2] = ipn->tunnels_r;
833         ipn->tunnels[3] = ipn->tunnels_r_l;
834
835         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
836                                            "tunl0",
837                                            ipip_tunnel_setup);
838         if (!ipn->fb_tunnel_dev) {
839                 err = -ENOMEM;
840                 goto err_alloc_dev;
841         }
842         dev_net_set(ipn->fb_tunnel_dev, net);
843
844         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
845         if (err)
846                 goto err_reg_dev;
847
848         if ((err = register_netdev(ipn->fb_tunnel_dev)))
849                 goto err_reg_dev;
850
851         return 0;
852
853 err_reg_dev:
854         ipip_dev_free(ipn->fb_tunnel_dev);
855 err_alloc_dev:
856         /* nothing */
857         return err;
858 }
859
860 static void __net_exit ipip_exit_net(struct net *net)
861 {
862         struct ipip_net *ipn = net_generic(net, ipip_net_id);
863         LIST_HEAD(list);
864
865         rtnl_lock();
866         ipip_destroy_tunnels(ipn, &list);
867         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
868         unregister_netdevice_many(&list);
869         rtnl_unlock();
870 }
871
872 static struct pernet_operations ipip_net_ops = {
873         .init = ipip_init_net,
874         .exit = ipip_exit_net,
875         .id   = &ipip_net_id,
876         .size = sizeof(struct ipip_net),
877 };
878
879 static int __init ipip_init(void)
880 {
881         int err;
882
883         printk(banner);
884
885         err = register_pernet_device(&ipip_net_ops);
886         if (err < 0)
887                 return err;
888         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
889         if (err < 0) {
890                 unregister_pernet_device(&ipip_net_ops);
891                 printk(KERN_INFO "ipip init: can't register tunnel\n");
892         }
893         return err;
894 }
895
896 static void __exit ipip_fini(void)
897 {
898         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
899                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
900
901         unregister_pernet_device(&ipip_net_ops);
902 }
903
904 module_init(ipip_init);
905 module_exit(ipip_fini);
906 MODULE_LICENSE("GPL");
907 MODULE_ALIAS_NETDEV("tunl0");