Merge branch 'for-3.2-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj...
[pandora-kernel.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is a truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..15.
 *
 * The argument is fully parenthesised so that a compound expression
 * (e.g. HASH(a | b)) is hashed as a whole instead of being torn apart
 * by operator precedence inside the expansion.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
122
/* Identifier passed to net_generic() to fetch this module's per-namespace state. */
static int ipip_net_id __read_mostly;

/*
 * Per network namespace state of the IPIP driver.
 *
 * Tunnels are kept in four tables, selected by which endpoint addresses
 * are configured (see __ipip_bucket()); tunnels[] indexes those tables
 * by priority and is filled in by ipip_init_net().
 */
struct ipip_net {
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; /* remote and local set */
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];   /* only remote set */
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];   /* only local set */
        struct ip_tunnel __rcu *tunnels_wc[1];          /* neither set (wildcard) */
        struct ip_tunnel __rcu **tunnels[4];            /* the four tables, by priority */

        struct net_device *fb_tunnel_dev;               /* fallback device ("tunl0") */
};
133
/* Forward declarations for helpers defined further down in this file. */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);

/*
 * Locking : hash tables are protected by RCU and RTNL
 */

/*
 * Walk one hash chain starting at @start using rcu_dereference().
 * The iterator is a variable named `t` (struct ip_tunnel *) that the
 * caller must have declared in the enclosing scope.
 */
#define for_each_ip_tunnel_rcu(start) \
        for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144
/*
 * often modified stats are per cpu, other are shared (netdev->stats)
 *
 * One instance exists per possible CPU (allocated with alloc_percpu());
 * ipip_get_stats() sums them into dev->stats.
 */
struct pcpu_tstats {
        unsigned long   rx_packets;     /* packets received through the tunnel */
        unsigned long   rx_bytes;       /* bytes received through the tunnel */
        unsigned long   tx_packets;     /* packets transmitted through the tunnel */
        unsigned long   tx_bytes;       /* bytes transmitted through the tunnel */
};
152
153 static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154 {
155         struct pcpu_tstats sum = { 0 };
156         int i;
157
158         for_each_possible_cpu(i) {
159                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161                 sum.rx_packets += tstats->rx_packets;
162                 sum.rx_bytes   += tstats->rx_bytes;
163                 sum.tx_packets += tstats->tx_packets;
164                 sum.tx_bytes   += tstats->tx_bytes;
165         }
166         dev->stats.rx_packets = sum.rx_packets;
167         dev->stats.rx_bytes   = sum.rx_bytes;
168         dev->stats.tx_packets = sum.tx_packets;
169         dev->stats.tx_bytes   = sum.tx_bytes;
170         return &dev->stats;
171 }
172
173 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
174                 __be32 remote, __be32 local)
175 {
176         unsigned int h0 = HASH(remote);
177         unsigned int h1 = HASH(local);
178         struct ip_tunnel *t;
179         struct ipip_net *ipn = net_generic(net, ipip_net_id);
180
181         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
182                 if (local == t->parms.iph.saddr &&
183                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
184                         return t;
185
186         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
187                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
188                         return t;
189
190         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
191                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
192                         return t;
193
194         t = rcu_dereference(ipn->tunnels_wc[0]);
195         if (t && (t->dev->flags&IFF_UP))
196                 return t;
197         return NULL;
198 }
199
200 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
201                 struct ip_tunnel_parm *parms)
202 {
203         __be32 remote = parms->iph.daddr;
204         __be32 local = parms->iph.saddr;
205         unsigned int h = 0;
206         int prio = 0;
207
208         if (remote) {
209                 prio |= 2;
210                 h ^= HASH(remote);
211         }
212         if (local) {
213                 prio |= 1;
214                 h ^= HASH(local);
215         }
216         return &ipn->tunnels[prio][h];
217 }
218
219 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
220                 struct ip_tunnel *t)
221 {
222         return __ipip_bucket(ipn, &t->parms);
223 }
224
225 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
226 {
227         struct ip_tunnel __rcu **tp;
228         struct ip_tunnel *iter;
229
230         for (tp = ipip_bucket(ipn, t);
231              (iter = rtnl_dereference(*tp)) != NULL;
232              tp = &iter->next) {
233                 if (t == iter) {
234                         RCU_INIT_POINTER(*tp, t->next);
235                         break;
236                 }
237         }
238 }
239
240 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
243
244         RCU_INIT_POINTER(t->next, rtnl_dereference(*tp));
245         RCU_INIT_POINTER(*tp, t);
246 }
247
248 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
249                 struct ip_tunnel_parm *parms, int create)
250 {
251         __be32 remote = parms->iph.daddr;
252         __be32 local = parms->iph.saddr;
253         struct ip_tunnel *t, *nt;
254         struct ip_tunnel __rcu **tp;
255         struct net_device *dev;
256         char name[IFNAMSIZ];
257         struct ipip_net *ipn = net_generic(net, ipip_net_id);
258
259         for (tp = __ipip_bucket(ipn, parms);
260                  (t = rtnl_dereference(*tp)) != NULL;
261                  tp = &t->next) {
262                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
263                         return t;
264         }
265         if (!create)
266                 return NULL;
267
268         if (parms->name[0])
269                 strlcpy(name, parms->name, IFNAMSIZ);
270         else
271                 strcpy(name, "tunl%d");
272
273         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
274         if (dev == NULL)
275                 return NULL;
276
277         dev_net_set(dev, net);
278
279         nt = netdev_priv(dev);
280         nt->parms = *parms;
281
282         if (ipip_tunnel_init(dev) < 0)
283                 goto failed_free;
284
285         if (register_netdevice(dev) < 0)
286                 goto failed_free;
287
288         strcpy(nt->parms.name, dev->name);
289
290         dev_hold(dev);
291         ipip_tunnel_link(ipn, nt);
292         return nt;
293
294 failed_free:
295         ipip_dev_free(dev);
296         return NULL;
297 }
298
299 /* called with RTNL */
300 static void ipip_tunnel_uninit(struct net_device *dev)
301 {
302         struct net *net = dev_net(dev);
303         struct ipip_net *ipn = net_generic(net, ipip_net_id);
304
305         if (dev == ipn->fb_tunnel_dev)
306                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
307         else
308                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
309         dev_put(dev);
310 }
311
312 static int ipip_err(struct sk_buff *skb, u32 info)
313 {
314
315 /* All the routers (except for Linux) return only
316    8 bytes of packet payload. It means, that precise relaying of
317    ICMP in the real Internet is absolutely infeasible.
318  */
319         const struct iphdr *iph = (const struct iphdr *)skb->data;
320         const int type = icmp_hdr(skb)->type;
321         const int code = icmp_hdr(skb)->code;
322         struct ip_tunnel *t;
323         int err;
324
325         switch (type) {
326         default:
327         case ICMP_PARAMETERPROB:
328                 return 0;
329
330         case ICMP_DEST_UNREACH:
331                 switch (code) {
332                 case ICMP_SR_FAILED:
333                 case ICMP_PORT_UNREACH:
334                         /* Impossible event. */
335                         return 0;
336                 case ICMP_FRAG_NEEDED:
337                         /* Soft state for pmtu is maintained by IP core. */
338                         return 0;
339                 default:
340                         /* All others are translated to HOST_UNREACH.
341                            rfc2003 contains "deep thoughts" about NET_UNREACH,
342                            I believe they are just ether pollution. --ANK
343                          */
344                         break;
345                 }
346                 break;
347         case ICMP_TIME_EXCEEDED:
348                 if (code != ICMP_EXC_TTL)
349                         return 0;
350                 break;
351         }
352
353         err = -ENOENT;
354
355         rcu_read_lock();
356         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
357         if (t == NULL || t->parms.iph.daddr == 0)
358                 goto out;
359
360         err = 0;
361         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
362                 goto out;
363
364         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
365                 t->err_count++;
366         else
367                 t->err_count = 1;
368         t->err_time = jiffies;
369 out:
370         rcu_read_unlock();
371         return err;
372 }
373
374 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
375                                         struct sk_buff *skb)
376 {
377         struct iphdr *inner_iph = ip_hdr(skb);
378
379         if (INET_ECN_is_ce(outer_iph->tos))
380                 IP_ECN_set_ce(inner_iph);
381 }
382
383 static int ipip_rcv(struct sk_buff *skb)
384 {
385         struct ip_tunnel *tunnel;
386         const struct iphdr *iph = ip_hdr(skb);
387
388         rcu_read_lock();
389         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
390         if (tunnel != NULL) {
391                 struct pcpu_tstats *tstats;
392
393                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
394                         rcu_read_unlock();
395                         kfree_skb(skb);
396                         return 0;
397                 }
398
399                 secpath_reset(skb);
400
401                 skb->mac_header = skb->network_header;
402                 skb_reset_network_header(skb);
403                 skb->protocol = htons(ETH_P_IP);
404                 skb->pkt_type = PACKET_HOST;
405
406                 tstats = this_cpu_ptr(tunnel->dev->tstats);
407                 tstats->rx_packets++;
408                 tstats->rx_bytes += skb->len;
409
410                 __skb_tunnel_rx(skb, tunnel->dev);
411
412                 ipip_ecn_decapsulate(iph, skb);
413
414                 netif_rx(skb);
415
416                 rcu_read_unlock();
417                 return 0;
418         }
419         rcu_read_unlock();
420
421         return -1;
422 }
423
/*
 *      This function assumes it is being called from dev_queue_xmit()
 *      and that skb is filled properly by that function.
 *
 *      ndo_start_xmit handler: prepends an outer IPv4 header built from
 *      the tunnel parameters and passes the packet to __IPTUNNEL_XMIT().
 *      Always returns NETDEV_TX_OK; on any failure the skb is freed and
 *      an error counter in dev->stats is incremented.
 */

static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct pcpu_tstats *tstats;
        const struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
        struct rtable *rt;                      /* Route to the other host */
        struct net_device *tdev;                /* Device to other host */
        const struct iphdr  *old_iph = ip_hdr(skb);
        struct iphdr  *iph;                     /* Our new IP header */
        unsigned int max_headroom;              /* The extra header space needed */
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;
        int    mtu;

        /* Only IPv4 payloads are accepted. */
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        /* Low bit set in the configured tos: use the inner packet's tos. */
        if (tos & 1)
                tos = old_iph->tos;

        if (!dst) {
                /* NBMA tunnel */
                /* No configured remote: use the gateway of the skb's route. */
                if ((rt = skb_rtable(skb)) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }
                if ((dst = rt->rt_gateway) == 0)
                        goto tx_error_icmp;
        }

        /* Route the outer packet; fl4 receives the addresses used below. */
        rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
                                   dst, tiph->saddr,
                                   0, 0,
                                   IPPROTO_IPIP, RT_TOS(tos),
                                   tunnel->parms.link);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        /* The route leads back into this tunnel device: drop. */
        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        /* Outer DF is the configured DF or-ed with the inner packet's DF. */
        df |= old_iph->frag_off & htons(IP_DF);

        if (df) {
                /* Payload mtu is the route mtu minus the outer header. */
                mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);

                if (mtu < 68) {
                        dev->stats.collisions++;
                        ip_rt_put(rt);
                        goto tx_error;
                }

                if (skb_dst(skb))
                        skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

                /* Inner packet has DF set and does not fit: report FRAG_NEEDED. */
                if ((old_iph->frag_off & htons(IP_DF)) &&
                    mtu < ntohs(old_iph->tot_len)) {
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                                  htonl(mtu));
                        ip_rt_put(rt);
                        goto tx_error;
                }
        }

        /*
         * Errors recorded by ipip_err(): while they are recent, consume one
         * per transmitted packet and signal a link failure; otherwise
         * forget them.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        /*
         * Okay, now see if we can stuff it in the buffer as-is.
         */
        max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));

        if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
            (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
                        dev->stats.tx_dropped++;
                        dev_kfree_skb(skb);
                        return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
                dev_kfree_skb(skb);
                skb = new_skb;
                /* The inner header now lives in the new buffer. */
                old_iph = ip_hdr(skb);
        }

        /* The inner IP header becomes the transport header of the outer packet. */
        skb->transport_header = skb->network_header;
        skb_push(skb, sizeof(struct iphdr));
        skb_reset_network_header(skb);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
        /* Replace the skb's dst with the outer route; rt is now owned by the skb. */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);

        /*
         *      Push down and install the IPIP header.
         */

        iph                     =       ip_hdr(skb);
        iph->version            =       4;
        iph->ihl                =       sizeof(struct iphdr)>>2;
        iph->frag_off           =       df;
        iph->protocol           =       IPPROTO_IPIP;
        iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
        iph->daddr              =       fl4.daddr;
        iph->saddr              =       fl4.saddr;

        /* A configured ttl of 0 means: copy the ttl of the inner packet. */
        if ((iph->ttl = tiph->ttl) == 0)
                iph->ttl        =       old_iph->ttl;

        nf_reset(skb);
        tstats = this_cpu_ptr(dev->tstats);
        __IPTUNNEL_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
568
569 static void ipip_tunnel_bind_dev(struct net_device *dev)
570 {
571         struct net_device *tdev = NULL;
572         struct ip_tunnel *tunnel;
573         const struct iphdr *iph;
574
575         tunnel = netdev_priv(dev);
576         iph = &tunnel->parms.iph;
577
578         if (iph->daddr) {
579                 struct rtable *rt;
580                 struct flowi4 fl4;
581
582                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
583                                            iph->daddr, iph->saddr,
584                                            0, 0,
585                                            IPPROTO_IPIP,
586                                            RT_TOS(iph->tos),
587                                            tunnel->parms.link);
588                 if (!IS_ERR(rt)) {
589                         tdev = rt->dst.dev;
590                         ip_rt_put(rt);
591                 }
592                 dev->flags |= IFF_POINTOPOINT;
593         }
594
595         if (!tdev && tunnel->parms.link)
596                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
597
598         if (tdev) {
599                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
600                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
601         }
602         dev->iflink = tunnel->parms.link;
603 }
604
/*
 * ndo_do_ioctl handler implementing the tunnel management ioctls.
 *
 * SIOCGETTUNNEL             - copy a tunnel's parameters to user space.
 * SIOCADDTUNNEL/CHGTUNNEL   - create or modify a tunnel (CAP_NET_ADMIN).
 * SIOCDELTUNNEL             - unregister a tunnel device (CAP_NET_ADMIN).
 *
 * The struct ip_tunnel_parm is exchanged through ifr->ifr_ifru.ifru_data.
 * When issued on the fallback device the parameters supplied by the user
 * select the tunnel to act on; otherwise @dev's own tunnel is used.
 *
 * Returns 0 on success or a negative errno (-EPERM, -EFAULT, -EINVAL,
 * -EEXIST, -ENOBUFS, -ENOENT).
 */
static int
ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct ipip_net *ipn = net_generic(net, ipip_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = ipip_tunnel_locate(net, &p, 0);
                }
                /* No match (or not the fallback device): report dev's own tunnel. */
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                /* Only a plain IPv4/IPIP header with no flag other than DF is valid. */
                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
                        goto done;
                /* A fixed ttl forces DF on the outer header. */
                if (p.iph.ttl)
                        p.iph.frag_off |= htons(IP_DF);

                /* Creates the tunnel only for SIOCADDTUNNEL. */
                t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                /* The requested addresses already belong to another device. */
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                /* Point-to-point-ness of an existing device cannot change. */
                                if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
                                    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                /*
                                 * Re-key dev's tunnel: unlink it, call
                                 * synchronize_net(), update the addresses
                                 * and link it into its new bucket.
                                 */
                                t = netdev_priv(dev);
                                ipip_tunnel_unlink(ipn, t);
                                synchronize_net();
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                ipip_tunnel_link(ipn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.iph.ttl = p.iph.ttl;
                                t->parms.iph.tos = p.iph.tos;
                                t->parms.iph.frag_off = p.iph.frag_off;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        /*
                                         * NOTE(review): this rebinds @dev, which
                                         * differs from t->dev when the ioctl was
                                         * issued on the fallback device - confirm
                                         * this is intended.
                                         */
                                        ipip_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!capable(CAP_NET_ADMIN))
                        goto done;

                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                                goto done;
                        err = -ENOENT;
                        if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
                                goto done;
                        /* The fallback device itself cannot be deleted. */
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
720
721 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
722 {
723         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
724                 return -EINVAL;
725         dev->mtu = new_mtu;
726         return 0;
727 }
728
/* Device operations installed on every IPIP tunnel device by ipip_tunnel_setup(). */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_uninit     = ipip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ipip_tunnel_change_mtu,
        .ndo_get_stats  = ipip_get_stats,
};
736
/*
 * Release a tunnel device: free its per-cpu statistics, then the
 * net_device itself.  Installed as dev->destructor and also called
 * directly on the setup failure paths in this file.
 */
static void ipip_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}
742
743 static void ipip_tunnel_setup(struct net_device *dev)
744 {
745         dev->netdev_ops         = &ipip_netdev_ops;
746         dev->destructor         = ipip_dev_free;
747
748         dev->type               = ARPHRD_TUNNEL;
749         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
750         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
751         dev->flags              = IFF_NOARP;
752         dev->iflink             = 0;
753         dev->addr_len           = 4;
754         dev->features           |= NETIF_F_NETNS_LOCAL;
755         dev->features           |= NETIF_F_LLTX;
756         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
757 }
758
759 static int ipip_tunnel_init(struct net_device *dev)
760 {
761         struct ip_tunnel *tunnel = netdev_priv(dev);
762
763         tunnel->dev = dev;
764
765         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
766         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
767
768         ipip_tunnel_bind_dev(dev);
769
770         dev->tstats = alloc_percpu(struct pcpu_tstats);
771         if (!dev->tstats)
772                 return -ENOMEM;
773
774         return 0;
775 }
776
777 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
778 {
779         struct ip_tunnel *tunnel = netdev_priv(dev);
780         struct iphdr *iph = &tunnel->parms.iph;
781         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
782
783         tunnel->dev = dev;
784         strcpy(tunnel->parms.name, dev->name);
785
786         iph->version            = 4;
787         iph->protocol           = IPPROTO_IPIP;
788         iph->ihl                = 5;
789
790         dev->tstats = alloc_percpu(struct pcpu_tstats);
791         if (!dev->tstats)
792                 return -ENOMEM;
793
794         dev_hold(dev);
795         RCU_INIT_POINTER(ipn->tunnels_wc[0], tunnel);
796         return 0;
797 }
798
/*
 * Receive and ICMP-error callbacks for IPIP packets; registered with
 * xfrm4_tunnel_register() in ipip_init().
 */
static struct xfrm_tunnel ipip_handler __read_mostly = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
};
804
/* Message printed once by ipip_init(); the log level is part of the string. */
static const char banner[] __initconst =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
807
808 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
809 {
810         int prio;
811
812         for (prio = 1; prio < 4; prio++) {
813                 int h;
814                 for (h = 0; h < HASH_SIZE; h++) {
815                         struct ip_tunnel *t;
816
817                         t = rtnl_dereference(ipn->tunnels[prio][h]);
818                         while (t != NULL) {
819                                 unregister_netdevice_queue(t->dev, head);
820                                 t = rtnl_dereference(t->next);
821                         }
822                 }
823         }
824 }
825
826 static int __net_init ipip_init_net(struct net *net)
827 {
828         struct ipip_net *ipn = net_generic(net, ipip_net_id);
829         struct ip_tunnel *t;
830         int err;
831
832         ipn->tunnels[0] = ipn->tunnels_wc;
833         ipn->tunnels[1] = ipn->tunnels_l;
834         ipn->tunnels[2] = ipn->tunnels_r;
835         ipn->tunnels[3] = ipn->tunnels_r_l;
836
837         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
838                                            "tunl0",
839                                            ipip_tunnel_setup);
840         if (!ipn->fb_tunnel_dev) {
841                 err = -ENOMEM;
842                 goto err_alloc_dev;
843         }
844         dev_net_set(ipn->fb_tunnel_dev, net);
845
846         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
847         if (err)
848                 goto err_reg_dev;
849
850         if ((err = register_netdev(ipn->fb_tunnel_dev)))
851                 goto err_reg_dev;
852
853         t = netdev_priv(ipn->fb_tunnel_dev);
854
855         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
856         return 0;
857
858 err_reg_dev:
859         ipip_dev_free(ipn->fb_tunnel_dev);
860 err_alloc_dev:
861         /* nothing */
862         return err;
863 }
864
/*
 * Per-namespace exit: under RTNL, queue all hashed tunnels and the
 * fallback device on a local list and unregister them in one batch.
 */
static void __net_exit ipip_exit_net(struct net *net)
{
        struct ipip_net *ipn = net_generic(net, ipip_net_id);
        LIST_HEAD(list);

        rtnl_lock();
        ipip_destroy_tunnels(ipn, &list);
        unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
876
/*
 * Per-namespace hooks registered in ipip_init(); .id and .size describe
 * the struct ipip_net state retrieved with net_generic(net, ipip_net_id).
 */
static struct pernet_operations ipip_net_ops = {
        .init = ipip_init_net,
        .exit = ipip_exit_net,
        .id   = &ipip_net_id,
        .size = sizeof(struct ipip_net),
};
883
884 static int __init ipip_init(void)
885 {
886         int err;
887
888         printk(banner);
889
890         err = register_pernet_device(&ipip_net_ops);
891         if (err < 0)
892                 return err;
893         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
894         if (err < 0) {
895                 unregister_pernet_device(&ipip_net_ops);
896                 printk(KERN_INFO "ipip init: can't register tunnel\n");
897         }
898         return err;
899 }
900
901 static void __exit ipip_fini(void)
902 {
903         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
904                 printk(KERN_INFO "ipip close: can't deregister tunnel\n");
905
906         unregister_pernet_device(&ipip_net_ops);
907 }
908
/* Module entry/exit points, license, and the netdev alias for "tunl0". */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETDEV("tunl0");