net/ipv4/ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM 1
69 #endif
70
71 /* Big lock, protecting the vif table, mrt cache and mroute socket state.
72    Note that changes are serialized via rtnl_lock.
73  */
74
75 static DEFINE_RWLOCK(mrt_lock);
76
77 /*
78  *      Multicast router control variables
79  */
80
81 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
82
83 static struct mfc_cache *mfc_unres_queue;               /* Queue of unresolved entries */
84
85 /* Special spinlock for queue of unresolved entries */
86 static DEFINE_SPINLOCK(mfc_unres_lock);
87
88 /* We return to Alan's original scheme. The hash table of resolved
89    entries is changed only in process context and protected by the
90    weak rwlock mrt_lock. The queue of unresolved entries is protected
91    by the strong spinlock mfc_unres_lock.
92
93    This way the data path needs no exclusive locks at all.
94  */
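/* Concretely: the forwarding fast path takes only read_lock(&mrt_lock)
 * (see ip_mr_input() below); all table updates take write_lock_bh()
 * from process context, and only the unresolved queue needs the
 * mfc_unres_lock spinlock, from both process and timer context.
 */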
95
96 static struct kmem_cache *mrt_cachep __read_mostly;
97
98 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
99 static int ipmr_cache_report(struct net *net,
100                              struct sk_buff *pkt, vifi_t vifi, int assert);
101 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
102
103 static struct timer_list ipmr_expire_timer;
104
105 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
106
107 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
108 {
109         struct net *net = dev_net(dev);
110
111         dev_close(dev);
112
113         dev = __dev_get_by_name(net, "tunl0");
114         if (dev) {
115                 const struct net_device_ops *ops = dev->netdev_ops;
116                 struct ifreq ifr;
117                 struct ip_tunnel_parm p;
118
119                 memset(&p, 0, sizeof(p));
120                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
121                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
122                 p.iph.version = 4;
123                 p.iph.ihl = 5;
124                 p.iph.protocol = IPPROTO_IPIP;
125                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
126                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
127
128                 if (ops->ndo_do_ioctl) {
129                         mm_segment_t oldfs = get_fs();
130
131                         set_fs(KERNEL_DS);
132                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
133                         set_fs(oldfs);
134                 }
135         }
136 }
137
138 static
139 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
140 {
141         struct net_device  *dev;
142
143         dev = __dev_get_by_name(net, "tunl0");
144
145         if (dev) {
146                 const struct net_device_ops *ops = dev->netdev_ops;
147                 int err;
148                 struct ifreq ifr;
149                 struct ip_tunnel_parm p;
150                 struct in_device  *in_dev;
151
152                 memset(&p, 0, sizeof(p));
153                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
154                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
155                 p.iph.version = 4;
156                 p.iph.ihl = 5;
157                 p.iph.protocol = IPPROTO_IPIP;
158                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
159                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
160
161                 if (ops->ndo_do_ioctl) {
162                         mm_segment_t oldfs = get_fs();
163
164                         set_fs(KERNEL_DS);
165                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
166                         set_fs(oldfs);
167                 } else
168                         err = -EOPNOTSUPP;
169
170                 dev = NULL;
171
172                 if (err == 0 &&
173                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
174                         dev->flags |= IFF_MULTICAST;
175
176                         in_dev = __in_dev_get_rtnl(dev);
177                         if (in_dev == NULL)
178                                 goto failure;
179
180                         ipv4_devconf_setall(in_dev);
181                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
182
183                         if (dev_open(dev))
184                                 goto failure;
185                         dev_hold(dev);
186                 }
187         }
188         return dev;
189
190 failure:
191         /* allow the register to be completed before unregistering. */
192         rtnl_unlock();
193         rtnl_lock();
194
195         unregister_netdevice(dev);
196         return NULL;
197 }
198
199 #ifdef CONFIG_IP_PIMSM
200
201 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
202 {
203         struct net *net = dev_net(dev);
204
205         read_lock(&mrt_lock);
206         dev->stats.tx_bytes += skb->len;
207         dev->stats.tx_packets++;
208         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
209                           IGMPMSG_WHOLEPKT);
210         read_unlock(&mrt_lock);
211         kfree_skb(skb);
212         return NETDEV_TX_OK;
213 }
214
215 static const struct net_device_ops reg_vif_netdev_ops = {
216         .ndo_start_xmit = reg_vif_xmit,
217 };
218
219 static void reg_vif_setup(struct net_device *dev)
220 {
221         dev->type               = ARPHRD_PIMREG;
222         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
223         dev->flags              = IFF_NOARP;
224         dev->netdev_ops         = &reg_vif_netdev_ops;
225         dev->destructor         = free_netdev;
226         dev->features           |= NETIF_F_NETNS_LOCAL;
227 }
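/* Note: the MTU above deliberately leaves room for the outer IP header
 * plus the 8-byte PIM register header that encapsulates frames sent to
 * this device.
 */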
228
229 static struct net_device *ipmr_reg_vif(struct net *net)
230 {
231         struct net_device *dev;
232         struct in_device *in_dev;
233
234         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
235
236         if (dev == NULL)
237                 return NULL;
238
239         dev_net_set(dev, net);
240
241         if (register_netdevice(dev)) {
242                 free_netdev(dev);
243                 return NULL;
244         }
245         dev->iflink = 0;
246
247         rcu_read_lock();
248         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
249                 rcu_read_unlock();
250                 goto failure;
251         }
252
253         ipv4_devconf_setall(in_dev);
254         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
255         rcu_read_unlock();
256
257         if (dev_open(dev))
258                 goto failure;
259
260         dev_hold(dev);
261
262         return dev;
263
264 failure:
265         /* allow the register to be completed before unregistering. */
266         rtnl_unlock();
267         rtnl_lock();
268
269         unregister_netdevice(dev);
270         return NULL;
271 }
272 #endif
273
274 /*
275  *      Delete a VIF entry
276  *      @notify: Set to 1 if the caller is a notifier_call
277  */
278
279 static int vif_delete(struct net *net, int vifi, int notify,
280                       struct list_head *head)
281 {
282         struct vif_device *v;
283         struct net_device *dev;
284         struct in_device *in_dev;
285
286         if (vifi < 0 || vifi >= net->ipv4.maxvif)
287                 return -EADDRNOTAVAIL;
288
289         v = &net->ipv4.vif_table[vifi];
290
291         write_lock_bh(&mrt_lock);
292         dev = v->dev;
293         v->dev = NULL;
294
295         if (!dev) {
296                 write_unlock_bh(&mrt_lock);
297                 return -EADDRNOTAVAIL;
298         }
299
300 #ifdef CONFIG_IP_PIMSM
301         if (vifi == net->ipv4.mroute_reg_vif_num)
302                 net->ipv4.mroute_reg_vif_num = -1;
303 #endif
304
305         if (vifi + 1 == net->ipv4.maxvif) {
306                 int tmp;
307                 for (tmp = vifi - 1; tmp >= 0; tmp--) {
308                         if (VIF_EXISTS(net, tmp))
309                                 break;
310                 }
311                 net->ipv4.maxvif = tmp + 1;
312         }
313
314         write_unlock_bh(&mrt_lock);
315
316         dev_set_allmulti(dev, -1);
317
318         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
319                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
320                 ip_rt_multicast_event(in_dev);
321         }
322
323         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
324                 unregister_netdevice_queue(dev, head);
325
326         dev_put(dev);
327         return 0;
328 }
329
330 static inline void ipmr_cache_free(struct mfc_cache *c)
331 {
332         release_net(mfc_net(c));
333         kmem_cache_free(mrt_cachep, c);
334 }
335
336 /* Destroy an unresolved cache entry, killing queued skbs
337    and reporting error to netlink readers.
338  */
339
340 static void ipmr_destroy_unres(struct mfc_cache *c)
341 {
342         struct sk_buff *skb;
343         struct nlmsgerr *e;
344         struct net *net = mfc_net(c);
345
346         atomic_dec(&net->ipv4.cache_resolve_queue_len);
347
348         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
349                 if (ip_hdr(skb)->version == 0) {
350                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
351                         nlh->nlmsg_type = NLMSG_ERROR;
352                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
353                         skb_trim(skb, nlh->nlmsg_len);
354                         e = NLMSG_DATA(nlh);
355                         e->error = -ETIMEDOUT;
356                         memset(&e->msg, 0, sizeof(e->msg));
357
358                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
359                 } else
360                         kfree_skb(skb);
361         }
362
363         ipmr_cache_free(c);
364 }
365
366
367 /* A single timer process services the whole unresolved queue. */
368
369 static void ipmr_expire_process(unsigned long dummy)
370 {
371         unsigned long now;
372         unsigned long expires;
373         struct mfc_cache *c, **cp;
374
375         if (!spin_trylock(&mfc_unres_lock)) {
376                 mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
377                 return;
378         }
379
380         if (mfc_unres_queue == NULL)
381                 goto out;
382
383         now = jiffies;
384         expires = 10*HZ;
385         cp = &mfc_unres_queue;
386
387         while ((c = *cp) != NULL) {
388                 if (time_after(c->mfc_un.unres.expires, now)) {
389                         unsigned long interval = c->mfc_un.unres.expires - now;
390                         if (interval < expires)
391                                 expires = interval;
392                         cp = &c->next;
393                         continue;
394                 }
395
396                 *cp = c->next;
397
398                 ipmr_destroy_unres(c);
399         }
400
401         if (mfc_unres_queue != NULL)
402                 mod_timer(&ipmr_expire_timer, jiffies + expires);
403
404 out:
405         spin_unlock(&mfc_unres_lock);
406 }
407
408 /* Fill the oifs list. Called with mrt_lock held for writing. */
409
410 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
411 {
412         int vifi;
413         struct net *net = mfc_net(cache);
414
415         cache->mfc_un.res.minvif = MAXVIFS;
416         cache->mfc_un.res.maxvif = 0;
417         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
418
419         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
420                 if (VIF_EXISTS(net, vifi) &&
421                     ttls[vifi] && ttls[vifi] < 255) {
422                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
423                         if (cache->mfc_un.res.minvif > vifi)
424                                 cache->mfc_un.res.minvif = vifi;
425                         if (cache->mfc_un.res.maxvif <= vifi)
426                                 cache->mfc_un.res.maxvif = vifi + 1;
427                 }
428         }
429 }
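/* A worked example (illustrative): assuming vifs 0 and 1 exist, with
 * ttls = { [0] = 1, [1] = 64 } and every other slot 0, minvif becomes
 * 0 and maxvif 2, and a packet arriving with TTL 2 is forwarded on
 * vif 0 only, since ip_mr_forward() requires ttl > ttls[vif].
 */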
430
431 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
432 {
433         int vifi = vifc->vifc_vifi;
434         struct vif_device *v = &net->ipv4.vif_table[vifi];
435         struct net_device *dev;
436         struct in_device *in_dev;
437         int err;
438
439         /* Is vif busy ? */
440         if (VIF_EXISTS(net, vifi))
441                 return -EADDRINUSE;
442
443         switch (vifc->vifc_flags) {
444 #ifdef CONFIG_IP_PIMSM
445         case VIFF_REGISTER:
446                 /*
447                  * Special Purpose VIF in PIM
448                  * All the packets will be sent to the daemon
449                  */
450                 if (net->ipv4.mroute_reg_vif_num >= 0)
451                         return -EADDRINUSE;
452                 dev = ipmr_reg_vif(net);
453                 if (!dev)
454                         return -ENOBUFS;
455                 err = dev_set_allmulti(dev, 1);
456                 if (err) {
457                         unregister_netdevice(dev);
458                         dev_put(dev);
459                         return err;
460                 }
461                 break;
462 #endif
463         case VIFF_TUNNEL:
464                 dev = ipmr_new_tunnel(net, vifc);
465                 if (!dev)
466                         return -ENOBUFS;
467                 err = dev_set_allmulti(dev, 1);
468                 if (err) {
469                         ipmr_del_tunnel(dev, vifc);
470                         dev_put(dev);
471                         return err;
472                 }
473                 break;
474
475         case VIFF_USE_IFINDEX:
476         case 0:
477                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
478                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
479                         if (dev && dev->ip_ptr == NULL) {
480                                 dev_put(dev);
481                                 return -EADDRNOTAVAIL;
482                         }
483                 } else
484                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
485
486                 if (!dev)
487                         return -EADDRNOTAVAIL;
488                 err = dev_set_allmulti(dev, 1);
489                 if (err) {
490                         dev_put(dev);
491                         return err;
492                 }
493                 break;
494         default:
495                 return -EINVAL;
496         }
497
498         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
499                 dev_put(dev);
500                 return -EADDRNOTAVAIL;
501         }
502         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
503         ip_rt_multicast_event(in_dev);
504
505         /*
506          *      Fill in the VIF structures
507          */
508         v->rate_limit = vifc->vifc_rate_limit;
509         v->local = vifc->vifc_lcl_addr.s_addr;
510         v->remote = vifc->vifc_rmt_addr.s_addr;
511         v->flags = vifc->vifc_flags;
512         if (!mrtsock)
513                 v->flags |= VIFF_STATIC;
514         v->threshold = vifc->vifc_threshold;
515         v->bytes_in = 0;
516         v->bytes_out = 0;
517         v->pkt_in = 0;
518         v->pkt_out = 0;
519         v->link = dev->ifindex;
520         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
521                 v->link = dev->iflink;
522
523         /* And finish update writing critical data */
524         write_lock_bh(&mrt_lock);
525         v->dev = dev;
526 #ifdef CONFIG_IP_PIMSM
527         if (v->flags&VIFF_REGISTER)
528                 net->ipv4.mroute_reg_vif_num = vifi;
529 #endif
530         if (vifi+1 > net->ipv4.maxvif)
531                 net->ipv4.maxvif = vifi+1;
532         write_unlock_bh(&mrt_lock);
533         return 0;
534 }
535
536 static struct mfc_cache *ipmr_cache_find(struct net *net,
537                                          __be32 origin,
538                                          __be32 mcastgrp)
539 {
540         int line = MFC_HASH(mcastgrp, origin);
541         struct mfc_cache *c;
542
543         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
544                 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
545                         break;
546         }
547         return c;
548 }
549
550 /*
551  *      Allocate a multicast cache entry
552  */
553 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
554 {
555         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
556         if (c == NULL)
557                 return NULL;
558         c->mfc_un.res.minvif = MAXVIFS;
559         mfc_net_set(c, net);
560         return c;
561 }
562
563 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
564 {
565         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
566         if (c == NULL)
567                 return NULL;
568         skb_queue_head_init(&c->mfc_un.unres.unresolved);
569         c->mfc_un.unres.expires = jiffies + 10*HZ;
570         mfc_net_set(c, net);
571         return c;
572 }
573
574 /*
575  *      A cache entry has gone into a resolved state from queued
576  */
577
578 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
579 {
580         struct sk_buff *skb;
581         struct nlmsgerr *e;
582
583         /*
584          *      Play the pending entries through our router
585          */
586
587         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
588                 if (ip_hdr(skb)->version == 0) {
589                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
590
591                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
592                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
593                                                   (u8 *)nlh);
594                         } else {
595                                 nlh->nlmsg_type = NLMSG_ERROR;
596                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
597                                 skb_trim(skb, nlh->nlmsg_len);
598                                 e = NLMSG_DATA(nlh);
599                                 e->error = -EMSGSIZE;
600                                 memset(&e->msg, 0, sizeof(e->msg));
601                         }
602
603                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
604                 } else
605                         ip_mr_forward(skb, c, 0);
606         }
607 }
608
609 /*
610  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
611  *      expects the following bizarre scheme.
612  *
613  *      Called under mrt_lock.
614  */
615
616 static int ipmr_cache_report(struct net *net,
617                              struct sk_buff *pkt, vifi_t vifi, int assert)
618 {
619         struct sk_buff *skb;
620         const int ihl = ip_hdrlen(pkt);
621         struct igmphdr *igmp;
622         struct igmpmsg *msg;
623         int ret;
624
625 #ifdef CONFIG_IP_PIMSM
626         if (assert == IGMPMSG_WHOLEPKT)
627                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
628         else
629 #endif
630                 skb = alloc_skb(128, GFP_ATOMIC);
631
632         if (!skb)
633                 return -ENOBUFS;
634
635 #ifdef CONFIG_IP_PIMSM
636         if (assert == IGMPMSG_WHOLEPKT) {
637                 /* Ugly, but we have no choice with this interface.
638                    Duplicate old header, fix ihl, length etc.
639                    And all this only to mangle msg->im_msgtype and
640                    to set msg->im_mbz to "mbz" :-)
641                  */
642                 skb_push(skb, sizeof(struct iphdr));
643                 skb_reset_network_header(skb);
644                 skb_reset_transport_header(skb);
645                 msg = (struct igmpmsg *)skb_network_header(skb);
646                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
647                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
648                 msg->im_mbz = 0;
649                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
650                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
651                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
652                                              sizeof(struct iphdr));
653         } else
654 #endif
655         {
656
657         /*
658          *      Copy the IP header
659          */
660
661         skb->network_header = skb->tail;
662         skb_put(skb, ihl);
663         skb_copy_to_linear_data(skb, pkt->data, ihl);
664         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
665         msg = (struct igmpmsg *)skb_network_header(skb);
666         msg->im_vif = vifi;
667         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
668
669         /*
670          *      Add our header
671          */
672
673         igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
674         igmp->type      =
675         msg->im_msgtype = assert;
676         igmp->code      =       0;
677         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
678         skb->transport_header = skb->network_header;
679         }
680
681         if (net->ipv4.mroute_sk == NULL) {
682                 kfree_skb(skb);
683                 return -EINVAL;
684         }
685
686         /*
687          *      Deliver to mrouted
688          */
689         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
690         if (ret < 0) {
691                 if (net_ratelimit())
692                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
693                 kfree_skb(skb);
694         }
695
696         return ret;
697 }
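/* For illustration, a minimal sketch of the receiving side (not the
 * actual mrouted source): the daemon read()s its IGMP socket and
 * recognises a kernel upcall by the zero protocol byte, which overlays
 * im_mbz in struct igmpmsg; handle_upcall() is a hypothetical helper.
 *
 *	char buf[2048];
 *	ssize_t n = read(mroute_fd, buf, sizeof(buf));
 *	struct igmpmsg *m = (struct igmpmsg *)buf;
 *
 *	if (n >= (ssize_t)sizeof(*m) && m->im_mbz == 0)
 *		handle_upcall(m->im_msgtype, m->im_vif,
 *			      m->im_src, m->im_dst);
 */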
698
699 /*
700  *      Queue a packet for resolution, creating an unresolved cache entry if needed.
701  */
702
703 static int
704 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
705 {
706         int err;
707         struct mfc_cache *c;
708         const struct iphdr *iph = ip_hdr(skb);
709
710         spin_lock_bh(&mfc_unres_lock);
711         for (c = mfc_unres_queue; c; c = c->next) {
712                 if (net_eq(mfc_net(c), net) &&
713                     c->mfc_mcastgrp == iph->daddr &&
714                     c->mfc_origin == iph->saddr)
715                         break;
716         }
717
718         if (c == NULL) {
719                 /*
720                  *      Create a new entry if allowable
721                  */
722
723                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
724                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
725                         spin_unlock_bh(&mfc_unres_lock);
726
727                         kfree_skb(skb);
728                         return -ENOBUFS;
729                 }
730
731                 /*
732                  *      Fill in the new cache entry
733                  */
734                 c->mfc_parent   = -1;
735                 c->mfc_origin   = iph->saddr;
736                 c->mfc_mcastgrp = iph->daddr;
737
738                 /*
739                  *      Reflect first query at mrouted.
740                  */
741                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
742                 if (err < 0) {
743                         /* If the report failed, throw the cache entry
744                            out - Brad Parker
745                          */
746                         spin_unlock_bh(&mfc_unres_lock);
747
748                         ipmr_cache_free(c);
749                         kfree_skb(skb);
750                         return err;
751                 }
752
753                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
754                 c->next = mfc_unres_queue;
755                 mfc_unres_queue = c;
756
757                 mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
758         }
759
760         /*
761          *      See if we can append the packet
762          */
763         if (c->mfc_un.unres.unresolved.qlen > 3) {
764                 kfree_skb(skb);
765                 err = -ENOBUFS;
766         } else {
767                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
768                 err = 0;
769         }
770
771         spin_unlock_bh(&mfc_unres_lock);
772         return err;
773 }
774
775 /*
776  *      MFC cache manipulation by user space mroute daemon
777  */
778
779 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
780 {
781         int line;
782         struct mfc_cache *c, **cp;
783
784         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
785
786         for (cp = &net->ipv4.mfc_cache_array[line];
787              (c = *cp) != NULL; cp = &c->next) {
788                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
789                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
790                         write_lock_bh(&mrt_lock);
791                         *cp = c->next;
792                         write_unlock_bh(&mrt_lock);
793
794                         ipmr_cache_free(c);
795                         return 0;
796                 }
797         }
798         return -ENOENT;
799 }
800
801 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
802 {
803         int line;
804         struct mfc_cache *uc, *c, **cp;
805
806         if (mfc->mfcc_parent >= MAXVIFS)
807                 return -ENFILE;
808
809         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
810
811         for (cp = &net->ipv4.mfc_cache_array[line];
812              (c = *cp) != NULL; cp = &c->next) {
813                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
814                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
815                         break;
816         }
817
818         if (c != NULL) {
819                 write_lock_bh(&mrt_lock);
820                 c->mfc_parent = mfc->mfcc_parent;
821                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
822                 if (!mrtsock)
823                         c->mfc_flags |= MFC_STATIC;
824                 write_unlock_bh(&mrt_lock);
825                 return 0;
826         }
827
828         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
829                 return -EINVAL;
830
831         c = ipmr_cache_alloc(net);
832         if (c == NULL)
833                 return -ENOMEM;
834
835         c->mfc_origin = mfc->mfcc_origin.s_addr;
836         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
837         c->mfc_parent = mfc->mfcc_parent;
838         ipmr_update_thresholds(c, mfc->mfcc_ttls);
839         if (!mrtsock)
840                 c->mfc_flags |= MFC_STATIC;
841
842         write_lock_bh(&mrt_lock);
843         c->next = net->ipv4.mfc_cache_array[line];
844         net->ipv4.mfc_cache_array[line] = c;
845         write_unlock_bh(&mrt_lock);
846
847         /*
848          *      Check to see if this entry resolves a queued unresolved
849          *      entry. If so we need to send the queued frames on and tidy up.
850          */
851         spin_lock_bh(&mfc_unres_lock);
852         for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
853              cp = &uc->next) {
854                 if (net_eq(mfc_net(uc), net) &&
855                     uc->mfc_origin == c->mfc_origin &&
856                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
857                         *cp = uc->next;
858                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
859                         break;
860                 }
861         }
862         if (mfc_unres_queue == NULL)
863                 del_timer(&ipmr_expire_timer);
864         spin_unlock_bh(&mfc_unres_lock);
865
866         if (uc) {
867                 ipmr_cache_resolve(uc, c);
868                 ipmr_cache_free(uc);
869         }
870         return 0;
871 }
872
873 /*
874  *      Close the multicast socket, and clear the vif tables etc
875  */
876
877 static void mroute_clean_tables(struct net *net)
878 {
879         int i;
880         LIST_HEAD(list);
881
882         /*
883          *      Shut down all active vif entries
884          */
885         for (i = 0; i < net->ipv4.maxvif; i++) {
886                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
887                         vif_delete(net, i, 0, &list);
888         }
889         unregister_netdevice_many(&list);
890
891         /*
892          *      Wipe the cache
893          */
894         for (i = 0; i < MFC_LINES; i++) {
895                 struct mfc_cache *c, **cp;
896
897                 cp = &net->ipv4.mfc_cache_array[i];
898                 while ((c = *cp) != NULL) {
899                         if (c->mfc_flags&MFC_STATIC) {
900                                 cp = &c->next;
901                                 continue;
902                         }
903                         write_lock_bh(&mrt_lock);
904                         *cp = c->next;
905                         write_unlock_bh(&mrt_lock);
906
907                         ipmr_cache_free(c);
908                 }
909         }
910
911         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
912                 struct mfc_cache *c, **cp;
913
914                 spin_lock_bh(&mfc_unres_lock);
915                 cp = &mfc_unres_queue;
916                 while ((c = *cp) != NULL) {
917                         if (!net_eq(mfc_net(c), net)) {
918                                 cp = &c->next;
919                                 continue;
920                         }
921                         *cp = c->next;
922
923                         ipmr_destroy_unres(c);
924                 }
925                 spin_unlock_bh(&mfc_unres_lock);
926         }
927 }
928
929 static void mrtsock_destruct(struct sock *sk)
930 {
931         struct net *net = sock_net(sk);
932
933         rtnl_lock();
934         if (sk == net->ipv4.mroute_sk) {
935                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
936
937                 write_lock_bh(&mrt_lock);
938                 net->ipv4.mroute_sk = NULL;
939                 write_unlock_bh(&mrt_lock);
940
941                 mroute_clean_tables(net);
942         }
943         rtnl_unlock();
944 }
945
946 /*
947  *      Socket options and virtual interface manipulation. The whole
948  *      virtual interface system is a complete heap, but unfortunately
949  *      that's how BSD mrouted happens to think. Maybe one day with a proper
950  *      MOSPF/PIM router set up we can clean this up.
951  */
952
953 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
954 {
955         int ret;
956         struct vifctl vif;
957         struct mfcctl mfc;
958         struct net *net = sock_net(sk);
959
960         if (optname != MRT_INIT) {
961                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
962                         return -EACCES;
963         }
964
965         switch (optname) {
966         case MRT_INIT:
967                 if (sk->sk_type != SOCK_RAW ||
968                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
969                         return -EOPNOTSUPP;
970                 if (optlen != sizeof(int))
971                         return -ENOPROTOOPT;
972
973                 rtnl_lock();
974                 if (net->ipv4.mroute_sk) {
975                         rtnl_unlock();
976                         return -EADDRINUSE;
977                 }
978
979                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
980                 if (ret == 0) {
981                         write_lock_bh(&mrt_lock);
982                         net->ipv4.mroute_sk = sk;
983                         write_unlock_bh(&mrt_lock);
984
985                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
986                 }
987                 rtnl_unlock();
988                 return ret;
989         case MRT_DONE:
990                 if (sk != net->ipv4.mroute_sk)
991                         return -EACCES;
992                 return ip_ra_control(sk, 0, NULL);
993         case MRT_ADD_VIF:
994         case MRT_DEL_VIF:
995                 if (optlen != sizeof(vif))
996                         return -EINVAL;
997                 if (copy_from_user(&vif, optval, sizeof(vif)))
998                         return -EFAULT;
999                 if (vif.vifc_vifi >= MAXVIFS)
1000                         return -ENFILE;
1001                 rtnl_lock();
1002                 if (optname == MRT_ADD_VIF) {
1003                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
1004                 } else {
1005                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
1006                 }
1007                 rtnl_unlock();
1008                 return ret;
1009
1010                 /*
1011                  *      Manipulate the forwarding caches. These live
1012                  *      in a sort of kernel/user symbiosis.
1013                  */
1014         case MRT_ADD_MFC:
1015         case MRT_DEL_MFC:
1016                 if (optlen != sizeof(mfc))
1017                         return -EINVAL;
1018                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1019                         return -EFAULT;
1020                 rtnl_lock();
1021                 if (optname == MRT_DEL_MFC)
1022                         ret = ipmr_mfc_delete(net, &mfc);
1023                 else
1024                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1025                 rtnl_unlock();
1026                 return ret;
1027                 /*
1028                  *      Control PIM assert.
1029                  */
1030         case MRT_ASSERT:
1031         {
1032                 int v;
1033                 if (get_user(v, (int __user *)optval))
1034                         return -EFAULT;
1035                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1036                 return 0;
1037         }
1038 #ifdef CONFIG_IP_PIMSM
1039         case MRT_PIM:
1040         {
1041                 int v;
1042
1043                 if (get_user(v, (int __user *)optval))
1044                         return -EFAULT;
1045                 v = (v) ? 1 : 0;
1046
1047                 rtnl_lock();
1048                 ret = 0;
1049                 if (v != net->ipv4.mroute_do_pim) {
1050                         net->ipv4.mroute_do_pim = v;
1051                         net->ipv4.mroute_do_assert = v;
1052                 }
1053                 rtnl_unlock();
1054                 return ret;
1055         }
1056 #endif
1057         /*
1058          *      Spurious command, or MRT_VERSION which you cannot
1059          *      set.
1060          */
1061         default:
1062                 return -ENOPROTOOPT;
1063         }
1064 }
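/* For reference, a hedged userspace sketch of the setup sequence this
 * interface expects (illustrative only; error handling omitted and
 * local_ip is a caller-supplied address):
 *
 *	int one = 1;
 *	int fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	struct vifctl vc = {
 *		.vifc_vifi      = 0,
 *		.vifc_threshold = 1,
 *		.vifc_lcl_addr  = { .s_addr = local_ip },
 *	};
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */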
1065
1066 /*
1067  *      Getsock opt support for the multicast routing system.
1068  */
1069
1070 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1071 {
1072         int olr;
1073         int val;
1074         struct net *net = sock_net(sk);
1075
1076         if (optname != MRT_VERSION &&
1077 #ifdef CONFIG_IP_PIMSM
1078             optname != MRT_PIM &&
1079 #endif
1080             optname != MRT_ASSERT)
1081                 return -ENOPROTOOPT;
1082
1083         if (get_user(olr, optlen))
1084                 return -EFAULT;
1085
1086         olr = min_t(unsigned int, olr, sizeof(int));
1087         if (olr < 0)
1088                 return -EINVAL;
1089
1090         if (put_user(olr, optlen))
1091                 return -EFAULT;
1092         if (optname == MRT_VERSION)
1093                 val = 0x0305;
1094 #ifdef CONFIG_IP_PIMSM
1095         else if (optname == MRT_PIM)
1096                 val = net->ipv4.mroute_do_pim;
1097 #endif
1098         else
1099                 val = net->ipv4.mroute_do_assert;
1100         if (copy_to_user(optval, &val, olr))
1101                 return -EFAULT;
1102         return 0;
1103 }
1104
1105 /*
1106  *      The IP multicast ioctl support routines.
1107  */
1108
1109 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1110 {
1111         struct sioc_sg_req sr;
1112         struct sioc_vif_req vr;
1113         struct vif_device *vif;
1114         struct mfc_cache *c;
1115         struct net *net = sock_net(sk);
1116
1117         switch (cmd) {
1118         case SIOCGETVIFCNT:
1119                 if (copy_from_user(&vr, arg, sizeof(vr)))
1120                         return -EFAULT;
1121                 if (vr.vifi >= net->ipv4.maxvif)
1122                         return -EINVAL;
1123                 read_lock(&mrt_lock);
1124                 vif = &net->ipv4.vif_table[vr.vifi];
1125                 if (VIF_EXISTS(net, vr.vifi)) {
1126                         vr.icount = vif->pkt_in;
1127                         vr.ocount = vif->pkt_out;
1128                         vr.ibytes = vif->bytes_in;
1129                         vr.obytes = vif->bytes_out;
1130                         read_unlock(&mrt_lock);
1131
1132                         if (copy_to_user(arg, &vr, sizeof(vr)))
1133                                 return -EFAULT;
1134                         return 0;
1135                 }
1136                 read_unlock(&mrt_lock);
1137                 return -EADDRNOTAVAIL;
1138         case SIOCGETSGCNT:
1139                 if (copy_from_user(&sr, arg, sizeof(sr)))
1140                         return -EFAULT;
1141
1142                 read_lock(&mrt_lock);
1143                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1144                 if (c) {
1145                         sr.pktcnt = c->mfc_un.res.pkt;
1146                         sr.bytecnt = c->mfc_un.res.bytes;
1147                         sr.wrong_if = c->mfc_un.res.wrong_if;
1148                         read_unlock(&mrt_lock);
1149
1150                         if (copy_to_user(arg, &sr, sizeof(sr)))
1151                                 return -EFAULT;
1152                         return 0;
1153                 }
1154                 read_unlock(&mrt_lock);
1155                 return -EADDRNOTAVAIL;
1156         default:
1157                 return -ENOIOCTLCMD;
1158         }
1159 }
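/* Userspace sketch of reading the per-VIF counters exposed above
 * (illustrative; mroute_fd is the daemon's MRT_INIT socket):
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *
 *	if (ioctl(mroute_fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu/%lu pkts in/out\n",
 *		       vr.icount, vr.ocount);
 */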
1160
1161
1162 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1163 {
1164         struct net_device *dev = ptr;
1165         struct net *net = dev_net(dev);
1166         struct vif_device *v;
1167         int ct;
1168         LIST_HEAD(list);
1169
1170         if (event != NETDEV_UNREGISTER)
1171                 return NOTIFY_DONE;
1172         v = &net->ipv4.vif_table[0];
1173         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1174                 if (v->dev == dev)
1175                         vif_delete(net, ct, 1, &list);
1176         }
1177         unregister_netdevice_many(&list);
1178         return NOTIFY_DONE;
1179 }
1180
1181
1182 static struct notifier_block ip_mr_notifier = {
1183         .notifier_call = ipmr_device_event,
1184 };
1185
1186 /*
1187  *      Encapsulate a packet by attaching a valid IPIP header to it.
1188  *      This avoids tunnel drivers and other mess and gives us the speed so
1189  *      important for multicast video.
1190  */
1191
1192 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1193 {
1194         struct iphdr *iph;
1195         struct iphdr *old_iph = ip_hdr(skb);
1196
1197         skb_push(skb, sizeof(struct iphdr));
1198         skb->transport_header = skb->network_header;
1199         skb_reset_network_header(skb);
1200         iph = ip_hdr(skb);
1201
1202         iph->version    =       4;
1203         iph->tos        =       old_iph->tos;
1204         iph->ttl        =       old_iph->ttl;
1205         iph->frag_off   =       0;
1206         iph->daddr      =       daddr;
1207         iph->saddr      =       saddr;
1208         iph->protocol   =       IPPROTO_IPIP;
1209         iph->ihl        =       5;
1210         iph->tot_len    =       htons(skb->len);
1211         ip_select_ident(iph, skb_dst(skb), NULL);
1212         ip_send_check(iph);
1213
1214         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1215         nf_reset(skb);
1216 }
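/* The net effect of ip_encap(), sketched:
 *
 *	before:  [ inner IP | payload ]
 *	after:   [ outer IP (proto IPPROTO_IPIP) | inner IP | payload ]
 *
 * The outer header inherits tos and ttl from the inner one and carries
 * the tunnel endpoints passed in as saddr/daddr.
 */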
1217
1218 static inline int ipmr_forward_finish(struct sk_buff *skb)
1219 {
1220         struct ip_options *opt = &(IPCB(skb)->opt);
1221
1222         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1223
1224         if (unlikely(opt->optlen))
1225                 ip_forward_options(skb);
1226
1227         return dst_output(skb);
1228 }
1229
1230 /*
1231  *      Processing handlers for ipmr_forward
1232  */
1233
1234 static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1235 {
1236         struct net *net = mfc_net(c);
1237         const struct iphdr *iph = ip_hdr(skb);
1238         struct vif_device *vif = &net->ipv4.vif_table[vifi];
1239         struct net_device *dev;
1240         struct rtable *rt;
1241         int    encap = 0;
1242
1243         if (vif->dev == NULL)
1244                 goto out_free;
1245
1246 #ifdef CONFIG_IP_PIMSM
1247         if (vif->flags & VIFF_REGISTER) {
1248                 vif->pkt_out++;
1249                 vif->bytes_out += skb->len;
1250                 vif->dev->stats.tx_bytes += skb->len;
1251                 vif->dev->stats.tx_packets++;
1252                 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1253                 goto out_free;
1254         }
1255 #endif
1256
1257         if (vif->flags&VIFF_TUNNEL) {
1258                 struct flowi fl = { .oif = vif->link,
1259                                     .nl_u = { .ip4_u =
1260                                               { .daddr = vif->remote,
1261                                                 .saddr = vif->local,
1262                                                 .tos = RT_TOS(iph->tos) } },
1263                                     .proto = IPPROTO_IPIP };
1264                 if (ip_route_output_key(net, &rt, &fl))
1265                         goto out_free;
1266                 encap = sizeof(struct iphdr);
1267         } else {
1268                 struct flowi fl = { .oif = vif->link,
1269                                     .nl_u = { .ip4_u =
1270                                               { .daddr = iph->daddr,
1271                                                 .tos = RT_TOS(iph->tos) } },
1272                                     .proto = IPPROTO_IPIP };
1273                 if (ip_route_output_key(net, &rt, &fl))
1274                         goto out_free;
1275         }
1276
1277         dev = rt->u.dst.dev;
1278
1279         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1280                 /* Do not fragment multicasts. Alas, IPv4 does not
1281                    allow us to send ICMP here, so oversized packets
1282                    simply disappear into a black hole.
1283                  */
1284
1285                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1286                 ip_rt_put(rt);
1287                 goto out_free;
1288         }
1289
1290         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1291
1292         if (skb_cow(skb, encap)) {
1293                 ip_rt_put(rt);
1294                 goto out_free;
1295         }
1296
1297         vif->pkt_out++;
1298         vif->bytes_out += skb->len;
1299
1300         skb_dst_drop(skb);
1301         skb_dst_set(skb, &rt->u.dst);
1302         ip_decrease_ttl(ip_hdr(skb));
1303
1304         /* FIXME: forward and output firewalls used to be called here.
1305          * What do we do with netfilter? -- RR */
1306         if (vif->flags & VIFF_TUNNEL) {
1307                 ip_encap(skb, vif->local, vif->remote);
1308                 /* FIXME: extra output firewall step used to be here. --RR */
1309                 vif->dev->stats.tx_packets++;
1310                 vif->dev->stats.tx_bytes += skb->len;
1311         }
1312
1313         IPCB(skb)->flags |= IPSKB_FORWARDED;
1314
1315         /*
1316          * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
1317          * not only before forwarding, but also after forwarding on all output
1318          * interfaces. The point is that if the mrouter runs a multicast
1319          * application, that application should receive packets regardless of
1320          * which interface it is joined on.
1321          * If we did not do this, the program would have to join on all
1322          * interfaces. On the other hand, a multihomed host (or router, but
1323          * not mrouter) cannot join on more than one interface - it would
1324          * receive duplicate packets.
1325          */
1326         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1327                 ipmr_forward_finish);
1328         return;
1329
1330 out_free:
1331         kfree_skb(skb);
1332         return;
1333 }
1334
1335 static int ipmr_find_vif(struct net_device *dev)
1336 {
1337         struct net *net = dev_net(dev);
1338         int ct;
1339         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1340                 if (net->ipv4.vif_table[ct].dev == dev)
1341                         break;
1342         }
1343         return ct;
1344 }
1345
1346 /* "local" means that we should preserve one skb (for local delivery) */
1347
1348 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
1349 {
1350         int psend = -1;
1351         int vif, ct;
1352         struct net *net = mfc_net(cache);
1353
1354         vif = cache->mfc_parent;
1355         cache->mfc_un.res.pkt++;
1356         cache->mfc_un.res.bytes += skb->len;
1357
1358         /*
1359          * Wrong interface: drop packet and (maybe) send PIM assert.
1360          */
1361         if (net->ipv4.vif_table[vif].dev != skb->dev) {
1362                 int true_vifi;
1363
1364                 if (skb_rtable(skb)->fl.iif == 0) {
1365                         /* It is our own packet, looped back.
1366                            Very complicated situation...
1367
1368                            The best workaround until the routing daemons
1369                            are fixed is not to redistribute a packet if it
1370                            was sent through the wrong interface. It means
1371                            that multicast applications WILL NOT work for
1372                            (S,G) entries whose default multicast route
1373                            points to the wrong oif. In any case, it is not
1374                            a good idea to run multicast applications on a router.
1375                          */
1376                         goto dont_forward;
1377                 }
1378
1379                 cache->mfc_un.res.wrong_if++;
1380                 true_vifi = ipmr_find_vif(skb->dev);
1381
1382                 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1383                     /* pimsm uses asserts when switching from RPT to SPT,
1384                        so we cannot check that the packet arrived on an oif.
1385                        It is bad, but otherwise we would need to move a pretty
1386                        large chunk of pimd into the kernel. Ough... --ANK
1387                      */
1388                     (net->ipv4.mroute_do_pim ||
1389                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1390                     time_after(jiffies,
1391                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1392                         cache->mfc_un.res.last_assert = jiffies;
1393                         ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1394                 }
1395                 goto dont_forward;
1396         }
1397
1398         net->ipv4.vif_table[vif].pkt_in++;
1399         net->ipv4.vif_table[vif].bytes_in += skb->len;
1400
1401         /*
1402          *      Forward the frame
1403          */
1404         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1405                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1406                         if (psend != -1) {
1407                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1408                                 if (skb2)
1409                                         ipmr_queue_xmit(skb2, cache, psend);
1410                         }
1411                         psend = ct;
1412                 }
1413         }
1414         if (psend != -1) {
1415                 if (local) {
1416                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1417                         if (skb2)
1418                                 ipmr_queue_xmit(skb2, cache, psend);
1419                 } else {
1420                         ipmr_queue_xmit(skb, cache, psend);
1421                         return 0;
1422                 }
1423         }
1424
1425 dont_forward:
1426         if (!local)
1427                 kfree_skb(skb);
1428         return 0;
1429 }
1430
1431
1432 /*
1433  *      Multicast packets for forwarding arrive here
1434  */
1435
1436 int ip_mr_input(struct sk_buff *skb)
1437 {
1438         struct mfc_cache *cache;
1439         struct net *net = dev_net(skb->dev);
1440         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1441
1442         /* The packet was looped back after forwarding; it should not be
1443            forwarded a second time, but it can still be delivered locally.
1444          */
1445         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1446                 goto dont_forward;
1447
1448         if (!local) {
1449                     if (IPCB(skb)->opt.router_alert) {
1450                             if (ip_call_ra_chain(skb))
1451                                     return 0;
1452                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1453                             /* IGMPv1 (and broken IGMPv2 implementations such as
1454                                Cisco IOS <= 11.2(8)) do not put the router alert
1455                                option into IGMP packets destined to routable
1456                                groups. It is very bad, because it means
1457                                that we can forward NO IGMP messages.
1458                              */
1459                             read_lock(&mrt_lock);
1460                             if (net->ipv4.mroute_sk) {
1461                                     nf_reset(skb);
1462                                     raw_rcv(net->ipv4.mroute_sk, skb);
1463                                     read_unlock(&mrt_lock);
1464                                     return 0;
1465                             }
1466                             read_unlock(&mrt_lock);
1467                     }
1468         }
1469
1470         read_lock(&mrt_lock);
1471         cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1472
1473         /*
1474          *      No usable cache entry
1475          */
1476         if (cache == NULL) {
1477                 int vif;
1478
1479                 if (local) {
1480                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1481                         ip_local_deliver(skb);
1482                         if (skb2 == NULL) {
1483                                 read_unlock(&mrt_lock);
1484                                 return -ENOBUFS;
1485                         }
1486                         skb = skb2;
1487                 }
1488
1489                 vif = ipmr_find_vif(skb->dev);
1490                 if (vif >= 0) {
1491                         int err = ipmr_cache_unresolved(net, vif, skb);
1492                         read_unlock(&mrt_lock);
1493
1494                         return err;
1495                 }
1496                 read_unlock(&mrt_lock);
1497                 kfree_skb(skb);
1498                 return -ENODEV;
1499         }
1500
1501         ip_mr_forward(skb, cache, local);
1502
1503         read_unlock(&mrt_lock);
1504
1505         if (local)
1506                 return ip_local_deliver(skb);
1507
1508         return 0;
1509
1510 dont_forward:
1511         if (local)
1512                 return ip_local_deliver(skb);
1513         kfree_skb(skb);
1514         return 0;
1515 }
1516
1517 #ifdef CONFIG_IP_PIMSM
1518 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1519 {
1520         struct net_device *reg_dev = NULL;
1521         struct iphdr *encap;
1522         struct net *net = dev_net(skb->dev);
1523
1524         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1525         /*
1526            Check that:
1527            a. packet is really destined to a multicast group
1528            b. packet is not a NULL-REGISTER
1529            c. packet is not truncated
1530          */
1531         if (!ipv4_is_multicast(encap->daddr) ||
1532             encap->tot_len == 0 ||
1533             ntohs(encap->tot_len) + pimlen > skb->len)
1534                 return 1;
1535
1536         read_lock(&mrt_lock);
1537         if (net->ipv4.mroute_reg_vif_num >= 0)
1538                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1539         if (reg_dev)
1540                 dev_hold(reg_dev);
1541         read_unlock(&mrt_lock);
1542
1543         if (reg_dev == NULL)
1544                 return 1;
1545
1546         skb->mac_header = skb->network_header;
1547         skb_pull(skb, (u8 *)encap - skb->data);
1548         skb_reset_network_header(skb);
1549         skb->dev = reg_dev;
1550         skb->protocol = htons(ETH_P_IP);
1551         skb->ip_summed = 0;
1552         skb->pkt_type = PACKET_HOST;
1553         skb_dst_drop(skb);
1554         reg_dev->stats.rx_bytes += skb->len;
1555         reg_dev->stats.rx_packets++;
1556         nf_reset(skb);
1557         netif_rx(skb);
1558         dev_put(reg_dev);
1559
1560         return 0;
1561 }
1562 #endif
1563
1564 #ifdef CONFIG_IP_PIMSM_V1
1565 /*
1566  * Handle IGMP messages of PIMv1
1567  */
1568
1569 int pim_rcv_v1(struct sk_buff *skb)
1570 {
1571         struct igmphdr *pim;
1572         struct net *net = dev_net(skb->dev);
1573
1574         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1575                 goto drop;
1576
1577         pim = igmp_hdr(skb);
1578
1579         if (!net->ipv4.mroute_do_pim ||
1580             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1581                 goto drop;
1582
1583         if (__pim_rcv(skb, sizeof(*pim))) {
1584 drop:
1585                 kfree_skb(skb);
1586         }
1587         return 0;
1588 }
1589 #endif
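
/*
 * PIMv1 rides inside IGMP (igmp_rcv() dispatches on the IGMP type
 * byte before this handler runs), which is why the header is read as
 * a struct igmphdr here: for a v1 REGISTER the word overlaying the
 * IGMP group field holds the PIM version and "code" holds the
 * message type.
 */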
1590
1591 #ifdef CONFIG_IP_PIMSM_V2
1592 static int pim_rcv(struct sk_buff *skb)
1593 {
1594         struct pimreghdr *pim;
1595
1596         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1597                 goto drop;
1598
1599         pim = (struct pimreghdr *)skb_transport_header(skb);
1600         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1601             (pim->flags&PIM_NULL_REGISTER) ||
1602             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1603              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1604                 goto drop;
1605
1606         if (__pim_rcv(skb, sizeof(*pim))) {
1607 drop:
1608                 kfree_skb(skb);
1609         }
1610         return 0;
1611 }
1612 #endif
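
/*
 * The two-part checksum test above accepts a REGISTER if either the
 * checksum over the PIM header alone verifies or the checksum over
 * the whole packet does; implementations differ on which of the two
 * they compute, so insisting on one form would break older peers.
 */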
1613
1614 static int
1615 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1616 {
1617         int ct;
1618         struct rtnexthop *nhp;
1619         struct net *net = mfc_net(c);
1620         u8 *b = skb_tail_pointer(skb);
1621         struct rtattr *mp_head;
1622
1623         /* If cache is unresolved, don't try to parse IIF and OIF */
1624         if (c->mfc_parent >= MAXVIFS)
1625                 return -ENOENT;
1626
1627         if (VIF_EXISTS(net, c->mfc_parent))
1628                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1629
1630         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1631
1632         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1633                 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1634                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1635                                 goto rtattr_failure;
1636                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1637                         nhp->rtnh_flags = 0;
1638                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1639                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1640                         nhp->rtnh_len = sizeof(*nhp);
1641                 }
1642         }
1643         mp_head->rta_type = RTA_MULTIPATH;
1644         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1645         rtm->rtm_type = RTN_MULTICAST;
1646         return 1;
1647
1648 rtattr_failure:
1649         nlmsg_trim(skb, b);
1650         return -EMSGSIZE;
1651 }
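
/*
 * The mroute is thus encoded with standard routing attributes:
 * RTA_IIF holds the parent vif's ifindex and RTA_MULTIPATH holds one
 * rtnexthop per output vif, with the vif's TTL threshold stored in
 * rtnh_hops.  A minimal userspace sketch of walking the reply
 * (assuming the RTA_MULTIPATH attribute "rta" has already been
 * located with the usual RTA_* iteration):
 *
 *      struct rtnexthop *rtnh = RTA_DATA(rta);
 *      int len = RTA_PAYLOAD(rta);
 *
 *      while (RTNH_OK(rtnh, len)) {
 *              printf("oif %d ttl %d\n",
 *                     rtnh->rtnh_ifindex, rtnh->rtnh_hops);
 *              len -= RTNH_ALIGN(rtnh->rtnh_len);
 *              rtnh = RTNH_NEXT(rtnh);
 *      }
 */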
1652
1653 int ipmr_get_route(struct net *net,
1654                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1655 {
1656         int err;
1657         struct mfc_cache *cache;
1658         struct rtable *rt = skb_rtable(skb);
1659
1660         read_lock(&mrt_lock);
1661         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1662
1663         if (cache == NULL) {
1664                 struct sk_buff *skb2;
1665                 struct iphdr *iph;
1666                 struct net_device *dev;
1667                 int vif;
1668
1669                 if (nowait) {
1670                         read_unlock(&mrt_lock);
1671                         return -EAGAIN;
1672                 }
1673
1674                 dev = skb->dev;
1675                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1676                         read_unlock(&mrt_lock);
1677                         return -ENODEV;
1678                 }
1679                 skb2 = skb_clone(skb, GFP_ATOMIC);
1680                 if (!skb2) {
1681                         read_unlock(&mrt_lock);
1682                         return -ENOMEM;
1683                 }
1684
1685                 skb_push(skb2, sizeof(struct iphdr));
1686                 skb_reset_network_header(skb2);
1687                 iph = ip_hdr(skb2);
1688                 iph->ihl = sizeof(struct iphdr) >> 2;
1689                 iph->saddr = rt->rt_src;
1690                 iph->daddr = rt->rt_dst;
1691                 iph->version = 0;
1692                 err = ipmr_cache_unresolved(net, vif, skb2);
1693                 read_unlock(&mrt_lock);
1694                 return err;
1695         }
1696
1697         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1698                 cache->mfc_flags |= MFC_NOTIFY;
1699         err = ipmr_fill_mroute(skb, cache, rtm);
1700         read_unlock(&mrt_lock);
1701         return err;
1702 }
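
/*
 * The stub header built above is deliberately marked with
 * version = 0: no real datagram carries that value, so when the
 * queued skb is later replayed by ipmr_cache_resolve() it can be
 * told apart from genuine traffic and answered as a netlink request
 * instead of being forwarded.
 */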
1703
1704 #ifdef CONFIG_PROC_FS
1705 /*
1706  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1707  */
1708 struct ipmr_vif_iter {
1709         struct seq_net_private p;
1710         int ct;
1711 };
1712
1713 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1714                                            struct ipmr_vif_iter *iter,
1715                                            loff_t pos)
1716 {
1717         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1718                 if (!VIF_EXISTS(net, iter->ct))
1719                         continue;
1720                 if (pos-- == 0)
1721                         return &net->ipv4.vif_table[iter->ct];
1722         }
1723         return NULL;
1724 }
1725
1726 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1727         __acquires(mrt_lock)
1728 {
1729         struct net *net = seq_file_net(seq);
1730
1731         read_lock(&mrt_lock);
1732         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1733                 : SEQ_START_TOKEN;
1734 }
1735
1736 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1737 {
1738         struct ipmr_vif_iter *iter = seq->private;
1739         struct net *net = seq_file_net(seq);
1740
1741         ++*pos;
1742         if (v == SEQ_START_TOKEN)
1743                 return ipmr_vif_seq_idx(net, iter, 0);
1744
1745         while (++iter->ct < net->ipv4.maxvif) {
1746                 if (!VIF_EXISTS(net, iter->ct))
1747                         continue;
1748                 return &net->ipv4.vif_table[iter->ct];
1749         }
1750         return NULL;
1751 }
1752
1753 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1754         __releases(mrt_lock)
1755 {
1756         read_unlock(&mrt_lock);
1757 }
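
/*
 * This walk holds mrt_lock for the whole dump: taken in ->start and
 * dropped in ->stop, with *pos mapped to the Nth live vif so that
 * VIF_EXISTS() gaps in the table are skipped transparently.
 */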
1758
1759 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1760 {
1761         struct net *net = seq_file_net(seq);
1762
1763         if (v == SEQ_START_TOKEN) {
1764                 seq_puts(seq,
1765                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1766         } else {
1767                 const struct vif_device *vif = v;
1768                 const char *name = vif->dev ? vif->dev->name : "none";
1769
1770                 seq_printf(seq,
1771                            "%2zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1772                            vif - net->ipv4.vif_table,
1773                            name, vif->bytes_in, vif->pkt_in,
1774                            vif->bytes_out, vif->pkt_out,
1775                            vif->flags, vif->local, vif->remote);
1776         }
1777         return 0;
1778 }
1779
1780 static const struct seq_operations ipmr_vif_seq_ops = {
1781         .start = ipmr_vif_seq_start,
1782         .next  = ipmr_vif_seq_next,
1783         .stop  = ipmr_vif_seq_stop,
1784         .show  = ipmr_vif_seq_show,
1785 };
1786
1787 static int ipmr_vif_open(struct inode *inode, struct file *file)
1788 {
1789         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1790                             sizeof(struct ipmr_vif_iter));
1791 }
1792
1793 static const struct file_operations ipmr_vif_fops = {
1794         .owner   = THIS_MODULE,
1795         .open    = ipmr_vif_open,
1796         .read    = seq_read,
1797         .llseek  = seq_lseek,
1798         .release = seq_release_net,
1799 };
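
/*
 * Reading /proc/net/ip_mr_vif yields one line per live vif in the
 * format produced by ipmr_vif_seq_show(); an illustrative excerpt
 * (values made up):
 *
 *      Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *       0 eth0           9876      42      1234      10 00000 0A000001 00000000
 *
 * Local and Remote are raw hex IPv4 addresses and Flags is the vif's
 * VIFF_* bitmask.
 */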
1800
1801 struct ipmr_mfc_iter {
1802         struct seq_net_private p;
1803         struct mfc_cache **cache;
1804         int ct;
1805 };
1806
1807
1808 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1809                                           struct ipmr_mfc_iter *it, loff_t pos)
1810 {
1811         struct mfc_cache *mfc;
1812
1813         it->cache = net->ipv4.mfc_cache_array;
1814         read_lock(&mrt_lock);
1815         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1816                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1817                      mfc; mfc = mfc->next)
1818                         if (pos-- == 0)
1819                                 return mfc;
1820         read_unlock(&mrt_lock);
1821
1822         it->cache = &mfc_unres_queue;
1823         spin_lock_bh(&mfc_unres_lock);
1824         for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1825                 if (net_eq(mfc_net(mfc), net) &&
1826                     pos-- == 0)
1827                         return mfc;
1828         spin_unlock_bh(&mfc_unres_lock);
1829
1830         it->cache = NULL;
1831         return NULL;
1832 }
1833
1834
1835 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1836 {
1837         struct ipmr_mfc_iter *it = seq->private;
1838         struct net *net = seq_file_net(seq);
1839
1840         it->cache = NULL;
1841         it->ct = 0;
1842         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1843                 : SEQ_START_TOKEN;
1844 }
1845
1846 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1847 {
1848         struct mfc_cache *mfc = v;
1849         struct ipmr_mfc_iter *it = seq->private;
1850         struct net *net = seq_file_net(seq);
1851
1852         ++*pos;
1853
1854         if (v == SEQ_START_TOKEN)
1855                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1856
1857         if (mfc->next)
1858                 return mfc->next;
1859
1860         if (it->cache == &mfc_unres_queue)
1861                 goto end_of_list;
1862
1863         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1864
1865         while (++it->ct < MFC_LINES) {
1866                 mfc = net->ipv4.mfc_cache_array[it->ct];
1867                 if (mfc)
1868                         return mfc;
1869         }
1870
1871         /* exhausted cache_array, show unresolved */
1872         read_unlock(&mrt_lock);
1873         it->cache = &mfc_unres_queue;
1874         it->ct = 0;
1875
1876         spin_lock_bh(&mfc_unres_lock);
1877         mfc = mfc_unres_queue;
1878         while (mfc && !net_eq(mfc_net(mfc), net))
1879                 mfc = mfc->next;
1880         if (mfc)
1881                 return mfc;
1882
1883  end_of_list:
1884         spin_unlock_bh(&mfc_unres_lock);
1885         it->cache = NULL;
1886
1887         return NULL;
1888 }
1889
1890 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1891 {
1892         struct ipmr_mfc_iter *it = seq->private;
1893         struct net *net = seq_file_net(seq);
1894
1895         if (it->cache == &mfc_unres_queue)
1896                 spin_unlock_bh(&mfc_unres_lock);
1897         else if (it->cache == net->ipv4.mfc_cache_array)
1898                 read_unlock(&mrt_lock);
1899 }
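
/*
 * The iterator holds at most one of the two table locks at any point
 * of the walk: mrt_lock while traversing mfc_cache_array, then
 * mfc_unres_lock once it crosses into the unresolved queue.
 * it->cache records which table (and therefore which lock) is
 * current, so ipmr_mfc_seq_stop() releases exactly the lock still
 * held.
 */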
1900
1901 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1902 {
1903         int n;
1904         struct net *net = seq_file_net(seq);
1905
1906         if (v == SEQ_START_TOKEN) {
1907                 seq_puts(seq,
1908                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1909         } else {
1910                 const struct mfc_cache *mfc = v;
1911                 const struct ipmr_mfc_iter *it = seq->private;
1912
1913                 seq_printf(seq, "%08lX %08lX %-3hd",
1914                            (unsigned long) mfc->mfc_mcastgrp,
1915                            (unsigned long) mfc->mfc_origin,
1916                            mfc->mfc_parent);
1917
1918                 if (it->cache != &mfc_unres_queue) {
1919                         seq_printf(seq, " %8lu %8lu %8lu",
1920                                    mfc->mfc_un.res.pkt,
1921                                    mfc->mfc_un.res.bytes,
1922                                    mfc->mfc_un.res.wrong_if);
1923                         for (n = mfc->mfc_un.res.minvif;
1924                              n < mfc->mfc_un.res.maxvif; n++) {
1925                                 if (VIF_EXISTS(net, n) &&
1926                                     mfc->mfc_un.res.ttls[n] < 255)
1927                                         seq_printf(seq,
1928                                            " %2d:%-3d",
1929                                            n, mfc->mfc_un.res.ttls[n]);
1930                         }
1931                 } else {
1932                         /* unresolved mfc_caches don't contain
1933                          * pkt, bytes and wrong_if values
1934                          */
1935                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1936                 }
1937                 seq_putc(seq, '\n');
1938         }
1939         return 0;
1940 }
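
/*
 * Each Oifs entry is printed as "vif:ttl", the output vif index and
 * the TTL threshold a packet must exceed to be forwarded there.
 * Unresolved entries show zeroed counters because pkt/bytes/wrong_if
 * share a union with the pending-skb queue state.
 */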
1941
1942 static const struct seq_operations ipmr_mfc_seq_ops = {
1943         .start = ipmr_mfc_seq_start,
1944         .next  = ipmr_mfc_seq_next,
1945         .stop  = ipmr_mfc_seq_stop,
1946         .show  = ipmr_mfc_seq_show,
1947 };
1948
1949 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1950 {
1951         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1952                             sizeof(struct ipmr_mfc_iter));
1953 }
1954
1955 static const struct file_operations ipmr_mfc_fops = {
1956         .owner   = THIS_MODULE,
1957         .open    = ipmr_mfc_open,
1958         .read    = seq_read,
1959         .llseek  = seq_lseek,
1960         .release = seq_release_net,
1961 };
1962 #endif
1963
1964 #ifdef CONFIG_IP_PIMSM_V2
1965 static const struct net_protocol pim_protocol = {
1966         .handler        =       pim_rcv,
1967         .netns_ok       =       1,
1968 };
1969 #endif
1970
1971
1972 /*
1973  *      Setup for IP multicast routing
1974  */
1975 static int __net_init ipmr_net_init(struct net *net)
1976 {
1977         int err = 0;
1978
1979         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1980                                       GFP_KERNEL);
1981         if (!net->ipv4.vif_table) {
1982                 err = -ENOMEM;
1983                 goto fail;
1984         }
1985
1986         /* Forwarding cache */
1987         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1988                                             sizeof(struct mfc_cache *),
1989                                             GFP_KERNEL);
1990         if (!net->ipv4.mfc_cache_array) {
1991                 err = -ENOMEM;
1992                 goto fail_mfc_cache;
1993         }
1994
1995 #ifdef CONFIG_IP_PIMSM
1996         net->ipv4.mroute_reg_vif_num = -1;
1997 #endif
1998
1999 #ifdef CONFIG_PROC_FS
2000         err = -ENOMEM;
2001         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2002                 goto proc_vif_fail;
2003         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2004                 goto proc_cache_fail;
2005 #endif
2006         return 0;
2007
2008 #ifdef CONFIG_PROC_FS
2009 proc_cache_fail:
2010         proc_net_remove(net, "ip_mr_vif");
2011 proc_vif_fail:
2012         kfree(net->ipv4.mfc_cache_array);
2013 #endif
2014 fail_mfc_cache:
2015         kfree(net->ipv4.vif_table);
2016 fail:
2017         return err;
2018 }
2019
2020 static void __net_exit ipmr_net_exit(struct net *net)
2021 {
2022 #ifdef CONFIG_PROC_FS
2023         proc_net_remove(net, "ip_mr_cache");
2024         proc_net_remove(net, "ip_mr_vif");
2025 #endif
2026         kfree(net->ipv4.mfc_cache_array);
2027         kfree(net->ipv4.vif_table);
2028 }
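
/*
 * Teardown mirrors ipmr_net_init() in reverse: the /proc entries are
 * removed first so no reader can race with the tables being freed,
 * then the per-net allocations are released.
 */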
2029
2030 static struct pernet_operations ipmr_net_ops = {
2031         .init = ipmr_net_init,
2032         .exit = ipmr_net_exit,
2033 };
2034
2035 int __init ip_mr_init(void)
2036 {
2037         int err;
2038
2039         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2040                                        sizeof(struct mfc_cache),
2041                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2042                                        NULL);
2043         if (!mrt_cachep)
2044                 return -ENOMEM;
2045
2046         err = register_pernet_subsys(&ipmr_net_ops);
2047         if (err)
2048                 goto reg_pernet_fail;
2049
2050         setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
2051         err = register_netdevice_notifier(&ip_mr_notifier);
2052         if (err)
2053                 goto reg_notif_fail;
2054 #ifdef CONFIG_IP_PIMSM_V2
2055         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2056                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2057                 err = -EAGAIN;
2058                 goto add_proto_fail;
2059         }
2060 #endif
2061         return 0;
2062
2063 #ifdef CONFIG_IP_PIMSM_V2
2064 add_proto_fail:
2065         unregister_netdevice_notifier(&ip_mr_notifier);
2066 #endif
2067 reg_notif_fail:
2068         del_timer(&ipmr_expire_timer);
2069         unregister_pernet_subsys(&ipmr_net_ops);
2070 reg_pernet_fail:
2071         kmem_cache_destroy(mrt_cachep);
2072         return err;
2073 }
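
/*
 * The error ladder above unwinds in strict reverse order of setup
 * (protocol handler, notifier, timer, pernet ops, slab cache), so a
 * failure at any step leaves nothing registered behind.  Note that
 * inet_add_protocol() fails only when another handler already owns
 * IPPROTO_PIM, hence the -EAGAIN.
 */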