ipv4: ipmr: move mroute data into separate structure
net/ipv4/ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66
67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
68 #define CONFIG_IP_PIMSM 1
69 #endif
70
71 struct mr_table {
72         struct sock             *mroute_sk;
73         struct timer_list       ipmr_expire_timer;
74         struct list_head        mfc_unres_queue;
75         struct list_head        mfc_cache_array[MFC_LINES];
76         struct vif_device       vif_table[MAXVIFS];
77         int                     maxvif;
78         atomic_t                cache_resolve_queue_len;
79         int                     mroute_do_assert;
80         int                     mroute_do_pim;
81 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
82         int                     mroute_reg_vif_num;
83 #endif
84 };
85
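/* Until this change all of the above lived in file-scope globals; they are
 * now gathered into one mr_table hung off the namespace, so every access
 * goes through net->ipv4.mrt. A minimal sketch of the access pattern (the
 * helper name is hypothetical, not part of this file):
 *
 *	static struct mr_table *ipmr_get_table(struct net *net)
 *	{
 *		return net->ipv4.mrt;
 *	}
 *
 * Keeping the indirection in one place leaves room to support multiple
 * tables per namespace later without touching the call sites again.
 */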
86 /* Big lock, protecting vif table, mrt cache and mroute socket state.
87    Note that updates are additionally serialized via rtnl_lock.
88  */
89
90 static DEFINE_RWLOCK(mrt_lock);
91
92 /*
93  *      Multicast router control variables
94  */
95
96 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
97
98 /* Special spinlock for queue of unresolved entries */
99 static DEFINE_SPINLOCK(mfc_unres_lock);
100
101 /* We return to Alan's original scheme. The hash table of resolved
102    entries is changed only in process context and protected
103    with the weak lock mrt_lock. The queue of unresolved entries is
104    protected with the strong spinlock mfc_unres_lock.
105
106    This way the data path is entirely free of exclusive locks.
107  */
108
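/* The resulting locking pattern, as used throughout this file: readers on
 * the data path take the rwlock for reading, updates in process context
 * take it for writing with BHs disabled, and the unresolved queue is
 * always handled under its own spinlock:
 *
 *	read_lock(&mrt_lock);		-- forwarding-path lookups
 *	write_lock_bh(&mrt_lock);	-- vif table and cache updates
 *	spin_lock_bh(&mfc_unres_lock);	-- unresolved-queue handling
 */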
109 static struct kmem_cache *mrt_cachep __read_mostly;
110
111 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
112                          struct sk_buff *skb, struct mfc_cache *cache,
113                          int local);
114 static int ipmr_cache_report(struct mr_table *mrt,
115                              struct sk_buff *pkt, vifi_t vifi, int assert);
116 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
117                             struct mfc_cache *c, struct rtmsg *rtm);
118
119 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
120
121 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
122 {
123         struct net *net = dev_net(dev);
124
125         dev_close(dev);
126
127         dev = __dev_get_by_name(net, "tunl0");
128         if (dev) {
129                 const struct net_device_ops *ops = dev->netdev_ops;
130                 struct ifreq ifr;
131                 struct ip_tunnel_parm p;
132
133                 memset(&p, 0, sizeof(p));
134                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
135                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
136                 p.iph.version = 4;
137                 p.iph.ihl = 5;
138                 p.iph.protocol = IPPROTO_IPIP;
139                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
140                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
141
142                 if (ops->ndo_do_ioctl) {
143                         mm_segment_t oldfs = get_fs();
144
145                         set_fs(KERNEL_DS);
146                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
147                         set_fs(oldfs);
148                 }
149         }
150 }
151
152 static
153 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
154 {
155         struct net_device  *dev;
156
157         dev = __dev_get_by_name(net, "tunl0");
158
159         if (dev) {
160                 const struct net_device_ops *ops = dev->netdev_ops;
161                 int err;
162                 struct ifreq ifr;
163                 struct ip_tunnel_parm p;
164                 struct in_device  *in_dev;
165
166                 memset(&p, 0, sizeof(p));
167                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
168                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
169                 p.iph.version = 4;
170                 p.iph.ihl = 5;
171                 p.iph.protocol = IPPROTO_IPIP;
172                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
173                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
174
175                 if (ops->ndo_do_ioctl) {
176                         mm_segment_t oldfs = get_fs();
177
178                         set_fs(KERNEL_DS);
179                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
180                         set_fs(oldfs);
181                 } else
182                         err = -EOPNOTSUPP;
183
184                 dev = NULL;
185
186                 if (err == 0 &&
187                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
188                         dev->flags |= IFF_MULTICAST;
189
190                         in_dev = __in_dev_get_rtnl(dev);
191                         if (in_dev == NULL)
192                                 goto failure;
193
194                         ipv4_devconf_setall(in_dev);
195                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
196
197                         if (dev_open(dev))
198                                 goto failure;
199                         dev_hold(dev);
200                 }
201         }
202         return dev;
203
204 failure:
205         /* allow the register to be completed before unregistering. */
206         rtnl_unlock();
207         rtnl_lock();
208
209         unregister_netdevice(dev);
210         return NULL;
211 }
212
213 #ifdef CONFIG_IP_PIMSM
214
215 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
216 {
217         struct net *net = dev_net(dev);
218         struct mr_table *mrt = net->ipv4.mrt;
219
220         read_lock(&mrt_lock);
221         dev->stats.tx_bytes += skb->len;
222         dev->stats.tx_packets++;
223         ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
224         read_unlock(&mrt_lock);
225         kfree_skb(skb);
226         return NETDEV_TX_OK;
227 }
228
229 static const struct net_device_ops reg_vif_netdev_ops = {
230         .ndo_start_xmit = reg_vif_xmit,
231 };
232
233 static void reg_vif_setup(struct net_device *dev)
234 {
235         dev->type               = ARPHRD_PIMREG;
236         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
237         dev->flags              = IFF_NOARP;
238         dev->netdev_ops         = &reg_vif_netdev_ops;
239         dev->destructor         = free_netdev;
240         dev->features           |= NETIF_F_NETNS_LOCAL;
241 }
242
243 static struct net_device *ipmr_reg_vif(struct net *net)
244 {
245         struct net_device *dev;
246         struct in_device *in_dev;
247
248         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
249
250         if (dev == NULL)
251                 return NULL;
252
253         dev_net_set(dev, net);
254
255         if (register_netdevice(dev)) {
256                 free_netdev(dev);
257                 return NULL;
258         }
259         dev->iflink = 0;
260
261         rcu_read_lock();
262         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
263                 rcu_read_unlock();
264                 goto failure;
265         }
266
267         ipv4_devconf_setall(in_dev);
268         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
269         rcu_read_unlock();
270
271         if (dev_open(dev))
272                 goto failure;
273
274         dev_hold(dev);
275
276         return dev;
277
278 failure:
279         /* allow the register to be completed before unregistering. */
280         rtnl_unlock();
281         rtnl_lock();
282
283         unregister_netdevice(dev);
284         return NULL;
285 }
286 #endif
287
288 /*
289  *      Delete a VIF entry
290  *      @notify: Set to 1 if the caller is a notifier_call
291  */
292
293 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
294                       struct list_head *head)
295 {
296         struct vif_device *v;
297         struct net_device *dev;
298         struct in_device *in_dev;
299
300         if (vifi < 0 || vifi >= mrt->maxvif)
301                 return -EADDRNOTAVAIL;
302
303         v = &mrt->vif_table[vifi];
304
305         write_lock_bh(&mrt_lock);
306         dev = v->dev;
307         v->dev = NULL;
308
309         if (!dev) {
310                 write_unlock_bh(&mrt_lock);
311                 return -EADDRNOTAVAIL;
312         }
313
314 #ifdef CONFIG_IP_PIMSM
315         if (vifi == mrt->mroute_reg_vif_num)
316                 mrt->mroute_reg_vif_num = -1;
317 #endif
318
319         if (vifi+1 == mrt->maxvif) {
320                 int tmp;
321                 for (tmp=vifi-1; tmp>=0; tmp--) {
322                         if (VIF_EXISTS(mrt, tmp))
323                                 break;
324                 }
325                 mrt->maxvif = tmp+1;
326         }
327
328         write_unlock_bh(&mrt_lock);
329
330         dev_set_allmulti(dev, -1);
331
332         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
333                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
334                 ip_rt_multicast_event(in_dev);
335         }
336
337         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
338                 unregister_netdevice_queue(dev, head);
339
340         dev_put(dev);
341         return 0;
342 }
343
344 static inline void ipmr_cache_free(struct mfc_cache *c)
345 {
346         kmem_cache_free(mrt_cachep, c);
347 }
348
349 /* Destroy an unresolved cache entry, killing queued skbs
350    and reporting error to netlink readers.
351  */
352
353 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
354 {
355         struct net *net = NULL;         /* FIXME: use mrt->net once mr_table carries a net pointer */
356         struct sk_buff *skb;
357         struct nlmsgerr *e;
358
359         atomic_dec(&mrt->cache_resolve_queue_len);
360
361         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
362                 if (ip_hdr(skb)->version == 0) {
363                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
364                         nlh->nlmsg_type = NLMSG_ERROR;
365                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
366                         skb_trim(skb, nlh->nlmsg_len);
367                         e = NLMSG_DATA(nlh);
368                         e->error = -ETIMEDOUT;
369                         memset(&e->msg, 0, sizeof(e->msg));
370
371                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
372                 } else
373                         kfree_skb(skb);
374         }
375
376         ipmr_cache_free(c);
377 }
378
379
380 /* Timer process for the unresolved queue. */
381
382 static void ipmr_expire_process(unsigned long arg)
383 {
384         struct mr_table *mrt = (struct mr_table *)arg;
385         unsigned long now;
386         unsigned long expires;
387         struct mfc_cache *c, *next;
388
389         if (!spin_trylock(&mfc_unres_lock)) {
390                 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
391                 return;
392         }
393
394         if (list_empty(&mrt->mfc_unres_queue))
395                 goto out;
396
397         now = jiffies;
398         expires = 10*HZ;
399
400         list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
401                 if (time_after(c->mfc_un.unres.expires, now)) {
402                         unsigned long interval = c->mfc_un.unres.expires - now;
403                         if (interval < expires)
404                                 expires = interval;
405                         continue;
406                 }
407
408                 list_del(&c->list);
409                 ipmr_destroy_unres(mrt, c);
410         }
411
412         if (!list_empty(&mrt->mfc_unres_queue))
413                 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
414
415 out:
416         spin_unlock(&mfc_unres_lock);
417 }
418
419 /* Fill oifs list. It is called under write locked mrt_lock. */
420
421 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
422                                    unsigned char *ttls)
423 {
424         int vifi;
425
426         cache->mfc_un.res.minvif = MAXVIFS;
427         cache->mfc_un.res.maxvif = 0;
428         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
429
430         for (vifi = 0; vifi < mrt->maxvif; vifi++) {
431                 if (VIF_EXISTS(mrt, vifi) &&
432                     ttls[vifi] && ttls[vifi] < 255) {
433                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
434                         if (cache->mfc_un.res.minvif > vifi)
435                                 cache->mfc_un.res.minvif = vifi;
436                         if (cache->mfc_un.res.maxvif <= vifi)
437                                 cache->mfc_un.res.maxvif = vifi + 1;
438                 }
439         }
440 }
441
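/* The ttls[] array doubles as the oif list: 255 (or 0) means "do not
 * forward on this vif", anything else is the TTL a packet must exceed to
 * be forwarded there. For example, with ttls = {1, 255, 64} this sets
 * minvif = 0 and maxvif = 3, and a packet with iph->ttl == 2 is forwarded
 * on vif 0 (2 > 1) but not on vif 2 (2 <= 64) nor on the disabled vif 1.
 */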
442 static int vif_add(struct net *net, struct mr_table *mrt,
443                    struct vifctl *vifc, int mrtsock)
444 {
445         int vifi = vifc->vifc_vifi;
446         struct vif_device *v = &mrt->vif_table[vifi];
447         struct net_device *dev;
448         struct in_device *in_dev;
449         int err;
450
451         /* Is vif busy ? */
452         if (VIF_EXISTS(mrt, vifi))
453                 return -EADDRINUSE;
454
455         switch (vifc->vifc_flags) {
456 #ifdef CONFIG_IP_PIMSM
457         case VIFF_REGISTER:
458                 /*
459                  * Special Purpose VIF in PIM
460                  * All the packets will be sent to the daemon
461                  */
462                 if (mrt->mroute_reg_vif_num >= 0)
463                         return -EADDRINUSE;
464                 dev = ipmr_reg_vif(net);
465                 if (!dev)
466                         return -ENOBUFS;
467                 err = dev_set_allmulti(dev, 1);
468                 if (err) {
469                         unregister_netdevice(dev);
470                         dev_put(dev);
471                         return err;
472                 }
473                 break;
474 #endif
475         case VIFF_TUNNEL:
476                 dev = ipmr_new_tunnel(net, vifc);
477                 if (!dev)
478                         return -ENOBUFS;
479                 err = dev_set_allmulti(dev, 1);
480                 if (err) {
481                         ipmr_del_tunnel(dev, vifc);
482                         dev_put(dev);
483                         return err;
484                 }
485                 break;
486
487         case VIFF_USE_IFINDEX:
488         case 0:
489                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
490                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
491                         if (dev && dev->ip_ptr == NULL) {
492                                 dev_put(dev);
493                                 return -EADDRNOTAVAIL;
494                         }
495                 } else
496                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
497
498                 if (!dev)
499                         return -EADDRNOTAVAIL;
500                 err = dev_set_allmulti(dev, 1);
501                 if (err) {
502                         dev_put(dev);
503                         return err;
504                 }
505                 break;
506         default:
507                 return -EINVAL;
508         }
509
510         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
511                 dev_put(dev);
512                 return -EADDRNOTAVAIL;
513         }
514         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
515         ip_rt_multicast_event(in_dev);
516
517         /*
518          *      Fill in the VIF structures
519          */
520         v->rate_limit = vifc->vifc_rate_limit;
521         v->local = vifc->vifc_lcl_addr.s_addr;
522         v->remote = vifc->vifc_rmt_addr.s_addr;
523         v->flags = vifc->vifc_flags;
524         if (!mrtsock)
525                 v->flags |= VIFF_STATIC;
526         v->threshold = vifc->vifc_threshold;
527         v->bytes_in = 0;
528         v->bytes_out = 0;
529         v->pkt_in = 0;
530         v->pkt_out = 0;
531         v->link = dev->ifindex;
532         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
533                 v->link = dev->iflink;
534
535         /* And finish update writing critical data */
536         write_lock_bh(&mrt_lock);
537         v->dev = dev;
538 #ifdef CONFIG_IP_PIMSM
539         if (v->flags&VIFF_REGISTER)
540                 mrt->mroute_reg_vif_num = vifi;
541 #endif
542         if (vifi+1 > mrt->maxvif)
543                 mrt->maxvif = vifi+1;
544         write_unlock_bh(&mrt_lock);
545         return 0;
546 }
547
548 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
549                                          __be32 origin,
550                                          __be32 mcastgrp)
551 {
552         int line = MFC_HASH(mcastgrp, origin);
553         struct mfc_cache *c;
554
555         list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
556                 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
557                         return c;
558         }
559         return NULL;
560 }
561
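/* Callers must hold mrt_lock; e.g. the SIOCGETSGCNT ioctl below does:
 *
 *	read_lock(&mrt_lock);
 *	c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
 *	...
 *	read_unlock(&mrt_lock);
 */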
562 /*
563  *      Allocate a multicast cache entry
564  */
565 static struct mfc_cache *ipmr_cache_alloc(void)
566 {
567         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
568         if (c == NULL)
569                 return NULL;
570         c->mfc_un.res.minvif = MAXVIFS;
571         return c;
572 }
573
574 static struct mfc_cache *ipmr_cache_alloc_unres(void)
575 {
576         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
577         if (c == NULL)
578                 return NULL;
579         skb_queue_head_init(&c->mfc_un.unres.unresolved);
580         c->mfc_un.unres.expires = jiffies + 10*HZ;
581         return c;
582 }
583
584 /*
585  *      A cache entry has gone into a resolved state from queued
586  */
587
588 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
589                                struct mfc_cache *uc, struct mfc_cache *c)
590 {
591         struct sk_buff *skb;
592         struct nlmsgerr *e;
593
594         /*
595          *      Play the pending entries through our router
596          */
597
598         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
599                 if (ip_hdr(skb)->version == 0) {
600                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
601
602                         if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
603                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
604                                                   (u8 *)nlh);
605                         } else {
606                                 nlh->nlmsg_type = NLMSG_ERROR;
607                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
608                                 skb_trim(skb, nlh->nlmsg_len);
609                                 e = NLMSG_DATA(nlh);
610                                 e->error = -EMSGSIZE;
611                                 memset(&e->msg, 0, sizeof(e->msg));
612                         }
613
614                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
615                 } else
616                         ip_mr_forward(net, mrt, skb, c, 0);
617         }
618 }
619
620 /*
621  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
622  *      expects the following bizarre scheme.
623  *
624  *      Called under mrt_lock.
625  */
626
627 static int ipmr_cache_report(struct mr_table *mrt,
628                              struct sk_buff *pkt, vifi_t vifi, int assert)
629 {
630         struct sk_buff *skb;
631         const int ihl = ip_hdrlen(pkt);
632         struct igmphdr *igmp;
633         struct igmpmsg *msg;
634         int ret;
635
636 #ifdef CONFIG_IP_PIMSM
637         if (assert == IGMPMSG_WHOLEPKT)
638                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
639         else
640 #endif
641                 skb = alloc_skb(128, GFP_ATOMIC);
642
643         if (!skb)
644                 return -ENOBUFS;
645
646 #ifdef CONFIG_IP_PIMSM
647         if (assert == IGMPMSG_WHOLEPKT) {
648                 /* Ugly, but we have no choice with this interface.
649                    Duplicate old header, fix ihl, length etc.
650                    And all this only to mangle msg->im_msgtype and
651                    to set msg->im_mbz to "mbz" :-)
652                  */
653                 skb_push(skb, sizeof(struct iphdr));
654                 skb_reset_network_header(skb);
655                 skb_reset_transport_header(skb);
656                 msg = (struct igmpmsg *)skb_network_header(skb);
657                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
658                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
659                 msg->im_mbz = 0;
660                 msg->im_vif = mrt->mroute_reg_vif_num;
661                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
662                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
663                                              sizeof(struct iphdr));
664         } else
665 #endif
666         {
667
668         /*
669          *      Copy the IP header
670          */
671
672         skb->network_header = skb->tail;
673         skb_put(skb, ihl);
674         skb_copy_to_linear_data(skb, pkt->data, ihl);
675         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
676         msg = (struct igmpmsg *)skb_network_header(skb);
677         msg->im_vif = vifi;
678         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
679
680         /*
681          *      Add our header
682          */
683
684         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
685         igmp->type      =
686         msg->im_msgtype = assert;
687         igmp->code      =       0;
688         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
689         skb->transport_header = skb->network_header;
690         }
691
692         if (mrt->mroute_sk == NULL) {
693                 kfree_skb(skb);
694                 return -EINVAL;
695         }
696
697         /*
698          *      Deliver to mrouted
699          */
700         ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
701         if (ret < 0) {
702                 if (net_ratelimit())
703                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
704                 kfree_skb(skb);
705         }
706
707         return ret;
708 }
709
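/* What mrouted sees on the other end (a hedged sketch of the userspace
 * side, assuming the standard <linux/mroute.h> definitions): the report
 * is read from the same raw IGMP socket that issued MRT_INIT, and
 * struct igmpmsg overlays the IP header, so im_msgtype aliases iph->ttl
 * and im_mbz aliases iph->protocol:
 *
 *	n = read(mrt_fd, buf, sizeof(buf));
 *	msg = (struct igmpmsg *)buf;
 *	switch (msg->im_msgtype) {
 *	case IGMPMSG_NOCACHE:	resolve (msg->im_src, msg->im_dst)
 *	case IGMPMSG_WRONGVIF:	PIM assert processing
 *	case IGMPMSG_WHOLEPKT:	PIM register processing
 *	}
 */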
710 /*
711  *      Queue a packet for resolution. It gets a locked cache entry!
712  */
713
714 static int
715 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
716 {
717         bool found = false;
718         int err;
719         struct mfc_cache *c;
720         const struct iphdr *iph = ip_hdr(skb);
721
722         spin_lock_bh(&mfc_unres_lock);
723         list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
724                 if (c->mfc_mcastgrp == iph->daddr &&
725                     c->mfc_origin == iph->saddr) {
726                         found = true;
727                         break;
728                 }
729         }
730
731         if (!found) {
732                 /*
733                  *      Create a new entry if allowable
734                  */
735
736                 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
737                     (c = ipmr_cache_alloc_unres()) == NULL) {
738                         spin_unlock_bh(&mfc_unres_lock);
739
740                         kfree_skb(skb);
741                         return -ENOBUFS;
742                 }
743
744                 /*
745                  *      Fill in the new cache entry
746                  */
747                 c->mfc_parent   = -1;
748                 c->mfc_origin   = iph->saddr;
749                 c->mfc_mcastgrp = iph->daddr;
750
751                 /*
752                  *      Reflect first query at mrouted.
753                  */
754                 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
755                 if (err < 0) {
756                         /* If the report failed throw the cache entry
757                            out - Brad Parker
758                          */
759                         spin_unlock_bh(&mfc_unres_lock);
760
761                         ipmr_cache_free(c);
762                         kfree_skb(skb);
763                         return err;
764                 }
765
766                 atomic_inc(&mrt->cache_resolve_queue_len);
767                 list_add(&c->list, &mrt->mfc_unres_queue);
768
769                 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
770         }
771
772         /*
773          *      See if we can append the packet
774          */
775         if (c->mfc_un.unres.unresolved.qlen>3) {
776                 kfree_skb(skb);
777                 err = -ENOBUFS;
778         } else {
779                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
780                 err = 0;
781         }
782
783         spin_unlock_bh(&mfc_unres_lock);
784         return err;
785 }
786
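/* Lifecycle of an unresolved entry, as implemented above: the first
 * packet for an unknown (S,G) creates the entry and raises an
 * IGMPMSG_NOCACHE upcall; at most three further packets are parked on
 * c->mfc_un.unres.unresolved. Either the daemon answers with
 * MRT_ADD_MFC, in which case ipmr_cache_resolve() replays the queue, or
 * the 10 second expiry timer fires and ipmr_destroy_unres() drops
 * everything.
 */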
787 /*
788  *      MFC cache manipulation by user space mroute daemon
789  */
790
791 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
792 {
793         int line;
794         struct mfc_cache *c, *next;
795
796         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
797
798         list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
799                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
800                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
801                         write_lock_bh(&mrt_lock);
802                         list_del(&c->list);
803                         write_unlock_bh(&mrt_lock);
804
805                         ipmr_cache_free(c);
806                         return 0;
807                 }
808         }
809         return -ENOENT;
810 }
811
812 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
813                         struct mfcctl *mfc, int mrtsock)
814 {
815         bool found = false;
816         int line;
817         struct mfc_cache *uc, *c;
818
819         if (mfc->mfcc_parent >= MAXVIFS)
820                 return -ENFILE;
821
822         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
823
824         list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
825                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
826                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
827                         found = true;
828                         break;
829                 }
830         }
831
832         if (found) {
833                 write_lock_bh(&mrt_lock);
834                 c->mfc_parent = mfc->mfcc_parent;
835                 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
836                 if (!mrtsock)
837                         c->mfc_flags |= MFC_STATIC;
838                 write_unlock_bh(&mrt_lock);
839                 return 0;
840         }
841
842         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
843                 return -EINVAL;
844
845         c = ipmr_cache_alloc();
846         if (c == NULL)
847                 return -ENOMEM;
848
849         c->mfc_origin = mfc->mfcc_origin.s_addr;
850         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
851         c->mfc_parent = mfc->mfcc_parent;
852         ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
853         if (!mrtsock)
854                 c->mfc_flags |= MFC_STATIC;
855
856         write_lock_bh(&mrt_lock);
857         list_add(&c->list, &mrt->mfc_cache_array[line]);
858         write_unlock_bh(&mrt_lock);
859
860         /*
861          *      Check to see if we resolved a queued entry. If so we
862          *      need to send the queued frames on and tidy up.
863          */
864         spin_lock_bh(&mfc_unres_lock);
865         list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
866                 if (uc->mfc_origin == c->mfc_origin &&
867                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
868                         list_del(&uc->list);
869                         atomic_dec(&mrt->cache_resolve_queue_len);
                            found = true;
870                         break;
871                 }
872         }
873         if (list_empty(&mrt->mfc_unres_queue))
874                 del_timer(&mrt->ipmr_expire_timer);
875         spin_unlock_bh(&mfc_unres_lock);
876
877         if (found) {
878                 ipmr_cache_resolve(net, mrt, uc, c);
879                 ipmr_cache_free(uc);
880         }
881         return 0;
882 }
883
884 /*
885  *      Close the multicast socket, and clear the vif tables etc
886  */
887
888 static void mroute_clean_tables(struct mr_table *mrt)
889 {
890         int i;
891         LIST_HEAD(list);
892         struct mfc_cache *c, *next;
893
894         /*
895          *      Shut down all active vif entries
896          */
897         for (i = 0; i < mrt->maxvif; i++) {
898                 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
899                         vif_delete(mrt, i, 0, &list);
900         }
901         unregister_netdevice_many(&list);
902
903         /*
904          *      Wipe the cache
905          */
906         for (i = 0; i < MFC_LINES; i++) {
907                 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
908                         if (c->mfc_flags&MFC_STATIC)
909                                 continue;
910                         write_lock_bh(&mrt_lock);
911                         list_del(&c->list);
912                         write_unlock_bh(&mrt_lock);
913
914                         ipmr_cache_free(c);
915                 }
916         }
917
918         if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
919                 spin_lock_bh(&mfc_unres_lock);
920                 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
921                         list_del(&c->list);
922                         ipmr_destroy_unres(mrt, c);
923                 }
924                 spin_unlock_bh(&mfc_unres_lock);
925         }
926 }
927
928 static void mrtsock_destruct(struct sock *sk)
929 {
930         struct net *net = sock_net(sk);
931         struct mr_table *mrt = net->ipv4.mrt;
932
933         rtnl_lock();
934         if (sk == mrt->mroute_sk) {
935                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
936
937                 write_lock_bh(&mrt_lock);
938                 mrt->mroute_sk = NULL;
939                 write_unlock_bh(&mrt_lock);
940
941                 mroute_clean_tables(mrt);
942         }
943         rtnl_unlock();
944 }
945
946 /*
947  *      Socket options and virtual interface manipulation. The whole
948  *      virtual interface system is a complete heap, but unfortunately
949  *      that's how BSD mrouted happens to think. Maybe one day with a proper
950  *      MOSPF/PIM router set up we can clean this up.
951  */
952
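/* A typical daemon bootstrap from userspace (hypothetical sketch;
 * local_ip stands in for a configured interface address): only a raw
 * IGMP socket can become the mroute socket, and MRT_INIT takes an int:
 *
 *	int one = 1;
 *	struct vifctl vc = {
 *		.vifc_vifi = 0,
 *		.vifc_threshold = 1,
 *		.vifc_lcl_addr.s_addr = local_ip,
 *	};
 *
 *	fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	setsockopt(fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *	setsockopt(fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 */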
953 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
954 {
955         int ret;
956         struct vifctl vif;
957         struct mfcctl mfc;
958         struct net *net = sock_net(sk);
959         struct mr_table *mrt = net->ipv4.mrt;
960
961         if (optname != MRT_INIT) {
962                 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
963                         return -EACCES;
964         }
965
966         switch (optname) {
967         case MRT_INIT:
968                 if (sk->sk_type != SOCK_RAW ||
969                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
970                         return -EOPNOTSUPP;
971                 if (optlen != sizeof(int))
972                         return -ENOPROTOOPT;
973
974                 rtnl_lock();
975                 if (mrt->mroute_sk) {
976                         rtnl_unlock();
977                         return -EADDRINUSE;
978                 }
979
980                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
981                 if (ret == 0) {
982                         write_lock_bh(&mrt_lock);
983                         mrt->mroute_sk = sk;
984                         write_unlock_bh(&mrt_lock);
985
986                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
987                 }
988                 rtnl_unlock();
989                 return ret;
990         case MRT_DONE:
991                 if (sk != mrt->mroute_sk)
992                         return -EACCES;
993                 return ip_ra_control(sk, 0, NULL);
994         case MRT_ADD_VIF:
995         case MRT_DEL_VIF:
996                 if (optlen != sizeof(vif))
997                         return -EINVAL;
998                 if (copy_from_user(&vif, optval, sizeof(vif)))
999                         return -EFAULT;
1000                 if (vif.vifc_vifi >= MAXVIFS)
1001                         return -ENFILE;
1002                 rtnl_lock();
1003                 if (optname == MRT_ADD_VIF) {
1004                         ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1005                 } else {
1006                         ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1007                 }
1008                 rtnl_unlock();
1009                 return ret;
1010
1011                 /*
1012                  *      Manipulate the forwarding caches. These live
1013                  *      in a sort of kernel/user symbiosis.
1014                  */
1015         case MRT_ADD_MFC:
1016         case MRT_DEL_MFC:
1017                 if (optlen != sizeof(mfc))
1018                         return -EINVAL;
1019                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1020                         return -EFAULT;
1021                 rtnl_lock();
1022                 if (optname == MRT_DEL_MFC)
1023                         ret = ipmr_mfc_delete(mrt, &mfc);
1024                 else
1025                         ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1026                 rtnl_unlock();
1027                 return ret;
1028                 /*
1029                  *      Control PIM assert.
1030                  */
1031         case MRT_ASSERT:
1032         {
1033                 int v;
1034                 if (get_user(v,(int __user *)optval))
1035                         return -EFAULT;
1036                 mrt->mroute_do_assert = (v) ? 1 : 0;
1037                 return 0;
1038         }
1039 #ifdef CONFIG_IP_PIMSM
1040         case MRT_PIM:
1041         {
1042                 int v;
1043
1044                 if (get_user(v,(int __user *)optval))
1045                         return -EFAULT;
1046                 v = (v) ? 1 : 0;
1047
1048                 rtnl_lock();
1049                 ret = 0;
1050                 if (v != mrt->mroute_do_pim) {
1051                         mrt->mroute_do_pim = v;
1052                         mrt->mroute_do_assert = v;
1053                 }
1054                 rtnl_unlock();
1055                 return ret;
1056         }
1057 #endif
1058         /*
1059          *      Spurious command, or MRT_VERSION which you cannot
1060          *      set.
1061          */
1062         default:
1063                 return -ENOPROTOOPT;
1064         }
1065 }
1066
1067 /*
1068  *      Getsock opt support for the multicast routing system.
1069  */
1070
1071 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1072 {
1073         int olr;
1074         int val;
1075         struct net *net = sock_net(sk);
1076         struct mr_table *mrt = net->ipv4.mrt;
1077
1078         if (optname != MRT_VERSION &&
1079 #ifdef CONFIG_IP_PIMSM
1080            optname != MRT_PIM &&
1081 #endif
1082            optname != MRT_ASSERT)
1083                 return -ENOPROTOOPT;
1084
1085         if (get_user(olr, optlen))
1086                 return -EFAULT;
1087
1088         if (olr < 0)
1089                 return -EINVAL;
1090         olr = min_t(unsigned int, olr, sizeof(int));
1091
1092         if (put_user(olr, optlen))
1093                 return -EFAULT;
1094         if (optname == MRT_VERSION)
1095                 val = 0x0305;
1096 #ifdef CONFIG_IP_PIMSM
1097         else if (optname == MRT_PIM)
1098                 val = mrt->mroute_do_pim;
1099 #endif
1100         else
1101                 val = mrt->mroute_do_assert;
1102         if (copy_to_user(optval, &val, olr))
1103                 return -EFAULT;
1104         return 0;
1105 }
1106
1107 /*
1108  *      The IP multicast ioctl support routines.
1109  */
1110
1111 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1112 {
1113         struct sioc_sg_req sr;
1114         struct sioc_vif_req vr;
1115         struct vif_device *vif;
1116         struct mfc_cache *c;
1117         struct net *net = sock_net(sk);
1118         struct mr_table *mrt = net->ipv4.mrt;
1119
1120         switch (cmd) {
1121         case SIOCGETVIFCNT:
1122                 if (copy_from_user(&vr, arg, sizeof(vr)))
1123                         return -EFAULT;
1124                 if (vr.vifi >= mrt->maxvif)
1125                         return -EINVAL;
1126                 read_lock(&mrt_lock);
1127                 vif = &mrt->vif_table[vr.vifi];
1128                 if (VIF_EXISTS(mrt, vr.vifi)) {
1129                         vr.icount = vif->pkt_in;
1130                         vr.ocount = vif->pkt_out;
1131                         vr.ibytes = vif->bytes_in;
1132                         vr.obytes = vif->bytes_out;
1133                         read_unlock(&mrt_lock);
1134
1135                         if (copy_to_user(arg, &vr, sizeof(vr)))
1136                                 return -EFAULT;
1137                         return 0;
1138                 }
1139                 read_unlock(&mrt_lock);
1140                 return -EADDRNOTAVAIL;
1141         case SIOCGETSGCNT:
1142                 if (copy_from_user(&sr, arg, sizeof(sr)))
1143                         return -EFAULT;
1144
1145                 read_lock(&mrt_lock);
1146                 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1147                 if (c) {
1148                         sr.pktcnt = c->mfc_un.res.pkt;
1149                         sr.bytecnt = c->mfc_un.res.bytes;
1150                         sr.wrong_if = c->mfc_un.res.wrong_if;
1151                         read_unlock(&mrt_lock);
1152
1153                         if (copy_to_user(arg, &sr, sizeof(sr)))
1154                                 return -EFAULT;
1155                         return 0;
1156                 }
1157                 read_unlock(&mrt_lock);
1158                 return -EADDRNOTAVAIL;
1159         default:
1160                 return -ENOIOCTLCMD;
1161         }
1162 }
1163
1164
1165 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1166 {
1167         struct net_device *dev = ptr;
1168         struct net *net = dev_net(dev);
1169         struct mr_table *mrt = net->ipv4.mrt;
1170         struct vif_device *v;
1171         int ct;
1172         LIST_HEAD(list);
1173
1174         if (event != NETDEV_UNREGISTER)
1175                 return NOTIFY_DONE;
1176         v = &mrt->vif_table[0];
1177         for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1178                 if (v->dev == dev)
1179                         vif_delete(mrt, ct, 1, &list);
1180         }
1181         unregister_netdevice_many(&list);
1182         return NOTIFY_DONE;
1183 }
1184
1185
1186 static struct notifier_block ip_mr_notifier = {
1187         .notifier_call = ipmr_device_event,
1188 };
1189
1190 /*
1191  *      Encapsulate a packet by attaching a valid IPIP header to it.
1192  *      This avoids tunnel drivers and other mess and gives us the speed so
1193  *      important for multicast video.
1194  */
1195
1196 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1197 {
1198         struct iphdr *iph;
1199         struct iphdr *old_iph = ip_hdr(skb);
1200
1201         skb_push(skb, sizeof(struct iphdr));
1202         skb->transport_header = skb->network_header;
1203         skb_reset_network_header(skb);
1204         iph = ip_hdr(skb);
1205
1206         iph->version    =       4;
1207         iph->tos        =       old_iph->tos;
1208         iph->ttl        =       old_iph->ttl;
1209         iph->frag_off   =       0;
1210         iph->daddr      =       daddr;
1211         iph->saddr      =       saddr;
1212         iph->protocol   =       IPPROTO_IPIP;
1213         iph->ihl        =       5;
1214         iph->tot_len    =       htons(skb->len);
1215         ip_select_ident(iph, skb_dst(skb), NULL);
1216         ip_send_check(iph);
1217
1218         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1219         nf_reset(skb);
1220 }
1221
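/* The resulting frame for a VIFF_TUNNEL vif is plain RFC 2003 IP-in-IP:
 *
 *	+----------------+----------------+----------+
 *	| outer iphdr    | original iphdr | payload  |
 *	| proto IPIP     |                |          |
 *	+----------------+----------------+----------+
 *
 * built by hand here precisely so the DVMRP tunnel does not have to be
 * pushed through a tunnel netdevice.
 */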
1222 static inline int ipmr_forward_finish(struct sk_buff *skb)
1223 {
1224         struct ip_options * opt = &(IPCB(skb)->opt);
1225
1226         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1227
1228         if (unlikely(opt->optlen))
1229                 ip_forward_options(skb);
1230
1231         return dst_output(skb);
1232 }
1233
1234 /*
1235  *      Processing handlers for ipmr_forward
1236  */
1237
1238 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1239                             struct sk_buff *skb, struct mfc_cache *c, int vifi)
1240 {
1241         const struct iphdr *iph = ip_hdr(skb);
1242         struct vif_device *vif = &mrt->vif_table[vifi];
1243         struct net_device *dev;
1244         struct rtable *rt;
1245         int    encap = 0;
1246
1247         if (vif->dev == NULL)
1248                 goto out_free;
1249
1250 #ifdef CONFIG_IP_PIMSM
1251         if (vif->flags & VIFF_REGISTER) {
1252                 vif->pkt_out++;
1253                 vif->bytes_out += skb->len;
1254                 vif->dev->stats.tx_bytes += skb->len;
1255                 vif->dev->stats.tx_packets++;
1256                 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1257                 goto out_free;
1258         }
1259 #endif
1260
1261         if (vif->flags&VIFF_TUNNEL) {
1262                 struct flowi fl = { .oif = vif->link,
1263                                     .nl_u = { .ip4_u =
1264                                               { .daddr = vif->remote,
1265                                                 .saddr = vif->local,
1266                                                 .tos = RT_TOS(iph->tos) } },
1267                                     .proto = IPPROTO_IPIP };
1268                 if (ip_route_output_key(net, &rt, &fl))
1269                         goto out_free;
1270                 encap = sizeof(struct iphdr);
1271         } else {
1272                 struct flowi fl = { .oif = vif->link,
1273                                     .nl_u = { .ip4_u =
1274                                               { .daddr = iph->daddr,
1275                                                 .tos = RT_TOS(iph->tos) } },
1276                                     .proto = IPPROTO_IPIP };
1277                 if (ip_route_output_key(net, &rt, &fl))
1278                         goto out_free;
1279         }
1280
1281         dev = rt->u.dst.dev;
1282
1283         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
1284                 /* Do not fragment multicasts. Alas, IPv4 does not
1285                    allow sending ICMP here, so oversized packets will
1286                    disappear into a black hole.
1287                  */
1288
1289                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1290                 ip_rt_put(rt);
1291                 goto out_free;
1292         }
1293
1294         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1295
1296         if (skb_cow(skb, encap)) {
1297                 ip_rt_put(rt);
1298                 goto out_free;
1299         }
1300
1301         vif->pkt_out++;
1302         vif->bytes_out += skb->len;
1303
1304         skb_dst_drop(skb);
1305         skb_dst_set(skb, &rt->u.dst);
1306         ip_decrease_ttl(ip_hdr(skb));
1307
1308         /* FIXME: forward and output firewalls used to be called here.
1309          * What do we do with netfilter? -- RR */
1310         if (vif->flags & VIFF_TUNNEL) {
1311                 ip_encap(skb, vif->local, vif->remote);
1312                 /* FIXME: extra output firewall step used to be here. --RR */
1313                 vif->dev->stats.tx_packets++;
1314                 vif->dev->stats.tx_bytes += skb->len;
1315         }
1316
1317         IPCB(skb)->flags |= IPSKB_FORWARDED;
1318
1319         /*
1320          * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
1321          * locally not only before forwarding, but also after forwarding on
1322          * all output interfaces. Clearly, if the mrouter runs a multicast
1323          * program, that program should receive packets regardless of which
1324          * interface it is joined on.
1325          * If we did not do this, the program would have to join on all
1326          * interfaces. On the other hand, a multihomed host (or router, but
1327          * not mrouter) cannot join on more than one interface - it would
1328          * result in receiving multiple packets.
1329          */
1330         NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
1331                 ipmr_forward_finish);
1332         return;
1333
1334 out_free:
1335         kfree_skb(skb);
1336         return;
1337 }
1338
1339 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1340 {
1341         int ct;
1342
1343         for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1344                 if (mrt->vif_table[ct].dev == dev)
1345                         break;
1346         }
1347         return ct;
1348 }
1349
1350 /* "local" means that we should preserve one skb (for local delivery) */
1351
1352 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1353                          struct sk_buff *skb, struct mfc_cache *cache,
1354                          int local)
1355 {
1356         int psend = -1;
1357         int vif, ct;
1358
1359         vif = cache->mfc_parent;
1360         cache->mfc_un.res.pkt++;
1361         cache->mfc_un.res.bytes += skb->len;
1362
1363         /*
1364          * Wrong interface: drop packet and (maybe) send PIM assert.
1365          */
1366         if (mrt->vif_table[vif].dev != skb->dev) {
1367                 int true_vifi;
1368
1369                 if (skb_rtable(skb)->fl.iif == 0) {
1370                         /* It is our own packet, looped back.
1371                            A very complicated situation...
1372
1373                            The best workaround until routing daemons are
1374                            fixed is not to redistribute a packet if it was
1375                            sent through the wrong interface. This means that
1376                            multicast applications WILL NOT work for
1377                            (S,G) entries whose default multicast route points
1378                            to the wrong oif. In any case, it is not a good
1379                            idea to run multicasting applications on a router.
1380                          */
1381                         goto dont_forward;
1382                 }
1383
1384                 cache->mfc_un.res.wrong_if++;
1385                 true_vifi = ipmr_find_vif(mrt, skb->dev);
1386
1387                 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1388                     /* PIM-SM uses asserts when switching from RPT to SPT,
1389                        so we cannot check that a packet arrived on an oif.
1390                        That is bad, but otherwise we would need to move a
1391                        pretty large chunk of pimd into the kernel. Ough... --ANK
1392                      */
1393                     (mrt->mroute_do_pim ||
1394                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1395                     time_after(jiffies,
1396                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1397                         cache->mfc_un.res.last_assert = jiffies;
1398                         ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1399                 }
1400                 goto dont_forward;
1401         }
1402
1403         mrt->vif_table[vif].pkt_in++;
1404         mrt->vif_table[vif].bytes_in += skb->len;
1405
1406         /*
1407          *      Forward the frame
1408          */
1409         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1410                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1411                         if (psend != -1) {
1412                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1413                                 if (skb2)
1414                                         ipmr_queue_xmit(net, mrt, skb2, cache,
1415                                                         psend);
1416                         }
1417                         psend = ct;
1418                 }
1419         }
1420         if (psend != -1) {
1421                 if (local) {
1422                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1423                         if (skb2)
1424                                 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1425                 } else {
1426                         ipmr_queue_xmit(net, mrt, skb, cache, psend);
1427                         return 0;
1428                 }
1429         }
1430
1431 dont_forward:
1432         if (!local)
1433                 kfree_skb(skb);
1434         return 0;
1435 }
1436
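/* Note the fan-out trick in the loop above: it never transmits the
 * current match directly. Instead it clones and sends the previous
 * match (psend) and remembers the new one, so the last matching vif can
 * consume the original skb without an extra clone - unless a local copy
 * must survive, in which case the tail is cloned too.
 */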
1437
1438 /*
1439  *      Multicast packets for forwarding arrive here
1440  */
1441
1442 int ip_mr_input(struct sk_buff *skb)
1443 {
1444         struct mfc_cache *cache;
1445         struct net *net = dev_net(skb->dev);
1446         struct mr_table *mrt = net->ipv4.mrt;
1447         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1448
1449         /* Packet is looped back after forwarding; it should not be
1450            forwarded a second time, but it can still be delivered locally.
1451          */
1452         if (IPCB(skb)->flags&IPSKB_FORWARDED)
1453                 goto dont_forward;
1454
1455         if (!local) {
1456                     if (IPCB(skb)->opt.router_alert) {
1457                             if (ip_call_ra_chain(skb))
1458                                     return 0;
1459                     } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
1460                             /* IGMPv1 (and broken IGMPv2 implementations such
1461                                as Cisco IOS <= 11.2(8)) do not put the router
1462                                alert option into IGMP packets destined for
1463                                routable groups. This is very bad, because it
1464                                means we could forward NO IGMP messages at all.
1465                              */
1466                             read_lock(&mrt_lock);
1467                             if (mrt->mroute_sk) {
1468                                     nf_reset(skb);
1469                                     raw_rcv(mrt->mroute_sk, skb);
1470                                     read_unlock(&mrt_lock);
1471                                     return 0;
1472                             }
1473                             read_unlock(&mrt_lock);
1474                     }
1475         }
1476
1477         read_lock(&mrt_lock);
1478         cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1479
1480         /*
1481          *      No usable cache entry
1482          */
1483         if (cache == NULL) {
1484                 int vif;
1485
1486                 if (local) {
1487                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1488                         ip_local_deliver(skb);
1489                         if (skb2 == NULL) {
1490                                 read_unlock(&mrt_lock);
1491                                 return -ENOBUFS;
1492                         }
1493                         skb = skb2;
1494                 }
1495
1496                 vif = ipmr_find_vif(mrt, skb->dev);
1497                 if (vif >= 0) {
1498                         int err = ipmr_cache_unresolved(mrt, vif, skb);
1499                         read_unlock(&mrt_lock);
1500
1501                         return err;
1502                 }
1503                 read_unlock(&mrt_lock);
1504                 kfree_skb(skb);
1505                 return -ENODEV;
1506         }
1507
1508         ip_mr_forward(net, mrt, skb, cache, local);
1509
1510         read_unlock(&mrt_lock);
1511
1512         if (local)
1513                 return ip_local_deliver(skb);
1514
1515         return 0;
1516
1517 dont_forward:
1518         if (local)
1519                 return ip_local_deliver(skb);
1520         kfree_skb(skb);
1521         return 0;
1522 }
1523
1524 #ifdef CONFIG_IP_PIMSM
1525 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1526 {
1527         struct net_device *reg_dev = NULL;
1528         struct iphdr *encap;
1529         struct net *net = dev_net(skb->dev);
1530         struct mr_table *mrt = net->ipv4.mrt;
1531
1532         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1533         /*
1534            Check that:
1535            a. packet is really destined to a multicast group
1536            b. packet is not a NULL-REGISTER
1537            c. packet is not truncated
1538          */
1539         if (!ipv4_is_multicast(encap->daddr) ||
1540             encap->tot_len == 0 ||
1541             ntohs(encap->tot_len) + pimlen > skb->len)
1542                 return 1;
1543
1544         read_lock(&mrt_lock);
1545         if (mrt->mroute_reg_vif_num >= 0)
1546                 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1547         if (reg_dev)
1548                 dev_hold(reg_dev);
1549         read_unlock(&mrt_lock);
1550
1551         if (reg_dev == NULL)
1552                 return 1;
1553
1554         skb->mac_header = skb->network_header;
1555         skb_pull(skb, (u8 *)encap - skb->data);
1556         skb_reset_network_header(skb);
1557         skb->dev = reg_dev;
1558         skb->protocol = htons(ETH_P_IP);
1559         skb->ip_summed = CHECKSUM_NONE;
1560         skb->pkt_type = PACKET_HOST;
1561         skb_dst_drop(skb);
1562         reg_dev->stats.rx_bytes += skb->len;
1563         reg_dev->stats.rx_packets++;
1564         nf_reset(skb);
1565         netif_rx(skb);
1566         dev_put(reg_dev);
1567
1568         return 0;
1569 }
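/*
 * Sketch of the REGISTER packet __pim_rcv() just unwrapped:
 *
 *	| outer IP | register header (pimlen bytes) | inner IP (multicast) |
 *	            ^ skb_transport_header(skb)      ^ encap
 *
 * Everything up to the inner header is pulled off and the packet is fed
 * back through netif_rx() as if it had been received on the pimreg
 * device, so it re-enters ip_mr_input() already decapsulated.
 */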
1570 #endif
1571
1572 #ifdef CONFIG_IP_PIMSM_V1
1573 /*
1574  * Handle IGMP messages of PIMv1
1575  */
1576
1577 int pim_rcv_v1(struct sk_buff *skb)
1578 {
1579         struct igmphdr *pim;
1580         struct net *net = dev_net(skb->dev);
1581         struct mr_table *mrt = net->ipv4.mrt;
1582
1583         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1584                 goto drop;
1585
1586         pim = igmp_hdr(skb);
1587
1588         if (!mrt->mroute_do_pim ||
1589             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1590                 goto drop;
1591
1592         if (__pim_rcv(skb, sizeof(*pim))) {
1593 drop:
1594                 kfree_skb(skb);
1595         }
1596         return 0;
1597 }
1598 #endif
1599
1600 #ifdef CONFIG_IP_PIMSM_V2
1601 static int pim_rcv(struct sk_buff *skb)
1602 {
1603         struct pimreghdr *pim;
1604
1605         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1606                 goto drop;
1607
1608         pim = (struct pimreghdr *)skb_transport_header(skb);
1609         if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
1610             (pim->flags & PIM_NULL_REGISTER) ||
1611             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1612              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1613                 goto drop;
1614
1615         if (__pim_rcv(skb, sizeof(*pim))) {
1616 drop:
1617                 kfree_skb(skb);
1618         }
1619         return 0;
1620 }
1621 #endif
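/*
 * Note the asymmetry between the two receivers: PIMv1 REGISTERs arrive
 * tunnelled inside IGMP, so pim_rcv_v1() reuses struct igmphdr and is
 * only honoured once the daemon has enabled PIM with MRT_PIM. PIMv2 is
 * a native IP protocol with its own pimreghdr, and pim_rcv() accepts a
 * checksum over either the PIM header alone or the whole packet, for
 * interoperability with older peers.
 */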
1622
1623 static int
1624 ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
1625                  struct rtmsg *rtm)
1626 {
1627         int ct;
1628         struct rtnexthop *nhp;
1629         u8 *b = skb_tail_pointer(skb);
1630         struct rtattr *mp_head;
1631
1632         /* If cache is unresolved, don't try to parse IIF and OIF */
1633         if (c->mfc_parent >= MAXVIFS)
1634                 return -ENOENT;
1635
1636         if (VIF_EXISTS(mrt, c->mfc_parent))
1637                 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1638
1639         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1640
1641         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1642                 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1643                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1644                                 goto rtattr_failure;
1645                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1646                         nhp->rtnh_flags = 0;
1647                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1648                         nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1649                         nhp->rtnh_len = sizeof(*nhp);
1650                 }
1651         }
1652         mp_head->rta_type = RTA_MULTIPATH;
1653         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1654         rtm->rtm_type = RTN_MULTICAST;
1655         return 1;
1656
1657 rtattr_failure:
1658         nlmsg_trim(skb, b);
1659         return -EMSGSIZE;
1660 }
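/*
 * The rtmsg filled in above carries the forwarding state in two
 * attributes:
 *
 *	RTA_IIF        ifindex of the incoming vif's device
 *	RTA_MULTIPATH  an array of struct rtnexthop, one per outgoing vif,
 *	               with rtnh_ifindex naming the device and rtnh_hops
 *	               reused to carry that vif's TTL threshold
 */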
1661
1662 int ipmr_get_route(struct net *net,
1663                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1664 {
1665         int err;
1666         struct mr_table *mrt = net->ipv4.mrt;
1667         struct mfc_cache *cache;
1668         struct rtable *rt = skb_rtable(skb);
1669
1670         read_lock(&mrt_lock);
1671         cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1672
1673         if (cache == NULL) {
1674                 struct sk_buff *skb2;
1675                 struct iphdr *iph;
1676                 struct net_device *dev;
1677                 int vif;
1678
1679                 if (nowait) {
1680                         read_unlock(&mrt_lock);
1681                         return -EAGAIN;
1682                 }
1683
1684                 dev = skb->dev;
1685                 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1686                         read_unlock(&mrt_lock);
1687                         return -ENODEV;
1688                 }
1689                 skb2 = skb_clone(skb, GFP_ATOMIC);
1690                 if (!skb2) {
1691                         read_unlock(&mrt_lock);
1692                         return -ENOMEM;
1693                 }
1694
1695                 skb_push(skb2, sizeof(struct iphdr));
1696                 skb_reset_network_header(skb2);
1697                 iph = ip_hdr(skb2);
1698                 iph->ihl = sizeof(struct iphdr) >> 2;
1699                 iph->saddr = rt->rt_src;
1700                 iph->daddr = rt->rt_dst;
1701                 iph->version = 0;
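                /* version 0 marks this as a pseudo-packet: once the route
                 * resolves, ipmr_cache_resolve() answers it with a netlink
                 * reply built by ipmr_fill_mroute() instead of forwarding it.
                 */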
1702                 err = ipmr_cache_unresolved(mrt, vif, skb2);
1703                 read_unlock(&mrt_lock);
1704                 return err;
1705         }
1706
1707         if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1708                 cache->mfc_flags |= MFC_NOTIFY;
1709         err = ipmr_fill_mroute(mrt, skb, cache, rtm);
1710         read_unlock(&mrt_lock);
1711         return err;
1712 }
1713
1714 #ifdef CONFIG_PROC_FS
1715 /*
1716  *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
1717  */
1718 struct ipmr_vif_iter {
1719         struct seq_net_private p;
1720         int ct;
1721 };
1722
1723 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1724                                            struct ipmr_vif_iter *iter,
1725                                            loff_t pos)
1726 {
1727         struct mr_table *mrt = net->ipv4.mrt;
1728
1729         for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
1730                 if (!VIF_EXISTS(mrt, iter->ct))
1731                         continue;
1732                 if (pos-- == 0)
1733                         return &mrt->vif_table[iter->ct];
1734         }
1735         return NULL;
1736 }
1737
1738 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1739         __acquires(mrt_lock)
1740 {
1741         struct net *net = seq_file_net(seq);
1742
1743         read_lock(&mrt_lock);
1744         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1745                 : SEQ_START_TOKEN;
1746 }
1747
1748 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1749 {
1750         struct ipmr_vif_iter *iter = seq->private;
1751         struct net *net = seq_file_net(seq);
1752         struct mr_table *mrt = net->ipv4.mrt;
1753
1754         ++*pos;
1755         if (v == SEQ_START_TOKEN)
1756                 return ipmr_vif_seq_idx(net, iter, 0);
1757
1758         while (++iter->ct < mrt->maxvif) {
1759                 if (!VIF_EXISTS(mrt, iter->ct))
1760                         continue;
1761                 return &mrt->vif_table[iter->ct];
1762         }
1763         return NULL;
1764 }
1765
1766 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1767         __releases(mrt_lock)
1768 {
1769         read_unlock(&mrt_lock);
1770 }
1771
1772 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1773 {
1774         struct net *net = seq_file_net(seq);
1775         struct mr_table *mrt = net->ipv4.mrt;
1776
1777         if (v == SEQ_START_TOKEN) {
1778                 seq_puts(seq,
1779                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1780         } else {
1781                 const struct vif_device *vif = v;
1782                 const char *name = vif->dev ? vif->dev->name : "none";
1783
1784                 seq_printf(seq,
1785                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1786                            vif - mrt->vif_table,
1787                            name, vif->bytes_in, vif->pkt_in,
1788                            vif->bytes_out, vif->pkt_out,
1789                            vif->flags, vif->local, vif->remote);
1790         }
1791         return 0;
1792 }
1793
1794 static const struct seq_operations ipmr_vif_seq_ops = {
1795         .start = ipmr_vif_seq_start,
1796         .next  = ipmr_vif_seq_next,
1797         .stop  = ipmr_vif_seq_stop,
1798         .show  = ipmr_vif_seq_show,
1799 };
1800
1801 static int ipmr_vif_open(struct inode *inode, struct file *file)
1802 {
1803         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1804                             sizeof(struct ipmr_vif_iter));
1805 }
1806
1807 static const struct file_operations ipmr_vif_fops = {
1808         .owner   = THIS_MODULE,
1809         .open    = ipmr_vif_open,
1810         .read    = seq_read,
1811         .llseek  = seq_lseek,
1812         .release = seq_release_net,
1813 };
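/*
 * Illustrative /proc/net/ip_mr_vif row (values made up), matching the
 * seq_printf() format above: vif index, device name, in/out byte and
 * packet counters, VIFF_* flags, and the local/remote addresses as raw
 * hex:
 *
 *	 0 eth0          123456     789         0       0 00000 0100000A 00000000
 */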
1814
1815 struct ipmr_mfc_iter {
1816         struct seq_net_private p;
1817         struct list_head *cache;
1818         int ct;
1819 };
1820
1821
1822 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1823                                           struct ipmr_mfc_iter *it, loff_t pos)
1824 {
1825         struct mr_table *mrt = net->ipv4.mrt;
1826         struct mfc_cache *mfc;
1827
1828         read_lock(&mrt_lock);
1829         for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
1830                 it->cache = &mrt->mfc_cache_array[it->ct];
1831                 list_for_each_entry(mfc, it->cache, list)
1832                         if (pos-- == 0)
1833                                 return mfc;
1834         }
1835         read_unlock(&mrt_lock);
1836
1837         spin_lock_bh(&mfc_unres_lock);
1838         it->cache = &mrt->mfc_unres_queue;
1839         list_for_each_entry(mfc, it->cache, list)
1840                 if (pos-- == 0)
1841                         return mfc;
1842         spin_unlock_bh(&mfc_unres_lock);
1843
1844         it->cache = NULL;
1845         return NULL;
1846 }
1847
1848
1849 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1850 {
1851         struct ipmr_mfc_iter *it = seq->private;
1852         struct net *net = seq_file_net(seq);
1853
1854         it->cache = NULL;
1855         it->ct = 0;
1856         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1857                 : SEQ_START_TOKEN;
1858 }
1859
1860 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1861 {
1862         struct mfc_cache *mfc = v;
1863         struct ipmr_mfc_iter *it = seq->private;
1864         struct net *net = seq_file_net(seq);
1865         struct mr_table *mrt = net->ipv4.mrt;
1866
1867         ++*pos;
1868
1869         if (v == SEQ_START_TOKEN)
1870                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1871
1872         if (mfc->list.next != it->cache)
1873                 return list_entry(mfc->list.next, struct mfc_cache, list);
1874
1875         if (it->cache == &mrt->mfc_unres_queue)
1876                 goto end_of_list;
1877
1878         BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
1879
1880         while (++it->ct < MFC_LINES) {
1881                 it->cache = &mrt->mfc_cache_array[it->ct];
1882                 if (list_empty(it->cache))
1883                         continue;
1884                 return list_first_entry(it->cache, struct mfc_cache, list);
1885         }
1886
1887         /* exhausted cache_array, show unresolved */
1888         read_unlock(&mrt_lock);
1889         it->cache = &mrt->mfc_unres_queue;
1890         it->ct = 0;
1891
1892         spin_lock_bh(&mfc_unres_lock);
1893         if (!list_empty(it->cache))
1894                 return list_first_entry(it->cache, struct mfc_cache, list);
1895
1896  end_of_list:
1897         spin_unlock_bh(&mfc_unres_lock);
1898         it->cache = NULL;
1899
1900         return NULL;
1901 }
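/*
 * Note the lock handoff in the iterator above: the resolved hash is
 * walked under mrt_lock, which is dropped and traded for mfc_unres_lock
 * when the walk crosses into mfc_unres_queue. That is why
 * ipmr_mfc_seq_stop() below must inspect it->cache to decide which of
 * the two locks it still holds.
 */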
1902
1903 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1904 {
1905         struct ipmr_mfc_iter *it = seq->private;
1906         struct net *net = seq_file_net(seq);
1907         struct mr_table *mrt = net->ipv4.mrt;
1908
1909         if (it->cache == &mrt->mfc_unres_queue)
1910                 spin_unlock_bh(&mfc_unres_lock);
1911         else if (it->cache == &mrt->mfc_cache_array[it->ct])
1912                 read_unlock(&mrt_lock);
1913 }
1914
1915 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1916 {
1917         int n;
1918         struct net *net = seq_file_net(seq);
1919         struct mr_table *mrt = net->ipv4.mrt;
1920
1921         if (v == SEQ_START_TOKEN) {
1922                 seq_puts(seq,
1923                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1924         } else {
1925                 const struct mfc_cache *mfc = v;
1926                 const struct ipmr_mfc_iter *it = seq->private;
1927
1928                 seq_printf(seq, "%08lX %08lX %-3hd",
1929                            (unsigned long) mfc->mfc_mcastgrp,
1930                            (unsigned long) mfc->mfc_origin,
1931                            mfc->mfc_parent);
1932
1933                 if (it->cache != &mrt->mfc_unres_queue) {
1934                         seq_printf(seq, " %8lu %8lu %8lu",
1935                                    mfc->mfc_un.res.pkt,
1936                                    mfc->mfc_un.res.bytes,
1937                                    mfc->mfc_un.res.wrong_if);
1938                         for (n = mfc->mfc_un.res.minvif;
1939                              n < mfc->mfc_un.res.maxvif; n++) {
1940                                 if (VIF_EXISTS(mrt, n) &&
1941                                     mfc->mfc_un.res.ttls[n] < 255)
1942                                         seq_printf(seq,
1943                                            " %2d:%-3d",
1944                                            n, mfc->mfc_un.res.ttls[n]);
1945                         }
1946                 } else {
1947                         /* unresolved mfc_caches don't contain
1948                          * pkt, bytes and wrong_if values
1949                          */
1950                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1951                 }
1952                 seq_putc(seq, '\n');
1953         }
1954         return 0;
1955 }
1956
1957 static const struct seq_operations ipmr_mfc_seq_ops = {
1958         .start = ipmr_mfc_seq_start,
1959         .next  = ipmr_mfc_seq_next,
1960         .stop  = ipmr_mfc_seq_stop,
1961         .show  = ipmr_mfc_seq_show,
1962 };
1963
1964 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1965 {
1966         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1967                             sizeof(struct ipmr_mfc_iter));
1968 }
1969
1970 static const struct file_operations ipmr_mfc_fops = {
1971         .owner   = THIS_MODULE,
1972         .open    = ipmr_mfc_open,
1973         .read    = seq_read,
1974         .llseek  = seq_lseek,
1975         .release = seq_release_net,
1976 };
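/*
 * Illustrative /proc/net/ip_mr_cache row (values made up): group and
 * origin as raw hex, the input vif, packet/byte/wrong-interface
 * counters, then one "oif:ttl" pair per live outgoing vif; unresolved
 * entries print zeroed counters and no oif list:
 *
 *	010000E0 0100000A 1         42     6300        0  2:1   3:1
 */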
1977 #endif
1978
1979 #ifdef CONFIG_IP_PIMSM_V2
1980 static const struct net_protocol pim_protocol = {
1981         .handler        =       pim_rcv,
1982         .netns_ok       =       1,
1983 };
1984 #endif
1985
1986
1987 /*
1988  *      Setup for IP multicast routing
1989  */
1990 static int __net_init ipmr_net_init(struct net *net)
1991 {
1992         struct mr_table *mrt;
1993         unsigned int i;
1994         int err = 0;
1995
1996         mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
1997         if (mrt == NULL) {
1998                 err = -ENOMEM;
1999                 goto fail;
2000         }
2001
2002         /* Forwarding cache */
2003         for (i = 0; i < MFC_LINES; i++)
2004                 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
2005
2006         INIT_LIST_HEAD(&mrt->mfc_unres_queue);
2007
2008         setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
2009                     (unsigned long)net);
2010
2011 #ifdef CONFIG_IP_PIMSM
2012         mrt->mroute_reg_vif_num = -1;
2013 #endif
2014
2015 #ifdef CONFIG_PROC_FS
2016         err = -ENOMEM;
2017         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2018                 goto proc_vif_fail;
2019         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2020                 goto proc_cache_fail;
2021 #endif
2022
2023         net->ipv4.mrt = mrt;
2024         return 0;
2025
2026 #ifdef CONFIG_PROC_FS
2027 proc_cache_fail:
2028         proc_net_remove(net, "ip_mr_vif");
2029 proc_vif_fail:
2030         kfree(mrt);
2031 #endif
2032 fail:
2033         return err;
2034 }
2035
2036 static void __net_exit ipmr_net_exit(struct net *net)
2037 {
2038 #ifdef CONFIG_PROC_FS
2039         proc_net_remove(net, "ip_mr_cache");
2040         proc_net_remove(net, "ip_mr_vif");
2041 #endif
2042         kfree(net->ipv4.mrt);
2043 }
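/*
 * All of the state that used to live at file scope now hangs off the
 * per-namespace net->ipv4.mrt allocated in ipmr_net_init(), so every
 * network namespace gets its own vif table, caches and /proc entries;
 * ipmr_net_exit() removes the /proc entries and frees the table.
 */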
2044
2045 static struct pernet_operations ipmr_net_ops = {
2046         .init = ipmr_net_init,
2047         .exit = ipmr_net_exit,
2048 };
2049
2050 int __init ip_mr_init(void)
2051 {
2052         int err;
2053
2054         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2055                                        sizeof(struct mfc_cache),
2056                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2057                                        NULL);
2058         if (!mrt_cachep)
2059                 return -ENOMEM;
2060
2061         err = register_pernet_subsys(&ipmr_net_ops);
2062         if (err)
2063                 goto reg_pernet_fail;
2064
2065         err = register_netdevice_notifier(&ip_mr_notifier);
2066         if (err)
2067                 goto reg_notif_fail;
2068 #ifdef CONFIG_IP_PIMSM_V2
2069         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2070                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2071                 err = -EAGAIN;
2072                 goto add_proto_fail;
2073         }
2074 #endif
2075         return 0;
2076
2077 #ifdef CONFIG_IP_PIMSM_V2
2078 add_proto_fail:
2079         unregister_netdevice_notifier(&ip_mr_notifier);
2080 #endif
2081 reg_notif_fail:
2082         unregister_pernet_subsys(&ipmr_net_ops);
2083 reg_pernet_fail:
2084         kmem_cache_destroy(mrt_cachep);
2085         return err;
2086 }
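/*
 * Unwinding in ip_mr_init() is the usual inverted-goto ladder: each
 * failure label undoes exactly the registrations that succeeded before
 * it, in reverse order. inet_add_protocol() can only fail here if some
 * other handler already claimed IPPROTO_PIM, hence the -EAGAIN.
 */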