net/ipv4/ipmr.c
1 /*
2  *      IP multicast routing support for mrouted 3.6/3.8
3  *
4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
5  *        Linux Consultancy and Custom Driver Development
6  *
7  *      This program is free software; you can redistribute it and/or
8  *      modify it under the terms of the GNU General Public License
9  *      as published by the Free Software Foundation; either version
10  *      2 of the License, or (at your option) any later version.
11  *
12  *      Fixes:
13  *      Michael Chastain        :       Incorrect size of copying.
14  *      Alan Cox                :       Added the cache manager code
15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
16  *      Mike McLagan            :       Routing by source
17  *      Malcolm Beattie         :       Buffer handling fixes.
18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
19  *      SVR Anand               :       Fixed several multicast bugs and problems.
20  *      Alexey Kuznetsov        :       Status, optimisations and more.
21  *      Brad Parker             :       Better behaviour on mrouted upcall
22  *                                      overflow.
23  *      Carlos Picoto           :       PIMv1 Support
24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
25  *                                      Relax this requirement to work with older peers.
26  *
27  */
28
29 #include <asm/system.h>
30 #include <asm/uaccess.h>
31 #include <linux/types.h>
32 #include <linux/capability.h>
33 #include <linux/errno.h>
34 #include <linux/timer.h>
35 #include <linux/mm.h>
36 #include <linux/kernel.h>
37 #include <linux/fcntl.h>
38 #include <linux/stat.h>
39 #include <linux/socket.h>
40 #include <linux/in.h>
41 #include <linux/inet.h>
42 #include <linux/netdevice.h>
43 #include <linux/inetdevice.h>
44 #include <linux/igmp.h>
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #include <linux/mroute.h>
48 #include <linux/init.h>
49 #include <linux/if_ether.h>
50 #include <linux/slab.h>
51 #include <net/net_namespace.h>
52 #include <net/ip.h>
53 #include <net/protocol.h>
54 #include <linux/skbuff.h>
55 #include <net/route.h>
56 #include <net/sock.h>
57 #include <net/icmp.h>
58 #include <net/udp.h>
59 #include <net/raw.h>
60 #include <linux/notifier.h>
61 #include <linux/if_arp.h>
62 #include <linux/netfilter_ipv4.h>
63 #include <net/ipip.h>
64 #include <net/checksum.h>
65 #include <net/netlink.h>
66 #include <net/fib_rules.h>
67
68 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
69 #define CONFIG_IP_PIMSM 1
70 #endif
71
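/*
 * One multicast routing table.  With CONFIG_IP_MROUTE_MULTIPLE_TABLES
 * several of these hang off net->ipv4.mr_tables and are selected via
 * fib rules; otherwise net->ipv4.mrt points at the single instance.
 */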
72 struct mr_table {
73         struct list_head        list;
74 #ifdef CONFIG_NET_NS
75         struct net              *net;
76 #endif
77         u32                     id;
78         struct sock             *mroute_sk;
79         struct timer_list       ipmr_expire_timer;
80         struct list_head        mfc_unres_queue;
81         struct list_head        mfc_cache_array[MFC_LINES];
82         struct vif_device       vif_table[MAXVIFS];
83         int                     maxvif;
84         atomic_t                cache_resolve_queue_len;
85         int                     mroute_do_assert;
86         int                     mroute_do_pim;
87 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
88         int                     mroute_reg_vif_num;
89 #endif
90 };
91
92 struct ipmr_rule {
93         struct fib_rule         common;
94 };
95
96 struct ipmr_result {
97         struct mr_table         *mrt;
98 };
99
/* Big lock, protecting the vif table, the MFC cache and the mroute
   socket state.  Note that changes are serialized under rtnl_lock.
 */
103
104 static DEFINE_RWLOCK(mrt_lock);
105
106 /*
107  *      Multicast router control variables
108  */
109
110 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
111
112 /* Special spinlock for queue of unresolved entries */
113 static DEFINE_SPINLOCK(mfc_unres_lock);
114
/* We return to Alan's original scheme.  The hash table of resolved
   entries is changed only in process context and is protected by the
   weak read/write lock mrt_lock.  The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   This keeps the data path entirely free of exclusive locks.
 */
122
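/*
 * In practice that means: the data path takes read_lock(&mrt_lock) only,
 * configuration paths (running under RTNL) take write_lock_bh(&mrt_lock),
 * and the unresolved queue is accessed under mfc_unres_lock from both
 * process context and the expiry timer.
 */
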
123 static struct kmem_cache *mrt_cachep __read_mostly;
124
125 static struct mr_table *ipmr_new_table(struct net *net, u32 id);
126 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
127                          struct sk_buff *skb, struct mfc_cache *cache,
128                          int local);
129 static int ipmr_cache_report(struct mr_table *mrt,
130                              struct sk_buff *pkt, vifi_t vifi, int assert);
131 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
132                               struct mfc_cache *c, struct rtmsg *rtm);
133 static void ipmr_expire_process(unsigned long arg);
134
135 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
136 #define ipmr_for_each_table(mrt, net) \
137         list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
138
139 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
140 {
141         struct mr_table *mrt;
142
143         ipmr_for_each_table(mrt, net) {
144                 if (mrt->id == id)
145                         return mrt;
146         }
147         return NULL;
148 }
149
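/*
 * Pick the mr_table for a flow via the IPMR fib rules.  Returns 0 with
 * *mrt set on success, or a negative errno (e.g. -ENETUNREACH from an
 * unreachable rule) on failure.
 */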
150 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
151                            struct mr_table **mrt)
152 {
153         struct ipmr_result res;
154         struct fib_lookup_arg arg = { .result = &res, };
155         int err;
156
157         err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
158         if (err < 0)
159                 return err;
160         *mrt = res.mrt;
161         return 0;
162 }
163
164 static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
165                             int flags, struct fib_lookup_arg *arg)
166 {
167         struct ipmr_result *res = arg->result;
168         struct mr_table *mrt;
169
170         switch (rule->action) {
171         case FR_ACT_TO_TBL:
172                 break;
173         case FR_ACT_UNREACHABLE:
174                 return -ENETUNREACH;
175         case FR_ACT_PROHIBIT:
176                 return -EACCES;
177         case FR_ACT_BLACKHOLE:
178         default:
179                 return -EINVAL;
180         }
181
182         mrt = ipmr_get_table(rule->fr_net, rule->table);
183         if (mrt == NULL)
184                 return -EAGAIN;
185         res->mrt = mrt;
186         return 0;
187 }
188
189 static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
190 {
191         return 1;
192 }
193
194 static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
195         FRA_GENERIC_POLICY,
196 };
197
198 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
199                                struct fib_rule_hdr *frh, struct nlattr **tb)
200 {
201         return 0;
202 }
203
204 static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
205                              struct nlattr **tb)
206 {
207         return 1;
208 }
209
210 static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
211                           struct fib_rule_hdr *frh)
212 {
213         frh->dst_len = 0;
214         frh->src_len = 0;
215         frh->tos     = 0;
216         return 0;
217 }
218
219 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
220         .family         = RTNL_FAMILY_IPMR,
221         .rule_size      = sizeof(struct ipmr_rule),
222         .addr_size      = sizeof(u32),
223         .action         = ipmr_rule_action,
224         .match          = ipmr_rule_match,
225         .configure      = ipmr_rule_configure,
226         .compare        = ipmr_rule_compare,
227         .default_pref   = fib_default_rule_pref,
228         .fill           = ipmr_rule_fill,
229         .nlgroup        = RTNLGRP_IPV4_RULE,
230         .policy         = ipmr_rule_policy,
231         .owner          = THIS_MODULE,
232 };
233
234 static int __net_init ipmr_rules_init(struct net *net)
235 {
236         struct fib_rules_ops *ops;
237         struct mr_table *mrt;
238         int err;
239
240         ops = fib_rules_register(&ipmr_rules_ops_template, net);
241         if (IS_ERR(ops))
242                 return PTR_ERR(ops);
243
244         INIT_LIST_HEAD(&net->ipv4.mr_tables);
245
246         mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
247         if (mrt == NULL) {
248                 err = -ENOMEM;
249                 goto err1;
250         }
251
252         err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
253         if (err < 0)
254                 goto err2;
255
256         net->ipv4.mr_rules_ops = ops;
257         return 0;
258
259 err2:
260         kfree(mrt);
261 err1:
262         fib_rules_unregister(ops);
263         return err;
264 }
265
266 static void __net_exit ipmr_rules_exit(struct net *net)
267 {
268         struct mr_table *mrt, *next;
269
        list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
                list_del(&mrt->list);
                kfree(mrt);
        }
272         fib_rules_unregister(net->ipv4.mr_rules_ops);
273 }
274 #else
275 #define ipmr_for_each_table(mrt, net) \
276         for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
277
278 static struct mr_table *ipmr_get_table(struct net *net, u32 id)
279 {
280         return net->ipv4.mrt;
281 }
282
283 static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
284                            struct mr_table **mrt)
285 {
286         *mrt = net->ipv4.mrt;
287         return 0;
288 }
289
290 static int __net_init ipmr_rules_init(struct net *net)
291 {
292         net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
293         return net->ipv4.mrt ? 0 : -ENOMEM;
294 }
295
296 static void __net_exit ipmr_rules_exit(struct net *net)
297 {
298         kfree(net->ipv4.mrt);
299 }
300 #endif
301
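/*
 * Look up a table by id, creating it on demand: the MFC hash lists, the
 * unresolved queue and the expiry timer are initialised here.
 */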
302 static struct mr_table *ipmr_new_table(struct net *net, u32 id)
303 {
304         struct mr_table *mrt;
305         unsigned int i;
306
307         mrt = ipmr_get_table(net, id);
308         if (mrt != NULL)
309                 return mrt;
310
311         mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
312         if (mrt == NULL)
313                 return NULL;
314         write_pnet(&mrt->net, net);
315         mrt->id = id;
316
317         /* Forwarding cache */
318         for (i = 0; i < MFC_LINES; i++)
319                 INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
320
321         INIT_LIST_HEAD(&mrt->mfc_unres_queue);
322
323         setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
324                     (unsigned long)mrt);
325
326 #ifdef CONFIG_IP_PIMSM
327         mrt->mroute_reg_vif_num = -1;
328 #endif
329 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
330         list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
331 #endif
332         return mrt;
333 }
334
335 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
336
337 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
338 {
339         struct net *net = dev_net(dev);
340
341         dev_close(dev);
342
343         dev = __dev_get_by_name(net, "tunl0");
344         if (dev) {
345                 const struct net_device_ops *ops = dev->netdev_ops;
346                 struct ifreq ifr;
347                 struct ip_tunnel_parm p;
348
349                 memset(&p, 0, sizeof(p));
350                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
351                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
352                 p.iph.version = 4;
353                 p.iph.ihl = 5;
354                 p.iph.protocol = IPPROTO_IPIP;
355                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
356                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
357
358                 if (ops->ndo_do_ioctl) {
359                         mm_segment_t oldfs = get_fs();
360
361                         set_fs(KERNEL_DS);
362                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
363                         set_fs(oldfs);
364                 }
365         }
366 }
367
368 static
369 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
370 {
371         struct net_device  *dev;
372
373         dev = __dev_get_by_name(net, "tunl0");
374
375         if (dev) {
376                 const struct net_device_ops *ops = dev->netdev_ops;
377                 int err;
378                 struct ifreq ifr;
379                 struct ip_tunnel_parm p;
380                 struct in_device  *in_dev;
381
382                 memset(&p, 0, sizeof(p));
383                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
384                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
385                 p.iph.version = 4;
386                 p.iph.ihl = 5;
387                 p.iph.protocol = IPPROTO_IPIP;
388                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
389                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
390
391                 if (ops->ndo_do_ioctl) {
392                         mm_segment_t oldfs = get_fs();
393
394                         set_fs(KERNEL_DS);
395                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
396                         set_fs(oldfs);
397                 } else
398                         err = -EOPNOTSUPP;
399
400                 dev = NULL;
401
402                 if (err == 0 &&
403                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
404                         dev->flags |= IFF_MULTICAST;
405
406                         in_dev = __in_dev_get_rtnl(dev);
407                         if (in_dev == NULL)
408                                 goto failure;
409
410                         ipv4_devconf_setall(in_dev);
411                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
412
413                         if (dev_open(dev))
414                                 goto failure;
415                         dev_hold(dev);
416                 }
417         }
418         return dev;
419
420 failure:
421         /* allow the register to be completed before unregistering. */
422         rtnl_unlock();
423         rtnl_lock();
424
425         unregister_netdevice(dev);
426         return NULL;
427 }
428
429 #ifdef CONFIG_IP_PIMSM
430
431 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
432 {
433         struct net *net = dev_net(dev);
434         struct mr_table *mrt;
435         struct flowi fl = {
436                 .oif            = dev->ifindex,
437                 .iif            = skb->skb_iif,
438                 .mark           = skb->mark,
439         };
440         int err;
441
        err = ipmr_fib_lookup(net, &fl, &mrt);
        if (err < 0) {
                kfree_skb(skb);         /* don't leak the skb on lookup failure */
                return err;
        }
445
446         read_lock(&mrt_lock);
447         dev->stats.tx_bytes += skb->len;
448         dev->stats.tx_packets++;
449         ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
450         read_unlock(&mrt_lock);
451         kfree_skb(skb);
452         return NETDEV_TX_OK;
453 }
454
455 static const struct net_device_ops reg_vif_netdev_ops = {
456         .ndo_start_xmit = reg_vif_xmit,
457 };
458
459 static void reg_vif_setup(struct net_device *dev)
460 {
461         dev->type               = ARPHRD_PIMREG;
462         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
463         dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
465         dev->destructor         = free_netdev;
466         dev->features           |= NETIF_F_NETNS_LOCAL;
467 }
468
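/*
 * Allocate and register the PIM register device: "pimreg" for the
 * default table, "pimregN" for table N.
 */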
469 static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
470 {
471         struct net_device *dev;
472         struct in_device *in_dev;
473         char name[IFNAMSIZ];
474
475         if (mrt->id == RT_TABLE_DEFAULT)
476                 sprintf(name, "pimreg");
477         else
478                 sprintf(name, "pimreg%u", mrt->id);
479
480         dev = alloc_netdev(0, name, reg_vif_setup);
481
482         if (dev == NULL)
483                 return NULL;
484
485         dev_net_set(dev, net);
486
487         if (register_netdevice(dev)) {
488                 free_netdev(dev);
489                 return NULL;
490         }
491         dev->iflink = 0;
492
493         rcu_read_lock();
494         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
495                 rcu_read_unlock();
496                 goto failure;
497         }
498
499         ipv4_devconf_setall(in_dev);
500         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
501         rcu_read_unlock();
502
503         if (dev_open(dev))
504                 goto failure;
505
506         dev_hold(dev);
507
508         return dev;
509
510 failure:
511         /* allow the register to be completed before unregistering. */
512         rtnl_unlock();
513         rtnl_lock();
514
515         unregister_netdevice(dev);
516         return NULL;
517 }
518 #endif
519
/*
 *      Delete a VIF entry
 *      @notify: Set to 1 if the caller is a notifier_call
 */
524
525 static int vif_delete(struct mr_table *mrt, int vifi, int notify,
526                       struct list_head *head)
527 {
528         struct vif_device *v;
529         struct net_device *dev;
530         struct in_device *in_dev;
531
532         if (vifi < 0 || vifi >= mrt->maxvif)
533                 return -EADDRNOTAVAIL;
534
535         v = &mrt->vif_table[vifi];
536
537         write_lock_bh(&mrt_lock);
538         dev = v->dev;
539         v->dev = NULL;
540
541         if (!dev) {
542                 write_unlock_bh(&mrt_lock);
543                 return -EADDRNOTAVAIL;
544         }
545
546 #ifdef CONFIG_IP_PIMSM
547         if (vifi == mrt->mroute_reg_vif_num)
548                 mrt->mroute_reg_vif_num = -1;
549 #endif
550
551         if (vifi+1 == mrt->maxvif) {
552                 int tmp;
553                 for (tmp=vifi-1; tmp>=0; tmp--) {
554                         if (VIF_EXISTS(mrt, tmp))
555                                 break;
556                 }
557                 mrt->maxvif = tmp+1;
558         }
559
560         write_unlock_bh(&mrt_lock);
561
562         dev_set_allmulti(dev, -1);
563
564         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
565                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
566                 ip_rt_multicast_event(in_dev);
567         }
568
569         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
570                 unregister_netdevice_queue(dev, head);
571
572         dev_put(dev);
573         return 0;
574 }
575
576 static inline void ipmr_cache_free(struct mfc_cache *c)
577 {
578         kmem_cache_free(mrt_cachep, c);
579 }
580
/* Destroy an unresolved cache entry, killing the queued skbs
   and reporting an error to netlink readers.
 */
584
585 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
586 {
587         struct net *net = read_pnet(&mrt->net);
588         struct sk_buff *skb;
589         struct nlmsgerr *e;
590
591         atomic_dec(&mrt->cache_resolve_queue_len);
592
593         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
594                 if (ip_hdr(skb)->version == 0) {
595                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
596                         nlh->nlmsg_type = NLMSG_ERROR;
597                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
598                         skb_trim(skb, nlh->nlmsg_len);
599                         e = NLMSG_DATA(nlh);
600                         e->error = -ETIMEDOUT;
601                         memset(&e->msg, 0, sizeof(e->msg));
602
603                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
604                 } else
605                         kfree_skb(skb);
606         }
607
608         ipmr_cache_free(c);
609 }
610
611
612 /* Timer process for the unresolved queue. */
613
614 static void ipmr_expire_process(unsigned long arg)
615 {
616         struct mr_table *mrt = (struct mr_table *)arg;
617         unsigned long now;
618         unsigned long expires;
619         struct mfc_cache *c, *next;
620
621         if (!spin_trylock(&mfc_unres_lock)) {
622                 mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
623                 return;
624         }
625
626         if (list_empty(&mrt->mfc_unres_queue))
627                 goto out;
628
629         now = jiffies;
630         expires = 10*HZ;
631
632         list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
633                 if (time_after(c->mfc_un.unres.expires, now)) {
634                         unsigned long interval = c->mfc_un.unres.expires - now;
635                         if (interval < expires)
636                                 expires = interval;
637                         continue;
638                 }
639
640                 list_del(&c->list);
641                 ipmr_destroy_unres(mrt, c);
642         }
643
644         if (!list_empty(&mrt->mfc_unres_queue))
645                 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
646
647 out:
648         spin_unlock(&mfc_unres_lock);
649 }
650
/* Fill the oif list.  Called under write-locked mrt_lock. */
652
653 static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
654                                    unsigned char *ttls)
655 {
656         int vifi;
657
658         cache->mfc_un.res.minvif = MAXVIFS;
659         cache->mfc_un.res.maxvif = 0;
660         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
661
662         for (vifi = 0; vifi < mrt->maxvif; vifi++) {
663                 if (VIF_EXISTS(mrt, vifi) &&
664                     ttls[vifi] && ttls[vifi] < 255) {
665                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
666                         if (cache->mfc_un.res.minvif > vifi)
667                                 cache->mfc_un.res.minvif = vifi;
668                         if (cache->mfc_un.res.maxvif <= vifi)
669                                 cache->mfc_un.res.maxvif = vifi + 1;
670                 }
671         }
672 }
673
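/*
 * Create vif number vifc->vifc_vifi from a userspace request: a PIM
 * register device, a DVMRP tunnel, or an ordinary device found by
 * ifindex or local address.  Called under RTNL.
 */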
674 static int vif_add(struct net *net, struct mr_table *mrt,
675                    struct vifctl *vifc, int mrtsock)
676 {
677         int vifi = vifc->vifc_vifi;
678         struct vif_device *v = &mrt->vif_table[vifi];
679         struct net_device *dev;
680         struct in_device *in_dev;
681         int err;
682
683         /* Is vif busy ? */
684         if (VIF_EXISTS(mrt, vifi))
685                 return -EADDRINUSE;
686
687         switch (vifc->vifc_flags) {
688 #ifdef CONFIG_IP_PIMSM
689         case VIFF_REGISTER:
690                 /*
691                  * Special Purpose VIF in PIM
692                  * All the packets will be sent to the daemon
693                  */
694                 if (mrt->mroute_reg_vif_num >= 0)
695                         return -EADDRINUSE;
696                 dev = ipmr_reg_vif(net, mrt);
697                 if (!dev)
698                         return -ENOBUFS;
699                 err = dev_set_allmulti(dev, 1);
700                 if (err) {
701                         unregister_netdevice(dev);
702                         dev_put(dev);
703                         return err;
704                 }
705                 break;
706 #endif
707         case VIFF_TUNNEL:
708                 dev = ipmr_new_tunnel(net, vifc);
709                 if (!dev)
710                         return -ENOBUFS;
711                 err = dev_set_allmulti(dev, 1);
712                 if (err) {
713                         ipmr_del_tunnel(dev, vifc);
714                         dev_put(dev);
715                         return err;
716                 }
717                 break;
718
719         case VIFF_USE_IFINDEX:
720         case 0:
721                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
722                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
723                         if (dev && dev->ip_ptr == NULL) {
724                                 dev_put(dev);
725                                 return -EADDRNOTAVAIL;
726                         }
727                 } else
728                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
729
730                 if (!dev)
731                         return -EADDRNOTAVAIL;
732                 err = dev_set_allmulti(dev, 1);
733                 if (err) {
734                         dev_put(dev);
735                         return err;
736                 }
737                 break;
738         default:
739                 return -EINVAL;
740         }
741
742         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
743                 dev_put(dev);
744                 return -EADDRNOTAVAIL;
745         }
746         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
747         ip_rt_multicast_event(in_dev);
748
749         /*
750          *      Fill in the VIF structures
751          */
752         v->rate_limit = vifc->vifc_rate_limit;
753         v->local = vifc->vifc_lcl_addr.s_addr;
754         v->remote = vifc->vifc_rmt_addr.s_addr;
755         v->flags = vifc->vifc_flags;
756         if (!mrtsock)
757                 v->flags |= VIFF_STATIC;
758         v->threshold = vifc->vifc_threshold;
759         v->bytes_in = 0;
760         v->bytes_out = 0;
761         v->pkt_in = 0;
762         v->pkt_out = 0;
763         v->link = dev->ifindex;
764         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
765                 v->link = dev->iflink;
766
767         /* And finish update writing critical data */
768         write_lock_bh(&mrt_lock);
769         v->dev = dev;
770 #ifdef CONFIG_IP_PIMSM
771         if (v->flags&VIFF_REGISTER)
772                 mrt->mroute_reg_vif_num = vifi;
773 #endif
774         if (vifi+1 > mrt->maxvif)
775                 mrt->maxvif = vifi+1;
776         write_unlock_bh(&mrt_lock);
777         return 0;
778 }
779
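/* Find an (origin, group) pair in the resolved cache; callers hold mrt_lock. */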
780 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
781                                          __be32 origin,
782                                          __be32 mcastgrp)
783 {
784         int line = MFC_HASH(mcastgrp, origin);
785         struct mfc_cache *c;
786
787         list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
788                 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
789                         return c;
790         }
791         return NULL;
792 }
793
794 /*
795  *      Allocate a multicast cache entry
796  */
797 static struct mfc_cache *ipmr_cache_alloc(void)
798 {
799         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
800         if (c == NULL)
801                 return NULL;
802         c->mfc_un.res.minvif = MAXVIFS;
803         return c;
804 }
805
806 static struct mfc_cache *ipmr_cache_alloc_unres(void)
807 {
808         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
809         if (c == NULL)
810                 return NULL;
811         skb_queue_head_init(&c->mfc_un.unres.unresolved);
812         c->mfc_un.unres.expires = jiffies + 10*HZ;
813         return c;
814 }
815
816 /*
817  *      A cache entry has gone into a resolved state from queued
818  */
819
820 static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
821                                struct mfc_cache *uc, struct mfc_cache *c)
822 {
823         struct sk_buff *skb;
824         struct nlmsgerr *e;
825
826         /*
827          *      Play the pending entries through our router
828          */
829
830         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
831                 if (ip_hdr(skb)->version == 0) {
832                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
833
834                         if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
835                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
836                                                   (u8 *)nlh);
837                         } else {
838                                 nlh->nlmsg_type = NLMSG_ERROR;
839                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
840                                 skb_trim(skb, nlh->nlmsg_len);
841                                 e = NLMSG_DATA(nlh);
842                                 e->error = -EMSGSIZE;
843                                 memset(&e->msg, 0, sizeof(e->msg));
844                         }
845
846                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
847                 } else
848                         ip_mr_forward(net, mrt, skb, c, 0);
849         }
850 }
851
852 /*
853  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
854  *      expects the following bizarre scheme.
855  *
856  *      Called under mrt_lock.
857  */
858
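/*
 * Userspace view (a sketch, not kernel code): the daemon reads these
 * upcalls as raw IGMP datagrams on its mroute socket and dispatches on
 * the overlaid struct igmpmsg, along the lines of
 *
 *	n = read(mroute_sock, buf, sizeof(buf));
 *	msg = (struct igmpmsg *)buf;
 *	if (msg->im_msgtype == IGMPMSG_NOCACHE)
 *		... resolve and install an MFC entry via MRT_ADD_MFC ...
 *
 * Real daemons parse the full packet; this is only illustrative.
 */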
859 static int ipmr_cache_report(struct mr_table *mrt,
860                              struct sk_buff *pkt, vifi_t vifi, int assert)
861 {
862         struct sk_buff *skb;
863         const int ihl = ip_hdrlen(pkt);
864         struct igmphdr *igmp;
865         struct igmpmsg *msg;
866         int ret;
867
868 #ifdef CONFIG_IP_PIMSM
869         if (assert == IGMPMSG_WHOLEPKT)
870                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
871         else
872 #endif
873                 skb = alloc_skb(128, GFP_ATOMIC);
874
875         if (!skb)
876                 return -ENOBUFS;
877
878 #ifdef CONFIG_IP_PIMSM
879         if (assert == IGMPMSG_WHOLEPKT) {
880                 /* Ugly, but we have no choice with this interface.
881                    Duplicate old header, fix ihl, length etc.
882                    And all this only to mangle msg->im_msgtype and
883                    to set msg->im_mbz to "mbz" :-)
884                  */
885                 skb_push(skb, sizeof(struct iphdr));
886                 skb_reset_network_header(skb);
887                 skb_reset_transport_header(skb);
888                 msg = (struct igmpmsg *)skb_network_header(skb);
889                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
890                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
891                 msg->im_mbz = 0;
892                 msg->im_vif = mrt->mroute_reg_vif_num;
893                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
894                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
895                                              sizeof(struct iphdr));
896         } else
897 #endif
898         {
899
900         /*
901          *      Copy the IP header
902          */
903
904         skb->network_header = skb->tail;
905         skb_put(skb, ihl);
906         skb_copy_to_linear_data(skb, pkt->data, ihl);
907         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
908         msg = (struct igmpmsg *)skb_network_header(skb);
909         msg->im_vif = vifi;
910         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
911
912         /*
913          *      Add our header
914          */
915
        igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
        igmp->code      = 0;
920         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
921         skb->transport_header = skb->network_header;
922         }
923
924         if (mrt->mroute_sk == NULL) {
925                 kfree_skb(skb);
926                 return -EINVAL;
927         }
928
929         /*
930          *      Deliver to mrouted
931          */
932         ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
933         if (ret < 0) {
934                 if (net_ratelimit())
935                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
936                 kfree_skb(skb);
937         }
938
939         return ret;
940 }
941
/*
 *      Queue a packet for resolution.  The packet is attached to a
 *      locked unresolved-cache entry.
 */
945
946 static int
947 ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
948 {
949         bool found = false;
950         int err;
951         struct mfc_cache *c;
952         const struct iphdr *iph = ip_hdr(skb);
953
954         spin_lock_bh(&mfc_unres_lock);
955         list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
956                 if (c->mfc_mcastgrp == iph->daddr &&
957                     c->mfc_origin == iph->saddr) {
958                         found = true;
959                         break;
960                 }
961         }
962
963         if (!found) {
964                 /*
965                  *      Create a new entry if allowable
966                  */
967
968                 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
969                     (c = ipmr_cache_alloc_unres()) == NULL) {
970                         spin_unlock_bh(&mfc_unres_lock);
971
972                         kfree_skb(skb);
973                         return -ENOBUFS;
974                 }
975
976                 /*
977                  *      Fill in the new cache entry
978                  */
979                 c->mfc_parent   = -1;
980                 c->mfc_origin   = iph->saddr;
981                 c->mfc_mcastgrp = iph->daddr;
982
983                 /*
984                  *      Reflect first query at mrouted.
985                  */
986                 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
987                 if (err < 0) {
988                         /* If the report failed throw the cache entry
989                            out - Brad Parker
990                          */
991                         spin_unlock_bh(&mfc_unres_lock);
992
993                         ipmr_cache_free(c);
994                         kfree_skb(skb);
995                         return err;
996                 }
997
998                 atomic_inc(&mrt->cache_resolve_queue_len);
999                 list_add(&c->list, &mrt->mfc_unres_queue);
1000
1001                 if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
1002                         mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1003         }
1004
1005         /*
1006          *      See if we can append the packet
1007          */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
1009                 kfree_skb(skb);
1010                 err = -ENOBUFS;
1011         } else {
1012                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1013                 err = 0;
1014         }
1015
1016         spin_unlock_bh(&mfc_unres_lock);
1017         return err;
1018 }
1019
1020 /*
1021  *      MFC cache manipulation by user space mroute daemon
1022  */
1023
1024 static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1025 {
1026         int line;
1027         struct mfc_cache *c, *next;
1028
1029         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1030
1031         list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1032                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1033                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1034                         write_lock_bh(&mrt_lock);
1035                         list_del(&c->list);
1036                         write_unlock_bh(&mrt_lock);
1037
1038                         ipmr_cache_free(c);
1039                         return 0;
1040                 }
1041         }
1042         return -ENOENT;
1043 }
1044
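/*
 * Install or update a resolved MFC entry; if it resolves a queued
 * unresolved entry, replay the pending frames through the router.
 */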
1045 static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1046                         struct mfcctl *mfc, int mrtsock)
1047 {
1048         bool found = false;
1049         int line;
1050         struct mfc_cache *uc, *c;
1051
1052         if (mfc->mfcc_parent >= MAXVIFS)
1053                 return -ENFILE;
1054
1055         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
1056
1057         list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
1058                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1059                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1060                         found = true;
1061                         break;
1062                 }
1063         }
1064
1065         if (found) {
1066                 write_lock_bh(&mrt_lock);
1067                 c->mfc_parent = mfc->mfcc_parent;
1068                 ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1069                 if (!mrtsock)
1070                         c->mfc_flags |= MFC_STATIC;
1071                 write_unlock_bh(&mrt_lock);
1072                 return 0;
1073         }
1074
1075         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
1076                 return -EINVAL;
1077
1078         c = ipmr_cache_alloc();
1079         if (c == NULL)
1080                 return -ENOMEM;
1081
1082         c->mfc_origin = mfc->mfcc_origin.s_addr;
1083         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
1084         c->mfc_parent = mfc->mfcc_parent;
1085         ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
1086         if (!mrtsock)
1087                 c->mfc_flags |= MFC_STATIC;
1088
1089         write_lock_bh(&mrt_lock);
1090         list_add(&c->list, &mrt->mfc_cache_array[line]);
1091         write_unlock_bh(&mrt_lock);
1092
        /*
         *      Check whether we just resolved a queued unresolved entry.
         *      If so, send the queued frames on and tidy up.
         */
1097         found = false;
1098         spin_lock_bh(&mfc_unres_lock);
1099         list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
1100                 if (uc->mfc_origin == c->mfc_origin &&
1101                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
1102                         list_del(&uc->list);
1103                         atomic_dec(&mrt->cache_resolve_queue_len);
1104                         found = true;
1105                         break;
1106                 }
1107         }
1108         if (list_empty(&mrt->mfc_unres_queue))
1109                 del_timer(&mrt->ipmr_expire_timer);
1110         spin_unlock_bh(&mfc_unres_lock);
1111
1112         if (found) {
1113                 ipmr_cache_resolve(net, mrt, uc, c);
1114                 ipmr_cache_free(uc);
1115         }
1116         return 0;
1117 }
1118
1119 /*
1120  *      Close the multicast socket, and clear the vif tables etc
1121  */
1122
1123 static void mroute_clean_tables(struct mr_table *mrt)
1124 {
1125         int i;
1126         LIST_HEAD(list);
1127         struct mfc_cache *c, *next;
1128
1129         /*
1130          *      Shut down all active vif entries
1131          */
1132         for (i = 0; i < mrt->maxvif; i++) {
1133                 if (!(mrt->vif_table[i].flags&VIFF_STATIC))
1134                         vif_delete(mrt, i, 0, &list);
1135         }
1136         unregister_netdevice_many(&list);
1137
1138         /*
1139          *      Wipe the cache
1140          */
1141         for (i = 0; i < MFC_LINES; i++) {
1142                 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1143                         if (c->mfc_flags&MFC_STATIC)
1144                                 continue;
1145                         write_lock_bh(&mrt_lock);
1146                         list_del(&c->list);
1147                         write_unlock_bh(&mrt_lock);
1148
1149                         ipmr_cache_free(c);
1150                 }
1151         }
1152
1153         if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1154                 spin_lock_bh(&mfc_unres_lock);
1155                 list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
1156                         list_del(&c->list);
1157                         ipmr_destroy_unres(mrt, c);
1158                 }
1159                 spin_unlock_bh(&mfc_unres_lock);
1160         }
1161 }
1162
1163 static void mrtsock_destruct(struct sock *sk)
1164 {
1165         struct net *net = sock_net(sk);
1166         struct mr_table *mrt;
1167
1168         rtnl_lock();
1169         ipmr_for_each_table(mrt, net) {
1170                 if (sk == mrt->mroute_sk) {
1171                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1172
1173                         write_lock_bh(&mrt_lock);
1174                         mrt->mroute_sk = NULL;
1175                         write_unlock_bh(&mrt_lock);
1176
1177                         mroute_clean_tables(mrt);
1178                 }
1179         }
1180         rtnl_unlock();
1181 }
1182
1183 /*
1184  *      Socket options and virtual interface manipulation. The whole
1185  *      virtual interface system is a complete heap, but unfortunately
1186  *      that's how BSD mrouted happens to think. Maybe one day with a proper
1187  *      MOSPF/PIM router set up we can clean this up.
1188  */
1189
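/*
 * Userspace view (a sketch, not kernel code): a daemon typically brings
 * multicast routing up with
 *
 *	int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *	int one = 1;
 *	setsockopt(s, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 * and then issues MRT_ADD_VIF/MRT_ADD_MFC on the same socket, which is
 * exactly what the MRT_INIT checks below enforce.
 */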
1190 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1191 {
1192         int ret;
1193         struct vifctl vif;
1194         struct mfcctl mfc;
1195         struct net *net = sock_net(sk);
1196         struct mr_table *mrt;
1197
1198         mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1199         if (mrt == NULL)
1200                 return -ENOENT;
1201
1202         if (optname != MRT_INIT) {
1203                 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
1204                         return -EACCES;
1205         }
1206
1207         switch (optname) {
1208         case MRT_INIT:
1209                 if (sk->sk_type != SOCK_RAW ||
1210                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
1211                         return -EOPNOTSUPP;
1212                 if (optlen != sizeof(int))
1213                         return -ENOPROTOOPT;
1214
1215                 rtnl_lock();
1216                 if (mrt->mroute_sk) {
1217                         rtnl_unlock();
1218                         return -EADDRINUSE;
1219                 }
1220
1221                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1222                 if (ret == 0) {
1223                         write_lock_bh(&mrt_lock);
1224                         mrt->mroute_sk = sk;
1225                         write_unlock_bh(&mrt_lock);
1226
1227                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1228                 }
1229                 rtnl_unlock();
1230                 return ret;
1231         case MRT_DONE:
1232                 if (sk != mrt->mroute_sk)
1233                         return -EACCES;
1234                 return ip_ra_control(sk, 0, NULL);
1235         case MRT_ADD_VIF:
1236         case MRT_DEL_VIF:
1237                 if (optlen != sizeof(vif))
1238                         return -EINVAL;
1239                 if (copy_from_user(&vif, optval, sizeof(vif)))
1240                         return -EFAULT;
1241                 if (vif.vifc_vifi >= MAXVIFS)
1242                         return -ENFILE;
1243                 rtnl_lock();
1244                 if (optname == MRT_ADD_VIF) {
1245                         ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
1246                 } else {
1247                         ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1248                 }
1249                 rtnl_unlock();
1250                 return ret;
1251
1252                 /*
1253                  *      Manipulate the forwarding caches. These live
1254                  *      in a sort of kernel/user symbiosis.
1255                  */
1256         case MRT_ADD_MFC:
1257         case MRT_DEL_MFC:
1258                 if (optlen != sizeof(mfc))
1259                         return -EINVAL;
1260                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1261                         return -EFAULT;
1262                 rtnl_lock();
1263                 if (optname == MRT_DEL_MFC)
1264                         ret = ipmr_mfc_delete(mrt, &mfc);
1265                 else
1266                         ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
1267                 rtnl_unlock();
1268                 return ret;
1269                 /*
1270                  *      Control PIM assert.
1271                  */
1272         case MRT_ASSERT:
1273         {
1274                 int v;
                if (get_user(v, (int __user *)optval))
1276                         return -EFAULT;
1277                 mrt->mroute_do_assert = (v) ? 1 : 0;
1278                 return 0;
1279         }
1280 #ifdef CONFIG_IP_PIMSM
1281         case MRT_PIM:
1282         {
1283                 int v;
1284
                if (get_user(v, (int __user *)optval))
1286                         return -EFAULT;
1287                 v = (v) ? 1 : 0;
1288
1289                 rtnl_lock();
1290                 ret = 0;
1291                 if (v != mrt->mroute_do_pim) {
1292                         mrt->mroute_do_pim = v;
1293                         mrt->mroute_do_assert = v;
1294                 }
1295                 rtnl_unlock();
1296                 return ret;
1297         }
1298 #endif
1299 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
1300         case MRT_TABLE:
1301         {
1302                 u32 v;
1303
1304                 if (optlen != sizeof(u32))
1305                         return -EINVAL;
1306                 if (get_user(v, (u32 __user *)optval))
1307                         return -EFAULT;
1308                 if (sk == mrt->mroute_sk)
1309                         return -EBUSY;
1310
1311                 rtnl_lock();
1312                 ret = 0;
                if (!ipmr_new_table(net, v))
                        ret = -ENOMEM;
                else    /* only bind the socket to a table that actually exists */
                        raw_sk(sk)->ipmr_table = v;
1316                 rtnl_unlock();
1317                 return ret;
1318         }
1319 #endif
1320         /*
1321          *      Spurious command, or MRT_VERSION which you cannot
1322          *      set.
1323          */
1324         default:
1325                 return -ENOPROTOOPT;
1326         }
1327 }
1328
1329 /*
1330  *      Getsock opt support for the multicast routing system.
1331  */
1332
1333 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1334 {
1335         int olr;
1336         int val;
1337         struct net *net = sock_net(sk);
1338         struct mr_table *mrt;
1339
1340         mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1341         if (mrt == NULL)
1342                 return -ENOENT;
1343
        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;
1350
1351         if (get_user(olr, optlen))
1352                 return -EFAULT;
1353
        /* Reject a negative length before the unsigned min_t() hides it. */
        if (olr < 0)
                return -EINVAL;
        olr = min_t(unsigned int, olr, sizeof(int));
1357
1358         if (put_user(olr, optlen))
1359                 return -EFAULT;
1360         if (optname == MRT_VERSION)
1361                 val = 0x0305;
1362 #ifdef CONFIG_IP_PIMSM
1363         else if (optname == MRT_PIM)
1364                 val = mrt->mroute_do_pim;
1365 #endif
1366         else
1367                 val = mrt->mroute_do_assert;
1368         if (copy_to_user(optval, &val, olr))
1369                 return -EFAULT;
1370         return 0;
1371 }
1372
1373 /*
1374  *      The IP multicast ioctl support routines.
1375  */
1376
1377 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1378 {
1379         struct sioc_sg_req sr;
1380         struct sioc_vif_req vr;
1381         struct vif_device *vif;
1382         struct mfc_cache *c;
1383         struct net *net = sock_net(sk);
1384         struct mr_table *mrt;
1385
1386         mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
1387         if (mrt == NULL)
1388                 return -ENOENT;
1389
1390         switch (cmd) {
1391         case SIOCGETVIFCNT:
1392                 if (copy_from_user(&vr, arg, sizeof(vr)))
1393                         return -EFAULT;
1394                 if (vr.vifi >= mrt->maxvif)
1395                         return -EINVAL;
1396                 read_lock(&mrt_lock);
1397                 vif = &mrt->vif_table[vr.vifi];
1398                 if (VIF_EXISTS(mrt, vr.vifi)) {
1399                         vr.icount = vif->pkt_in;
1400                         vr.ocount = vif->pkt_out;
1401                         vr.ibytes = vif->bytes_in;
1402                         vr.obytes = vif->bytes_out;
1403                         read_unlock(&mrt_lock);
1404
1405                         if (copy_to_user(arg, &vr, sizeof(vr)))
1406                                 return -EFAULT;
1407                         return 0;
1408                 }
1409                 read_unlock(&mrt_lock);
1410                 return -EADDRNOTAVAIL;
1411         case SIOCGETSGCNT:
1412                 if (copy_from_user(&sr, arg, sizeof(sr)))
1413                         return -EFAULT;
1414
1415                 read_lock(&mrt_lock);
1416                 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1417                 if (c) {
1418                         sr.pktcnt = c->mfc_un.res.pkt;
1419                         sr.bytecnt = c->mfc_un.res.bytes;
1420                         sr.wrong_if = c->mfc_un.res.wrong_if;
1421                         read_unlock(&mrt_lock);
1422
1423                         if (copy_to_user(arg, &sr, sizeof(sr)))
1424                                 return -EFAULT;
1425                         return 0;
1426                 }
1427                 read_unlock(&mrt_lock);
1428                 return -EADDRNOTAVAIL;
1429         default:
1430                 return -ENOIOCTLCMD;
1431         }
1432 }
1433
1434
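/* On NETDEV_UNREGISTER, delete every vif bound to the vanishing device. */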
1435 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1436 {
1437         struct net_device *dev = ptr;
1438         struct net *net = dev_net(dev);
1439         struct mr_table *mrt;
1440         struct vif_device *v;
1441         int ct;
1442         LIST_HEAD(list);
1443
1444         if (event != NETDEV_UNREGISTER)
1445                 return NOTIFY_DONE;
1446
1447         ipmr_for_each_table(mrt, net) {
1448                 v = &mrt->vif_table[0];
1449                 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1450                         if (v->dev == dev)
1451                                 vif_delete(mrt, ct, 1, &list);
1452                 }
1453         }
1454         unregister_netdevice_many(&list);
1455         return NOTIFY_DONE;
1456 }
1457
1458
1459 static struct notifier_block ip_mr_notifier = {
1460         .notifier_call = ipmr_device_event,
1461 };
1462
1463 /*
1464  *      Encapsulate a packet by attaching a valid IPIP header to it.
1465  *      This avoids tunnel drivers and other mess and gives us the speed so
1466  *      important for multicast video.
1467  */
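/*
 * Resulting layout, for reference:
 *
 *	| new iphdr, IPPROTO_IPIP | original iphdr | payload |
 */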
1468
1469 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1470 {
1471         struct iphdr *iph;
1472         struct iphdr *old_iph = ip_hdr(skb);
1473
1474         skb_push(skb, sizeof(struct iphdr));
1475         skb->transport_header = skb->network_header;
1476         skb_reset_network_header(skb);
1477         iph = ip_hdr(skb);
1478
1479         iph->version    =       4;
1480         iph->tos        =       old_iph->tos;
1481         iph->ttl        =       old_iph->ttl;
1482         iph->frag_off   =       0;
1483         iph->daddr      =       daddr;
1484         iph->saddr      =       saddr;
1485         iph->protocol   =       IPPROTO_IPIP;
1486         iph->ihl        =       5;
1487         iph->tot_len    =       htons(skb->len);
1488         ip_select_ident(iph, skb_dst(skb), NULL);
1489         ip_send_check(iph);
1490
1491         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1492         nf_reset(skb);
1493 }
1494
1495 static inline int ipmr_forward_finish(struct sk_buff *skb)
1496 {
1497         struct ip_options * opt = &(IPCB(skb)->opt);
1498
1499         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1500
1501         if (unlikely(opt->optlen))
1502                 ip_forward_options(skb);
1503
1504         return dst_output(skb);
1505 }
1506
1507 /*
1508  *      Processing handlers for ipmr_forward
1509  */
1510
1511 static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1512                             struct sk_buff *skb, struct mfc_cache *c, int vifi)
1513 {
1514         const struct iphdr *iph = ip_hdr(skb);
1515         struct vif_device *vif = &mrt->vif_table[vifi];
1516         struct net_device *dev;
1517         struct rtable *rt;
1518         int    encap = 0;
1519
1520         if (vif->dev == NULL)
1521                 goto out_free;
1522
1523 #ifdef CONFIG_IP_PIMSM
1524         if (vif->flags & VIFF_REGISTER) {
1525                 vif->pkt_out++;
1526                 vif->bytes_out += skb->len;
1527                 vif->dev->stats.tx_bytes += skb->len;
1528                 vif->dev->stats.tx_packets++;
1529                 ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
1530                 goto out_free;
1531         }
1532 #endif
1533
1534         if (vif->flags&VIFF_TUNNEL) {
1535                 struct flowi fl = { .oif = vif->link,
1536                                     .nl_u = { .ip4_u =
1537                                               { .daddr = vif->remote,
1538                                                 .saddr = vif->local,
1539                                                 .tos = RT_TOS(iph->tos) } },
1540                                     .proto = IPPROTO_IPIP };
1541                 if (ip_route_output_key(net, &rt, &fl))
1542                         goto out_free;
1543                 encap = sizeof(struct iphdr);
1544         } else {
1545                 struct flowi fl = { .oif = vif->link,
1546                                     .nl_u = { .ip4_u =
1547                                               { .daddr = iph->daddr,
1548                                                 .tos = RT_TOS(iph->tos) } },
1549                                     .proto = IPPROTO_IPIP };
1550                 if (ip_route_output_key(net, &rt, &fl))
1551                         goto out_free;
1552         }
1553
1554         dev = rt->u.dst.dev;
1555
1556         if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts.  Alas, IPv4 does not
                   allow sending ICMP here, so such packets simply
                   disappear into a black hole.
                 */
1561
1562                 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
1563                 ip_rt_put(rt);
1564                 goto out_free;
1565         }
1566
1567         encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
1568
1569         if (skb_cow(skb, encap)) {
1570                 ip_rt_put(rt);
1571                 goto out_free;
1572         }
1573
1574         vif->pkt_out++;
1575         vif->bytes_out += skb->len;
1576
1577         skb_dst_drop(skb);
1578         skb_dst_set(skb, &rt->u.dst);
1579         ip_decrease_ttl(ip_hdr(skb));
1580
1581         /* FIXME: forward and output firewalls used to be called here.
1582          * What do we do with netfilter? -- RR */
1583         if (vif->flags & VIFF_TUNNEL) {
1584                 ip_encap(skb, vif->local, vif->remote);
1585                 /* FIXME: extra output firewall step used to be here. --RR */
1586                 vif->dev->stats.tx_packets++;
1587                 vif->dev->stats.tx_bytes += skb->len;
1588         }
1589
1590         IPCB(skb)->flags |= IPSKB_FORWARDED;
1591
        /*
         * RFC 1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces.  Clearly, if an mrouter runs a multicast
         * program, that program should receive packets regardless of which
         * interface it joined on.  If we did not do this, the program would
         * have to join on all interfaces.  On the other hand, a multihomed
         * host (or a router, but not an mrouter) cannot join on more than
         * one interface: it would result in receiving multiple packets.
         */
1603         NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
1604                 ipmr_forward_finish);
1605         return;
1606
1607 out_free:
1608         kfree_skb(skb);
1609 }
1610
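/*
 * Map a net_device back to its index in mrt->vif_table.  Scans from the
 * highest configured VIF downwards and returns -1 when the device does
 * not back any VIF.
 */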
1611 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
1612 {
1613         int ct;
1614
1615         for (ct = mrt->maxvif-1; ct >= 0; ct--) {
1616                 if (mrt->vif_table[ct].dev == dev)
1617                         break;
1618         }
1619         return ct;
1620 }
1621
1622 /* "local" means that we should preserve one skb (for local delivery) */
1623
1624 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1625                          struct sk_buff *skb, struct mfc_cache *cache,
1626                          int local)
1627 {
1628         int psend = -1;
1629         int vif, ct;
1630
1631         vif = cache->mfc_parent;
1632         cache->mfc_un.res.pkt++;
1633         cache->mfc_un.res.bytes += skb->len;
1634
1635         /*
1636          * Wrong interface: drop packet and (maybe) send PIM assert.
1637          */
1638         if (mrt->vif_table[vif].dev != skb->dev) {
1639                 int true_vifi;
1640
1641                 if (skb_rtable(skb)->fl.iif == 0) {
1642                         /* It is our own packet, looped back.
1643                            Very complicated situation...
1644
1645                            The best workaround until the routing daemons
1646                            are fixed is not to redistribute a packet if it
1647                            was sent through the wrong interface.  This
1648                            means that multicast applications WILL NOT work
1649                            for (S,G) entries whose default multicast route
1650                            points to the wrong oif.  In any case, it is a
1651                            bad idea to run multicast applications on a router.
1652                          */
1653                         goto dont_forward;
1654                 }
1655
1656                 cache->mfc_un.res.wrong_if++;
1657                 true_vifi = ipmr_find_vif(mrt, skb->dev);
1658
1659                 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1660                     /* PIM-SM uses asserts when switching from the RPT
1661                        to the SPT, so we cannot check that a packet
1662                        arrived on an oif.  That is bad, but otherwise
1663                        we would have to move a large chunk of pimd
1664                        into the kernel. Ough... --ANK */
1665                     (mrt->mroute_do_pim ||
1666                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
1667                     time_after(jiffies,
1668                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1669                         cache->mfc_un.res.last_assert = jiffies;
1670                         ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1671                 }
1672                 goto dont_forward;
1673         }
1674
1675         mrt->vif_table[vif].pkt_in++;
1676         mrt->vif_table[vif].bytes_in += skb->len;
1677
1678         /*
1679          *      Forward the frame
1680          */
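        /*
         * Transmission is deferred by one candidate oif: every further
         * oif whose TTL threshold the packet passes flushes the previous
         * candidate on a clone, so the last oif can consume the original
         * skb (unless a copy must survive for local delivery).
         */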
1681         for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1682                 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1683                         if (psend != -1) {
1684                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1685                                 if (skb2)
1686                                         ipmr_queue_xmit(net, mrt, skb2, cache,
1687                                                         psend);
1688                         }
1689                         psend = ct;
1690                 }
1691         }
1692         if (psend != -1) {
1693                 if (local) {
1694                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1695                         if (skb2)
1696                                 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1697                 } else {
1698                         ipmr_queue_xmit(net, mrt, skb, cache, psend);
1699                         return 0;
1700                 }
1701         }
1702
1703 dont_forward:
1704         if (!local)
1705                 kfree_skb(skb);
1706         return 0;
1707 }
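
/*
 * For reference, a minimal sketch (editorial, not part of this file) of
 * how a userspace daemon installs the VIFs and (S,G) entries that
 * ip_mr_forward() consumes.  The setsockopt interface and the vifctl/
 * mfcctl structures come from linux/mroute.h; error handling and a
 * second MRT_ADD_VIF for vifi 1 are omitted:
 *
 *      int s = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *      int on = 1;
 *      struct vifctl vc = { .vifc_vifi = 0, .vifc_threshold = 1 };
 *      struct mfcctl mc = { .mfcc_parent = 0 };
 *
 *      setsockopt(s, IPPROTO_IP, MRT_INIT, &on, sizeof(on));
 *      vc.vifc_lcl_addr.s_addr = inet_addr("192.0.2.1");
 *      setsockopt(s, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *      mc.mfcc_origin.s_addr   = inet_addr("198.51.100.7");
 *      mc.mfcc_mcastgrp.s_addr = inet_addr("233.252.0.1");
 *      mc.mfcc_ttls[1] = 1;
 *      setsockopt(s, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */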
1708
1709
1710 /*
1711  *      Multicast packets for forwarding arrive here
1712  */
1713
1714 int ip_mr_input(struct sk_buff *skb)
1715 {
1716         struct mfc_cache *cache;
1717         struct net *net = dev_net(skb->dev);
1718         int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
1719         struct mr_table *mrt;
1720         int err;
1721
1722         /* A packet that is looped back after forwarding must not be
1723            forwarded a second time, but it can still be delivered locally.
1724          */
1725         if (IPCB(skb)->flags & IPSKB_FORWARDED)
1726                 goto dont_forward;
1727
1728         err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
1729         if (err < 0)
1730                 return err;
1731
1732         if (!local) {
1733                 if (IPCB(skb)->opt.router_alert) {
1734                         if (ip_call_ra_chain(skb))
1735                                 return 0;
1736                 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1737                         /* IGMPv1 (and broken IGMPv2 implementations such
1738                            as Cisco IOS <= 11.2(8)) do not put the router
1739                            alert option into IGMP packets destined to
1740                            routable groups.  This is bad, because it means
1741                            we can forward NO IGMP messages at all.
1742                          */
1743                         read_lock(&mrt_lock);
1744                         if (mrt->mroute_sk) {
1745                                 nf_reset(skb);
1746                                 raw_rcv(mrt->mroute_sk, skb);
1747                                 read_unlock(&mrt_lock);
1748                                 return 0;
1749                         }
1750                         read_unlock(&mrt_lock);
1751                 }
1752         }
1753
1754         read_lock(&mrt_lock);
1755         cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1756
1757         /*
1758          *      No usable cache entry
1759          */
1760         if (cache == NULL) {
1761                 int vif;
1762
1763                 if (local) {
1764                         struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1765                         ip_local_deliver(skb);
1766                         if (skb2 == NULL) {
1767                                 read_unlock(&mrt_lock);
1768                                 return -ENOBUFS;
1769                         }
1770                         skb = skb2;
1771                 }
1772
1773                 vif = ipmr_find_vif(mrt, skb->dev);
1774                 if (vif >= 0) {
1775                         int err2 = ipmr_cache_unresolved(mrt, vif, skb);
1776                         read_unlock(&mrt_lock);
1777
1778                         return err2;
1779                 }
1780                 read_unlock(&mrt_lock);
1781                 kfree_skb(skb);
1782                 return -ENODEV;
1783         }
1784
1785         ip_mr_forward(net, mrt, skb, cache, local);
1786
1787         read_unlock(&mrt_lock);
1788
1789         if (local)
1790                 return ip_local_deliver(skb);
1791
1792         return 0;
1793
1794 dont_forward:
1795         if (local)
1796                 return ip_local_deliver(skb);
1797         kfree_skb(skb);
1798         return 0;
1799 }
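
/*
 * When no cache entry exists, ipmr_cache_unresolved() queues the packet
 * and sends an IGMPMSG_NOCACHE upcall to the daemon through the mroute
 * socket.  Sketch of the receiving side (editorial; resolve_and_add()
 * is a hypothetical helper, struct igmpmsg comes from linux/mroute.h):
 *
 *      char buf[2048];
 *      struct igmpmsg *m = (struct igmpmsg *)buf;
 *
 *      if (read(s, buf, sizeof(buf)) > 0 &&
 *          m->im_mbz == 0 &&                   // upcall, not real IGMP
 *          m->im_msgtype == IGMPMSG_NOCACHE)
 *              resolve_and_add(m->im_src, m->im_dst, m->im_vif);
 */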
1800
1801 #ifdef CONFIG_IP_PIMSM
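/*
 * Common PIM REGISTER decapsulation: strip the outer headers and
 * re-inject the inner multicast packet as if it had arrived on the
 * register VIF.  Returns 0 when the skb was consumed, 1 when the
 * caller still owns (and must free) the skb.
 */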
1802 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1803                      unsigned int pimlen)
1804 {
1805         struct net_device *reg_dev = NULL;
1806         struct iphdr *encap;
1807
1808         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1809         /*
1810            Check that:
1811            a. the packet is really destined to a multicast group
1812            b. the packet is not a NULL-REGISTER
1813            c. the packet is not truncated
1814          */
1815         if (!ipv4_is_multicast(encap->daddr) ||
1816             encap->tot_len == 0 ||
1817             ntohs(encap->tot_len) + pimlen > skb->len)
1818                 return 1;
1819
1820         read_lock(&mrt_lock);
1821         if (mrt->mroute_reg_vif_num >= 0)
1822                 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1823         if (reg_dev)
1824                 dev_hold(reg_dev);
1825         read_unlock(&mrt_lock);
1826
1827         if (reg_dev == NULL)
1828                 return 1;
1829
1830         skb->mac_header = skb->network_header;
1831         skb_pull(skb, (u8 *)encap - skb->data);
1832         skb_reset_network_header(skb);
1833         skb->protocol = htons(ETH_P_IP);
1834         skb->ip_summed = CHECKSUM_NONE;
1835         skb->pkt_type = PACKET_HOST;
1836
1837         skb_tunnel_rx(skb, reg_dev);
1838
1839         netif_rx(skb);
1840         dev_put(reg_dev);
1841
1842         return 0;
1843 }
1844 #endif
1845
1846 #ifdef CONFIG_IP_PIMSM_V1
1847 /*
1848  * Handle IGMP messages of PIMv1
1849  */
1850
1851 int pim_rcv_v1(struct sk_buff *skb)
1852 {
1853         struct igmphdr *pim;
1854         struct net *net = dev_net(skb->dev);
1855         struct mr_table *mrt;
1856
1857         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1858                 goto drop;
1859
1860         pim = igmp_hdr(skb);
1861
1862         if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1863                 goto drop;
1864
1865         if (!mrt->mroute_do_pim ||
1866             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1867                 goto drop;
1868
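        /* The drop label sits inside the if so that the failed checks
           above and a failed __pim_rcv() share one kfree_skb(). */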
1869         if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1870 drop:
1871                 kfree_skb(skb);
1872         }
1873         return 0;
1874 }
1875 #endif
1876
1877 #ifdef CONFIG_IP_PIMSM_V2
1878 static int pim_rcv(struct sk_buff *skb)
1879 {
1880         struct pimreghdr *pim;
1881         struct net *net = dev_net(skb->dev);
1882         struct mr_table *mrt;
1883
1884         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1885                 goto drop;
1886
1887         pim = (struct pimreghdr *)skb_transport_header(skb);
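        /* Accept a register whose checksum covers either just the PIM
           header or the whole packet; some senders checksum the entire
           register payload. */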
1888         if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
1889             (pim->flags & PIM_NULL_REGISTER) ||
1890             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1891              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1892                 goto drop;
1893
1894         if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
1895                 goto drop;
1896
1897         if (__pim_rcv(mrt, skb, sizeof(*pim))) {
1898 drop:
1899                 kfree_skb(skb);
1900         }
1901         return 0;
1902 }
1903 #endif
1904
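/*
 * Fill RTA_IIF and an RTA_MULTIPATH attribute describing one (S,G)
 * entry: each forwarding VIF becomes a struct rtnexthop whose
 * rtnh_hops field carries the TTL threshold.
 */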
1905 static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
1906                               struct mfc_cache *c, struct rtmsg *rtm)
1907 {
1908         int ct;
1909         struct rtnexthop *nhp;
1910         u8 *b = skb_tail_pointer(skb);
1911         struct rtattr *mp_head;
1912
1913         /* If cache is unresolved, don't try to parse IIF and OIF */
1914         if (c->mfc_parent >= MAXVIFS)
1915                 return -ENOENT;
1916
1917         if (VIF_EXISTS(mrt, c->mfc_parent))
1918                 RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
1919
1920         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1921
1922         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1923                 if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
1924                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1925                                 goto rtattr_failure;
1926                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1927                         nhp->rtnh_flags = 0;
1928                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1929                         nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
1930                         nhp->rtnh_len = sizeof(*nhp);
1931                 }
1932         }
1933         mp_head->rta_type = RTA_MULTIPATH;
1934         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1935         rtm->rtm_type = RTN_MULTICAST;
1936         return 1;
1937
1938 rtattr_failure:
1939         nlmsg_trim(skb, b);
1940         return -EMSGSIZE;
1941 }
1942
1943 int ipmr_get_route(struct net *net,
1944                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1945 {
1946         int err;
1947         struct mr_table *mrt;
1948         struct mfc_cache *cache;
1949         struct rtable *rt = skb_rtable(skb);
1950
1951         mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
1952         if (mrt == NULL)
1953                 return -ENOENT;
1954
1955         read_lock(&mrt_lock);
1956         cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1957
1958         if (cache == NULL) {
1959                 struct sk_buff *skb2;
1960                 struct iphdr *iph;
1961                 struct net_device *dev;
1962                 int vif;
1963
1964                 if (nowait) {
1965                         read_unlock(&mrt_lock);
1966                         return -EAGAIN;
1967                 }
1968
1969                 dev = skb->dev;
1970                 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
1971                         read_unlock(&mrt_lock);
1972                         return -ENODEV;
1973                 }
1974                 skb2 = skb_clone(skb, GFP_ATOMIC);
1975                 if (!skb2) {
1976                         read_unlock(&mrt_lock);
1977                         return -ENOMEM;
1978                 }
1979
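                /* Build a fake IP header on the clone; version 0 marks
                   it as a synthetic request so the cache-resolve code
                   answers via rtnetlink instead of forwarding it. */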
1980                 skb_push(skb2, sizeof(struct iphdr));
1981                 skb_reset_network_header(skb2);
1982                 iph = ip_hdr(skb2);
1983                 iph->ihl = sizeof(struct iphdr) >> 2;
1984                 iph->saddr = rt->rt_src;
1985                 iph->daddr = rt->rt_dst;
1986                 iph->version = 0;
1987                 err = ipmr_cache_unresolved(mrt, vif, skb2);
1988                 read_unlock(&mrt_lock);
1989                 return err;
1990         }
1991
1992         if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1993                 cache->mfc_flags |= MFC_NOTIFY;
1994         err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
1995         read_unlock(&mrt_lock);
1996         return err;
1997 }
1998
1999 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
2000                             u32 pid, u32 seq, struct mfc_cache *c)
2001 {
2002         struct nlmsghdr *nlh;
2003         struct rtmsg *rtm;
2004
2005         nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
2006         if (nlh == NULL)
2007                 return -EMSGSIZE;
2008
2009         rtm = nlmsg_data(nlh);
2010         rtm->rtm_family   = RTNL_FAMILY_IPMR;
2011         rtm->rtm_dst_len  = 32;
2012         rtm->rtm_src_len  = 32;
2013         rtm->rtm_tos      = 0;
2014         rtm->rtm_table    = mrt->id;
2015         NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
2016         rtm->rtm_type     = RTN_MULTICAST;
2017         rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2018         rtm->rtm_protocol = RTPROT_UNSPEC;
2019         rtm->rtm_flags    = 0;
2020
2021         NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
2022         NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
2023
2024         if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
2025                 goto nla_put_failure;
2026
2027         return nlmsg_end(skb, nlh);
2028
2029 nla_put_failure:
2030         nlmsg_cancel(skb, nlh);
2031         return -EMSGSIZE;
2032 }
2033
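/*
 * Dump all MFC entries over rtnetlink.  cb->args[] records the resume
 * point across partial dumps: args[0] is the table index, args[1] the
 * hash line and args[2] the entry within that line.
 */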
2034 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2035 {
2036         struct net *net = sock_net(skb->sk);
2037         struct mr_table *mrt;
2038         struct mfc_cache *mfc;
2039         unsigned int t = 0, s_t;
2040         unsigned int h = 0, s_h;
2041         unsigned int e = 0, s_e;
2042
2043         s_t = cb->args[0];
2044         s_h = cb->args[1];
2045         s_e = cb->args[2];
2046
2047         read_lock(&mrt_lock);
2048         ipmr_for_each_table(mrt, net) {
2049                 if (t < s_t)
2050                         goto next_table;
2051                 if (t > s_t)
2052                         s_h = 0;
2053                 for (h = s_h; h < MFC_LINES; h++) {
2054                         list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
2055                                 if (e < s_e)
2056                                         goto next_entry;
2057                                 if (ipmr_fill_mroute(mrt, skb,
2058                                                      NETLINK_CB(cb->skb).pid,
2059                                                      cb->nlh->nlmsg_seq,
2060                                                      mfc) < 0)
2061                                         goto done;
2062 next_entry:
2063                                 e++;
2064                         }
2065                         e = s_e = 0;
2066                 }
2067                 s_h = 0;
2068 next_table:
2069                 t++;
2070         }
2071 done:
2072         read_unlock(&mrt_lock);
2073
2074         cb->args[2] = e;
2075         cb->args[1] = h;
2076         cb->args[0] = t;
2077
2078         return skb->len;
2079 }
2080
2081 #ifdef CONFIG_PROC_FS
2082 /*
2083  *      The /proc interfaces to multicast routing: /proc/net/ip_mr_cache and /proc/net/ip_mr_vif
2084  */
2085 struct ipmr_vif_iter {
2086         struct seq_net_private p;
2087         struct mr_table *mrt;
2088         int ct;
2089 };
2090
2091 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
2092                                            struct ipmr_vif_iter *iter,
2093                                            loff_t pos)
2094 {
2095         struct mr_table *mrt = iter->mrt;
2096
2097         for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
2098                 if (!VIF_EXISTS(mrt, iter->ct))
2099                         continue;
2100                 if (pos-- == 0)
2101                         return &mrt->vif_table[iter->ct];
2102         }
2103         return NULL;
2104 }
2105
2106 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
2107         __acquires(mrt_lock)
2108 {
2109         struct ipmr_vif_iter *iter = seq->private;
2110         struct net *net = seq_file_net(seq);
2111         struct mr_table *mrt;
2112
2113         mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2114         if (mrt == NULL)
2115                 return ERR_PTR(-ENOENT);
2116
2117         iter->mrt = mrt;
2118
2119         read_lock(&mrt_lock);
2120         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
2121                 : SEQ_START_TOKEN;
2122 }
2123
2124 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2125 {
2126         struct ipmr_vif_iter *iter = seq->private;
2127         struct net *net = seq_file_net(seq);
2128         struct mr_table *mrt = iter->mrt;
2129
2130         ++*pos;
2131         if (v == SEQ_START_TOKEN)
2132                 return ipmr_vif_seq_idx(net, iter, 0);
2133
2134         while (++iter->ct < mrt->maxvif) {
2135                 if (!VIF_EXISTS(mrt, iter->ct))
2136                         continue;
2137                 return &mrt->vif_table[iter->ct];
2138         }
2139         return NULL;
2140 }
2141
2142 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
2143         __releases(mrt_lock)
2144 {
2145         read_unlock(&mrt_lock);
2146 }
2147
2148 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
2149 {
2150         struct ipmr_vif_iter *iter = seq->private;
2151         struct mr_table *mrt = iter->mrt;
2152
2153         if (v == SEQ_START_TOKEN) {
2154                 seq_puts(seq,
2155                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
2156         } else {
2157                 const struct vif_device *vif = v;
2158                 const char *name =  vif->dev ? vif->dev->name : "none";
2159
2160                 seq_printf(seq,
2161                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
2162                            vif - mrt->vif_table,
2163                            name, vif->bytes_in, vif->pkt_in,
2164                            vif->bytes_out, vif->pkt_out,
2165                            vif->flags, vif->local, vif->remote);
2166         }
2167         return 0;
2168 }
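
/*
 * Example /proc/net/ip_mr_vif line, printed with the format above
 * (values illustrative only; Local/Remote are raw hex addresses):
 *
 *  Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *   0 eth0           12345     678     54321     876 00000 0102A8C0 00000000
 */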
2169
2170 static const struct seq_operations ipmr_vif_seq_ops = {
2171         .start = ipmr_vif_seq_start,
2172         .next  = ipmr_vif_seq_next,
2173         .stop  = ipmr_vif_seq_stop,
2174         .show  = ipmr_vif_seq_show,
2175 };
2176
2177 static int ipmr_vif_open(struct inode *inode, struct file *file)
2178 {
2179         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
2180                             sizeof(struct ipmr_vif_iter));
2181 }
2182
2183 static const struct file_operations ipmr_vif_fops = {
2184         .owner   = THIS_MODULE,
2185         .open    = ipmr_vif_open,
2186         .read    = seq_read,
2187         .llseek  = seq_lseek,
2188         .release = seq_release_net,
2189 };
2190
2191 struct ipmr_mfc_iter {
2192         struct seq_net_private p;
2193         struct mr_table *mrt;
2194         struct list_head *cache;
2195         int ct;
2196 };
2197
2198
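/*
 * MFC iteration walks the resolved hash lines under mrt_lock and then
 * the unresolved queue under mfc_unres_lock; whichever lock is still
 * held when iteration ends is released in ipmr_mfc_seq_stop().
 */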
2199 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2200                                           struct ipmr_mfc_iter *it, loff_t pos)
2201 {
2202         struct mr_table *mrt = it->mrt;
2203         struct mfc_cache *mfc;
2204
2205         read_lock(&mrt_lock);
2206         for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2207                 it->cache = &mrt->mfc_cache_array[it->ct];
2208                 list_for_each_entry(mfc, it->cache, list)
2209                         if (pos-- == 0)
2210                                 return mfc;
2211         }
2212         read_unlock(&mrt_lock);
2213
2214         spin_lock_bh(&mfc_unres_lock);
2215         it->cache = &mrt->mfc_unres_queue;
2216         list_for_each_entry(mfc, it->cache, list)
2217                 if (pos-- == 0)
2218                         return mfc;
2219         spin_unlock_bh(&mfc_unres_lock);
2220
2221         it->cache = NULL;
2222         return NULL;
2223 }
2224
2225
2226 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
2227 {
2228         struct ipmr_mfc_iter *it = seq->private;
2229         struct net *net = seq_file_net(seq);
2230         struct mr_table *mrt;
2231
2232         mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
2233         if (mrt == NULL)
2234                 return ERR_PTR(-ENOENT);
2235
2236         it->mrt = mrt;
2237         it->cache = NULL;
2238         it->ct = 0;
2239         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
2240                 : SEQ_START_TOKEN;
2241 }
2242
2243 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2244 {
2245         struct mfc_cache *mfc = v;
2246         struct ipmr_mfc_iter *it = seq->private;
2247         struct net *net = seq_file_net(seq);
2248         struct mr_table *mrt = it->mrt;
2249
2250         ++*pos;
2251
2252         if (v == SEQ_START_TOKEN)
2253                 return ipmr_mfc_seq_idx(net, seq->private, 0);
2254
2255         if (mfc->list.next != it->cache)
2256                 return list_entry(mfc->list.next, struct mfc_cache, list);
2257
2258         if (it->cache == &mrt->mfc_unres_queue)
2259                 goto end_of_list;
2260
2261         BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
2262
2263         while (++it->ct < MFC_LINES) {
2264                 it->cache = &mrt->mfc_cache_array[it->ct];
2265                 if (list_empty(it->cache))
2266                         continue;
2267                 return list_first_entry(it->cache, struct mfc_cache, list);
2268         }
2269
2270         /* exhausted cache_array, show unresolved */
2271         read_unlock(&mrt_lock);
2272         it->cache = &mrt->mfc_unres_queue;
2273         it->ct = 0;
2274
2275         spin_lock_bh(&mfc_unres_lock);
2276         if (!list_empty(it->cache))
2277                 return list_first_entry(it->cache, struct mfc_cache, list);
2278
2279  end_of_list:
2280         spin_unlock_bh(&mfc_unres_lock);
2281         it->cache = NULL;
2282
2283         return NULL;
2284 }
2285
2286 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2287 {
2288         struct ipmr_mfc_iter *it = seq->private;
2289         struct mr_table *mrt = it->mrt;
2290
2291         if (it->cache == &mrt->mfc_unres_queue)
2292                 spin_unlock_bh(&mfc_unres_lock);
2293         else if (it->cache == &mrt->mfc_cache_array[it->ct])
2294                 read_unlock(&mrt_lock);
2295 }
2296
2297 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2298 {
2299         int n;
2300
2301         if (v == SEQ_START_TOKEN) {
2302                 seq_puts(seq,
2303                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
2304         } else {
2305                 const struct mfc_cache *mfc = v;
2306                 const struct ipmr_mfc_iter *it = seq->private;
2307                 const struct mr_table *mrt = it->mrt;
2308
2309                 seq_printf(seq, "%08X %08X %-3hd",
2310                            (__force u32) mfc->mfc_mcastgrp,
2311                            (__force u32) mfc->mfc_origin,
2312                            mfc->mfc_parent);
2313
2314                 if (it->cache != &mrt->mfc_unres_queue) {
2315                         seq_printf(seq, " %8lu %8lu %8lu",
2316                                    mfc->mfc_un.res.pkt,
2317                                    mfc->mfc_un.res.bytes,
2318                                    mfc->mfc_un.res.wrong_if);
2319                         for (n = mfc->mfc_un.res.minvif;
2320                              n < mfc->mfc_un.res.maxvif; n++) {
2321                                 if (VIF_EXISTS(mrt, n) &&
2322                                     mfc->mfc_un.res.ttls[n] < 255)
2323                                         seq_printf(seq,
2324                                            " %2d:%-3d",
2325                                            n, mfc->mfc_un.res.ttls[n]);
2326                         }
2327                 } else {
2328                         /* unresolved mfc_caches don't contain
2329                          * pkt, bytes and wrong_if values
2330                          */
2331                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
2332                 }
2333                 seq_putc(seq, '\n');
2334         }
2335         return 0;
2336 }
2337
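
/*
 * Example /proc/net/ip_mr_cache line (values illustrative only; the
 * trailing Oifs column lists "vif:ttl-threshold" pairs):
 *
 *  Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *  010000E0 0100A8C0  0         12     1440        0  1:1
 */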
2338 static const struct seq_operations ipmr_mfc_seq_ops = {
2339         .start = ipmr_mfc_seq_start,
2340         .next  = ipmr_mfc_seq_next,
2341         .stop  = ipmr_mfc_seq_stop,
2342         .show  = ipmr_mfc_seq_show,
2343 };
2344
2345 static int ipmr_mfc_open(struct inode *inode, struct file *file)
2346 {
2347         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
2348                             sizeof(struct ipmr_mfc_iter));
2349 }
2350
2351 static const struct file_operations ipmr_mfc_fops = {
2352         .owner   = THIS_MODULE,
2353         .open    = ipmr_mfc_open,
2354         .read    = seq_read,
2355         .llseek  = seq_lseek,
2356         .release = seq_release_net,
2357 };
2358 #endif
2359
2360 #ifdef CONFIG_IP_PIMSM_V2
2361 static const struct net_protocol pim_protocol = {
2362         .handler        =       pim_rcv,
2363         .netns_ok       =       1,
2364 };
2365 #endif
2366
2367
2368 /*
2369  *      Setup for IP multicast routing
2370  */
2371 static int __net_init ipmr_net_init(struct net *net)
2372 {
2373         int err;
2374
2375         err = ipmr_rules_init(net);
2376         if (err < 0)
2377                 goto fail;
2378
2379 #ifdef CONFIG_PROC_FS
2380         err = -ENOMEM;
2381         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
2382                 goto proc_vif_fail;
2383         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
2384                 goto proc_cache_fail;
2385 #endif
2386         return 0;
2387
2388 #ifdef CONFIG_PROC_FS
2389 proc_cache_fail:
2390         proc_net_remove(net, "ip_mr_vif");
2391 proc_vif_fail:
2392         ipmr_rules_exit(net);
2393 #endif
2394 fail:
2395         return err;
2396 }
2397
2398 static void __net_exit ipmr_net_exit(struct net *net)
2399 {
2400 #ifdef CONFIG_PROC_FS
2401         proc_net_remove(net, "ip_mr_cache");
2402         proc_net_remove(net, "ip_mr_vif");
2403 #endif
2404         ipmr_rules_exit(net);
2405 }
2406
2407 static struct pernet_operations ipmr_net_ops = {
2408         .init = ipmr_net_init,
2409         .exit = ipmr_net_exit,
2410 };
2411
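/*
 * Module init.  The unwind labels below mirror the registration order
 * (slab cache, pernet subsystem, netdevice notifier, PIM protocol) so
 * that a failure part-way through leaves no state behind.
 */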
2412 int __init ip_mr_init(void)
2413 {
2414         int err;
2415
2416         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2417                                        sizeof(struct mfc_cache),
2418                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2419                                        NULL);
2420         if (!mrt_cachep)
2421                 return -ENOMEM;
2422
2423         err = register_pernet_subsys(&ipmr_net_ops);
2424         if (err)
2425                 goto reg_pernet_fail;
2426
2427         err = register_netdevice_notifier(&ip_mr_notifier);
2428         if (err)
2429                 goto reg_notif_fail;
2430 #ifdef CONFIG_IP_PIMSM_V2
2431         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2432                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2433                 err = -EAGAIN;
2434                 goto add_proto_fail;
2435         }
2436 #endif
2437         rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
2438         return 0;
2439
2440 #ifdef CONFIG_IP_PIMSM_V2
2441 add_proto_fail:
2442         unregister_netdevice_notifier(&ip_mr_notifier);
2443 #endif
2444 reg_notif_fail:
2445         unregister_pernet_subsys(&ipmr_net_ops);
2446 reg_pernet_fail:
2447         kmem_cache_destroy(mrt_cachep);
2448         return err;
2449 }