/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
	struct list_head	list;
	struct net		*net;	/* back-pointer for netlink reporting and teardown */
	u32			id;
	struct sock		*mroute_sk;
	struct timer_list	ipmr_expire_timer;
	struct list_head	mfc_unres_queue;
	struct list_head	mfc_cache_array[MFC_LINES];
	struct vif_device	vif_table[MAXVIFS];
	int			maxvif;
	atomic_t		cache_resolve_queue_len;
	int			mroute_do_assert;
	int			mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
	int			mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
	struct fib_rule		common;
};

struct ipmr_result {
	struct mr_table		*mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are serialized via rtnl_lock.
 */

static DEFINE_RWLOCK(mrt_lock);

/*
 *	Multicast router control variables
 */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected
 * with the weak lock mrt_lock. The queue of unresolved entries is
 * protected with the strong spinlock mfc_unres_lock.
 *
 * In this case the data path is free of exclusive locks at all.
 */
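/*
 * An illustrative sketch of the resulting locking pattern (not part of
 * the original file): the forwarding path only ever takes mrt_lock as
 * a reader, configuration takes it as a BH-safe writer, and the
 * unresolved queue alone is guarded by the spinlock:
 *
 *	read_lock(&mrt_lock);				(data path lookup)
 *	c = ipmr_cache_find(mrt, saddr, daddr);
 *	read_unlock(&mrt_lock);
 *
 *	write_lock_bh(&mrt_lock);			(process-context update)
 *	list_add(&c->list, &mrt->mfc_cache_array[line]);
 *	write_unlock_bh(&mrt_lock);
 *
 *	spin_lock_bh(&mfc_unres_lock);			(unresolved entries only)
 *	list_add(&c->list, &mrt->mfc_unres_queue);
 *	spin_unlock_bh(&mfc_unres_lock);
 */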
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
			 struct sk_buff *skb, struct mfc_cache *cache,
			 int local);
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
			    struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	struct mr_table *mrt;

	ipmr_for_each_table(mrt, net) {
		if (mrt->id == id)
			return mrt;
	}
	return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	struct ipmr_result res;
	struct fib_lookup_arg arg = { .result = &res, };
	int err;

	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
	if (err < 0)
		return err;
	*mrt = res.mrt;
	return 0;
}
static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
			    int flags, struct fib_lookup_arg *arg)
{
	struct ipmr_result *res = arg->result;
	struct mr_table *mrt;

	switch (rule->action) {
	case FR_ACT_TO_TBL:
		break;
	case FR_ACT_UNREACHABLE:
		return -ENETUNREACH;
	case FR_ACT_PROHIBIT:
		return -EACCES;
	case FR_ACT_BLACKHOLE:
	default:
		return -EINVAL;
	}

	mrt = ipmr_get_table(rule->fr_net, rule->table);
	if (mrt == NULL)
		return -EAGAIN;
	res->mrt = mrt;
	return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
	return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
	FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
			       struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			     struct nlattr **tb)
{
	return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
			  struct fib_rule_hdr *frh)
{
	frh->dst_len = 0;
	frh->src_len = 0;
	frh->tos = 0;
	return 0;
}
static struct fib_rules_ops ipmr_rules_ops_template = {
	.family		= FIB_RULES_IPMR,
	.rule_size	= sizeof(struct ipmr_rule),
	.addr_size	= sizeof(u32),
	.action		= ipmr_rule_action,
	.match		= ipmr_rule_match,
	.configure	= ipmr_rule_configure,
	.compare	= ipmr_rule_compare,
	.default_pref	= fib_default_rule_pref,
	.fill		= ipmr_rule_fill,
	.nlgroup	= RTNLGRP_IPV4_RULE,
	.policy		= ipmr_rule_policy,
	.owner		= THIS_MODULE,
};
static int __net_init ipmr_rules_init(struct net *net)
{
	struct fib_rules_ops *ops;
	struct mr_table *mrt;
	int err;

	ops = fib_rules_register(&ipmr_rules_ops_template, net);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	INIT_LIST_HEAD(&net->ipv4.mr_tables);

	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL) {
		err = -ENOMEM;
		goto err1;
	}

	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
	if (err < 0)
		goto err2;

	net->ipv4.mr_rules_ops = ops;
	return 0;

err2:
	kfree(mrt);
err1:
	fib_rules_unregister(ops);
	return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	struct mr_table *mrt, *next;

	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
		kfree(mrt);
	fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
	return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
			   struct mr_table **mrt)
{
	*mrt = net->ipv4.mrt;
	return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
	return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
	kfree(net->ipv4.mrt);
}
#endif
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
	struct mr_table *mrt;
	unsigned int i;

	mrt = ipmr_get_table(net, id);
	if (mrt != NULL)
		return mrt;

	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
	if (mrt == NULL)
		return NULL;
	mrt->net = net;
	mrt->id = id;

	/* Forwarding cache */
	for (i = 0; i < MFC_LINES; i++)
		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

	INIT_LIST_HEAD(&mrt->mfc_unres_queue);

	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
		    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
	mrt->mroute_reg_vif_num = -1;
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
	return mrt;
}
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
	struct net *net = dev_net(dev);

	dev = __dev_get_by_name(net, "tunl0");
	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		struct ifreq ifr;
		struct ip_tunnel_parm p;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
			set_fs(oldfs);
		}
	}
}
static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
	struct net_device *dev;

	dev = __dev_get_by_name(net, "tunl0");

	if (dev) {
		const struct net_device_ops *ops = dev->netdev_ops;
		int err;
		struct ifreq ifr;
		struct ip_tunnel_parm p;
		struct in_device *in_dev;

		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

		if (ops->ndo_do_ioctl) {
			mm_segment_t oldfs = get_fs();

			set_fs(KERNEL_DS);
			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
			set_fs(oldfs);
		} else
			err = -EOPNOTSUPP;

		dev = NULL;

		if (err == 0 &&
		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			in_dev = __in_dev_get_rtnl(dev);
			if (in_dev == NULL)
				goto failure;

			ipv4_devconf_setall(in_dev);
			IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

			if (dev_open(dev))
				goto failure;
			dev_hold(dev);
		}
	}
	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct flowi fl = {
		.oif		= dev->ifindex,
		.iif		= skb->skb_iif,
		.mark		= skb->mark,
	};
	int err;

	err = ipmr_fib_lookup(net, &fl, &mrt);

	read_lock(&mrt_lock);
	dev->stats.tx_bytes += skb->len;
	dev->stats.tx_packets++;
	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
	read_unlock(&mrt_lock);
	kfree_skb(skb);
	return NETDEV_TX_OK;
}

static const struct net_device_ops reg_vif_netdev_ops = {
	.ndo_start_xmit	= reg_vif_xmit,
};
static void reg_vif_setup(struct net_device *dev)
{
	dev->type		= ARPHRD_PIMREG;
	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->netdev_ops		= &reg_vif_netdev_ops;
	dev->destructor		= free_netdev;
	dev->features		|= NETIF_F_NETNS_LOCAL;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
	struct net_device *dev;
	struct in_device *in_dev;
	char name[IFNAMSIZ];

	if (mrt->id == RT_TABLE_DEFAULT)
		sprintf(name, "pimreg");
	else
		sprintf(name, "pimreg%u", mrt->id);

	dev = alloc_netdev(0, name, reg_vif_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	if (register_netdevice(dev)) {
		free_netdev(dev);
		return NULL;
	}

	rcu_read_lock();
	if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
		rcu_read_unlock();
		goto failure;
	}

	ipv4_devconf_setall(in_dev);
	IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
	rcu_read_unlock();

	if (dev_open(dev))
		goto failure;

	dev_hold(dev);

	return dev;

failure:
	/* allow the register to be completed before unregistering. */
	rtnl_unlock();
	rtnl_lock();

	unregister_netdevice(dev);
	return NULL;
}
/**
 *	vif_delete - Delete a VIF entry
 *	@notify: Set to 1 if the caller is a notifier_call
 */

static int vif_delete(struct mr_table *mrt, int vifi, int notify,
		      struct list_head *head)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= mrt->maxvif)
		return -EADDRNOTAVAIL;

	v = &mrt->vif_table[vifi];

	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == mrt->mroute_reg_vif_num)
		mrt->mroute_reg_vif_num = -1;
#endif

	if (vifi+1 == mrt->maxvif) {
		int tmp;
		for (tmp = vifi-1; tmp >= 0; tmp--) {
			if (VIF_EXISTS(mrt, tmp))
				break;
		}
		mrt->maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
		IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
		ip_rt_multicast_event(in_dev);
	}

	if (v->flags & (VIFF_TUNNEL|VIFF_REGISTER) && !notify)
		unregister_netdevice_queue(dev, head);

	dev_put(dev);
	return 0;
}
static inline void ipmr_cache_free(struct mfc_cache *c)
{
	kmem_cache_free(mrt_cachep, c);
}

/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting error to netlink readers.
 */

static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
	struct net *net = mrt->net;
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&mrt->cache_resolve_queue_len);

	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			kfree_skb(skb);
	}

	ipmr_cache_free(c);
}
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
	struct mr_table *mrt = (struct mr_table *)arg;
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, *next;

	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
		return;
	}

	if (list_empty(&mrt->mfc_unres_queue))
		goto out;

	now = jiffies;
	expires = 10*HZ;

	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
		if (time_after(c->mfc_un.unres.expires, now)) {
			unsigned long interval = c->mfc_un.unres.expires - now;
			if (interval < expires)
				expires = interval;
			continue;
		}

		list_del(&c->list);
		ipmr_destroy_unres(mrt, c);
	}

	if (!list_empty(&mrt->mfc_unres_queue))
		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write-locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
				   unsigned char *ttls)
{
	int vifi;

	cache->mfc_un.res.minvif = MAXVIFS;
	cache->mfc_un.res.maxvif = 0;
	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);

	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
		if (VIF_EXISTS(mrt, vifi) &&
		    ttls[vifi] && ttls[vifi] < 255) {
			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
			if (cache->mfc_un.res.minvif > vifi)
				cache->mfc_un.res.minvif = vifi;
			if (cache->mfc_un.res.maxvif <= vifi)
				cache->mfc_un.res.maxvif = vifi + 1;
		}
	}
}
static int vif_add(struct net *net, struct mr_table *mrt,
		   struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;
	int err;

	/* Is vif busy ? */
	if (VIF_EXISTS(mrt, vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (mrt->mroute_reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(net, mrt);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			unregister_netdevice(dev);
			dev_put(dev);
			return err;
		}
		break;
#endif
	case VIFF_TUNNEL:
		dev = ipmr_new_tunnel(net, vifc);
		if (!dev)
			return -ENOBUFS;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			ipmr_del_tunnel(dev, vifc);
			dev_put(dev);
			return err;
		}
		break;

	case VIFF_USE_IFINDEX:
	case 0:
		if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
			dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
			if (dev && dev->ip_ptr == NULL) {
				dev_put(dev);
				return -EADDRNOTAVAIL;
			}
		} else
			dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);

		if (!dev)
			return -EADDRNOTAVAIL;
		err = dev_set_allmulti(dev, 1);
		if (err) {
			dev_put(dev);
			return err;
		}
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
		dev_put(dev);
		return -EADDRNOTAVAIL;
	}
	IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit = vifc->vifc_rate_limit;
	v->local = vifc->vifc_lcl_addr.s_addr;
	v->remote = vifc->vifc_rmt_addr.s_addr;
	v->flags = vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold = vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	if (v->flags & (VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	v->dev = dev;
#ifdef CONFIG_IP_PIMSM
	if (v->flags & VIFF_REGISTER)
		mrt->mroute_reg_vif_num = vifi;
#endif
	if (vifi+1 > mrt->maxvif)
		mrt->maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
					 __be32 origin,
					 __be32 mcastgrp)
{
	int line = MFC_HASH(mcastgrp, origin);
	struct mfc_cache *c;

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
			return c;
	}
	return NULL;
}

/*
 *	Allocate a multicast cache entry
 */
static struct mfc_cache *ipmr_cache_alloc(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

	if (c == NULL)
		return NULL;
	c->mfc_un.res.minvif = MAXVIFS;
	return c;
}

static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

	if (c == NULL)
		return NULL;
	skb_queue_head_init(&c->mfc_un.unres.unresolved);
	c->mfc_un.unres.expires = jiffies + 10*HZ;
	return c;
}
/*
 *	A cache entry has gone into a resolved state from queued
 */

static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
			       struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (ip_hdr(skb)->version == 0) {
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = (skb_tail_pointer(skb) -
						  (u8 *)nlh);
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}

			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
		} else
			ip_mr_forward(net, mrt, skb, c, 0);
	}
}
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this
 *	but mrouted expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 */
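/*
 * What mrouted receives on its raw IGMP socket is the original IP header
 * rewritten into a struct igmpmsg (see <linux/mroute.h>): im_mbz overlays
 * the protocol field and is set to zero, which is how the daemon tells
 * upcalls apart from real IGMP. A minimal userspace reader sketch
 * (illustrative only; mrouter_fd is the MRT_INIT'ed socket):
 *
 *	char buf[2048];
 *	struct igmpmsg *msg = (struct igmpmsg *)buf;
 *
 *	if (recv(mrouter_fd, buf, sizeof(buf), 0) > 0 && msg->im_mbz == 0) {
 *		switch (msg->im_msgtype) {
 *		case IGMPMSG_NOCACHE:	(resolve msg->im_src/im_dst)
 *		case IGMPMSG_WRONGVIF:	(assert candidate on msg->im_vif)
 *		case IGMPMSG_WHOLEPKT:	(PIM register data)
 *			break;
 *		}
 *	}
 */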
static int ipmr_cache_report(struct mr_table *mrt,
			     struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	const int ihl = ip_hdrlen(pkt);
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if (!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		 * Duplicate old header, fix ihl, length etc.
		 * And all this only to mangle msg->im_msgtype and
		 * to set msg->im_mbz to "mbz" :-)
		 */
		skb_push(skb, sizeof(struct iphdr));
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		msg = (struct igmpmsg *)skb_network_header(skb);
		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = mrt->mroute_reg_vif_num;
		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
					     sizeof(struct iphdr));
	} else
#endif
	{
		/*
		 *	Copy the IP header
		 */
		skb->network_header = skb->tail;
		skb_put(skb, ihl);
		skb_copy_to_linear_data(skb, pkt->data, ihl);
		ip_hdr(skb)->protocol = 0;	/* Flag to the kernel this is a route add */
		msg = (struct igmpmsg *)skb_network_header(skb);
		msg->im_vif = vifi;
		skb_dst_set(skb, dst_clone(skb_dst(pkt)));

		/*
		 *	Add our header
		 */
		igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
		igmp->type = msg->im_msgtype = assert;
		igmp->code = 0;
		ip_hdr(skb)->tot_len = htons(skb->len);	/* Fix the length */
		skb->transport_header = skb->network_header;
	}

	if (mrt->mroute_sk == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
	if (ret < 0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
/*
 *	Queue a packet for resolution. It gets a locked cache entry!
 */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
	bool found = false;
	int err;
	struct mfc_cache *c;
	const struct iphdr *iph = ip_hdr(skb);

	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
		if (c->mfc_mcastgrp == iph->daddr &&
		    c->mfc_origin == iph->saddr) {
			found = true;
			break;
		}
	}

	if (!found) {
		/*
		 *	Create a new entry if allowable
		 */
		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
		    (c = ipmr_cache_alloc_unres()) == NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent	= -1;
		c->mfc_origin	= iph->saddr;
		c->mfc_mcastgrp	= iph->daddr;

		/*
		 *	Report the first packet to mrouted.
		 */
		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
		if (err < 0) {
			/* If the report failed, throw the cache entry
			 * out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_cache_free(c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&mrt->cache_resolve_queue_len);
		list_add(&c->list, &mrt->mfc_unres_queue);

		mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen > 3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
/*
 *	MFC cache manipulation by user space mroute daemon
 */
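/*
 * The daemon side of that manipulation, for reference (a minimal
 * userspace sketch, illustrative only; addresses are documentation
 * ranges): installing an (S,G) entry with per-vif TTL thresholds on
 * the socket it initialised with MRT_INIT:
 *
 *	struct mfcctl mc;
 *
 *	memset(&mc, 0, sizeof(mc));
 *	mc.mfcc_origin.s_addr   = inet_addr("192.0.2.1");	(source S)
 *	mc.mfcc_mcastgrp.s_addr = inet_addr("233.252.0.1");	(group G)
 *	mc.mfcc_parent  = 0;		(expected incoming vif)
 *	mc.mfcc_ttls[1] = 1;		(forward on vif 1 if TTL > 1)
 *	setsockopt(mrouter_fd, IPPROTO_IP, MRT_ADD_MFC, &mc, sizeof(mc));
 */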
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
	int line;
	struct mfc_cache *c, *next;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
			return 0;
		}
	}
	return -ENOENT;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
			struct mfcctl *mfc, int mrtsock)
{
	bool found = false;
	int line;
	struct mfc_cache *uc, *c;

	if (mfc->mfcc_parent >= MAXVIFS)
		return -ENFILE;

	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
			found = true;
			break;
		}
	}

	if (found) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c = ipmr_cache_alloc();
	if (c == NULL)
		return -ENOMEM;

	c->mfc_origin = mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent = mfc->mfcc_parent;
	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	write_lock_bh(&mrt_lock);
	list_add(&c->list, &mrt->mfc_cache_array[line]);
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	found = false;
	spin_lock_bh(&mfc_unres_lock);
	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			list_del(&uc->list);
			atomic_dec(&mrt->cache_resolve_queue_len);
			found = true;
			break;
		}
	}
	if (list_empty(&mrt->mfc_unres_queue))
		del_timer(&mrt->ipmr_expire_timer);
	spin_unlock_bh(&mfc_unres_lock);

	if (found) {
		ipmr_cache_resolve(net, mrt, uc, c);
		ipmr_cache_free(uc);
	}
	return 0;
}
/*
 *	Close the multicast socket, and clear the vif tables etc
 */

static void mroute_clean_tables(struct mr_table *mrt)
{
	int i;
	LIST_HEAD(list);
	struct mfc_cache *c, *next;

	/*
	 *	Shut down all active vif entries
	 */
	for (i = 0; i < mrt->maxvif; i++) {
		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
			vif_delete(mrt, i, 0, &list);
	}
	unregister_netdevice_many(&list);

	/*
	 *	Wipe the cache
	 */
	for (i = 0; i < MFC_LINES; i++) {
		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
			if (c->mfc_flags & MFC_STATIC)
				continue;
			write_lock_bh(&mrt_lock);
			list_del(&c->list);
			write_unlock_bh(&mrt_lock);

			ipmr_cache_free(c);
		}
	}

	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
		spin_lock_bh(&mfc_unres_lock);
		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
			list_del(&c->list);
			ipmr_destroy_unres(mrt, c);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}

static void mrtsock_destruct(struct sock *sk)
{
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	rtnl_lock();
	ipmr_for_each_table(mrt, net) {
		if (sk == mrt->mroute_sk) {
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;

			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = NULL;
			write_unlock_bh(&mrt_lock);

			mroute_clean_tables(mrt);
		}
	}
	rtnl_unlock();
}
/*
 *	Socket options and virtual interface manipulation. The whole
 *	virtual interface system is a complete heap, but unfortunately
 *	that's how BSD mrouted happens to think. Maybe one day with a proper
 *	MOSPF/PIM router set up we can clean this up.
 */
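/*
 * For reference, the daemon's view of this API is a raw IGMP socket
 * (a minimal userspace sketch, illustrative only, error handling
 * omitted):
 *
 *	int one = 1;
 *	struct vifctl vc;
 *	int mrouter_fd = socket(AF_INET, SOCK_RAW, IPPROTO_IGMP);
 *
 *	setsockopt(mrouter_fd, IPPROTO_IP, MRT_INIT, &one, sizeof(one));
 *
 *	memset(&vc, 0, sizeof(vc));
 *	vc.vifc_vifi = 0;		(vif index)
 *	vc.vifc_threshold = 1;		(minimum TTL to forward)
 *	vc.vifc_lcl_addr.s_addr = inet_addr("198.51.100.1");
 *	setsockopt(mrouter_fd, IPPROTO_IP, MRT_ADD_VIF, &vc, sizeof(vc));
 *
 *	(MRT_ADD_MFC / MRT_DEL_MFC as sketched above)
 *
 *	setsockopt(mrouter_fd, IPPROTO_IP, MRT_DONE, NULL, 0);
 */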
int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
	int ret;
	struct vifctl vif;
	struct mfcctl mfc;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_INIT) {
		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
			return -EACCES;
	}

	switch (optname) {
	case MRT_INIT:
		if (sk->sk_type != SOCK_RAW ||
		    inet_sk(sk)->inet_num != IPPROTO_IGMP)
			return -EOPNOTSUPP;
		if (optlen != sizeof(int))
			return -ENOPROTOOPT;

		rtnl_lock();
		if (mrt->mroute_sk) {
			rtnl_unlock();
			return -EADDRINUSE;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			write_lock_bh(&mrt_lock);
			mrt->mroute_sk = sk;
			write_unlock_bh(&mrt_lock);

			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
		}
		rtnl_unlock();
		return ret;
	case MRT_DONE:
		if (sk != mrt->mroute_sk)
			return -EACCES;
		return ip_ra_control(sk, 0, NULL);
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif))
			return -EINVAL;
		if (copy_from_user(&vif, optval, sizeof(vif)))
			return -EFAULT;
		if (vif.vifc_vifi >= MAXVIFS)
			return -ENFILE;
		rtnl_lock();
		if (optname == MRT_ADD_VIF)
			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
		else
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		rtnl_unlock();
		return ret;

		/*
		 *	Manipulate the forwarding caches. These live
		 *	in a sort of kernel/user symbiosis.
		 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		if (optlen != sizeof(mfc))
			return -EINVAL;
		if (copy_from_user(&mfc, optval, sizeof(mfc)))
			return -EFAULT;
		rtnl_lock();
		if (optname == MRT_DEL_MFC)
			ret = ipmr_mfc_delete(mrt, &mfc);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
		rtnl_unlock();
		return ret;
		/*
		 *	Control PIM assert.
		 */
	case MRT_ASSERT:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		mrt->mroute_do_assert = (v) ? 1 : 0;
		return 0;
	}
#ifdef CONFIG_IP_PIMSM
	case MRT_PIM:
	{
		int v;

		if (get_user(v, (int __user *)optval))
			return -EFAULT;
		v = (v) ? 1 : 0;

		rtnl_lock();
		ret = 0;
		if (v != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = v;
			mrt->mroute_do_assert = v;
		}
		rtnl_unlock();
		return ret;
	}
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
	case MRT_TABLE:
	{
		u32 v;

		if (optlen != sizeof(u32))
			return -EINVAL;
		if (get_user(v, (u32 __user *)optval))
			return -EFAULT;
		if (sk == mrt->mroute_sk)
			return -EBUSY;

		rtnl_lock();
		ret = 0;
		if (!ipmr_new_table(net, v))
			ret = -ENOMEM;
		raw_sk(sk)->ipmr_table = v;
		rtnl_unlock();
		return ret;
	}
#endif
	/*
	 *	Spurious command, or MRT_VERSION which you cannot
	 *	set.
	 */
	default:
		return -ENOPROTOOPT;
	}
}
/*
 *	Getsockopt support for the multicast routing system.
 */
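/*
 * Userspace side, for reference (illustrative only): querying the API
 * version, which is the one option that cannot be set:
 *
 *	int ver = 0;
 *	socklen_t len = sizeof(ver);
 *
 *	getsockopt(mrouter_fd, IPPROTO_IP, MRT_VERSION, &ver, &len);
 */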
int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
	    optname != MRT_PIM &&
#endif
	    optname != MRT_ASSERT)
		return -ENOPROTOOPT;

	if (get_user(olr, optlen))
		return -EFAULT;

	olr = min_t(unsigned int, olr, sizeof(int));
	if (olr < 0)
		return -EINVAL;

	if (put_user(olr, optlen))
		return -EFAULT;
	if (optname == MRT_VERSION)
		val = 0x0305;
#ifdef CONFIG_IP_PIMSM
	else if (optname == MRT_PIM)
		val = mrt->mroute_do_pim;
#endif
	else
		val = mrt->mroute_do_assert;
	if (copy_to_user(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/*
 *	The IP multicast ioctl support routines.
 */
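/*
 * Userspace side, for reference (illustrative only): per-vif and
 * per-flow counters are fetched with ioctls on the routing socket:
 *
 *	struct sioc_vif_req vr = { .vifi = 0 };
 *
 *	if (ioctl(mrouter_fd, SIOCGETVIFCNT, &vr) == 0)
 *		printf("vif0: %lu/%lu pkts in/out\n", vr.icount, vr.ocount);
 */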
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETVIFCNT:
		if (copy_from_user(&vr, arg, sizeof(vr)))
			return -EFAULT;
		if (vr.vifi >= mrt->maxvif)
			return -EINVAL;
		read_lock(&mrt_lock);
		vif = &mrt->vif_table[vr.vifi];
		if (VIF_EXISTS(mrt, vr.vifi)) {
			vr.icount = vif->pkt_in;
			vr.ocount = vif->pkt_out;
			vr.ibytes = vif->bytes_in;
			vr.obytes = vif->bytes_out;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &vr, sizeof(vr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	case SIOCGETSGCNT:
		if (copy_from_user(&sr, arg, sizeof(sr)))
			return -EFAULT;

		read_lock(&mrt_lock);
		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
		if (c) {
			sr.pktcnt = c->mfc_un.res.pkt;
			sr.bytecnt = c->mfc_un.res.bytes;
			sr.wrong_if = c->mfc_un.res.wrong_if;
			read_unlock(&mrt_lock);

			if (copy_to_user(arg, &sr, sizeof(sr)))
				return -EFAULT;
			return 0;
		}
		read_unlock(&mrt_lock);
		return -EADDRNOTAVAIL;
	default:
		return -ENOIOCTLCMD;
	}
}
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;
	struct net *net = dev_net(dev);
	struct mr_table *mrt;
	struct vif_device *v;
	int ct;
	LIST_HEAD(list);

	if (event != NETDEV_UNREGISTER)
		return NOTIFY_DONE;

	ipmr_for_each_table(mrt, net) {
		v = &mrt->vif_table[0];
		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
			if (v->dev == dev)
				vif_delete(mrt, ct, 1, &list);
		}
	}
	unregister_netdevice_many(&list);
	return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
	.notifier_call = ipmr_device_event,
};
/*
 *	Encapsulate a packet by attaching a valid IPIP header to it.
 *	This avoids tunnel drivers and other mess and gives us the speed so
 *	important for multicast video.
 */
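/*
 * The resulting layout is plain IPIP (protocol 4); the receiving tunnel
 * endpoint simply strips the outer header again:
 *
 *	+------------------------+------------------------------------+
 *	| outer iphdr            | original multicast packet          |
 *	| saddr = vif->local     | (its own IP header and payload     |
 *	| daddr = vif->remote    |  are left untouched)               |
 *	+------------------------+------------------------------------+
 */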
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct iphdr *iph;
	struct iphdr *old_iph = ip_hdr(skb);

	skb_push(skb, sizeof(struct iphdr));
	skb->transport_header = skb->network_header;
	skb_reset_network_header(skb);
	iph = ip_hdr(skb);

	iph->version	= 4;
	iph->tos	= old_iph->tos;
	iph->ttl	= old_iph->ttl;
	iph->frag_off	= 0;
	iph->daddr	= daddr;
	iph->saddr	= saddr;
	iph->protocol	= IPPROTO_IPIP;
	iph->ihl	= 5;
	iph->tot_len	= htons(skb->len);
	ip_select_ident(iph, skb_dst(skb), NULL);
	ip_send_check(iph);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}

static inline int ipmr_forward_finish(struct sk_buff *skb)
{
	struct ip_options *opt = &(IPCB(skb)->opt);

	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

	if (unlikely(opt->optlen))
		ip_forward_options(skb);

	return dst_output(skb);
}
/*
 *	Processing handlers for ipmr_forward
 */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct vif_device *vif = &mrt->vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int encap = 0;

	if (vif->dev == NULL)
		goto out_free;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		vif->pkt_out++;
		vif->bytes_out += skb->len;
		vif->dev->stats.tx_bytes += skb->len;
		vif->dev->stats.tx_packets++;
		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
		goto out_free;
	}
#endif

	if (vif->flags & VIFF_TUNNEL) {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = vif->remote,
						.saddr = vif->local,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
		encap = sizeof(struct iphdr);
	} else {
		struct flowi fl = { .oif = vif->link,
				    .nl_u = { .ip4_u =
					      { .daddr = iph->daddr,
						.tos = RT_TOS(iph->tos) } },
				    .proto = IPPROTO_IPIP };
		if (ip_route_output_key(net, &rt, &fl))
			goto out_free;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		 * allow sending ICMP here, so these packets will
		 * simply disappear.
		 */
		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
		ip_rt_put(rt);
		goto out_free;
	}

	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

	if (skb_cow(skb, encap)) {
		ip_rt_put(rt);
		goto out_free;
	}

	vif->pkt_out++;
	vif->bytes_out += skb->len;

	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->u.dst);
	ip_decrease_ttl(ip_hdr(skb));

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR
	 */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		vif->dev->stats.tx_packets++;
		vif->dev->stats.tx_bytes += skb->len;
	}

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC 1584 teaches that a DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but also after forwarding on all output
	 * interfaces. Clearly, if an mrouter runs a multicasting program,
	 * it should receive packets regardless of which interface the
	 * program joined on.
	 * If we do not do this, the program will have to join on all
	 * interfaces. On the other hand, a multihomed host (or router, but
	 * not mrouter) cannot join on more than one interface - it would
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
	int ct;

	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
		if (mrt->vif_table[ct].dev == dev)
			break;
	}
	return ct;
}
1616 /* "local" means that we should preserve one skb (for local delivery) */
1618 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1619 struct sk_buff *skb, struct mfc_cache *cache,
1625 vif = cache->mfc_parent;
1626 cache->mfc_un.res.pkt++;
1627 cache->mfc_un.res.bytes += skb->len;
1630 * Wrong interface: drop packet and (maybe) send PIM assert.
1632 if (mrt->vif_table[vif].dev != skb->dev) {
1635 if (skb_rtable(skb)->fl.iif == 0) {
1636 /* It is our own packet, looped back.
1637 Very complicated situation...
1639 The best workaround until routing daemons will be
1640 fixed is not to redistribute packet, if it was
1641 send through wrong interface. It means, that
1642 multicast applications WILL NOT work for
1643 (S,G), which have default multicast route pointing
1644 to wrong oif. In any case, it is not a good
1645 idea to use multicasting applications on router.
1650 cache->mfc_un.res.wrong_if++;
1651 true_vifi = ipmr_find_vif(mrt, skb->dev);
1653 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1654 /* pimsm uses asserts, when switching from RPT to SPT,
1655 so that we cannot check that packet arrived on an oif.
1656 It is bad, but otherwise we would need to move pretty
1657 large chunk of pimd to kernel. Ough... --ANK
1659 (mrt->mroute_do_pim ||
1660 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1662 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1663 cache->mfc_un.res.last_assert = jiffies;
1664 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1669 mrt->vif_table[vif].pkt_in++;
1670 mrt->vif_table[vif].bytes_in += skb->len;
1675 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
1676 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1678 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1680 ipmr_queue_xmit(net, mrt, skb2, cache,
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1690 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1692 ipmr_queue_xmit(net, mrt, skb, cache, psend);
/*
 *	Multicast packets for forwarding arrive here
 */

int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	struct net *net = dev_net(skb->dev);
	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
	struct mr_table *mrt;
	int err;

	/* Packet is looped back after forwarding; it should not be
	 * forwarded a second time, but it can still be delivered locally.
	 */
	if (IPCB(skb)->flags & IPSKB_FORWARDED)
		goto dont_forward;

	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
	if (err < 0)
		return err;

	if (!local) {
		if (IPCB(skb)->opt.router_alert) {
			if (ip_call_ra_chain(skb))
				return 0;
		} else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
			/* IGMPv1 (and broken IGMPv2 implementations, such
			 * as Cisco IOS <= 11.2(8)) do not put the router
			 * alert option into IGMP packets destined for
			 * routable groups. This is very bad, because it
			 * means that we can forward NO IGMP messages.
			 */
			read_lock(&mrt_lock);
			if (mrt->mroute_sk) {
				nf_reset(skb);
				raw_rcv(mrt->mroute_sk, skb);
				read_unlock(&mrt_lock);
				return 0;
			}
			read_unlock(&mrt_lock);
		}
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache == NULL) {
		int vif;

		if (local) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(mrt, skb->dev);
		if (vif >= 0) {
			int err = ipmr_cache_unresolved(mrt, vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(net, mrt, skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
#ifdef CONFIG_IP_PIMSM
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
		     unsigned int pimlen)
{
	struct net_device *reg_dev = NULL;
	struct iphdr *encap;

	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
	/*
	 * Check that:
	 * a. packet is really destined to a multicast group
	 * b. packet is not a NULL-REGISTER
	 * c. packet is not truncated
	 */
	if (!ipv4_is_multicast(encap->daddr) ||
	    encap->tot_len == 0 ||
	    ntohs(encap->tot_len) + pimlen > skb->len)
		return 1;

	read_lock(&mrt_lock);
	if (mrt->mroute_reg_vif_num >= 0)
		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL)
		return 1;

	skb->mac_header = skb->network_header;
	skb_pull(skb, (u8 *)encap - skb->data);
	skb_reset_network_header(skb);
	skb->dev = reg_dev;
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	skb_dst_drop(skb);
	reg_dev->stats.rx_bytes += skb->len;
	reg_dev->stats.rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);

	return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/*
 * Handle IGMP messages of PIMv1
 */

int pim_rcv_v1(struct sk_buff *skb)
{
	struct igmphdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = igmp_hdr(skb);

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (!mrt->mroute_do_pim ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif

#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
	struct pimreghdr *pim;
	struct net *net = dev_net(skb->dev);
	struct mr_table *mrt;

	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
		goto drop;

	pim = (struct pimreghdr *)skb_transport_header(skb);
	if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
	    (pim->flags & PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
		goto drop;

	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
		goto drop;

	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
		kfree_skb(skb);
	}
	return 0;
}
#endif
static int
ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
		 struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	u8 *b = skb_tail_pointer(skb);
	struct rtattr *mp_head;

	/* If cache is unresolved, don't try to parse IIF and OIF */
	if (c->mfc_parent >= MAXVIFS)
		return -ENOENT;

	if (VIF_EXISTS(mrt, c->mfc_parent))
		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
int ipmr_get_route(struct net *net,
		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mr_table *mrt;
	struct mfc_cache *cache;
	struct rtable *rt = skb_rtable(skb);

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return -ENOENT;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

	if (cache == NULL) {
		struct sk_buff *skb2;
		struct iphdr *iph;
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2) {
			read_unlock(&mrt_lock);
			return -ENOMEM;
		}

		skb_push(skb2, sizeof(struct iphdr));
		skb_reset_network_header(skb2);
		iph = ip_hdr(skb2);
		iph->ihl = sizeof(struct iphdr) >> 2;
		iph->saddr = rt->rt_src;
		iph->daddr = rt->rt_dst;
		iph->version = 0;
		err = ipmr_cache_unresolved(mrt, vif, skb2);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
#ifdef CONFIG_PROC_FS
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip_mr_cache and /proc/net/ip_mr_vif
 */
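/*
 * Sample output (the counter values are illustrative; the formats in
 * the seq_show handlers below are authoritative):
 *
 *	# cat /proc/net/ip_mr_vif
 *	Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote
 *	 0 eth0           12816     110     12816     110 00000 0100A8C0 00000000
 *
 *	# cat /proc/net/ip_mr_cache
 *	Group    Origin   Iif     Pkts    Bytes    Wrong Oifs
 *	0100FCE9 010000C0 0        110    12816        0  1:1
 */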
struct ipmr_vif_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	int ct;
};

static struct vif_device *ipmr_vif_seq_idx(struct net *net,
					   struct ipmr_vif_iter *iter,
					   loff_t pos)
{
	struct mr_table *mrt = iter->mrt;

	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		if (pos-- == 0)
			return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(mrt_lock)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	iter->mrt = mrt;

	read_lock(&mrt_lock);
	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = iter->mrt;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ipmr_vif_seq_idx(net, iter, 0);

	while (++iter->ct < mrt->maxvif) {
		if (!VIF_EXISTS(mrt, iter->ct))
			continue;
		return &mrt->vif_table[iter->ct];
	}
	return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
	__releases(mrt_lock)
{
	read_unlock(&mrt_lock);
}
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
	struct ipmr_vif_iter *iter = seq->private;
	struct mr_table *mrt = iter->mrt;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	} else {
		const struct vif_device *vif = v;
		const char *name = vif->dev ? vif->dev->name : "none";

		seq_printf(seq,
			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			   vif - mrt->vif_table,
			   name, vif->bytes_in, vif->pkt_in,
			   vif->bytes_out, vif->pkt_out,
			   vif->flags, vif->local, vif->remote);
	}
	return 0;
}

static const struct seq_operations ipmr_vif_seq_ops = {
	.start = ipmr_vif_seq_start,
	.next  = ipmr_vif_seq_next,
	.stop  = ipmr_vif_seq_stop,
	.show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_vif_seq_ops,
			    sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_vif_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
struct ipmr_mfc_iter {
	struct seq_net_private p;
	struct mr_table *mrt;
	struct list_head *cache;
	int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
					  struct ipmr_mfc_iter *it, loff_t pos)
{
	struct mr_table *mrt = it->mrt;
	struct mfc_cache *mfc;

	read_lock(&mrt_lock);
	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		list_for_each_entry(mfc, it->cache, list)
			if (pos-- == 0)
				return mfc;
	}
	read_unlock(&mrt_lock);

	spin_lock_bh(&mfc_unres_lock);
	it->cache = &mrt->mfc_unres_queue;
	list_for_each_entry(mfc, it->cache, list)
		if (pos-- == 0)
			return mfc;
	spin_unlock_bh(&mfc_unres_lock);

	it->cache = NULL;
	return NULL;
}

static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt;

	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
	if (mrt == NULL)
		return ERR_PTR(-ENOENT);

	it->mrt = mrt;
	it->cache = NULL;
	it->ct = 0;
	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
		: SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct mfc_cache *mfc = v;
	struct ipmr_mfc_iter *it = seq->private;
	struct net *net = seq_file_net(seq);
	struct mr_table *mrt = it->mrt;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return ipmr_mfc_seq_idx(net, seq->private, 0);

	if (mfc->list.next != it->cache)
		return list_entry(mfc->list.next, struct mfc_cache, list);

	if (it->cache == &mrt->mfc_unres_queue)
		goto end_of_list;

	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

	while (++it->ct < MFC_LINES) {
		it->cache = &mrt->mfc_cache_array[it->ct];
		if (list_empty(it->cache))
			continue;
		return list_first_entry(it->cache, struct mfc_cache, list);
	}

	/* exhausted cache_array, show unresolved */
	read_unlock(&mrt_lock);
	it->cache = &mrt->mfc_unres_queue;
	it->ct = 0;

	spin_lock_bh(&mfc_unres_lock);
	if (!list_empty(it->cache))
		return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
	spin_unlock_bh(&mfc_unres_lock);
	it->cache = NULL;

	return NULL;
}

static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
	struct ipmr_mfc_iter *it = seq->private;
	struct mr_table *mrt = it->mrt;

	if (it->cache == &mrt->mfc_unres_queue)
		spin_unlock_bh(&mfc_unres_lock);
	else if (it->cache == &mrt->mfc_cache_array[it->ct])
		read_unlock(&mrt_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
	int n;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	} else {
		const struct mfc_cache *mfc = v;
		const struct ipmr_mfc_iter *it = seq->private;
		const struct mr_table *mrt = it->mrt;

		seq_printf(seq, "%08lX %08lX %-3hd",
			   (unsigned long) mfc->mfc_mcastgrp,
			   (unsigned long) mfc->mfc_origin,
			   mfc->mfc_parent);

		if (it->cache != &mrt->mfc_unres_queue) {
			seq_printf(seq, " %8lu %8lu %8lu",
				   mfc->mfc_un.res.pkt,
				   mfc->mfc_un.res.bytes,
				   mfc->mfc_un.res.wrong_if);
			for (n = mfc->mfc_un.res.minvif;
			     n < mfc->mfc_un.res.maxvif; n++) {
				if (VIF_EXISTS(mrt, n) &&
				    mfc->mfc_un.res.ttls[n] < 255)
					seq_printf(seq,
						   " %2d:%-3d",
						   n, mfc->mfc_un.res.ttls[n]);
			}
		} else {
			/* unresolved mfc_caches don't contain
			 * pkt, bytes and wrong_if values
			 */
			seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
		}
		seq_putc(seq, '\n');
	}
	return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
	.start = ipmr_mfc_seq_start,
	.next  = ipmr_mfc_seq_next,
	.stop  = ipmr_mfc_seq_stop,
	.show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
			    sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = ipmr_mfc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release_net,
};
#endif

#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
	.handler	= pim_rcv,
	.netns_ok	= 1,
};
#endif
/*
 *	Setup for IP multicast routing
 */
static int __net_init ipmr_net_init(struct net *net)
{
	int err;

	err = ipmr_rules_init(net);
	if (err < 0)
		goto fail;

#ifdef CONFIG_PROC_FS
	err = -ENOMEM;
	if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
		goto proc_vif_fail;
	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
		goto proc_cache_fail;
#endif
	return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
	proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
	ipmr_rules_exit(net);
#endif
fail:
	return err;
}

static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ip_mr_cache");
	proc_net_remove(net, "ip_mr_vif");
#endif
	ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
	.init = ipmr_net_init,
	.exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
	int err;

	mrt_cachep = kmem_cache_create("ip_mrt_cache",
				       sizeof(struct mfc_cache),
				       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
				       NULL);

	err = register_pernet_subsys(&ipmr_net_ops);
	if (err)
		goto reg_pernet_fail;

	err = register_netdevice_notifier(&ip_mr_notifier);
	if (err)
		goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
	if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
		printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
		err = -EAGAIN;
		goto add_proto_fail;
	}
#endif
	return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
	unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
	unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
	kmem_cache_destroy(mrt_cachep);
	return err;
}