net/ipv4/ipmr.c

   1 /*
   2  *      IP multicast routing support for mrouted 3.6/3.8
   3  *
   4  *              (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *        Linux Consultancy and Custom Driver Development
   6  *
   7  *      This program is free software; you can redistribute it and/or
   8  *      modify it under the terms of the GNU General Public License
   9  *      as published by the Free Software Foundation; either version
  10  *      2 of the License, or (at your option) any later version.
  11  *
  12  *      Fixes:
  13  *      Michael Chastain        :       Incorrect size of copying.
  14  *      Alan Cox                :       Added the cache manager code
  15  *      Alan Cox                :       Fixed the clone/copy bug and device race.
  16  *      Mike McLagan            :       Routing by source
  17  *      Malcolm Beattie         :       Buffer handling fixes.
  18  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
  19  *      SVR Anand               :       Fixed several multicast bugs and problems.
  20  *      Alexey Kuznetsov        :       Status, optimisations and more.
  21  *      Brad Parker             :       Better behaviour on mrouted upcall
  22  *                                      overflow.
  23  *      Carlos Picoto           :       PIMv1 Support
  24  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
   25  *                                      Relax this requirement to work with older peers.
  26  *
  27  */
  28
  29 #include <asm/system.h>
  30 #include <asm/uaccess.h>
  31 #include <linux/types.h>
  32 #include <linux/capability.h>
  33 #include <linux/errno.h>
  34 #include <linux/timer.h>
  35 #include <linux/mm.h>
  36 #include <linux/kernel.h>
  37 #include <linux/fcntl.h>
  38 #include <linux/stat.h>
  39 #include <linux/socket.h>
  40 #include <linux/in.h>
  41 #include <linux/inet.h>
  42 #include <linux/netdevice.h>
  43 #include <linux/inetdevice.h>
  44 #include <linux/igmp.h>
  45 #include <linux/proc_fs.h>
  46 #include <linux/seq_file.h>
  47 #include <linux/mroute.h>
  48 #include <linux/init.h>
  49 #include <linux/if_ether.h>
  50 #include <linux/slab.h>
  51 #include <net/net_namespace.h>
  52 #include <net/ip.h>
  53 #include <net/protocol.h>
  54 #include <linux/skbuff.h>
  55 #include <net/route.h>
  56 #include <net/sock.h>
  57 #include <net/icmp.h>
  58 #include <net/udp.h>
  59 #include <net/raw.h>
  60 #include <linux/notifier.h>
  61 #include <linux/if_arp.h>
  62 #include <linux/netfilter_ipv4.h>
  63 #include <net/ipip.h>
  64 #include <net/checksum.h>
  65 #include <net/netlink.h>
  66
  67 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
  68 #define CONFIG_IP_PIMSM 1
  69 #endif
  70
  71 /* Big lock, protecting vif table, mrt cache and mroute socket state.
  72    Note that the changes are semaphored via rtnl_lock.
  73  */
  74
  75 static DEFINE_RWLOCK(mrt_lock);
  76
  77 /*
  78  *      Multicast router control variables
  79  */
  80
  81 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
  82
  83 /* Special spinlock for queue of unresolved entries */
  84 static DEFINE_SPINLOCK(mfc_unres_lock);
  85
   86 /* We return to Alan's original scheme. The hash table of resolved
   87    entries is changed only in process context and is protected
   88    by the weak lock mrt_lock. The queue of unresolved entries is
   89    protected by the strong spinlock mfc_unres_lock.
   90
   91    In this case the data path is entirely free of exclusive locks.
   92  */
  93
  94 static struct kmem_cache *mrt_cachep __read_mostly;
  95
  96 static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
  97 static int ipmr_cache_report(struct net *net,
  98                              struct sk_buff *pkt, vifi_t vifi, int assert);
  99 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 100
 101 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 102
 103 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
 104 {
 105         struct net *net = dev_net(dev);
 106
 107         dev_close(dev);
 108
 109         dev = __dev_get_by_name(net, "tunl0");
 110         if (dev) {
 111                 const struct net_device_ops *ops = dev->netdev_ops;
 112                 struct ifreq ifr;
 113                 struct ip_tunnel_parm p;
 114
 115                 memset(&p, 0, sizeof(p));
 116                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 117                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 118                 p.iph.version = 4;
 119                 p.iph.ihl = 5;
 120                 p.iph.protocol = IPPROTO_IPIP;
 121                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 122                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 123
 124                 if (ops->ndo_do_ioctl) {
 125                         mm_segment_t oldfs = get_fs();
 126
 127                         set_fs(KERNEL_DS);
 128                         ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
 129                         set_fs(oldfs);
 130                 }
 131         }
 132 }
 133
 134 static
 135 struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 136 {
 137         struct net_device  *dev;
 138
 139         dev = __dev_get_by_name(net, "tunl0");
 140
 141         if (dev) {
 142                 const struct net_device_ops *ops = dev->netdev_ops;
 143                 int err;
 144                 struct ifreq ifr;
 145                 struct ip_tunnel_parm p;
 146                 struct in_device  *in_dev;
 147
 148                 memset(&p, 0, sizeof(p));
 149                 p.iph.daddr = v->vifc_rmt_addr.s_addr;
 150                 p.iph.saddr = v->vifc_lcl_addr.s_addr;
 151                 p.iph.version = 4;
 152                 p.iph.ihl = 5;
 153                 p.iph.protocol = IPPROTO_IPIP;
 154                 sprintf(p.name, "dvmrp%d", v->vifc_vifi);
 155                 ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 156
 157                 if (ops->ndo_do_ioctl) {
 158                         mm_segment_t oldfs = get_fs();
 159
 160                         set_fs(KERNEL_DS);
 161                         err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
 162                         set_fs(oldfs);
 163                 } else
 164                         err = -EOPNOTSUPP;
 165
 166                 dev = NULL;
 167
 168                 if (err == 0 &&
 169                     (dev = __dev_get_by_name(net, p.name)) != NULL) {
 170                         dev->flags |= IFF_MULTICAST;
 171
 172                         in_dev = __in_dev_get_rtnl(dev);
 173                         if (in_dev == NULL)
 174                                 goto failure;
 175
 176                         ipv4_devconf_setall(in_dev);
 177                         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 178
 179                         if (dev_open(dev))
 180                                 goto failure;
 181                         dev_hold(dev);
 182                 }
 183         }
 184         return dev;
 185
 186 failure:
 187         /* allow the register to be completed before unregistering. */
 188         rtnl_unlock();
 189         rtnl_lock();
 190
 191         unregister_netdevice(dev);
 192         return NULL;
 193 }
 194
 195 #ifdef CONFIG_IP_PIMSM
 196
 197 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 198 {
 199         struct net *net = dev_net(dev);
 200
 201         read_lock(&mrt_lock);
 202         dev->stats.tx_bytes += skb->len;
 203         dev->stats.tx_packets++;
 204         ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
 205                           IGMPMSG_WHOLEPKT);
 206         read_unlock(&mrt_lock);
 207         kfree_skb(skb);
 208         return NETDEV_TX_OK;
 209 }
 210
 211 static const struct net_device_ops reg_vif_netdev_ops = {
 212         .ndo_start_xmit = reg_vif_xmit,
 213 };
 214
 215 static void reg_vif_setup(struct net_device *dev)
 216 {
 217         dev->type               = ARPHRD_PIMREG;
 218         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
 219         dev->flags              = IFF_NOARP;
 220         dev->netdev_ops         = &reg_vif_netdev_ops,
 221         dev->destructor         = free_netdev;
 222         dev->features           |= NETIF_F_NETNS_LOCAL;
 223 }
 224
 225 static struct net_device *ipmr_reg_vif(struct net *net)
 226 {
 227         struct net_device *dev;
 228         struct in_device *in_dev;
 229
 230         dev = alloc_netdev(0, "pimreg", reg_vif_setup);
 231
 232         if (dev == NULL)
 233                 return NULL;
 234
 235         dev_net_set(dev, net);
 236
 237         if (register_netdevice(dev)) {
 238                 free_netdev(dev);
 239                 return NULL;
 240         }
 241         dev->iflink = 0;
 242
 243         rcu_read_lock();
 244         if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
 245                 rcu_read_unlock();
 246                 goto failure;
 247         }
 248
 249         ipv4_devconf_setall(in_dev);
 250         IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;
 251         rcu_read_unlock();
 252
 253         if (dev_open(dev))
 254                 goto failure;
 255
 256         dev_hold(dev);
 257
 258         return dev;
 259
 260 failure:
 261         /* allow the register to be completed before unregistering. */
 262         rtnl_unlock();
 263         rtnl_lock();
 264
 265         unregister_netdevice(dev);
 266         return NULL;
 267 }
 268 #endif
 269
 270 /*
 271  *      Delete a VIF entry
 272  *      @notify: Set to 1, if the caller is a notifier_call
 273  */
 274
 275 static int vif_delete(struct net *net, int vifi, int notify,
 276                       struct list_head *head)
 277 {
 278         struct vif_device *v;
 279         struct net_device *dev;
 280         struct in_device *in_dev;
 281
 282         if (vifi < 0 || vifi >= net->ipv4.maxvif)
 283                 return -EADDRNOTAVAIL;
 284
 285         v = &net->ipv4.vif_table[vifi];
 286
 287         write_lock_bh(&mrt_lock);
 288         dev = v->dev;
 289         v->dev = NULL;
 290
 291         if (!dev) {
 292                 write_unlock_bh(&mrt_lock);
 293                 return -EADDRNOTAVAIL;
 294         }
 295
 296 #ifdef CONFIG_IP_PIMSM
 297         if (vifi == net->ipv4.mroute_reg_vif_num)
 298                 net->ipv4.mroute_reg_vif_num = -1;
 299 #endif
 300
 301         if (vifi+1 == net->ipv4.maxvif) {
 302                 int tmp;
 303                 for (tmp=vifi-1; tmp>=0; tmp--) {
 304                         if (VIF_EXISTS(net, tmp))
 305                                 break;
 306                 }
 307                 net->ipv4.maxvif = tmp+1;
 308         }
 309
 310         write_unlock_bh(&mrt_lock);
 311
 312         dev_set_allmulti(dev, -1);
 313
 314         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
 315                 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
 316                 ip_rt_multicast_event(in_dev);
 317         }
 318
 319         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
 320                 unregister_netdevice_queue(dev, head);
 321
 322         dev_put(dev);
 323         return 0;
 324 }
 325
 326 static inline void ipmr_cache_free(struct mfc_cache *c)
 327 {
 328         release_net(mfc_net(c));
 329         kmem_cache_free(mrt_cachep, c);
 330 }
 331
 332 /* Destroy an unresolved cache entry, killing queued skbs
 333    and reporting error to netlink readers.
 334  */
 335
 336 static void ipmr_destroy_unres(struct mfc_cache *c)
 337 {
 338         struct sk_buff *skb;
 339         struct nlmsgerr *e;
 340         struct net *net = mfc_net(c);
 341
 342         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 343
 344         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 345                 if (ip_hdr(skb)->version == 0) {
 346                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 347                         nlh->nlmsg_type = NLMSG_ERROR;
 348                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 349                         skb_trim(skb, nlh->nlmsg_len);
 350                         e = NLMSG_DATA(nlh);
 351                         e->error = -ETIMEDOUT;
 352                         memset(&e->msg, 0, sizeof(e->msg));
 353
 354                         rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 355                 } else
 356                         kfree_skb(skb);
 357         }
 358
 359         ipmr_cache_free(c);
 360 }
 361
 362
 363 /* Timer process for the unresolved queue. */
 364
 365 static void ipmr_expire_process(unsigned long arg)
 366 {
 367         struct net *net = (struct net *)arg;
 368         unsigned long now;
 369         unsigned long expires;
 370         struct mfc_cache *c, **cp;
 371
 372         if (!spin_trylock(&mfc_unres_lock)) {
 373                 mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
 374                 return;
 375         }
 376
 377         if (net->ipv4.mfc_unres_queue == NULL)
 378                 goto out;
 379
 380         now = jiffies;
 381         expires = 10*HZ;
 382         cp = &net->ipv4.mfc_unres_queue;
 383
 384         while ((c=*cp) != NULL) {
 385                 if (time_after(c->mfc_un.unres.expires, now)) {
 386                         unsigned long interval = c->mfc_un.unres.expires - now;
 387                         if (interval < expires)
 388                                 expires = interval;
 389                         cp = &c->next;
 390                         continue;
 391                 }
 392
 393                 *cp = c->next;
 394
 395                 ipmr_destroy_unres(c);
 396         }
 397
 398         if (net->ipv4.mfc_unres_queue != NULL)
 399                 mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);
 400
 401 out:
 402         spin_unlock(&mfc_unres_lock);
 403 }
 404
 405 /* Fill oifs list. It is called under write locked mrt_lock. */
 406
 407 static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
 408 {
 409         int vifi;
 410         struct net *net = mfc_net(cache);
 411
 412         cache->mfc_un.res.minvif = MAXVIFS;
 413         cache->mfc_un.res.maxvif = 0;
 414         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 415
 416         for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
 417                 if (VIF_EXISTS(net, vifi) &&
 418                     ttls[vifi] && ttls[vifi] < 255) {
 419                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 420                         if (cache->mfc_un.res.minvif > vifi)
 421                                 cache->mfc_un.res.minvif = vifi;
 422                         if (cache->mfc_un.res.maxvif <= vifi)
 423                                 cache->mfc_un.res.maxvif = vifi + 1;
 424                 }
 425         }
 426 }
 427
 428 static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 429 {
 430         int vifi = vifc->vifc_vifi;
 431         struct vif_device *v = &net->ipv4.vif_table[vifi];
 432         struct net_device *dev;
 433         struct in_device *in_dev;
 434         int err;
 435
 436         /* Is vif busy ? */
 437         if (VIF_EXISTS(net, vifi))
 438                 return -EADDRINUSE;
 439
 440         switch (vifc->vifc_flags) {
 441 #ifdef CONFIG_IP_PIMSM
 442         case VIFF_REGISTER:
 443                 /*
 444                  * Special Purpose VIF in PIM
 445                  * All the packets will be sent to the daemon
 446                  */
 447                 if (net->ipv4.mroute_reg_vif_num >= 0)
 448                         return -EADDRINUSE;
 449                 dev = ipmr_reg_vif(net);
 450                 if (!dev)
 451                         return -ENOBUFS;
 452                 err = dev_set_allmulti(dev, 1);
 453                 if (err) {
 454                         unregister_netdevice(dev);
 455                         dev_put(dev);
 456                         return err;
 457                 }
 458                 break;
 459 #endif
 460         case VIFF_TUNNEL:
 461                 dev = ipmr_new_tunnel(net, vifc);
 462                 if (!dev)
 463                         return -ENOBUFS;
 464                 err = dev_set_allmulti(dev, 1);
 465                 if (err) {
 466                         ipmr_del_tunnel(dev, vifc);
 467                         dev_put(dev);
 468                         return err;
 469                 }
 470                 break;
 471
 472         case VIFF_USE_IFINDEX:
 473         case 0:
 474                 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
 475                         dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
 476                         if (dev && dev->ip_ptr == NULL) {
 477                                 dev_put(dev);
 478                                 return -EADDRNOTAVAIL;
 479                         }
 480                 } else
 481                         dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
 482
 483                 if (!dev)
 484                         return -EADDRNOTAVAIL;
 485                 err = dev_set_allmulti(dev, 1);
 486                 if (err) {
 487                         dev_put(dev);
 488                         return err;
 489                 }
 490                 break;
 491         default:
 492                 return -EINVAL;
 493         }
 494
 495         if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
 496                 dev_put(dev);
 497                 return -EADDRNOTAVAIL;
 498         }
 499         IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
 500         ip_rt_multicast_event(in_dev);
 501
 502         /*
 503          *      Fill in the VIF structures
 504          */
 505         v->rate_limit = vifc->vifc_rate_limit;
 506         v->local = vifc->vifc_lcl_addr.s_addr;
 507         v->remote = vifc->vifc_rmt_addr.s_addr;
 508         v->flags = vifc->vifc_flags;
 509         if (!mrtsock)
 510                 v->flags |= VIFF_STATIC;
 511         v->threshold = vifc->vifc_threshold;
 512         v->bytes_in = 0;
 513         v->bytes_out = 0;
 514         v->pkt_in = 0;
 515         v->pkt_out = 0;
 516         v->link = dev->ifindex;
 517         if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
 518                 v->link = dev->iflink;
 519
 520         /* And finish update writing critical data */
 521         write_lock_bh(&mrt_lock);
 522         v->dev = dev;
 523 #ifdef CONFIG_IP_PIMSM
 524         if (v->flags&VIFF_REGISTER)
 525                 net->ipv4.mroute_reg_vif_num = vifi;
 526 #endif
 527         if (vifi+1 > net->ipv4.maxvif)
 528                 net->ipv4.maxvif = vifi+1;
 529         write_unlock_bh(&mrt_lock);
 530         return 0;
 531 }
 532
 533 static struct mfc_cache *ipmr_cache_find(struct net *net,
 534                                          __be32 origin,
 535                                          __be32 mcastgrp)
 536 {
 537         int line = MFC_HASH(mcastgrp, origin);
 538         struct mfc_cache *c;
 539
 540         for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
 541                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
 542                         break;
 543         }
 544         return c;
 545 }
 546
 547 /*
 548  *      Allocate a multicast cache entry
 549  */
 550 static struct mfc_cache *ipmr_cache_alloc(struct net *net)
 551 {
 552         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 553         if (c == NULL)
 554                 return NULL;
 555         c->mfc_un.res.minvif = MAXVIFS;
 556         mfc_net_set(c, net);
 557         return c;
 558 }
 559
 560 static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
 561 {
 562         struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 563         if (c == NULL)
 564                 return NULL;
 565         skb_queue_head_init(&c->mfc_un.unres.unresolved);
 566         c->mfc_un.unres.expires = jiffies + 10*HZ;
 567         mfc_net_set(c, net);
 568         return c;
 569 }
 570
 571 /*
 572  *      A cache entry has gone into a resolved state from queued
 573  */
 574
 575 static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 576 {
 577         struct sk_buff *skb;
 578         struct nlmsgerr *e;
 579
 580         /*
 581          *      Play the pending entries through our router
 582          */
 583
 584         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
 585                 if (ip_hdr(skb)->version == 0) {
 586                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 587
 588                         if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
 589                                 nlh->nlmsg_len = (skb_tail_pointer(skb) -
 590                                                   (u8 *)nlh);
 591                         } else {
 592                                 nlh->nlmsg_type = NLMSG_ERROR;
 593                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
 594                                 skb_trim(skb, nlh->nlmsg_len);
 595                                 e = NLMSG_DATA(nlh);
 596                                 e->error = -EMSGSIZE;
 597                                 memset(&e->msg, 0, sizeof(e->msg));
 598                         }
 599
 600                         rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
 601                 } else
 602                         ip_mr_forward(skb, c, 0);
 603         }
 604 }
 605
 606 /*
 607  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 608  *      expects the following bizarre scheme.
 609  *
 610  *      Called under mrt_lock.
 611  */
 612
 613 static int ipmr_cache_report(struct net *net,
 614                              struct sk_buff *pkt, vifi_t vifi, int assert)
 615 {
 616         struct sk_buff *skb;
 617         const int ihl = ip_hdrlen(pkt);
 618         struct igmphdr *igmp;
 619         struct igmpmsg *msg;
 620         int ret;
 621
 622 #ifdef CONFIG_IP_PIMSM
 623         if (assert == IGMPMSG_WHOLEPKT)
 624                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 625         else
 626 #endif
 627                 skb = alloc_skb(128, GFP_ATOMIC);
 628
 629         if (!skb)
 630                 return -ENOBUFS;
 631
 632 #ifdef CONFIG_IP_PIMSM
 633         if (assert == IGMPMSG_WHOLEPKT) {
 634                 /* Ugly, but we have no choice with this interface.
 635                    Duplicate old header, fix ihl, length etc.
 636                    And all this only to mangle msg->im_msgtype and
 637                    to set msg->im_mbz to "mbz" :-)
 638                  */
 639                 skb_push(skb, sizeof(struct iphdr));
 640                 skb_reset_network_header(skb);
 641                 skb_reset_transport_header(skb);
 642                 msg = (struct igmpmsg *)skb_network_header(skb);
 643                 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 644                 msg->im_msgtype = IGMPMSG_WHOLEPKT;
 645                 msg->im_mbz = 0;
 646                 msg->im_vif = net->ipv4.mroute_reg_vif_num;
 647                 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 648                 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 649                                              sizeof(struct iphdr));
 650         } else
 651 #endif
 652         {
 653
 654         /*
 655          *      Copy the IP header
 656          */
 657
 658         skb->network_header = skb->tail;
 659         skb_put(skb, ihl);
 660         skb_copy_to_linear_data(skb, pkt->data, ihl);
 661         ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
 662         msg = (struct igmpmsg *)skb_network_header(skb);
 663         msg->im_vif = vifi;
 664         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 665
 666         /*
 667          *      Add our header
 668          */
 669
 670         igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
 671         igmp->type      =
 672         msg->im_msgtype = assert;
 673         igmp->code      =       0;
 674         ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
 675         skb->transport_header = skb->network_header;
 676         }
 677
 678         if (net->ipv4.mroute_sk == NULL) {
 679                 kfree_skb(skb);
 680                 return -EINVAL;
 681         }
 682
 683         /*
 684          *      Deliver to mrouted
 685          */
 686         ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
 687         if (ret < 0) {
 688                 if (net_ratelimit())
 689                         printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
 690                 kfree_skb(skb);
 691         }
 692
 693         return ret;
 694 }
 695
 696 /*
 697  *      Queue a packet for resolution. It gets locked cache entry!
 698  */
 699
 700 static int
 701 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 702 {
 703         int err;
 704         struct mfc_cache *c;
 705         const struct iphdr *iph = ip_hdr(skb);
 706
 707         spin_lock_bh(&mfc_unres_lock);
 708         for (c=net->ipv4.mfc_unres_queue; c; c=c->next) {
 709                 if (c->mfc_mcastgrp == iph->daddr &&
 710                     c->mfc_origin == iph->saddr)
 711                         break;
 712         }
 713
 714         if (c == NULL) {
 715                 /*
 716                  *      Create a new entry if allowable
 717                  */
 718
 719                 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
 720                     (c = ipmr_cache_alloc_unres(net)) == NULL) {
 721                         spin_unlock_bh(&mfc_unres_lock);
 722
 723                         kfree_skb(skb);
 724                         return -ENOBUFS;
 725                 }
 726
 727                 /*
 728                  *      Fill in the new cache entry
 729                  */
 730                 c->mfc_parent   = -1;
 731                 c->mfc_origin   = iph->saddr;
 732                 c->mfc_mcastgrp = iph->daddr;
 733
 734                 /*
 735                  *      Reflect first query at mrouted.
 736                  */
 737                 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
 738                 if (err < 0) {
 739                         /* If the report failed throw the cache entry
 740                            out - Brad Parker
 741                          */
 742                         spin_unlock_bh(&mfc_unres_lock);
 743
 744                         ipmr_cache_free(c);
 745                         kfree_skb(skb);
 746                         return err;
 747                 }
 748
 749                 atomic_inc(&net->ipv4.cache_resolve_queue_len);
 750                 c->next = net->ipv4.mfc_unres_queue;
 751                 net->ipv4.mfc_unres_queue = c;
 752
 753                 mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
 754         }
 755
 756         /*
 757          *      See if we can append the packet
 758          */
 759         if (c->mfc_un.unres.unresolved.qlen>3) {
 760                 kfree_skb(skb);
 761                 err = -ENOBUFS;
 762         } else {
 763                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
 764                 err = 0;
 765         }
 766
 767         spin_unlock_bh(&mfc_unres_lock);
 768         return err;
 769 }
 770
 771 /*
 772  *      MFC cache manipulation by user space mroute daemon
 773  */
 774
 775 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 776 {
 777         int line;
 778         struct mfc_cache *c, **cp;
 779
 780         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 781
 782         for (cp = &net->ipv4.mfc_cache_array[line];
 783              (c = *cp) != NULL; cp = &c->next) {
 784                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 785                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 786                         write_lock_bh(&mrt_lock);
 787                         *cp = c->next;
 788                         write_unlock_bh(&mrt_lock);
 789
 790                         ipmr_cache_free(c);
 791                         return 0;
 792                 }
 793         }
 794         return -ENOENT;
 795 }
 796
 797 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 798 {
 799         int line;
 800         struct mfc_cache *uc, *c, **cp;
 801
 802         if (mfc->mfcc_parent >= MAXVIFS)
 803                 return -ENFILE;
 804
 805         line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 806
 807         for (cp = &net->ipv4.mfc_cache_array[line];
 808              (c = *cp) != NULL; cp = &c->next) {
 809                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 810                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
 811                         break;
 812         }
 813
 814         if (c != NULL) {
 815                 write_lock_bh(&mrt_lock);
 816                 c->mfc_parent = mfc->mfcc_parent;
 817                 ipmr_update_thresholds(c, mfc->mfcc_ttls);
 818                 if (!mrtsock)
 819                         c->mfc_flags |= MFC_STATIC;
 820                 write_unlock_bh(&mrt_lock);
 821                 return 0;
 822         }
 823
 824         if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 825                 return -EINVAL;
 826
 827         c = ipmr_cache_alloc(net);
 828         if (c == NULL)
 829                 return -ENOMEM;
 830
 831         c->mfc_origin = mfc->mfcc_origin.s_addr;
 832         c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 833         c->mfc_parent = mfc->mfcc_parent;
 834         ipmr_update_thresholds(c, mfc->mfcc_ttls);
 835         if (!mrtsock)
 836                 c->mfc_flags |= MFC_STATIC;
 837
 838         write_lock_bh(&mrt_lock);
 839         c->next = net->ipv4.mfc_cache_array[line];
 840         net->ipv4.mfc_cache_array[line] = c;
 841         write_unlock_bh(&mrt_lock);
 842
 843         /*
 844          *      Check to see if we resolved a queued list. If so we
 845          *      need to send on the frames and tidy up.
 846          */
 847         spin_lock_bh(&mfc_unres_lock);
 848         for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL;
 849              cp = &uc->next) {
 850                 if (uc->mfc_origin == c->mfc_origin &&
 851                     uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 852                         *cp = uc->next;
 853                         atomic_dec(&net->ipv4.cache_resolve_queue_len);
 854                         break;
 855                 }
 856         }
 857         if (net->ipv4.mfc_unres_queue == NULL)
 858                 del_timer(&net->ipv4.ipmr_expire_timer);
 859         spin_unlock_bh(&mfc_unres_lock);
 860
 861         if (uc) {
 862                 ipmr_cache_resolve(uc, c);
 863                 ipmr_cache_free(uc);
 864         }
 865         return 0;
 866 }
 867
 868 /*
 869  *      Close the multicast socket, and clear the vif tables etc
 870  */
 871
 872 static void mroute_clean_tables(struct net *net)
 873 {
 874         int i;
 875         LIST_HEAD(list);
 876
 877         /*
 878          *      Shut down all active vif entries
 879          */
 880         for (i = 0; i < net->ipv4.maxvif; i++) {
 881                 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
 882                         vif_delete(net, i, 0, &list);
 883         }
 884         unregister_netdevice_many(&list);
 885
 886         /*
 887          *      Wipe the cache
 888          */
 889         for (i=0; i<MFC_LINES; i++) {
 890                 struct mfc_cache *c, **cp;
 891
 892                 cp = &net->ipv4.mfc_cache_array[i];
 893                 while ((c = *cp) != NULL) {
 894                         if (c->mfc_flags&MFC_STATIC) {
 895                                 cp = &c->next;
 896                                 continue;
 897                         }
 898                         write_lock_bh(&mrt_lock);
 899                         *cp = c->next;
 900                         write_unlock_bh(&mrt_lock);
 901
 902                         ipmr_cache_free(c);
 903                 }
 904         }
 905
 906         if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
 907                 struct mfc_cache *c, **cp;
 908
 909                 spin_lock_bh(&mfc_unres_lock);
 910                 cp = &net->ipv4.mfc_unres_queue;
 911                 while ((c = *cp) != NULL) {
 912                         *cp = c->next;
 913                         ipmr_destroy_unres(c);
 914                 }
 915                 spin_unlock_bh(&mfc_unres_lock);
 916         }
 917 }
 918
 919 static void mrtsock_destruct(struct sock *sk)
 920 {
 921         struct net *net = sock_net(sk);
 922
 923         rtnl_lock();
 924         if (sk == net->ipv4.mroute_sk) {
 925                 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 926
 927                 write_lock_bh(&mrt_lock);
 928                 net->ipv4.mroute_sk = NULL;
 929                 write_unlock_bh(&mrt_lock);
 930
 931                 mroute_clean_tables(net);
 932         }
 933         rtnl_unlock();
 934 }
 935
 936 /*
 937  *      Socket options and virtual interface manipulation. The whole
 938  *      virtual interface system is a complete heap, but unfortunately
 939  *      that's how BSD mrouted happens to think. Maybe one day with a proper
 940  *      MOSPF/PIM router set up we can clean this up.
 941  */
 942
 943 int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
 944 {
 945         int ret;
 946         struct vifctl vif;
 947         struct mfcctl mfc;
 948         struct net *net = sock_net(sk);
 949
 950         if (optname != MRT_INIT) {
 951                 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
 952                         return -EACCES;
 953         }
 954
 955         switch (optname) {
 956         case MRT_INIT:
 957                 if (sk->sk_type != SOCK_RAW ||
 958                     inet_sk(sk)->inet_num != IPPROTO_IGMP)
 959                         return -EOPNOTSUPP;
 960                 if (optlen != sizeof(int))
 961                         return -ENOPROTOOPT;
 962
 963                 rtnl_lock();
 964                 if (net->ipv4.mroute_sk) {
 965                         rtnl_unlock();
 966                         return -EADDRINUSE;
 967                 }
 968
 969                 ret = ip_ra_control(sk, 1, mrtsock_destruct);
 970                 if (ret == 0) {
 971                         write_lock_bh(&mrt_lock);
 972                         net->ipv4.mroute_sk = sk;
 973                         write_unlock_bh(&mrt_lock);
 974
 975                         IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
 976                 }
 977                 rtnl_unlock();
 978                 return ret;
 979         case MRT_DONE:
 980                 if (sk != net->ipv4.mroute_sk)
 981                         return -EACCES;
 982                 return ip_ra_control(sk, 0, NULL);
 983         case MRT_ADD_VIF:
 984         case MRT_DEL_VIF:
 985                 if (optlen != sizeof(vif))
 986                         return -EINVAL;
 987                 if (copy_from_user(&vif, optval, sizeof(vif)))
 988                         return -EFAULT;
 989                 if (vif.vifc_vifi >= MAXVIFS)
 990                         return -ENFILE;
 991                 rtnl_lock();
 992                 if (optname == MRT_ADD_VIF) {
 993                         ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
 994                 } else {
 995                         ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
 996                 }
 997                 rtnl_unlock();
 998                 return ret;
 999
1000                 /*
1001                  *      Manipulate the forwarding caches. These live
1002                  *      in a sort of kernel/user symbiosis.
1003                  */
1004         case MRT_ADD_MFC:
1005         case MRT_DEL_MFC:
1006                 if (optlen != sizeof(mfc))
1007                         return -EINVAL;
1008                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1009                         return -EFAULT;
1010                 rtnl_lock();
1011                 if (optname == MRT_DEL_MFC)
1012                         ret = ipmr_mfc_delete(net, &mfc);
1013                 else
1014                         ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
1015                 rtnl_unlock();
1016                 return ret;
1017                 /*
1018                  *      Control PIM assert.
1019                  */
1020         case MRT_ASSERT:
1021         {
1022                 int v;
1023                 if (get_user(v,(int __user *)optval))
1024                         return -EFAULT;
1025                 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1026                 return 0;
1027         }
1028 #ifdef CONFIG_IP_PIMSM
1029         case MRT_PIM:
1030         {
1031                 int v;
1032
1033                 if (get_user(v,(int __user *)optval))
1034                         return -EFAULT;
1035                 v = (v) ? 1 : 0;
1036
1037                 rtnl_lock();
1038                 ret = 0;
1039                 if (v != net->ipv4.mroute_do_pim) {
1040                         net->ipv4.mroute_do_pim = v;
1041                         net->ipv4.mroute_do_assert = v;
1042                 }
1043                 rtnl_unlock();
1044                 return ret;
1045         }
1046 #endif
1047         /*
1048          *      Spurious command, or MRT_VERSION which you cannot
1049          *      set.
1050          */
1051         default:
1052                 return -ENOPROTOOPT;
1053         }
1054 }
1055
1056 /*
1057  *      Getsock opt support for the multicast routing system.
1058  */
1059
1060 int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
1061 {
1062         int olr;
1063         int val;
1064         struct net *net = sock_net(sk);
1065
1066         if (optname != MRT_VERSION &&
1067 #ifdef CONFIG_IP_PIMSM
1068            optname!=MRT_PIM &&
1069 #endif
1070            optname!=MRT_ASSERT)
1071                 return -ENOPROTOOPT;
1072
1073         if (get_user(olr, optlen))
1074                 return -EFAULT;
1075
1076         olr = min_t(unsigned int, olr, sizeof(int));
1077         if (olr < 0)
1078                 return -EINVAL;
1079
1080         if (put_user(olr, optlen))
1081                 return -EFAULT;
1082         if (optname == MRT_VERSION)
1083                 val = 0x0305;
1084 #ifdef CONFIG_IP_PIMSM
1085         else if (optname == MRT_PIM)
1086                 val = net->ipv4.mroute_do_pim;
1087 #endif
1088         else
1089                 val = net->ipv4.mroute_do_assert;
1090         if (copy_to_user(optval, &val, olr))
1091                 return -EFAULT;
1092         return 0;
1093 }
1094
1095 /*
1096  *      The IP multicast ioctl support routines.
1097  */
1098
1099 int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1100 {
1101         struct sioc_sg_req sr;
1102         struct sioc_vif_req vr;
1103         struct vif_device *vif;
1104         struct mfc_cache *c;
1105         struct net *net = sock_net(sk);
1106
1107         switch (cmd) {
1108         case SIOCGETVIFCNT:
1109                 if (copy_from_user(&vr, arg, sizeof(vr)))
1110                         return -EFAULT;
1111                 if (vr.vifi >= net->ipv4.maxvif)
1112                         return -EINVAL;
1113                 read_lock(&mrt_lock);
1114                 vif = &net->ipv4.vif_table[vr.vifi];
1115                 if (VIF_EXISTS(net, vr.vifi)) {
1116                         vr.icount = vif->pkt_in;
1117                         vr.ocount = vif->pkt_out;
1118                         vr.ibytes = vif->bytes_in;
1119                         vr.obytes = vif->bytes_out;
1120                         read_unlock(&mrt_lock);
1121
1122                         if (copy_to_user(arg, &vr, sizeof(vr)))
1123                                 return -EFAULT;
1124                         return 0;
1125                 }
1126                 read_unlock(&mrt_lock);
1127                 return -EADDRNOTAVAIL;
1128         case SIOCGETSGCNT:
1129                 if (copy_from_user(&sr, arg, sizeof(sr)))
1130                         return -EFAULT;
1131
1132                 read_lock(&mrt_lock);
1133                 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1134                 if (c) {
1135                         sr.pktcnt = c->mfc_un.res.pkt;
1136                         sr.bytecnt = c->mfc_un.res.bytes;
1137                         sr.wrong_if = c->mfc_un.res.wrong_if;
1138                         read_unlock(&mrt_lock);
1139
1140                         if (copy_to_user(arg, &sr, sizeof(sr)))
1141                                 return -EFAULT;
1142                         return 0;
1143                 }
1144                 read_unlock(&mrt_lock);
1145                 return -EADDRNOTAVAIL;
1146         default:
1147                 return -ENOIOCTLCMD;
1148         }
1149 }
1150
1151
1152 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1153 {
1154         struct net_device *dev = ptr;
1155         struct net *net = dev_net(dev);
1156         struct vif_device *v;
1157         int ct;
1158         LIST_HEAD(list);
1159
1160         if (event != NETDEV_UNREGISTER)
1161                 return NOTIFY_DONE;
1162         v = &net->ipv4.vif_table[0];
1163         for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1164                 if (v->dev == dev)
1165                         vif_delete(net, ct, 1, &list);
1166         }
1167         unregister_netdevice_many(&list);
1168         return NOTIFY_DONE;
1169 }
1170
1171
/* Notifier block whose callback is ipmr_device_event(). */
static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};
1175
1176 /*
1177  *      Encapsulate a packet by attaching a valid IPIP header to it.
1178  *      This avoids tunnel drivers and other mess and gives us the speed so
1179  *      important for multicast video.
1180  */
1181
1182 static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1183 {
1184         struct iphdr *iph;
1185         struct iphdr *old_iph = ip_hdr(skb);
1186
1187         skb_push(skb, sizeof(struct iphdr));
1188         skb->transport_header = skb->network_header;
1189         skb_reset_network_header(skb);
1190         iph = ip_hdr(skb);
1191
1192         iph->version    =       4;
1193         iph->tos        =       old_iph->tos;
1194         iph->ttl        =       old_iph->ttl;
1195         iph->frag_off   =       0;
1196         iph->daddr      =       daddr;
1197         iph->saddr      =       saddr;
1198         iph->protocol   =       IPPROTO_IPIP;
1199         iph->ihl        =       5;
1200         iph->tot_len    =       htons(skb->len);
1201         ip_select_ident(iph, skb_dst(skb), NULL);
1202         ip_send_check(iph);
1203
1204         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
1205         nf_reset(skb);
1206 }
1207
1208 static inline int ipmr_forward_finish(struct sk_buff *skb)
1209 {
1210         struct ip_options * opt = &(IPCB(skb)->opt);
1211
1212         IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1213
1214         if (unlikely(opt->optlen))
1215                 ip_forward_options(skb);
1216
1217         return dst_output(skb);
1218 }
1219
1220 /*
1221  *      Processing handlers for ipmr_forward
1222  */
1223
/*
 *      Send @skb out through vif number @vifi of the namespace that
 *      owns cache entry @c.
 *
 *      The skb is always consumed: it is either handed to the
 *      NF_INET_FORWARD hook (continuing in ipmr_forward_finish) or
 *      freed at out_free.
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        struct net *net = mfc_net(c);
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &net->ipv4.vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int    encap = 0;

        /* vif slot has no device attached: nothing to send on */
        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        /*
         * Register vif: account the packet, report it whole through
         * ipmr_cache_report(IGMPMSG_WHOLEPKT) and free this skb.
         */
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        /*
         * Look up the output route: towards the tunnel endpoint for a
         * tunnel vif, towards the packet's own destination otherwise.
         */
        if (vif->flags&VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                /* room for the outer header added by ip_encap() below */
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->u.dst.dev;

        if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                   allow to send ICMP, so that packets will disappear
                   to blackhole.
                 */

                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        /* total headroom needed: encapsulation + link layer + dst header */
        encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        /* replace the skb's dst with the route looked up above */
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->u.dst);
        /* header is re-fetched via ip_hdr() here rather than through iph */
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        /* ip_mr_input() checks this flag and skips forwarding when set */
        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /*
         * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
         * not only before forwarding, but after forwarding on all output
         * interfaces. It is clear, if mrouter runs a multicasting
         * program, it should receive packets not depending to what interface
         * program is joined.
         * If we will not make it, the program will have to join on all
         * interfaces. On the other hand, multihoming host (or router, but
         * not mrouter) cannot join to more than one interface - it will
         * result in receiving multiple packets.
         */
        NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
        return;
}
1324
1325 static int ipmr_find_vif(struct net_device *dev)
1326 {
1327         struct net *net = dev_net(dev);
1328         int ct;
1329         for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1330                 if (net->ipv4.vif_table[ct].dev == dev)
1331                         break;
1332         }
1333         return ct;
1334 }
1335
/*
 * Forward @skb according to the resolved cache entry @cache.
 *
 * "local" means that we should preserve one skb (for local delivery):
 * when it is non-zero only clones are transmitted and @skb is left to
 * the caller; when it is zero @skb itself is either transmitted on the
 * last matching vif or freed here.
 *
 * Always returns 0.
 */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
        int psend = -1;
        int vif, ct;
        struct net *net = mfc_net(cache);

        vif = cache->mfc_parent;
        cache->mfc_un.res.pkt++;
        cache->mfc_un.res.bytes += skb->len;

        /*
         * Wrong interface: drop packet and (maybe) send PIM assert.
         */
        if (net->ipv4.vif_table[vif].dev != skb->dev) {
                int true_vifi;

                if (skb_rtable(skb)->fl.iif == 0) {
                        /* It is our own packet, looped back.
                           Very complicated situation...

                           The best workaround until routing daemons will be
                           fixed is not to redistribute packet, if it was
                           send through wrong interface. It means, that
                           multicast applications WILL NOT work for
                           (S,G), which have default multicast route pointing
                           to wrong oif. In any case, it is not a good
                           idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }

                cache->mfc_un.res.wrong_if++;
                true_vifi = ipmr_find_vif(skb->dev);

                /*
                 * Report IGMPMSG_WRONGVIF at most once per
                 * MFC_ASSERT_THRESH jiffies for this cache entry.
                 */
                if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
                       so that we cannot check that packet arrived on an oif.
                       It is bad, but otherwise we would need to move pretty
                       large chunk of pimd to kernel. Ough... --ANK
                     */
                    (net->ipv4.mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
                    time_after(jiffies,
                               cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
                        cache->mfc_un.res.last_assert = jiffies;
                        ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
                }
                goto dont_forward;
        }

        net->ipv4.vif_table[vif].pkt_in++;
        net->ipv4.vif_table[vif].bytes_in += skb->len;

        /*
         *      Forward the frame
         *
         *      Transmission lags one vif behind the scan: a clone is
         *      sent on the previously matched vif (psend) each time a
         *      new match is found, so the last match is still pending
         *      when the loop ends.
         */
        for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        ipmr_queue_xmit(skb2, cache, psend);
                        }
                        psend = ct;
                }
        }
        /*
         * Last matching vif: send a clone if the caller keeps the skb,
         * otherwise hand over the original.
         */
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (skb2)
                                ipmr_queue_xmit(skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}
1420
1421
/*
 *      Multicast packets for forwarding arrive here
 *
 *      "local" is taken from RTCF_LOCAL in the skb's route flags.  When
 *      it is set the packet is also passed to ip_local_deliver().
 *      Packets with no matching cache entry are queued through
 *      ipmr_cache_unresolved() if they arrived on a known vif.
 *
 *      Returns 0, the result of ip_local_deliver() or
 *      ipmr_cache_unresolved(), -ENOBUFS if the clone needed for
 *      queueing could not be allocated, or -ENODEV if the input device
 *      is not a vif.
 */

int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;

        /* Packet is looped back after forward, it should not be
           forwarded second time, but still can be delivered locally.
         */
        if (IPCB(skb)->flags&IPSKB_FORWARDED)
                goto dont_forward;

        if (!local) {
                    if (IPCB(skb)->opt.router_alert) {
                            /* non-zero return: stop processing the skb here */
                            if (ip_call_ra_chain(skb))
                                    return 0;
                    } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
                            /* IGMPv1 (and broken IGMPv2 implementations sort of
                               Cisco IOS <= 11.2(8)) do not put router alert
                               option to IGMP packets destined to routable
                               groups. It is very bad, because it means
                               that we can forward NO IGMP messages.
                             */
                            read_lock(&mrt_lock);
                            if (net->ipv4.mroute_sk) {
                                    nf_reset(skb);
                                    raw_rcv(net->ipv4.mroute_sk, skb);
                                    read_unlock(&mrt_lock);
                                    return 0;
                            }
                            read_unlock(&mrt_lock);
                    }
        }

        read_lock(&mrt_lock);
        cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /*
         *      No usable cache entry
         */
        if (cache == NULL) {
                int vif;

                if (local) {
                        /*
                         * Clone first, then deliver the original locally;
                         * the clone (if any) is what gets queued below.
                         */
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL) {
                                read_unlock(&mrt_lock);
                                return -ENOBUFS;
                        }
                        skb = skb2;
                }

                vif = ipmr_find_vif(skb->dev);
                if (vif >= 0) {
                        int err = ipmr_cache_unresolved(net, vif, skb);
                        read_unlock(&mrt_lock);

                        return err;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        /* with local set, ip_mr_forward() leaves skb for the delivery below */
        ip_mr_forward(skb, cache, local);

        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}
1506
1507 #ifdef CONFIG_IP_PIMSM
1508 static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1509 {
1510         struct net_device *reg_dev = NULL;
1511         struct iphdr *encap;
1512         struct net *net = dev_net(skb->dev);
1513
1514         encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1515         /*
1516            Check that:
1517            a. packet is really destinted to a multicast group
1518            b. packet is not a NULL-REGISTER
1519            c. packet is not truncated
1520          */
1521         if (!ipv4_is_multicast(encap->daddr) ||
1522             encap->tot_len == 0 ||
1523             ntohs(encap->tot_len) + pimlen > skb->len)
1524                 return 1;
1525
1526         read_lock(&mrt_lock);
1527         if (net->ipv4.mroute_reg_vif_num >= 0)
1528                 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1529         if (reg_dev)
1530                 dev_hold(reg_dev);
1531         read_unlock(&mrt_lock);
1532
1533         if (reg_dev == NULL)
1534                 return 1;
1535
1536         skb->mac_header = skb->network_header;
1537         skb_pull(skb, (u8*)encap - skb->data);
1538         skb_reset_network_header(skb);
1539         skb->dev = reg_dev;
1540         skb->protocol = htons(ETH_P_IP);
1541         skb->ip_summed = 0;
1542         skb->pkt_type = PACKET_HOST;
1543         skb_dst_drop(skb);
1544         reg_dev->stats.rx_bytes += skb->len;
1545         reg_dev->stats.rx_packets++;
1546         nf_reset(skb);
1547         netif_rx(skb);
1548         dev_put(reg_dev);
1549
1550         return 0;
1551 }
1552 #endif
1553
1554 #ifdef CONFIG_IP_PIMSM_V1
1555 /*
1556  * Handle IGMP messages of PIMv1
1557  */
1558
1559 int pim_rcv_v1(struct sk_buff * skb)
1560 {
1561         struct igmphdr *pim;
1562         struct net *net = dev_net(skb->dev);
1563
1564         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1565                 goto drop;
1566
1567         pim = igmp_hdr(skb);
1568
1569         if (!net->ipv4.mroute_do_pim ||
1570             pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1571                 goto drop;
1572
1573         if (__pim_rcv(skb, sizeof(*pim))) {
1574 drop:
1575                 kfree_skb(skb);
1576         }
1577         return 0;
1578 }
1579 #endif
1580
1581 #ifdef CONFIG_IP_PIMSM_V2
1582 static int pim_rcv(struct sk_buff * skb)
1583 {
1584         struct pimreghdr *pim;
1585
1586         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1587                 goto drop;
1588
1589         pim = (struct pimreghdr *)skb_transport_header(skb);
1590         if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
1591             (pim->flags&PIM_NULL_REGISTER) ||
1592             (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1593              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1594                 goto drop;
1595
1596         if (__pim_rcv(skb, sizeof(*pim))) {
1597 drop:
1598                 kfree_skb(skb);
1599         }
1600         return 0;
1601 }
1602 #endif
1603
1604 static int
1605 ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1606 {
1607         int ct;
1608         struct rtnexthop *nhp;
1609         struct net *net = mfc_net(c);
1610         u8 *b = skb_tail_pointer(skb);
1611         struct rtattr *mp_head;
1612
1613         /* If cache is unresolved, don't try to parse IIF and OIF */
1614         if (c->mfc_parent > MAXVIFS)
1615                 return -ENOENT;
1616
1617         if (VIF_EXISTS(net, c->mfc_parent))
1618                 RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
1619
1620         mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1621
1622         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1623                 if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
1624                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1625                                 goto rtattr_failure;
1626                         nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1627                         nhp->rtnh_flags = 0;
1628                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1629                         nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1630                         nhp->rtnh_len = sizeof(*nhp);
1631                 }
1632         }
1633         mp_head->rta_type = RTA_MULTIPATH;
1634         mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
1635         rtm->rtm_type = RTN_MULTICAST;
1636         return 1;
1637
1638 rtattr_failure:
1639         nlmsg_trim(skb, b);
1640         return -EMSGSIZE;
1641 }
1642
1643 int ipmr_get_route(struct net *net,
1644                    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1645 {
1646         int err;
1647         struct mfc_cache *cache;
1648         struct rtable *rt = skb_rtable(skb);
1649
1650         read_lock(&mrt_lock);
1651         cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1652
1653         if (cache == NULL) {
1654                 struct sk_buff *skb2;
1655                 struct iphdr *iph;
1656                 struct net_device *dev;
1657                 int vif;
1658
1659                 if (nowait) {
1660                         read_unlock(&mrt_lock);
1661                         return -EAGAIN;
1662                 }
1663
1664                 dev = skb->dev;
1665                 if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
1666                         read_unlock(&mrt_lock);
1667                         return -ENODEV;
1668                 }
1669                 skb2 = skb_clone(skb, GFP_ATOMIC);
1670                 if (!skb2) {
1671                         read_unlock(&mrt_lock);
1672                         return -ENOMEM;
1673                 }
1674
1675                 skb_push(skb2, sizeof(struct iphdr));
1676                 skb_reset_network_header(skb2);
1677                 iph = ip_hdr(skb2);
1678                 iph->ihl = sizeof(struct iphdr) >> 2;
1679                 iph->saddr = rt->rt_src;
1680                 iph->daddr = rt->rt_dst;
1681                 iph->version = 0;
1682                 err = ipmr_cache_unresolved(net, vif, skb2);
1683                 read_unlock(&mrt_lock);
1684                 return err;
1685         }
1686
1687         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1688                 cache->mfc_flags |= MFC_NOTIFY;
1689         err = ipmr_fill_mroute(skb, cache, rtm);
1690         read_unlock(&mrt_lock);
1691         return err;
1692 }
1693
1694 #ifdef CONFIG_PROC_FS
1695 /*
1696  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1697  */
/* Per-open iterator state for the vif seq_file. */
struct ipmr_vif_iter {
        struct seq_net_private p;       /* first member; passed by size to seq_open_net() */
        int ct;                         /* index into vif_table of the current position */
};
1702
1703 static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1704                                            struct ipmr_vif_iter *iter,
1705                                            loff_t pos)
1706 {
1707         for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1708                 if (!VIF_EXISTS(net, iter->ct))
1709                         continue;
1710                 if (pos-- == 0)
1711                         return &net->ipv4.vif_table[iter->ct];
1712         }
1713         return NULL;
1714 }
1715
1716 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1717         __acquires(mrt_lock)
1718 {
1719         struct net *net = seq_file_net(seq);
1720
1721         read_lock(&mrt_lock);
1722         return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1723                 : SEQ_START_TOKEN;
1724 }
1725
1726 static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1727 {
1728         struct ipmr_vif_iter *iter = seq->private;
1729         struct net *net = seq_file_net(seq);
1730
1731         ++*pos;
1732         if (v == SEQ_START_TOKEN)
1733                 return ipmr_vif_seq_idx(net, iter, 0);
1734
1735         while (++iter->ct < net->ipv4.maxvif) {
1736                 if (!VIF_EXISTS(net, iter->ct))
1737                         continue;
1738                 return &net->ipv4.vif_table[iter->ct];
1739         }
1740         return NULL;
1741 }
1742
1743 static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1744         __releases(mrt_lock)
1745 {
1746         read_unlock(&mrt_lock);
1747 }
1748
1749 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1750 {
1751         struct net *net = seq_file_net(seq);
1752
1753         if (v == SEQ_START_TOKEN) {
1754                 seq_puts(seq,
1755                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
1756         } else {
1757                 const struct vif_device *vif = v;
1758                 const char *name =  vif->dev ? vif->dev->name : "none";
1759
1760                 seq_printf(seq,
1761                            "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
1762                            vif - net->ipv4.vif_table,
1763                            name, vif->bytes_in, vif->pkt_in,
1764                            vif->bytes_out, vif->pkt_out,
1765                            vif->flags, vif->local, vif->remote);
1766         }
1767         return 0;
1768 }
1769
1770 static const struct seq_operations ipmr_vif_seq_ops = {
1771         .start = ipmr_vif_seq_start,
1772         .next  = ipmr_vif_seq_next,
1773         .stop  = ipmr_vif_seq_stop,
1774         .show  = ipmr_vif_seq_show,
1775 };
1776
1777 static int ipmr_vif_open(struct inode *inode, struct file *file)
1778 {
1779         return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1780                             sizeof(struct ipmr_vif_iter));
1781 }
1782
1783 static const struct file_operations ipmr_vif_fops = {
1784         .owner   = THIS_MODULE,
1785         .open    = ipmr_vif_open,
1786         .read    = seq_read,
1787         .llseek  = seq_lseek,
1788         .release = seq_release_net,
1789 };
1790
1791 struct ipmr_mfc_iter {
1792         struct seq_net_private p;
1793         struct mfc_cache **cache;
1794         int ct;
1795 };
1796
1797
1798 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1799                                           struct ipmr_mfc_iter *it, loff_t pos)
1800 {
1801         struct mfc_cache *mfc;
1802
1803         it->cache = net->ipv4.mfc_cache_array;
1804         read_lock(&mrt_lock);
1805         for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1806                 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1807                      mfc; mfc = mfc->next)
1808                         if (pos-- == 0)
1809                                 return mfc;
1810         read_unlock(&mrt_lock);
1811
1812         it->cache = &net->ipv4.mfc_unres_queue;
1813         spin_lock_bh(&mfc_unres_lock);
1814         for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
1815                 if (pos-- == 0)
1816                         return mfc;
1817         spin_unlock_bh(&mfc_unres_lock);
1818
1819         it->cache = NULL;
1820         return NULL;
1821 }
1822
1823
1824 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1825 {
1826         struct ipmr_mfc_iter *it = seq->private;
1827         struct net *net = seq_file_net(seq);
1828
1829         it->cache = NULL;
1830         it->ct = 0;
1831         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1832                 : SEQ_START_TOKEN;
1833 }
1834
1835 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1836 {
1837         struct mfc_cache *mfc = v;
1838         struct ipmr_mfc_iter *it = seq->private;
1839         struct net *net = seq_file_net(seq);
1840
1841         ++*pos;
1842
1843         if (v == SEQ_START_TOKEN)
1844                 return ipmr_mfc_seq_idx(net, seq->private, 0);
1845
1846         if (mfc->next)
1847                 return mfc->next;
1848
1849         if (it->cache == &net->ipv4.mfc_unres_queue)
1850                 goto end_of_list;
1851
1852         BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1853
1854         while (++it->ct < MFC_LINES) {
1855                 mfc = net->ipv4.mfc_cache_array[it->ct];
1856                 if (mfc)
1857                         return mfc;
1858         }
1859
1860         /* exhausted cache_array, show unresolved */
1861         read_unlock(&mrt_lock);
1862         it->cache = &net->ipv4.mfc_unres_queue;
1863         it->ct = 0;
1864
1865         spin_lock_bh(&mfc_unres_lock);
1866         mfc = net->ipv4.mfc_unres_queue;
1867         if (mfc)
1868                 return mfc;
1869
1870  end_of_list:
1871         spin_unlock_bh(&mfc_unres_lock);
1872         it->cache = NULL;
1873
1874         return NULL;
1875 }
1876
1877 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1878 {
1879         struct ipmr_mfc_iter *it = seq->private;
1880         struct net *net = seq_file_net(seq);
1881
1882         if (it->cache == &net->ipv4.mfc_unres_queue)
1883                 spin_unlock_bh(&mfc_unres_lock);
1884         else if (it->cache == net->ipv4.mfc_cache_array)
1885                 read_unlock(&mrt_lock);
1886 }
1887
1888 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1889 {
1890         int n;
1891         struct net *net = seq_file_net(seq);
1892
1893         if (v == SEQ_START_TOKEN) {
1894                 seq_puts(seq,
1895                  "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
1896         } else {
1897                 const struct mfc_cache *mfc = v;
1898                 const struct ipmr_mfc_iter *it = seq->private;
1899
1900                 seq_printf(seq, "%08lX %08lX %-3hd",
1901                            (unsigned long) mfc->mfc_mcastgrp,
1902                            (unsigned long) mfc->mfc_origin,
1903                            mfc->mfc_parent);
1904
1905                 if (it->cache != &net->ipv4.mfc_unres_queue) {
1906                         seq_printf(seq, " %8lu %8lu %8lu",
1907                                    mfc->mfc_un.res.pkt,
1908                                    mfc->mfc_un.res.bytes,
1909                                    mfc->mfc_un.res.wrong_if);
1910                         for (n = mfc->mfc_un.res.minvif;
1911                              n < mfc->mfc_un.res.maxvif; n++ ) {
1912                                 if (VIF_EXISTS(net, n) &&
1913                                     mfc->mfc_un.res.ttls[n] < 255)
1914                                         seq_printf(seq,
1915                                            " %2d:%-3d",
1916                                            n, mfc->mfc_un.res.ttls[n]);
1917                         }
1918                 } else {
1919                         /* unresolved mfc_caches don't contain
1920                          * pkt, bytes and wrong_if values
1921                          */
1922                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
1923                 }
1924                 seq_putc(seq, '\n');
1925         }
1926         return 0;
1927 }
1928
1929 static const struct seq_operations ipmr_mfc_seq_ops = {
1930         .start = ipmr_mfc_seq_start,
1931         .next  = ipmr_mfc_seq_next,
1932         .stop  = ipmr_mfc_seq_stop,
1933         .show  = ipmr_mfc_seq_show,
1934 };
1935
1936 static int ipmr_mfc_open(struct inode *inode, struct file *file)
1937 {
1938         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1939                             sizeof(struct ipmr_mfc_iter));
1940 }
1941
1942 static const struct file_operations ipmr_mfc_fops = {
1943         .owner   = THIS_MODULE,
1944         .open    = ipmr_mfc_open,
1945         .read    = seq_read,
1946         .llseek  = seq_lseek,
1947         .release = seq_release_net,
1948 };
1949 #endif
1950
1951 #ifdef CONFIG_IP_PIMSM_V2
1952 static const struct net_protocol pim_protocol = {
1953         .handler        =       pim_rcv,
1954         .netns_ok       =       1,
1955 };
1956 #endif
1957
1958
1959 /*
1960  *      Setup for IP multicast routing
1961  */
1962 static int __net_init ipmr_net_init(struct net *net)
1963 {
1964         int err = 0;
1965
1966         net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1967                                       GFP_KERNEL);
1968         if (!net->ipv4.vif_table) {
1969                 err = -ENOMEM;
1970                 goto fail;
1971         }
1972
1973         /* Forwarding cache */
1974         net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1975                                             sizeof(struct mfc_cache *),
1976                                             GFP_KERNEL);
1977         if (!net->ipv4.mfc_cache_array) {
1978                 err = -ENOMEM;
1979                 goto fail_mfc_cache;
1980         }
1981
1982         setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
1983                     (unsigned long)net);
1984
1985 #ifdef CONFIG_IP_PIMSM
1986         net->ipv4.mroute_reg_vif_num = -1;
1987 #endif
1988
1989 #ifdef CONFIG_PROC_FS
1990         err = -ENOMEM;
1991         if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1992                 goto proc_vif_fail;
1993         if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1994                 goto proc_cache_fail;
1995 #endif
1996         return 0;
1997
1998 #ifdef CONFIG_PROC_FS
1999 proc_cache_fail:
2000         proc_net_remove(net, "ip_mr_vif");
2001 proc_vif_fail:
2002         kfree(net->ipv4.mfc_cache_array);
2003 #endif
2004 fail_mfc_cache:
2005         kfree(net->ipv4.vif_table);
2006 fail:
2007         return err;
2008 }
2009
2010 static void __net_exit ipmr_net_exit(struct net *net)
2011 {
2012 #ifdef CONFIG_PROC_FS
2013         proc_net_remove(net, "ip_mr_cache");
2014         proc_net_remove(net, "ip_mr_vif");
2015 #endif
2016         kfree(net->ipv4.mfc_cache_array);
2017         kfree(net->ipv4.vif_table);
2018 }
2019
2020 static struct pernet_operations ipmr_net_ops = {
2021         .init = ipmr_net_init,
2022         .exit = ipmr_net_exit,
2023 };
2024
2025 int __init ip_mr_init(void)
2026 {
2027         int err;
2028
2029         mrt_cachep = kmem_cache_create("ip_mrt_cache",
2030                                        sizeof(struct mfc_cache),
2031                                        0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2032                                        NULL);
2033         if (!mrt_cachep)
2034                 return -ENOMEM;
2035
2036         err = register_pernet_subsys(&ipmr_net_ops);
2037         if (err)
2038                 goto reg_pernet_fail;
2039
2040         err = register_netdevice_notifier(&ip_mr_notifier);
2041         if (err)
2042                 goto reg_notif_fail;
2043 #ifdef CONFIG_IP_PIMSM_V2
2044         if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
2045                 printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
2046                 err = -EAGAIN;
2047                 goto add_proto_fail;
2048         }
2049 #endif
2050         return 0;
2051
2052 #ifdef CONFIG_IP_PIMSM_V2
2053 add_proto_fail:
2054         unregister_netdevice_notifier(&ip_mr_notifier);
2055 #endif
2056 reg_notif_fail:
2057         unregister_pernet_subsys(&ipmr_net_ops);
2058 reg_pernet_fail:
2059         kmem_cache_destroy(mrt_cachep);
2060         return err;
2061 }