IPv6: fix race between cleanup and add/delete address
[pandora-kernel.git] / net / ipv6 / addrconf.c
index de7a194..6cf3ee1 100644 (file)
@@ -278,31 +278,31 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 
 static int snmp6_alloc_dev(struct inet6_dev *idev)
 {
-       if (snmp_mib_init((void **)idev->stats.ipv6,
+       if (snmp_mib_init((void __percpu **)idev->stats.ipv6,
                          sizeof(struct ipstats_mib)) < 0)
                goto err_ip;
-       if (snmp_mib_init((void **)idev->stats.icmpv6,
+       if (snmp_mib_init((void __percpu **)idev->stats.icmpv6,
                          sizeof(struct icmpv6_mib)) < 0)
                goto err_icmp;
-       if (snmp_mib_init((void **)idev->stats.icmpv6msg,
+       if (snmp_mib_init((void __percpu **)idev->stats.icmpv6msg,
                          sizeof(struct icmpv6msg_mib)) < 0)
                goto err_icmpmsg;
 
        return 0;
 
 err_icmpmsg:
-       snmp_mib_free((void **)idev->stats.icmpv6);
+       snmp_mib_free((void __percpu **)idev->stats.icmpv6);
 err_icmp:
-       snmp_mib_free((void **)idev->stats.ipv6);
+       snmp_mib_free((void __percpu **)idev->stats.ipv6);
 err_ip:
        return -ENOMEM;
 }
 
 static void snmp6_free_dev(struct inet6_dev *idev)
 {
-       snmp_mib_free((void **)idev->stats.icmpv6msg);
-       snmp_mib_free((void **)idev->stats.icmpv6);
-       snmp_mib_free((void **)idev->stats.ipv6);
+       snmp_mib_free((void __percpu **)idev->stats.icmpv6msg);
+       snmp_mib_free((void __percpu **)idev->stats.icmpv6);
+       snmp_mib_free((void __percpu **)idev->stats.ipv6);
 }
 
 /* Nobody refers to this device, we may destroy it. */
@@ -502,8 +502,11 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
        if (p == &net->ipv6.devconf_dflt->forwarding)
                return 0;
 
-       if (!rtnl_trylock())
+       if (!rtnl_trylock()) {
+               /* Restore the original value before restarting */
+               *p = old;
                return restart_syscall();
+       }
 
        if (p == &net->ipv6.devconf_all->forwarding) {
                __s32 newf = net->ipv6.devconf_all->forwarding;
@@ -989,8 +992,7 @@ struct ipv6_saddr_dst {
 
 static inline int ipv6_saddr_preferred(int type)
 {
-       if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|
-                   IPV6_ADDR_LOOPBACK|IPV6_ADDR_RESERVED))
+       if (type & (IPV6_ADDR_MAPPED|IPV6_ADDR_COMPATv4|IPV6_ADDR_LOOPBACK))
                return 1;
        return 0;
 }
@@ -2613,7 +2615,7 @@ static void addrconf_bonding_change(struct net_device *dev, unsigned long event)
 static int addrconf_ifdown(struct net_device *dev, int how)
 {
        struct inet6_dev *idev;
-       struct inet6_ifaddr *ifa, **bifa;
+       struct inet6_ifaddr *ifa, *keep_list, **bifa;
        struct net *net = dev_net(dev);
        int i;
 
@@ -2646,11 +2648,12 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
                write_lock_bh(&addrconf_hash_lock);
                while ((ifa = *bifa) != NULL) {
-                       if (ifa->idev == idev) {
+                       if (ifa->idev == idev &&
+                           (how || !(ifa->flags&IFA_F_PERMANENT) ||
+                            ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
                                *bifa = ifa->lst_next;
                                ifa->lst_next = NULL;
-                               addrconf_del_timer(ifa);
-                               in6_ifa_put(ifa);
+                               __in6_ifa_put(ifa);
                                continue;
                        }
                        bifa = &ifa->lst_next;
@@ -2686,11 +2689,40 @@ static int addrconf_ifdown(struct net_device *dev, int how)
                write_lock_bh(&idev->lock);
        }
 #endif
+       keep_list = NULL;
+       bifa = &keep_list;
        while ((ifa = idev->addr_list) != NULL) {
                idev->addr_list = ifa->if_next;
                ifa->if_next = NULL;
-               ifa->dead = 1;
+
                addrconf_del_timer(ifa);
+
+               /* If just doing link down, and address is permanent
+                  and not link-local, then retain it. */
+               if (how == 0 &&
+                   (ifa->flags&IFA_F_PERMANENT) &&
+                   !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
+
+                       /* Move to holding list */
+                       *bifa = ifa;
+                       bifa = &ifa->if_next;
+
+                       /* If not doing DAD on this address, just keep it. */
+                       if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
+                           idev->cnf.accept_dad <= 0 ||
+                           (ifa->flags & IFA_F_NODAD))
+                               continue;
+
+                       /* If it was tentative already, no need to notify */
+                       if (ifa->flags & IFA_F_TENTATIVE)
+                               continue;
+
+                       /* Flag it for later restoration when link comes up */
+                       ifa->flags |= IFA_F_TENTATIVE;
+                       in6_ifa_hold(ifa);
+               } else {
+                       ifa->dead = 1;
+               }
                write_unlock_bh(&idev->lock);
 
                __ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -2699,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
                write_lock_bh(&idev->lock);
        }
+
+       idev->addr_list = keep_list;
+
        write_unlock_bh(&idev->lock);
 
        /* Step 5: Discard multicast list */
@@ -2724,28 +2759,29 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 static void addrconf_rs_timer(unsigned long data)
 {
        struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+       struct inet6_dev *idev = ifp->idev;
 
-       if (ifp->idev->cnf.forwarding)
+       read_lock(&idev->lock);
+       if (idev->dead || !(idev->if_flags & IF_READY))
                goto out;
 
-       if (ifp->idev->if_flags & IF_RA_RCVD) {
-               /*
-                *      Announcement received after solicitation
-                *      was sent
-                */
+       if (idev->cnf.forwarding)
+               goto out;
+
+       /* Announcement received after solicitation was sent */
+       if (idev->if_flags & IF_RA_RCVD)
                goto out;
-       }
 
        spin_lock(&ifp->lock);
-       if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+       if (ifp->probes++ < idev->cnf.rtr_solicits) {
                /* The wait after the last probe can be shorter */
                addrconf_mod_timer(ifp, AC_RS,
-                                  (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
-                                  ifp->idev->cnf.rtr_solicit_delay :
-                                  ifp->idev->cnf.rtr_solicit_interval);
+                                  (ifp->probes == idev->cnf.rtr_solicits) ?
+                                  idev->cnf.rtr_solicit_delay :
+                                  idev->cnf.rtr_solicit_interval);
                spin_unlock(&ifp->lock);
 
-               ndisc_send_rs(ifp->idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
+               ndisc_send_rs(idev->dev, &ifp->addr, &in6addr_linklocal_allrouters);
        } else {
                spin_unlock(&ifp->lock);
                /*
@@ -2753,10 +2789,11 @@ static void addrconf_rs_timer(unsigned long data)
                 * assumption any longer.
                 */
                printk(KERN_DEBUG "%s: no IPv6 routers present\n",
-                      ifp->idev->dev->name);
+                      idev->dev->name);
        }
 
 out:
+       read_unlock(&idev->lock);
        in6_ifa_put(ifp);
 }
 
@@ -2789,14 +2826,14 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
        read_lock_bh(&idev->lock);
        if (ifp->dead)
                goto out;
-       spin_lock_bh(&ifp->lock);
 
+       spin_lock(&ifp->lock);
        if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
            idev->cnf.accept_dad < 1 ||
            !(ifp->flags&IFA_F_TENTATIVE) ||
            ifp->flags & IFA_F_NODAD) {
                ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
-               spin_unlock_bh(&ifp->lock);
+               spin_unlock(&ifp->lock);
                read_unlock_bh(&idev->lock);
 
                addrconf_dad_completed(ifp);
@@ -2804,7 +2841,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
        }
 
        if (!(idev->if_flags & IF_READY)) {
-               spin_unlock_bh(&ifp->lock);
+               spin_unlock(&ifp->lock);
                read_unlock_bh(&idev->lock);
                /*
                 * If the device is not ready:
@@ -2824,7 +2861,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
                ip6_ins_rt(ifp->rt);
 
        addrconf_dad_kick(ifp);
-       spin_unlock_bh(&ifp->lock);
+       spin_unlock(&ifp->lock);
 out:
        read_unlock_bh(&idev->lock);
 }
@@ -2835,20 +2872,21 @@ static void addrconf_dad_timer(unsigned long data)
        struct inet6_dev *idev = ifp->idev;
        struct in6_addr mcaddr;
 
-       read_lock_bh(&idev->lock);
-       if (idev->dead) {
-               read_unlock_bh(&idev->lock);
+       read_lock(&idev->lock);
+       if (idev->dead || !(idev->if_flags & IF_READY)) {
+               read_unlock(&idev->lock);
                goto out;
        }
-       spin_lock_bh(&ifp->lock);
+
+       spin_lock(&ifp->lock);
        if (ifp->probes == 0) {
                /*
                 * DAD was successful
                 */
 
                ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
-               spin_unlock_bh(&ifp->lock);
-               read_unlock_bh(&idev->lock);
+               spin_unlock(&ifp->lock);
+               read_unlock(&idev->lock);
 
                addrconf_dad_completed(ifp);
 
@@ -2857,8 +2895,8 @@ static void addrconf_dad_timer(unsigned long data)
 
        ifp->probes--;
        addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
-       spin_unlock_bh(&ifp->lock);
-       read_unlock_bh(&idev->lock);
+       spin_unlock(&ifp->lock);
+       read_unlock(&idev->lock);
 
        /* send a neighbour solicitation for our addr */
        addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
@@ -2905,12 +2943,12 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
 
        read_lock_bh(&idev->lock);
        for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
-               spin_lock_bh(&ifp->lock);
+               spin_lock(&ifp->lock);
                if (!(ifp->flags & IFA_F_TENTATIVE)) {
-                       spin_unlock_bh(&ifp->lock);
+                       spin_unlock(&ifp->lock);
                        continue;
                }
-               spin_unlock_bh(&ifp->lock);
+               spin_unlock(&ifp->lock);
                addrconf_dad_kick(ifp);
        }
        read_unlock_bh(&idev->lock);
@@ -3027,14 +3065,14 @@ static const struct file_operations if6_fops = {
        .release        = seq_release_net,
 };
 
-static int if6_proc_net_init(struct net *net)
+static int __net_init if6_proc_net_init(struct net *net)
 {
        if (!proc_net_fops_create(net, "if_inet6", S_IRUGO, &if6_fops))
                return -ENOMEM;
        return 0;
 }
 
-static void if6_proc_net_exit(struct net *net)
+static void __net_exit if6_proc_net_exit(struct net *net)
 {
        proc_net_remove(net, "if_inet6");
 }
@@ -3752,8 +3790,8 @@ static inline size_t inet6_if_nlmsg_size(void)
                 );
 }
 
-static inline void __snmp6_fill_stats(u64 *stats, void **mib, int items,
-                                     int bytes)
+static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
+                                     int items, int bytes)
 {
        int i;
        int pad = bytes - sizeof(u64) * items;
@@ -3772,10 +3810,10 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 {
        switch(attrtype) {
        case IFLA_INET6_STATS:
-               __snmp6_fill_stats(stats, (void **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
+               __snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
                break;
        case IFLA_INET6_ICMP6STATS:
-               __snmp6_fill_stats(stats, (void **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
+               __snmp6_fill_stats(stats, (void __percpu **)idev->stats.icmpv6, ICMP6_MIB_MAX, bytes);
                break;
        }
 }
@@ -4028,12 +4066,15 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write,
 {
        int *valp = ctl->data;
        int val = *valp;
+       loff_t pos = *ppos;
        int ret;
 
        ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_fixup_forwarding(ctl, valp, val);
+       if (ret)
+               *ppos = pos;
        return ret;
 }
 
@@ -4075,8 +4116,11 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int old)
        if (p == &net->ipv6.devconf_dflt->disable_ipv6)
                return 0;
 
-       if (!rtnl_trylock())
+       if (!rtnl_trylock()) {
+               /* Restore the original value before restarting */
+               *p = old;
                return restart_syscall();
+       }
 
        if (p == &net->ipv6.devconf_all->disable_ipv6) {
                __s32 newf = net->ipv6.devconf_all->disable_ipv6;
@@ -4095,12 +4139,15 @@ int addrconf_sysctl_disable(ctl_table *ctl, int write,
 {
        int *valp = ctl->data;
        int val = *valp;
+       loff_t pos = *ppos;
        int ret;
 
        ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
        if (write)
                ret = addrconf_disable_ipv6(ctl, valp, val);
+       if (ret)
+               *ppos = pos;
        return ret;
 }
 
@@ -4402,8 +4449,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)
 
 static void addrconf_sysctl_register(struct inet6_dev *idev)
 {
-       neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
-                             NET_IPV6_NEIGH, "ipv6",
+       neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6",
                              &ndisc_ifinfo_sysctl_change);
        __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
                                        idev, &idev->cnf);
@@ -4418,7 +4464,7 @@ static void addrconf_sysctl_unregister(struct inet6_dev *idev)
 
 #endif
 
-static int addrconf_init_net(struct net *net)
+static int __net_init addrconf_init_net(struct net *net)
 {
        int err;
        struct ipv6_devconf *all, *dflt;
@@ -4467,7 +4513,7 @@ err_alloc_all:
        return err;
 }
 
-static void addrconf_exit_net(struct net *net)
+static void __net_exit addrconf_exit_net(struct net *net)
 {
 #ifdef CONFIG_SYSCTL
        __addrconf_sysctl_unregister(net->ipv6.devconf_dflt);