Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
[pandora-kernel.git] / drivers / net / bonding / bond_main.c
index 3eebfe2..423298c 100644 (file)
@@ -98,6 +98,7 @@ static char *xmit_hash_policy = NULL;
 static int arp_interval = BOND_LINK_ARP_INTERV;
 static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, };
 static char *arp_validate = NULL;
+static int fail_over_mac = 0;
 struct bond_params bonding_defaults;
 
 module_param(max_bonds, int, 0);
@@ -131,6 +132,8 @@ module_param_array(arp_ip_target, charp, NULL, 0);
 MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
 module_param(arp_validate, charp, 0);
 MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all");
+module_param(fail_over_mac, int, 0);
+MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC.  0 for off (default), 1 for on.");
 
 /*----------------------------- Global variables ----------------------------*/
 
@@ -185,6 +188,7 @@ struct bond_parm_tbl arp_validate_tbl[] = {
 /*-------------------------- Forward declarations ---------------------------*/
 
 static void bond_send_gratuitous_arp(struct bonding *bond);
+static void bond_deinit(struct net_device *bond_dev);
 
 /*---------------------------- General routines -----------------------------*/
 
@@ -1100,11 +1104,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
                /* when bonding does not set the slave MAC address, the bond MAC
                 * address is the one of the active slave.
                 */
-               if (new_active && !bond->do_set_mac_addr)
+               if (new_active && bond->params.fail_over_mac)
                        memcpy(bond->dev->dev_addr,  new_active->dev->dev_addr,
                                new_active->dev->addr_len);
-
-               bond_send_gratuitous_arp(bond);
+               if (bond->curr_active_slave &&
+                       test_bit(__LINK_STATE_LINKWATCH_PENDING,
+                                       &bond->curr_active_slave->dev->state)) {
+                       dprintk("delaying gratuitous arp on %s\n",
+                               bond->curr_active_slave->dev->name);
+                       bond->send_grat_arp = 1;
+               } else
+                       bond_send_gratuitous_arp(bond);
        }
 }
 
@@ -1225,7 +1235,8 @@ static int bond_compute_features(struct bonding *bond)
        struct slave *slave;
        struct net_device *bond_dev = bond->dev;
        unsigned long features = bond_dev->features;
-       unsigned short max_hard_header_len = ETH_HLEN;
+       unsigned short max_hard_header_len = max((u16)ETH_HLEN,
+                                               bond_dev->hard_header_len);
        int i;
 
        features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES);
@@ -1250,7 +1261,10 @@ static int bond_compute_features(struct bonding *bond)
 static void bond_setup_by_slave(struct net_device *bond_dev,
                                struct net_device *slave_dev)
 {
+       struct bonding *bond = bond_dev->priv;
+
        bond_dev->neigh_setup           = slave_dev->neigh_setup;
+       bond_dev->header_ops            = slave_dev->header_ops;
 
        bond_dev->type              = slave_dev->type;
        bond_dev->hard_header_len   = slave_dev->hard_header_len;
@@ -1258,6 +1272,7 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
 
        memcpy(bond_dev->broadcast, slave_dev->broadcast,
                slave_dev->addr_len);
+       bond->setup_by_slave = 1;
 }
 
 /* enslave device <slave> to bond device <master> */
@@ -1357,16 +1372,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
        if (slave_dev->set_mac_address == NULL) {
                if (bond->slave_cnt == 0) {
                        printk(KERN_WARNING DRV_NAME
-                               ": %s: Warning: The first slave device you "
-                               "specified does not support setting the MAC "
-                               "address. This bond MAC address would be that "
-                               "of the active slave.\n", bond_dev->name);
-                       bond->do_set_mac_addr = 0;
-               } else if (bond->do_set_mac_addr) {
+                              ": %s: Warning: The first slave device "
+                              "specified does not support setting the MAC "
+                              "address. Enabling the fail_over_mac option.",
+                              bond_dev->name);
+                       bond->params.fail_over_mac = 1;
+               } else if (!bond->params.fail_over_mac) {
                        printk(KERN_ERR DRV_NAME
-                               ": %s: Error: The slave device you specified "
-                               "does not support setting the MAC addres,."
-                               "but this bond uses this practice. \n"
+                               ": %s: Error: The slave device specified "
+                               "does not support setting the MAC address, "
+                               "but fail_over_mac is not enabled.\n"
                                , bond_dev->name);
                        res = -EOPNOTSUPP;
                        goto err_undo_flags;
@@ -1391,7 +1406,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
         */
        memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN);
 
-       if (bond->do_set_mac_addr) {
+       if (!bond->params.fail_over_mac) {
                /*
                 * Set slave to master's mac address.  The application already
                 * set the master's mac address to that of the first slave
@@ -1576,15 +1591,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
        case BOND_MODE_TLB:
        case BOND_MODE_ALB:
                new_slave->state = BOND_STATE_ACTIVE;
-               if ((!bond->curr_active_slave) &&
-                   (new_slave->link != BOND_LINK_DOWN)) {
-                       /* first slave or no active slave yet, and this link
-                        * is OK, so make this interface the active one
-                        */
-                       bond_change_active_slave(bond, new_slave);
-               } else {
-                       bond_set_slave_inactive_flags(new_slave);
-               }
+               bond_set_slave_inactive_flags(new_slave);
                break;
        default:
                dprintk("This slave is always active in trunk mode\n");
@@ -1627,7 +1634,7 @@ err_close:
        dev_close(slave_dev);
 
 err_restore_mac:
-       if (bond->do_set_mac_addr) {
+       if (!bond->params.fail_over_mac) {
                memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
                addr.sa_family = slave_dev->type;
                dev_set_mac_address(slave_dev, &addr);
@@ -1740,9 +1747,23 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
                bond_alb_deinit_slave(bond, slave);
        }
 
-       if (oldcurrent == slave)
+       if (oldcurrent == slave) {
+               /*
+                * Note that we hold RTNL over this sequence, so there
+                * is no concern that another slave add/remove event
+                * will interfere.
+                */
+               write_unlock_bh(&bond->lock);
+               read_lock(&bond->lock);
+               write_lock_bh(&bond->curr_slave_lock);
+
                bond_select_active_slave(bond);
 
+               write_unlock_bh(&bond->curr_slave_lock);
+               read_unlock(&bond->lock);
+               write_lock_bh(&bond->lock);
+       }
+
        if (bond->slave_cnt == 0) {
                bond_set_carrier(bond);
 
@@ -1804,7 +1825,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
        /* close slave before restoring its mac address */
        dev_close(slave_dev);
 
-       if (bond->do_set_mac_addr) {
+       if (!bond->params.fail_over_mac) {
                /* restore original ("permanent") mac address */
                memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
                addr.sa_family = slave_dev->type;
@@ -1820,6 +1841,35 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
        return 0;  /* deletion OK */
 }
 
+/*
+ * Destroy a bonding device: bond_deinit(), bond_destroy_sysfs_entry(),
+ * then unregister_netdevice().  Must be called under rtnl_lock.
+ */
+void bond_destroy(struct bonding *bond)
+{
+       bond_deinit(bond->dev);
+       bond_destroy_sysfs_entry(bond);
+       unregister_netdevice(bond->dev);
+}
+
+/*
+ * Release a slave, then destroy the bond if no more slaves are left.
+ * Must be under rtnl_lock when this function is called.
+ */
+int  bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
+{
+       struct bonding *bond = bond_dev->priv;
+       int ret = bond_release(bond_dev, slave_dev);
+
+       if (ret != 0 || bond->slave_cnt != 0)
+               return ret;
+
+       printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n",
+              bond_dev->name, bond_dev->name);
+       bond_destroy(bond);
+       return 0;
+}
+
 /*
  * This function releases all slaves.
  */
@@ -1896,7 +1946,7 @@ static int bond_release_all(struct net_device *bond_dev)
                /* close slave before restoring its mac address */
                dev_close(slave_dev);
 
-               if (bond->do_set_mac_addr) {
+               if (!bond->params.fail_over_mac) {
                        /* restore original ("permanent") mac address*/
                        memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN);
                        addr.sa_family = slave_dev->type;
@@ -1969,16 +2019,19 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
                return -EINVAL;
        }
 
-       write_lock_bh(&bond->lock);
+       read_lock(&bond->lock);
 
+       read_lock(&bond->curr_slave_lock);
        old_active = bond->curr_active_slave;
+       read_unlock(&bond->curr_slave_lock);
+
        new_active = bond_get_slave_by_dev(bond, slave_dev);
 
        /*
         * Changing to the current active: do nothing; return success.
         */
        if (new_active && (new_active == old_active)) {
-               write_unlock_bh(&bond->lock);
+               read_unlock(&bond->lock);
                return 0;
        }
 
@@ -1986,12 +2039,14 @@ static int bond_ioctl_change_active(struct net_device *bond_dev, struct net_devi
            (old_active) &&
            (new_active->link == BOND_LINK_UP) &&
            IS_UP(new_active->dev)) {
+               write_lock_bh(&bond->curr_slave_lock);
                bond_change_active_slave(bond, new_active);
+               write_unlock_bh(&bond->curr_slave_lock);
        } else {
                res = -EINVAL;
        }
 
-       write_unlock_bh(&bond->lock);
+       read_unlock(&bond->lock);
 
        return res;
 }
@@ -2003,9 +2058,9 @@ static int bond_info_query(struct net_device *bond_dev, struct ifbond *info)
        info->bond_mode = bond->params.mode;
        info->miimon = bond->params.miimon;
 
-       read_lock_bh(&bond->lock);
+       read_lock(&bond->lock);
        info->num_slaves = bond->slave_cnt;
-       read_unlock_bh(&bond->lock);
+       read_unlock(&bond->lock);
 
        return 0;
 }
@@ -2020,7 +2075,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
                return -ENODEV;
        }
 
-       read_lock_bh(&bond->lock);
+       read_lock(&bond->lock);
 
        bond_for_each_slave(bond, slave, i) {
                if (i == (int)info->slave_id) {
@@ -2029,7 +2084,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
                }
        }
 
-       read_unlock_bh(&bond->lock);
+       read_unlock(&bond->lock);
 
        if (found) {
                strcpy(info->slave_name, slave->dev->name);
@@ -2045,26 +2100,25 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in
 
 /*-------------------------------- Monitoring -------------------------------*/
 
-/* this function is called regularly to monitor each slave's link. */
-void bond_mii_monitor(struct net_device *bond_dev)
+/*
+ * if !have_locks, return nonzero if a failover is necessary.  if
+ * have_locks, do whatever failover activities are needed.
+ *
+ * This is to separate the inspection and failover steps for locking
+ * purposes; failover requires rtnl, but acquiring it for every
+ * inspection is undesirable, so a wrapper first does inspection, and
+ * then acquires the necessary locks and calls again to perform
+ * failover if needed.  Since all locks are dropped, a complete
+ * restart is needed between calls.
+ */
+static int __bond_mii_monitor(struct bonding *bond, int have_locks)
 {
-       struct bonding *bond = bond_dev->priv;
        struct slave *slave, *oldcurrent;
        int do_failover = 0;
-       int delta_in_ticks;
        int i;
 
-       read_lock(&bond->lock);
-
-       delta_in_ticks = (bond->params.miimon * HZ) / 1000;
-
-       if (bond->kill_timers) {
+       if (bond->slave_cnt == 0)
                goto out;
-       }
-
-       if (bond->slave_cnt == 0) {
-               goto re_arm;
-       }
 
        /* we will try to read the link status of each of our slaves, and
         * set their IFF_RUNNING flag appropriately. For each slave not
@@ -2072,6 +2126,17 @@ void bond_mii_monitor(struct net_device *bond_dev)
         * program could monitor the link itself if needed.
         */
 
+       if (bond->send_grat_arp) {
+               if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING,
+                               &bond->curr_active_slave->dev->state))
+                       dprintk("Needs to send gratuitous arp but not yet\n");
+               else {
+                       dprintk("sending delayed gratuitous arp on on %s\n",
+                               bond->curr_active_slave->dev->name);
+                       bond_send_gratuitous_arp(bond);
+                       bond->send_grat_arp = 0;
+               }
+       }
        read_lock(&bond->curr_slave_lock);
        oldcurrent = bond->curr_active_slave;
        read_unlock(&bond->curr_slave_lock);
@@ -2087,7 +2152,11 @@ void bond_mii_monitor(struct net_device *bond_dev)
                switch (slave->link) {
                case BOND_LINK_UP:      /* the link was up */
                        if (link_state == BMSR_LSTATUS) {
-                               /* link stays up, nothing more to do */
+                               if (!oldcurrent) {
+                                       if (!have_locks)
+                                               return 1;
+                                       do_failover = 1;
+                               }
                                break;
                        } else { /* link going down */
                                slave->link  = BOND_LINK_FAIL;
@@ -2102,7 +2171,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                               ": %s: link status down for %s "
                                               "interface %s, disabling it in "
                                               "%d ms.\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               IS_UP(slave_dev)
                                               ? ((bond->params.mode == BOND_MODE_ACTIVEBACKUP)
                                                  ? ((slave == oldcurrent)
@@ -2120,6 +2189,9 @@ void bond_mii_monitor(struct net_device *bond_dev)
                        if (link_state != BMSR_LSTATUS) {
                                /* link stays down */
                                if (slave->delay <= 0) {
+                                       if (!have_locks)
+                                               return 1;
+
                                        /* link down for too long time */
                                        slave->link = BOND_LINK_DOWN;
 
@@ -2135,7 +2207,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                               ": %s: link status definitely "
                                               "down for interface %s, "
                                               "disabling it\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave_dev->name);
 
                                        /* notify ad that the link status has changed */
@@ -2161,7 +2233,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                printk(KERN_INFO DRV_NAME
                                       ": %s: link status up again after %d "
                                       "ms for interface %s.\n",
-                                      bond_dev->name,
+                                      bond->dev->name,
                                       (bond->params.downdelay - slave->delay) * bond->params.miimon,
                                       slave_dev->name);
                        }
@@ -2181,7 +2253,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                               ": %s: link status up for "
                                               "interface %s, enabling it "
                                               "in %d ms.\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave_dev->name,
                                               bond->params.updelay * bond->params.miimon);
                                }
@@ -2197,12 +2269,15 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                printk(KERN_INFO DRV_NAME
                                       ": %s: link status down again after %d "
                                       "ms for interface %s.\n",
-                                      bond_dev->name,
+                                      bond->dev->name,
                                       (bond->params.updelay - slave->delay) * bond->params.miimon,
                                       slave_dev->name);
                        } else {
                                /* link stays up */
                                if (slave->delay == 0) {
+                                       if (!have_locks)
+                                               return 1;
+
                                        /* now the link has been up for long time enough */
                                        slave->link = BOND_LINK_UP;
                                        slave->jiffies = jiffies;
@@ -2221,7 +2296,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: link status definitely "
                                               "up for interface %s.\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave_dev->name);
 
                                        /* notify ad that the link status has changed */
@@ -2247,7 +2322,7 @@ void bond_mii_monitor(struct net_device *bond_dev)
                        /* Should not happen */
                        printk(KERN_ERR DRV_NAME
                               ": %s: Error: %s Illegal value (link=%d)\n",
-                              bond_dev->name,
+                              bond->dev->name,
                               slave->dev->name,
                               slave->link);
                        goto out;
@@ -2268,22 +2343,52 @@ void bond_mii_monitor(struct net_device *bond_dev)
        } /* end of for */
 
        if (do_failover) {
-               write_lock(&bond->curr_slave_lock);
+               ASSERT_RTNL();
+
+               write_lock_bh(&bond->curr_slave_lock);
 
                bond_select_active_slave(bond);
 
-               write_unlock(&bond->curr_slave_lock);
+               write_unlock_bh(&bond->curr_slave_lock);
+
        } else
                bond_set_carrier(bond);
 
-re_arm:
-       if (bond->params.miimon) {
-               mod_timer(&bond->mii_timer, jiffies + delta_in_ticks);
-       }
 out:
-       read_unlock(&bond->lock);
+       return 0;
 }
 
+/*
+ * Work handler for the MII link monitor.  A first pass inspects the
+ * slaves under bond->lock; if it reports that action is needed, RTNL is
+ * taken and a second pass performs it.  rtnl_unlock() may sleep, so
+ * bond->lock is dropped around it.  Re-arms unless kill_timers is set.
+ */
+void bond_mii_monitor(struct work_struct *work)
+{
+       struct bonding *bond = container_of(work, struct bonding,
+                                           mii_work.work);
+       unsigned long delay;
+
+       read_lock(&bond->lock);
+       if (bond->kill_timers) {
+               read_unlock(&bond->lock);
+               return;
+       }
+       if (__bond_mii_monitor(bond, 0)) {
+               read_unlock(&bond->lock);
+               rtnl_lock();
+               read_lock(&bond->lock);
+               __bond_mii_monitor(bond, 1);
+               read_unlock(&bond->lock);
+               rtnl_unlock();  /* might sleep, hold no other locks */
+               read_lock(&bond->lock);
+       }
+
+       delay = ((bond->params.miimon * HZ) / 1000) ? : 1;
+       read_unlock(&bond->lock);
+       queue_delayed_work(bond->wq, &bond->mii_work, delay);
+}
 
 static __be32 bond_glean_dev_ip(struct net_device *dev)
 {
@@ -2473,7 +2578,7 @@ static void bond_send_gratuitous_arp(struct bonding *bond)
 
        if (bond->master_ip) {
                bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip,
-                                 bond->master_ip, 0);
+                               bond->master_ip, 0);
        }
 
        list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
@@ -2582,9 +2687,10 @@ out:
  * arp is transmitted to generate traffic. see activebackup_arp_monitor for
  * arp monitoring in active backup mode.
  */
-void bond_loadbalance_arp_mon(struct net_device *bond_dev)
+void bond_loadbalance_arp_mon(struct work_struct *work)
 {
-       struct bonding *bond = bond_dev->priv;
+       struct bonding *bond = container_of(work, struct bonding,
+                                           arp_work.work);
        struct slave *slave, *oldcurrent;
        int do_failover = 0;
        int delta_in_ticks;
@@ -2631,13 +2737,13 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev)
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: link status definitely "
                                               "up for interface %s, ",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave->dev->name);
                                        do_failover = 1;
                                } else {
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: interface %s is now up\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave->dev->name);
                                }
                        }
@@ -2661,7 +2767,7 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev)
 
                                printk(KERN_INFO DRV_NAME
                                       ": %s: interface %s is now down.\n",
-                                      bond_dev->name,
+                                      bond->dev->name,
                                       slave->dev->name);
 
                                if (slave == oldcurrent) {
@@ -2683,17 +2789,19 @@ void bond_loadbalance_arp_mon(struct net_device *bond_dev)
        }
 
        if (do_failover) {
-               write_lock(&bond->curr_slave_lock);
+               rtnl_lock();
+               write_lock_bh(&bond->curr_slave_lock);
 
                bond_select_active_slave(bond);
 
-               write_unlock(&bond->curr_slave_lock);
+               write_unlock_bh(&bond->curr_slave_lock);
+               rtnl_unlock();
+
        }
 
 re_arm:
-       if (bond->params.arp_interval) {
-               mod_timer(&bond->arp_timer, jiffies + delta_in_ticks);
-       }
+       if (bond->params.arp_interval)
+               queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 out:
        read_unlock(&bond->lock);
 }
@@ -2713,9 +2821,10 @@ out:
  * may have received.
  * see loadbalance_arp_monitor for arp monitoring in load balancing mode
  */
-void bond_activebackup_arp_mon(struct net_device *bond_dev)
+void bond_activebackup_arp_mon(struct work_struct *work)
 {
-       struct bonding *bond = bond_dev->priv;
+       struct bonding *bond = container_of(work, struct bonding,
+                                           arp_work.work);
        struct slave *slave;
        int delta_in_ticks;
        int i;
@@ -2744,7 +2853,9 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 
                                slave->link = BOND_LINK_UP;
 
-                               write_lock(&bond->curr_slave_lock);
+                               rtnl_lock();
+
+                               write_lock_bh(&bond->curr_slave_lock);
 
                                if ((!bond->curr_active_slave) &&
                                    ((jiffies - slave->dev->trans_start) <= delta_in_ticks)) {
@@ -2767,18 +2878,19 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: %s is up and now the "
                                               "active interface\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave->dev->name);
                                        netif_carrier_on(bond->dev);
                                } else {
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: backup interface %s is "
                                               "now up\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave->dev->name);
                                }
 
-                               write_unlock(&bond->curr_slave_lock);
+                               write_unlock_bh(&bond->curr_slave_lock);
+                               rtnl_unlock();
                        }
                } else {
                        read_lock(&bond->curr_slave_lock);
@@ -2810,7 +2922,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 
                                printk(KERN_INFO DRV_NAME
                                       ": %s: backup interface %s is now down\n",
-                                      bond_dev->name,
+                                      bond->dev->name,
                                       slave->dev->name);
                        } else {
                                read_unlock(&bond->curr_slave_lock);
@@ -2845,15 +2957,18 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
                        printk(KERN_INFO DRV_NAME
                               ": %s: link status down for active interface "
                               "%s, disabling it\n",
-                              bond_dev->name,
+                              bond->dev->name,
                               slave->dev->name);
 
-                       write_lock(&bond->curr_slave_lock);
+                       rtnl_lock();
+                       write_lock_bh(&bond->curr_slave_lock);
 
                        bond_select_active_slave(bond);
                        slave = bond->curr_active_slave;
 
-                       write_unlock(&bond->curr_slave_lock);
+                       write_unlock_bh(&bond->curr_slave_lock);
+
+                       rtnl_unlock();
 
                        bond->current_arp_slave = slave;
 
@@ -2867,14 +2982,17 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
                        printk(KERN_INFO DRV_NAME
                               ": %s: changing from interface %s to primary "
                               "interface %s\n",
-                              bond_dev->name,
+                              bond->dev->name,
                               slave->dev->name,
                               bond->primary_slave->dev->name);
 
                        /* primary is up so switch to it */
-                       write_lock(&bond->curr_slave_lock);
+                       rtnl_lock();
+                       write_lock_bh(&bond->curr_slave_lock);
                        bond_change_active_slave(bond, bond->primary_slave);
-                       write_unlock(&bond->curr_slave_lock);
+                       write_unlock_bh(&bond->curr_slave_lock);
+
+                       rtnl_unlock();
 
                        slave = bond->primary_slave;
                        slave->jiffies = jiffies;
@@ -2931,7 +3049,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
                                        printk(KERN_INFO DRV_NAME
                                               ": %s: backup interface %s is "
                                               "now down.\n",
-                                              bond_dev->name,
+                                              bond->dev->name,
                                               slave->dev->name);
                                }
                        }
@@ -2940,7 +3058,7 @@ void bond_activebackup_arp_mon(struct net_device *bond_dev)
 
 re_arm:
        if (bond->params.arp_interval) {
-               mod_timer(&bond->arp_timer, jiffies + delta_in_ticks);
+               queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
        }
 out:
        read_unlock(&bond->lock);
@@ -2961,7 +3079,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos)
 
        /* make sure the bond won't be taken away */
        read_lock(&dev_base_lock);
-       read_lock_bh(&bond->lock);
+       read_lock(&bond->lock);
 
        if (*pos == 0) {
                return SEQ_START_TOKEN;
@@ -2995,7 +3113,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
 {
        struct bonding *bond = seq->private;
 
-       read_unlock_bh(&bond->lock);
+       read_unlock(&bond->lock);
        read_unlock(&dev_base_lock);
 }
 
@@ -3010,9 +3128,15 @@ static void bond_info_show_master(struct seq_file *seq)
        curr = bond->curr_active_slave;
        read_unlock(&bond->curr_slave_lock);
 
-       seq_printf(seq, "Bonding Mode: %s\n",
+       seq_printf(seq, "Bonding Mode: %s",
                   bond_mode_name(bond->params.mode));
 
+       if (bond->params.mode == BOND_MODE_ACTIVEBACKUP &&
+           bond->params.fail_over_mac)
+               seq_printf(seq, " (fail_over_mac)");
+
+       seq_printf(seq, "\n");
+
        if (bond->params.mode == BOND_MODE_XOR ||
                bond->params.mode == BOND_MODE_8023AD) {
                seq_printf(seq, "Transmit Hash Policy: %s (%d)\n",
@@ -3292,7 +3416,10 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave
        switch (event) {
        case NETDEV_UNREGISTER:
                if (bond_dev) {
-                       bond_release(bond_dev, slave_dev);
+                       if (bond->setup_by_slave)
+                               bond_release_and_destroy(bond_dev, slave_dev);
+                       else
+                               bond_release(bond_dev, slave_dev);
                }
                break;
        case NETDEV_CHANGE:
@@ -3519,15 +3646,11 @@ static int bond_xmit_hash_policy_l2(struct sk_buff *skb,
 static int bond_open(struct net_device *bond_dev)
 {
        struct bonding *bond = bond_dev->priv;
-       struct timer_list *mii_timer = &bond->mii_timer;
-       struct timer_list *arp_timer = &bond->arp_timer;
 
        bond->kill_timers = 0;
 
        if ((bond->params.mode == BOND_MODE_TLB) ||
            (bond->params.mode == BOND_MODE_ALB)) {
-               struct timer_list *alb_timer = &(BOND_ALB_INFO(bond).alb_timer);
-
                /* bond_alb_initialize must be called before the timer
                 * is started.
                 */
@@ -3536,44 +3659,31 @@ static int bond_open(struct net_device *bond_dev)
                        return -1;
                }
 
-               init_timer(alb_timer);
-               alb_timer->expires  = jiffies + 1;
-               alb_timer->data     = (unsigned long)bond;
-               alb_timer->function = (void *)&bond_alb_monitor;
-               add_timer(alb_timer);
+               INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
+               queue_delayed_work(bond->wq, &bond->alb_work, 0);
        }
 
        if (bond->params.miimon) {  /* link check interval, in milliseconds. */
-               init_timer(mii_timer);
-               mii_timer->expires  = jiffies + 1;
-               mii_timer->data     = (unsigned long)bond_dev;
-               mii_timer->function = (void *)&bond_mii_monitor;
-               add_timer(mii_timer);
+               INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
+               queue_delayed_work(bond->wq, &bond->mii_work, 0);
        }
 
        if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
-               init_timer(arp_timer);
-               arp_timer->expires  = jiffies + 1;
-               arp_timer->data     = (unsigned long)bond_dev;
-               if (bond->params.mode == BOND_MODE_ACTIVEBACKUP) {
-                       arp_timer->function = (void *)&bond_activebackup_arp_mon;
-               } else {
-                       arp_timer->function = (void *)&bond_loadbalance_arp_mon;
-               }
+               if (bond->params.mode == BOND_MODE_ACTIVEBACKUP)
+                       INIT_DELAYED_WORK(&bond->arp_work,
+                                         bond_activebackup_arp_mon);
+               else
+                       INIT_DELAYED_WORK(&bond->arp_work,
+                                         bond_loadbalance_arp_mon);
+
+               queue_delayed_work(bond->wq, &bond->arp_work, 0);
                if (bond->params.arp_validate)
                        bond_register_arp(bond);
-
-               add_timer(arp_timer);
        }
 
        if (bond->params.mode == BOND_MODE_8023AD) {
-               struct timer_list *ad_timer = &(BOND_AD_INFO(bond).ad_timer);
-               init_timer(ad_timer);
-               ad_timer->expires  = jiffies + 1;
-               ad_timer->data     = (unsigned long)bond;
-               ad_timer->function = (void *)&bond_3ad_state_machine_handler;
-               add_timer(ad_timer);
-
+               INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
+               queue_delayed_work(bond->wq, &bond->ad_work, 0);
                /* register to receive LACPDUs */
                bond_register_lacpdu(bond);
        }
@@ -3601,25 +3711,21 @@ static int bond_close(struct net_device *bond_dev)
 
        write_unlock_bh(&bond->lock);
 
-       /* del_timer_sync must run without holding the bond->lock
-        * because a running timer might be trying to hold it too
-        */
-
        if (bond->params.miimon) {  /* link check interval, in milliseconds. */
-               del_timer_sync(&bond->mii_timer);
+               cancel_delayed_work(&bond->mii_work);
        }
 
        if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
-               del_timer_sync(&bond->arp_timer);
+               cancel_delayed_work(&bond->arp_work);
        }
 
        switch (bond->params.mode) {
        case BOND_MODE_8023AD:
-               del_timer_sync(&(BOND_AD_INFO(bond).ad_timer));
+               cancel_delayed_work(&bond->ad_work);
                break;
        case BOND_MODE_TLB:
        case BOND_MODE_ALB:
-               del_timer_sync(&(BOND_ALB_INFO(bond).alb_timer));
+               cancel_delayed_work(&bond->alb_work);
                break;
        default:
                break;
@@ -3716,13 +3822,13 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
                if (mii->reg_num == 1) {
                        struct bonding *bond = bond_dev->priv;
                        mii->val_out = 0;
-                       read_lock_bh(&bond->lock);
+                       read_lock(&bond->lock);
                        read_lock(&bond->curr_slave_lock);
                        if (netif_carrier_ok(bond->dev)) {
                                mii->val_out = BMSR_LSTATUS;
                        }
                        read_unlock(&bond->curr_slave_lock);
-                       read_unlock_bh(&bond->lock);
+                       read_unlock(&bond->lock);
                }
 
                return 0;
@@ -3939,8 +4045,12 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
 
        dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None"));
 
-       if (!bond->do_set_mac_addr)
-               return -EOPNOTSUPP;
+       /*
+        * If fail_over_mac is enabled, do nothing and return success.
+        * Returning an error causes ifenslave to fail.
+        */
+       if (bond->params.fail_over_mac)
+               return 0;
 
        if (!is_valid_ether_addr(sa->sa_data)) {
                return -EADDRNOTAVAIL;
@@ -4010,8 +4120,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
 {
        struct bonding *bond = bond_dev->priv;
        struct slave *slave, *start_at;
-       int i;
-       int res = 1;
+       int i, slave_no, res = 1;
 
        read_lock(&bond->lock);
 
@@ -4019,29 +4128,29 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev
                goto out;
        }
 
-       read_lock(&bond->curr_slave_lock);
-       slave = start_at = bond->curr_active_slave;
-       read_unlock(&bond->curr_slave_lock);
+       /*
+        * Concurrent TX may collide on rr_tx_counter; we accept that
+        * as being rare enough not to justify using an atomic op here
+        */
+       slave_no = bond->rr_tx_counter++ % bond->slave_cnt;
 
-       if (!slave) {
-               goto out;
+       bond_for_each_slave(bond, slave, i) {
+               slave_no--;
+               if (slave_no < 0) {
+                       break;
+               }
        }
 
+       start_at = slave;
        bond_for_each_slave_from(bond, slave, i, start_at) {
                if (IS_UP(slave->dev) &&
                    (slave->link == BOND_LINK_UP) &&
                    (slave->state == BOND_STATE_ACTIVE)) {
                        res = bond_dev_queue_xmit(bond, skb, slave->dev);
-
-                       write_lock(&bond->curr_slave_lock);
-                       bond->curr_active_slave = slave->next;
-                       write_unlock(&bond->curr_slave_lock);
-
                        break;
                }
        }
 
-
 out:
        if (res) {
                /* no suitable interface, frame not sent */
@@ -4273,12 +4382,18 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 
        bond->params = *params; /* copy params struct */
 
+       bond->wq = create_singlethread_workqueue(bond_dev->name);
+       if (!bond->wq)
+               return -ENOMEM;
+
        /* Initialize pointers */
        bond->first_slave = NULL;
        bond->curr_active_slave = NULL;
        bond->current_arp_slave = NULL;
        bond->primary_slave = NULL;
        bond->dev = bond_dev;
+       bond->send_grat_arp = 0;
+       bond->setup_by_slave = 0;
        INIT_LIST_HEAD(&bond->vlan_list);
 
        /* Initialize the device entry points */
@@ -4290,6 +4405,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
        bond_dev->set_multicast_list = bond_set_multicast_list;
        bond_dev->change_mtu = bond_change_mtu;
        bond_dev->set_mac_address = bond_set_mac_address;
+       bond_dev->validate_addr = NULL;
 
        bond_set_mode_ops(bond, bond->params.mode);
 
@@ -4327,10 +4443,6 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 #ifdef CONFIG_PROC_FS
        bond_create_proc_entry(bond);
 #endif
-
-       /* set do_set_mac_addr to true on startup */
-       bond->do_set_mac_addr = 1;
-
        list_add_tail(&bond->bond_list, &bond_dev_list);
 
        return 0;
@@ -4339,7 +4451,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
 /* De-initialize device specific data.
  * Caller must hold rtnl_lock.
  */
-void bond_deinit(struct net_device *bond_dev)
+static void bond_deinit(struct net_device *bond_dev)
 {
        struct bonding *bond = bond_dev->priv;
 
@@ -4664,6 +4776,11 @@ static int bond_check_params(struct bond_params *params)
                primary = NULL;
        }
 
+       if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP))
+               printk(KERN_WARNING DRV_NAME
+                      ": Warning: fail_over_mac only affects "
+                      "active-backup mode.\n");
+
        /* fill params struct with the proper values */
        params->mode = bond_mode;
        params->xmit_policy = xmit_hashtype;
@@ -4675,6 +4792,7 @@ static int bond_check_params(struct bond_params *params)
        params->use_carrier = use_carrier;
        params->lacp_fast = lacp_fast;
        params->primary[0] = 0;
+       params->fail_over_mac = fail_over_mac;
 
        if (primary) {
                strncpy(params->primary, primary, IFNAMSIZ);
@@ -4755,10 +4873,32 @@ out_rtnl:
        return res;
 }
 
+/* Set kill_timers under bond->lock, then cancel any pending monitor work. */
+static void bond_work_cancel_all(struct bonding *bond)
+{
+       write_lock_bh(&bond->lock);
+       bond->kill_timers = 1;
+       write_unlock_bh(&bond->lock);
+
+       if (bond->params.miimon && delayed_work_pending(&bond->mii_work))
+               cancel_delayed_work(&bond->mii_work);
+       if (bond->params.arp_interval && delayed_work_pending(&bond->arp_work))
+               cancel_delayed_work(&bond->arp_work);
+       /* bond_open() queues alb_work for TLB as well as ALB */
+       if ((bond->params.mode == BOND_MODE_TLB ||
+            bond->params.mode == BOND_MODE_ALB) &&
+           delayed_work_pending(&bond->alb_work))
+               cancel_delayed_work(&bond->alb_work);
+       if (bond->params.mode == BOND_MODE_8023AD &&
+           delayed_work_pending(&bond->ad_work))
+               cancel_delayed_work(&bond->ad_work);
+}
+
 static int __init bonding_init(void)
 {
        int i;
        int res;
+       struct bonding *bond, *nxt;
 
        printk(KERN_INFO "%s", version);
 
@@ -4785,6 +4925,11 @@ static int __init bonding_init(void)
 
        goto out;
 err:
+       list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) {
+               bond_work_cancel_all(bond);
+               destroy_workqueue(bond->wq);
+       }
+
        rtnl_lock();
        bond_free_all();
        bond_destroy_sysfs();