Merge commit '3ff195b011d7decf501a4d55aeed312731094796' into for-linus
[pandora-kernel.git] / drivers / md / md.c
index f48ba41..46b3a04 100644 (file)
@@ -49,6 +49,7 @@
 #include <linux/delay.h>
 #include <linux/raid/md_p.h>
 #include <linux/raid/md_u.h>
+#include <linux/slab.h>
 #include "md.h"
 #include "bitmap.h"
 
@@ -240,7 +241,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
        atomic_inc(&mddev->active_io);
        rcu_read_unlock();
 
-       rv = mddev->pers->make_request(q, bio);
+       rv = mddev->pers->make_request(mddev, bio);
 
        cpu = part_stat_lock();
        part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -254,6 +255,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
        return rv;
 }
 
+/* mddev_suspend makes sure no new requests are submitted
+ * to the device, and that any requests that have been submitted
+ * are completely handled.
+ * Once ->stop is called and completes, the module will be completely
+ * unused.
+ */
 static void mddev_suspend(mddev_t *mddev)
 {
        BUG_ON(mddev->suspended);
@@ -261,13 +268,6 @@ static void mddev_suspend(mddev_t *mddev)
        synchronize_rcu();
        wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
        mddev->pers->quiesce(mddev, 1);
-       md_unregister_thread(mddev->thread);
-       mddev->thread = NULL;
-       /* we now know that no code is executing in the personality module,
-        * except possibly the tail end of a ->bi_end_io function, but that
-        * is certain to complete before the module has a chance to get
-        * unloaded
-        */
 }
 
 static void mddev_resume(mddev_t *mddev)
@@ -354,7 +354,7 @@ static void md_submit_barrier(struct work_struct *ws)
                bio_endio(bio, 0);
        else {
                bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
-               if (mddev->pers->make_request(mddev->queue, bio))
+               if (mddev->pers->make_request(mddev, bio))
                        generic_make_request(bio);
                mddev->barrier = POST_REQUEST_BARRIER;
                submit_barriers(mddev);
@@ -416,6 +416,27 @@ static void mddev_put(mddev_t *mddev)
        spin_unlock(&all_mddevs_lock);
 }
 
+static void mddev_init(mddev_t *mddev) /* set up the locks, lists, timer, counters, wait queues and default values of *mddev; mddev_find calls this on its 'new' mddev */
+{
+       mutex_init(&mddev->open_mutex);
+       mutex_init(&mddev->reconfig_mutex);
+       mutex_init(&mddev->bitmap_info.mutex);
+       INIT_LIST_HEAD(&mddev->disks); /* empty to begin with; bind_rdev_to_array later adds each rdev's same_set entry to this list */
+       INIT_LIST_HEAD(&mddev->all_mddevs);
+       init_timer(&mddev->safemode_timer);
+       atomic_set(&mddev->active, 1); /* NOTE(review): starts at 1 rather than 0 - presumably the creator's own reference; confirm against mddev_put */
+       atomic_set(&mddev->openers, 0);
+       atomic_set(&mddev->active_io, 0); /* md_make_request increments this per request; mddev_suspend waits on sb_wait until it is 0 again */
+       spin_lock_init(&mddev->write_lock);
+       atomic_set(&mddev->flush_pending, 0);
+       init_waitqueue_head(&mddev->sb_wait);
+       init_waitqueue_head(&mddev->recovery_wait);
+       mddev->reshape_position = MaxSector; /* NOTE(review): MaxSector presumably means "no reshape in progress" - confirm */
+       mddev->resync_min = 0;
+       mddev->resync_max = MaxSector;
+       mddev->level = LEVEL_NONE; /* NOTE(review): presumably "no RAID level selected yet"; level_store also falls back to LEVEL_NONE for non-numeric input */
+}
+
 static mddev_t * mddev_find(dev_t unit)
 {
        mddev_t *mddev, *new = NULL;
@@ -482,23 +503,7 @@ static mddev_t * mddev_find(dev_t unit)
        else
                new->md_minor = MINOR(unit) >> MdpMinorShift;
 
-       mutex_init(&new->open_mutex);
-       mutex_init(&new->reconfig_mutex);
-       mutex_init(&new->bitmap_info.mutex);
-       INIT_LIST_HEAD(&new->disks);
-       INIT_LIST_HEAD(&new->all_mddevs);
-       init_timer(&new->safemode_timer);
-       atomic_set(&new->active, 1);
-       atomic_set(&new->openers, 0);
-       atomic_set(&new->active_io, 0);
-       spin_lock_init(&new->write_lock);
-       atomic_set(&new->flush_pending, 0);
-       init_waitqueue_head(&new->sb_wait);
-       init_waitqueue_head(&new->recovery_wait);
-       new->reshape_position = MaxSector;
-       new->resync_min = 0;
-       new->resync_max = MaxSector;
-       new->level = LEVEL_NONE;
+       mddev_init(new);
 
        goto retry;
 }
@@ -1066,10 +1071,13 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                                mddev->bitmap_info.default_offset;
 
        } else if (mddev->pers == NULL) {
-               /* Insist on good event counter while assembling */
+               /* Insist on good event counter while assembling, except
+                * for spares (which don't need an event count) */
                ++ev1;
-               if (ev1 < mddev->events) 
-                       return -EINVAL;
+               if (sb->disks[rdev->desc_nr].state & (
+                           (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))
+                       if (ev1 < mddev->events) 
+                               return -EINVAL;
        } else if (mddev->bitmap) {
                /* if adding to array with a bitmap, then we can accept an
                 * older device ... but not too old.
@@ -1465,10 +1473,14 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                }
 
        } else if (mddev->pers == NULL) {
-               /* Insist of good event counter while assembling */
+               /* Insist on good event counter while assembling, except for
+                * spares (which don't need an event count) */
                ++ev1;
-               if (ev1 < mddev->events)
-                       return -EINVAL;
+               if (rdev->desc_nr >= 0 &&
+                   rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
+                   le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < 0xfffe)
+                       if (ev1 < mddev->events)
+                               return -EINVAL;
        } else if (mddev->bitmap) {
                /* If adding to array with a bitmap, then we can accept an
                 * older device, but not too old.
@@ -1803,7 +1815,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
                kobject_del(&rdev->kobj);
                goto fail;
        }
-       rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
+       rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state");
 
        list_add_rcu(&rdev->same_set, &mddev->disks);
        bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
@@ -2084,7 +2096,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
                if (rdev->sb_events == mddev->events ||
                    (nospares &&
                     rdev->raid_disk < 0 &&
-                    (rdev->sb_events&1)==0 &&
                     rdev->sb_events+1 == mddev->events)) {
                        /* Don't update this superblock */
                        rdev->sb_loaded = 2;
@@ -2137,22 +2148,14 @@ repeat:
         * and 'events' is odd, we can roll back to the previous clean state */
        if (nospares
            && (mddev->in_sync && mddev->recovery_cp == MaxSector)
-           && (mddev->events & 1)
-           && mddev->events != 1)
+           && mddev->can_decrease_events
+           && mddev->events != 1) {
                mddev->events--;
-       else {
+               mddev->can_decrease_events = 0;
+       } else {
                /* otherwise we have to go forward and ... */
                mddev->events ++;
-               if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                       /* .. if the array isn't clean, an 'even' event must also go
-                        * to spares. */
-                       if ((mddev->events&1)==0)
-                               nospares = 0;
-               } else {
-                       /* otherwise an 'odd' event must go to spares */
-                       if ((mddev->events&1))
-                               nospares = 0;
-               }
+               mddev->can_decrease_events = nospares;
        }
 
        if (!mddev->events) {
@@ -2681,7 +2684,7 @@ static void rdev_free(struct kobject *ko)
        mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
        kfree(rdev);
 }
-static struct sysfs_ops rdev_sysfs_ops = {
+static const struct sysfs_ops rdev_sysfs_ops = {
        .show           = rdev_attr_show,
        .store          = rdev_attr_store,
 };
@@ -2930,9 +2933,10 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
-       char level[16];
+       char clevel[16];
        ssize_t rv = len;
        struct mdk_personality *pers;
+       long level;
        void *priv;
        mdk_rdev_t *rdev;
 
@@ -2965,19 +2969,22 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
        }
 
        /* Now find the new personality */
-       if (len == 0 || len >= sizeof(level))
+       if (len == 0 || len >= sizeof(clevel))
                return -EINVAL;
-       strncpy(level, buf, len);
-       if (level[len-1] == '\n')
+       strncpy(clevel, buf, len);
+       if (clevel[len-1] == '\n')
                len--;
-       level[len] = 0;
+       clevel[len] = 0;
+       if (strict_strtol(clevel, 10, &level))
+               level = LEVEL_NONE;
 
-       request_module("md-%s", level);
+       if (request_module("md-%s", clevel) != 0)
+               request_module("md-level-%s", clevel);
        spin_lock(&pers_lock);
-       pers = find_pers(LEVEL_NONE, level);
+       pers = find_pers(level, clevel);
        if (!pers || !try_module_get(pers->owner)) {
                spin_unlock(&pers_lock);
-               printk(KERN_WARNING "md: personality %s not loaded\n", level);
+               printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
                return -EINVAL;
        }
        spin_unlock(&pers_lock);
@@ -2990,7 +2997,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
        if (!pers->takeover) {
                module_put(pers->owner);
                printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
-                      mdname(mddev), level);
+                      mdname(mddev), clevel);
                return -EINVAL;
        }
 
@@ -3006,7 +3013,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
                mddev->delta_disks = 0;
                module_put(pers->owner);
                printk(KERN_WARNING "md: %s: %s would not accept array\n",
-                      mdname(mddev), level);
+                      mdname(mddev), clevel);
                return PTR_ERR(priv);
        }
 
@@ -3021,7 +3028,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
                        printk(KERN_WARNING
                               "md: cannot register extra attributes for %s\n",
                               mdname(mddev));
-               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
+               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
        }               
        if (mddev->pers->sync_request != NULL &&
            pers->sync_request == NULL) {
@@ -3071,6 +3078,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
        sysfs_notify(&mddev->kobj, NULL, "level");
+       md_new_event(mddev);
        return rv;
 }
 
@@ -4139,7 +4147,7 @@ static void md_free(struct kobject *ko)
        kfree(mddev);
 }
 
-static struct sysfs_ops md_sysfs_ops = {
+static const struct sysfs_ops md_sysfs_ops = {
        .show   = md_attr_show,
        .store  = md_attr_store,
 };
@@ -4253,7 +4261,7 @@ static int md_alloc(dev_t dev, char *name)
        mutex_unlock(&disks_mutex);
        if (!error) {
                kobject_uevent(&mddev->kobj, KOBJ_ADD);
-               mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
+               mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state");
        }
        mddev_put(mddev);
        return error;
@@ -4466,7 +4474,7 @@ static int md_run(mddev_t *mddev)
                        printk(KERN_WARNING
                               "md: cannot register extra attributes for %s\n",
                               mdname(mddev));
-               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
+               mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
        } else if (mddev->ro == 2) /* auto-readonly not meaningful */
                mddev->ro = 0;
 
@@ -4591,6 +4599,7 @@ static void md_clean(mddev_t *mddev)
        mddev->layout = 0;
        mddev->max_disks = 0;
        mddev->events = 0;
+       mddev->can_decrease_events = 0;
        mddev->delta_disks = 0;
        mddev->new_level = LEVEL_NONE;
        mddev->new_layout = 0;