md: Don't truncate size at 4TB for RAID0 and Linear
[pandora-kernel.git] / drivers / md / md.c
index 84acfe7..2887f22 100644 (file)
@@ -392,6 +392,8 @@ void mddev_suspend(struct mddev *mddev)
        synchronize_rcu();
        wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
        mddev->pers->quiesce(mddev, 1);
+
+       del_timer_sync(&mddev->safemode_timer);
 }
 EXPORT_SYMBOL_GPL(mddev_suspend);
 
@@ -451,7 +453,7 @@ static void submit_flushes(struct work_struct *ws)
                        atomic_inc(&rdev->nr_pending);
                        atomic_inc(&rdev->nr_pending);
                        rcu_read_unlock();
-                       bi = bio_alloc_mddev(GFP_KERNEL, 0, mddev);
+                       bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
                        bi->bi_end_io = md_end_flush;
                        bi->bi_private = rdev;
                        bi->bi_bdev = rdev->bdev;
@@ -570,7 +572,7 @@ static void mddev_put(struct mddev *mddev)
            mddev->ctime == 0 && !mddev->hold_active) {
                /* Array is not configured at all, and not held active,
                 * so destroy it */
-               list_del(&mddev->all_mddevs);
+               list_del_init(&mddev->all_mddevs);
                bs = mddev->bio_set;
                mddev->bio_set = NULL;
                if (mddev->gendisk) {
@@ -1142,8 +1144,11 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
                        ret = 0;
        }
        rdev->sectors = rdev->sb_start;
-       /* Limit to 4TB as metadata cannot record more than that */
-       if (rdev->sectors >= (2ULL << 32))
+       /* Limit to 4TB as metadata cannot record more than that.
+        * (not needed for Linear and RAID0 as metadata doesn't
+        * record this size)
+        */
+       if (rdev->sectors >= (2ULL << 32) && sb->level >= 1)
                rdev->sectors = (2ULL << 32) - 2;
 
        if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
@@ -1425,7 +1430,7 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
        /* Limit to 4TB as metadata cannot record more than that.
         * 4TB == 2^32 KB, or 2*2^32 sectors.
         */
-       if (num_sectors >= (2ULL << 32))
+       if (num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1)
                num_sectors = (2ULL << 32) - 2;
        md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
                       rdev->sb_page);
@@ -1801,13 +1806,13 @@ retry:
                                                | BB_LEN(internal_bb));
                                *bbp++ = cpu_to_le64(store_bb);
                        }
+                       bb->changed = 0;
                        if (read_seqretry(&bb->lock, seq))
                                goto retry;
 
                        bb->sector = (rdev->sb_start +
                                      (int)le32_to_cpu(sb->bblog_offset));
                        bb->size = le16_to_cpu(sb->bblog_size);
-                       bb->changed = 0;
                }
        }
 
@@ -2362,6 +2367,7 @@ repeat:
                        clear_bit(MD_CHANGE_PENDING, &mddev->flags);
                        list_for_each_entry(rdev, &mddev->disks, same_set) {
                                if (rdev->badblocks.changed) {
+                                       rdev->badblocks.changed = 0;
                                        md_ack_all_badblocks(&rdev->badblocks);
                                        md_error(mddev, rdev);
                                }
@@ -2546,7 +2552,8 @@ state_show(struct md_rdev *rdev, char *page)
                sep = ",";
        }
        if (test_bit(Blocked, &rdev->flags) ||
-           rdev->badblocks.unacked_exist) {
+           (rdev->badblocks.unacked_exist
+            && !test_bit(Faulty, &rdev->flags))) {
                len += sprintf(page+len, "%sblocked", sep);
                sep = ",";
        }
@@ -3696,8 +3703,8 @@ array_state_show(struct mddev *mddev, char *page)
        return sprintf(page, "%s\n", array_states[st]);
 }
 
-static int do_md_stop(struct mddev * mddev, int ro, int is_open);
-static int md_set_readonly(struct mddev * mddev, int is_open);
+static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
+static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
 static int do_md_run(struct mddev * mddev);
 static int restart_array(struct mddev *mddev);
 
@@ -3713,14 +3720,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
                /* stopping an active array */
                if (atomic_read(&mddev->openers) > 0)
                        return -EBUSY;
-               err = do_md_stop(mddev, 0, 0);
+               err = do_md_stop(mddev, 0, NULL);
                break;
        case inactive:
                /* stopping an active array */
                if (mddev->pers) {
                        if (atomic_read(&mddev->openers) > 0)
                                return -EBUSY;
-                       err = do_md_stop(mddev, 2, 0);
+                       err = do_md_stop(mddev, 2, NULL);
                } else
                        err = 0; /* already inactive */
                break;
@@ -3728,7 +3735,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
                break; /* not supported yet */
        case readonly:
                if (mddev->pers)
-                       err = md_set_readonly(mddev, 0);
+                       err = md_set_readonly(mddev, NULL);
                else {
                        mddev->ro = 1;
                        set_disk_ro(mddev->gendisk, 1);
@@ -3738,7 +3745,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
        case read_auto:
                if (mddev->pers) {
                        if (mddev->ro == 0)
-                               err = md_set_readonly(mddev, 0);
+                               err = md_set_readonly(mddev, NULL);
                        else if (mddev->ro == 1)
                                err = restart_array(mddev);
                        if (err == 0) {
@@ -3788,6 +3795,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
        if (err)
                return err;
        else {
+               if (mddev->hold_active == UNTIL_IOCTL)
+                       mddev->hold_active = 0;
                sysfs_notify_dirent_safe(mddev->sysfs_state);
                return len;
        }
@@ -4487,11 +4496,20 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 
        if (!entry->show)
                return -EIO;
+       spin_lock(&all_mddevs_lock);
+       if (list_empty(&mddev->all_mddevs)) {
+               spin_unlock(&all_mddevs_lock);
+               return -EBUSY;
+       }
+       mddev_get(mddev);
+       spin_unlock(&all_mddevs_lock);
+
        rv = mddev_lock(mddev);
        if (!rv) {
                rv = entry->show(mddev, page);
                mddev_unlock(mddev);
        }
+       mddev_put(mddev);
        return rv;
 }
 
@@ -4507,13 +4525,19 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
                return -EIO;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
+       spin_lock(&all_mddevs_lock);
+       if (list_empty(&mddev->all_mddevs)) {
+               spin_unlock(&all_mddevs_lock);
+               return -EBUSY;
+       }
+       mddev_get(mddev);
+       spin_unlock(&all_mddevs_lock);
        rv = mddev_lock(mddev);
-       if (mddev->hold_active == UNTIL_IOCTL)
-               mddev->hold_active = 0;
        if (!rv) {
                rv = entry->store(mddev, page, length);
                mddev_unlock(mddev);
        }
+       mddev_put(mddev);
        return rv;
 }
 
@@ -5057,15 +5081,17 @@ void md_stop(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop);
 
-static int md_set_readonly(struct mddev *mddev, int is_open)
+static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
        int err = 0;
        mutex_lock(&mddev->open_mutex);
-       if (atomic_read(&mddev->openers) > is_open) {
+       if (atomic_read(&mddev->openers) > !!bdev) {
                printk("md: %s still in use.\n",mdname(mddev));
                err = -EBUSY;
                goto out;
        }
+       if (bdev)
+               sync_blockdev(bdev);
        if (mddev->pers) {
                __md_stop_writes(mddev);
 
@@ -5087,18 +5113,26 @@ out:
  *   0 - completely stop and dis-assemble array
  *   2 - stop but do not disassemble array
  */
-static int do_md_stop(struct mddev * mddev, int mode, int is_open)
+static int do_md_stop(struct mddev * mddev, int mode,
+                     struct block_device *bdev)
 {
        struct gendisk *disk = mddev->gendisk;
        struct md_rdev *rdev;
 
        mutex_lock(&mddev->open_mutex);
-       if (atomic_read(&mddev->openers) > is_open ||
+       if (atomic_read(&mddev->openers) > !!bdev ||
            mddev->sysfs_active) {
                printk("md: %s still in use.\n",mdname(mddev));
                mutex_unlock(&mddev->open_mutex);
                return -EBUSY;
        }
+       if (bdev)
+               /* It is possible IO was issued on some other
+                * open file which was closed before we took ->open_mutex.
+                * As that was not the last close __blkdev_put will not
+                * have called sync_blockdev, so we must.
+                */
+               sync_blockdev(bdev);
 
        if (mddev->pers) {
                if (mddev->ro)
@@ -5172,7 +5206,7 @@ static void autorun_array(struct mddev *mddev)
        err = do_md_run(mddev);
        if (err) {
                printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
-               do_md_stop(mddev, 0, 0);
+               do_md_stop(mddev, 0, NULL);
        }
 }
 
@@ -6163,11 +6197,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
                        goto done_unlock;
 
                case STOP_ARRAY:
-                       err = do_md_stop(mddev, 0, 1);
+                       err = do_md_stop(mddev, 0, bdev);
                        goto done_unlock;
 
                case STOP_ARRAY_RO:
-                       err = md_set_readonly(mddev, 1);
+                       err = md_set_readonly(mddev, bdev);
                        goto done_unlock;
 
                case BLKROSET:
@@ -7342,8 +7376,7 @@ static int remove_and_add_spares(struct mddev *mddev)
                                        spares++;
                                        md_new_event(mddev);
                                        set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                               } else
-                                       break;
+                               }
                        }
                }
        }
@@ -7840,6 +7873,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                  s + rdev->data_offset, sectors, acknowledged);
        if (rv) {
                /* Make sure they get written out promptly */
+               sysfs_notify_dirent_safe(rdev->sysfs_state);
                set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
                md_wakeup_thread(rdev->mddev->thread);
        }
@@ -8079,30 +8113,24 @@ static int md_notify_reboot(struct notifier_block *this,
        struct mddev *mddev;
        int need_delay = 0;
 
-       if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
-
-               printk(KERN_INFO "md: stopping all md devices.\n");
-
-               for_each_mddev(mddev, tmp) {
-                       if (mddev_trylock(mddev)) {
-                               /* Force a switch to readonly even array
-                                * appears to still be in use.  Hence
-                                * the '100'.
-                                */
-                               md_set_readonly(mddev, 100);
-                               mddev_unlock(mddev);
-                       }
-                       need_delay = 1;
+       for_each_mddev(mddev, tmp) {
+               if (mddev_trylock(mddev)) {
+                       if (mddev->pers)
+                               __md_stop_writes(mddev);
+                       mddev->safemode = 2;
+                       mddev_unlock(mddev);
                }
-               /*
-                * certain more exotic SCSI devices are known to be
-                * volatile wrt too early system reboots. While the
-                * right place to handle this issue is the given
-                * driver, we do want to have a safe RAID driver ...
-                */
-               if (need_delay)
-                       mdelay(1000*1);
+               need_delay = 1;
        }
+       /*
+        * certain more exotic SCSI devices are known to be
+        * volatile wrt too early system reboots. While the
+        * right place to handle this issue is the given
+        * driver, we do want to have a safe RAID driver ...
+        */
+       if (need_delay)
+               mdelay(1000*1);
+
        return NOTIFY_DONE;
 }