Merge branch 'for-linus' of git://git390.osdl.marist.edu/pub/scm/linux-2.6
[pandora-kernel.git] / drivers / md / md.c
index b95dd8a..53bd46d 100644 (file)
 #include <linux/raid/bitmap.h>
 #include <linux/sysctl.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
-#include <linux/suspend.h>
 #include <linux/poll.h>
 #include <linux/mutex.h>
 #include <linux/ctype.h>
+#include <linux/freezer.h>
 
 #include <linux/init.h>
 
@@ -389,8 +389,12 @@ static int super_written(struct bio *bio, unsigned int bytes_done, int error)
        if (bio->bi_size)
                return 1;
 
-       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags))
+       if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+               printk("md: super_written gets error=%d, uptodate=%d\n",
+                      error, test_bit(BIO_UPTODATE, &bio->bi_flags));
+               WARN_ON(test_bit(BIO_UPTODATE, &bio->bi_flags));
                md_error(mddev, rdev);
+       }
 
        if (atomic_dec_and_test(&mddev->pending_writes))
                wake_up(&mddev->sb_wait);
@@ -970,12 +974,13 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
  * version 1 superblock
  */
 
-static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
+static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb)
 {
-       unsigned int disk_csum, csum;
+       __le32 disk_csum;
+       u32 csum;
        unsigned long long newcsum;
        int size = 256 + le32_to_cpu(sb->max_dev)*2;
-       unsigned int *isuper = (unsigned int*)sb;
+       __le32 *isuper = (__le32*)sb;
        int i;
 
        disk_csum = sb->sb_csum;
@@ -985,7 +990,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
                newcsum += le32_to_cpu(*isuper++);
 
        if (size == 2)
-               newcsum += le16_to_cpu(*(unsigned short*) isuper);
+               newcsum += le16_to_cpu(*(__le16*) isuper);
 
        csum = (newcsum & 0xffffffff) + (newcsum >> 32);
        sb->sb_csum = disk_csum;
@@ -1102,7 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
        if (le32_to_cpu(sb->chunksize))
                rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1);
 
-       if (le32_to_cpu(sb->size) > rdev->size*2)
+       if (le64_to_cpu(sb->size) > rdev->size*2)
                return -EINVAL;
        return ret;
 }
@@ -1224,7 +1229,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
        else
                sb->resync_offset = cpu_to_le64(0);
 
-       sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
+       sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors));
 
        sb->raid_disks = cpu_to_le32(mddev->raid_disks);
        sb->size = cpu_to_le64(mddev->size<<1);
@@ -1408,7 +1413,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
        struct block_device *bdev;
        char b[BDEVNAME_SIZE];
 
-       bdev = open_partition_by_devnum(dev, FMODE_READ|FMODE_WRITE);
+       bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
        if (IS_ERR(bdev)) {
                printk(KERN_ERR "md: could not open %s.\n",
                        __bdevname(dev, b));
@@ -1418,7 +1423,7 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
        if (err) {
                printk(KERN_ERR "md: could not bd_claim %s.\n",
                        bdevname(bdev, b));
-               blkdev_put_partition(bdev);
+               blkdev_put(bdev);
                return err;
        }
        rdev->bdev = bdev;
@@ -1432,7 +1437,7 @@ static void unlock_rdev(mdk_rdev_t *rdev)
        if (!bdev)
                MD_BUG();
        bd_release(bdev);
-       blkdev_put_partition(bdev);
+       blkdev_put(bdev);
 }
 
 void md_autodetect_dev(dev_t dev);
@@ -1998,6 +2003,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
        kobject_init(&rdev->kobj);
 
        rdev->desc_nr = -1;
+       rdev->saved_raid_disk = -1;
        rdev->flags = 0;
        rdev->data_offset = 0;
        rdev->sb_events = 0;
@@ -3194,6 +3200,7 @@ static int do_md_run(mddev_t * mddev)
 
        mddev->changed = 1;
        md_new_event(mddev);
+       kobject_uevent(&mddev->gendisk->kobj, KOBJ_CHANGE);
        return 0;
 }
 
@@ -3409,6 +3416,7 @@ static void autorun_devices(int part)
 
        printk(KERN_INFO "md: autorun ...\n");
        while (!list_empty(&pending_raid_disks)) {
+               int unit;
                dev_t dev;
                LIST_HEAD(candidates);
                rdev0 = list_entry(pending_raid_disks.next,
@@ -3428,16 +3436,19 @@ static void autorun_devices(int part)
                 * mostly sane superblocks. It's time to allocate the
                 * mddev.
                 */
-               if (rdev0->preferred_minor < 0 || rdev0->preferred_minor >= MAX_MD_DEVS) {
+               if (part) {
+                       dev = MKDEV(mdp_major,
+                                   rdev0->preferred_minor << MdpMinorShift);
+                       unit = MINOR(dev) >> MdpMinorShift;
+               } else {
+                       dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
+                       unit = MINOR(dev);
+               }
+               if (rdev0->preferred_minor != unit) {
                        printk(KERN_INFO "md: unit number in %s is bad: %d\n",
                               bdevname(rdev0->bdev, b), rdev0->preferred_minor);
                        break;
                }
-               if (part)
-                       dev = MKDEV(mdp_major,
-                                   rdev0->preferred_minor << MdpMinorShift);
-               else
-                       dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
 
                md_probe(dev, NULL, NULL);
                mddev = mddev_find(dev);
@@ -3841,6 +3852,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
        }
        clear_bit(In_sync, &rdev->flags);
        rdev->desc_nr = -1;
+       rdev->saved_raid_disk = -1;
        err = bind_rdev_to_array(rdev, mddev);
        if (err)
                goto abort_export;
@@ -4034,11 +4046,8 @@ static int update_size(mddev_t *mddev, unsigned long size)
                return -EBUSY;
        ITERATE_RDEV(mddev,rdev,tmp) {
                sector_t avail;
-               if (rdev->sb_offset > rdev->data_offset)
-                       avail = (rdev->sb_offset*2) - rdev->data_offset;
-               else
-                       avail = get_capacity(rdev->bdev->bd_disk)
-                               - rdev->data_offset;
+               avail = rdev->size * 2;
+
                if (fit && (size == 0 || size > avail/2))
                        size = avail/2;
                if (avail < ((sector_t)size << 1))
@@ -4414,7 +4423,7 @@ static int md_open(struct inode *inode, struct file *file)
        mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
        int err;
 
-       if ((err = mddev_lock(mddev)))
+       if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
                goto out;
 
        err = 0;
@@ -4430,8 +4439,7 @@ static int md_release(struct inode *inode, struct file * file)
 {
        mddev_t *mddev = inode->i_bdev->bd_disk->private_data;
 
-       if (!mddev)
-               BUG();
+       BUG_ON(!mddev);
        mddev_put(mddev);
 
        return 0;
@@ -4478,6 +4486,7 @@ static int md_thread(void * arg)
         * many dirty RAID5 blocks.
         */
 
+       current->flags |= PF_NOFREEZE;
        allow_signal(SIGKILL);
        while (!kthread_should_stop()) {
 
@@ -4494,7 +4503,6 @@ static int md_thread(void * arg)
                         test_bit(THREAD_WAKEUP, &thread->flags)
                         || kthread_should_stop(),
                         thread->timeout);
-               try_to_freeze();
 
                clear_bit(THREAD_WAKEUP, &thread->flags);
 
@@ -4641,9 +4649,11 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
        seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)",
                   (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)?
                    "reshape" :
-                     (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
-                      "resync" : "recovery")),
-                     per_milli/10, per_milli % 10,
+                   (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)?
+                    "check" :
+                    (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
+                     "resync" : "recovery"))),
+                  per_milli/10, per_milli % 10,
                   (unsigned long long) resync,
                   (unsigned long long) max_blocks);
 
@@ -4836,8 +4846,8 @@ static int md_seq_show(struct seq_file *seq, void *v)
                                chunk_kb ? "KB" : "B");
                        if (bitmap->file) {
                                seq_printf(seq, ", file: ");
-                               seq_path(seq, bitmap->file->f_vfsmnt,
-                                        bitmap->file->f_dentry," \t\n");
+                               seq_path(seq, bitmap->file->f_path.mnt,
+                                        bitmap->file->f_path.dentry," \t\n");
                        }
 
                        seq_printf(seq, "\n");
@@ -4902,6 +4912,7 @@ static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
 }
 
 static struct file_operations md_seq_fops = {
+       .owner          = THIS_MODULE,
        .open           = md_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
@@ -5032,6 +5043,7 @@ void md_do_sync(mddev_t *mddev)
        int skipped = 0;
        struct list_head *rtmp;
        mdk_rdev_t *rdev;
+       char *desc;
 
        /* just incase thread restarts... */
        if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -5039,6 +5051,18 @@ void md_do_sync(mddev_t *mddev)
        if (mddev->ro) /* never try to sync a read-only array */
                return;
 
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
+               if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+                       desc = "data-check";
+               else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+                       desc = "requested-resync";
+               else
+                       desc = "resync";
+       } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+               desc = "reshape";
+       else
+               desc = "recovery";
+
        /* we overload curr_resync somewhat here.
         * 0 == not engaged in resync at all
         * 2 == checking that there is no conflict with another sync
@@ -5082,10 +5106,10 @@ void md_do_sync(mddev_t *mddev)
                                prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE);
                                if (!kthread_should_stop() &&
                                    mddev2->curr_resync >= mddev->curr_resync) {
-                                       printk(KERN_INFO "md: delaying resync of %s"
-                                              " until %s has finished resync (they"
+                                       printk(KERN_INFO "md: delaying %s of %s"
+                                              " until %s has finished (they"
                                               " share one or more physical units)\n",
-                                              mdname(mddev), mdname(mddev2));
+                                              desc, mdname(mddev), mdname(mddev2));
                                        mddev_put(mddev2);
                                        schedule();
                                        finish_wait(&resync_wait, &wq);
@@ -5121,12 +5145,12 @@ void md_do_sync(mddev_t *mddev)
                                j = rdev->recovery_offset;
        }
 
-       printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
-       printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
-               " %d KB/sec/disc.\n", speed_min(mddev));
+       printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev));
+       printk(KERN_INFO "md: minimum _guaranteed_  speed:"
+               " %d KB/sec/disk.\n", speed_min(mddev));
        printk(KERN_INFO "md: using maximum available idle IO bandwidth "
-              "(but not more than %d KB/sec) for reconstruction.\n",
-              speed_max(mddev));
+              "(but not more than %d KB/sec) for %s.\n",
+              speed_max(mddev), desc);
 
        is_mddev_idle(mddev); /* this also initializes IO event counters */
 
@@ -5152,8 +5176,8 @@ void md_do_sync(mddev_t *mddev)
 
        if (j>2) {
                printk(KERN_INFO 
-                       "md: resuming recovery of %s from checkpoint.\n",
-                       mdname(mddev));
+                      "md: resuming %s of %s from checkpoint.\n",
+                      desc, mdname(mddev));
                mddev->curr_resync = j;
        }
 
@@ -5236,7 +5260,7 @@ void md_do_sync(mddev_t *mddev)
                        }
                }
        }
-       printk(KERN_INFO "md: %s: sync done.\n",mdname(mddev));
+       printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
        /*
         * this also signals 'finished resyncing' to md_stop
         */
@@ -5256,8 +5280,8 @@ void md_do_sync(mddev_t *mddev)
                        if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
                                if (mddev->curr_resync >= mddev->recovery_cp) {
                                        printk(KERN_INFO
-                                              "md: checkpointing recovery of %s.\n",
-                                              mdname(mddev));
+                                              "md: checkpointing %s of %s.\n",
+                                              desc, mdname(mddev));
                                        mddev->recovery_cp = mddev->curr_resync;
                                }
                        } else
@@ -5509,22 +5533,15 @@ static void md_geninit(void)
 
 static int __init md_init(void)
 {
-       printk(KERN_INFO "md: md driver %d.%d.%d MAX_MD_DEVS=%d,"
-                       " MD_SB_DISKS=%d\n",
-                       MD_MAJOR_VERSION, MD_MINOR_VERSION,
-                       MD_PATCHLEVEL_VERSION, MAX_MD_DEVS, MD_SB_DISKS);
-       printk(KERN_INFO "md: bitmap version %d.%d\n", BITMAP_MAJOR_HI,
-                       BITMAP_MINOR);
-
        if (register_blkdev(MAJOR_NR, "md"))
                return -1;
        if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
                unregister_blkdev(MAJOR_NR, "md");
                return -1;
        }
-       blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE,
-                               md_probe, NULL, NULL);
-       blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE,
+       blk_register_region(MKDEV(MAJOR_NR, 0), 1UL<<MINORBITS, THIS_MODULE,
+                           md_probe, NULL, NULL);
+       blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE,
                            md_probe, NULL, NULL);
 
        register_reboot_notifier(&md_notifier);
@@ -5583,8 +5600,8 @@ static __exit void md_exit(void)
        mddev_t *mddev;
        struct list_head *tmp;
 
-       blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS);
-       blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift);
+       blk_unregister_region(MKDEV(MAJOR_NR,0), 1U << MINORBITS);
+       blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS);
 
        unregister_blkdev(MAJOR_NR,"md");
        unregister_blkdev(mdp_major, "mdp");