[PATCH] make dm-mirror not issue invalid resync requests
[pandora-kernel.git] / drivers / md / raid10.c
index 5bd1e9e..713dc9c 100644 (file)
@@ -47,7 +47,7 @@
 
 static void unplug_slaves(mddev_t *mddev);
 
-static void * r10bio_pool_alloc(unsigned int __nocast gfp_flags, void *data)
+static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
        conf_t *conf = data;
        r10bio_t *r10_bio;
@@ -81,7 +81,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
  * one for write (we recover only one drive per r10buf)
  *
  */
-static void * r10buf_pool_alloc(unsigned int __nocast gfp_flags, void *data)
+static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 {
        conf_t *conf = data;
        struct page *page;
@@ -496,6 +496,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
        int disk, slot, nslot;
        const int sectors = r10_bio->sectors;
        sector_t new_distance, current_distance;
+       mdk_rdev_t *rdev;
 
        raid10_find_phys(conf, r10_bio);
        rcu_read_lock();
@@ -510,8 +511,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
                slot = 0;
                disk = r10_bio->devs[slot].devnum;
 
-               while (!conf->mirrors[disk].rdev ||
-                      !conf->mirrors[disk].rdev->in_sync) {
+               while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+                      !test_bit(In_sync, &rdev->flags)) {
                        slot++;
                        if (slot == conf->copies) {
                                slot = 0;
@@ -527,8 +528,8 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
        /* make sure the disk is operational */
        slot = 0;
        disk = r10_bio->devs[slot].devnum;
-       while (!conf->mirrors[disk].rdev ||
-              !conf->mirrors[disk].rdev->in_sync) {
+       while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+              !test_bit(In_sync, &rdev->flags)) {
                slot ++;
                if (slot == conf->copies) {
                        disk = -1;
@@ -547,11 +548,15 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
                int ndisk = r10_bio->devs[nslot].devnum;
 
 
-               if (!conf->mirrors[ndisk].rdev ||
-                   !conf->mirrors[ndisk].rdev->in_sync)
+               if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+                   !test_bit(In_sync, &rdev->flags))
                        continue;
 
-               if (!atomic_read(&conf->mirrors[ndisk].rdev->nr_pending)) {
+               /* This optimisation is debatable, and completely destroys
+                * sequential read speed for 'far copies' arrays.  So only
+                * keep it for 'near' arrays, and review those later.
+                */
+               if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
                        disk = ndisk;
                        slot = nslot;
                        break;
@@ -569,7 +574,7 @@ rb_out:
        r10_bio->read_slot = slot;
 /*     conf->next_seq_sect = this_sector + sectors;*/
 
-       if (disk >= 0 && conf->mirrors[disk].rdev)
+       if (disk >= 0 && (rdev=rcu_dereference(conf->mirrors[disk].rdev))!= NULL)
                atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
        rcu_read_unlock();
 
@@ -583,8 +588,8 @@ static void unplug_slaves(mddev_t *mddev)
 
        rcu_read_lock();
        for (i=0; i<mddev->raid_disks; i++) {
-               mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-               if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
+               mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+               if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
                        request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
                        atomic_inc(&rdev->nr_pending);
@@ -614,8 +619,8 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
 
        rcu_read_lock();
        for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-               mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-               if (rdev && !rdev->faulty) {
+               mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+               if (rdev && !test_bit(Faulty, &rdev->flags)) {
                        struct block_device *bdev = rdev->bdev;
                        request_queue_t *r_queue = bdev_get_queue(bdev);
 
@@ -668,6 +673,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
        struct bio *read_bio;
        int i;
        int chunk_sects = conf->chunk_mask + 1;
+       const int rw = bio_data_dir(bio);
 
        if (unlikely(bio_barrier(bio))) {
                bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
@@ -718,13 +724,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
        conf->nr_pending++;
        spin_unlock_irq(&conf->resync_lock);
 
-       if (bio_data_dir(bio)==WRITE) {
-               disk_stat_inc(mddev->gendisk, writes);
-               disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bio));
-       } else {
-               disk_stat_inc(mddev->gendisk, reads);
-               disk_stat_add(mddev->gendisk, read_sectors, bio_sectors(bio));
-       }
+       disk_stat_inc(mddev->gendisk, ios[rw]);
+       disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
 
        r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
 
@@ -734,7 +735,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
        r10_bio->mddev = mddev;
        r10_bio->sector = bio->bi_sector;
 
-       if (bio_data_dir(bio) == READ) {
+       if (rw == READ) {
                /*
                 * read balancing logic:
                 */
@@ -772,9 +773,10 @@ static int make_request(request_queue_t *q, struct bio * bio)
        rcu_read_lock();
        for (i = 0;  i < conf->copies; i++) {
                int d = r10_bio->devs[i].devnum;
-               if (conf->mirrors[d].rdev &&
-                   !conf->mirrors[d].rdev->faulty) {
-                       atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+               mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
+               if (rdev &&
+                   !test_bit(Faulty, &rdev->flags)) {
+                       atomic_inc(&rdev->nr_pending);
                        r10_bio->devs[i].bio = bio;
                } else
                        r10_bio->devs[i].bio = NULL;
@@ -828,7 +830,7 @@ static void status(struct seq_file *seq, mddev_t *mddev)
        for (i = 0; i < conf->raid_disks; i++)
                seq_printf(seq, "%s",
                              conf->mirrors[i].rdev &&
-                             conf->mirrors[i].rdev->in_sync ? "U" : "_");
+                             test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
        seq_printf(seq, "]");
 }
 
@@ -843,7 +845,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         * next level up know.
         * else mark the drive as failed
         */
-       if (rdev->in_sync
+       if (test_bit(In_sync, &rdev->flags)
            && conf->working_disks == 1)
                /*
                 * Don't fail the drive, just return an IO error.
@@ -853,7 +855,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 * really dead" tests...
                 */
                return;
-       if (rdev->in_sync) {
+       if (test_bit(In_sync, &rdev->flags)) {
                mddev->degraded++;
                conf->working_disks--;
                /*
@@ -861,8 +863,8 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                 */
                set_bit(MD_RECOVERY_ERR, &mddev->recovery);
        }
-       rdev->in_sync = 0;
-       rdev->faulty = 1;
+       clear_bit(In_sync, &rdev->flags);
+       set_bit(Faulty, &rdev->flags);
        mddev->sb_dirty = 1;
        printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
                "       Operation continuing on %d devices\n",
@@ -887,7 +889,8 @@ static void print_conf(conf_t *conf)
                tmp = conf->mirrors + i;
                if (tmp->rdev)
                        printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-                               i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
+                               i, !test_bit(In_sync, &tmp->rdev->flags),
+                               !test_bit(Faulty, &tmp->rdev->flags),
                                bdevname(tmp->rdev->bdev,b));
        }
 }
@@ -940,11 +943,11 @@ static int raid10_spare_active(mddev_t *mddev)
        for (i = 0; i < conf->raid_disks; i++) {
                tmp = conf->mirrors + i;
                if (tmp->rdev
-                   && !tmp->rdev->faulty
-                   && !tmp->rdev->in_sync) {
+                   && !test_bit(Faulty, &tmp->rdev->flags)
+                   && !test_bit(In_sync, &tmp->rdev->flags)) {
                        conf->working_disks++;
                        mddev->degraded--;
-                       tmp->rdev->in_sync = 1;
+                       set_bit(In_sync, &tmp->rdev->flags);
                }
        }
 
@@ -984,7 +987,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                        p->head_position = 0;
                        rdev->raid_disk = mirror;
                        found = 1;
-                       p->rdev = rdev;
+                       rcu_assign_pointer(p->rdev, rdev);
                        break;
                }
 
@@ -1002,7 +1005,7 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
        print_conf(conf);
        rdev = p->rdev;
        if (rdev) {
-               if (rdev->in_sync ||
+               if (test_bit(In_sync, &rdev->flags) ||
                    atomic_read(&rdev->nr_pending)) {
                        err = -EBUSY;
                        goto abort;
@@ -1418,7 +1421,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
                for (i=0 ; i<conf->raid_disks; i++)
                        if (conf->mirrors[i].rdev &&
-                           !conf->mirrors[i].rdev->in_sync) {
+                           !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
                                /* want to reconstruct this device */
                                r10bio_t *rb2 = r10_bio;
 
@@ -1439,7 +1442,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                                for (j=0; j<conf->copies;j++) {
                                        int d = r10_bio->devs[j].devnum;
                                        if (conf->mirrors[d].rdev &&
-                                           conf->mirrors[d].rdev->in_sync) {
+                                           test_bit(In_sync, &conf->mirrors[d].rdev->flags)) {
                                                /* This is where we read from */
                                                bio = r10_bio->devs[0].bio;
                                                bio->bi_next = biolist;
@@ -1515,7 +1518,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                        bio = r10_bio->devs[i].bio;
                        bio->bi_end_io = NULL;
                        if (conf->mirrors[d].rdev == NULL ||
-                           conf->mirrors[d].rdev->faulty)
+                           test_bit(Faulty, &conf->mirrors[d].rdev->flags))
                                continue;
                        atomic_inc(&conf->mirrors[d].rdev->nr_pending);
                        atomic_inc(&r10_bio->remaining);
@@ -1701,7 +1704,7 @@ static int run(mddev_t *mddev)
                        mddev->queue->max_sectors = (PAGE_SIZE>>9);
 
                disk->head_position = 0;
-               if (!rdev->faulty && rdev->in_sync)
+               if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags))
                        conf->working_disks++;
        }
        conf->raid_disks = mddev->raid_disks;