X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=drivers%2Fmd%2Fraid10.c;h=8fb44da255aef2c55db22e569152242caafedc71;hp=c025a8276dc18b5844e7a6870b42236ac45bd86c;hb=33a3e89aacee6a385dd59ab4fa193ff2dd412d98;hpb=2cbb6160b56038aad0ce0b05bed7a75d6f086171 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c025a8276dc1..8fb44da255ae 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "md.h" @@ -389,7 +390,17 @@ static void raid10_end_write_request(struct bio *bio, int error) sector_t first_bad; int bad_sectors; - set_bit(R10BIO_Uptodate, &r10_bio->state); + /* + * Do not set R10BIO_Uptodate if the current device is + * rebuilding or Faulty. This is because we cannot use + * such device for properly reading the data back (we could + * potentially use it, if the current write would have felt + * before rdev->recovery_offset, but for simplicity we don't + * check this here. + */ + if (test_bit(In_sync, &conf->mirrors[dev].rdev->flags) && + !test_bit(Faulty, &conf->mirrors[dev].rdev->flags)) + set_bit(R10BIO_Uptodate, &r10_bio->state); /* Maybe we can clear some bad blocks. */ if (is_badblock(conf->mirrors[dev].rdev, @@ -789,9 +800,22 @@ static void wait_barrier(struct r10conf *conf) spin_lock_irq(&conf->resync_lock); if (conf->barrier) { conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + /* Wait for the barrier to drop. + * However if there are already pending + * requests (preventing the barrier from + * rising completely), and the + * pre-process bio queue isn't empty, + * then don't wait, as we need to empty + * that queue to get the nr_pending + * count down. + */ + wait_event_lock_irq(conf->wait_barrier, + !conf->barrier || + (conf->nr_pending && + current->bio_list && + !bio_list_empty(current->bio_list)), conf->resync_lock, - ); + ); conf->nr_waiting--; } conf->nr_pending++; @@ -842,7 +866,7 @@ static void unfreeze_array(struct r10conf *conf) spin_unlock_irq(&conf->resync_lock); } -static int make_request(struct mddev *mddev, struct bio * bio) +static void make_request(struct mddev *mddev, struct bio * bio) { struct r10conf *conf = mddev->private; struct mirror_info *mirror; @@ -861,7 +885,7 @@ static int make_request(struct mddev *mddev, struct bio * bio) if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); - return 0; + return; } /* If this request crosses a chunk boundary, we need to @@ -893,10 +917,8 @@ static int make_request(struct mddev *mddev, struct bio * bio) conf->nr_waiting++; spin_unlock_irq(&conf->resync_lock); - if (make_request(mddev, &bp->bio1)) - generic_make_request(&bp->bio1); - if (make_request(mddev, &bp->bio2)) - generic_make_request(&bp->bio2); + make_request(mddev, &bp->bio1); + make_request(mddev, &bp->bio2); spin_lock_irq(&conf->resync_lock); conf->nr_waiting--; @@ -904,14 +926,14 @@ static int make_request(struct mddev *mddev, struct bio * bio) spin_unlock_irq(&conf->resync_lock); bio_pair_release(bp); - return 0; + return; bad_map: printk("md/raid10:%s: make_request bug: can't convert block across chunks" " or bigger than %dk %llu %d\n", mdname(mddev), chunk_sects/2, (unsigned long long)bio->bi_sector, bio->bi_size >> 10); bio_io_error(bio); - return 0; + return; } md_write_start(mddev, bio); @@ -954,7 +976,7 @@ read_again: slot = r10_bio->read_slot; if (disk < 0) { raid_end_bio_io(r10_bio); - return 0; + return; } mirror = conf->mirrors + disk; @@ -975,7 +997,7 @@ read_again: /* Could not read all from this device, so we will * need another r10_bio. */ - sectors_handled = (r10_bio->sectors + max_sectors + sectors_handled = (r10_bio->sector + max_sectors - bio->bi_sector); r10_bio->sectors = max_sectors; spin_lock_irq(&conf->device_lock); @@ -983,7 +1005,7 @@ read_again: bio->bi_phys_segments = 2; else bio->bi_phys_segments++; - spin_unlock(&conf->device_lock); + spin_unlock_irq(&conf->device_lock); /* Cannot call generic_make_request directly * as that will be queued in __generic_make_request * and subsequent mempool_alloc might block @@ -1002,7 +1024,7 @@ read_again: goto read_again; } else generic_make_request(read_bio); - return 0; + return; } /* @@ -1176,7 +1198,6 @@ retry_write: if (do_sync || !mddev->bitmap || !plugged) md_wakeup_thread(mddev->thread); - return 0; } static void status(struct seq_file *seq, struct mddev *mddev) @@ -1215,14 +1236,16 @@ static int enough(struct r10conf *conf, int ignore) do { int n = conf->copies; int cnt = 0; + int this = first; while (n--) { - if (conf->mirrors[first].rdev && - first != ignore) + if (conf->mirrors[this].rdev && + this != ignore) cnt++; - first = (first+1) % conf->raid_disks; + this = (this+1) % conf->raid_disks; } if (cnt == 0) return 0; + first = (first + conf->near_copies) % conf->raid_disks; } while (first != 0); return 1; } @@ -1249,14 +1272,15 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + /* + * If recovery is running, make sure it aborts. + */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" "md/raid10:%s: Operation continuing on %d devices.\n", @@ -1908,7 +1932,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 if (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, WRITE) + s, conf->tmppage, WRITE) == 0) { /* Well, this device is dead */ printk(KERN_NOTICE @@ -1945,7 +1969,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 switch (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, + s, conf->tmppage, READ)) { case 0: /* Well, this device is dead */ @@ -2108,7 +2132,7 @@ read_more: rdev = conf->mirrors[mirror].rdev; printk_ratelimited( KERN_ERR - "md/raid10:%s: %s: redirecting" + "md/raid10:%s: %s: redirecting " "sector %llu to another mirror\n", mdname(mddev), bdevname(rdev->bdev, b), @@ -2192,6 +2216,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) } put_buf(r10_bio); } else { + bool fail = false; for (m = 0; m < conf->copies; m++) { int dev = r10_bio->devs[m].devnum; struct bio *bio = r10_bio->devs[m].bio; @@ -2204,6 +2229,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } else if (bio != NULL && !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + fail = true; if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); set_bit(R10BIO_Degraded, @@ -2212,10 +2238,18 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } } - if (test_bit(R10BIO_WriteError, - &r10_bio->state)) - close_write(r10_bio); - raid_end_bio_io(r10_bio); + if (fail) { + spin_lock_irq(&conf->device_lock); + list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(conf->mddev->thread); + } else { + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } } } @@ -2229,6 +2263,31 @@ static void raid10d(struct mddev *mddev) md_check_recovery(mddev); + if (!list_empty_careful(&conf->bio_end_io_list) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + LIST_HEAD(tmp); + spin_lock_irqsave(&conf->device_lock, flags); + if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } + } + spin_unlock_irqrestore(&conf->device_lock, flags); + while (!list_empty(&tmp)) { + r10_bio = list_first_entry(&conf->bio_end_io_list, + struct r10bio, retry_list); + list_del(&r10_bio->retry_list); + if (mddev->degraded) + set_bit(R10BIO_Degraded, &r10_bio->state); + + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } + } + blk_start_plug(&plug); for (;;) { @@ -2425,6 +2484,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* want to reconstruct this device */ rb2 = r10_bio; sect = raid10_find_virt(conf, sector_nr, i); + if (sect >= mddev->resync_max_sectors) { + /* last stripe is not complete - don't + * try to recover this sector. + */ + continue; + } /* Unless we are doing a full sync, we only need * to recover the block if it is set in the bitmap */ @@ -2534,10 +2599,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (j == conf->copies) { /* Cannot recover, so abort the recovery or * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; if (any_working) { /* problem is that there are bad blocks * on other device(s) @@ -2561,6 +2622,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, conf->mirrors[i].recovery_disabled = mddev->recovery_disabled; } + put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; break; } } @@ -2622,7 +2687,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, else { bad_sectors -= (sector - first_bad); if (max_sync > bad_sectors) - max_sync = max_sync; + max_sync = bad_sectors; continue; } } @@ -2831,6 +2896,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); + INIT_LIST_HEAD(&conf->bio_end_io_list); spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier);