X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=drivers%2Fmd%2Fraid10.c;h=8fb44da255aef2c55db22e569152242caafedc71;hp=685ddf325ee43f4492466c34a50a4a836c766e65;hb=33a3e89aacee6a385dd59ab4fa193ff2dd412d98;hpb=4a1dba72384193753e44e15d9d05a50be6587271 diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 685ddf325ee4..8fb44da255ae 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -390,7 +390,17 @@ static void raid10_end_write_request(struct bio *bio, int error) sector_t first_bad; int bad_sectors; - set_bit(R10BIO_Uptodate, &r10_bio->state); + /* + * Do not set R10BIO_Uptodate if the current device is + * rebuilding or Faulty. This is because we cannot use + * such a device for properly reading the data back (we could + * potentially use it, if the current write would have fallen + * before rdev->recovery_offset), but for simplicity we don't + * check this here. + */ + if (test_bit(In_sync, &conf->mirrors[dev].rdev->flags) && + !test_bit(Faulty, &conf->mirrors[dev].rdev->flags)) + set_bit(R10BIO_Uptodate, &r10_bio->state); /* Maybe we can clear some bad blocks. */ if (is_badblock(conf->mirrors[dev].rdev, @@ -790,9 +800,22 @@ static void wait_barrier(struct r10conf *conf) spin_lock_irq(&conf->resync_lock); if (conf->barrier) { conf->nr_waiting++; - wait_event_lock_irq(conf->wait_barrier, !conf->barrier, + /* Wait for the barrier to drop. + * However if there are already pending + * requests (preventing the barrier from + * rising completely), and the + * pre-process bio queue isn't empty, + * then don't wait, as we need to empty + * that queue to get the nr_pending + * count down. + */ + wait_event_lock_irq(conf->wait_barrier, + !conf->barrier || + (conf->nr_pending && + current->bio_list && + !bio_list_empty(current->bio_list)), conf->resync_lock, - ); + ); conf->nr_waiting--; } conf->nr_pending++; @@ -974,7 +997,7 @@ read_again: /* Could not read all from this device, so we will * need another r10_bio. 
*/ - sectors_handled = (r10_bio->sectors + max_sectors + sectors_handled = (r10_bio->sector + max_sectors - bio->bi_sector); r10_bio->sectors = max_sectors; spin_lock_irq(&conf->device_lock); @@ -982,7 +1005,7 @@ read_again: bio->bi_phys_segments = 2; else bio->bi_phys_segments++; - spin_unlock(&conf->device_lock); + spin_unlock_irq(&conf->device_lock); /* Cannot call generic_make_request directly * as that will be queued in __generic_make_request * and subsequent mempool_alloc might block @@ -1213,14 +1236,16 @@ static int enough(struct r10conf *conf, int ignore) do { int n = conf->copies; int cnt = 0; + int this = first; while (n--) { - if (conf->mirrors[first].rdev && - first != ignore) + if (conf->mirrors[this].rdev && + this != ignore) cnt++; - first = (first+1) % conf->raid_disks; + this = (this+1) % conf->raid_disks; } if (cnt == 0) return 0; + first = (first + conf->near_copies) % conf->raid_disks; } while (first != 0); return 1; } @@ -1247,14 +1272,15 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) spin_lock_irqsave(&conf->device_lock, flags); mddev->degraded++; spin_unlock_irqrestore(&conf->device_lock, flags); - /* - * if recovery is running, make sure it aborts. - */ - set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + /* + * If recovery is running, make sure it aborts. 
+ */ + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_CHANGE_PENDING, &mddev->flags); printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" "md/raid10:%s: Operation continuing on %d devices.\n", @@ -1906,7 +1932,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 if (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, WRITE) + s, conf->tmppage, WRITE) == 0) { /* Well, this device is dead */ printk(KERN_NOTICE @@ -1943,7 +1969,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 switch (r10_sync_page_io(rdev, r10_bio->devs[sl].addr + sect, - s<<9, conf->tmppage, + s, conf->tmppage, READ)) { case 0: /* Well, this device is dead */ @@ -2106,7 +2132,7 @@ read_more: rdev = conf->mirrors[mirror].rdev; printk_ratelimited( KERN_ERR - "md/raid10:%s: %s: redirecting" + "md/raid10:%s: %s: redirecting " "sector %llu to another mirror\n", mdname(mddev), bdevname(rdev->bdev, b), @@ -2190,6 +2216,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) } put_buf(r10_bio); } else { + bool fail = false; for (m = 0; m < conf->copies; m++) { int dev = r10_bio->devs[m].devnum; struct bio *bio = r10_bio->devs[m].bio; @@ -2202,6 +2229,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } else if (bio != NULL && !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + fail = true; if (!narrow_write_error(r10_bio, m)) { md_error(conf->mddev, rdev); set_bit(R10BIO_Degraded, @@ -2210,10 +2238,18 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_dec_pending(rdev, conf->mddev); } } - if (test_bit(R10BIO_WriteError, - &r10_bio->state)) - close_write(r10_bio); - raid_end_bio_io(r10_bio); + if (fail) { + spin_lock_irq(&conf->device_lock); + 
list_add(&r10_bio->retry_list, &conf->bio_end_io_list); + conf->nr_queued++; + spin_unlock_irq(&conf->device_lock); + md_wakeup_thread(conf->mddev->thread); + } else { + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } } } @@ -2227,6 +2263,31 @@ static void raid10d(struct mddev *mddev) md_check_recovery(mddev); + if (!list_empty_careful(&conf->bio_end_io_list) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + LIST_HEAD(tmp); + spin_lock_irqsave(&conf->device_lock, flags); + if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) { + while (!list_empty(&conf->bio_end_io_list)) { + list_move(conf->bio_end_io_list.prev, &tmp); + conf->nr_queued--; + } + } + spin_unlock_irqrestore(&conf->device_lock, flags); + while (!list_empty(&tmp)) { + r10_bio = list_first_entry(&tmp, + struct r10bio, retry_list); + list_del(&r10_bio->retry_list); + if (mddev->degraded) + set_bit(R10BIO_Degraded, &r10_bio->state); + + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } + } + blk_start_plug(&plug); for (;;) { @@ -2423,6 +2484,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, /* want to reconstruct this device */ rb2 = r10_bio; sect = raid10_find_virt(conf, sector_nr, i); + if (sect >= mddev->resync_max_sectors) { + /* last stripe is not complete - don't + * try to recover this sector. 
+ */ + continue; + } /* Unless we are doing a full sync, we only need * to recover the block if it is set in the bitmap */ @@ -2532,10 +2599,6 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, if (j == conf->copies) { /* Cannot recover, so abort the recovery or * record a bad block */ - put_buf(r10_bio); - if (rb2) - atomic_dec(&rb2->remaining); - r10_bio = rb2; if (any_working) { /* problem is that there are bad blocks * on other device(s) @@ -2559,6 +2622,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, conf->mirrors[i].recovery_disabled = mddev->recovery_disabled; } + put_buf(r10_bio); + if (rb2) + atomic_dec(&rb2->remaining); + r10_bio = rb2; break; } } @@ -2620,7 +2687,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, else { bad_sectors -= (sector - first_bad); if (max_sync > bad_sectors) - max_sync = max_sync; + max_sync = bad_sectors; continue; } } @@ -2829,6 +2896,7 @@ static struct r10conf *setup_conf(struct mddev *mddev) spin_lock_init(&conf->device_lock); INIT_LIST_HEAD(&conf->retry_list); + INIT_LIST_HEAD(&conf->bio_end_io_list); spin_lock_init(&conf->resync_lock); init_waitqueue_head(&conf->wait_barrier);