wake_up(&conf->wait_barrier);
if (sectors_handled < (bio->bi_size >> 9)) {
- /* We need another r1_bio. It has already been counted
+ /* We need another r10_bio. It has already been counted
* in bio->bi_phys_segments.
*/
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
- else {
+ else
+ /* The write handler will notice the lack of
+ * R10BIO_Uptodate and record any errors etc
+ */
atomic_add(r10_bio->sectors,
&conf->mirrors[d].rdev->corrected_errors);
- if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
- md_error(r10_bio->mddev,
- conf->mirrors[d].rdev);
- }
/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
}
}
-static void end_sync_write(struct bio *bio, int error)
+static void end_sync_request(r10bio_t *r10_bio)
{
- int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- r10bio_t *r10_bio = bio->bi_private;
mddev_t *mddev = r10_bio->mddev;
- conf_t *conf = mddev->private;
- int d;
- sector_t first_bad;
- int bad_sectors;
- int slot;
-
- d = find_bio_disk(conf, r10_bio, bio, &slot);
- if (!uptodate)
- md_error(mddev, conf->mirrors[d].rdev);
- else if (is_badblock(conf->mirrors[d].rdev,
- r10_bio->devs[slot].addr,
- r10_bio->sectors,
- &first_bad, &bad_sectors))
- set_bit(R10BIO_MadeGood, &r10_bio->state);
-
- rdev_dec_pending(conf->mirrors[d].rdev, mddev);
while (atomic_dec_and_test(&r10_bio->remaining)) {
if (r10_bio->master_bio == NULL) {
/* the primary of several recovery bios */
sector_t s = r10_bio->sectors;
- if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
reschedule_retry(r10_bio);
else
put_buf(r10_bio);
break;
} else {
r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio;
- if (test_bit(R10BIO_MadeGood, &r10_bio->state))
+ if (test_bit(R10BIO_MadeGood, &r10_bio->state) ||
+ test_bit(R10BIO_WriteError, &r10_bio->state))
reschedule_retry(r10_bio);
else
put_buf(r10_bio);
}
}
+static void end_sync_write(struct bio *bio, int error)
+{
+ int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+ r10bio_t *r10_bio = bio->bi_private;
+ mddev_t *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ int d;
+ sector_t first_bad;
+ int bad_sectors;
+ int slot;
+
+ d = find_bio_disk(conf, r10_bio, bio, &slot);
+
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags);
+ set_bit(R10BIO_WriteError, &r10_bio->state);
+ } else if (is_badblock(conf->mirrors[d].rdev,
+ r10_bio->devs[slot].addr,
+ r10_bio->sectors,
+ &first_bad, &bad_sectors))
+ set_bit(R10BIO_MadeGood, &r10_bio->state);
+
+ rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+
+ end_sync_request(r10_bio);
+}
+
/*
* Note: sync and recover and handled very differently for raid10
* This code is for resync.
* The second for writing.
*
*/
+static void fix_recovery_read_error(r10bio_t *r10_bio)
+{
+ /* We got a read error during recovery.
+ * We repeat the read in smaller page-sized sections.
+ * If a read succeeds, write it to the new device or record
+ * a bad block if we cannot.
+ * If a read fails, record a bad block on both old and
+ * new devices.
+ */
+ mddev_t *mddev = r10_bio->mddev;
+ conf_t *conf = mddev->private;
+ struct bio *bio = r10_bio->devs[0].bio;
+ sector_t sect = 0;
+ int sectors = r10_bio->sectors;
+ int idx = 0;
+ int dr = r10_bio->devs[0].devnum;
+ int dw = r10_bio->devs[1].devnum;
+
+ while (sectors) {
+ int s = sectors;
+ mdk_rdev_t *rdev;
+ sector_t addr;
+ int ok;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ rdev = conf->mirrors[dr].rdev;
+ addr = r10_bio->devs[0].addr + sect,
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ READ, false);
+ if (ok) {
+ rdev = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = sync_page_io(rdev,
+ addr,
+ s << 9,
+ bio->bi_io_vec[idx].bv_page,
+ WRITE, false);
+ if (!ok)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ }
+ if (!ok) {
+ /* We don't worry if we cannot set a bad block -
+ * it really is bad so there is no loss in not
+ * recording it yet
+ */
+ rdev_set_badblocks(rdev, addr, s, 0);
+
+ if (rdev != conf->mirrors[dw].rdev) {
+ /* need bad block on destination too */
+ mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev;
+ addr = r10_bio->devs[1].addr + sect;
+ ok = rdev_set_badblocks(rdev2, addr, s, 0);
+ if (!ok) {
+ /* just abort the recovery */
+ printk(KERN_NOTICE
+ "md/raid10:%s: recovery aborted"
+ " due to read error\n",
+ mdname(mddev));
+
+ conf->mirrors[dw].recovery_disabled
+ = mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery);
+ break;
+ }
+ }
+ }
+
+ sectors -= s;
+ sect += s;
+ idx++;
+ }
+}
static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
{
int d;
struct bio *wbio;
+ if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) {
+ fix_recovery_read_error(r10_bio);
+ end_sync_request(r10_bio);
+ return;
+ }
+
/*
* share the pages with the first bio
* and submit the write request
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
- if (test_bit(R10BIO_Uptodate, &r10_bio->state))
- generic_make_request(wbio);
- else {
- printk(KERN_NOTICE
- "md/raid10:%s: recovery aborted due to read error\n",
- mdname(mddev));
- conf->mirrors[d].recovery_disabled = mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- bio_endio(wbio, 0);
- }
+ generic_make_request(wbio);
}
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}
+static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+ && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ return -1;
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
/*
* This is a kernel thread which:
*
rcu_read_unlock();
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
+ /* Cannot read from anywhere, just mark the block
+ * as bad on the first device to discourage future
+ * reads.
+ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
- md_error(mddev, conf->mirrors[dn].rdev);
+ rdev = conf->mirrors[dn].rdev;
+
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[r10_bio->read_slot].addr
+ + sect,
+ s, 0))
+ md_error(mddev, rdev);
break;
}
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage, WRITE, false)
+ if (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
- md_error(mddev, rdev);
}
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage,
- READ, false) == 0) {
+ switch (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage,
+ READ)) {
+ case 0:
/* Well, this device is dead */
printk(KERN_NOTICE
"md/raid10:%s: unable to read back "
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
-
- md_error(mddev, rdev);
- } else {
+ break;
+ case 1:
printk(KERN_INFO
"md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",
/* Some sort of write request has finished and it
* succeeded in writing where we thought there was a
* bad block. So forget the bad block.
+ * Or possibly if failed and we need to record
+ * a bad block.
*/
int m;
mdk_rdev_t *rdev;
if (test_bit(R10BIO_IsSync, &r10_bio->state) ||
test_bit(R10BIO_IsRecover, &r10_bio->state)) {
- for (m = 0; m < conf->copies; m++)
- if (r10_bio->devs[m].bio &&
- test_bit(BIO_UPTODATE,
+ for (m = 0; m < conf->copies; m++) {
+ int dev = r10_bio->devs[m].devnum;
+ rdev = conf->mirrors[dev].rdev;
+ if (r10_bio->devs[m].bio == NULL)
+ continue;
+ if (test_bit(BIO_UPTODATE,
&r10_bio->devs[m].bio->bi_flags)) {
- int dev = r10_bio->devs[m].devnum;
- rdev = conf->mirrors[dev].rdev;
rdev_clear_badblocks(
rdev,
r10_bio->devs[m].addr,
r10_bio->sectors);
+ } else {
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[m].addr,
+ r10_bio->sectors, 0))
+ md_error(conf->mddev, rdev);
}
+ }
put_buf(r10_bio);
} else {
for (m = 0; m < conf->copies; m++) {
for (j=0; j<conf->copies;j++) {
int k;
int d = r10_bio->devs[j].devnum;
+ sector_t from_addr, to_addr;
mdk_rdev_t *rdev;
sector_t sector, first_bad;
int bad_sectors;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
bio->bi_rw = READ;
- bio->bi_sector = r10_bio->devs[j].addr +
+ from_addr = r10_bio->devs[j].addr;
+ bio->bi_sector = from_addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
atomic_inc(&conf->mirrors[d].rdev->nr_pending);
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
bio->bi_rw = WRITE;
- bio->bi_sector = r10_bio->devs[k].addr +
+ to_addr = r10_bio->devs[k].addr;
+ bio->bi_sector = to_addr +
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
r10_bio->devs[0].devnum = d;
+ r10_bio->devs[0].addr = from_addr;
r10_bio->devs[1].devnum = i;
+ r10_bio->devs[1].addr = to_addr;
break;
}