(unsigned long long)sh->sector, i, dev->toread,
dev->read, dev->towrite, dev->written,
test_bit(R5_LOCKED, &dev->flags));
- BUG();
+ WARN_ON(1);
}
dev->flags = 0;
raid5_build_block(sh, i, previous);
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
+ /* We have already checked bad blocks for reads. Now
+ * need to check for writes.
+ */
+ while ((rw & WRITE) && rdev &&
+ test_bit(WriteErrorSeen, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+ int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (!bad)
+ break;
+
+ if (bad < 0) {
+ set_bit(BlockedBadBlocks, &rdev->flags);
+ if (!conf->mddev->external &&
+ conf->mddev->flags) {
+ /* It is very unlikely, but we might
+ * still need to write out the
+ * bad block log - better give it
+ * a chance*/
+ md_check_recovery(conf->mddev);
+ }
+ md_wait_for_blocked_rdev(rdev, conf->mddev);
+ } else {
+ /* Acknowledged bad block - skip the write */
+ rdev_dec_pending(rdev, conf->mddev);
+ rdev = NULL;
+ }
+ }
+
if (rdev) {
if (s->syncing || s->expanding || s->expanded)
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
- if ((rw & WRITE) &&
- test_bit(R5_ReWrite, &sh->dev[i].flags))
- atomic_add(STRIPE_SECTORS,
- &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw & WRITE)
(unsigned long long)(sh->sector
+ rdev->data_offset),
bdevname(rdev->bdev, b));
+ atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
raid5_conf_t *conf = sh->raid_conf;
int disks = sh->disks, i;
int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
+ sector_t first_bad;
+ int bad_sectors;
for (i=0 ; i<disks; i++)
if (bi == &sh->dev[i].req)
return;
}
- if (!uptodate)
- md_error(conf->mddev, conf->disks[i].rdev);
+ if (!uptodate) {
+ set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags);
+ set_bit(R5_WriteError, &sh->dev[i].flags);
+ } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors))
+ set_bit(R5_MadeGood, &sh->dev[i].flags);
rdev_dec_pending(conf->disks[i].rdev, conf->mddev);
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
+ set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT
rcu_read_lock();
rdev = rcu_dereference(conf->disks[i].rdev);
if (rdev && test_bit(In_sync, &rdev->flags))
- /* multiple read failures in one stripe */
- md_error(conf->mddev, rdev);
+ atomic_inc(&rdev->nr_pending);
+ else
+ rdev = NULL;
rcu_read_unlock();
+ if (rdev) {
+ if (!rdev_set_badblocks(
+ rdev,
+ sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
}
spin_lock_irq(&conf->device_lock);
/* fail all writes first */
if (bitmap_end)
bitmap_endwrite(conf->mddev->bitmap, sh->sector,
STRIPE_SECTORS, 0, 0);
+ /* If we were in the middle of a write the parity block might
+ * still be locked - so just clear all R5_LOCKED flags
+ */
+ clear_bit(R5_LOCKED, &sh->dev[i].flags);
}
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
md_wakeup_thread(conf->mddev->thread);
}
+static void
+handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh,
+ struct stripe_head_state *s)
+{
+ int abort = 0;
+ int i;
+
+ md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
+ clear_bit(STRIPE_SYNCING, &sh->state);
+ s->syncing = 0;
+ /* There is nothing more to do for sync/check/repair.
+ * For recover we need to record a bad block on all
+ * non-sync devices, or abort the recovery
+ */
+ if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
+ return;
+ /* During recovery devices cannot be removed, so locking and
+ * refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ mdk_rdev_t *rdev = conf->disks[i].rdev;
+ if (!rdev
+ || test_bit(Faulty, &rdev->flags)
+ || test_bit(In_sync, &rdev->flags))
+ continue;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort) {
+ conf->recovery_disabled = conf->mddev->recovery_disabled;
+ set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ }
+}
+
/* fetch_block - checks the given member device to see if its data needs
* to be read or computed to satisfy a request.
*
spin_lock_irq(&conf->device_lock);
for (i=disks; i--; ) {
mdk_rdev_t *rdev;
+ sector_t first_bad;
+ int bad_sectors;
+ int is_bad = 0;
dev = &sh->dev[i];
if (dev->written)
s->written++;
rdev = rcu_dereference(conf->disks[i].rdev);
- if (s->blocked_rdev == NULL &&
- rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
- s->blocked_rdev = rdev;
- atomic_inc(&rdev->nr_pending);
+ if (rdev) {
+ is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS,
+ &first_bad, &bad_sectors);
+ if (s->blocked_rdev == NULL
+ && (test_bit(Blocked, &rdev->flags)
+ || is_bad < 0)) {
+ if (is_bad < 0)
+ set_bit(BlockedBadBlocks,
+ &rdev->flags);
+ s->blocked_rdev = rdev;
+ atomic_inc(&rdev->nr_pending);
+ }
}
clear_bit(R5_Insync, &dev->flags);
if (!rdev)
/* Not in-sync */;
- else if (test_bit(In_sync, &rdev->flags))
+ else if (is_bad) {
+ /* also not in-sync */
+ if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ /* treat as in-sync, but with a read error
+ * which we can now try to correct
+ */
+ set_bit(R5_Insync, &dev->flags);
+ set_bit(R5_ReadError, &dev->flags);
+ }
+ } else if (test_bit(In_sync, &rdev->flags))
set_bit(R5_Insync, &dev->flags);
else {
/* in sync if before recovery_offset */
if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset)
set_bit(R5_Insync, &dev->flags);
}
+ if (test_bit(R5_WriteError, &dev->flags)) {
+ clear_bit(R5_Insync, &dev->flags);
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_WriteError, &dev->flags);
+ }
+ if (test_bit(R5_MadeGood, &dev->flags)) {
+ if (!test_bit(Faulty, &rdev->flags)) {
+ s->handle_bad_blocks = 1;
+ atomic_inc(&rdev->nr_pending);
+ } else
+ clear_bit(R5_MadeGood, &dev->flags);
+ }
if (!test_bit(R5_Insync, &dev->flags)) {
/* The ReadError flag will just be confusing now */
clear_bit(R5_ReadError, &dev->flags);
analyse_stripe(sh, &s);
+ if (s.handle_bad_blocks) {
+ set_bit(STRIPE_HANDLE, &sh->state);
+ goto finish;
+ }
+
if (unlikely(s.blocked_rdev)) {
if (s.syncing || s.expanding || s.expanded ||
s.to_write || s.written) {
*/
if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written)
handle_failed_stripe(conf, sh, &s, disks, &s.return_bi);
- if (s.failed > conf->max_degraded && s.syncing) {
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
- clear_bit(STRIPE_SYNCING, &sh->state);
- s.syncing = 0;
- }
+ if (s.failed > conf->max_degraded && s.syncing)
+ handle_failed_sync(conf, sh, &s);
/*
* might be able to return some write requests if the parity blocks
if (unlikely(s.blocked_rdev))
md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+ if (s.handle_bad_blocks)
+ for (i = disks; i--; ) {
+ mdk_rdev_t *rdev;
+ struct r5dev *dev = &sh->dev[i];
+ if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
+ /* We own a safe reference to the rdev */
+ rdev = conf->disks[i].rdev;
+ if (!rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ md_error(conf->mddev, rdev);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
+ rdev = conf->disks[i].rdev;
+ rdev_clear_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS);
+ rdev_dec_pending(rdev, conf->mddev);
+ }
+ }
+
if (s.ops_request)
raid_run_ops(sh, s.ops_request);
ops_run_io(sh, &s);
-
if (s.dec_preread_active) {
/* We delay this until after ops_run_io so that if make_request
* is waiting on a flush, it won't continue until the writes
rcu_read_lock();
rdev = rcu_dereference(conf->disks[dd_idx].rdev);
if (rdev && test_bit(In_sync, &rdev->flags)) {
+ sector_t first_bad;
+ int bad_sectors;
+
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
raid_bio->bi_next = (void*)rdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
align_bi->bi_sector += rdev->data_offset;
- if (!bio_fits_rdev(align_bi)) {
- /* too big in some way */
+ if (!bio_fits_rdev(align_bi) ||
+ is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
+ &first_bad, &bad_sectors)) {
+ /* too big in some way, or has a known bad block */
bio_put(align_bi);
rdev_dec_pending(rdev, mddev);
return 0;
release_stripe(sh);
cond_resched();
+ if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
+ md_check_recovery(mddev);
+
spin_lock_irq(&conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
* isn't possible.
*/
if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->recovery_disabled != conf->recovery_disabled &&
!has_failed(conf) &&
number < conf->raid_disks) {
err = -EBUSY;
int first = 0;
int last = conf->raid_disks - 1;
+ if (mddev->recovery_disabled == conf->recovery_disabled)
+ return -EBUSY;
+
if (has_failed(conf))
/* no point adding a device */
return -EINVAL;
if (rdev->raid_disk < 0 &&
!test_bit(Faulty, &rdev->flags)) {
if (raid5_add_disk(mddev, rdev) == 0) {
- char nm[20];
if (rdev->raid_disk
>= conf->previous_raid_disks) {
set_bit(In_sync, &rdev->flags);
added_devices++;
} else
rdev->recovery_offset = 0;
- sprintf(nm, "rd%d", rdev->raid_disk);
- if (sysfs_create_link(&mddev->kobj,
- &rdev->kobj, nm))
+
+ if (sysfs_link_rdev(mddev, rdev))
/* Failure here is OK */;
}
} else if (rdev->raid_disk >= conf->previous_raid_disks
d++) {
mdk_rdev_t *rdev = conf->disks[d].rdev;
if (rdev && raid5_remove_disk(mddev, d) == 0) {
- char nm[20];
- sprintf(nm, "rd%d", rdev->raid_disk);
- sysfs_remove_link(&mddev->kobj, nm);
+ sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
}