return ret;
}
+/*
+ * Congestion callback for the array's request queue.
+ *
+ * @data: the mddev_t of the array (passed back as an opaque pointer).
+ * @bits: congestion bits, handed unchanged to bdi_congested().
+ *
+ * Walks the mirrors under rcu_read_lock() and asks each member that is
+ * present and not Faulty whether its backing device is congested.  The
+ * scan stops at the first member reporting a non-zero result, and that
+ * result is returned; 0 means no usable member reported congestion.
+ */
+static int raid10_congested(void *data, int bits)
+{
+	mddev_t *mddev = data;
+	conf_t *conf = mddev_to_conf(mddev);
+	int congested = 0;
+	int disk;
+
+	rcu_read_lock();
+	for (disk = 0; disk < mddev->raid_disks; disk++) {
+		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		request_queue_t *q;
+
+		/* Empty slots and failed members cannot contribute. */
+		if (!rdev || test_bit(Faulty, &rdev->flags))
+			continue;
+
+		q = bdev_get_queue(rdev->bdev);
+		congested = bdi_congested(&q->backing_dev_info, bits);
+		/* One congested member is enough; no need to look further. */
+		if (congested)
+			break;
+	}
+	rcu_read_unlock();
+
+	return congested;
+}
+
+
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
+ const int do_sync = bio_sync(bio);
struct bio_list bl;
unsigned long flags;
mirror->rdev->data_offset;
read_bio->bi_bdev = mirror->rdev->bdev;
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_rw = READ;
+ read_bio->bi_rw = READ | do_sync;
read_bio->bi_private = r10_bio;
generic_make_request(read_bio);
conf->mirrors[d].rdev->data_offset;
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_rw = WRITE;
+ mbio->bi_rw = WRITE | do_sync;
mbio->bi_private = r10_bio;
atomic_inc(&r10_bio->remaining);
blk_plug_device(mddev->queue);
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (do_sync)
+ md_wakeup_thread(mddev->thread);
+
return 0;
}
seq_printf(seq, " %d far-copies", conf->far_copies);
}
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
- conf->working_disks);
+ conf->raid_disks - mddev->degraded);
for (i = 0; i < conf->raid_disks; i++)
seq_printf(seq, "%s",
conf->mirrors[i].rdev &&
* else mark the drive as failed
*/
if (test_bit(In_sync, &rdev->flags)
- && conf->working_disks == 1)
+ && conf->raid_disks-mddev->degraded == 1)
/*
* Don't fail the drive, just return an IO error.
* The test should really be more sophisticated than
* really dead" tests...
*/
return;
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
- conf->working_disks--;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
- mddev->sb_dirty = 1;
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n",
- bdevname(rdev->bdev,b), conf->working_disks);
+ bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
static void print_conf(conf_t *conf)
printk("(!conf)\n");
return;
}
- printk(" --- wd:%d rd:%d\n", conf->working_disks,
+ printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks);
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i;
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
- && !test_bit(In_sync, &tmp->rdev->flags)) {
- conf->working_disks++;
+ && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &tmp->rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
"raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",
mdname(mddev), s,
- (unsigned long long)sect+
- rdev->data_offset,
+ (unsigned long long)(sect+
+ rdev->data_offset),
bdevname(rdev->bdev, b));
rdev_dec_pending(rdev, mddev);
(unsigned long long)r10_bio->sector);
raid_end_bio_io(r10_bio);
} else {
+ const int do_sync = bio_sync(r10_bio->master_bio);
rdev = conf->mirrors[mirror].rdev;
if (printk_ratelimit())
printk(KERN_ERR "raid10: %s: redirecting sector %llu to"
bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
- bio->bi_rw = READ;
+ bio->bi_rw = READ | do_sync;
bio->bi_private = r10_bio;
bio->bi_end_io = raid10_end_read_request;
unplug = 1;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
- bio->bi_rw = 0;
+ bio->bi_rw = READ;
bio->bi_sector = r10_bio->devs[j].addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_write;
- bio->bi_rw = 1;
+ bio->bi_rw = WRITE;
bio->bi_sector = r10_bio->devs[k].addr +
conf->mirrors[i].rdev->data_offset;
bio->bi_bdev = conf->mirrors[i].rdev->bdev;
biolist = bio;
bio->bi_private = r10_bio;
bio->bi_end_io = end_sync_read;
- bio->bi_rw = 0;
+ bio->bi_rw = READ;
bio->bi_sector = r10_bio->devs[i].addr +
conf->mirrors[d].rdev->data_offset;
bio->bi_bdev = conf->mirrors[d].rdev->bdev;
mddev->queue->max_sectors = (PAGE_SIZE>>9);
disk->head_position = 0;
- if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags))
- conf->working_disks++;
}
conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;
disk = conf->mirrors + i;
if (!disk->rdev ||
- !test_bit(In_sync, &rdev->flags)) {
+ !test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
mddev->degraded++;
}
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->issue_flush_fn = raid10_issue_flush;
+ mddev->queue->backing_dev_info.congested_fn = raid10_congested;
+ mddev->queue->backing_dev_info.congested_data = mddev;
/* Calculate max read-ahead size.
* We need to readahead at least twice a whole stripe....