X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fraid5.c;h=0ef5362c8d028ff6c6362a66292d2bd8d0b4aa22;hb=88ba2aa586c874681c072101287e15d40de7e6e2;hp=062df846fd6212ed64ed771870f567bde0d19fc3;hpb=ab69ae12ceef7f23c578a3c230144e94a167a821;p=pandora-kernel.git diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 062df846fd62..0ef5362c8d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -854,11 +851,11 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } - tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, + tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -948,7 +945,6 @@ static int grow_stripes(raid5_conf_t *conf, int num) return 0; } -#ifdef CONFIG_MD_RAID5_RESHAPE static int resize_stripes(raid5_conf_t *conf, int newsize) { /* Make all the stripes able to hold 'newsize' devices. @@ -1073,7 +1069,6 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) conf->pool_size = newsize; return err; } -#endif static int drop_one_stripe(raid5_conf_t *conf) { @@ -2689,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); @@ -3639,10 +3634,9 @@ static int make_request(struct request_queue *q, struct bio * bi) retry: previous = 0; + disks = conf->raid_disks; prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); - if (likely(conf->reshape_progress == MaxSector)) - disks = conf->raid_disks; - else { + if (unlikely(conf->reshape_progress != MaxSector)) { /* spinlock is needed as reshape_progress may be * 64bit on a 32bit platform, and so it might be * possible to see a half-updated value @@ -3652,7 +3646,6 @@ static int make_request(struct request_queue *q, struct bio * bi) * to check again. */ spin_lock_irq(&conf->device_lock); - disks = conf->raid_disks; if (mddev->delta_disks < 0 ? logical_sector < conf->reshape_progress : logical_sector >= conf->reshape_progress) { @@ -3681,7 +3674,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sh = get_active_stripe(conf, new_sector, previous, (bi->bi_rw&RWA_MASK)); if (sh) { - if (unlikely(conf->reshape_progress != MaxSector)) { + if (unlikely(previous)) { /* expansion might have moved on while waiting for a * stripe, so we must do the range check again. * Expansion could still move past after this @@ -3692,10 +3685,9 @@ static int make_request(struct request_queue *q, struct bio * bi) */ int must_retry = 0; spin_lock_irq(&conf->device_lock); - if ((mddev->delta_disks < 0 - ? logical_sector >= conf->reshape_progress - : logical_sector < conf->reshape_progress) - && previous) + if (mddev->delta_disks < 0 + ? logical_sector >= conf->reshape_progress + : logical_sector < conf->reshape_progress) /* mismatch, need to try again */ must_retry = 1; spin_unlock_irq(&conf->device_lock); @@ -3771,7 +3763,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped int new_data_disks = conf->raid_disks - conf->max_degraded; int i; int dd_idx; - sector_t writepos, safepos, gap; + sector_t writepos, readpos, safepos; sector_t stripe_addr; int reshape_sectors; struct list_head stripes; @@ -3811,26 +3803,46 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped */ writepos = conf->reshape_progress; sector_div(writepos, new_data_disks); + readpos = conf->reshape_progress; + sector_div(readpos, data_disks); safepos = conf->reshape_safe; sector_div(safepos, data_disks); if (mddev->delta_disks < 0) { writepos -= reshape_sectors; + readpos += reshape_sectors; safepos += reshape_sectors; - gap = conf->reshape_safe - conf->reshape_progress; } else { writepos += reshape_sectors; + readpos -= reshape_sectors; safepos -= reshape_sectors; - gap = conf->reshape_progress - conf->reshape_safe; } + /* 'writepos' is the most advanced device address we might write. + * 'readpos' is the least advanced device address we might read. + * 'safepos' is the least address recorded in the metadata as having + * been reshaped. + * If 'readpos' is behind 'writepos', then there is no way that we can + * ensure safety in the face of a crash - that must be done by userspace + * making a backup of the data. So in that case there is no particular + * rush to update metadata. + * Otherwise if 'safepos' is behind 'writepos', then we really need to + * update the metadata to advance 'safepos' to match 'readpos' so that + * we can be safe in the event of a crash. + * So we insist on updating metadata if safepos is behind writepos and + * readpos is beyond writepos. + * In any case, update the metadata every 10 seconds. + * Maybe that number should be configurable, but I'm not sure it is + * worth it.... maybe it could be a multiple of safemode_delay??? + */ if ((mddev->delta_disks < 0 - ? writepos < safepos - : writepos > safepos) || - gap > (new_data_disks)*3000*2 /*3Meg*/) { + ? (safepos > writepos && readpos < writepos) + : (safepos < writepos && readpos > writepos)) || + time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { /* Cannot proceed until we've updated the superblock... */ wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes)==0); mddev->reshape_position = conf->reshape_progress; + conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); wait_event(mddev->sb_wait, mddev->flags == 0 || @@ -3928,6 +3940,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; + conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); wait_event(mddev->sb_wait, @@ -4822,7 +4835,6 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) return 0; } -#ifdef CONFIG_MD_RAID5_RESHAPE static int raid5_check_reshape(mddev_t *mddev) { raid5_conf_t *conf = mddev_to_conf(mddev); @@ -4963,11 +4975,11 @@ static int raid5_start_reshape(mddev_t *mddev) spin_unlock_irq(&conf->device_lock); return -EAGAIN; } + conf->reshape_checkpoint = jiffies; md_wakeup_thread(mddev->sync_thread); md_new_event(mddev); return 0; } -#endif /* This is called from the reshape thread and should make any * changes needed in 'conf' @@ -4981,6 +4993,7 @@ static void end_reshape(raid5_conf_t *conf) conf->previous_raid_disks = conf->raid_disks; conf->reshape_progress = MaxSector; spin_unlock_irq(&conf->device_lock); + wake_up(&conf->wait_for_overlap); /* read-ahead size must cover two whole stripes, which is * 2 * (datadisks) * chunksize where 'n' is the number of raid devices @@ -5289,11 +5302,9 @@ static struct mdk_personality raid6_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, -#ifdef CONFIG_MD_RAID5_RESHAPE .check_reshape = raid5_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, -#endif .quiesce = raid5_quiesce, .takeover = raid6_takeover, .reconfig = raid6_reconfig, @@ -5314,11 +5325,9 @@ static struct mdk_personality raid5_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, -#ifdef CONFIG_MD_RAID5_RESHAPE .check_reshape = raid5_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, -#endif .quiesce = raid5_quiesce, .takeover = raid5_takeover, .reconfig = raid5_reconfig, @@ -5340,11 +5349,9 @@ static struct mdk_personality raid4_personality = .sync_request = sync_request, .resize = raid5_resize, .size = raid5_size, -#ifdef CONFIG_MD_RAID5_RESHAPE .check_reshape = raid5_check_reshape, .start_reshape = raid5_start_reshape, .finish_reshape = raid5_finish_reshape, -#endif .quiesce = raid5_quiesce, };