X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=2580ac1b9b0ff06f34d37f12ea829682017e5ef1;hb=041924ec2f40efa6a3163144a5481a000804199d;hp=87620b705bee01182e37cdb0a95bf825f75d77b2;hpb=e9f20d6f03e8df393b001dab6dc5226c2a5daf57;p=pandora-kernel.git diff --git a/drivers/md/md.c b/drivers/md/md.c index 87620b705bee..2580ac1b9b0f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock); static void md_print_devices(void); +static DECLARE_WAIT_QUEUE_HEAD(resync_wait); + #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } /* @@ -274,15 +276,18 @@ static mddev_t * mddev_find(dev_t unit) atomic_set(&new->active, 1); spin_lock_init(&new->write_lock); init_waitqueue_head(&new->sb_wait); + init_waitqueue_head(&new->recovery_wait); new->reshape_position = MaxSector; new->resync_max = MaxSector; + new->level = LEVEL_NONE; new->queue = blk_alloc_queue(GFP_KERNEL); if (!new->queue) { kfree(new); return NULL; } - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); + /* Can be unlocked because the queue is new: no concurrency */ + queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue); blk_queue_make_request(new->queue, md_fail_request); @@ -1368,6 +1373,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) MD_BUG(); return -EINVAL; } + + /* prevent duplicates */ + if (find_rdev(mddev, rdev->bdev->bd_dev)) + return -EEXIST; + /* make sure rdev->size exceeds mddev->size */ if (rdev->size && (mddev->size == 0 || rdev->size < mddev->size)) { if (mddev->pers) { @@ -1651,6 +1661,8 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; + if (mddev->external) + return; repeat: spin_lock_irq(&mddev->write_lock); @@ -1819,6 +1831,10 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%swrite_mostly",sep); sep = ","; } + if (test_bit(Blocked, &rdev->flags)) { + len += sprintf(page+len, "%sblocked", sep); + sep = ","; + } if (!test_bit(Faulty, &rdev->flags) && !test_bit(In_sync, &rdev->flags)) { len += sprintf(page+len, "%sspare", sep); @@ -1835,6 +1851,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) * remove - disconnects the device * writemostly - sets write_mostly * -writemostly - clears write_mostly + * blocked - sets the Blocked flag + * -blocked - clears the Blocked flag */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -1856,6 +1874,16 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "-writemostly")) { clear_bit(WriteMostly, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "blocked")) { + set_bit(Blocked, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "-blocked")) { + clear_bit(Blocked, &rdev->flags); + wake_up(&rdev->blocked_wait); + set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); + md_wakeup_thread(rdev->mddev->thread); + err = 0; } return err ? err : len; @@ -2096,7 +2124,7 @@ rdev_attr_store(struct kobject *kobj, struct attribute *attr, rv = -EBUSY; else rv = entry->store(rdev, page, length); - mddev_unlock(rdev->mddev); + mddev_unlock(mddev); } return rv; } @@ -2185,7 +2213,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi goto abort_free; } } + INIT_LIST_HEAD(&rdev->same_set); + init_waitqueue_head(&rdev->blocked_wait); return rdev; @@ -2456,7 +2486,6 @@ resync_start_show(mddev_t *mddev, char *page) static ssize_t resync_start_store(mddev_t *mddev, const char *buf, size_t len) { - /* can only set chunk_size if array is not yet active */ char *e; unsigned long long n = simple_strtoull(buf, &e, 10); @@ -2590,15 +2619,20 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) err = do_md_stop(mddev, 1); else { mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); err = do_md_run(mddev); } break; case read_auto: - /* stopping an active array */ if (mddev->pers) { - err = do_md_stop(mddev, 1); - if (err == 0) - mddev->ro = 2; /* FIXME mark devices writable */ + if (mddev->ro != 1) + err = do_md_stop(mddev, 1); + else + err = restart_array(mddev); + if (err == 0) { + mddev->ro = 2; + set_disk_ro(mddev->gendisk, 0); + } } else { mddev->ro = 2; err = do_md_run(mddev); @@ -2611,6 +2645,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) if (atomic_read(&mddev->writes_pending) == 0) { if (mddev->in_sync == 0) { mddev->in_sync = 1; + if (mddev->safemode == 1) + mddev->safemode = 0; if (mddev->persistent) set_bit(MD_CHANGE_CLEAN, &mddev->flags); @@ -2634,6 +2670,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) err = 0; } else { mddev->ro = 0; + set_disk_ro(mddev->gendisk, 0); err = do_md_run(mddev); } break; @@ -2978,6 +3015,36 @@ degraded_show(mddev_t *mddev, char *page) } static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded); +static ssize_t +sync_force_parallel_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%d\n", mddev->parallel_resync); +} + +static ssize_t +sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len) +{ + long n; + + if (strict_strtol(buf, 10, &n)) + return -EINVAL; + + if (n != 0 && n != 1) + return -EINVAL; + + mddev->parallel_resync = n; + + if (mddev->sync_thread) + wake_up(&resync_wait); + + return len; +} + +/* force parallel resync, even with shared block devices */ +static struct md_sysfs_entry md_sync_force_parallel = +__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR, + sync_force_parallel_show, sync_force_parallel_store); + static ssize_t sync_speed_show(mddev_t *mddev, char *page) { @@ -3153,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = { &md_sync_min.attr, &md_sync_max.attr, &md_sync_speed.attr, + &md_sync_force_parallel.attr, &md_sync_completed.attr, &md_max_sync.attr, &md_suspend_lo.attr, @@ -3657,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode) module_put(mddev->pers->owner); mddev->pers = NULL; + /* tell userspace to handle 'inactive' */ + sysfs_notify(&mddev->kobj, NULL, "array_state"); set_capacity(disk, 0); mddev->changed = 1; @@ -3711,6 +3781,30 @@ static int do_md_stop(mddev_t * mddev, int mode) mddev->reshape_position = MaxSector; mddev->external = 0; mddev->persistent = 0; + mddev->level = LEVEL_NONE; + mddev->clevel[0] = 0; + mddev->flags = 0; + mddev->ro = 0; + mddev->metadata_type[0] = 0; + mddev->chunk_size = 0; + mddev->ctime = mddev->utime = 0; + mddev->layout = 0; + mddev->max_disks = 0; + mddev->events = 0; + mddev->delta_disks = 0; + mddev->new_level = LEVEL_NONE; + mddev->new_layout = 0; + mddev->new_chunk = 0; + mddev->curr_resync = 0; + mddev->resync_mismatches = 0; + mddev->suspend_lo = mddev->suspend_hi = 0; + mddev->sync_speed_min = mddev->sync_speed_max = 0; + mddev->recovery = 0; + mddev->in_sync = 0; + mddev->changed = 0; + mddev->degraded = 0; + mddev->barriers_work = 0; + mddev->safemode = 0; } else if (mddev->pers) printk(KERN_INFO "md: %s switched to read-only mode.\n", @@ -3803,8 +3897,10 @@ static void autorun_devices(int part) md_probe(dev, NULL, NULL); mddev = mddev_find(dev); - if (!mddev) { - printk(KERN_ERR + if (!mddev || !mddev->gendisk) { + if (mddev) + mddev_put(mddev); + printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); break; } @@ -3929,8 +4025,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg) if (!buf) goto out; - ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname)); - if (!ptr) + ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname)); + if (IS_ERR(ptr)) goto out; strcpy(file->pathname, ptr); @@ -4918,6 +5014,9 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) if (!rdev || test_bit(Faulty, &rdev->flags)) return; + + if (mddev->external) + set_bit(Blocked, &rdev->flags); /* dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", mdname(mddev), @@ -5338,7 +5437,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) atomic_sub(blocks, &mddev->recovery_active); wake_up(&mddev->recovery_wait); if (!ok) { - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_wakeup_thread(mddev->thread); // stop recovery, signal do_sync .... } @@ -5364,6 +5463,8 @@ void md_write_start(mddev_t *mddev, struct bio *bi) md_wakeup_thread(mddev->sync_thread); } atomic_inc(&mddev->writes_pending); + if (mddev->safemode == 1) + mddev->safemode = 0; if (mddev->in_sync) { spin_lock_irq(&mddev->write_lock); if (mddev->in_sync) { @@ -5372,8 +5473,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi) md_wakeup_thread(mddev->thread); } spin_unlock_irq(&mddev->write_lock); + sysfs_notify(&mddev->kobj, NULL, "array_state"); } - wait_event(mddev->sb_wait, mddev->flags==0); + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } void md_write_end(mddev_t *mddev) @@ -5408,13 +5512,17 @@ void md_allow_write(mddev_t *mddev) mddev->safemode = 1; spin_unlock_irq(&mddev->write_lock); md_update_sb(mddev, 0); + + sysfs_notify(&mddev->kobj, NULL, "array_state"); + /* wait for the dirty state to be recorded in the metadata */ + wait_event(mddev->sb_wait, + !test_bit(MD_CHANGE_CLEAN, &mddev->flags) && + !test_bit(MD_CHANGE_PENDING, &mddev->flags)); } else spin_unlock_irq(&mddev->write_lock); } EXPORT_SYMBOL_GPL(md_allow_write); -static DECLARE_WAIT_QUEUE_HEAD(resync_wait); - #define SYNC_MARKS 10 #define SYNC_MARK_STEP (3*HZ) void md_do_sync(mddev_t *mddev) @@ -5478,8 +5586,9 @@ void md_do_sync(mddev_t *mddev) for_each_mddev(mddev2, tmp) { if (mddev2 == mddev) continue; - if (mddev2->curr_resync && - match_mddev_units(mddev,mddev2)) { + if (!mddev->parallel_resync + && mddev2->curr_resync + && match_mddev_units(mddev, mddev2)) { DEFINE_WAIT(wq); if (mddev < mddev2 && mddev->curr_resync == 2) { /* arbitrarily yield */ @@ -5559,7 +5668,6 @@ void md_do_sync(mddev_t *mddev) window/2,(unsigned long long) max_sectors/2); atomic_set(&mddev->recovery_active, 0); - init_waitqueue_head(&mddev->recovery_wait); last_check = 0; if (j>2) { @@ -5584,7 +5692,7 @@ void md_do_sync(mddev_t *mddev) sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); goto out; } @@ -5607,8 +5715,7 @@ void md_do_sync(mddev_t *mddev) last_check = io_sectors; - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) || - test_bit(MD_RECOVERY_ERR, &mddev->recovery)) + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) break; repeat: @@ -5662,8 +5769,7 @@ void md_do_sync(mddev_t *mddev) /* tell personality that we are finished */ mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); - if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && + if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -5718,7 +5824,7 @@ static int remove_and_add_spares(mddev_t *mddev) rdev_for_each(rdev, rtmp, mddev) if (rdev->raid_disk >= 0 && - !mddev->external && + !test_bit(Blocked, &rdev->flags) && (test_bit(Faulty, &rdev->flags) || ! test_bit(In_sync, &rdev->flags)) && atomic_read(&rdev->nr_pending)==0) { @@ -5732,7 +5838,10 @@ static int remove_and_add_spares(mddev_t *mddev) } if (mddev->degraded) { - rdev_for_each(rdev, rtmp, mddev) + rdev_for_each(rdev, rtmp, mddev) { + if (rdev->raid_disk >= 0 && + !test_bit(In_sync, &rdev->flags)) + spares++; if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { rdev->recovery_offset = 0; @@ -5750,6 +5859,7 @@ static int remove_and_add_spares(mddev_t *mddev) } else break; } + } } return spares; } @@ -5763,7 +5873,7 @@ static int remove_and_add_spares(mddev_t *mddev) * to do that as needed. * When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in * "->recovery" and create a thread at ->sync_thread. - * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR) + * When the thread finishes it sets MD_RECOVERY_DONE * and wakeups up this thread which will reap the thread and finish up. * This thread also removes any faulty devices (with nr_pending == 0). * @@ -5788,7 +5898,7 @@ void md_check_recovery(mddev_t *mddev) return; if (signal_pending(current)) { - if (mddev->pers->sync_request) { + if (mddev->pers->sync_request && !mddev->external) { printk(KERN_INFO "md: %s in immediate safe mode\n", mdname(mddev)); mddev->safemode = 2; @@ -5800,7 +5910,7 @@ void md_check_recovery(mddev_t *mddev) (mddev->flags && !mddev->external) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - (mddev->safemode == 1) || + (mddev->external == 0 && mddev->safemode == 1) || (mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending) && !mddev->in_sync && mddev->recovery_cp == MaxSector) )) @@ -5809,16 +5919,20 @@ void md_check_recovery(mddev_t *mddev) if (mddev_trylock(mddev)) { int spares = 0; - spin_lock_irq(&mddev->write_lock); - if (mddev->safemode && !atomic_read(&mddev->writes_pending) && - !mddev->in_sync && mddev->recovery_cp == MaxSector) { - mddev->in_sync = 1; - if (mddev->persistent) - set_bit(MD_CHANGE_CLEAN, &mddev->flags); + if (!mddev->external) { + spin_lock_irq(&mddev->write_lock); + if (mddev->safemode && + !atomic_read(&mddev->writes_pending) && + !mddev->in_sync && + mddev->recovery_cp == MaxSector) { + mddev->in_sync = 1; + if (mddev->persistent) + set_bit(MD_CHANGE_CLEAN, &mddev->flags); + } + if (mddev->safemode == 1) + mddev->safemode = 0; + spin_unlock_irq(&mddev->write_lock); } - if (mddev->safemode == 1) - mddev->safemode = 0; - spin_unlock_irq(&mddev->write_lock); if (mddev->flags) md_update_sb(mddev, 0); @@ -5834,8 +5948,7 @@ void md_check_recovery(mddev_t *mddev) /* resync has finished, collect result */ md_unregister_thread(mddev->sync_thread); mddev->sync_thread = NULL; - if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { /* success...*/ /* activate any spares */ mddev->pers->spare_active(mddev); @@ -5859,7 +5972,6 @@ void md_check_recovery(mddev_t *mddev) * might be left set */ clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - clear_bit(MD_RECOVERY_ERR, &mddev->recovery); clear_bit(MD_RECOVERY_INTR, &mddev->recovery); clear_bit(MD_RECOVERY_DONE, &mddev->recovery); @@ -5913,6 +6025,16 @@ void md_check_recovery(mddev_t *mddev) } } +void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) +{ + sysfs_notify(&rdev->kobj, NULL, "state"); + wait_event_timeout(rdev->blocked_wait, + !test_bit(Blocked, &rdev->flags), + msecs_to_jiffies(5000)); + rdev_dec_pending(rdev, mddev); +} +EXPORT_SYMBOL(md_wait_for_blocked_rdev); + static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { @@ -5947,13 +6069,9 @@ static struct notifier_block md_notifier = { static void md_geninit(void) { - struct proc_dir_entry *p; - dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t)); - p = create_proc_entry("mdstat", S_IRUGO, NULL); - if (p) - p->proc_fops = &md_seq_fops; + proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops); } static int __init md_init(void)