static void md_print_devices(void);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
{
atomic_inc(&md_event_count);
wake_up(&md_event_waiters);
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
}
EXPORT_SYMBOL_GPL(md_new_event);
atomic_set(&new->active, 1);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
+ init_waitqueue_head(&new->recovery_wait);
new->reshape_position = MaxSector;
+ new->resync_min = 0;
new->resync_max = MaxSector;
new->level = LEVEL_NONE;
err = 0;
}
+ if (!err)
+ sysfs_notify(&rdev->kobj, NULL, "state");
return err ? err : len;
}
static struct rdev_sysfs_entry rdev_state =
slot = -1;
else if (e==buf || (*e && *e!= '\n'))
return -EINVAL;
- if (rdev->mddev->pers) {
+ if (rdev->mddev->pers && slot == -1) {
/* Setting 'slot' on an active array requires also
* updating the 'rd%d' link, and communicating
* with the personality with ->hot_*_disk.
* failed/spare devices. This normally happens automatically,
* but not when the metadata is externally managed.
*/
- if (slot != -1)
- return -EBUSY;
if (rdev->raid_disk == -1)
return -EEXIST;
/* personality does all needed checks */
sysfs_remove_link(&rdev->mddev->kobj, nm);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
+ } else if (rdev->mddev->pers) {
+ mdk_rdev_t *rdev2;
+ struct list_head *tmp;
+ /* Activating a spare .. or possibly reactivating
+ * if we every get bitmaps working here.
+ */
+
+ if (rdev->raid_disk != -1)
+ return -EBUSY;
+
+ if (rdev->mddev->pers->hot_add_disk == NULL)
+ return -EINVAL;
+
+ rdev_for_each(rdev2, tmp, rdev->mddev)
+ if (rdev2->raid_disk == slot)
+ return -EEXIST;
+
+ rdev->raid_disk = slot;
+ if (test_bit(In_sync, &rdev->flags))
+ rdev->saved_raid_disk = slot;
+ else
+ rdev->saved_raid_disk = -1;
+ err = rdev->mddev->pers->
+ hot_add_disk(rdev->mddev, rdev);
+ if (err) {
+ rdev->raid_disk = -1;
+ return err;
+ } else
+ sysfs_notify(&rdev->kobj, NULL, "state");
+ sprintf(nm, "rd%d", rdev->raid_disk);
+ if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm))
+ printk(KERN_WARNING
+ "md: cannot register "
+ "%s for %s\n",
+ nm, mdname(rdev->mddev));
+
+ /* don't wakeup anyone, leave that to userspace. */
} else {
if (slot >= rdev->mddev->raid_disks)
return -ENOSPC;
clear_bit(Faulty, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);
set_bit(In_sync, &rdev->flags);
+ sysfs_notify(&rdev->kobj, NULL, "state");
}
return len;
}
unsigned long long offset = simple_strtoull(buf, &e, 10);
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
- if (rdev->mddev->pers)
+ if (rdev->mddev->pers && rdev->raid_disk >= 0)
return -EBUSY;
if (rdev->size && rdev->mddev->external)
/* Must set offset before size, so overlap checks
if (e==buf || (*e && *e != '\n'))
return -EINVAL;
- if (my_mddev->pers)
+ if (my_mddev->pers && rdev->raid_disk >= 0)
return -EBUSY;
rdev->size = size;
if (size > oldsize && rdev->mddev->external) {
}
if (err)
return err;
- else
+ else {
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
return len;
+ }
}
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
type = "check";
else
type = "repair";
- } else
+ } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
type = "recover";
}
return sprintf(page, "%s\n", type);
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
return -EBUSY;
- else if (cmd_match(page, "resync") || cmd_match(page, "recover"))
+ else if (cmd_match(page, "resync"))
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- else if (cmd_match(page, "reshape")) {
+ else if (cmd_match(page, "recover")) {
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ } else if (cmd_match(page, "reshape")) {
int err;
if (mddev->pers->start_reshape == NULL)
return -EINVAL;
err = mddev->pers->start_reshape(mddev);
if (err)
return err;
+ sysfs_notify(&mddev->kobj, NULL, "degraded");
} else {
if (cmd_match(page, "check"))
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
return len;
}
}
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
+static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ long n;
+
+ if (strict_strtol(buf, 10, &n))
+ return -EINVAL;
+
+ if (n != 0 && n != 1)
+ return -EINVAL;
+
+ mddev->parallel_resync = n;
+
+ if (mddev->sync_thread)
+ wake_up(&resync_wait);
+
+ return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+ sync_force_parallel_show, sync_force_parallel_store);
+
static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
+static ssize_t
+min_sync_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->resync_min);
+}
+static ssize_t
+min_sync_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ unsigned long long min;
+ if (strict_strtoull(buf, 10, &min))
+ return -EINVAL;
+ if (min > mddev->resync_max)
+ return -EINVAL;
+ if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
+ return -EBUSY;
+
+ /* Must be a multiple of chunk_size */
+ if (mddev->chunk_size) {
+ if (min & (sector_t)((mddev->chunk_size>>9)-1))
+ return -EINVAL;
+ }
+ mddev->resync_min = min;
+
+ return len;
+}
+
+static struct md_sysfs_entry md_min_sync =
+__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
+
static ssize_t
max_sync_show(mddev_t *mddev, char *page)
{
if (strncmp(buf, "max", 3) == 0)
mddev->resync_max = MaxSector;
else {
- char *ep;
- unsigned long long max = simple_strtoull(buf, &ep, 10);
- if (ep == buf || (*ep != 0 && *ep != '\n'))
+ unsigned long long max;
+ if (strict_strtoull(buf, 10, &max))
+ return -EINVAL;
+ if (max < mddev->resync_min)
return -EINVAL;
if (max < mddev->resync_max &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
&md_sync_min.attr,
&md_sync_max.attr,
&md_sync_speed.attr,
+ &md_sync_force_parallel.attr,
&md_sync_completed.attr,
+ &md_min_sync.attr,
&md_max_sync.attr,
&md_suspend_lo.attr,
&md_suspend_hi.attr,
disk->queue = mddev->queue;
add_disk(disk);
mddev->gendisk = disk;
- mutex_unlock(&disks_mutex);
error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj,
"%s", "md");
+ mutex_unlock(&disks_mutex);
if (error)
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
disk->disk_name);
{
mddev_t *mddev = (mddev_t *) data;
- mddev->safemode = 1;
+ if (!atomic_read(&mddev->writes_pending)) {
+ mddev->safemode = 1;
+ if (mddev->external)
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ }
md_wakeup_thread(mddev->thread);
}
return -EINVAL;
}
}
+ sysfs_notify(&rdev->kobj, NULL, "state");
}
md_probe(mddev->unit, NULL, NULL);
mddev->changed = 1;
md_new_event(mddev);
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ sysfs_notify(&mddev->kobj, NULL, "degraded");
kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE);
return 0;
}
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
err = 0;
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+
} else
err = -EINVAL;
module_put(mddev->pers->owner);
mddev->pers = NULL;
+ /* tell userspace to handle 'inactive' */
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
set_capacity(disk, 0);
mddev->changed = 1;
mddev->size = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector;
mddev->external = 0;
mdname(mddev));
err = 0;
md_new_event(mddev);
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
out:
return err;
}
md_probe(dev, NULL, NULL);
mddev = mddev_find(dev);
- if (!mddev) {
- printk(KERN_ERR
+ if (!mddev || !mddev->gendisk) {
+ if (mddev)
+ mddev_put(mddev);
+ printk(KERN_ERR
"md: cannot allocate memory for md drive.\n");
break;
}
if (!buf)
goto out;
- ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
- if (!ptr)
+ ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+ if (IS_ERR(ptr))
goto out;
strcpy(file->pathname, ptr);
}
if (err)
export_rdev(rdev);
+ else
+ sysfs_notify(&rdev->kobj, NULL, "state");
md_update_sb(mddev, 1);
+ if (mddev->degraded)
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
return err;
char b[BDEVNAME_SIZE];
mdk_rdev_t *rdev;
- if (!mddev->pers)
- return -ENODEV;
-
rdev = find_rdev(mddev, dev);
if (!rdev)
return -ENXIO;
mddev->ro && mddev->pers) {
if (mddev->ro == 2) {
mddev->ro = 0;
- set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- md_wakeup_thread(mddev->thread);
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ md_wakeup_thread(mddev->thread);
} else {
err = -EROFS;
if (!mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
+ if (mddev->degraded)
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ set_bit(StateChanged, &rdev->flags);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
atomic_sub(blocks, &mddev->recovery_active);
wake_up(&mddev->recovery_wait);
if (!ok) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_wakeup_thread(mddev->thread);
// stop recovery, signal do_sync ....
}
*/
void md_write_start(mddev_t *mddev, struct bio *bi)
{
+ int did_change = 0;
if (bio_data_dir(bi) != WRITE)
return;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread);
+ did_change = 1;
}
atomic_inc(&mddev->writes_pending);
if (mddev->safemode == 1)
mddev->in_sync = 0;
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
md_wakeup_thread(mddev->thread);
+ did_change = 1;
}
spin_unlock_irq(&mddev->write_lock);
}
- wait_event(mddev->sb_wait, mddev->flags==0);
+ if (did_change)
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
void md_write_end(mddev_t *mddev)
return;
if (mddev->ro)
return;
+ if (!mddev->pers->sync_request)
+ return;
spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) {
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
+
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ /* wait for the dirty state to be recorded in the metadata */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
} else
spin_unlock_irq(&mddev->write_lock);
}
EXPORT_SYMBOL_GPL(md_allow_write);
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
void md_do_sync(mddev_t *mddev)
for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
- if (mddev2->curr_resync &&
- match_mddev_units(mddev,mddev2)) {
+ if (!mddev->parallel_resync
+ && mddev2->curr_resync
+ && match_mddev_units(mddev, mddev2)) {
DEFINE_WAIT(wq);
if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
max_sectors = mddev->resync_max_sectors;
mddev->resync_mismatches = 0;
/* we don't use the checkpoint if there's a bitmap */
- if (!mddev->bitmap &&
- !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ j = mddev->resync_min;
+ else if (!mddev->bitmap)
j = mddev->recovery_cp;
+
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
max_sectors = mddev->size << 1;
else {
window/2,(unsigned long long) max_sectors/2);
atomic_set(&mddev->recovery_active, 0);
- init_waitqueue_head(&mddev->recovery_wait);
last_check = 0;
if (j>2) {
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
}
last_check = io_sectors;
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
- test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;
repeat:
/* tell personality that we are finished */
mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+ if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > 2) {
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
skip:
mddev->curr_resync = 0;
+ mddev->resync_min = 0;
mddev->resync_max = MaxSector;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait);
}
if (mddev->degraded) {
- rdev_for_each(rdev, rtmp, mddev)
+ rdev_for_each(rdev, rtmp, mddev) {
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(In_sync, &rdev->flags))
+ spares++;
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
- if (mddev->pers->hot_add_disk(mddev,rdev)) {
+ if (mddev->pers->
+ hot_add_disk(mddev, rdev) == 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
if (sysfs_create_link(&mddev->kobj,
} else
break;
}
+ }
}
return spares;
}
* to do that as needed.
* When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
* "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
* and wakeups up this thread which will reap the thread and finish up.
* This thread also removes any faulty devices (with nr_pending == 0).
*
int spares = 0;
if (!mddev->external) {
+ int did_change = 0;
spin_lock_irq(&mddev->write_lock);
if (mddev->safemode &&
!atomic_read(&mddev->writes_pending) &&
!mddev->in_sync &&
mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1;
+ did_change = 1;
if (mddev->persistent)
set_bit(MD_CHANGE_CLEAN, &mddev->flags);
}
if (mddev->safemode == 1)
mddev->safemode = 0;
spin_unlock_irq(&mddev->write_lock);
+ if (did_change)
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
}
if (mddev->flags)
md_update_sb(mddev, 0);
+ rdev_for_each(rdev, rtmp, mddev)
+ if (test_and_clear_bit(StateChanged, &rdev->flags))
+ sysfs_notify(&rdev->kobj, NULL, "state");
+
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
/* resync has finished, collect result */
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* success...*/
/* activate any spares */
- mddev->pers->spare_active(mddev);
+ if (mddev->pers->spare_active(mddev))
+ sysfs_notify(&mddev->kobj, NULL,
+ "degraded");
}
md_update_sb(mddev, 1);
mddev->recovery = 0;
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
md_new_event(mddev);
goto unlock;
}
+ /* Set RUNNING before clearing NEEDED to avoid
+ * any transients in the value of "sync_action".
+ */
+ set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
/* Clear some bits that don't mean anything, but
* might be left set
*/
- clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
/* Cannot proceed */
goto unlock;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
+ clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if ((spares = remove_and_add_spares(mddev))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* nothing to be done ... */
goto unlock;
if (mddev->pers->sync_request) {
- set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (spares && mddev->bitmap && ! mddev->bitmap->file) {
/* We are adding a device or devices to an array
* which has the bitmap stored on all devices.
mddev->recovery = 0;
} else
md_wakeup_thread(mddev->sync_thread);
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
md_new_event(mddev);
}
unlock:
+ if (!mddev->sync_thread) {
+ clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
+ if (test_and_clear_bit(MD_RECOVERY_RECOVER,
+ &mddev->recovery))
+ sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ }
mddev_unlock(mddev);
}
}