[PATCH] md: allow a manual resync with md
author NeilBrown <neilb@suse.de>
Wed, 9 Nov 2005 05:39:26 +0000 (21:39 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Wed, 9 Nov 2005 15:56:37 +0000 (07:56 -0800)
You can trigger a 'check' with
  echo check > /sys/block/mdX/md/scan_mode
or a check that also repairs any errors found with
  echo repair > /sys/block/mdX/md/scan_mode

and read the current state from the same file.

Note: personalities need to know the difference between 'check' and 'repair',
but don't yet.  Until they do, 'check' will be the same as 'repair' and will
just do a normal resync pass.

Signed-off-by: Neil Brown <neilb@suse.de>
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
drivers/md/md.c
include/linux/raid/md_k.h

index 74520b5..3740087 100644 (file)
@@ -1714,9 +1714,60 @@ static struct md_sysfs_entry md_raid_disks = {
        .show = md_show_rdisks,
 };
 
+/* sysfs read of md/scan_mode: reports none, resync, check, repair or recover */
+static ssize_t
+md_show_scan(mddev_t *mddev, char *page)
+{
+       char *type = "none";    /* default: neither RUNNING nor NEEDED is set */
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+           test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { /* test both flags */
+               if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+                       type = "recover";       /* SYNC flag not set */
+               else if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+                       type = "resync";        /* sync not requested via sysfs */
+               else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+                       type = "check";         /* requested, check-only */
+               else
+                       type = "repair";        /* requested, CHECK not set */
+       }
+       return sprintf(page, "%s\n", type);
+}
+
+/* sysfs write of md/scan_mode: "check" or "repair" requests a sync pass */
+static ssize_t
+md_store_scan(mddev_t *mddev, const char *page, size_t len)
+{
+       int canscan;
+       if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
+           test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+               return -EBUSY;  /* recovery already running or flagged as needed */
+       down(&mddev->reconfig_sem);
+       canscan = (mddev->pers && mddev->pers->sync_request);
+       up(&mddev->reconfig_sem);
+       if (!canscan)
+               return -EINVAL; /* no personality, or it has no sync_request */
+       /* accept either keyword with or without a trailing newline */
+       if (strcmp(page, "check")==0 || strcmp(page, "check\n")==0)
+               set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+       else if (strcmp(page, "repair")!=0 && strcmp(page, "repair\n")!=0)
+               return -EINVAL; /* unrecognised keyword */
+       set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
+       set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+       set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+       md_wakeup_thread(mddev->thread);
+       return len;
+}
+
+static struct md_sysfs_entry md_scan_mode = {  /* the md/scan_mode attribute */
+       .show  = md_show_scan,          /* read handler */
+       .store = md_store_scan,         /* write handler */
+       .attr  = { .name = "scan_mode", .mode = S_IRUGO | S_IWUSR },
+};
+
 static struct attribute *md_default_attrs[] = {
        &md_level.attr,
        &md_raid_disks.attr,
+       &md_scan_mode.attr,     /* the "scan_mode" file defined above */
        NULL,
 };
 
@@ -3855,7 +3906,8 @@ static void md_do_sync(mddev_t *mddev)
 
        is_mddev_idle(mddev); /* this also initializes IO event counters */
        /* we don't use the checkpoint if there's a bitmap */
-       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap)
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && !mddev->bitmap
+           && ! test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
                j = mddev->recovery_cp;
        else
                j = 0;
@@ -4093,9 +4145,13 @@ void md_check_recovery(mddev_t *mddev)
                        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
                        goto unlock;
                }
-               if (mddev->recovery)
-                       /* probably just the RECOVERY_NEEDED flag */
-                       mddev->recovery = 0;
+               /* Clear some bits that don't mean anything, but
+                * might be left set
+                */
+               clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+               clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
+               clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
+               clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 
                /* no recovery is running.
                 * remove any failed drives, then
@@ -4129,14 +4185,17 @@ void md_check_recovery(mddev_t *mddev)
                                }
                }
 
-               if (!spares && (mddev->recovery_cp == MaxSector )) {
-                       /* nothing we can do ... */
+               if (spares) {
+                       clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+                       clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+               } else if (mddev->recovery_cp < MaxSector) {
+                       set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+               } else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+                       /* nothing to be done ... */
                        goto unlock;
-               }
+
                if (mddev->pers->sync_request) {
                        set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
-                       if (!spares)
-                               set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                        if (spares && mddev->bitmap && ! mddev->bitmap->file) {
                                /* We are adding a device or devices to an array
                                 * which has the bitmap stored on all devices.
index d1dad32..efd04dc 100644 (file)
@@ -182,6 +182,8 @@ struct mddev_s
         * ERR:      and IO error was detected - abort the resync/recovery
         * INTR:     someone requested a (clean) early abort.
         * DONE:     thread is done and is waiting to be reaped
+        * REQUESTED: user-space has requested a sync (used with SYNC)
+        * CHECK:    user-space request for check-only, no repair
         */
 #define        MD_RECOVERY_RUNNING     0
 #define        MD_RECOVERY_SYNC        1
@@ -189,6 +191,8 @@ struct mddev_s
 #define        MD_RECOVERY_INTR        3
 #define        MD_RECOVERY_DONE        4
 #define        MD_RECOVERY_NEEDED      5
+#define        MD_RECOVERY_REQUESTED   6
+#define        MD_RECOVERY_CHECK       7
        unsigned long                   recovery;
 
        int                             in_sync;        /* know to not need resync */