Merge branch 'for-linus' of git://neil.brown.name/md
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Aug 2009 17:59:29 +0000 (10:59 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 13 Aug 2009 17:59:29 +0000 (10:59 -0700)
* 'for-linus' of git://neil.brown.name/md:
  md: allow upper limit for resync/reshape to be set when array is read-only
  md/raid5: Properly remove excess drives after shrinking a raid5/6
  md/raid5: make sure a reshape restarts at the correct address.
  md/raid5: allow new reshape modes to be restarted in the middle.
  md: never advance 'events' counter by more than 1.
  Remove deadlock potential in md_open

drivers/md/md.c
drivers/md/md.h
drivers/md/raid5.c

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea..103f2d3 100644
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
        else
                new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+       mutex_init(&new->open_mutex);
        mutex_init(&new->reconfig_mutex);
        INIT_LIST_HEAD(&new->disks);
        INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
                /* otherwise we have to go forward and ... */
                mddev->events ++;
                if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                       /* .. if the array isn't clean, insist on an odd 'events' */
-                       if ((mddev->events&1)==0) {
-                               mddev->events++;
+                       /* .. if the array isn't clean, an 'even' event must also go
+                        * to spares. */
+                       if ((mddev->events&1)==0)
                                nospares = 0;
-                       }
                } else {
-                       /* otherwise insist on an even 'events' (for clean states) */
-                       if ((mddev->events&1)) {
-                               mddev->events++;
+                       /* otherwise an 'odd' event must go to spares */
+                       if ((mddev->events&1))
                                nospares = 0;
-                       }
                }
        }
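
For context on the hunk above: md uses the parity of the superblock 'events' counter as a clean/dirty marker, even counts for a cleanly stopped array and odd counts for an active one (that is what the removed comments meant by "insist on an odd/even 'events'"). Rather than advancing the counter a second time to restore the expected parity, the new code performs a single increment and, when the parity comes out "wrong", clears nospares so the superblocks on spare devices are rewritten as well. A small helper spelling out that convention, illustrative only and not part of the patch:

static inline int events_parity_needs_spare_update(u64 events, int array_is_clean)
{
	/* not from the patch: even 'events' values are expected for a clean
	 * array, odd values for a dirty one; a mismatch after the single
	 * increment means the spares' superblocks must be updated too
	 * (nospares = 0) instead of bumping the counter a second time */
	if (array_is_clean)
		return (events & 1) != 0;	/* clean array left with an odd count */
	return (events & 1) == 0;		/* dirty array left with an even count */
}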
 
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
                if (max < mddev->resync_min)
                        return -EINVAL;
                if (max < mddev->resync_max &&
+                   mddev->ro == 0 &&
                    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                        return -EBUSY;
 
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        struct gendisk *disk = mddev->gendisk;
        mdk_rdev_t *rdev;
 
+       mutex_lock(&mddev->open_mutex);
        if (atomic_read(&mddev->openers) > is_open) {
                printk("md: %s still in use.\n",mdname(mddev));
-               return -EBUSY;
-       }
-
-       if (mddev->pers) {
+               err = -EBUSY;
+       } else if (mddev->pers) {
 
                if (mddev->sync_thread) {
                        set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                        set_disk_ro(disk, 1);
                clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
        }
-
+out:
+       mutex_unlock(&mddev->open_mutex);
+       if (err)
+               return err;
        /*
         * Free resources if final stop
         */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
        blk_integrity_unregister(disk);
        md_new_event(mddev);
        sysfs_notify_dirent(mddev->sysfs_state);
-out:
        return err;
 }
 
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
        }
        BUG_ON(mddev != bdev->bd_disk->private_data);
 
-       if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+       if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
                goto out;
 
        err = 0;
        atomic_inc(&mddev->openers);
-       mddev_unlock(mddev);
+       mutex_unlock(&mddev->open_mutex);
 
        check_disk_change(bdev);
  out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f0316..f8fc188 100644
@@ -223,6 +223,16 @@ struct mddev_s
                                                            * so we don't loop trying */
 
        int                             in_sync;        /* know to not need resync */
+       /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+        * that we are never stopping an array while it is open.
+        * 'reconfig_mutex' protects all other reconfiguration.
+        * These locks are separate due to conflicting interactions
+        * with bdev->bd_mutex.
+        * Lock ordering is:
+        *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+        *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+        */
+       struct mutex                    open_mutex;
        struct mutex                    reconfig_mutex;
        atomic_t                        active;         /* general refcount */
        atomic_t                        openers;        /* number of active opens */
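
The comment above states the rule that both md.c hunks implement: reconfig_mutex ranks above bd_mutex (do_md_run() calls revalidate_disk() while reconfiguring), and md_open() already runs with bd_mutex held via __blkdev_get(), so taking reconfig_mutex in md_open() closed a lock-ordering cycle. The merge therefore serialises open against stop with the narrower open_mutex only. A simplified sketch of the resulting interaction, with error paths, check_disk_change() and the actual teardown elided:

static int md_open(struct block_device *bdev, fmode_t mode)
{
	mddev_t *mddev = bdev->bd_disk->private_data;
	int err;

	/* called under bdev->bd_mutex, so only open_mutex may be taken here */
	if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
		return err;
	atomic_inc(&mddev->openers);
	mutex_unlock(&mddev->open_mutex);
	return 0;
}

static int do_md_stop(mddev_t *mddev, int mode, int is_open)
{
	int err = 0;

	mutex_lock(&mddev->open_mutex);
	if (atomic_read(&mddev->openers) > is_open)
		err = -EBUSY;	/* someone else still holds the device open */
	else {
		/* ... stop the array while open_mutex keeps md_open() out ... */
	}
	mutex_unlock(&mddev->open_mutex);
	return err;
}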
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee..b8a2c5d 100644
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                    conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                        sector_nr = raid5_size(mddev, 0, 0)
                                - conf->reshape_progress;
-               } else if (mddev->delta_disks > 0 &&
+               } else if (mddev->delta_disks >= 0 &&
                           conf->reshape_progress > 0)
                        sector_nr = conf->reshape_progress;
                sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
                           (old_disks-max_degraded));
                /* here_old is the first stripe that we might need to read
                 * from */
-               if (here_new >= here_old) {
+               if (mddev->delta_disks == 0) {
+                       /* We cannot be sure it is safe to start an in-place
+                        * reshape.  It is only safe if user-space is monitoring
+                        * and taking constant backups.
+                        * mdadm always starts a situation like this in
+                        * readonly mode so it can take control before
+                        * allowing any writes.  So just check for that.
+                        */
+                       if ((here_new * mddev->new_chunk_sectors != 
+                            here_old * mddev->chunk_sectors) ||
+                           mddev->ro == 0) {
+                               printk(KERN_ERR "raid5: in-place reshape must be started"
+                                      " in read-only mode - aborting\n");
+                               return -EINVAL;
+                       }
+               } else if (mddev->delta_disks < 0
+                   ? (here_new * mddev->new_chunk_sectors <=
+                      here_old * mddev->chunk_sectors)
+                   : (here_new * mddev->new_chunk_sectors >=
+                      here_old * mddev->chunk_sectors)) {
                        /* Reading from the same stripe as writing to - bad */
                        printk(KERN_ERR "raid5: reshape_position too early for "
                               "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
                                        mddev->degraded--;
                        for (d = conf->raid_disks ;
                             d < conf->raid_disks - mddev->delta_disks;
-                            d++)
-                               raid5_remove_disk(mddev, d);
+                            d++) {
+                               mdk_rdev_t *rdev = conf->disks[d].rdev;
+                               if (rdev && raid5_remove_disk(mddev, d) == 0) {
+                                       char nm[20];
+                                       sprintf(nm, "rd%d", rdev->raid_disk);
+                                       sysfs_remove_link(&mddev->kobj, nm);
+                                       rdev->raid_disk = -1;
+                               }
+                       }
                }
                mddev->layout = conf->algorithm;
                mddev->chunk_sectors = conf->chunk_sectors;
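
Returning to the run() hunk above: here_old and here_new are the reshape position counted in stripes of the old and new geometry, so multiplying each by its chunk size puts both in sectors and makes them comparable. When the array is growing, the new-geometry position must lie strictly before the old-geometry one; when shrinking, strictly after; and an in-place reshape (delta_disks == 0) can only be restarted when the two coincide exactly and the array is still read-only, since mdadm starts such reshapes read-only so it can take charge of the backup before any writes happen. An illustrative restatement of that test, not taken verbatim from the patch:

static int reshape_restart_unsafe(sector_t here_new, sector_t here_old,
				  unsigned int new_chunk_sectors,
				  unsigned int chunk_sectors,
				  int delta_disks, int array_ro)
{
	/* illustrative only: non-zero means run() must refuse the restart */
	sector_t new_pos = here_new * new_chunk_sectors;
	sector_t old_pos = here_old * chunk_sectors;

	if (delta_disks == 0)
		/* in-place: positions must match and the array must still be
		 * read-only so user-space keeps control of the backup */
		return new_pos != old_pos || !array_ro;
	if (delta_disks < 0)
		/* shrinking: restart is unsafe once the new position has
		 * fallen back to, or behind, the old one */
		return new_pos <= old_pos;
	/* growing: unsafe once the new position has caught up with the old */
	return new_pos >= old_pos;
}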