md/raid10: fix two bugs in handling of known-bad-blocks.

[pandora-kernel.git] / drivers / md / raid10.c
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 685ddf3..c61067a 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -390,7 +390,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
                 sector_t first_bad;
                 int bad_sectors;
  
-               set_bit(R10BIO_Uptodate, &r10_bio->state);
+               /*
+                * Do not set R10BIO_Uptodate if the current device is
+                * rebuilding or Faulty. This is because we cannot use
+                * such a device for properly reading the data back (we could
+                * potentially use it if the current write had fallen
+                * before rdev->recovery_offset, but for simplicity we don't
+                * check this here).
+                */
+               if (test_bit(In_sync, &conf->mirrors[dev].rdev->flags) &&
+                   !test_bit(Faulty, &conf->mirrors[dev].rdev->flags))
+                       set_bit(R10BIO_Uptodate, &r10_bio->state);
  
                 /* Maybe we can clear some bad blocks. */
                 if (is_badblock(conf->mirrors[dev].rdev,
@@ -790,9 +800,22 @@ static void wait_barrier(struct r10conf *conf)
         spin_lock_irq(&conf->resync_lock);
         if (conf->barrier) {
                 conf->nr_waiting++;
-               wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+               /* Wait for the barrier to drop.
+                * However if there are already pending
+                * requests (preventing the barrier from
+                * rising completely), and the
+                * pre-process bio queue isn't empty,
+                * then don't wait, as we need to empty
+                * that queue to get the nr_pending
+                * count down.
+                */
+               wait_event_lock_irq(conf->wait_barrier,
+                                   !conf->barrier ||
+                                   (conf->nr_pending &&
+                                    current->bio_list &&
+                                    !bio_list_empty(current->bio_list)),
                                     conf->resync_lock,
-                                   );
+                       );
                 conf->nr_waiting--;
         }
         conf->nr_pending++;
@@ -974,7 +997,7 @@ read_again:
                         /* Could not read all from this device, so we will
                          * need another r10_bio.
                          */
-                       sectors_handled = (r10_bio->sectors + max_sectors
+                       sectors_handled = (r10_bio->sector + max_sectors
                                            - bio->bi_sector);
                         r10_bio->sectors = max_sectors;
                         spin_lock_irq(&conf->device_lock);
@@ -982,7 +1005,7 @@ read_again:
                                 bio->bi_phys_segments = 2;
                         else
                                 bio->bi_phys_segments++;
-                       spin_unlock(&conf->device_lock);
+                       spin_unlock_irq(&conf->device_lock);
                         /* Cannot call generic_make_request directly
                          * as that will be queued in __generic_make_request
                          * and subsequent mempool_alloc might block
@@ -1213,14 +1236,16 @@ static int enough(struct r10conf *conf, int ignore)
         do {
                 int n = conf->copies;
                 int cnt = 0;
+               int this = first;
                 while (n--) {
-                       if (conf->mirrors[first].rdev &&
-                           first != ignore)
+                       if (conf->mirrors[this].rdev &&
+                           this != ignore)
                                 cnt++;
-                       first = (first+1) % conf->raid_disks;
+                       this = (this+1) % conf->raid_disks;
                 }
                 if (cnt == 0)
                         return 0;
+               first = (first + conf->near_copies) % conf->raid_disks;
         } while (first != 0);
         return 1;
  }
@@ -1906,7 +1931,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                         if (r10_sync_page_io(rdev,
                                              r10_bio->devs[sl].addr +
                                              sect,
-                                            s<<9, conf->tmppage, WRITE)
+                                            s, conf->tmppage, WRITE)
                             == 0) {
                                 /* Well, this device is dead */
                                 printk(KERN_NOTICE
@@ -1943,7 +1968,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
                         switch (r10_sync_page_io(rdev,
                                              r10_bio->devs[sl].addr +
                                              sect,
-                                            s<<9, conf->tmppage,
+                                            s, conf->tmppage,
                                                  READ)) {
                         case 0:
                                 /* Well, this device is dead */
@@ -2106,7 +2131,7 @@ read_more:
         rdev = conf->mirrors[mirror].rdev;
         printk_ratelimited(
                 KERN_ERR
-               "md/raid10:%s: %s: redirecting"
+               "md/raid10:%s: %s: redirecting "
                 "sector %llu to another mirror\n",
                 mdname(mddev),
                 bdevname(rdev->bdev, b),
@@ -2423,6 +2448,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                         /* want to reconstruct this device */
                         rb2 = r10_bio;
                         sect = raid10_find_virt(conf, sector_nr, i);
+                       if (sect >= mddev->resync_max_sectors) {
+                               /* last stripe is not complete - don't
+                                * try to recover this sector.
+                                */
+                               continue;
+                       }
                         /* Unless we are doing a full sync, we only need
                          * to recover the block if it is set in the bitmap
                          */
@@ -2620,7 +2651,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
                                 else {
                                         bad_sectors -= (sector - first_bad);
                                         if (max_sync > bad_sectors)
-                                               max_sync = max_sync;
+                                               max_sync = bad_sectors;
                                         continue;
                                 }
                         }