md: fix raid5 'repair' operations
[pandora-kernel.git] / drivers / md / raid5.c
index 968daca..ee0ea91 100644 (file)
@@ -2369,8 +2369,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 
        /* complete a check operation */
        if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-           clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-           clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+               clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+               clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
                if (s->failed == 0) {
                        if (sh->ops.zero_sum_result == 0)
                                /* parity is correct (on disc,
@@ -2400,16 +2400,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                        canceled_check = 1; /* STRIPE_INSYNC is not set */
        }
 
-       /* check if we can clear a parity disk reconstruct */
-       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
-               test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-
-               clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
-               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
-       }
-
        /* start a new check operation if there are no failures, the stripe is
         * not insync, and a repair is not in flight
         */
@@ -2424,6 +2414,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                }
        }
 
+       /* check if we can clear a parity disk reconstruct */
+       if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+           test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+
+               clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+               clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+       }
+
+
        /* Wait for check parity and compute block operations to complete
         * before write-back.  If a failure occurred while the check operation
         * was in flight we need to cycle this stripe through handle_stripe
@@ -2607,6 +2608,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
        }
 }
 
+
 /*
  * handle_stripe - do things to a stripe.
  *
@@ -2632,6 +2634,7 @@ static void handle_stripe5(struct stripe_head *sh)
        struct stripe_head_state s;
        struct r5dev *dev;
        unsigned long pending = 0;
+       mdk_rdev_t *blocked_rdev = NULL;
 
        memset(&s, 0, sizeof(s));
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2691,6 +2694,11 @@ static void handle_stripe5(struct stripe_head *sh)
                if (dev->written)
                        s.written++;
                rdev = rcu_dereference(conf->disks[i].rdev);
+               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                       blocked_rdev = rdev;
+                       atomic_inc(&rdev->nr_pending);
+                       break;
+               }
                if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                        /* The ReadError flag will just be confusing now */
                        clear_bit(R5_ReadError, &dev->flags);
@@ -2705,6 +2713,11 @@ static void handle_stripe5(struct stripe_head *sh)
        }
        rcu_read_unlock();
 
+       if (unlikely(blocked_rdev)) {
+               set_bit(STRIPE_HANDLE, &sh->state);
+               goto unlock;
+       }
+
        if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
                sh->ops.count++;
 
@@ -2894,8 +2907,13 @@ static void handle_stripe5(struct stripe_head *sh)
        if (sh->ops.count)
                pending = get_stripe_work(sh);
 
+ unlock:
        spin_unlock(&sh->lock);
 
+       /* wait for this device to become unblocked */
+       if (unlikely(blocked_rdev))
+               md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
        if (pending)
                raid5_run_ops(sh, pending);
 
@@ -2912,6 +2930,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
        struct stripe_head_state s;
        struct r6_state r6s;
        struct r5dev *dev, *pdev, *qdev;
+       mdk_rdev_t *blocked_rdev = NULL;
 
        r6s.qd_idx = raid6_next_disk(pd_idx, disks);
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
@@ -2975,6 +2994,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                if (dev->written)
                        s.written++;
                rdev = rcu_dereference(conf->disks[i].rdev);
+               if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                       blocked_rdev = rdev;
+                       atomic_inc(&rdev->nr_pending);
+                       break;
+               }
                if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                        /* The ReadError flag will just be confusing now */
                        clear_bit(R5_ReadError, &dev->flags);
@@ -2989,6 +3013,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                        set_bit(R5_Insync, &dev->flags);
        }
        rcu_read_unlock();
+
+       if (unlikely(blocked_rdev)) {
+               set_bit(STRIPE_HANDLE, &sh->state);
+               goto unlock;
+       }
        pr_debug("locked=%d uptodate=%d to_read=%d"
               " to_write=%d failed=%d failed_num=%d,%d\n",
               s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -3094,8 +3123,13 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
            !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
                handle_stripe_expansion(conf, sh, &r6s);
 
+ unlock:
        spin_unlock(&sh->lock);
 
+       /* wait for this device to become unblocked */
+       if (unlikely(blocked_rdev))
+               md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);
+
        return_io(return_bi);
 
        for (i=disks; i-- ;) {