Merge commit 'v2.6.39-rc3' into for-2.6.39
[pandora-kernel.git] / drivers / block / drbd / drbd_worker.c
index 4008130..f7e6c92 100644 (file)
 #include "drbd_req.h"
 
 static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+static int w_make_resync_request(struct drbd_conf *mdev,
+                                struct drbd_work *w, int cancel);
 
 
 
-/* defined here:
-   drbd_md_io_complete
-   drbd_endio_sec
-   drbd_endio_pri
-
- * more endio handlers:
-   atodb_endio in drbd_actlog.c
-   drbd_bm_async_io_complete in drbd_bitmap.c
-
+/* endio handlers:
+ *   drbd_md_io_complete (defined here)
+ *   drbd_endio_pri (defined here)
+ *   drbd_endio_sec (defined here)
+ *   bm_async_io_complete (defined in drbd_bitmap.c)
+ *
  * For all these callbacks, note the following:
  * The callbacks will be called in irq context by the IDE drivers,
  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
@@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
        if (list_empty(&mdev->read_ee))
                wake_up(&mdev->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, FALSE);
+               __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        drbd_queue_work(&mdev->data.work, &e->w);
@@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
                : list_empty(&mdev->active_ee);
 
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, FALSE);
+               __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        if (is_syncer_req)
@@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error)
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;
 
-       if (error)
+       if (error && __ratelimit(&drbd_ratelimit_state))
                dev_warn(DEV, "%s: error=%d s=%llus\n",
                                is_write ? "write" : "read", error,
                                (unsigned long long)e->sector);
        if (!error && !uptodate) {
-               dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
-                               is_write ? "write" : "read",
-                               (unsigned long long)e->sector);
+               if (__ratelimit(&drbd_ratelimit_state))
+                       dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
+                                       is_write ? "write" : "read",
+                                       (unsigned long long)e->sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
@@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        return w_send_read_req(mdev, w, 0);
 }
 
-int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-       ERR_IF(cancel) return 1;
-       dev_err(DEV, "resync inactive, but callback triggered??\n");
-       return 1; /* Simply ignore this! */
-}
-
 void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
 {
        struct hash_desc desc;
@@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
        if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;
 
-       /* drbd_submit_ee currently fails for one reason only:
-        * not being able to allocate enough bios.
-        * Is dropping the connection going to help? */
+       /* If it failed because of ENOMEM, retry should help.  If it failed
+        * because bio_add_page failed (probably broken lower level driver),
+        * retry may or may not help.
+        * If it does not, you may need to force disconnect. */
        spin_lock_irq(&mdev->req_lock);
        list_del(&e->w.list);
        spin_unlock_irq(&mdev->req_lock);
@@ -386,26 +380,25 @@ defer:
        return -EAGAIN;
 }
 
-void resync_timer_fn(unsigned long data)
+int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
-       struct drbd_conf *mdev = (struct drbd_conf *) data;
-       int queue;
-
-       queue = 1;
        switch (mdev->state.conn) {
        case C_VERIFY_S:
-               mdev->resync_work.cb = w_make_ov_request;
+               w_make_ov_request(mdev, w, cancel);
                break;
        case C_SYNC_TARGET:
-               mdev->resync_work.cb = w_make_resync_request;
+               w_make_resync_request(mdev, w, cancel);
                break;
-       default:
-               queue = 0;
-               mdev->resync_work.cb = w_resync_inactive;
        }
 
-       /* harmless race: list_empty outside data.work.q_lock */
-       if (list_empty(&mdev->resync_work.list) && queue)
+       return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+       struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+       if (list_empty(&mdev->resync_work.list))
                drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
@@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
                fb->values[i] += value;
 }
 
-int drbd_rs_controller(struct drbd_conf *mdev)
+static int drbd_rs_controller(struct drbd_conf *mdev)
 {
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        unsigned int want;     /* The number of sectors we want in the proxy */
@@ -492,7 +485,7 @@ int drbd_rs_controller(struct drbd_conf *mdev)
        return req_sect;
 }
 
-int drbd_rs_number_requests(struct drbd_conf *mdev)
+static int drbd_rs_number_requests(struct drbd_conf *mdev)
 {
        int number;
        if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
@@ -508,8 +501,8 @@ int drbd_rs_number_requests(struct drbd_conf *mdev)
        return number;
 }
 
-int w_make_resync_request(struct drbd_conf *mdev,
-               struct drbd_work *w, int cancel)
+static int w_make_resync_request(struct drbd_conf *mdev,
+                                struct drbd_work *w, int cancel)
 {
        unsigned long bit;
        sector_t sector;
@@ -522,15 +515,6 @@ int w_make_resync_request(struct drbd_conf *mdev,
        if (unlikely(cancel))
                return 1;
 
-       if (unlikely(mdev->state.conn < C_CONNECTED)) {
-               dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
-               return 0;
-       }
-
-       if (mdev->state.conn != C_SYNC_TARGET)
-               dev_err(DEV, "%s in w_make_resync_request\n",
-                       drbd_conn_str(mdev->state.conn));
-
        if (mdev->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(mdev);
@@ -543,7 +527,6 @@ int w_make_resync_request(struct drbd_conf *mdev,
                   to continue resync with a broken disk makes no sense at
                   all */
                dev_err(DEV, "Disk broke down during resync!\n");
-               mdev->resync_work.cb = w_resync_inactive;
                return 1;
        }
 
@@ -575,9 +558,8 @@ next_sector:
                size = BM_BLOCK_SIZE;
                bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
 
-               if (bit == -1UL) {
+               if (bit == DRBD_END_OF_BITMAP) {
                        mdev->bm_resync_fo = drbd_bm_bits(mdev);
-                       mdev->resync_work.cb = w_resync_inactive;
                        put_ldev(mdev);
                        return 1;
                }
@@ -673,7 +655,6 @@ next_sector:
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
-               mdev->resync_work.cb = w_resync_inactive;
                put_ldev(mdev);
                return 1;
        }
@@ -694,17 +675,11 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
        if (unlikely(cancel))
                return 1;
 
-       if (unlikely(mdev->state.conn < C_CONNECTED)) {
-               dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
-               return 0;
-       }
-
        number = drbd_rs_number_requests(mdev);
 
        sector = mdev->ov_position;
        for (i = 0; i < number; i++) {
                if (sector >= capacity) {
-                       mdev->resync_work.cb = w_resync_inactive;
                        return 1;
                }
 
@@ -735,10 +710,24 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int ca
 }
 
 
+void start_resync_timer_fn(unsigned long data)
+{
+       struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+       drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
+}
+
 int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
-       drbd_start_resync(mdev, C_SYNC_SOURCE);
+       if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+               dev_warn(DEV, "w_start_resync later...\n");
+               mdev->start_resync_timer.expires = jiffies + HZ/10;
+               add_timer(&mdev->start_resync_timer);
+               return 1;
+       }
 
+       drbd_start_resync(mdev, C_SYNC_SOURCE);
+       clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
        return 1;
 }
 
@@ -786,8 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
                 * queue (or even the read operations for those packets
                 * is not finished by now).   Retry in 100ms. */
 
-               __set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(HZ / 10);
+               schedule_timeout_interruptible(HZ / 10);
                w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
                if (w) {
                        w->cb = w_resync_finished;
@@ -881,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev)
                        }
                }
 
-               drbd_uuid_set_bm(mdev, 0UL);
-
-               if (mdev->p_uuid) {
-                       /* Now the two UUID sets are equal, update what we
-                        * know of the peer. */
-                       int i;
-                       for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
-                               mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+               if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
+                       /* for verify runs, we don't update uuids here,
+                        * so there would be nothing to report. */
+                       drbd_uuid_set_bm(mdev, 0UL);
+                       drbd_print_uuids(mdev, "updated UUIDs");
+                       if (mdev->p_uuid) {
+                               /* Now the two UUID sets are equal, update what we
+                                * know of the peer. */
+                               int i;
+                               for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+                                       mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+                       }
                }
        }
 
@@ -905,11 +897,6 @@ out:
 
        drbd_md_sync(mdev);
 
-       if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
-               dev_info(DEV, "Writing the whole bitmap\n");
-               drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
-       }
-
        if (khelper_cmd)
                drbd_khelper(mdev, khelper_cmd);
 
@@ -990,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                put_ldev(mdev);
        }
 
-       if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+       if (mdev->state.conn == C_AHEAD) {
+               ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+       } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
                if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
                        inc_rs_pending(mdev);
                        ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
@@ -1092,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        if (unlikely(cancel))
                goto out;
 
-       if (unlikely((e->flags & EE_WAS_ERROR) != 0))
-               goto out;
-
        digest_size = crypto_hash_digestsize(mdev->verify_tfm);
-       /* FIXME if this allocation fails, online verify will not terminate! */
        digest = kmalloc(digest_size, GFP_NOIO);
-       if (digest) {
-               drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
-               inc_rs_pending(mdev);
-               ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
-                                            digest, digest_size, P_OV_REPLY);
-               if (!ok)
-                       dec_rs_pending(mdev);
-               kfree(digest);
+       if (!digest) {
+               ok = 0; /* terminate the connection in case the allocation failed */
+               goto out;
        }
 
+       if (likely(!(e->flags & EE_WAS_ERROR)))
+               drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+       else
+               memset(digest, 0, digest_size);
+
+       inc_rs_pending(mdev);
+       ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+                                    digest, digest_size, P_OV_REPLY);
+       if (!ok)
+               dec_rs_pending(mdev);
+       kfree(digest);
+
 out:
        drbd_free_ee(mdev, e);
-
        dec_unacked(mdev);
 
        return ok;
@@ -1125,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
                mdev->ov_last_oos_size = size>>9;
        }
        drbd_set_out_of_sync(mdev, sector, size);
-       set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 }
 
 int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -1161,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                        eq = !memcmp(digest, di->digest, digest_size);
                        kfree(digest);
                }
-       } else {
-               ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
-               if (__ratelimit(&drbd_ratelimit_state))
-                       dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
        }
 
        dec_unacked(mdev);
@@ -1497,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                        drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
                        return;
                }
+       } else /* C_SYNC_SOURCE */ {
+               r = drbd_khelper(mdev, "before-resync-source");
+               r = (r >> 8) & 0xff;
+               if (r > 0) {
+                       if (r == 3) {
+                               dev_info(DEV, "before-resync-source handler returned %d, "
+                                        "ignoring. Old userland tools?", r);
+                       } else {
+                               dev_info(DEV, "before-resync-source handler returned %d, "
+                                        "dropping connection.\n", r);
+                               drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+                               return;
+                       }
+               }
        }
 
        drbd_state_lock(mdev);
@@ -1506,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                return;
        }
 
-       if (side == C_SYNC_TARGET) {
-               mdev->bm_resync_fo = 0;
-       } else /* side == C_SYNC_SOURCE */ {
-               u64 uuid;
-
-               get_random_bytes(&uuid, sizeof(u64));
-               drbd_uuid_set(mdev, UI_BITMAP, uuid);
-               drbd_send_sync_uuid(mdev, uuid);
-
-               D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
-       }
-
        write_lock_irq(&global_state_lock);
        ns = mdev->state;
 
@@ -1555,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                _drbd_pause_after(mdev);
        }
        write_unlock_irq(&global_state_lock);
-       put_ldev(mdev);
 
        if (r == SS_SUCCESS) {
                dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
                     drbd_conn_str(ns.conn),
                     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
                     (unsigned long) mdev->rs_total);
+               if (side == C_SYNC_TARGET)
+                       mdev->bm_resync_fo = 0;
+
+               /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
+                * with w_send_oos, or the sync target will get confused as to
+                * how much bits to resync.  We cannot do that always, because for an
+                * empty resync and protocol < 95, we need to do it here, as we call
+                * drbd_resync_finished from here in that case.
+                * We drbd_gen_and_send_sync_uuid here for protocol < 96,
+                * and from after_state_ch otherwise. */
+               if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+                       drbd_gen_and_send_sync_uuid(mdev);
 
                if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
                        /* This still has a race (about when exactly the peers
@@ -1591,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 
                drbd_md_sync(mdev);
        }
+       put_ldev(mdev);
        drbd_state_unlock(mdev);
 }