Merge commit 'v2.6.39-rc3' into for-2.6.39
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 34f224b..f7e6c92 100644
 #include "drbd_req.h"
 
 static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel);
+static int w_make_resync_request(struct drbd_conf *mdev,
+                                struct drbd_work *w, int cancel);
 
 
 
-/* defined here:
-   drbd_md_io_complete
-   drbd_endio_sec
-   drbd_endio_pri
-
- * more endio handlers:
-   atodb_endio in drbd_actlog.c
-   drbd_bm_async_io_complete in drbd_bitmap.c
-
+/* endio handlers:
+ *   drbd_md_io_complete (defined here)
+ *   drbd_endio_pri (defined here)
+ *   drbd_endio_sec (defined here)
+ *   bm_async_io_complete (defined in drbd_bitmap.c)
+ *
  * For all these callbacks, note the following:
  * The callbacks will be called in irq context by the IDE drivers,
  * and in Softirqs/Tasklets/BH context by the SCSI drivers.
@@ -94,7 +93,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
        if (list_empty(&mdev->read_ee))
                wake_up(&mdev->ee_wait);
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, FALSE);
+               __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        drbd_queue_work(&mdev->data.work, &e->w);
@@ -137,7 +136,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(local)
                : list_empty(&mdev->active_ee);
 
        if (test_bit(__EE_WAS_ERROR, &e->flags))
-               __drbd_chk_io_error(mdev, FALSE);
+               __drbd_chk_io_error(mdev, false);
        spin_unlock_irqrestore(&mdev->req_lock, flags);
 
        if (is_syncer_req)
@@ -163,14 +162,15 @@ void drbd_endio_sec(struct bio *bio, int error)
        int uptodate = bio_flagged(bio, BIO_UPTODATE);
        int is_write = bio_data_dir(bio) == WRITE;
 
-       if (error)
+       if (error && __ratelimit(&drbd_ratelimit_state))
                dev_warn(DEV, "%s: error=%d s=%llus\n",
                                is_write ? "write" : "read", error,
                                (unsigned long long)e->sector);
        if (!error && !uptodate) {
-               dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
-                               is_write ? "write" : "read",
-                               (unsigned long long)e->sector);
+               if (__ratelimit(&drbd_ratelimit_state))
+                       dev_warn(DEV, "%s: setting error to -EIO s=%llus\n",
+                                       is_write ? "write" : "read",
+                                       (unsigned long long)e->sector);
                /* strange behavior of some lower level drivers...
                 * fail the request by clearing the uptodate flag,
                 * but do not return any error?! */
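(For context: the next statement in that block, just past the end of this hunk, resolves the oddity by forcing a real error code. A one-line sketch, names as in drbd_endio_sec() above:)

        error = -EIO;   /* treat "not uptodate, but no error" as a real I/O error */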
@@ -250,13 +250,6 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        return w_send_read_req(mdev, w, 0);
 }
 
-int w_resync_inactive(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-       ERR_IF(cancel) return 1;
-       dev_err(DEV, "resync inactive, but callback triggered??\n");
-       return 1; /* Simply ignore this! */
-}
-
 void drbd_csum_ee(struct drbd_conf *mdev, struct crypto_hash *tfm, struct drbd_epoch_entry *e, void *digest)
 {
        struct hash_desc desc;
@@ -355,7 +348,7 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
        if (!get_ldev(mdev))
                return -EIO;
 
-       if (drbd_rs_should_slow_down(mdev))
+       if (drbd_rs_should_slow_down(mdev, sector))
                goto defer;
 
        /* GFP_TRY, because if there is no memory available right now, this may
@@ -373,9 +366,10 @@ static int read_for_csum(struct drbd_conf *mdev, sector_t sector, int size)
        if (drbd_submit_ee(mdev, e, READ, DRBD_FAULT_RS_RD) == 0)
                return 0;
 
-       /* drbd_submit_ee currently fails for one reason only:
-        * not being able to allocate enough bios.
-        * Is dropping the connection going to help? */
+       /* If it failed because of ENOMEM, retry should help.  If it failed
+        * because bio_add_page failed (probably broken lower level driver),
+        * retry may or may not help.
+        * If it does not, you may need to force disconnect. */
        spin_lock_irq(&mdev->req_lock);
        list_del(&e->w.list);
        spin_unlock_irq(&mdev->req_lock);
@@ -386,26 +380,25 @@ defer:
        return -EAGAIN;
 }
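The -EIO/-EAGAIN contract above is consumed by the checksum-based resync path in w_make_resync_request(); roughly, condensed from that caller (which this patch leaves intact at this spot):

        switch (read_for_csum(mdev, sector, size)) {
        case -EIO:      /* disk failure: abort this resync attempt */
                put_ldev(mdev);
                return 0;
        case -EAGAIN:   /* throttled, busy, or no memory: retry the sector later */
                drbd_rs_complete_io(mdev, sector);
                mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
                /* the real caller also rolls back its request budget here */
                goto requeue;
        case 0:
                break;
        }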
 
-void resync_timer_fn(unsigned long data)
+int w_resync_timer(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
-       struct drbd_conf *mdev = (struct drbd_conf *) data;
-       int queue;
-
-       queue = 1;
        switch (mdev->state.conn) {
        case C_VERIFY_S:
-               mdev->resync_work.cb = w_make_ov_request;
+               w_make_ov_request(mdev, w, cancel);
                break;
        case C_SYNC_TARGET:
-               mdev->resync_work.cb = w_make_resync_request;
+               w_make_resync_request(mdev, w, cancel);
                break;
-       default:
-               queue = 0;
-               mdev->resync_work.cb = w_resync_inactive;
        }
 
-       /* harmless race: list_empty outside data.work.q_lock */
-       if (list_empty(&mdev->resync_work.list) && queue)
+       return 1;
+}
+
+void resync_timer_fn(unsigned long data)
+{
+       struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+       if (list_empty(&mdev->resync_work.list))
                drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
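With the dispatch moved into w_resync_timer(), the timer no longer rewrites the work callback; it can be wired up once at init time. A sketch of the assumed setup (the init site, e.g. drbd_init_set_defaults() in drbd_main.c, is outside this diff):

        mdev->resync_work.cb = w_resync_timer;  /* worker decides between ov/resync */
        init_timer(&mdev->resync_timer);
        mdev->resync_timer.function = resync_timer_fn;  /* timer only queues the work */
        mdev->resync_timer.data = (unsigned long) mdev;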
 
@@ -438,7 +431,7 @@ static void fifo_add_val(struct fifo_buffer *fb, int value)
                fb->values[i] += value;
 }
 
-int drbd_rs_controller(struct drbd_conf *mdev)
+static int drbd_rs_controller(struct drbd_conf *mdev)
 {
        unsigned int sect_in;  /* Number of sectors that came in since the last turn */
        unsigned int want;     /* The number of sectors we want in the proxy */
@@ -492,29 +485,36 @@ int drbd_rs_controller(struct drbd_conf *mdev)
        return req_sect;
 }
 
-int w_make_resync_request(struct drbd_conf *mdev,
-               struct drbd_work *w, int cancel)
+static int drbd_rs_number_requests(struct drbd_conf *mdev)
+{
+       int number;
+       if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
+               number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
+               mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
+       } else {
+               mdev->c_sync_rate = mdev->sync_conf.rate;
+               number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+       }
+
+       /* Ignore the number of pending requests; the resync controller
+        * should throttle down to the incoming reply rate soon enough anyway. */
+       return number;
+}
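To put numbers on the fixed-rate branch, a worked example, assuming SLEEP_TIME is HZ/10 (a 100 ms tick) and BM_BLOCK_SIZE is 4096, as defined elsewhere in drbd:

        /* sync_conf.rate = 40000 KiB/s:
         *   number = (HZ/10) * 40000 / ((4096/1024) * HZ)
         *          = 40000 / 40
         *          = 1000 resync requests of 4 KiB per 100 ms tick,
         * which replays as 1000 * 4 KiB * 10 ticks/s = 40000 KiB/s.
         * The controller branch runs the same conversion backwards to
         * derive c_sync_rate from the request count it chose. */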
+
+static int w_make_resync_request(struct drbd_conf *mdev,
+                                struct drbd_work *w, int cancel)
 {
        unsigned long bit;
        sector_t sector;
        const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
-       int max_segment_size;
-       int number, rollback_i, size, pe, mx;
+       int max_bio_size;
+       int number, rollback_i, size;
        int align, queued, sndbuf;
        int i = 0;
 
        if (unlikely(cancel))
                return 1;
 
-       if (unlikely(mdev->state.conn < C_CONNECTED)) {
-               dev_err(DEV, "Confused in w_make_resync_request()! cstate < Connected");
-               return 0;
-       }
-
-       if (mdev->state.conn != C_SYNC_TARGET)
-               dev_err(DEV, "%s in w_make_resync_request\n",
-                       drbd_conn_str(mdev->state.conn));
-
        if (mdev->rs_total == 0) {
                /* empty resync? */
                drbd_resync_finished(mdev);
@@ -527,49 +527,19 @@ int w_make_resync_request(struct drbd_conf *mdev,
                   to continue resync with a broken disk makes no sense at
                   all */
                dev_err(DEV, "Disk broke down during resync!\n");
-               mdev->resync_work.cb = w_resync_inactive;
                return 1;
        }
 
        /* starting with drbd 8.3.8, we can handle multi-bio EEs,
         * if it should be necessary */
-       max_segment_size =
-               mdev->agreed_pro_version < 94 ? queue_max_segment_size(mdev->rq_queue) :
-               mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_SEGMENT_SIZE;
+       max_bio_size =
+               mdev->agreed_pro_version < 94 ? queue_max_hw_sectors(mdev->rq_queue) << 9 :
+               mdev->agreed_pro_version < 95 ? DRBD_MAX_SIZE_H80_PACKET : DRBD_MAX_BIO_SIZE;
 
-       if (mdev->rs_plan_s.size) { /* mdev->sync_conf.c_plan_ahead */
-               number = drbd_rs_controller(mdev) >> (BM_BLOCK_SHIFT - 9);
-               mdev->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
-       } else {
-               mdev->c_sync_rate = mdev->sync_conf.rate;
-               number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
-       }
-
-       /* Throttle resync on lower level disk activity, which may also be
-        * caused by application IO on Primary/SyncTarget.
-        * Keep this after the call to drbd_rs_controller, as that assumes
-        * to be called as precisely as possible every SLEEP_TIME,
-        * and would be confused otherwise. */
-       if (drbd_rs_should_slow_down(mdev))
+       number = drbd_rs_number_requests(mdev);
+       if (number == 0)
                goto requeue;
 
-       mutex_lock(&mdev->data.mutex);
-       if (mdev->data.socket)
-               mx = mdev->data.socket->sk->sk_rcvbuf / sizeof(struct p_block_req);
-       else
-               mx = 1;
-       mutex_unlock(&mdev->data.mutex);
-
-       /* For resync rates >160MB/sec, allow more pending RS requests */
-       if (number > mx)
-               mx = number;
-
-       /* Limit the number of pending RS requests to no more than the peer's receive buffer */
-       pe = atomic_read(&mdev->rs_pending_cnt);
-       if ((pe + number) > mx) {
-               number = mx - pe;
-       }
-
        for (i = 0; i < number; i++) {
                /* Stop generating RS requests, when half of the send buffer is filled */
                mutex_lock(&mdev->data.mutex);
@@ -588,16 +558,16 @@ next_sector:
                size = BM_BLOCK_SIZE;
                bit  = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
 
-               if (bit == -1UL) {
+               if (bit == DRBD_END_OF_BITMAP) {
                        mdev->bm_resync_fo = drbd_bm_bits(mdev);
-                       mdev->resync_work.cb = w_resync_inactive;
                        put_ldev(mdev);
                        return 1;
                }
 
                sector = BM_BIT_TO_SECT(bit);
 
-               if (drbd_try_rs_begin_io(mdev, sector)) {
+               if (drbd_rs_should_slow_down(mdev, sector) ||
+                   drbd_try_rs_begin_io(mdev, sector)) {
                        mdev->bm_resync_fo = bit;
                        goto requeue;
                }
@@ -608,7 +578,7 @@ next_sector:
                        goto next_sector;
                }
 
-#if DRBD_MAX_SEGMENT_SIZE > BM_BLOCK_SIZE
+#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
                /* try to find some adjacent bits.
                 * we stop if we already have the maximum req size.
                 *
@@ -618,7 +588,7 @@ next_sector:
                align = 1;
                rollback_i = i;
                for (;;) {
-                       if (size + BM_BLOCK_SIZE > max_segment_size)
+                       if (size + BM_BLOCK_SIZE > max_bio_size)
                                break;
 
                        /* Be always aligned */
@@ -685,7 +655,6 @@ next_sector:
                 * resync data block, and the last bit is cleared.
                 * until then resync "work" is "inactive" ...
                 */
-               mdev->resync_work.cb = w_resync_inactive;
                put_ldev(mdev);
                return 1;
        }
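The adjacent-bit loop is only partially visible in this hunk; as a standalone sketch of the core idea (leaving out the alignment and extent-boundary checks the real loop also performs):

        /* grow one request by absorbing consecutive dirty bits, until the
         * next 4 KiB block would push the request past max_bio_size;
         * drbd_bm_test_bit() is tri-state, hence the explicit == 1 */
        size = BM_BLOCK_SIZE;
        while (size + BM_BLOCK_SIZE <= max_bio_size &&
               bit + 1 < drbd_bm_bits(mdev) &&
               drbd_bm_test_bit(mdev, bit + 1) == 1) {
                bit++;
                size += BM_BLOCK_SIZE;
        }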
@@ -706,27 +675,18 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        if (unlikely(cancel))
                return 1;
 
-       if (unlikely(mdev->state.conn < C_CONNECTED)) {
-               dev_err(DEV, "Confused in w_make_ov_request()! cstate < Connected");
-               return 0;
-       }
-
-       number = SLEEP_TIME*mdev->sync_conf.rate / ((BM_BLOCK_SIZE/1024)*HZ);
-       if (atomic_read(&mdev->rs_pending_cnt) > number)
-               goto requeue;
-
-       number -= atomic_read(&mdev->rs_pending_cnt);
+       number = drbd_rs_number_requests(mdev);
 
        sector = mdev->ov_position;
        for (i = 0; i < number; i++) {
                if (sector >= capacity) {
-                       mdev->resync_work.cb = w_resync_inactive;
                        return 1;
                }
 
                size = BM_BLOCK_SIZE;
 
-               if (drbd_try_rs_begin_io(mdev, sector)) {
+               if (drbd_rs_should_slow_down(mdev, sector) ||
+                   drbd_try_rs_begin_io(mdev, sector)) {
                        mdev->ov_position = sector;
                        goto requeue;
                }
@@ -744,11 +704,33 @@ static int w_make_ov_request(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        mdev->ov_position = sector;
 
  requeue:
+       mdev->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
        mod_timer(&mdev->resync_timer, jiffies + SLEEP_TIME);
        return 1;
 }
 
 
+void start_resync_timer_fn(unsigned long data)
+{
+       struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+       drbd_queue_work(&mdev->data.work, &mdev->start_resync_work);
+}
+
+int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       if (atomic_read(&mdev->unacked_cnt) || atomic_read(&mdev->rs_pending_cnt)) {
+               dev_warn(DEV, "w_start_resync later...\n");
+               mdev->start_resync_timer.expires = jiffies + HZ/10;
+               add_timer(&mdev->start_resync_timer);
+               return 1;
+       }
+
+       drbd_start_resync(mdev, C_SYNC_SOURCE);
+       clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags);
+       return 1;
+}
+
 int w_ov_finished(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 {
        kfree(w);
@@ -782,6 +764,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
        union drbd_state os, ns;
        struct drbd_work *w;
        char *khelper_cmd = NULL;
+       int verify_done = 0;
 
        /* Remove all elements from the resync LRU. Since future actions
         * might set bits in the (main) bitmap, then the entries in the
@@ -792,9 +775,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
                 * queue (or even the read operations for those packets
                 * are not finished by now).   Retry in 100ms. */
 
-               drbd_kick_lo(mdev);
-               __set_current_state(TASK_INTERRUPTIBLE);
-               schedule_timeout(HZ / 10);
+               schedule_timeout_interruptible(HZ / 10);
                w = kmalloc(sizeof(struct drbd_work), GFP_ATOMIC);
                if (w) {
                        w->cb = w_resync_finished;
@@ -819,6 +800,8 @@ int drbd_resync_finished(struct drbd_conf *mdev)
        spin_lock_irq(&mdev->req_lock);
        os = mdev->state;
 
+       verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);
+
        /* This protects us against multiple calls (that can happen in the presence
           of application IO), and against connectivity loss just before we arrive here. */
        if (os.conn <= C_CONNECTED)
@@ -828,8 +811,7 @@ int drbd_resync_finished(struct drbd_conf *mdev)
        ns.conn = C_CONNECTED;
 
        dev_info(DEV, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
-            (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) ?
-            "Online verify " : "Resync",
+            verify_done ? "Online verify " : "Resync",
             dt + mdev->rs_paused, mdev->rs_paused, dbdt);
 
        n_oos = drbd_bm_total_weight(mdev);
@@ -887,14 +869,18 @@ int drbd_resync_finished(struct drbd_conf *mdev)
                        }
                }
 
-               drbd_uuid_set_bm(mdev, 0UL);
-
-               if (mdev->p_uuid) {
-                       /* Now the two UUID sets are equal, update what we
-                        * know of the peer. */
-                       int i;
-                       for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
-                               mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+               if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
+                       /* for verify runs, we don't update uuids here,
+                        * so there would be nothing to report. */
+                       drbd_uuid_set_bm(mdev, 0UL);
+                       drbd_print_uuids(mdev, "updated UUIDs");
+                       if (mdev->p_uuid) {
+                               /* Now the two UUID sets are equal, update what we
+                                * know of the peer. */
+                               int i;
+                               for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
+                                       mdev->p_uuid[i] = mdev->ldev->md.uuid[i];
+                       }
                }
        }
 
@@ -906,15 +892,11 @@ out:
        mdev->rs_total  = 0;
        mdev->rs_failed = 0;
        mdev->rs_paused = 0;
-       mdev->ov_start_sector = 0;
+       if (verify_done)
+               mdev->ov_start_sector = 0;
 
        drbd_md_sync(mdev);
 
-       if (test_and_clear_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags)) {
-               dev_info(DEV, "Writing the whole bitmap\n");
-               drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
-       }
-
        if (khelper_cmd)
                drbd_khelper(mdev, khelper_cmd);
 
@@ -995,7 +977,9 @@ int w_e_end_rsdata_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                put_ldev(mdev);
        }
 
-       if (likely((e->flags & EE_WAS_ERROR) == 0)) {
+       if (mdev->state.conn == C_AHEAD) {
+               ok = drbd_send_ack(mdev, P_RS_CANCEL, e);
+       } else if (likely((e->flags & EE_WAS_ERROR) == 0)) {
                if (likely(mdev->state.pdsk >= D_INCONSISTENT)) {
                        inc_rs_pending(mdev);
                        ok = drbd_send_block(mdev, P_RS_DATA_REPLY, e);
@@ -1097,25 +1081,27 @@ int w_e_end_ov_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        if (unlikely(cancel))
                goto out;
 
-       if (unlikely((e->flags & EE_WAS_ERROR) != 0))
-               goto out;
-
        digest_size = crypto_hash_digestsize(mdev->verify_tfm);
-       /* FIXME if this allocation fails, online verify will not terminate! */
        digest = kmalloc(digest_size, GFP_NOIO);
-       if (digest) {
-               drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
-               inc_rs_pending(mdev);
-               ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
-                                            digest, digest_size, P_OV_REPLY);
-               if (!ok)
-                       dec_rs_pending(mdev);
-               kfree(digest);
+       if (!digest) {
+               ok = 0; /* terminate the connection in case the allocation failed */
+               goto out;
        }
 
+       if (likely(!(e->flags & EE_WAS_ERROR)))
+               drbd_csum_ee(mdev, mdev->verify_tfm, e, digest);
+       else
+               memset(digest, 0, digest_size);
+
+       inc_rs_pending(mdev);
+       ok = drbd_send_drequest_csum(mdev, e->sector, e->size,
+                                    digest, digest_size, P_OV_REPLY);
+       if (!ok)
+               dec_rs_pending(mdev);
+       kfree(digest);
+
 out:
        drbd_free_ee(mdev, e);
-
        dec_unacked(mdev);
 
        return ok;
@@ -1130,7 +1116,6 @@ void drbd_ov_oos_found(struct drbd_conf *mdev, sector_t sector, int size)
                mdev->ov_last_oos_size = size>>9;
        }
        drbd_set_out_of_sync(mdev, sector, size);
-       set_bit(WRITE_BM_AFTER_RESYNC, &mdev->flags);
 }
 
 int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
@@ -1166,10 +1151,6 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
                        eq = !memcmp(digest, di->digest, digest_size);
                        kfree(digest);
                }
-       } else {
-               ok = drbd_send_ack(mdev, P_NEG_RS_DREPLY, e);
-               if (__ratelimit(&drbd_ratelimit_state))
-                       dev_err(DEV, "Sending NegDReply. I guess it gets messy.\n");
        }
 
        dec_unacked(mdev);
@@ -1183,7 +1164,13 @@ int w_e_end_ov_reply(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 
        drbd_free_ee(mdev, e);
 
-       if (--mdev->ov_left == 0) {
+       --mdev->ov_left;
+
+       /* let's advance progress step marks only for every other megabyte */
+       if ((mdev->ov_left & 0x200) == 0x200)
+               drbd_advance_rs_marks(mdev, mdev->ov_left);
+
+       if (mdev->ov_left == 0) {
                ov_oos_print(mdev);
                drbd_resync_finished(mdev);
        }
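A note on the 0x200 mask above, with ov_left counting BM_BLOCK_SIZE (4 KiB) blocks:

        /* 0x200 blocks * 4 KiB/block = 2 MiB: bit 9 of ov_left flips
         * every 2 MiB of verify progress, so the rate/ETA marks are
         * advanced only while that bit is set, i.e. during alternating
         * 2 MiB spans ("every other megabyte", as the comment puts it). */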
@@ -1236,6 +1223,22 @@ int w_send_write_hint(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
        return drbd_send_short_cmd(mdev, P_UNPLUG_REMOTE);
 }
 
+int w_send_oos(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       struct drbd_request *req = container_of(w, struct drbd_request, w);
+       int ok;
+
+       if (unlikely(cancel)) {
+               req_mod(req, send_canceled);
+               return 1;
+       }
+
+       ok = drbd_send_oos(mdev, req);
+       req_mod(req, oos_handed_to_network);
+
+       return ok;
+}
+
 /**
  * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
  * @mdev:      DRBD device.
@@ -1431,6 +1434,17 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
        return retcode;
 }
 
+void drbd_rs_controller_reset(struct drbd_conf *mdev)
+{
+       atomic_set(&mdev->rs_sect_in, 0);
+       atomic_set(&mdev->rs_sect_ev, 0);
+       mdev->rs_in_flight = 0;
+       mdev->rs_planed = 0;
+       spin_lock(&mdev->peer_seq_lock);
+       fifo_set(&mdev->rs_plan_s, 0);
+       spin_unlock(&mdev->peer_seq_lock);
+}
+
 /**
  * drbd_start_resync() - Start the resync process
  * @mdev:      DRBD device.
@@ -1444,13 +1458,18 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
        union drbd_state ns;
        int r;
 
-       if (mdev->state.conn >= C_SYNC_SOURCE) {
+       if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn < C_AHEAD) {
                dev_err(DEV, "Resync already running!\n");
                return;
        }
 
-       /* In case a previous resync run was aborted by an IO error/detach on the peer. */
-       drbd_rs_cancel_all(mdev);
+       if (mdev->state.conn < C_AHEAD) {
+               /* In case a previous resync run was aborted by an IO error/detach on the peer. */
+               drbd_rs_cancel_all(mdev);
+               /* This should be done when we abort the resync. We definitely do not
+                  want to have this for connections going back and forth between
+                  Ahead/Behind and SyncSource/SyncTarget */
+       }
 
        if (side == C_SYNC_TARGET) {
                /* Since application IO was locked out during C_WF_BITMAP_T and
@@ -1464,6 +1483,20 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                        drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
                        return;
                }
+       } else /* C_SYNC_SOURCE */ {
+               r = drbd_khelper(mdev, "before-resync-source");
+               r = (r >> 8) & 0xff;
+               if (r > 0) {
+                       if (r == 3) {
+                               dev_info(DEV, "before-resync-source handler returned %d, "
+                                        "ignoring. Old userland tools?\n", r);
+                       } else {
+                               dev_info(DEV, "before-resync-source handler returned %d, "
+                                        "dropping connection.\n", r);
+                               drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+                               return;
+                       }
+               }
        }
 
        drbd_state_lock(mdev);
@@ -1473,18 +1506,6 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                return;
        }
 
-       if (side == C_SYNC_TARGET) {
-               mdev->bm_resync_fo = 0;
-       } else /* side == C_SYNC_SOURCE */ {
-               u64 uuid;
-
-               get_random_bytes(&uuid, sizeof(u64));
-               drbd_uuid_set(mdev, UI_BITMAP, uuid);
-               drbd_send_sync_uuid(mdev, uuid);
-
-               D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
-       }
-
        write_lock_irq(&global_state_lock);
        ns = mdev->state;
 
@@ -1522,13 +1543,24 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                _drbd_pause_after(mdev);
        }
        write_unlock_irq(&global_state_lock);
-       put_ldev(mdev);
 
        if (r == SS_SUCCESS) {
                dev_info(DEV, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
                     drbd_conn_str(ns.conn),
                     (unsigned long) mdev->rs_total << (BM_BLOCK_SHIFT-10),
                     (unsigned long) mdev->rs_total);
+               if (side == C_SYNC_TARGET)
+                       mdev->bm_resync_fo = 0;
+
+               /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
+                * with w_send_oos, or the sync target will get confused as to
+                * how many bits to resync.  We cannot do that always, because for an
+                * empty resync and protocol < 95, we need to do it here, as we call
+                * drbd_resync_finished from here in that case.
+                * We call drbd_gen_and_send_sync_uuid here for protocol < 96,
+                * and from after_state_ch otherwise. */
+               if (side == C_SYNC_SOURCE && mdev->agreed_pro_version < 96)
+                       drbd_gen_and_send_sync_uuid(mdev);
 
                if (mdev->agreed_pro_version < 95 && mdev->rs_total == 0) {
                        /* This still has a race (about when exactly the peers
@@ -1548,13 +1580,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
                        drbd_resync_finished(mdev);
                }
 
-               atomic_set(&mdev->rs_sect_in, 0);
-               atomic_set(&mdev->rs_sect_ev, 0);
-               mdev->rs_in_flight = 0;
-               mdev->rs_planed = 0;
-               spin_lock(&mdev->peer_seq_lock);
-               fifo_set(&mdev->rs_plan_s, 0);
-               spin_unlock(&mdev->peer_seq_lock);
+               drbd_rs_controller_reset(mdev);
                /* ns.conn may already be != mdev->state.conn,
                 * we may have been paused in between, or become paused until
                 * the timer triggers.
@@ -1564,6 +1590,7 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
 
                drbd_md_sync(mdev);
        }
+       put_ldev(mdev);
        drbd_state_unlock(mdev);
 }