drbd: factored tl_restart() out of tl_clear().
author	Philipp Reisner <philipp.reisner@linbit.com>
Wed, 12 May 2010 15:08:26 +0000 (17:08 +0200)
committer	Philipp Reisner <philipp.reisner@linbit.com>
Thu, 14 Oct 2010 12:35:58 +0000 (14:35 +0200)
If IO was frozen during a temporary network outage, resend the
contents of the transfer log into the newly established connection.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
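
For orientation, a minimal sketch of how the new entry point is intended to be
used once a replacement connection is up. The call site is hypothetical; this
patch only factors the helper out of tl_clear() and exports it:

	/* Hypothetical call site, not part of this patch: once the receiver
	 * has re-established the connection, replay the frozen transfer log
	 * by re-queueing every request that never got its P_BARRIER_ACK. */
	static void resend_frozen_tl(struct drbd_conf *mdev)
	{
		tl_restart(mdev, resend);	/* takes mdev->req_lock itself */
	}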

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 11b7c6f..bef9138 100644
@@ -1138,6 +1138,8 @@ extern void drbd_free_resources(struct drbd_conf *mdev);
 extern void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
                       unsigned int set_size);
 extern void tl_clear(struct drbd_conf *mdev);
+enum drbd_req_event;
+extern void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what);
 extern void _tl_add_barrier(struct drbd_conf *, struct drbd_tl_epoch *);
 extern void drbd_free_sock(struct drbd_conf *mdev);
 extern int drbd_send(struct drbd_conf *mdev, struct socket *sock,
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a86e6f1..a8a0341 100644
@@ -333,59 +333,94 @@ bail:
        drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
-
 /**
- * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:      DRBD device.
+ * @what:       The action/event to perform with all request objects
  *
- * This is called after the connection to the peer was lost. The storage covered
- * by the requests on the transfer gets marked as our of sync. Called from the
- * receiver thread and the worker thread.
+ * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
+ * restart_frozen_disk_io.
  */
-void tl_clear(struct drbd_conf *mdev)
+static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
 {
-       struct drbd_tl_epoch *b, *tmp;
+       struct drbd_tl_epoch *b, *tmp, **pn;
        struct list_head *le, *tle;
-       struct drbd_request *r;
-       int new_initial_bnr = net_random();
-
-       spin_lock_irq(&mdev->req_lock);
+       struct drbd_request *req;
+       int rv, n_writes, n_reads;
 
        b = mdev->oldest_tle;
+       pn = &mdev->oldest_tle;
        while (b) {
+               n_writes = 0;
+               n_reads = 0;
                list_for_each_safe(le, tle, &b->requests) {
-                       r = list_entry(le, struct drbd_request, tl_requests);
-                       /* It would be nice to complete outside of spinlock.
-                        * But this is easier for now. */
-                       _req_mod(r, connection_lost_while_pending);
+                       req = list_entry(le, struct drbd_request, tl_requests);
+                       rv = _req_mod(req, what);
+
+                       n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
+                       n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
                }
                tmp = b->next;
 
-               /* there could still be requests on that ring list,
-                * in case local io is still pending */
-               list_del(&b->requests);
-
-               /* dec_ap_pending corresponding to queue_barrier.
-                * the newest barrier may not have been queued yet,
-                * in which case w.cb is still NULL. */
-               if (b->w.cb != NULL)
-                       dec_ap_pending(mdev);
-
-               if (b == mdev->newest_tle) {
-                       /* recycle, but reinit! */
-                       D_ASSERT(tmp == NULL);
-                       INIT_LIST_HEAD(&b->requests);
-                       INIT_LIST_HEAD(&b->w.list);
-                       b->w.cb = NULL;
-                       b->br_number = new_initial_bnr;
-                       b->n_writes = 0;
-
-                       mdev->oldest_tle = b;
-                       break;
+               if (n_writes + n_reads) {
+                       if (what == resend) {
+                               b->n_writes = n_writes;
+                               if (b->w.cb == NULL) {
+                                       b->w.cb = w_send_barrier;
+                                       inc_ap_pending(mdev);
+                                       set_bit(CREATE_BARRIER, &mdev->flags);
+                               }
+
+                               drbd_queue_work(&mdev->data.work, &b->w);
+                       }
+                       pn = &b->next;
+               } else {
+                       /* there could still be requests on that ring list,
+                        * in case local io is still pending */
+                       list_del(&b->requests);
+
+                       /* dec_ap_pending corresponding to queue_barrier.
+                        * the newest barrier may not have been queued yet,
+                        * in which case w.cb is still NULL. */
+                       if (b->w.cb != NULL)
+                               dec_ap_pending(mdev);
+
+                       if (b == mdev->newest_tle) {
+                               /* recycle, but reinit! */
+                               D_ASSERT(tmp == NULL);
+                               INIT_LIST_HEAD(&b->requests);
+                               INIT_LIST_HEAD(&b->w.list);
+                               b->w.cb = NULL;
+                               b->br_number = net_random();
+                               b->n_writes = 0;
+
+                               *pn = b;
+                               break;
+                       }
+                       *pn = tmp;
+                       kfree(b);
                }
-               kfree(b);
                b = tmp;
        }
+}
+
+
+/**
+ * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
+ * @mdev:      DRBD device.
+ *
+ * This is called after the connection to the peer was lost. The storage covered
+ * by the requests on the transfer log gets marked as out of sync. Called from the
+ * receiver thread and the worker thread.
+ */
+void tl_clear(struct drbd_conf *mdev)
+{
+       struct list_head *le, *tle;
+       struct drbd_request *r;
+
+       spin_lock_irq(&mdev->req_lock);
+
+       _tl_restart(mdev, connection_lost_while_pending);
 
        /* we expect this list to be empty. */
        D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
@@ -406,6 +441,13 @@ void tl_clear(struct drbd_conf *mdev)
        spin_unlock_irq(&mdev->req_lock);
 }
 
+void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
+{
+       spin_lock_irq(&mdev->req_lock);
+       _tl_restart(mdev, what);
+       spin_unlock_irq(&mdev->req_lock);
+}
+
 /**
  * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
  * @mdev:      DRBD device.
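
Note the locking convention the hunk above establishes: _tl_restart() expects
mdev->req_lock to be held, while tl_clear() and the new tl_restart() wrapper
take the lock themselves. A sketch of the two calling conventions, for
illustration only:

	/* Within drbd_main.c, with the lock taken explicitly: */
	spin_lock_irq(&mdev->req_lock);
	_tl_restart(mdev, connection_lost_while_pending);
	spin_unlock_irq(&mdev->req_lock);

	/* From other files, via the exported wrapper, which locks itself: */
	tl_restart(mdev, resend);
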
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 88a5e1f..8daa920 100644
@@ -776,9 +776,6 @@ static int drbd_connect(struct drbd_conf *mdev)
 
        D_ASSERT(!mdev->data.socket);
 
-       if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags))
-               dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n");
-
        if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
                return -2;
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d9df1a1..39c2cc3 100644
@@ -634,6 +634,20 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
                /* else: done by handed_over_to_network */
                break;
 
+       case resend:
+               /* If RQ_NET_OK is already set, we got a P_WRITE_ACK or P_RECV_ACK
+                  before the connection loss; only P_BARRIER_ACK was missing.
+                  We throw them out of the TL here by pretending we got a P_BARRIER_ACK.
+                  TODO: Either resync them, or ensure peer was not rebooted. */
+               if (!(req->rq_state & RQ_NET_OK)) {
+                       if (req->w.cb) {
+                               drbd_queue_work(&mdev->data.work, &req->w);
+                               rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
+                       }
+                       break;
+               }
+               /* else, fall through to barrier_acked */
+
        case barrier_acked:
                if (!(req->rq_state & RQ_WRITE))
                        break;
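
Condensed, the new resend case sorts each request into one of two classes; a
sketch of that decision (the hunk above is authoritative):

	/* Per request visited by a resend walk of the transfer log: */
	if (req->rq_state & RQ_NET_OK) {
		/* Acked (P_WRITE_ACK/P_RECV_ACK) before the outage; only the
		 * barrier ack is missing, so it is treated like barrier_acked
		 * and dropped from the transfer log. */
	} else if (req->w.cb) {
		/* Never acked: re-queue the work item so the data is sent
		 * again over the new connection, and report MR_WRITE or
		 * MR_READ so _tl_restart() counts it in the epoch. */
		drbd_queue_work(&mdev->data.work, &req->w);
	}
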
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index db37c6e..1bcb855 100644
@@ -104,6 +104,7 @@ enum drbd_req_event {
        read_ahead_completed_with_error,
        write_completed_with_error,
        completed_ok,
+       resend,
        nothing, /* for tracing only */
 };
 
@@ -206,6 +207,13 @@ enum drbd_req_state_bits {
 
 #define RQ_WRITE           (1UL << __RQ_WRITE)
 
+/* For waking up the frozen transfer log, _req_mod() has to return whether the
+   request should be counted in the epoch object. */
+#define MR_WRITE_SHIFT 0
+#define MR_WRITE       (1 << MR_WRITE_SHIFT)
+#define MR_READ_SHIFT  1
+#define MR_READ        (1 << MR_READ_SHIFT)
+
 /* epoch entries */
 static inline
 struct hlist_head *ee_hash_slot(struct drbd_conf *mdev, sector_t sector)
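
To illustrate the MR_* convention above: _req_mod() returns a small bit mask,
and the transfer-log walker folds it back into per-epoch counters, as
_tl_restart() does in drbd_main.c. A reduced sketch:

	int rv, n_writes = 0, n_reads = 0;

	/* rv is 0, MR_WRITE or MR_READ, depending on what was re-queued. */
	rv = _req_mod(req, resend);
	n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
	n_reads  += (rv & MR_READ)  >> MR_READ_SHIFT;

	/* An epoch that ends up with n_writes + n_reads == 0 has nothing left
	 * to resend, so its drbd_tl_epoch object can be unlinked and freed. */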