drbd: log UUIDs whenever they change
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7eb447d..b68332a 100644
@@ -85,7 +85,8 @@ MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
 MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
 MODULE_VERSION(REL_VERSION);
 MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+                __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
 MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
 
 #include <linux/moduleparam.h>
@@ -115,7 +116,7 @@ module_param(fault_devs, int, 0644);
 #endif
 
 /* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
 int disable_sendpage;
 int allow_oos;
 unsigned int cn_idx = CN_IDX_DRBD;
@@ -335,6 +336,24 @@ bail:
        drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
 }
 
+
+/* In C_AHEAD mode only out_of_sync packets are sent for requests. Detach
+ * those requests from the newest barrier when changing to another cstate.
+ *
+ * That headless list vanishes when the last request finishes its write or
+ * sends its out_of_sync packet.  */
+static void tl_forget(struct drbd_conf *mdev)
+{
+       struct drbd_tl_epoch *b;
+
+       if (test_bit(CREATE_BARRIER, &mdev->flags))
+               return;
+
+       b = mdev->newest_tle;
+       list_del(&b->requests);
+       _tl_add_barrier(mdev, b);
+}
+
 /**
  * _tl_restart() - Walks the transfer log, and applies an action to all requests
  * @mdev:      DRBD device.
@@ -1140,6 +1159,10 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
                atomic_inc(&mdev->local_cnt);
 
        mdev->state = ns;
+
+       if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+               drbd_print_uuids(mdev, "attached to UUIDs");
+
        wake_up(&mdev->misc_wait);
        wake_up(&mdev->state_wait);
 
@@ -1242,6 +1265,14 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
        if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
                drbd_resume_al(mdev);
 
+       /* Start a new epoch in case we start to mirror write requests */
+       if (!drbd_should_do_remote(os) && drbd_should_do_remote(ns))
+               tl_forget(mdev);
+
+       /* Do not add local-only requests to an epoch with mirrored requests */
+       if (drbd_should_do_remote(os) && !drbd_should_do_remote(ns))
+               set_bit(CREATE_BARRIER, &mdev->flags);
+
        ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
        if (ascw) {
                ascw->os = os;
@@ -1289,6 +1320,26 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
        }
 }
 
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+{
+       int rv;
+
+       D_ASSERT(current == mdev->worker.task);
+
+       /* open coded non-blocking drbd_suspend_io(mdev); */
+       set_bit(SUSPEND_IO, &mdev->flags);
+       if (!is_susp(mdev->state))
+               D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+       drbd_bm_lock(mdev, why);
+       rv = io_fn(mdev);
+       drbd_bm_unlock(mdev);
+
+       drbd_resume_io(mdev);
+
+       return rv;
+}
+
 /**
  * after_state_ch() - Perform after state change actions that may sleep
  * @mdev:      DRBD device.
@@ -1327,16 +1378,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
        nsm.i = -1;
        if (ns.susp_nod) {
-               if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
-                       if (ns.conn == C_CONNECTED)
-                               what = resend, nsm.susp_nod = 0;
-                       else /* ns.conn > C_CONNECTED */
-                               dev_err(DEV, "Unexpected Resynd going on!\n");
-               }
+               if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+                       what = resend;
 
                if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
-                       what = restart_frozen_disk_io, nsm.susp_nod = 0;
+                       what = restart_frozen_disk_io;
 
+               if (what != nothing)
+                       nsm.susp_nod = 0;
        }
 
        if (ns.susp_fen) {
@@ -1367,13 +1416,29 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
                spin_unlock_irq(&mdev->req_lock);
        }
 
+       /* Became sync source.  With protocol >= 96, we still need to send out
+        * the sync uuid now. Need to do that before any drbd_send_state, or
+        * the other side may go "paused sync" before receiving the sync uuids,
+        * which is unexpected. */
+       if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+           (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+           mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+               drbd_gen_and_send_sync_uuid(mdev);
+               put_ldev(mdev);
+       }
+
        /* Do not change the order of the if above and the two below... */
        if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
                drbd_send_uuids(mdev);
                drbd_send_state(mdev);
        }
-       if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
-               drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+       /* No point in queuing send_bitmap if we don't have a connection
+        * anymore, so check also the _current_ state, not only the new state
+        * at the time this work was queued. */
+       if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+           mdev->state.conn == C_WF_BITMAP_S)
+               drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+                               "send_bitmap (WFBitMapS)");
 
        /* Lost contact to peer's copy of the data */
        if ((os.pdsk >= D_INCONSISTENT &&
@@ -1404,7 +1469,16 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 
                /* D_DISKLESS Peer becomes secondary */
                if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
-                       drbd_al_to_on_disk_bm(mdev);
+                       drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer");
+               put_ldev(mdev);
+       }
+
+       /* Write out all changed bits on demote.
+        * Though, no need to do that just yet
+        * if there is a resync going on still */
+       if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+               mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+               drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote");
                put_ldev(mdev);
        }
 
@@ -1523,6 +1597,19 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
                drbd_send_state(mdev);
 
+       /* This triggers bitmap writeout of potentially still unwritten pages
+        * if the resync finished cleanly, or aborted because of peer disk
+        * failure.  Resync aborted because of connection failure does bitmap
+        * writeout from drbd_disconnect.
+        * For resync aborted because of local disk failure, we cannot do
+        * any bitmap writeout anymore.
+        */
+       if (os.conn > C_CONNECTED && ns.conn == C_CONNECTED &&
+           mdev->state.conn == C_CONNECTED && get_ldev(mdev)) {
+               drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
+               put_ldev(mdev);
+       }
+
        /* free tl_hash if we Got thawed and are C_STANDALONE */
        if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
                drbd_free_tl_hash(mdev);
@@ -1904,7 +1991,7 @@ int drbd_send_protocol(struct drbd_conf *mdev)
                else {
                        dev_err(DEV, "--dry-run is not supported by peer");
                        kfree(p);
-                       return 0;
+                       return -1;
                }
        }
        p->conn_flags    = cpu_to_be32(cf);
@@ -1952,12 +2039,36 @@ int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
        return _drbd_send_uuids(mdev, 8);
 }
 
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+       if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+               u64 *uuid = mdev->ldev->md.uuid;
+               dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+                    text,
+                    (unsigned long long)uuid[UI_CURRENT],
+                    (unsigned long long)uuid[UI_BITMAP],
+                    (unsigned long long)uuid[UI_HISTORY_START],
+                    (unsigned long long)uuid[UI_HISTORY_END]);
+               put_ldev(mdev);
+       } else {
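+               /* local meta data not accessible: log the effective data UUID instead */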
+               dev_info(DEV, "%s effective data uuid: %016llX\n",
+                               text,
+                               (unsigned long long)mdev->ed_uuid);
+       }
+}
 
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
 {
        struct p_rs_uuid p;
+       u64 uuid;
 
-       p.uuid = cpu_to_be64(val);
+       D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+
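+       /* derive a new sync UUID from the current bitmap UUID, store it in
+        * the meta data, then announce it to the peer */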
+       uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+       drbd_uuid_set(mdev, UI_BITMAP, uuid);
+       drbd_print_uuids(mdev, "updated sync UUID");
+       drbd_md_sync(mdev);
+       p.uuid = cpu_to_be64(uuid);
 
        return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
                             (struct p_header80 *)&p, sizeof(p));
@@ -2140,9 +2251,15 @@ int fill_bitmap_rle_bits(struct drbd_conf *mdev,
        return len;
 }
 
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
 send_bitmap_rle_or_plain(struct drbd_conf *mdev,
-       struct p_header80 *h, struct bm_xfer_ctx *c)
+                        struct p_header80 *h, struct bm_xfer_ctx *c)
 {
        struct p_compressed_bm *p = (void*)h;
        unsigned long num_words;
@@ -2152,7 +2269,7 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
        len = fill_bitmap_rle_bits(mdev, p, c);
 
        if (len < 0)
-               return FAILED;
+               return -EIO;
 
        if (len) {
                DCBP_set_code(p, RLE_VLI_Bits);
@@ -2182,11 +2299,14 @@ send_bitmap_rle_or_plain(struct drbd_conf *mdev,
                if (c->bit_offset > c->bm_bits)
                        c->bit_offset = c->bm_bits;
        }
-       ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
-       if (ok == DONE)
-               INFO_bm_xfer_stats(mdev, "send", c);
-       return ok;
+       if (ok) {
+               if (len == 0) {
+                       INFO_bm_xfer_stats(mdev, "send", c);
+                       return 0;
+               } else
+                       return 1;
+       }
+       return -EIO;
 }
 
 /* See the comment at receive_bitmap() */
@@ -2194,7 +2314,7 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
 {
        struct bm_xfer_ctx c;
        struct p_header80 *p;
-       int ret;
+       int err;
 
        ERR_IF(!mdev->bitmap) return false;
 
@@ -2229,11 +2349,11 @@ int _drbd_send_bitmap(struct drbd_conf *mdev)
        };
 
        do {
-               ret = send_bitmap_rle_or_plain(mdev, p, &c);
-       } while (ret == OK);
+               err = send_bitmap_rle_or_plain(mdev, p, &c);
+       } while (err > 0);
 
        free_page((unsigned long) p);
-       return (ret == DONE);
+       return err == 0;
 }
 
 int drbd_send_bitmap(struct drbd_conf *mdev)
@@ -2902,17 +3022,21 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        INIT_LIST_HEAD(&mdev->start_resync_work.list);
        INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
 
-       mdev->resync_work.cb  = w_resync_inactive;
+       mdev->resync_work.cb  = w_resync_timer;
        mdev->unplug_work.cb  = w_send_write_hint;
        mdev->go_diskless.cb  = w_go_diskless;
        mdev->md_sync_work.cb = w_md_sync;
        mdev->bm_io_work.w.cb = w_bitmap_io;
+       mdev->start_resync_work.cb = w_start_resync;
        init_timer(&mdev->resync_timer);
        init_timer(&mdev->md_sync_timer);
+       init_timer(&mdev->start_resync_timer);
        mdev->resync_timer.function = resync_timer_fn;
        mdev->resync_timer.data = (unsigned long) mdev;
        mdev->md_sync_timer.function = md_sync_timer_fn;
        mdev->md_sync_timer.data = (unsigned long) mdev;
+       mdev->start_resync_timer.function = start_resync_timer_fn;
+       mdev->start_resync_timer.data = (unsigned long) mdev;
 
        init_waitqueue_head(&mdev->misc_wait);
        init_waitqueue_head(&mdev->state_wait);
@@ -2983,6 +3107,8 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
        D_ASSERT(list_empty(&mdev->resync_work.list));
        D_ASSERT(list_empty(&mdev->unplug_work.list));
        D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+       drbd_set_defaults(mdev);
 }
 
 
@@ -3230,7 +3356,7 @@ static int drbd_congested(void *congested_data, int bdi_bits)
        char reason = '-';
        int r = 0;
 
-       if (!__inc_ap_bio_cond(mdev)) {
+       if (!may_inc_ap_bio(mdev)) {
                /* DRBD has frozen IO */
                r = bdi_bits;
                reason = 'd';
@@ -3378,7 +3504,7 @@ int __init drbd_init(void)
                return -EINVAL;
        }
 
-       if (1 > minor_count || minor_count > 255) {
+       if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
                printk(KERN_ERR
                        "drbd: invalid minor_count (%d)\n", minor_count);
 #ifdef MODULE
@@ -3646,28 +3772,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
        return rv;
 }
 
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
-       static char *uuid_str[UI_EXTENDED_SIZE] = {
-               [UI_CURRENT] = "CURRENT",
-               [UI_BITMAP] = "BITMAP",
-               [UI_HISTORY_START] = "HISTORY_START",
-               [UI_HISTORY_END] = "HISTORY_END",
-               [UI_SIZE] = "SIZE",
-               [UI_FLAGS] = "FLAGS",
-       };
-
-       if (index >= UI_EXTENDED_SIZE) {
-               dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
-               return;
-       }
-
-       dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
-                uuid_str[index],
-                (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
 /**
  * drbd_md_mark_dirty() - Mark meta data super block as dirty
  * @mdev:      DRBD device.
@@ -3697,10 +3801,8 @@ static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
 {
        int i;
 
-       for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+       for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
                mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
-               debug_drbd_uuid(mdev, i+1);
-       }
 }
 
 void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
@@ -3715,7 +3817,6 @@ void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
        }
 
        mdev->ldev->md.uuid[idx] = val;
-       debug_drbd_uuid(mdev, idx);
        drbd_md_mark_dirty(mdev);
 }
 
@@ -3725,7 +3826,6 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
        if (mdev->ldev->md.uuid[idx]) {
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
-               debug_drbd_uuid(mdev, UI_HISTORY_START);
        }
        _drbd_uuid_set(mdev, idx, val);
 }
@@ -3740,14 +3840,16 @@ void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
 void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
 {
        u64 val;
+       unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+       if (bm_uuid)
+               dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-       dev_info(DEV, "Creating new current UUID\n");
-       D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
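+       /* rotate the current UUID into the bitmap slot before generating a new one */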
        mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
-       debug_drbd_uuid(mdev, UI_BITMAP);
 
        get_random_bytes(&val, sizeof(u64));
        _drbd_uuid_set(mdev, UI_CURRENT, val);
+       drbd_print_uuids(mdev, "new current UUID");
        /* get it to stable storage _now_ */
        drbd_md_sync(mdev);
 }
@@ -3761,16 +3863,12 @@ void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
                drbd_uuid_move_history(mdev);
                mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
                mdev->ldev->md.uuid[UI_BITMAP] = 0;
-               debug_drbd_uuid(mdev, UI_HISTORY_START);
-               debug_drbd_uuid(mdev, UI_BITMAP);
        } else {
-               if (mdev->ldev->md.uuid[UI_BITMAP])
-                       dev_warn(DEV, "bm UUID already set");
+               unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+               if (bm_uuid)
+                       dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
 
-               mdev->ldev->md.uuid[UI_BITMAP] = val;
-               mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
-               debug_drbd_uuid(mdev, UI_BITMAP);
+               mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
        }
        drbd_md_mark_dirty(mdev);
 }
@@ -3826,13 +3924,16 @@ int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
 static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 {
        struct bm_io_work *work = container_of(w, struct bm_io_work, w);
-       int rv;
+       int rv = -EIO;
 
        D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
 
-       drbd_bm_lock(mdev, work->why);
-       rv = work->io_fn(mdev);
-       drbd_bm_unlock(mdev);
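+       /* the bitmap io functions need the local disk; skip them and report
+        * the default -EIO if it is gone */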
+       if (get_ldev(mdev)) {
+               drbd_bm_lock(mdev, work->why);
+               rv = work->io_fn(mdev);
+               drbd_bm_unlock(mdev);
+               put_ldev(mdev);
+       }
 
        clear_bit(BITMAP_IO, &mdev->flags);
        smp_mb__after_clear_bit();