MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+ __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
#endif
/* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}
+
+/* In C_AHEAD mode only out_of_sync packets are sent for requests. Detach
+ * those requests from the newsest barrier when changing to an other cstate.
+ *
+ * That headless list vanishes when the last request finished its write or
+ * send out_of_sync packet. */
+static void tl_forget(struct drbd_conf *mdev)
+{
+ struct drbd_tl_epoch *b;
+
+ if (test_bit(CREATE_BARRIER, &mdev->flags))
+ return;
+
+ b = mdev->newest_tle;
+ list_del(&b->requests);
+ _tl_add_barrier(mdev, b);
+}
+
/**
* _tl_restart() - Walks the transfer log, and applies an action to all requests
* @mdev: DRBD device.
atomic_inc(&mdev->local_cnt);
mdev->state = ns;
+
+ if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+ drbd_print_uuids(mdev, "attached to UUIDs");
+
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
drbd_resume_al(mdev);
+ /* Start a new epoch in case we start to mirror write requests */
+ if (!drbd_should_do_remote(os) && drbd_should_do_remote(ns))
+ tl_forget(mdev);
+
+ /* Do not add local-only requests to an epoch with mirrored requests */
+ if (drbd_should_do_remote(os) && !drbd_should_do_remote(ns))
+ set_bit(CREATE_BARRIER, &mdev->flags);
+
ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
if (ascw) {
ascw->os = os;
}
}
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+{
+ int rv;
+
+ D_ASSERT(current == mdev->worker.task);
+
+ /* open coded non-blocking drbd_suspend_io(mdev); */
+ set_bit(SUSPEND_IO, &mdev->flags);
+ if (!is_susp(mdev->state))
+ D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
+
+ drbd_bm_lock(mdev, why);
+ rv = io_fn(mdev);
+ drbd_bm_unlock(mdev);
+
+ drbd_resume_io(mdev);
+
+ return rv;
+}
+
/**
* after_state_ch() - Perform after state change actions that may sleep
* @mdev: DRBD device.
nsm.i = -1;
if (ns.susp_nod) {
- if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
- if (ns.conn == C_CONNECTED)
- what = resend, nsm.susp_nod = 0;
- else /* ns.conn > C_CONNECTED */
- dev_err(DEV, "Unexpected Resynd going on!\n");
- }
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+ what = resend;
if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
- what = restart_frozen_disk_io, nsm.susp_nod = 0;
+ what = restart_frozen_disk_io;
+ if (what != nothing)
+ nsm.susp_nod = 0;
}
if (ns.susp_fen) {
spin_unlock_irq(&mdev->req_lock);
}
+ /* Became sync source. With protocol >= 96, we still need to send out
+ * the sync uuid now. Need to do that before any drbd_send_state, or
+ * the other side may go "paused sync" before receiving the sync uuids,
+ * which is unexpected. */
+ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+ mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+ drbd_gen_and_send_sync_uuid(mdev);
+ put_ldev(mdev);
+ }
+
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
drbd_send_uuids(mdev);
drbd_send_state(mdev);
}
- if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
- drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+ /* No point in queuing send_bitmap if we don't have a connection
+ * anymore, so check also the _current_ state, not only the new state
+ * at the time this work was queued. */
+ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+ mdev->state.conn == C_WF_BITMAP_S)
+ drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+ "send_bitmap (WFBitMapS)");
/* Lost contact to peer's copy of the data */
if ((os.pdsk >= D_INCONSISTENT &&
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
- drbd_al_to_on_disk_bm(mdev);
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer");
+ put_ldev(mdev);
+ }
+
+ /* Write out all changed bits on demote.
+ * Though, no need to da that just yet
+ * if there is a resync going on still */
+ if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+ mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote");
put_ldev(mdev);
}
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
drbd_send_state(mdev);
+ /* This triggers bitmap writeout of potentially still unwritten pages
+ * if the resync finished cleanly, or aborted because of peer disk
+ * failure. Resync aborted because of connection failure does bitmap
+ * writeout from drbd_disconnect.
+ * For resync aborted because of local disk failure, we cannot do
+ * any bitmap writeout anymore.
+ */
+ if (os.conn > C_CONNECTED && ns.conn == C_CONNECTED &&
+ mdev->state.conn == C_CONNECTED && get_ldev(mdev)) {
+ drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished");
+ put_ldev(mdev);
+ }
+
/* free tl_hash if we Got thawed and are C_STANDALONE */
if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
drbd_free_tl_hash(mdev);
else {
dev_err(DEV, "--dry-run is not supported by peer");
kfree(p);
- return 0;
+ return -1;
}
}
p->conn_flags = cpu_to_be32(cf);
return _drbd_send_uuids(mdev, 8);
}
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ u64 *uuid = mdev->ldev->md.uuid;
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END]);
+ put_ldev(mdev);
+ } else {
+ dev_info(DEV, "%s effective data uuid: %016llX\n",
+ text,
+ (unsigned long long)mdev->ed_uuid);
+ }
+}
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
struct p_rs_uuid p;
+ u64 uuid;
- p.uuid = cpu_to_be64(val);
+ D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+
+ uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+ drbd_uuid_set(mdev, UI_BITMAP, uuid);
+ drbd_print_uuids(mdev, "updated sync UUID");
+ drbd_md_sync(mdev);
+ p.uuid = cpu_to_be64(uuid);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
(struct p_header80 *)&p, sizeof(p));
return len;
}
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
- struct p_header80 *h, struct bm_xfer_ctx *c)
+ struct p_header80 *h, struct bm_xfer_ctx *c)
{
struct p_compressed_bm *p = (void*)h;
unsigned long num_words;
len = fill_bitmap_rle_bits(mdev, p, c);
if (len < 0)
- return FAILED;
+ return -EIO;
if (len) {
DCBP_set_code(p, RLE_VLI_Bits);
if (c->bit_offset > c->bm_bits)
c->bit_offset = c->bm_bits;
}
- ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
- if (ok == DONE)
- INFO_bm_xfer_stats(mdev, "send", c);
- return ok;
+ if (ok) {
+ if (len == 0) {
+ INFO_bm_xfer_stats(mdev, "send", c);
+ return 0;
+ } else
+ return 1;
+ }
+ return -EIO;
}
/* See the comment at receive_bitmap() */
{
struct bm_xfer_ctx c;
struct p_header80 *p;
- int ret;
+ int err;
ERR_IF(!mdev->bitmap) return false;
};
do {
- ret = send_bitmap_rle_or_plain(mdev, p, &c);
- } while (ret == OK);
+ err = send_bitmap_rle_or_plain(mdev, p, &c);
+ } while (err > 0);
free_page((unsigned long) p);
- return (ret == DONE);
+ return err == 0;
}
int drbd_send_bitmap(struct drbd_conf *mdev)
INIT_LIST_HEAD(&mdev->start_resync_work.list);
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
- mdev->resync_work.cb = w_resync_inactive;
+ mdev->resync_work.cb = w_resync_timer;
mdev->unplug_work.cb = w_send_write_hint;
mdev->go_diskless.cb = w_go_diskless;
mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io;
+ mdev->start_resync_work.cb = w_start_resync;
init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer);
+ init_timer(&mdev->start_resync_timer);
mdev->resync_timer.function = resync_timer_fn;
mdev->resync_timer.data = (unsigned long) mdev;
mdev->md_sync_timer.function = md_sync_timer_fn;
mdev->md_sync_timer.data = (unsigned long) mdev;
+ mdev->start_resync_timer.function = start_resync_timer_fn;
+ mdev->start_resync_timer.data = (unsigned long) mdev;
init_waitqueue_head(&mdev->misc_wait);
init_waitqueue_head(&mdev->state_wait);
D_ASSERT(list_empty(&mdev->resync_work.list));
D_ASSERT(list_empty(&mdev->unplug_work.list));
D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+ drbd_set_defaults(mdev);
}
char reason = '-';
int r = 0;
- if (!__inc_ap_bio_cond(mdev)) {
+ if (!may_inc_ap_bio(mdev)) {
/* DRBD has frozen IO */
r = bdi_bits;
reason = 'd';
return -EINVAL;
}
- if (1 > minor_count || minor_count > 255) {
+ if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
printk(KERN_ERR
"drbd: invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
return rv;
}
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
- static char *uuid_str[UI_EXTENDED_SIZE] = {
- [UI_CURRENT] = "CURRENT",
- [UI_BITMAP] = "BITMAP",
- [UI_HISTORY_START] = "HISTORY_START",
- [UI_HISTORY_END] = "HISTORY_END",
- [UI_SIZE] = "SIZE",
- [UI_FLAGS] = "FLAGS",
- };
-
- if (index >= UI_EXTENDED_SIZE) {
- dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
- return;
- }
-
- dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
- uuid_str[index],
- (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
/**
* drbd_md_mark_dirty() - Mark meta data super block as dirty
* @mdev: DRBD device.
{
int i;
- for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+ for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
- debug_drbd_uuid(mdev, i+1);
- }
}
void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
}
mdev->ldev->md.uuid[idx] = val;
- debug_drbd_uuid(mdev, idx);
drbd_md_mark_dirty(mdev);
}
if (mdev->ldev->md.uuid[idx]) {
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
- debug_drbd_uuid(mdev, UI_HISTORY_START);
}
_drbd_uuid_set(mdev, idx, val);
}
void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
{
u64 val;
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- dev_info(DEV, "Creating new current UUID\n");
- D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
- debug_drbd_uuid(mdev, UI_BITMAP);
get_random_bytes(&val, sizeof(u64));
_drbd_uuid_set(mdev, UI_CURRENT, val);
+ drbd_print_uuids(mdev, "new current UUID");
/* get it to stable storage _now_ */
drbd_md_sync(mdev);
}
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
mdev->ldev->md.uuid[UI_BITMAP] = 0;
- debug_drbd_uuid(mdev, UI_HISTORY_START);
- debug_drbd_uuid(mdev, UI_BITMAP);
} else {
- if (mdev->ldev->md.uuid[UI_BITMAP])
- dev_warn(DEV, "bm UUID already set");
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- mdev->ldev->md.uuid[UI_BITMAP] = val;
- mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
-
- debug_drbd_uuid(mdev, UI_BITMAP);
+ mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
}
drbd_md_mark_dirty(mdev);
}
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
struct bm_io_work *work = container_of(w, struct bm_io_work, w);
- int rv;
+ int rv = -EIO;
D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
- drbd_bm_lock(mdev, work->why);
- rv = work->io_fn(mdev);
- drbd_bm_unlock(mdev);
+ if (get_ldev(mdev)) {
+ drbd_bm_lock(mdev, work->why);
+ rv = work->io_fn(mdev);
+ drbd_bm_unlock(mdev);
+ put_ldev(mdev);
+ }
clear_bit(BITMAP_IO, &mdev->flags);
smp_mb__after_clear_bit();