MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
-MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices (1-255)");
+MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
+ __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);
#include <linux/moduleparam.h>
#endif
/* module parameter, defined */
-unsigned int minor_count = 32;
+unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
int disable_sendpage;
int allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}
+
/**
* _tl_restart() - Walks the transfer log, and applies an action to all requests
* @mdev: DRBD device.
}
/**
- * cl_wide_st_chg() - TRUE if the state change is a cluster wide one
+ * cl_wide_st_chg() - true if the state change is a cluster wide one
* @mdev: DRBD device.
* @os: old (current) state.
* @ns: new (wanted) state.
atomic_inc(&mdev->local_cnt);
mdev->state = ns;
+
+ if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
+ drbd_print_uuids(mdev, "attached to UUIDs");
+
wake_up(&mdev->misc_wait);
wake_up(&mdev->state_wait);
}
}
+int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
+ int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
+{ /* drbd_bitmap_io_from_worker() - run a bitmap operation from the worker
+ * @mdev: DRBD device.
+ * @io_fn: operation to call while the bitmap is locked.
+ * @why: handed through to drbd_bm_lock() unchanged (presumably a
+ *  description of why the lock is taken; confirm at drbd_bm_lock()).
+ * @flags: handed through to drbd_bm_lock() unchanged.
+ *
+ * Only valid when called from the worker's own task; the D_ASSERT below
+ * checks exactly that. Returns whatever @io_fn returned. */
+ int rv; /* result of io_fn(), returned to the caller as-is */
+
+ D_ASSERT(current == mdev->worker.task);
+
+ /* open coded non-blocking drbd_suspend_io(mdev); */
+ set_bit(SUSPEND_IO, &mdev->flags);
+
+ drbd_bm_lock(mdev, why, flags);
+ rv = io_fn(mdev); /* runs between drbd_bm_lock() and drbd_bm_unlock() */
+ drbd_bm_unlock(mdev);
+
+ drbd_resume_io(mdev); /* counterpart of the open coded suspend above */
+
+ return rv;
+}
+
/**
* after_state_ch() - Perform after state change actions that may sleep
* @mdev: DRBD device.
nsm.i = -1;
if (ns.susp_nod) {
- if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
- if (ns.conn == C_CONNECTED)
- what = resend, nsm.susp_nod = 0;
- else /* ns.conn > C_CONNECTED */
- dev_err(DEV, "Unexpected Resynd going on!\n");
- }
+ if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+ what = resend;
if (os.disk == D_ATTACHING && ns.disk > D_ATTACHING)
- what = restart_frozen_disk_io, nsm.susp_nod = 0;
+ what = restart_frozen_disk_io;
+ if (what != nothing)
+ nsm.susp_nod = 0;
}
if (ns.susp_fen) {
spin_unlock_irq(&mdev->req_lock);
}
+ /* Became sync source. With protocol >= 96, we still need to send out
+ * the sync uuid now. Need to do that before any drbd_send_state, or
+ * the other side may go "paused sync" before receiving the sync uuids,
+ * which is unexpected. */
+ if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
+ (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
+ mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
+ drbd_gen_and_send_sync_uuid(mdev);
+ put_ldev(mdev);
+ }
+
/* Do not change the order of the if above and the two below... */
if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) { /* attach on the peer */
drbd_send_uuids(mdev);
drbd_send_state(mdev);
}
- if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S)
- drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, "send_bitmap (WFBitMapS)");
+ /* No point in queuing send_bitmap if we don't have a connection
+ * anymore, so check also the _current_ state, not only the new state
+ * at the time this work was queued. */
+ if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
+ mdev->state.conn == C_WF_BITMAP_S)
+ drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
+ "send_bitmap (WFBitMapS)",
+ BM_LOCKED_TEST_ALLOWED);
/* Lost contact to peer's copy of the data */
if ((os.pdsk >= D_INCONSISTENT &&
/* D_DISKLESS Peer becomes secondary */
if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
- drbd_al_to_on_disk_bm(mdev);
+ /* We may still be Primary ourselves.
+ * No harm done if the bitmap still changes,
+ * redirtied pages will follow later. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote diskless peer", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
+ /* Write out all changed bits on demote.
+ * Though, no need to do that just yet
+ * if there is a resync going on still */
+ if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
+ mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
+ /* No changes to the bitmap expected this time, so assert that,
+ * even though no harm was done if it did change. */
+ drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
+ "demote", BM_LOCKED_TEST_ALLOWED);
put_ldev(mdev);
}
/* We are in the progress to start a full sync... */
if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
(os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
- drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync");
+ /* no other bitmap changes expected during this phase */
+ drbd_queue_bitmap_io(mdev,
+ &drbd_bmio_set_n_write, &abw_start_sync,
+ "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
/* We are invalidating our self... */
if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
- drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
+ /* other bitmap operation expected during this phase */
+ drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
+ "set_n_write from invalidate", BM_LOCKED_MASK);
/* first half of local IO error, failure to attach,
* or administrative detach */
if (drbd_send_state(mdev))
dev_warn(DEV, "Notified peer that I'm now diskless.\n");
- else
- dev_err(DEV, "Sending state for being diskless failed\n");
/* corresponding get_ldev in __drbd_set_state
* this may finaly trigger drbd_ldev_destroy. */
put_ldev(mdev);
if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
drbd_send_state(mdev);
+ /* This triggers bitmap writeout of potentially still unwritten pages
+ * if the resync finished cleanly, or aborted because of peer disk
+ * failure, or because of connection loss.
+ * For resync aborted because of local disk failure, we cannot do
+ * any bitmap writeout anymore.
+ * No harm done if some bits change during this phase.
+ */
+ if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
+ drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
+ "write from resync_finished", BM_LOCKED_SET_ALLOWED);
+ put_ldev(mdev);
+ }
+
/* free tl_hash if we Got thawed and are C_STANDALONE */
if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
drbd_free_tl_hash(mdev);
if (!try_module_get(THIS_MODULE)) {
dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
spin_unlock_irqrestore(&thi->t_lock, flags);
- return FALSE;
+ return false;
}
init_completion(&thi->stop);
dev_err(DEV, "Couldn't start thread\n");
module_put(THIS_MODULE);
- return FALSE;
+ return false;
}
spin_lock_irqsave(&thi->t_lock, flags);
thi->task = nt;
break;
}
- return TRUE;
+ return true;
}
{
int sent, ok;
- ERR_IF(!h) return FALSE;
- ERR_IF(!size) return FALSE;
+ ERR_IF(!h) return false;
+ ERR_IF(!size) return false;
h->magic = BE_DRBD_MAGIC;
h->command = cpu_to_be16(cmd);
sent = drbd_send(mdev, sock, h, size, msg_flags);
ok = (sent == size);
- if (!ok)
- dev_err(DEV, "short sent %s size=%d sent=%d\n",
+ if (!ok && !signal_pending(current))
+ dev_warn(DEV, "short sent %s size=%d sent=%d\n",
cmdname(cmd), (int)size, sent);
return ok;
}
else {
dev_err(DEV, "--dry-run is not supported by peer");
kfree(p);
- return 0;
+ return -1;
}
}
p->conn_flags = cpu_to_be32(cf);
return _drbd_send_uuids(mdev, 8);
}
+void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
+{ /* drbd_print_uuids() - log the device's UUIDs at info level
+ * @mdev: DRBD device.
+ * @text: prefix printed in front of the UUID values.
+ *
+ * If get_ldev_if_state(mdev, D_NEGOTIATING) succeeds, prints the
+ * UI_CURRENT, UI_BITMAP, UI_HISTORY_START and UI_HISTORY_END entries of
+ * mdev->ldev->md.uuid; otherwise prints only mdev->ed_uuid.
+ * NOTE(review): get_ldev_if_state() presumably succeeds only while the
+ * local disk state is at least D_NEGOTIATING; confirm at its definition. */
+ if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
+ u64 *uuid = mdev->ldev->md.uuid; /* only dereferenced inside the get/put pair */
+ dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
+ text,
+ (unsigned long long)uuid[UI_CURRENT],
+ (unsigned long long)uuid[UI_BITMAP],
+ (unsigned long long)uuid[UI_HISTORY_START],
+ (unsigned long long)uuid[UI_HISTORY_END]);
+ put_ldev(mdev); /* balances the successful get_ldev_if_state() above */
+ } else {
+ dev_info(DEV, "%s effective data uuid: %016llX\n",
+ text,
+ (unsigned long long)mdev->ed_uuid);
+ }
+}
-int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
+int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
{
struct p_rs_uuid p;
+ u64 uuid;
- p.uuid = cpu_to_be64(val);
+ D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
+
+ uuid = mdev->ldev->md.uuid[UI_BITMAP] + UUID_NEW_BM_OFFSET;
+ drbd_uuid_set(mdev, UI_BITMAP, uuid);
+ drbd_print_uuids(mdev, "updated sync UUID");
+ drbd_md_sync(mdev);
+ p.uuid = cpu_to_be64(uuid);
return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
(struct p_header80 *)&p, sizeof(p));
return len;
}
-enum { OK, FAILED, DONE }
+/**
+ * send_bitmap_rle_or_plain
+ *
+ * Return 0 when done, 1 when another iteration is needed, and a negative error
+ * code upon failure.
+ */
+static int
send_bitmap_rle_or_plain(struct drbd_conf *mdev,
- struct p_header80 *h, struct bm_xfer_ctx *c)
+ struct p_header80 *h, struct bm_xfer_ctx *c)
{
struct p_compressed_bm *p = (void*)h;
unsigned long num_words;
len = fill_bitmap_rle_bits(mdev, p, c);
if (len < 0)
- return FAILED;
+ return -EIO;
if (len) {
DCBP_set_code(p, RLE_VLI_Bits);
if (c->bit_offset > c->bm_bits)
c->bit_offset = c->bm_bits;
}
- ok = ok ? ((len == 0) ? DONE : OK) : FAILED;
-
- if (ok == DONE)
- INFO_bm_xfer_stats(mdev, "send", c);
- return ok;
+ if (ok) {
+ if (len == 0) {
+ INFO_bm_xfer_stats(mdev, "send", c);
+ return 0;
+ } else
+ return 1;
+ }
+ return -EIO;
}
/* See the comment at receive_bitmap() */
{
struct bm_xfer_ctx c;
struct p_header80 *p;
- int ret;
+ int err;
- ERR_IF(!mdev->bitmap) return FALSE;
+ ERR_IF(!mdev->bitmap) return false;
/* maybe we should use some per thread scratch page,
* and allocate that during initial device creation? */
p = (struct p_header80 *) __get_free_page(GFP_NOIO);
if (!p) {
dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
- return FALSE;
+ return false;
}
if (get_ldev(mdev)) {
};
do {
- ret = send_bitmap_rle_or_plain(mdev, p, &c);
- } while (ret == OK);
+ err = send_bitmap_rle_or_plain(mdev, p, &c);
+ } while (err > 0);
free_page((unsigned long) p);
- return (ret == DONE);
+ return err == 0;
}
int drbd_send_bitmap(struct drbd_conf *mdev)
p.set_size = cpu_to_be32(set_size);
if (mdev->state.conn < C_CONNECTED)
- return FALSE;
+ return false;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
(struct p_header80 *)&p, sizeof(p));
return ok;
p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
- return FALSE;
+ return false;
ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
(struct p_header80 *)&p, sizeof(p));
return ok;
}
/* called on sndtimeo
- * returns FALSE if we should retry,
- * TRUE if we think connection is dead
+ * returns false if we should retry,
+ * true if we think connection is dead
*/
static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
{
|| mdev->state.conn < C_CONNECTED;
if (drop_it)
- return TRUE;
+ return true;
drop_it = !--mdev->ko_count;
if (!drop_it) {
INIT_LIST_HEAD(&mdev->start_resync_work.list);
INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
- mdev->resync_work.cb = w_resync_inactive;
+ mdev->resync_work.cb = w_resync_timer;
mdev->unplug_work.cb = w_send_write_hint;
mdev->go_diskless.cb = w_go_diskless;
mdev->md_sync_work.cb = w_md_sync;
mdev->bm_io_work.w.cb = w_bitmap_io;
+ mdev->start_resync_work.cb = w_start_resync;
init_timer(&mdev->resync_timer);
init_timer(&mdev->md_sync_timer);
+ init_timer(&mdev->start_resync_timer);
+ init_timer(&mdev->request_timer);
mdev->resync_timer.function = resync_timer_fn;
mdev->resync_timer.data = (unsigned long) mdev;
mdev->md_sync_timer.function = md_sync_timer_fn;
mdev->md_sync_timer.data = (unsigned long) mdev;
+ mdev->start_resync_timer.function = start_resync_timer_fn;
+ mdev->start_resync_timer.data = (unsigned long) mdev;
+ mdev->request_timer.function = request_timer_fn;
+ mdev->request_timer.data = (unsigned long) mdev;
init_waitqueue_head(&mdev->misc_wait);
init_waitqueue_head(&mdev->state_wait);
D_ASSERT(list_empty(&mdev->resync_work.list));
D_ASSERT(list_empty(&mdev->unplug_work.list));
D_ASSERT(list_empty(&mdev->go_diskless.list));
+
+ drbd_set_defaults(mdev);
}
char reason = '-';
int r = 0;
- if (!__inc_ap_bio_cond(mdev)) {
+ if (!may_inc_ap_bio(mdev)) {
/* DRBD has frozen IO */
r = bdi_bits;
reason = 'd';
goto out_no_disk;
mdev->vdisk = disk;
- set_disk_ro(disk, TRUE);
+ set_disk_ro(disk, true);
disk->queue = q;
disk->major = DRBD_MAJOR;
return -EINVAL;
}
- if (1 > minor_count || minor_count > 255) {
+ if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
printk(KERN_ERR
"drbd: invalid minor_count (%d)\n", minor_count);
#ifdef MODULE
if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
/* this was a try anyways ... */
dev_err(DEV, "meta data update failed!\n");
- drbd_chk_io_error(mdev, 1, TRUE);
+ drbd_chk_io_error(mdev, 1, true);
}
/* Update mdev->ldev->md.la_size_sect,
return rv;
}
-static void debug_drbd_uuid(struct drbd_conf *mdev, enum drbd_uuid_index index)
-{
- static char *uuid_str[UI_EXTENDED_SIZE] = {
- [UI_CURRENT] = "CURRENT",
- [UI_BITMAP] = "BITMAP",
- [UI_HISTORY_START] = "HISTORY_START",
- [UI_HISTORY_END] = "HISTORY_END",
- [UI_SIZE] = "SIZE",
- [UI_FLAGS] = "FLAGS",
- };
-
- if (index >= UI_EXTENDED_SIZE) {
- dev_warn(DEV, " uuid_index >= EXTENDED_SIZE\n");
- return;
- }
-
- dynamic_dev_dbg(DEV, " uuid[%s] now %016llX\n",
- uuid_str[index],
- (unsigned long long)mdev->ldev->md.uuid[index]);
-}
-
-
/**
* drbd_md_mark_dirty() - Mark meta data super block as dirty
* @mdev: DRBD device.
{
int i;
- for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++) {
+ for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
- debug_drbd_uuid(mdev, i+1);
- }
}
void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
}
mdev->ldev->md.uuid[idx] = val;
- debug_drbd_uuid(mdev, idx);
drbd_md_mark_dirty(mdev);
}
if (mdev->ldev->md.uuid[idx]) {
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
- debug_drbd_uuid(mdev, UI_HISTORY_START);
}
_drbd_uuid_set(mdev, idx, val);
}
void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
{
u64 val;
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- dev_info(DEV, "Creating new current UUID\n");
- D_ASSERT(mdev->ldev->md.uuid[UI_BITMAP] == 0);
mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
- debug_drbd_uuid(mdev, UI_BITMAP);
get_random_bytes(&val, sizeof(u64));
_drbd_uuid_set(mdev, UI_CURRENT, val);
+ drbd_print_uuids(mdev, "new current UUID");
/* get it to stable storage _now_ */
drbd_md_sync(mdev);
}
drbd_uuid_move_history(mdev);
mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
mdev->ldev->md.uuid[UI_BITMAP] = 0;
- debug_drbd_uuid(mdev, UI_HISTORY_START);
- debug_drbd_uuid(mdev, UI_BITMAP);
} else {
- if (mdev->ldev->md.uuid[UI_BITMAP])
- dev_warn(DEV, "bm UUID already set");
-
- mdev->ldev->md.uuid[UI_BITMAP] = val;
- mdev->ldev->md.uuid[UI_BITMAP] &= ~((u64)1);
+ unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
+ if (bm_uuid)
+ dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
- debug_drbd_uuid(mdev, UI_BITMAP);
+ mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
}
drbd_md_mark_dirty(mdev);
}
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
struct bm_io_work *work = container_of(w, struct bm_io_work, w);
- int rv;
+ int rv = -EIO;
D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
- drbd_bm_lock(mdev, work->why);
- rv = work->io_fn(mdev);
- drbd_bm_unlock(mdev);
+ if (get_ldev(mdev)) {
+ drbd_bm_lock(mdev, work->why, work->flags);
+ rv = work->io_fn(mdev);
+ drbd_bm_unlock(mdev);
+ put_ldev(mdev);
+ }
clear_bit(BITMAP_IO, &mdev->flags);
smp_mb__after_clear_bit();
clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
work->why = NULL;
+ work->flags = 0;
return 1;
}
void drbd_queue_bitmap_io(struct drbd_conf *mdev,
int (*io_fn)(struct drbd_conf *),
void (*done)(struct drbd_conf *, int),
- char *why)
+ char *why, enum bm_flag flags)
{
D_ASSERT(current == mdev->worker.task);
mdev->bm_io_work.io_fn = io_fn;
mdev->bm_io_work.done = done;
mdev->bm_io_work.why = why;
+ mdev->bm_io_work.flags = flags;
spin_lock_irq(&mdev->req_lock);
set_bit(BITMAP_IO, &mdev->flags);
* freezes application IO while that the actual IO operations runs. This
* functions MAY NOT be called from worker context.
*/
-int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why)
+int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
+ char *why, enum bm_flag flags)
{
int rv;
D_ASSERT(current != mdev->worker.task);
- drbd_suspend_io(mdev);
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_suspend_io(mdev);
- drbd_bm_lock(mdev, why);
+ drbd_bm_lock(mdev, why, flags);
rv = io_fn(mdev);
drbd_bm_unlock(mdev);
- drbd_resume_io(mdev);
+ if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
+ drbd_resume_io(mdev);
return rv;
}