#include "drbd_vli.h"
-struct flush_work {
- struct drbd_work w;
- struct drbd_epoch *epoch;
-};
-
enum finish_epoch {
FE_STILL_LIVE,
FE_DESTROYED,
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);
-static struct drbd_epoch *previous_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
-{
- struct drbd_epoch *prev;
- spin_lock(&mdev->epoch_lock);
- prev = list_entry(epoch->list.prev, struct drbd_epoch, list);
- if (prev == epoch || prev == mdev->current_epoch)
- prev = NULL;
- spin_unlock(&mdev->epoch_lock);
- return prev;
-}
#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
spin_unlock_irq(&mdev->req_lock);
list_for_each_entry_safe(e, t, &reclaimed, w.list)
- drbd_free_ee(mdev, e);
+ drbd_free_net_ee(mdev, e);
}
/**
* Is also used from inside an other spin_lock_irq(&mdev->req_lock);
* Either links the page chain back to the global pool,
* or returns all pages to the system. */
-static void drbd_pp_free(struct drbd_conf *mdev, struct page *page)
+static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
+ atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
int i;
+
if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count)
i = page_chain_free(page);
else {
drbd_pp_vacant += i;
spin_unlock(&drbd_pp_lock);
}
- atomic_sub(i, &mdev->pp_in_use);
- i = atomic_read(&mdev->pp_in_use);
+ i = atomic_sub_return(i, a);
if (i < 0)
- dev_warn(DEV, "ASSERTION FAILED: pp_in_use: %d < 0\n", i);
+ dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
+ is_net ? "pp_in_use_by_net" : "pp_in_use", i);
wake_up(&drbd_pp_wait);
}
return NULL;
}
-void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
{
if (e->flags & EE_HAS_DIGEST)
kfree(e->digest);
- drbd_pp_free(mdev, e->pages);
+ drbd_pp_free(mdev, e->pages, is_net);
D_ASSERT(atomic_read(&e->pending_bios) == 0);
D_ASSERT(hlist_unhashed(&e->colision));
mempool_free(e, drbd_ee_mempool);
LIST_HEAD(work_list);
struct drbd_epoch_entry *e, *t;
int count = 0;
+ int is_net = list == &mdev->net_ee;
spin_lock_irq(&mdev->req_lock);
list_splice_init(list, &work_list);
spin_unlock_irq(&mdev->req_lock);
list_for_each_entry_safe(e, t, &work_list, w.list) {
- drbd_free_ee(mdev, e);
+ drbd_free_some_ee(mdev, e, is_net);
count++;
}
return count;
spin_unlock_irq(&mdev->req_lock);
list_for_each_entry_safe(e, t, &reclaimed, w.list)
- drbd_free_ee(mdev, e);
+ drbd_free_net_ee(mdev, e);
/* possible callbacks here:
* e_end_block, and e_end_resync_block, e_send_discard_ack.
drbd_thread_start(&mdev->asender);
+ if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) {
+ drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET);
+ put_ldev(mdev);
+ }
+
if (!drbd_send_protocol(mdev))
return -1;
drbd_send_sync_param(mdev, &mdev->sync_conf);
*cmd = be16_to_cpu(h->h95.command);
*packet_size = be32_to_cpu(h->h95.length);
} else {
- dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n",
- (long)be32_to_cpu(h->h80.magic),
- h->h80.command, h->h80.length);
+ dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->h80.magic),
+ be16_to_cpu(h->h80.command),
+ be16_to_cpu(h->h80.length));
return FALSE;
}
mdev->last_received = jiffies;
return TRUE;
}
-static enum finish_epoch drbd_flush_after_epoch(struct drbd_conf *mdev, struct drbd_epoch *epoch)
+static void drbd_flush(struct drbd_conf *mdev)
{
int rv;
}
put_ldev(mdev);
}
-
- return drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
-}
-
-static int w_flush(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
- struct flush_work *fw = (struct flush_work *)w;
- struct drbd_epoch *epoch = fw->epoch;
-
- kfree(w);
-
- if (!test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags))
- drbd_flush_after_epoch(mdev, epoch);
-
- drbd_may_finish_epoch(mdev, epoch, EV_PUT |
- (mdev->state.conn < C_CONNECTED ? EV_CLEANUP : 0));
-
- return 1;
}
/**
struct drbd_epoch *epoch,
enum epoch_event ev)
{
- int finish, epoch_size;
+ int epoch_size;
struct drbd_epoch *next_epoch;
- int schedule_flush = 0;
enum finish_epoch rv = FE_STILL_LIVE;
spin_lock(&mdev->epoch_lock);
do {
next_epoch = NULL;
- finish = 0;
epoch_size = atomic_read(&epoch->epoch_size);
break;
case EV_GOT_BARRIER_NR:
set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
-
- /* Special case: If we just switched from WO_bio_barrier to
- WO_bdev_flush we should not finish the current epoch */
- if (test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags) && epoch_size == 1 &&
- mdev->write_ordering != WO_bio_barrier &&
- epoch == mdev->current_epoch)
- clear_bit(DE_CONTAINS_A_BARRIER, &epoch->flags);
- break;
- case EV_BARRIER_DONE:
- set_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags);
break;
case EV_BECAME_LAST:
/* nothing to do*/
if (epoch_size != 0 &&
atomic_read(&epoch->active) == 0 &&
- test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) &&
- epoch->list.prev == &mdev->current_epoch->list &&
- !test_bit(DE_IS_FINISHING, &epoch->flags)) {
- /* Nearly all conditions are met to finish that epoch... */
- if (test_bit(DE_BARRIER_IN_NEXT_EPOCH_DONE, &epoch->flags) ||
- mdev->write_ordering == WO_none ||
- (epoch_size == 1 && test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) ||
- ev & EV_CLEANUP) {
- finish = 1;
- set_bit(DE_IS_FINISHING, &epoch->flags);
- } else if (!test_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags) &&
- mdev->write_ordering == WO_bio_barrier) {
- atomic_inc(&epoch->active);
- schedule_flush = 1;
- }
- }
- if (finish) {
+ test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
if (!(ev & EV_CLEANUP)) {
spin_unlock(&mdev->epoch_lock);
drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
/* atomic_set(&epoch->active, 0); is already zero */
if (rv == FE_STILL_LIVE)
rv = FE_RECYCLED;
+ wake_up(&mdev->ee_wait);
}
}
spin_unlock(&mdev->epoch_lock);
- if (schedule_flush) {
- struct flush_work *fw;
- fw = kmalloc(sizeof(*fw), GFP_ATOMIC);
- if (fw) {
- fw->w.cb = w_flush;
- fw->epoch = epoch;
- drbd_queue_work(&mdev->data.work, &fw->w);
- } else {
- dev_warn(DEV, "Could not kmalloc a flush_work obj\n");
- set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
- /* That is not a recursion, only one level */
- drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE);
- drbd_may_finish_epoch(mdev, epoch, EV_PUT);
- }
- }
-
return rv;
}
[WO_none] = "none",
[WO_drain_io] = "drain",
[WO_bdev_flush] = "flush",
- [WO_bio_barrier] = "barrier",
};
pwo = mdev->write_ordering;
wo = min(pwo, wo);
- if (wo == WO_bio_barrier && mdev->ldev->dc.no_disk_barrier)
- wo = WO_bdev_flush;
if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
wo = WO_drain_io;
if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
wo = WO_none;
mdev->write_ordering = wo;
- if (pwo != mdev->write_ordering || wo == WO_bio_barrier)
+ if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}
bio->bi_sector = sector;
bio->bi_bdev = mdev->ldev->backing_bdev;
/* we special case some flags in the multi-bio case, see below
- * (REQ_UNPLUG, REQ_HARDBARRIER) */
+ * (REQ_UNPLUG) */
bio->bi_rw = rw;
bio->bi_private = e;
bio->bi_end_io = drbd_endio_sec;
bio->bi_rw &= ~REQ_UNPLUG;
drbd_generic_make_request(mdev, fault_type, bio);
-
- /* strip off REQ_HARDBARRIER,
- * unless it is the first or last bio */
- if (bios && bios->bi_next)
- bios->bi_rw &= ~REQ_HARDBARRIER;
} while (bios);
maybe_kick_lo(mdev);
return 0;
return -ENOMEM;
}
-/**
- * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
- * @mdev: DRBD device.
- * @w: work object.
- * @cancel: The connection will be closed anyways (unused in this callback)
- */
-int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __releases(local)
-{
- struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
- /* We leave DE_CONTAINS_A_BARRIER and EE_IS_BARRIER in place,
- (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
- so that we can finish that epoch in drbd_may_finish_epoch().
- That is necessary if we already have a long chain of Epochs, before
- we realize that REQ_HARDBARRIER is actually not supported */
-
- /* As long as the -ENOTSUPP on the barrier is reported immediately
- that will never trigger. If it is reported late, we will just
- print that warning and continue correctly for all future requests
- with WO_bdev_flush */
- if (previous_epoch(mdev, e->epoch))
- dev_warn(DEV, "Write ordering was not enforced (one time event)\n");
-
- /* we still have a local reference,
- * get_ldev was done in receive_Data. */
-
- e->w.cb = e_end_block;
- if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_DT_WR) != 0) {
- /* drbd_submit_ee fails for one reason only:
- * if was not able to allocate sufficient bios.
- * requeue, try again later. */
- e->w.cb = w_e_reissue;
- drbd_queue_work(&mdev->data.work, &e->w);
- }
- return 1;
-}
-
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
{
- int rv, issue_flush;
+ int rv;
struct p_barrier *p = &mdev->data.rbuf.barrier;
struct drbd_epoch *epoch;
* Therefore we must send the barrier_ack after the barrier request was
* completed. */
switch (mdev->write_ordering) {
- case WO_bio_barrier:
case WO_none:
if (rv == FE_RECYCLED)
return TRUE;
- break;
+
+ /* receiver context, in the writeout path of the other node.
+ * avoid potential distributed deadlock */
+ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+ if (epoch)
+ break;
+ else
+ dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
+ /* Fall through */
case WO_bdev_flush:
case WO_drain_io:
- if (rv == FE_STILL_LIVE) {
- set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
- drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
- rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
- }
- if (rv == FE_RECYCLED)
- return TRUE;
-
- /* The asender will send all the ACKs and barrier ACKs out, since
- all EEs moved from the active_ee to the done_ee. We need to
- provide a new epoch object for the EEs that come in soon */
- break;
- }
-
- /* receiver context, in the writeout path of the other node.
- * avoid potential distributed deadlock */
- epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
- if (!epoch) {
- dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
- issue_flush = !test_and_set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &mdev->current_epoch->flags);
drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
- if (issue_flush) {
- rv = drbd_flush_after_epoch(mdev, mdev->current_epoch);
- if (rv == FE_RECYCLED)
- return TRUE;
+ drbd_flush(mdev);
+
+ if (atomic_read(&mdev->current_epoch->epoch_size)) {
+ epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
+ if (epoch)
+ break;
}
- drbd_wait_ee_list_empty(mdev, &mdev->done_ee);
+ epoch = mdev->current_epoch;
+ wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
+
+ D_ASSERT(atomic_read(&epoch->active) == 0);
+ D_ASSERT(epoch->flags == 0);
return TRUE;
+ default:
+ dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
+ return FALSE;
}
epoch->flags = 0;
data_size -= rr;
}
kunmap(page);
- drbd_pp_free(mdev, page);
+ drbd_pp_free(mdev, page, 0);
return rv;
}
if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0)
return TRUE;
+ /* drbd_submit_ee currently fails for one reason only:
+ * not being able to allocate enough bios.
+ * Is dropping the connection going to help? */
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ spin_unlock_irq(&mdev->req_lock);
+
drbd_free_ee(mdev, e);
fail:
put_ldev(mdev);
ok = drbd_drain_block(mdev, data_size);
- drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
}
atomic_add(data_size >> 9, &mdev->rs_sect_in);
{
struct drbd_epoch_entry *e = (struct drbd_epoch_entry *)w;
sector_t sector = e->sector;
- struct drbd_epoch *epoch;
int ok = 1, pcmd;
- if (e->flags & EE_IS_BARRIER) {
- epoch = previous_epoch(mdev, e->epoch);
- if (epoch)
- drbd_may_finish_epoch(mdev, epoch, EV_BARRIER_DONE + (cancel ? EV_CLEANUP : 0));
- }
-
if (mdev->net_conf->wire_protocol == DRBD_PROT_C) {
if (likely((e->flags & EE_WAS_ERROR) == 0)) {
pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
return ret;
}
+static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
+{
+ if (mdev->agreed_pro_version >= 95)
+ return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
+ (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) |
+ (dpf & DP_FUA ? REQ_FUA : 0) |
+ (dpf & DP_FLUSH ? REQ_FUA : 0) |
+ (dpf & DP_DISCARD ? REQ_DISCARD : 0);
+ else
+ return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0;
+}
+
/* mirrored write */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
{
mdev->peer_seq++;
spin_unlock(&mdev->peer_seq_lock);
- drbd_send_ack_dp(mdev, P_NEG_ACK, p);
+ drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
atomic_inc(&mdev->current_epoch->epoch_size);
return drbd_drain_block(mdev, data_size);
}
e->epoch = mdev->current_epoch;
atomic_inc(&e->epoch->epoch_size);
atomic_inc(&e->epoch->active);
-
- if (mdev->write_ordering == WO_bio_barrier && atomic_read(&e->epoch->epoch_size) == 1) {
- struct drbd_epoch *epoch;
- /* Issue a barrier if we start a new epoch, and the previous epoch
- was not a epoch containing a single request which already was
- a Barrier. */
- epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
- if (epoch == e->epoch) {
- set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
- rw |= REQ_HARDBARRIER;
- e->flags |= EE_IS_BARRIER;
- } else {
- if (atomic_read(&epoch->epoch_size) > 1 ||
- !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
- set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
- set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
- rw |= REQ_HARDBARRIER;
- e->flags |= EE_IS_BARRIER;
- }
- }
- }
spin_unlock(&mdev->epoch_lock);
dp_flags = be32_to_cpu(p->dp_flags);
- if (dp_flags & DP_HARDBARRIER) {
- dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
- /* rw |= REQ_HARDBARRIER; */
- }
- if (dp_flags & DP_RW_SYNC)
- rw |= REQ_SYNC | REQ_UNPLUG;
+ rw |= write_flags_to_bio(mdev, dp_flags);
+
if (dp_flags & DP_MAY_SET_IN_SYNC)
e->flags |= EE_MAY_SET_IN_SYNC;
break;
}
- if (mdev->state.pdsk == D_DISKLESS) {
+ if (mdev->state.pdsk < D_INCONSISTENT) {
/* In case we have the only disk of the cluster, */
drbd_set_out_of_sync(mdev, e->sector, e->size);
e->flags |= EE_CALL_AL_COMPLETE_IO;
+ e->flags &= ~EE_MAY_SET_IN_SYNC;
drbd_al_begin_io(mdev, e->sector);
}
if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0)
return TRUE;
+ /* drbd_submit_ee currently fails for one reason only:
+ * not being able to allocate enough bios.
+ * Is dropping the connection going to help? */
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ hlist_del_init(&e->colision);
+ spin_unlock_irq(&mdev->req_lock);
+ if (e->flags & EE_CALL_AL_COMPLETE_IO)
+ drbd_al_complete_io(mdev, e->sector);
+
out_interrupted:
/* yes, the epoch_size now is imbalanced.
* but we drop the connection anyways, so we don't have a chance to
const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
struct drbd_epoch_entry *e;
struct digest_info *di = NULL;
- int size;
+ int size, verb;
unsigned int fault_type;
struct p_block_req *p = &mdev->data.rbuf.block_req;
}
if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
- if (__ratelimit(&drbd_ratelimit_state))
+ verb = 1;
+ switch (cmd) {
+ case P_DATA_REQUEST:
+ drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
+ break;
+ case P_RS_DATA_REQUEST:
+ case P_CSUM_RS_REQUEST:
+ case P_OV_REQUEST:
+ drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
+ break;
+ case P_OV_REPLY:
+ verb = 0;
+ dec_rs_pending(mdev);
+ drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
+ break;
+ default:
+ dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
+ cmdname(cmd));
+ }
+ if (verb && __ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Can not satisfy peer's read request, "
"no local data.\n");
- drbd_send_ack_rp(mdev, cmd == P_DATA_REQUEST ? P_NEG_DREPLY :
- P_NEG_RS_DREPLY , p);
- return TRUE;
+
+ /* drain possibly payload */
+ return drbd_drain_block(mdev, digest_size);
}
/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
break;
case P_OV_REQUEST:
- if (mdev->state.conn >= C_CONNECTED &&
- mdev->state.conn != C_VERIFY_T)
- dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n",
- drbd_conn_str(mdev->state.conn));
if (mdev->ov_start_sector == ~(sector_t)0 &&
mdev->agreed_pro_version >= 90) {
mdev->ov_start_sector = sector;
if (drbd_submit_ee(mdev, e, READ, fault_type) == 0)
return TRUE;
+ /* drbd_submit_ee currently fails for one reason only:
+ * not being able to allocate enough bios.
+ * Is dropping the connection going to help? */
+ spin_lock_irq(&mdev->req_lock);
+ list_del(&e->w.list);
+ spin_unlock_irq(&mdev->req_lock);
+ /* no drbd_rs_complete_io(), we are dropping the connection anyways */
+
out_free_e:
put_ldev(mdev);
drbd_free_ee(mdev, e);
if (mdev->agreed_pro_version < 94)
max_seg_s = be32_to_cpu(p->max_segment_size);
+ else if (mdev->agreed_pro_version == 94)
+ max_seg_s = DRBD_MAX_SIZE_H80_PACKET;
else /* drbd 8.3.8 onwards */
max_seg_s = DRBD_MAX_SEGMENT_SIZE;
static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
{
struct p_state *p = &mdev->data.rbuf.state;
- enum drbd_conns nconn, oconn;
- union drbd_state ns, peer_state;
+ union drbd_state os, ns, peer_state;
enum drbd_disk_state real_peer_disk;
enum chg_state_flags cs_flags;
int rv;
spin_lock_irq(&mdev->req_lock);
retry:
- oconn = nconn = mdev->state.conn;
+ os = ns = mdev->state;
spin_unlock_irq(&mdev->req_lock);
- if (nconn == C_WF_REPORT_PARAMS)
- nconn = C_CONNECTED;
+ /* peer says his disk is uptodate, while we think it is inconsistent,
+ * and this happens while we think we have a sync going on. */
+ if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
+ os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
+ /* If we are (becoming) SyncSource, but peer is still in sync
+ * preparation, ignore its uptodate-ness to avoid flapping, it
+ * will change to inconsistent once the peer reaches active
+ * syncing states.
+ * It may have changed syncer-paused flags, however, so we
+ * cannot ignore this completely. */
+ if (peer_state.conn > C_CONNECTED &&
+ peer_state.conn < C_SYNC_SOURCE)
+ real_peer_disk = D_INCONSISTENT;
+
+ /* if peer_state changes to connected at the same time,
+ * it explicitly notifies us that it finished resync.
+ * Maybe we should finish it up, too? */
+ else if (os.conn >= C_SYNC_SOURCE &&
+ peer_state.conn == C_CONNECTED) {
+ if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
+ drbd_resync_finished(mdev);
+ return TRUE;
+ }
+ }
+
+ /* peer says his disk is inconsistent, while we think it is uptodate,
+ * and this happens while the peer still thinks we have a sync going on,
+ * but we think we are already done with the sync.
+ * We ignore this to avoid flapping pdsk.
+ * This should not happen, if the peer is a recent version of drbd. */
+ if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
+ os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
+ real_peer_disk = D_UP_TO_DATE;
+
+ if (ns.conn == C_WF_REPORT_PARAMS)
+ ns.conn = C_CONNECTED;
if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
get_ldev_if_state(mdev, D_NEGOTIATING)) {
int cr; /* consider resync */
/* if we established a new connection */
- cr = (oconn < C_CONNECTED);
+ cr = (os.conn < C_CONNECTED);
/* if we had an established connection
* and one of the nodes newly attaches a disk */
- cr |= (oconn == C_CONNECTED &&
+ cr |= (os.conn == C_CONNECTED &&
(peer_state.disk == D_NEGOTIATING ||
- mdev->state.disk == D_NEGOTIATING));
+ os.disk == D_NEGOTIATING));
/* if we have both been inconsistent, and the peer has been
* forced to be UpToDate with --overwrite-data */
cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
/* if we had been plain connected, and the admin requested to
* start a sync by "invalidate" or "invalidate-remote" */
- cr |= (oconn == C_CONNECTED &&
+ cr |= (os.conn == C_CONNECTED &&
(peer_state.conn >= C_STARTING_SYNC_S &&
peer_state.conn <= C_WF_BITMAP_T));
if (cr)
- nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
+ ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
put_ldev(mdev);
- if (nconn == C_MASK) {
- nconn = C_CONNECTED;
+ if (ns.conn == C_MASK) {
+ ns.conn = C_CONNECTED;
if (mdev->state.disk == D_NEGOTIATING) {
- drbd_force_state(mdev, NS(disk, D_DISKLESS));
+ drbd_force_state(mdev, NS(disk, D_FAILED));
} else if (peer_state.disk == D_NEGOTIATING) {
dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
peer_state.disk = D_DISKLESS;
} else {
if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
return FALSE;
- D_ASSERT(oconn == C_WF_REPORT_PARAMS);
+ D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
return FALSE;
}
}
spin_lock_irq(&mdev->req_lock);
- if (mdev->state.conn != oconn)
+ if (mdev->state.i != os.i)
goto retry;
clear_bit(CONSIDER_RESYNC, &mdev->flags);
- ns.i = mdev->state.i;
- ns.conn = nconn;
ns.peer = peer_state.role;
ns.pdsk = real_peer_disk;
ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
- if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
+ if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
ns.disk = mdev->new_state_tmp.disk;
- cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD);
- if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED &&
+ cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
+ if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
test_bit(NEW_CUR_UUID, &mdev->flags)) {
/* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this
for temporal network outages! */
return FALSE;
}
- if (oconn > C_WF_REPORT_PARAMS) {
- if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
+ if (os.conn > C_WF_REPORT_PARAMS) {
+ if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
peer_state.disk != D_NEGOTIATING ) {
/* we want resync, peer has not yet decided to sync... */
/* Nowadays only used when forcing a node into primary role and
u64 tmp;
unsigned long s = c->bit_offset;
unsigned long e;
- int len = p->head.length - (sizeof(*p) - sizeof(p->head));
+ int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head));
int toggle = DCBP_get_start(p);
int have;
int bits;
memcpy(p, h, sizeof(*h));
if (drbd_recv(mdev, p->head.payload, data_size) != data_size)
goto out;
- if (p->head.length <= (sizeof(*p) - sizeof(p->head))) {
- dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length);
+ if (data_size <= (sizeof(*p) - sizeof(p->head))) {
+ dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
return FAILED;
}
ret = decode_bitmap_c(mdev, p, &c);
err_out:
drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}
+ /* If we leave here, we probably want to update at least the
+ * "Connected" indicator on stable storage. Do so explicitly here. */
+ drbd_md_sync(mdev);
}
void drbd_flush_workqueue(struct drbd_conf *mdev)
/* make sure syncer is stopped and w_resume_next_sg queued */
del_timer_sync(&mdev->resync_timer);
- set_bit(STOP_SYNC_TIMER, &mdev->flags);
resync_timer_fn((unsigned long)mdev);
/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
kfree(mdev->p_uuid);
mdev->p_uuid = NULL;
- if (!mdev->state.susp)
+ if (!is_susp(mdev->state))
tl_clear(mdev);
dev_info(DEV, "Connection closed\n");
if (os.conn == C_DISCONNECTING) {
wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0);
- if (!mdev->state.susp) {
+ if (!is_susp(mdev->state)) {
/* we must not free the tl_hash
* while application io is still on the fly */
wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
i = drbd_release_ee(mdev, &mdev->net_ee);
if (i)
dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
+ i = atomic_read(&mdev->pp_in_use_by_net);
+ if (i)
+ dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
i = atomic_read(&mdev->pp_in_use);
if (i)
dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
update_peer_seq(mdev, be32_to_cpu(p->seq_num));
- drbd_rs_complete_io(mdev, sector);
- drbd_set_in_sync(mdev, sector, blksize);
- /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
- mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
+ if (get_ldev(mdev)) {
+ drbd_rs_complete_io(mdev, sector);
+ drbd_set_in_sync(mdev, sector, blksize);
+ /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
+ mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
+ put_ldev(mdev);
+ }
dec_rs_pending(mdev);
atomic_add(blksize >> 9, &mdev->rs_sect_in);
else
ov_oos_print(mdev);
+ if (!get_ldev(mdev))
+ return TRUE;
+
drbd_rs_complete_io(mdev, sector);
dec_rs_pending(mdev);
drbd_resync_finished(mdev);
}
}
+ put_ldev(mdev);
return TRUE;
}
while (1) {
clear_bit(SIGNAL_ASENDER, &mdev->flags);
flush_signals(current);
- if (!drbd_process_done_ee(mdev)) {
- dev_err(DEV, "process_done_ee() = NOT_OK\n");
+ if (!drbd_process_done_ee(mdev))
goto reconnect;
- }
/* to avoid race with newly queued ACKs */
set_bit(SIGNAL_ASENDER, &mdev->flags);
spin_lock_irq(&mdev->req_lock);
if (received == expect && cmd == NULL) {
if (unlikely(h->magic != BE_DRBD_MAGIC)) {
- dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n",
- (long)be32_to_cpu(h->magic),
- h->command, h->length);
+ dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->magic),
+ be16_to_cpu(h->command),
+ be16_to_cpu(h->length));
goto reconnect;
}
cmd = get_asender_cmd(be16_to_cpu(h->command));
len = be16_to_cpu(h->length);
if (unlikely(cmd == NULL)) {
- dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n",
- (long)be32_to_cpu(h->magic),
- h->command, h->length);
+ dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n",
+ be32_to_cpu(h->magic),
+ be16_to_cpu(h->command),
+ be16_to_cpu(h->length));
goto disconnect;
}
expect = cmd->pkt_size;
if (0) {
reconnect:
drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
+ drbd_md_sync(mdev);
}
if (0) {
disconnect:
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
+ drbd_md_sync(mdev);
}
clear_bit(SIGNAL_ASENDER, &mdev->flags);