From: Sage Weil Date: Wed, 14 Oct 2009 21:27:38 +0000 (-0700) Subject: ceph: flush dirty caps via the cap_dirty list X-Git-Tag: v2.6.34-rc2~9^2~164 X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=afcdaea3f2a78ce4873bd7e98a6d603bda23d167;p=pandora-kernel.git ceph: flush dirty caps via the cap_dirty list Previously we were flushing dirty caps by passing an extra flag when traversing the delayed caps list. Besides being a bit ugly, that can also miss caps that are dirty but didn't result in a cap requeue: notably, __ceph_mark_dirty_caps(). Separate the flushing into a separate helper, and traverse the cap_dirty list. This also brings i_dirty_item in line with i_dirty_caps: we are on the list IFF caps != 0. We carry an inode ref IFF dirty_caps|flushing_caps != 0. Lose the unused return value from __ceph_mark_dirty_caps(). Signed-off-by: Sage Weil --- diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 111439d883d2..40b8d3471244 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -997,7 +997,7 @@ void ceph_queue_caps_release(struct inode *inode) if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { dout(" release msg %p full\n", msg); list_move_tail(&msg->list_head, - &session->s_cap_releases_done); + &session->s_cap_releases_done); } else { dout(" release msg %p at %d/%d (%d)\n", msg, (int)le32_to_cpu(head->num), @@ -1292,14 +1292,20 @@ static int __mark_caps_flushing(struct inode *inode, ceph_cap_string(ci->i_flushing_caps | flushing)); ci->i_flushing_caps |= flushing; ci->i_dirty_caps = 0; + dout(" inode %p now !dirty\n", inode); spin_lock(&mdsc->cap_dirty_lock); + list_del_init(&ci->i_dirty_item); + + ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; if (list_empty(&ci->i_flushing_item)) { - list_del_init(&ci->i_dirty_item); list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing); mdsc->num_cap_flushing++; - ci->i_cap_flush_seq = ++mdsc->cap_flush_seq; - dout(" inode %p now flushing seq %lld\n", &ci->vfs_inode, + dout(" inode %p now flushing 
seq %lld\n", inode, + ci->i_cap_flush_seq); + } else { + list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing); + dout(" inode %p now flushing (more) seq %lld\n", inode, ci->i_cap_flush_seq); } spin_unlock(&mdsc->cap_dirty_lock); @@ -1555,32 +1561,33 @@ ack: * Mark caps dirty. If inode is newly dirty, add to the global dirty * list. */ -int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) +void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) { struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; struct inode *inode = &ci->vfs_inode; - int was = __ceph_caps_dirty(ci); + int was_dirty = ci->i_dirty_caps; int dirty = 0; dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode, ceph_cap_string(mask), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_dirty_caps | mask)); ci->i_dirty_caps |= mask; - if (!was) { + if (!was_dirty) { dout(" inode %p now dirty\n", &ci->vfs_inode); spin_lock(&mdsc->cap_dirty_lock); list_add(&ci->i_dirty_item, &mdsc->cap_dirty); spin_unlock(&mdsc->cap_dirty_lock); - igrab(inode); - dirty |= I_DIRTY_SYNC; + if (ci->i_flushing_caps == 0) { + igrab(inode); + dirty |= I_DIRTY_SYNC; + } } - if ((was & CEPH_CAP_FILE_BUFFER) && + if (((was_dirty | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) && (mask & CEPH_CAP_FILE_BUFFER)) dirty |= I_DIRTY_DATASYNC; if (dirty) __mark_inode_dirty(inode, dirty); __cap_delay_requeue(mdsc, ci); - return was; } /* @@ -2327,7 +2334,7 @@ static void handle_cap_flush_ack(struct inode *inode, int dirty = le32_to_cpu(m->dirty); int cleaned = 0; u64 flush_tid = le64_to_cpu(m->client_tid); - int old_dirty = 0, new_dirty = 0; + int drop = 0; int i; for (i = 0; i < CEPH_CAP_BITS; i++) @@ -2344,9 +2351,7 @@ static void handle_cap_flush_ack(struct inode *inode, if (ci->i_flushing_caps == (ci->i_flushing_caps & ~cleaned)) goto out; - old_dirty = ci->i_dirty_caps | ci->i_flushing_caps; ci->i_flushing_caps &= ~cleaned; - new_dirty = ci->i_dirty_caps | 
ci->i_flushing_caps; spin_lock(&mdsc->cap_dirty_lock); if (ci->i_flushing_caps == 0) { @@ -2360,17 +2365,19 @@ static void handle_cap_flush_ack(struct inode *inode, mdsc->num_cap_flushing--; wake_up(&mdsc->cap_flushing_wq); dout(" inode %p now !flushing\n", inode); - } - if (old_dirty && !new_dirty) { - dout(" inode %p now clean\n", inode); - list_del_init(&ci->i_dirty_item); + + if (ci->i_dirty_caps == 0) { + dout(" inode %p now clean\n", inode); + BUG_ON(!list_empty(&ci->i_dirty_item)); + drop = 1; + } } spin_unlock(&mdsc->cap_dirty_lock); wake_up(&ci->i_cap_wq); out: spin_unlock(&inode->i_lock); - if (old_dirty && !new_dirty) + if (drop) iput(inode); } @@ -2676,14 +2683,11 @@ bad: /* * Delayed work handler to process end of delayed cap release LRU list. */ -void ceph_check_delayed_caps(struct ceph_mds_client *mdsc, int flushdirty) +void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) { struct ceph_inode_info *ci; int flags = CHECK_CAPS_NODELAY; - if (flushdirty) - flags |= CHECK_CAPS_FLUSH; - dout("check_delayed_caps\n"); while (1) { spin_lock(&mdsc->cap_delay_lock); @@ -2703,6 +2707,32 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc, int flushdirty) spin_unlock(&mdsc->cap_delay_lock); } +/* + * Flush all dirty caps to the mds + */ +void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) +{ + struct ceph_inode_info *ci; + struct inode *inode; + + dout("flush_dirty_caps\n"); + spin_lock(&mdsc->cap_dirty_lock); + while (!list_empty(&mdsc->cap_dirty)) { + ci = list_first_entry(&mdsc->cap_dirty, + struct ceph_inode_info, + i_dirty_item); + inode = igrab(&ci->vfs_inode); + spin_unlock(&mdsc->cap_dirty_lock); + if (inode) { + ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, + NULL); + iput(inode); + } + spin_lock(&mdsc->cap_dirty_lock); + } + spin_unlock(&mdsc->cap_dirty_lock); +} + /* * Drop open file reference. 
If we were the last open file, * we may need to release capabilities to the MDS (or schedule diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 2b19da31a8b3..12d66c0572ac 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2504,7 +2504,7 @@ static void delayed_work(struct work_struct *work) int renew_caps; dout("mdsc delayed_work\n"); - ceph_check_delayed_caps(mdsc, 0); + ceph_check_delayed_caps(mdsc); mutex_lock(&mdsc->mutex); renew_interval = mdsc->mdsmap->m_session_timeout >> 2; @@ -2627,7 +2627,7 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) mdsc->stopping = 1; drop_leases(mdsc); - ceph_check_delayed_caps(mdsc, 1); + ceph_flush_dirty_caps(mdsc); wait_requests(mdsc); } @@ -2677,7 +2677,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) mutex_unlock(&mdsc->mutex); dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush); - ceph_check_delayed_caps(mdsc, 1); + ceph_flush_dirty_caps(mdsc); wait_unsafe_requests(mdsc, want_tid); wait_event(mdsc->cap_flushing_wq, check_cap_flush(mdsc, want_flush)); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index cfd39ef4023e..0bbf58ab607e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -524,7 +524,7 @@ static inline int __ceph_caps_dirty(struct ceph_inode_info *ci) { return ci->i_dirty_caps | ci->i_flushing_caps; } -extern int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); +extern void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask); extern int ceph_caps_revoking(struct ceph_inode_info *ci, int mask); extern int __ceph_caps_used(struct ceph_inode_info *ci); @@ -814,8 +814,8 @@ extern void __ceph_flush_snaps(struct ceph_inode_info *ci, struct ceph_mds_session **psession); extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_session *session); -extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc, - int flushdirty); +extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); +extern void 
ceph_flush_dirty_caps(struct ceph_mds_client *mdsc); extern int ceph_encode_inode_release(void **p, struct inode *inode, int mds, int drop, int unless, int force);