ceph: fix flushing of caps vs cap import
authorSage Weil <sage@newdream.net>
Tue, 18 Jan 2011 16:56:01 +0000 (08:56 -0800)
committerSage Weil <sage@newdream.net>
Wed, 19 Jan 2011 17:23:25 +0000 (09:23 -0800)
If we are mid-flush and a cap is migrated to another node, we need to
resend the cap flush message to the new MDS, and do so with the original
flush_seq to avoid leaking across a sync boundary.  Previously we didn't
redo the flush (we only flushed newly dirty data), which would cause a
later sync to hang forever.

Signed-off-by: Sage Weil <sage@newdream.net>
fs/ceph/caps.c

index f654c7e..7def3f5 100644 (file)
@@ -1560,9 +1560,10 @@ retry_locked:
                /* NOTE: no side-effects allowed, until we take s_mutex */
 
                revoking = cap->implemented & ~cap->issued;
-               if (revoking)
-                       dout(" mds%d revoking %s\n", cap->mds,
-                            ceph_cap_string(revoking));
+               dout(" mds%d cap %p issued %s implemented %s revoking %s\n",
+                    cap->mds, cap, ceph_cap_string(cap->issued),
+                    ceph_cap_string(cap->implemented),
+                    ceph_cap_string(revoking));
 
                if (cap == ci->i_auth_cap &&
                    (cap->issued & CEPH_CAP_FILE_WR)) {
@@ -1942,6 +1943,35 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
        }
 }
 
+static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
+                                    struct ceph_mds_session *session,
+                                    struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_cap *cap;
+       int delayed = 0;
+
+       spin_lock(&inode->i_lock);
+       cap = ci->i_auth_cap;
+       dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
+            ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
+       __ceph_flush_snaps(ci, &session, 1);
+       if (ci->i_flushing_caps) {
+               delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH,
+                                    __ceph_caps_used(ci),
+                                    __ceph_caps_wanted(ci),
+                                    cap->issued | cap->implemented,
+                                    ci->i_flushing_caps, NULL);
+               if (delayed) {
+                       spin_lock(&inode->i_lock);
+                       __cap_delay_requeue(mdsc, ci);
+                       spin_unlock(&inode->i_lock);
+               }
+       } else {
+               spin_unlock(&inode->i_lock);
+       }
+}
+
 
 /*
  * Take references to capabilities we hold, so that we don't release
@@ -2689,7 +2719,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
        ceph_add_cap(inode, session, cap_id, -1,
                     issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH,
                     NULL /* no caps context */);
-       try_flush_caps(inode, session, NULL);
+       kick_flushing_inode_caps(mdsc, session, inode);
        up_read(&mdsc->snap_rwsem);
 
        /* make sure we re-request max_size, if necessary */