ceph: take i_mutex before getting Fw cap
author Yan, Zheng <zheng.z.yan@intel.com>
Fri, 12 Apr 2013 08:11:10 +0000 (16:11 +0800)
committer Sage Weil <sage@inktank.com>
Thu, 2 May 2013 04:18:53 +0000 (21:18 -0700)
There is a deadlock, as illustrated below. The fix is to take i_mutex
before getting the Fw cap reference.

      write                    truncate                 MDS
---------------------     --------------------      --------------
get Fw cap
                          lock i_mutex
lock i_mutex (blocked)
                          request setattr.size  ->
                                                <-   revoke Fw cap

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
fs/ceph/caps.c
fs/ceph/file.c

index f956310..da0f9b8 100644 (file)
@@ -2052,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
                goto out;
        }
 
+       /* finish pending truncate */
+       while (ci->i_truncate_pending) {
+               spin_unlock(&ci->i_ceph_lock);
+               __ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
+               spin_lock(&ci->i_ceph_lock);
+       }
+
        if (need & CEPH_CAP_FILE_WR) {
                if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
                        dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
@@ -2073,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
        }
        have = __ceph_caps_issued(ci, &implemented);
 
-       /*
-        * disallow writes while a truncate is pending
-        */
-       if (ci->i_truncate_pending)
-               have &= ~CEPH_CAP_FILE_WR;
-
        if ((have & need) == need) {
                /*
                 * Look at (implemented & ~have & not) so that we keep waiting
index a65acf3..dd44f35 100644 (file)
@@ -651,7 +651,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
        dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
             inode, ceph_vinop(inode), pos, (unsigned)len, inode);
 again:
-       __ceph_do_pending_vmtruncate(inode, true);
        if (fi->fmode & CEPH_FILE_MODE_LAZY)
                want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
        else
@@ -728,7 +727,7 @@ retry_snap:
                ret = -ENOSPC;
                goto out;
        }
-       __ceph_do_pending_vmtruncate(inode, true);
+       mutex_lock(&inode->i_mutex);
        dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
             inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
             inode->i_size);
@@ -737,8 +736,10 @@ retry_snap:
        else
                want = CEPH_CAP_FILE_BUFFER;
        ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
-       if (ret < 0)
-               goto out_put;
+       if (ret < 0) {
+               mutex_unlock(&inode->i_mutex);
+               goto out;
+       }
 
        dout("aio_write %p %llx.%llx %llu~%u  got cap refs on %s\n",
             inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -748,10 +749,10 @@ retry_snap:
            (iocb->ki_filp->f_flags & O_DIRECT) ||
            (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
            (fi->flags & CEPH_F_SYNC)) {
+               mutex_unlock(&inode->i_mutex);
                ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
                        &iocb->ki_pos);
        } else {
-               mutex_lock(&inode->i_mutex);
                ret = __generic_file_aio_write(iocb, iov, nr_segs,
                                               &iocb->ki_pos);
                mutex_unlock(&inode->i_mutex);
@@ -766,7 +767,6 @@ retry_snap:
                        __mark_inode_dirty(inode, dirty);
        }
 
-out_put:
        dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
             inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
             ceph_cap_string(got));