ocfs2/dlm: fix deadlock when dispatch assert master
author: Joseph Qi <joseph.qi@huawei.com>
Tue, 22 Sep 2015 21:59:20 +0000 (14:59 -0700)
committer: Ben Hutchings <ben@decadent.org.uk>
Tue, 13 Oct 2015 02:46:11 +0000 (03:46 +0100)
commit 012572d4fc2e4ddd5c8ec8614d51414ec6cae02a upstream.

The order of the following three spinlocks should be:
dlm_domain_lock < dlm_ctxt->spinlock < dlm_lock_resource->spinlock

But dlm_dispatch_assert_master() is called while holding
dlm_ctxt->spinlock and dlm_lock_resource->spinlock, and it then calls
dlm_grab(), which takes dlm_domain_lock and so violates this order.

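If another thread (for example, dlm_query_join_handler) has already
taken dlm_domain_lock and then tries to take dlm_ctxt->spinlock, a
deadlock occurs.

Fix this by dropping the dlm_grab() call from
dlm_dispatch_assert_master(). Instead, when the dispatch succeeds, the
calling handler skips its dlm_put() so that the reference it already
holds is kept for the work item.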

Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: "Junxiao Bi" <junxiao.bi@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[bwh: Backported to 3.2: adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/dlm/dlmrecovery.c

index 7ba6ac1..8e48ba5 100644 (file)
@@ -1411,6 +1411,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
        int found, ret;
        int set_maybe;
        int dispatch_assert = 0;
        int found, ret;
        int set_maybe;
        int dispatch_assert = 0;
+       int dispatched = 0;
 
        if (!dlm_grab(dlm))
                return DLM_MASTER_RESP_NO;
 
        if (!dlm_grab(dlm))
                return DLM_MASTER_RESP_NO;
@@ -1617,13 +1618,16 @@ send_response:
                        mlog(ML_ERROR, "failed to dispatch assert master work\n");
                        response = DLM_MASTER_RESP_ERROR;
                        dlm_lockres_put(res);
                        mlog(ML_ERROR, "failed to dispatch assert master work\n");
                        response = DLM_MASTER_RESP_ERROR;
                        dlm_lockres_put(res);
+               } else {
+                       dispatched = 1;
                }
        } else {
                if (res)
                        dlm_lockres_put(res);
        }
 
                }
        } else {
                if (res)
                        dlm_lockres_put(res);
        }
 
-       dlm_put(dlm);
+       if (!dispatched)
+               dlm_put(dlm);
        return response;
 }
 
        return response;
 }
 
@@ -2041,7 +2045,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
 
 
        /* queue up work for dlm_assert_master_worker */
 
 
        /* queue up work for dlm_assert_master_worker */
-       dlm_grab(dlm);  /* get an extra ref for the work item */
        dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
        item->u.am.lockres = res; /* already have a ref */
        /* can optionally ignore node numbers higher than this node */
        dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
        item->u.am.lockres = res; /* already have a ref */
        /* can optionally ignore node numbers higher than this node */
index d15b071..0e5013e 100644 (file)
@@ -1689,6 +1689,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        unsigned int hash;
        int master = DLM_LOCK_RES_OWNER_UNKNOWN;
        u32 flags = DLM_ASSERT_MASTER_REQUERY;
        unsigned int hash;
        int master = DLM_LOCK_RES_OWNER_UNKNOWN;
        u32 flags = DLM_ASSERT_MASTER_REQUERY;
+       int dispatched = 0;
 
        if (!dlm_grab(dlm)) {
                /* since the domain has gone away on this
 
        if (!dlm_grab(dlm)) {
                /* since the domain has gone away on this
@@ -1710,6 +1711,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
                                mlog_errno(-ENOMEM);
                                /* retry!? */
                                BUG();
                                mlog_errno(-ENOMEM);
                                /* retry!? */
                                BUG();
+                       } else {
+                               dispatched = 1;
                        }
                } else /* put.. incase we are not the master */
                        dlm_lockres_put(res);
                        }
                } else /* put.. incase we are not the master */
                        dlm_lockres_put(res);
@@ -1717,7 +1720,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        }
        spin_unlock(&dlm->spinlock);
 
        }
        spin_unlock(&dlm->spinlock);
 
-       dlm_put(dlm);
+       if (!dispatched)
+               dlm_put(dlm);
        return master;
 }
 
        return master;
 }