ocfs2/dlm: fix deadlock when dispatch assert master
[pandora-kernel.git] / fs / ocfs2 / dlm / dlmrecovery.c
index 01ebfd0..0e5013e 100644 (file)
@@ -540,7 +540,10 @@ master_here:
                /* success!  see if any other nodes need recovery */
                mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
                     dlm->name, dlm->reco.dead_node, dlm->node_num);
-               dlm_reset_recovery(dlm);
+               spin_lock(&dlm->spinlock);
+               __dlm_reset_recovery(dlm);
+               dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+               spin_unlock(&dlm->spinlock);
        }
        dlm_end_recovery(dlm);
 
@@ -698,6 +701,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
                if (all_nodes_done) {
                        int ret;
 
+                       /* Set this flag on recovery master to avoid
+                        * a new recovery for another dead node start
+                        * before the recovery is not done. That may
+                        * cause recovery hung.*/
+                       spin_lock(&dlm->spinlock);
+                       dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
+                       spin_unlock(&dlm->spinlock);
+
                        /* all nodes are now in DLM_RECO_NODE_DATA_DONE state
                         * just send a finalize message to everyone and
                         * clean up */
@@ -1678,6 +1689,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        unsigned int hash;
        int master = DLM_LOCK_RES_OWNER_UNKNOWN;
        u32 flags = DLM_ASSERT_MASTER_REQUERY;
+       int dispatched = 0;
 
        if (!dlm_grab(dlm)) {
                /* since the domain has gone away on this
@@ -1699,6 +1711,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
                                mlog_errno(-ENOMEM);
                                /* retry!? */
                                BUG();
+                       } else {
+                               dispatched = 1;
                        }
                } else /* put.. incase we are not the master */
                        dlm_lockres_put(res);
@@ -1706,7 +1720,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
        }
        spin_unlock(&dlm->spinlock);
 
-       dlm_put(dlm);
+       if (!dispatched)
+               dlm_put(dlm);
        return master;
 }
 
@@ -1752,13 +1767,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
                                     struct dlm_migratable_lockres *mres)
 {
        struct dlm_migratable_lock *ml;
-       struct list_head *queue;
+       struct list_head *queue, *iter;
        struct list_head *tmpq = NULL;
        struct dlm_lock *newlock = NULL;
        struct dlm_lockstatus *lksb = NULL;
        int ret = 0;
        int i, j, bad;
-       struct dlm_lock *lock = NULL;
+       struct dlm_lock *lock;
        u8 from = O2NM_MAX_NODES;
        unsigned int added = 0;
        __be64 c;
@@ -1793,14 +1808,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
                        /* MIGRATION ONLY! */
                        BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
 
+                       lock = NULL;
                        spin_lock(&res->spinlock);
                        for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
                                tmpq = dlm_list_idx_to_ptr(res, j);
-                               list_for_each_entry(lock, tmpq, list) {
-                                       if (lock->ml.cookie != ml->cookie)
-                                               lock = NULL;
-                                       else
+                               list_for_each(iter, tmpq) {
+                                       lock = list_entry(iter,
+                                                 struct dlm_lock, list);
+                                       if (lock->ml.cookie == ml->cookie)
                                                break;
+                                       lock = NULL;
                                }
                                if (lock)
                                        break;
@@ -2870,8 +2887,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
                                BUG();
                        }
                        dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+                       __dlm_reset_recovery(dlm);
                        spin_unlock(&dlm->spinlock);
-                       dlm_reset_recovery(dlm);
                        dlm_kick_recovery_thread(dlm);
                        break;
                default: