RDMA/cma: Fix deadlock destroying listen requests
authorSean Hefty <sean.hefty@intel.com>
Tue, 9 Oct 2007 18:12:34 +0000 (11:12 -0700)
committerRoland Dreier <rolandd@cisco.com>
Tue, 16 Oct 2007 19:25:49 +0000 (12:25 -0700)
Deadlock condition reported by Kanoj Sarcar <kanoj@netxen.com>.
The deadlock occurs when a connection request arrives at the same
time that a wildcard listen is being destroyed.

A wildcard listen maintains per device listen requests for each
RDMA device in the system.  The per device listens are automatically
added and removed when RDMA devices are inserted or removed from
the system.

When a wildcard listen is destroyed, rdma_destroy_id() acquires
the rdma_cm's device mutex ('lock') to protect against hot-plug
events adding or removing per device listens.  It then tries to
destroy the per device listens by calling ib_destroy_cm_id() or
iw_destroy_cm_id().  It does this while holding the device mutex.

However, if the underlying iw/ib CM reports a connection request
while this is occurring, the rdma_cm callback function will try
to acquire the same device mutex.  Since we're in a callback,
the ib_destroy_cm_id() or iw_destroy_cm_id() calls will block until
their callback thread returns, but the callback is blocked waiting for
the device mutex.

Fix this by re-working how per device listens are destroyed.  Use
rdma_destroy_id(), which avoids the deadlock, in place of
cma_destroy_listen().  Additional synchronization is added to handle
device hot-plug events and ensure that the id is not destroyed twice.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/core/cma.c

index 01ae052..ee946cc 100644 (file)
@@ -114,11 +114,12 @@ struct rdma_id_private {
 
        struct rdma_bind_list   *bind_list;
        struct hlist_node       node;
-       struct list_head        list;
-       struct list_head        listen_list;
+       struct list_head        list; /* listen_any_list or cma_device.list */
+       struct list_head        listen_list; /* per device listens */
        struct cma_device       *cma_dev;
        struct list_head        mc_list;
 
+       int                     internal_id;
        enum cma_state          state;
        spinlock_t              lock;
        struct mutex            qp_mutex;
@@ -745,50 +746,27 @@ static void cma_cancel_route(struct rdma_id_private *id_priv)
        }
 }
 
-static inline int cma_internal_listen(struct rdma_id_private *id_priv)
-{
-       return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
-              cma_any_addr(&id_priv->id.route.addr.src_addr);
-}
-
-static void cma_destroy_listen(struct rdma_id_private *id_priv)
-{
-       cma_exch(id_priv, CMA_DESTROYING);
-
-       if (id_priv->cma_dev) {
-               switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-               case RDMA_TRANSPORT_IB:
-                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
-                               ib_destroy_cm_id(id_priv->cm_id.ib);
-                       break;
-               case RDMA_TRANSPORT_IWARP:
-                       if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
-                               iw_destroy_cm_id(id_priv->cm_id.iw);
-                       break;
-               default:
-                       break;
-               }
-               cma_detach_from_dev(id_priv);
-       }
-       list_del(&id_priv->listen_list);
-
-       cma_deref_id(id_priv);
-       wait_for_completion(&id_priv->comp);
-
-       kfree(id_priv);
-}
-
 static void cma_cancel_listens(struct rdma_id_private *id_priv)
 {
        struct rdma_id_private *dev_id_priv;
 
+       /*
+        * Remove from listen_any_list to prevent added devices from spawning
+        * additional listen requests.
+        */
        mutex_lock(&lock);
        list_del(&id_priv->list);
 
        while (!list_empty(&id_priv->listen_list)) {
                dev_id_priv = list_entry(id_priv->listen_list.next,
                                         struct rdma_id_private, listen_list);
-               cma_destroy_listen(dev_id_priv);
+               /* sync with device removal to avoid duplicate destruction */
+               list_del_init(&dev_id_priv->list);
+               list_del(&dev_id_priv->listen_list);
+               mutex_unlock(&lock);
+
+               rdma_destroy_id(&dev_id_priv->id);
+               mutex_lock(&lock);
        }
        mutex_unlock(&lock);
 }
@@ -876,6 +854,9 @@ void rdma_destroy_id(struct rdma_cm_id *id)
        cma_deref_id(id_priv);
        wait_for_completion(&id_priv->comp);
 
+       if (id_priv->internal_id)
+               cma_deref_id(id_priv->id.context);
+
        kfree(id_priv->id.route.path_rec);
        kfree(id_priv);
 }
@@ -1432,14 +1413,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
        cma_attach_to_dev(dev_id_priv, cma_dev);
        list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
+       atomic_inc(&id_priv->refcount);
+       dev_id_priv->internal_id = 1;
 
        ret = rdma_listen(id, id_priv->backlog);
        if (ret)
-               goto err;
-
-       return;
-err:
-       cma_destroy_listen(dev_id_priv);
+               printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
+                      "listening on device %s", ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -2787,16 +2767,12 @@ static void cma_process_remove(struct cma_device *cma_dev)
                id_priv = list_entry(cma_dev->id_list.next,
                                     struct rdma_id_private, list);
 
-               if (cma_internal_listen(id_priv)) {
-                       cma_destroy_listen(id_priv);
-                       continue;
-               }
-
+               list_del(&id_priv->listen_list);
                list_del_init(&id_priv->list);
                atomic_inc(&id_priv->refcount);
                mutex_unlock(&lock);
 
-               ret = cma_remove_id_dev(id_priv);
+               ret = id_priv->internal_id ? 1 : cma_remove_id_dev(id_priv);
                cma_deref_id(id_priv);
                if (ret)
                        rdma_destroy_id(&id_priv->id);