IPoIB: Fix multicast race between canceling and completing
authorMichael S. Tsirkin <mst@mellanox.co.il>
Thu, 2 Mar 2006 19:07:47 +0000 (11:07 -0800)
committerRoland Dreier <rolandd@cisco.com>
Mon, 20 Mar 2006 18:08:20 +0000 (10:08 -0800)
ipoib_mcast_stop_thread currently tests mcast->query and if it is
NULL, does not perform wait_for_completion on the mcast and frees the
mcast object directly.

However, since both operations are done without locking, it is
possible that ipoib_mcast_join_complete is in progress on this mcast
object and has set mcast->query to NULL already.

Solve this by:
- taking priv->lock before we change mcast->query in ipoib_mcast_join_complete,
  and keeping it until we no longer need the mcast object
- taking priv->lock around mcast->query test in ipoib_mcast_stop_thread

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
drivers/infiniband/ulp/ipoib/ipoib_multicast.c

index e5dc2a0..fde442a 100644 (file)
@@ -437,9 +437,11 @@ static void ipoib_mcast_join_complete(int status,
        if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
+       mutex_lock(&mcast_mutex);
+
+       spin_lock_irq(&priv->lock);
        mcast->query = NULL;
 
-       mutex_lock(&mcast_mutex);
        if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
                if (status == -ETIMEDOUT)
                        queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -448,6 +450,7 @@ static void ipoib_mcast_join_complete(int status,
                                           mcast->backoff * HZ);
        } else
                complete(&mcast->done);
+       spin_unlock_irq(&priv->lock);
        mutex_unlock(&mcast_mutex);
 
        return;
@@ -635,21 +638,27 @@ int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
        if (flush)
                flush_workqueue(ipoib_workqueue);
 
+       spin_lock_irq(&priv->lock);
        if (priv->broadcast && priv->broadcast->query) {
                ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
                priv->broadcast->query = NULL;
+               spin_unlock_irq(&priv->lock);
                ipoib_dbg_mcast(priv, "waiting for bcast\n");
                wait_for_completion(&priv->broadcast->done);
-       }
+       } else
+               spin_unlock_irq(&priv->lock);
 
        list_for_each_entry(mcast, &priv->multicast_list, list) {
+               spin_lock_irq(&priv->lock);
                if (mcast->query) {
                        ib_sa_cancel_query(mcast->query_id, mcast->query);
                        mcast->query = NULL;
+                       spin_unlock_irq(&priv->lock);
                        ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
                                        IPOIB_GID_ARG(mcast->mcmember.mgid));
                        wait_for_completion(&mcast->done);
-               }
+               } else
+                       spin_unlock_irq(&priv->lock);
        }
 
        return 0;