RDS/IB: protect the list of IB devices
authorZach Brown <zach.brown@oracle.com>
Thu, 15 Jul 2010 19:34:33 +0000 (12:34 -0700)
committerAndy Grover <andy.grover@oracle.com>
Thu, 9 Sep 2010 01:16:44 +0000 (18:16 -0700)
The RDS IB device list wasn't protected by any locking.  Traversal in
both the get_mr and FMR flushing paths could race with addition and
removal.

List manipulation is done with RCU primitives and is protected by the
write side of an rwsem.  The list traversal in the get_mr fast path is
protected by an RCU read-side critical section.  The FMR list traversal
is more problematic because it can block while traversing the list.  We
protect this with the read side of the rwsem.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/ib_rdma.c

index 3eb5617..b12a395 100644 (file)
@@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
 module_param(rds_ib_retry_count, int, 0444);
 MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
 
+/*
+ * we have a clumsy combination of RCU and a rwsem protecting this list
+ * because it is used both in the get_mr fast path and while blocking in
+ * the FMR flushing path.
+ */
+DECLARE_RWSEM(rds_ib_devices_lock);
 struct list_head rds_ib_devices;
 
 /* NOTE: if also grabbing ibdev lock, grab this first */
@@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device)
 
        INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
        INIT_LIST_HEAD(&rds_ibdev->conn_list);
-       list_add_tail(&rds_ibdev->list, &rds_ib_devices);
+
+       down_write(&rds_ib_devices_lock);
+       list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
+       up_write(&rds_ib_devices_lock);
        atomic_inc(&rds_ibdev->refcount);
 
        ib_set_client_data(device, &rds_ib_client, rds_ibdev);
@@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device)
 
        rds_ib_dev_shutdown(rds_ibdev);
 
+       /* stop connection attempts from getting a reference to this device. */
+       ib_set_client_data(device, &rds_ib_client, NULL);
+
+       down_write(&rds_ib_devices_lock);
+       list_del_rcu(&rds_ibdev->list);
+       up_write(&rds_ib_devices_lock);
+
        /*
-        * prevent future connection attempts from getting a reference to this
-        * device and wait for currently racing connection attempts to finish
-        * getting their reference
+        * This synchronize rcu is waiting for readers of both the ib
+        * client data and the devices list to finish before we drop
+        * both of those references.
         */
-       ib_set_client_data(device, &rds_ib_client, NULL);
        synchronize_rcu();
        rds_ib_dev_put(rds_ibdev);
-
-       list_del(&rds_ibdev->list);
        rds_ib_dev_put(rds_ibdev);
 }
 
index a13ced5..2189fd4 100644 (file)
@@ -23,6 +23,7 @@
 
 #define RDS_IB_RECYCLE_BATCH_COUNT     32
 
+extern struct rw_semaphore rds_ib_devices_lock;
 extern struct list_head rds_ib_devices;
 
 /*
index 0017964..8f6e221 100644 (file)
@@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
        struct rds_ib_device *rds_ibdev;
        struct rds_ib_ipaddr *i_ipaddr;
 
-       list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
-               rcu_read_lock();
+       rcu_read_lock();
+       list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
                list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
                        if (i_ipaddr->ipaddr == ipaddr) {
                                atomic_inc(&rds_ibdev->refcount);
@@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
                                return rds_ibdev;
                        }
                }
-               rcu_read_unlock();
        }
+       rcu_read_unlock();
 
        return NULL;
 }
@@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void)
 {
        struct rds_ib_device *rds_ibdev;
 
+       down_read(&rds_ib_devices_lock);
        list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
                struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
 
                if (pool)
                        rds_ib_flush_mr_pool(pool, 0, NULL);
        }
+       up_read(&rds_ib_devices_lock);
 }
 
 void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,