NFS4.1 Fix data server connection race
author Andy Adamson <andros@netapp.com>
Wed, 8 May 2013 20:21:18 +0000 (16:21 -0400)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Wed, 8 May 2013 21:19:32 +0000 (17:19 -0400)
Unlike metadata server mounts, which support multiple mount points to
the same server via struct nfs_server, data servers support a single connection.

Concurrent calls to set up the data server connection can race: the first
call allocates the nfs_client struct and, before the cached struct nfs_client
pointer can be set, a second call also tries to set up the connection, finds the
already allocated nfs_client, bumps the reference count, re-initializes the
session, etc. This results in a hanging data server session after umount.

Signed-off-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c

index b8da955..235ff95 100644 (file)
@@ -70,6 +70,8 @@ struct nfs4_pnfs_ds {
        struct list_head        ds_addrs;
        struct nfs_client       *ds_clp;
        atomic_t                ds_count;
+       unsigned long           ds_state;
+#define NFS4DS_CONNECTING      0       /* ds is establishing connection */
 };
 
 struct nfs4_file_layout_dsaddr {
index 1fe284f..661a0f6 100644 (file)
@@ -775,6 +775,22 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
        return flseg->fh_array[i];
 }
 
+static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)      /* wait on the NFS4DS_CONNECTING bit of ds->ds_state */
+{
+       might_sleep();  /* annotate that the bit wait below can block the caller */
+       wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING,   /* reads ds->ds_state, so ds must be non-NULL */
+                       nfs_wait_bit_killable, TASK_KILLABLE);  /* NOTE(review): result is discarded; a killable wait can presumably end before the bit clears -- confirm */
+}
+
+static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)    /* clear NFS4DS_CONNECTING in ds->ds_state, then wake waiters on that bit */
+{
+       smp_mb__before_clear_bit();     /* barrier ahead of the clear; presumably orders the connecting task's earlier stores -- TODO confirm */
+       clear_bit(NFS4DS_CONNECTING, &ds->ds_state);    /* writes through ds, so callers must pass a non-NULL ds */
+       smp_mb__after_clear_bit();      /* barrier between the clear and the wake-up; presumably so woken waiters see the bit cleared -- TODO confirm */
+       wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);  /* same word and bit that nfs4_wait_ds_connect() waits on */
+}
+
+
 struct nfs4_pnfs_ds *
 nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
 {
@@ -791,16 +807,22 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
                filelayout_mark_devid_invalid(devid);
                return NULL;
        }
+       if (ds->ds_clp)
+               return ds;
 
-       if (!ds->ds_clp) {
+       if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
                struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
                int err;
 
                err = nfs4_ds_connect(s, ds);
                if (err) {
                        nfs4_mark_deviceid_unavailable(devid);
-                       return NULL;
+                       ds = NULL;
                }
+               nfs4_clear_ds_conn_bit(ds);
+       } else {
+               /* Either ds is connected, or ds is NULL */
+               nfs4_wait_ds_connect(ds);
        }
        return ds;
 }