NFSv4: Fix a race between open() and close()
authorTrond Myklebust <Trond.Myklebust@netapp.com>
Fri, 4 Nov 2005 20:32:58 +0000 (15:32 -0500)
committerTrond Myklebust <Trond.Myklebust@netapp.com>
Fri, 4 Nov 2005 20:32:58 +0000 (15:32 -0500)
 We must not remove the nfs4_state structure from the inode open lists
 before we are in sequence lock.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c

index 78a53f5..5396902 100644 (file)
@@ -214,7 +214,7 @@ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
 extern int nfs4_proc_setclientid_confirm(struct nfs4_client *);
 extern int nfs4_proc_async_renew(struct nfs4_client *);
 extern int nfs4_proc_renew(struct nfs4_client *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode);
+extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 
@@ -248,6 +248,7 @@ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state
 extern void nfs4_put_open_state(struct nfs4_state *);
 extern void nfs4_close_state(struct nfs4_state *, mode_t);
 extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
+extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs4_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
index 933e13b..02fddd0 100644 (file)
@@ -217,13 +217,12 @@ static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid,
        /* Protect against nfs4_find_state() */
        spin_lock(&state->owner->so_lock);
        spin_lock(&inode->i_lock);
-       state->state |= open_flags;
-       /* NB! List reordering - see the reclaim code for why.  */
-       if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++)
-               list_move(&state->open_states, &state->owner->so_states);
+       memcpy(&state->stateid, stateid, sizeof(state->stateid));
+       if ((open_flags & FMODE_WRITE))
+               state->nwriters++;
        if (open_flags & FMODE_READ)
                state->nreaders++;
-       memcpy(&state->stateid, stateid, sizeof(state->stateid));
+       nfs4_state_set_mode_locked(state, state->state | open_flags);
        spin_unlock(&inode->i_lock);
        spin_unlock(&state->owner->so_lock);
 }
@@ -896,7 +895,6 @@ static void nfs4_close_done(struct rpc_task *task)
                        break;
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_EXPIRED:
-                       state->state = calldata->arg.open_flags;
                        nfs4_schedule_state_recovery(server->nfs4_state);
                        break;
                default:
@@ -906,7 +904,6 @@ static void nfs4_close_done(struct rpc_task *task)
                        }
        }
        nfs_refresh_inode(calldata->inode, calldata->res.fattr);
-       state->state = calldata->arg.open_flags;
        nfs4_free_closedata(calldata);
 }
 
@@ -920,24 +917,26 @@ static void nfs4_close_begin(struct rpc_task *task)
                .rpc_resp = &calldata->res,
                .rpc_cred = state->owner->so_cred,
        };
-       int mode = 0;
+       int mode = 0, old_mode;
        int status;
 
        status = nfs_wait_on_sequence(calldata->arg.seqid, task);
        if (status != 0)
                return;
-       /* Don't reorder reads */
-       smp_rmb();
        /* Recalculate the new open mode in case someone reopened the file
         * while we were waiting in line to be scheduled.
         */
-       if (state->nreaders != 0)
-               mode |= FMODE_READ;
-       if (state->nwriters != 0)
-               mode |= FMODE_WRITE;
-       if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-               state->state = mode;
-       if (mode == state->state) {
+       spin_lock(&state->owner->so_lock);
+       spin_lock(&calldata->inode->i_lock);
+       mode = old_mode = state->state;
+       if (state->nreaders == 0)
+               mode &= ~FMODE_READ;
+       if (state->nwriters == 0)
+               mode &= ~FMODE_WRITE;
+       nfs4_state_set_mode_locked(state, mode);
+       spin_unlock(&calldata->inode->i_lock);
+       spin_unlock(&state->owner->so_lock);
+       if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
                nfs4_free_closedata(calldata);
                task->tk_exit = NULL;
                rpc_exit(task, 0);
@@ -961,7 +960,7 @@ static void nfs4_close_begin(struct rpc_task *task)
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode
+int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
 {
        struct nfs_server *server = NFS_SERVER(inode);
        struct nfs4_closedata *calldata;
index 2d5a6a2..959374d 100644 (file)
@@ -366,6 +366,23 @@ nfs4_alloc_open_state(void)
        return state;
 }
 
+void
+nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
+{
+       if (state->state == mode)
+               return;
+       /* NB! List reordering - see the reclaim code for why.  */
+       if ((mode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
+               if (mode & FMODE_WRITE)
+                       list_move(&state->open_states, &state->owner->so_states);
+               else
+                       list_move_tail(&state->open_states, &state->owner->so_states);
+       }
+       if (mode == 0)
+               list_del_init(&state->inode_states);
+       state->state = mode;
+}
+
 static struct nfs4_state *
 __nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
 {
@@ -376,10 +393,6 @@ __nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
        list_for_each_entry(state, &nfsi->open_states, inode_states) {
                if (state->owner->so_cred != cred)
                        continue;
-               if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
-                       continue;
-               if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
-                       continue;
                if ((state->state & mode) != mode)
                        continue;
                atomic_inc(&state->count);
@@ -400,7 +413,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 
        list_for_each_entry(state, &nfsi->open_states, inode_states) {
                /* Is this in the process of being freed? */
-               if (state->nreaders == 0 && state->nwriters == 0)
+               if (state->state == 0)
                        continue;
                if (state->owner == owner) {
                        atomic_inc(&state->count);
@@ -481,7 +494,6 @@ void nfs4_put_open_state(struct nfs4_state *state)
        spin_unlock(&inode->i_lock);
        spin_unlock(&owner->so_lock);
        iput(inode);
-       BUG_ON (state->state != 0);
        nfs4_free_open_state(state);
        nfs4_put_state_owner(owner);
 }
@@ -493,7 +505,7 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 {
        struct inode *inode = state->inode;
        struct nfs4_state_owner *owner = state->owner;
-       int newstate;
+       int oldstate, newstate = 0;
 
        atomic_inc(&owner->so_count);
        /* Protect against nfs4_find_state() */
@@ -503,30 +515,20 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
                state->nreaders--;
        if (mode & FMODE_WRITE)
                state->nwriters--;
-       if (state->nwriters == 0) {
-               if (state->nreaders == 0)
-                       list_del_init(&state->inode_states);
-               /* See reclaim code */
-               list_move_tail(&state->open_states, &owner->so_states);
+       oldstate = newstate = state->state;
+       if (state->nreaders == 0)
+               newstate &= ~FMODE_READ;
+       if (state->nwriters == 0)
+               newstate &= ~FMODE_WRITE;
+       if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+               nfs4_state_set_mode_locked(state, newstate);
+               oldstate = newstate;
        }
        spin_unlock(&inode->i_lock);
        spin_unlock(&owner->so_lock);
-       newstate = 0;
-       if (state->state != 0) {
-               if (state->nreaders)
-                       newstate |= FMODE_READ;
-               if (state->nwriters)
-                       newstate |= FMODE_WRITE;
-               if (state->state == newstate)
-                       goto out;
-               if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-                       state->state = newstate;
-                       goto out;
-               }
-               if (nfs4_do_close(inode, state, newstate) == 0)
-                       return;
-       }
-out:
+
+       if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+               return;
        nfs4_put_open_state(state);
        nfs4_put_state_owner(owner);
 }