Merge branch 'linus'
author Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 24 Mar 2006 04:44:19 +0000 (23:44 -0500)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Fri, 24 Mar 2006 04:44:19 +0000 (23:44 -0500)
69 files changed:
fs/Kconfig
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/lockd/host.c
fs/lockd/mon.c
fs/lockd/svc4proc.c
fs/lockd/svclock.c
fs/lockd/svcproc.c
fs/lockd/svcshare.c
fs/lockd/svcsubs.c
fs/lockd/xdr.c
fs/lockd/xdr4.c
fs/locks.c
fs/namespace.c
fs/nfs/callback.c
fs/nfs/callback_xdr.c
fs/nfs/delegation.c
fs/nfs/delegation.h
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/idmap.c
fs/nfs/inode.c
fs/nfs/iostat.h [new file with mode: 0644]
fs/nfs/mount_clnt.c
fs/nfs/nfs2xdr.c
fs/nfs/nfs3acl.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/pagelist.c
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4state.c
fs/proc/base.c
include/linux/fs.h
include/linux/lockd/lockd.h
include/linux/lockd/share.h
include/linux/lockd/xdr.h
include/linux/nfs_fs.h
include/linux/nfs_fs_i.h
include/linux/nfs_fs_sb.h
include/linux/nfs_xdr.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/gss_krb5.h
include/linux/sunrpc/metrics.h [new file with mode: 0644]
include/linux/sunrpc/rpc_pipe_fs.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xprt.h
net/sunrpc/auth.c
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_seal.c
net/sunrpc/auth_gss/gss_krb5_unseal.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/gss_spkm3_mech.c
net/sunrpc/auth_gss/gss_spkm3_seal.c
net/sunrpc/auth_gss/gss_spkm3_unseal.c
net/sunrpc/clnt.c
net/sunrpc/pmap_clnt.c
net/sunrpc/rpc_pipe.c
net/sunrpc/sched.c
net/sunrpc/stats.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c

diff --git a/fs/Kconfig b/fs/Kconfig
index c8d0a20..e207be6 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1555,6 +1555,7 @@ config RPCSEC_GSS_SPKM3
        select CRYPTO
        select CRYPTO_MD5
        select CRYPTO_DES
+       select CRYPTO_CAST5
        help
          Provides for secure RPC calls by means of a gss-api
          mechanism based on the SPKM3 public-key mechanism.
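diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index da6354b..bce7444 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c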
@@ -44,32 +44,25 @@ static LIST_HEAD(nlm_blocked);
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
-int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl)
+struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
 {
        struct nlm_wait *block;
 
-       BUG_ON(req->a_block != NULL);
        block = kmalloc(sizeof(*block), GFP_KERNEL);
-       if (block == NULL)
-               return -ENOMEM;
-       block->b_host = host;
-       block->b_lock = fl;
-       init_waitqueue_head(&block->b_wait);
-       block->b_status = NLM_LCK_BLOCKED;
-
-       list_add(&block->b_list, &nlm_blocked);
-       req->a_block = block;
-
-       return 0;
+       if (block != NULL) {
+               block->b_host = host;
+               block->b_lock = fl;
+               init_waitqueue_head(&block->b_wait);
+               block->b_status = NLM_LCK_BLOCKED;
+               list_add(&block->b_list, &nlm_blocked);
+       }
+       return block;
 }
 
-void nlmclnt_finish_block(struct nlm_rqst *req)
+void nlmclnt_finish_block(struct nlm_wait *block)
 {
-       struct nlm_wait *block = req->a_block;
-
        if (block == NULL)
                return;
-       req->a_block = NULL;
        list_del(&block->b_list);
        kfree(block);
 }
@@ -77,15 +70,14 @@ void nlmclnt_finish_block(struct nlm_rqst *req)
 /*
  * Block on a lock
  */
-long nlmclnt_block(struct nlm_rqst *req, long timeout)
+int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
 {
-       struct nlm_wait *block = req->a_block;
        long ret;
 
        /* A borken server might ask us to block even if we didn't
         * request it. Just say no!
         */
-       if (!req->a_args.block)
+       if (block == NULL)
                return -EAGAIN;
 
        /* Go to sleep waiting for GRANT callback. Some servers seem
@@ -99,13 +91,10 @@ long nlmclnt_block(struct nlm_rqst *req, long timeout)
        ret = wait_event_interruptible_timeout(block->b_wait,
                        block->b_status != NLM_LCK_BLOCKED,
                        timeout);
-
-       if (block->b_status != NLM_LCK_BLOCKED) {
-               req->a_res.status = block->b_status;
-               block->b_status = NLM_LCK_BLOCKED;
-       }
-
-       return ret;
+       if (ret < 0)
+               return -ERESTARTSYS;
+       req->a_res.status = block->b_status;
+       return 0;
 }
 
 /*
@@ -125,7 +114,15 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
        list_for_each_entry(block, &nlm_blocked, b_list) {
                struct file_lock *fl_blocked = block->b_lock;
 
-               if (!nlm_compare_locks(fl_blocked, fl))
+               if (fl_blocked->fl_start != fl->fl_start)
+                       continue;
+               if (fl_blocked->fl_end != fl->fl_end)
+                       continue;
+               /*
+                * Careful! The NLM server will return the 32-bit "pid" that
+                * we put on the wire: in this case the lockowner "pid".
+                */
+               if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
                        continue;
                if (!nlm_cmp_addr(&block->b_host->h_addr, addr))
                        continue;
@@ -146,34 +143,6 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
  * server crash.
  */
 
-/*
- * Mark the locks for reclaiming.
- * FIXME: In 2.5 we don't want to iterate through any global file_lock_list.
- *        Maintain NLM lock reclaiming lists in the nlm_host instead.
- */
-static
-void nlmclnt_mark_reclaim(struct nlm_host *host)
-{
-       struct file_lock *fl;
-       struct inode *inode;
-       struct list_head *tmp;
-
-       list_for_each(tmp, &file_lock_list) {
-               fl = list_entry(tmp, struct file_lock, fl_link);
-
-               inode = fl->fl_file->f_dentry->d_inode;
-               if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-                       continue;
-               if (fl->fl_u.nfs_fl.owner == NULL)
-                       continue;
-               if (fl->fl_u.nfs_fl.owner->host != host)
-                       continue;
-               if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED))
-                       continue;
-               fl->fl_u.nfs_fl.flags |= NFS_LCK_RECLAIM;
-       }
-}
-
 /*
  * Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
  * that we mark locks for reclaiming, and that we bump the pseudo NSM state.
@@ -186,7 +155,12 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
        host->h_state++;
        host->h_nextrebind = 0;
        nlm_rebind_host(host);
-       nlmclnt_mark_reclaim(host);
+
+       /*
+        * Mark the locks for reclaiming.
+        */
+       list_splice_init(&host->h_granted, &host->h_reclaim);
+
        dprintk("NLM: reclaiming locks for host %s", host->h_name);
 }
 
@@ -215,9 +189,7 @@ reclaimer(void *ptr)
 {
        struct nlm_host   *host = (struct nlm_host *) ptr;
        struct nlm_wait   *block;
-       struct list_head *tmp;
-       struct file_lock *fl;
-       struct inode *inode;
+       struct file_lock *fl, *next;
 
        daemonize("%s-reclaim", host->h_name);
        allow_signal(SIGKILL);
@@ -229,23 +201,13 @@ reclaimer(void *ptr)
 
        /* First, reclaim all locks that have been marked. */
 restart:
-       list_for_each(tmp, &file_lock_list) {
-               fl = list_entry(tmp, struct file_lock, fl_link);
+       list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
+               list_del_init(&fl->fl_u.nfs_fl.list);
 
-               inode = fl->fl_file->f_dentry->d_inode;
-               if (inode->i_sb->s_magic != NFS_SUPER_MAGIC)
-                       continue;
-               if (fl->fl_u.nfs_fl.owner == NULL)
-                       continue;
-               if (fl->fl_u.nfs_fl.owner->host != host)
-                       continue;
-               if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM))
-                       continue;
-
-               fl->fl_u.nfs_fl.flags &= ~NFS_LCK_RECLAIM;
-               nlmclnt_reclaim(host, fl);
                if (signalled())
-                       break;
+                       continue;
+               if (nlmclnt_reclaim(host, fl) == 0)
+                       list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
                goto restart;
        }
 
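diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 970b6a6..f96e381 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c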
@@ -132,59 +132,18 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
        memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
        lock->caller  = system_utsname.nodename;
        lock->oh.data = req->a_owner;
-       lock->oh.len  = sprintf(req->a_owner, "%d@%s",
-                               current->pid, system_utsname.nodename);
-       locks_copy_lock(&lock->fl, fl);
+       lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
+                               (unsigned int)fl->fl_u.nfs_fl.owner->pid,
+                               system_utsname.nodename);
+       lock->svid = fl->fl_u.nfs_fl.owner->pid;
+       lock->fl.fl_start = fl->fl_start;
+       lock->fl.fl_end = fl->fl_end;
+       lock->fl.fl_type = fl->fl_type;
 }
 
 static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 {
-       struct file_lock *fl = &req->a_args.lock.fl;
-
-       if (fl->fl_ops && fl->fl_ops->fl_release_private)
-               fl->fl_ops->fl_release_private(fl);
-}
-
-/*
- * Initialize arguments for GRANTED call. The nlm_rqst structure
- * has been cleared already.
- */
-int
-nlmclnt_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
-{
-       locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
-       memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-       call->a_args.lock.caller = system_utsname.nodename;
-       call->a_args.lock.oh.len = lock->oh.len;
-
-       /* set default data area */
-       call->a_args.lock.oh.data = call->a_owner;
-
-       if (lock->oh.len > NLMCLNT_OHSIZE) {
-               void *data = kmalloc(lock->oh.len, GFP_KERNEL);
-               if (!data) {
-                       nlmclnt_freegrantargs(call);
-                       return 0;
-               }
-               call->a_args.lock.oh.data = (u8 *) data;
-       }
-
-       memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
-       return 1;
-}
-
-void
-nlmclnt_freegrantargs(struct nlm_rqst *call)
-{
-       struct file_lock *fl = &call->a_args.lock.fl;
-       /*
-        * Check whether we allocated memory for the owner.
-        */
-       if (call->a_args.lock.oh.data != (u8 *) call->a_owner) {
-               kfree(call->a_args.lock.oh.data);
-       }
-       if (fl->fl_ops && fl->fl_ops->fl_release_private)
-               fl->fl_ops->fl_release_private(fl);
+       BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
 }
 
 /*
@@ -193,9 +152,8 @@ nlmclnt_freegrantargs(struct nlm_rqst *call)
 int
 nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 {
-       struct nfs_server       *nfssrv = NFS_SERVER(inode);
        struct nlm_host         *host;
-       struct nlm_rqst         reqst, *call = &reqst;
+       struct nlm_rqst         *call;
        sigset_t                oldset;
        unsigned long           flags;
        int                     status, proto, vers;
@@ -209,23 +167,17 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
        /* Retrieve transport protocol from NFS client */
        proto = NFS_CLIENT(inode)->cl_xprt->prot;
 
-       if (!(host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers)))
+       host = nlmclnt_lookup_host(NFS_ADDR(inode), proto, vers);
+       if (host == NULL)
                return -ENOLCK;
 
-       /* Create RPC client handle if not there, and copy soft
-        * and intr flags from NFS client. */
-       if (host->h_rpcclnt == NULL) {
-               struct rpc_clnt *clnt;
+       call = nlm_alloc_call(host);
+       if (call == NULL)
+               return -ENOMEM;
 
-               /* Bind an rpc client to this host handle (does not
-                * perform a portmapper lookup) */
-               if (!(clnt = nlm_bind_host(host))) {
-                       status = -ENOLCK;
-                       goto done;
-               }
-               clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-               clnt->cl_intr = nfssrv->client->cl_intr;
-       }
+       nlmclnt_locks_init_private(fl, host);
+       /* Set up the argument struct */
+       nlmclnt_setlockargs(call, fl);
 
        /* Keep the old signal mask */
        spin_lock_irqsave(&current->sighand->siglock, flags);
@@ -238,26 +190,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
            && (current->flags & PF_EXITING)) {
                sigfillset(&current->blocked);  /* Mask all signals */
                recalc_sigpending();
-               spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-               call = nlmclnt_alloc_call();
-               if (!call) {
-                       status = -ENOMEM;
-                       goto out_restore;
-               }
                call->a_flags = RPC_TASK_ASYNC;
-       } else {
-               spin_unlock_irqrestore(&current->sighand->siglock, flags);
-               memset(call, 0, sizeof(*call));
-               locks_init_lock(&call->a_args.lock.fl);
-               locks_init_lock(&call->a_res.lock.fl);
        }
-       call->a_host = host;
-
-       nlmclnt_locks_init_private(fl, host);
-
-       /* Set up the argument struct */
-       nlmclnt_setlockargs(call, fl);
+       spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
        if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
                if (fl->fl_type != F_UNLCK) {
@@ -270,41 +206,58 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
        else
                status = -EINVAL;
 
- out_restore:
+       fl->fl_ops->fl_release_private(fl);
+       fl->fl_ops = NULL;
+
        spin_lock_irqsave(&current->sighand->siglock, flags);
        current->blocked = oldset;
        recalc_sigpending();
        spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-done:
        dprintk("lockd: clnt proc returns %d\n", status);
-       nlm_release_host(host);
        return status;
 }
 EXPORT_SYMBOL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
+ *
+ * Note: the caller must hold a reference to host. In case of failure,
+ * this reference will be released.
  */
-struct nlm_rqst *
-nlmclnt_alloc_call(void)
+struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 {
        struct nlm_rqst *call;
 
-       while (!signalled()) {
-               call = (struct nlm_rqst *) kmalloc(sizeof(struct nlm_rqst), GFP_KERNEL);
-               if (call) {
-                       memset(call, 0, sizeof(*call));
+       for(;;) {
+               call = kzalloc(sizeof(*call), GFP_KERNEL);
+               if (call != NULL) {
                        locks_init_lock(&call->a_args.lock.fl);
                        locks_init_lock(&call->a_res.lock.fl);
+                       call->a_host = host;
                        return call;
                }
-               printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+               if (signalled())
+                       break;
+               printk("nlm_alloc_call: failed, waiting for memory\n");
                schedule_timeout_interruptible(5*HZ);
        }
+       nlm_release_host(host);
        return NULL;
 }
 
+void nlm_release_call(struct nlm_rqst *call)
+{
+       nlm_release_host(call->a_host);
+       nlmclnt_release_lockargs(call);
+       kfree(call);
+}
+
+static void nlmclnt_rpc_release(void *data)
+{
+       return nlm_release_call(data);
+}
+
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
 {
        DEFINE_WAIT(wait);
@@ -401,57 +354,45 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
 {
        struct nlm_host *host = req->a_host;
        struct rpc_clnt *clnt;
-       struct rpc_message msg = {
-               .rpc_argp       = &req->a_args,
-               .rpc_resp       = &req->a_res,
-       };
-       int             status;
+       int status = -ENOLCK;
 
        dprintk("lockd: call procedure %d on %s (async)\n",
                        (int)proc, host->h_name);
 
        /* If we have no RPC client yet, create one. */
-       if ((clnt = nlm_bind_host(host)) == NULL)
-               return -ENOLCK;
-       msg.rpc_proc = &clnt->cl_procinfo[proc];
+       clnt = nlm_bind_host(host);
+       if (clnt == NULL)
+               goto out_err;
+       msg->rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-
+        status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+       if (status == 0)
+               return 0;
+out_err:
+       nlm_release_call(req);
        return status;
 }
 
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
-       struct nlm_host *host = req->a_host;
-       struct rpc_clnt *clnt;
-       struct nlm_args *argp = &req->a_args;
-       struct nlm_res  *resp = &req->a_res;
        struct rpc_message msg = {
-               .rpc_argp       = argp,
-               .rpc_resp       = resp,
+               .rpc_argp       = &req->a_args,
+               .rpc_resp       = &req->a_res,
        };
-       int             status;
-
-       dprintk("lockd: call procedure %d on %s (async)\n",
-                       (int)proc, host->h_name);
-
-       /* If we have no RPC client yet, create one. */
-       if ((clnt = nlm_bind_host(host)) == NULL)
-               return -ENOLCK;
-       msg.rpc_proc = &clnt->cl_procinfo[proc];
+       return __nlm_async_call(req, proc, &msg, tk_ops);
+}
 
-       /* Increment host refcount */
-       nlm_get_host(host);
-        /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
-       if (status < 0)
-               nlm_release_host(host);
-       return status;
+int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+       struct rpc_message msg = {
+               .rpc_argp       = &req->a_res,
+       };
+       return __nlm_async_call(req, proc, &msg, tk_ops);
 }
 
 /*
@@ -463,36 +404,41 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
        int     status;
 
        status = nlmclnt_call(req, NLMPROC_TEST);
-       nlmclnt_release_lockargs(req);
        if (status < 0)
-               return status;
+               goto out;
 
-       status = req->a_res.status;
-       if (status == NLM_LCK_GRANTED) {
-               fl->fl_type = F_UNLCK;
-       } if (status == NLM_LCK_DENIED) {
-               /*
-                * Report the conflicting lock back to the application.
-                */
-               locks_copy_lock(fl, &req->a_res.lock.fl);
-               fl->fl_pid = 0;
-       } else {
-               return nlm_stat_to_errno(req->a_res.status);
+       switch (req->a_res.status) {
+               case NLM_LCK_GRANTED:
+                       fl->fl_type = F_UNLCK;
+                       break;
+               case NLM_LCK_DENIED:
+                       /*
+                        * Report the conflicting lock back to the application.
+                        */
+                       fl->fl_start = req->a_res.lock.fl.fl_start;
+                       fl->fl_end = req->a_res.lock.fl.fl_end;
+                       fl->fl_type = req->a_res.lock.fl.fl_type;
+                       fl->fl_pid = 0;
+                       break;
+               default:
+                       status = nlm_stat_to_errno(req->a_res.status);
        }
-
-       return 0;
+out:
+       nlm_release_call(req);
+       return status;
 }
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
-       memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl));
-       nlm_get_lockowner(new->fl_u.nfs_fl.owner);
+       new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
+       new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
+       list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
 }
 
 static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
+       list_del(&fl->fl_u.nfs_fl.list);
        nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
-       fl->fl_ops = NULL;
 }
 
 static struct file_lock_operations nlmclnt_lock_ops = {
@@ -504,8 +450,8 @@ static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *ho
 {
        BUG_ON(fl->fl_ops != NULL);
        fl->fl_u.nfs_fl.state = 0;
-       fl->fl_u.nfs_fl.flags = 0;
        fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner);
+       INIT_LIST_HEAD(&fl->fl_u.nfs_fl.list);
        fl->fl_ops = &nlmclnt_lock_ops;
 }
 
@@ -552,57 +498,52 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
        struct nlm_host *host = req->a_host;
        struct nlm_res  *resp = &req->a_res;
-       long timeout;
-       int status;
+       struct nlm_wait *block = NULL;
+       int status = -ENOLCK;
 
        if (!host->h_monitored && nsm_monitor(host) < 0) {
                printk(KERN_NOTICE "lockd: failed to monitor %s\n",
                                        host->h_name);
-               status = -ENOLCK;
                goto out;
        }
 
-       if (req->a_args.block) {
-               status = nlmclnt_prepare_block(req, host, fl);
-               if (status < 0)
-                       goto out;
-       }
+       block = nlmclnt_prepare_block(host, fl);
        for(;;) {
                status = nlmclnt_call(req, NLMPROC_LOCK);
                if (status < 0)
                        goto out_unblock;
-               if (resp->status != NLM_LCK_BLOCKED)
+               if (!req->a_args.block)
                        break;
-               /* Wait on an NLM blocking lock */
-               timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT);
                /* Did a reclaimer thread notify us of a server reboot? */
                if (resp->status ==  NLM_LCK_DENIED_GRACE_PERIOD)
                        continue;
                if (resp->status != NLM_LCK_BLOCKED)
                        break;
-               if (timeout >= 0)
-                       continue;
-               /* We were interrupted. Send a CANCEL request to the server
+               /* Wait on an NLM blocking lock */
+               status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+               /* If we were interrupted, send a CANCEL request to the server
                 * and exit
                 */
-               status = (int)timeout;
-               goto out_unblock;
+               if (status < 0)
+                       goto out_unblock;
+               if (resp->status != NLM_LCK_BLOCKED)
+                       break;
        }
 
        if (resp->status == NLM_LCK_GRANTED) {
                fl->fl_u.nfs_fl.state = host->h_state;
-               fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED;
                fl->fl_flags |= FL_SLEEP;
+               /* Ensure the resulting lock will get added to granted list */
                do_vfs_lock(fl);
        }
        status = nlm_stat_to_errno(resp->status);
 out_unblock:
-       nlmclnt_finish_block(req);
+       nlmclnt_finish_block(block);
        /* Cancel the blocked request if it is still pending */
        if (resp->status == NLM_LCK_BLOCKED)
                nlmclnt_cancel(host, req->a_args.block, fl);
 out:
-       nlmclnt_release_lockargs(req);
+       nlm_release_call(req);
        return status;
 }
 
@@ -658,10 +599,6 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
        struct nlm_res  *resp = &req->a_res;
        int             status;
 
-       /* Clean the GRANTED flag now so the lock doesn't get
-        * reclaimed while we're stuck in the unlock call. */
-       fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED;
-
        /*
         * Note: the server is supposed to either grant us the unlock
         * request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
@@ -669,32 +606,24 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
         */
        do_vfs_lock(fl);
 
-       if (req->a_flags & RPC_TASK_ASYNC) {
-               status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-                                       &nlmclnt_unlock_ops);
-               /* Hrmf... Do the unlock early since locks_remove_posix()
-                * really expects us to free the lock synchronously */
-               if (status < 0) {
-                       nlmclnt_release_lockargs(req);
-                       kfree(req);
-               }
-               return status;
-       }
+       if (req->a_flags & RPC_TASK_ASYNC)
+               return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 
        status = nlmclnt_call(req, NLMPROC_UNLOCK);
-       nlmclnt_release_lockargs(req);
        if (status < 0)
-               return status;
+               goto out;
 
+       status = 0;
        if (resp->status == NLM_LCK_GRANTED)
-               return 0;
+               goto out;
 
        if (resp->status != NLM_LCK_DENIED_NOLOCKS)
                printk("lockd: unexpected unlock status: %d\n", resp->status);
-
        /* What to do now? I'm out of my depth... */
-
-       return -ENOLCK;
+       status = -ENOLCK;
+out:
+       nlm_release_call(req);
+       return status;
 }
 
 static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
@@ -716,9 +645,6 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
        if (status != NLM_LCK_GRANTED)
                printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status);
 die:
-       nlm_release_host(req->a_host);
-       nlmclnt_release_lockargs(req);
-       kfree(req);
        return;
  retry_rebind:
        nlm_rebind_host(req->a_host);
@@ -728,6 +654,7 @@ die:
 
 static const struct rpc_call_ops nlmclnt_unlock_ops = {
        .rpc_call_done = nlmclnt_unlock_callback,
+       .rpc_release = nlmclnt_rpc_release,
 };
 
 /*
@@ -749,20 +676,15 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
        recalc_sigpending();
        spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
-       req = nlmclnt_alloc_call();
+       req = nlm_alloc_call(nlm_get_host(host));
        if (!req)
                return -ENOMEM;
-       req->a_host  = host;
        req->a_flags = RPC_TASK_ASYNC;
 
        nlmclnt_setlockargs(req, fl);
        req->a_args.block = block;
 
-       status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-       if (status < 0) {
-               nlmclnt_release_lockargs(req);
-               kfree(req);
-       }
+       status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 
        spin_lock_irqsave(&current->sighand->siglock, flags);
        current->blocked = oldset;
@@ -791,6 +713,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
        switch (req->a_res.status) {
        case NLM_LCK_GRANTED:
        case NLM_LCK_DENIED_GRACE_PERIOD:
+       case NLM_LCK_DENIED:
                /* Everything's good */
                break;
        case NLM_LCK_DENIED_NOLOCKS:
@@ -802,9 +725,6 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
        }
 
 die:
-       nlm_release_host(req->a_host);
-       nlmclnt_release_lockargs(req);
-       kfree(req);
        return;
 
 retry_cancel:
@@ -818,6 +738,7 @@ retry_cancel:
 
 static const struct rpc_call_ops nlmclnt_cancel_ops = {
        .rpc_call_done = nlmclnt_cancel_callback,
+       .rpc_release = nlmclnt_rpc_release,
 };
 
 /*
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 82f7a0b..112ebf8 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -123,6 +123,8 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
        nlm_hosts[hash]    = host;
        INIT_LIST_HEAD(&host->h_lockowners);
        spin_lock_init(&host->h_lock);
+       INIT_LIST_HEAD(&host->h_granted);
+       INIT_LIST_HEAD(&host->h_reclaim);
 
        if (++nrhosts > NLM_HOST_MAX)
                next_gc = 0;
@@ -191,11 +193,12 @@ nlm_bind_host(struct nlm_host *host)
                xprt->resvport = 1;     /* NLM requires a reserved port */
 
                /* Existing NLM servers accept AUTH_UNIX only */
-               clnt = rpc_create_client(xprt, host->h_name, &nlm_program,
+               clnt = rpc_new_client(xprt, host->h_name, &nlm_program,
                                        host->h_version, RPC_AUTH_UNIX);
                if (IS_ERR(clnt))
                        goto forgetit;
                clnt->cl_autobind = 1;  /* turn on pmap queries */
+               clnt->cl_softrtry = 1; /* All queries are soft */
 
                host->h_rpcclnt = clnt;
        }
@@ -242,8 +245,12 @@ void nlm_release_host(struct nlm_host *host)
 {
        if (host != NULL) {
                dprintk("lockd: release host %s\n", host->h_name);
-               atomic_dec(&host->h_count);
                BUG_ON(atomic_read(&host->h_count) < 0);
+               if (atomic_dec_and_test(&host->h_count)) {
+                       BUG_ON(!list_empty(&host->h_lockowners));
+                       BUG_ON(!list_empty(&host->h_granted));
+                       BUG_ON(!list_empty(&host->h_reclaim));
+               }
        }
 }
 
@@ -331,7 +338,6 @@ nlm_gc_hosts(void)
                                        rpc_destroy_client(host->h_rpcclnt);
                                }
                        }
-                       BUG_ON(!list_empty(&host->h_lockowners));
                        kfree(host);
                        nrhosts--;
                }
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 0edc03e..5dd52b7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -35,6 +35,10 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
        struct rpc_clnt *clnt;
        int             status;
        struct nsm_args args;
+       struct rpc_message msg = {
+               .rpc_argp       = &args,
+               .rpc_resp       = res,
+       };
 
        clnt = nsm_create();
        if (IS_ERR(clnt)) {
@@ -49,7 +53,8 @@ nsm_mon_unmon(struct nlm_host *host, u32 proc, struct nsm_res *res)
        args.proc = NLMPROC_NSM_NOTIFY;
        memset(res, 0, sizeof(*res));
 
-       status = rpc_call(clnt, proc, &args, res, 0);
+       msg.rpc_proc = &clnt->cl_procinfo[proc];
+       status = rpc_call_sync(clnt, &msg, 0);
        if (status < 0)
                printk(KERN_DEBUG "nsm_mon_unmon: rpc failed, status=%d\n",
                        status);
@@ -214,12 +219,16 @@ static struct rpc_procinfo        nsm_procedures[] = {
                .p_encode       = (kxdrproc_t) xdr_encode_mon,
                .p_decode       = (kxdrproc_t) xdr_decode_stat_res,
                .p_bufsiz       = MAX(SM_mon_sz, SM_monres_sz) << 2,
+               .p_statidx      = SM_MON,
+               .p_name         = "MONITOR",
        },
 [SM_UNMON] = {
                .p_proc         = SM_UNMON,
                .p_encode       = (kxdrproc_t) xdr_encode_unmon,
                .p_decode       = (kxdrproc_t) xdr_decode_stat,
                .p_bufsiz       = MAX(SM_mon_id_sz, SM_unmonres_sz) << 2,
+               .p_statidx      = SM_UNMON,
+               .p_name         = "UNMONITOR",
        },
 };
 
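diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index b10f913..a2dd9cc 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c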
 
 #define NLMDBG_FACILITY                NLMDBG_CLIENT
 
-static u32     nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlm4svc_callback_ops;
-
 /*
  * Obtain client and file from arguments
  */
@@ -233,84 +229,90 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
        return rpc_success;
 }
 
+/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
+{
+       dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+                       -task->tk_status);
+}
+
+static void nlm4svc_callback_release(void *data)
+{
+       nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+       .rpc_call_done = nlm4svc_callback_exit,
+       .rpc_release = nlm4svc_callback_release,
+};
+
 /*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                            void            *resp)
+static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+               int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-       struct nlm_res  res;
-       u32             stat;
+       struct nlm_host *host;
+       struct nlm_rqst *call;
+       int stat;
 
-       dprintk("lockd: TEST_MSG      called\n");
-       memset(&res, 0, sizeof(res));
+       host = nlmsvc_lookup_host(rqstp);
+       if (host == NULL)
+               return rpc_system_err;
+
+       call = nlm_alloc_call(host);
+       if (call == NULL)
+               return rpc_system_err;
 
-       if ((stat = nlm4svc_proc_test(rqstp, argp, &res)) == 0)
-               stat = nlm4svc_callback(rqstp, NLMPROC_TEST_RES, &res);
-       return stat;
+       stat = func(rqstp, argp, &call->a_res);
+       if (stat != 0) {
+               nlm_release_call(call);
+               return stat;
+       }
+
+       call->a_flags = RPC_TASK_ASYNC;
+       if (nlm_async_reply(call, proc, &nlm4svc_callback_ops) < 0)
+               return rpc_system_err;
+       return rpc_success;
 }
 
-static int
-nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                             void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
+       dprintk("lockd: TEST_MSG      called\n");
+       return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test);
+}
 
+static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+                                            void            *resp)
+{
        dprintk("lockd: LOCK_MSG      called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlm4svc_proc_lock(rqstp, argp, &res)) == 0)
-               stat = nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-       return stat;
+       return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock);
 }
 
-static int
-nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                               void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: CANCEL_MSG    called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlm4svc_proc_cancel(rqstp, argp, &res)) == 0)
-               stat = nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-       return stat;
+       return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel);
 }
 
-static int
-nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: UNLOCK_MSG    called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlm4svc_proc_unlock(rqstp, argp, &res)) == 0)
-               stat = nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-       return stat;
+       return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock);
 }
 
-static int
-nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: GRANTED_MSG   called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlm4svc_proc_granted(rqstp, argp, &res)) == 0)
-               stat = nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-       return stat;
+       return nlm4svc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlm4svc_proc_granted);
 }
 
 /*
@@ -471,55 +473,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 }
 
 
-/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-       struct nlm_host *host;
-       struct nlm_rqst *call;
-
-       if (!(call = nlmclnt_alloc_call()))
-               return rpc_system_err;
-
-       host = nlmclnt_lookup_host(&rqstp->rq_addr,
-                               rqstp->rq_prot, rqstp->rq_vers);
-       if (!host) {
-               kfree(call);
-               return rpc_system_err;
-       }
-
-       call->a_flags = RPC_TASK_ASYNC;
-       call->a_host  = host;
-       memcpy(&call->a_args, resp, sizeof(*resp));
-
-       if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
-               goto error;
-
-       return rpc_success;
- error:
-       kfree(call);
-       nlm_release_host(host);
-       return rpc_system_err;
-}
-
-static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
-{
-       struct nlm_rqst *call = data;
-
-       if (task->tk_status < 0) {
-               dprintk("lockd: %4d callback failed (errno = %d)\n",
-                                       task->tk_pid, -task->tk_status);
-       }
-       nlm_release_host(call->a_host);
-       kfree(call);
-}
-
-static const struct rpc_call_ops nlm4svc_callback_ops = {
-       .rpc_call_done = nlm4svc_callback_exit,
-};
-
 /*
  * NLM Server procedures.
  */
index 9cfced6..d2b66ba 100644 (file)
 #define nlm_deadlock   nlm_lck_denied
 #endif
 
+static void nlmsvc_release_block(struct nlm_block *block);
 static void    nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int     nlmsvc_remove_block(struct nlm_block *block);
 
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock);
+static void nlmsvc_freegrantargs(struct nlm_rqst *call);
 static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
@@ -58,6 +61,7 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
        struct nlm_block **bp, *b;
 
        dprintk("lockd: nlmsvc_insert_block(%p, %ld)\n", block, when);
+       kref_get(&block->b_count);
        if (block->b_queued)
                nlmsvc_remove_block(block);
        bp = &nlm_blocked;
@@ -90,6 +94,7 @@ nlmsvc_remove_block(struct nlm_block *block)
                if (b == block) {
                        *bp = block->b_next;
                        block->b_queued = 0;
+                       nlmsvc_release_block(block);
                        return 1;
                }
        }
@@ -98,11 +103,10 @@ nlmsvc_remove_block(struct nlm_block *block)
 }
 
 /*
- * Find a block for a given lock and optionally remove it from
- * the list.
+ * Find a block for a given lock
  */
 static struct nlm_block *
-nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
+nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
 {
        struct nlm_block        **head, *block;
        struct file_lock        *fl;
@@ -112,17 +116,14 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock, int remove)
                                (long long)lock->fl.fl_start,
                                (long long)lock->fl.fl_end, lock->fl.fl_type);
        for (head = &nlm_blocked; (block = *head) != 0; head = &block->b_next) {
-               fl = &block->b_call.a_args.lock.fl;
+               fl = &block->b_call->a_args.lock.fl;
                dprintk("lockd: check f=%p pd=%d %Ld-%Ld ty=%d cookie=%s\n",
                                block->b_file, fl->fl_pid,
                                (long long)fl->fl_start,
                                (long long)fl->fl_end, fl->fl_type,
-                               nlmdbg_cookie2a(&block->b_call.a_args.cookie));
+                               nlmdbg_cookie2a(&block->b_call->a_args.cookie));
                if (block->b_file == file && nlm_compare_locks(fl, &lock->fl)) {
-                       if (remove) {
-                               *head = block->b_next;
-                               block->b_queued = 0;
-                       }
+                       kref_get(&block->b_count);
                        return block;
                }
        }
@@ -150,11 +151,13 @@ nlmsvc_find_block(struct nlm_cookie *cookie,  struct sockaddr_in *sin)
        for (block = nlm_blocked; block; block = block->b_next) {
                dprintk("cookie: head of blocked queue %p, block %p\n", 
                        nlm_blocked, block);
-               if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)
+               if (nlm_cookie_match(&block->b_call->a_args.cookie,cookie)
                                && nlm_cmp_addr(sin, &block->b_host->h_addr))
                        break;
        }
 
+       if (block != NULL)
+               kref_get(&block->b_count);
        return block;
 }
 
@@ -174,27 +177,30 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
 {
        struct nlm_block        *block;
        struct nlm_host         *host;
-       struct nlm_rqst         *call;
+       struct nlm_rqst         *call = NULL;
 
        /* Create host handle for callback */
-       host = nlmclnt_lookup_host(&rqstp->rq_addr,
-                               rqstp->rq_prot, rqstp->rq_vers);
+       host = nlmsvc_lookup_host(rqstp);
        if (host == NULL)
                return NULL;
 
+       call = nlm_alloc_call(host);
+       if (call == NULL)
+               return NULL;
+
        /* Allocate memory for block, and initialize arguments */
-       if (!(block = (struct nlm_block *) kmalloc(sizeof(*block), GFP_KERNEL)))
+       block = kzalloc(sizeof(*block), GFP_KERNEL);
+       if (block == NULL)
                goto failed;
-       memset(block, 0, sizeof(*block));
-       locks_init_lock(&block->b_call.a_args.lock.fl);
-       locks_init_lock(&block->b_call.a_res.lock.fl);
+       kref_init(&block->b_count);
 
-       if (!nlmclnt_setgrantargs(&block->b_call, lock))
+       if (!nlmsvc_setgrantargs(call, lock))
                goto failed_free;
 
        /* Set notifier function for VFS, and init args */
-       block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
-       block->b_call.a_args.cookie = *cookie;  /* see above */
+       call->a_args.lock.fl.fl_flags |= FL_SLEEP;
+       call->a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations;
+       call->a_args.cookie = *cookie;  /* see above */
 
        dprintk("lockd: created block %p...\n", block);
 
@@ -202,22 +208,23 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file,
        block->b_daemon = rqstp->rq_server;
        block->b_host   = host;
        block->b_file   = file;
+       file->f_count++;
 
        /* Add to file's list of blocks */
        block->b_fnext  = file->f_blocks;
        file->f_blocks  = block;
 
        /* Set up RPC arguments for callback */
-       call = &block->b_call;
-       call->a_host    = host;
+       block->b_call = call;
        call->a_flags   = RPC_TASK_ASYNC;
+       call->a_block = block;
 
        return block;
 
 failed_free:
        kfree(block);
 failed:
-       nlm_release_host(host);
+       nlm_release_call(call);
        return NULL;
 }
 
@@ -227,29 +234,26 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static int
-nlmsvc_delete_block(struct nlm_block *block, int unlock)
+static int nlmsvc_unlink_block(struct nlm_block *block)
 {
-       struct file_lock        *fl = &block->b_call.a_args.lock.fl;
-       struct nlm_file         *file = block->b_file;
-       struct nlm_block        **bp;
-       int status = 0;
-
-       dprintk("lockd: deleting block %p...\n", block);
+       int status;
+       dprintk("lockd: unlinking block %p...\n", block);
 
        /* Remove block from list */
+       status = posix_unblock_lock(block->b_file->f_file, &block->b_call->a_args.lock.fl);
        nlmsvc_remove_block(block);
-       if (unlock)
-               status = posix_unblock_lock(file->f_file, fl);
+       return status;
+}
 
-       /* If the block is in the middle of a GRANT callback,
-        * don't kill it yet. */
-       if (block->b_incall) {
-               nlmsvc_insert_block(block, NLM_NEVER);
-               block->b_done = 1;
-               return status;
-       }
+static void nlmsvc_free_block(struct kref *kref)
+{
+       struct nlm_block *block = container_of(kref, struct nlm_block, b_count);
+       struct nlm_file         *file = block->b_file;
+       struct nlm_block        **bp;
 
+       dprintk("lockd: freeing block %p...\n", block);
+
+       down(&file->f_sema);
        /* Remove block from file's list of blocks */
        for (bp = &file->f_blocks; *bp; bp = &(*bp)->b_fnext) {
                if (*bp == block) {
@@ -257,36 +261,93 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
                        break;
                }
        }
+       up(&file->f_sema);
 
-       if (block->b_host)
-               nlm_release_host(block->b_host);
-       nlmclnt_freegrantargs(&block->b_call);
+       nlmsvc_freegrantargs(block->b_call);
+       nlm_release_call(block->b_call);
+       nlm_release_file(block->b_file);
        kfree(block);
-       return status;
+}
+
+static void nlmsvc_release_block(struct nlm_block *block)
+{
+       if (block != NULL)
+               kref_put(&block->b_count, nlmsvc_free_block);
+}
+
+static void nlmsvc_act_mark(struct nlm_host *host, struct nlm_file *file)
+{
+       struct nlm_block *block;
+
+       down(&file->f_sema);
+       for (block = file->f_blocks; block != NULL; block = block->b_fnext)
+               block->b_host->h_inuse = 1;
+       up(&file->f_sema);
+}
+
+static void nlmsvc_act_unlock(struct nlm_host *host, struct nlm_file *file)
+{
+       struct nlm_block *block;
+
+restart:
+       down(&file->f_sema);
+       for (block = file->f_blocks; block != NULL; block = block->b_fnext) {
+               if (host != NULL && host != block->b_host)
+                       continue;
+               if (!block->b_queued)
+                       continue;
+               kref_get(&block->b_count);
+               up(&file->f_sema);
+               nlmsvc_unlink_block(block);
+               nlmsvc_release_block(block);
+               goto restart;
+       }
+       up(&file->f_sema);
 }
 
 /*
  * Loop over all blocks and perform the action specified.
  * (NLM_ACT_CHECK handled by nlmsvc_inspect_file).
  */
-int
+void
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
-       struct nlm_block        *block, *next;
-       /* XXX: Will everything get cleaned up if we don't unlock here? */
+       if (action == NLM_ACT_MARK)
+               nlmsvc_act_mark(host, file);
+       else
+               nlmsvc_act_unlock(host, file);
+}
 
-       down(&file->f_sema);
-       for (block = file->f_blocks; block; block = next) {
-               next = block->b_fnext;
-               if (action == NLM_ACT_MARK)
-                       block->b_host->h_inuse = 1;
-               else if (action == NLM_ACT_UNLOCK) {
-                       if (host == NULL || host == block->b_host)
-                               nlmsvc_delete_block(block, 1);
-               }
+/*
+ * Initialize arguments for GRANTED call. The nlm_rqst structure
+ * has been cleared already.
+ */
+static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
+{
+       locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
+       memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
+       call->a_args.lock.caller = system_utsname.nodename;
+       call->a_args.lock.oh.len = lock->oh.len;
+
+       /* set default data area */
+       call->a_args.lock.oh.data = call->a_owner;
+       call->a_args.lock.svid = lock->fl.fl_pid;
+
+       if (lock->oh.len > NLMCLNT_OHSIZE) {
+               void *data = kmalloc(lock->oh.len, GFP_KERNEL);
+               if (!data)
+                       return 0;
+               call->a_args.lock.oh.data = (u8 *) data;
        }
-       up(&file->f_sema);
-       return 0;
+
+       memcpy(call->a_args.lock.oh.data, lock->oh.data, lock->oh.len);
+       return 1;
+}
+
+static void nlmsvc_freegrantargs(struct nlm_rqst *call)
+{
+       if (call->a_args.lock.oh.data != call->a_owner)
+               kfree(call->a_args.lock.oh.data);
 }
 
 /*
@@ -297,9 +358,9 @@ u32
 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                        struct nlm_lock *lock, int wait, struct nlm_cookie *cookie)
 {
-       struct file_lock        *conflock;
-       struct nlm_block        *block;
+       struct nlm_block        *block, *newblock = NULL;
        int                     error;
+       u32                     ret;
 
        dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
                                file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -310,69 +371,65 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
                                wait);
 
 
-       /* Get existing block (in case client is busy-waiting) */
-       block = nlmsvc_lookup_block(file, lock, 0);
-
-       lock->fl.fl_flags |= FL_LOCKD;
-
+       lock->fl.fl_flags &= ~FL_SLEEP;
 again:
        /* Lock file against concurrent access */
        down(&file->f_sema);
+       /* Get existing block (in case client is busy-waiting) */
+       block = nlmsvc_lookup_block(file, lock);
+       if (block == NULL) {
+               if (newblock != NULL)
+                       lock = &newblock->b_call->a_args.lock;
+       } else
+               lock = &block->b_call->a_args.lock;
 
-       if (!(conflock = posix_test_lock(file->f_file, &lock->fl))) {
-               error = posix_lock_file(file->f_file, &lock->fl);
+       error = posix_lock_file(file->f_file, &lock->fl);
+       lock->fl.fl_flags &= ~FL_SLEEP;
 
-               if (block)
-                       nlmsvc_delete_block(block, 0);
-               up(&file->f_sema);
+       dprintk("lockd: posix_lock_file returned %d\n", error);
 
-               dprintk("lockd: posix_lock_file returned %d\n", -error);
-               switch(-error) {
+       switch(error) {
                case 0:
-                       return nlm_granted;
-               case EDEADLK:
-                       return nlm_deadlock;
-               case EAGAIN:
-                       return nlm_lck_denied;
+                       ret = nlm_granted;
+                       goto out;
+               case -EAGAIN:
+                       break;
+               case -EDEADLK:
+                       ret = nlm_deadlock;
+                       goto out;
                default:                        /* includes ENOLCK */
-                       return nlm_lck_denied_nolocks;
-               }
+                       ret = nlm_lck_denied_nolocks;
+                       goto out;
        }
 
-       if (!wait) {
-               up(&file->f_sema);
-               return nlm_lck_denied;
-       }
+       ret = nlm_lck_denied;
+       if (!wait)
+               goto out;
 
-       if (posix_locks_deadlock(&lock->fl, conflock)) {
-               up(&file->f_sema);
-               return nlm_deadlock;
-       }
+       ret = nlm_lck_blocked;
+       if (block != NULL)
+               goto out;
 
        /* If we don't have a block, create and initialize it. Then
         * retry because we may have slept in kmalloc. */
        /* We have to release f_sema as nlmsvc_create_block may try to
         * to claim it while doing host garbage collection */
-       if (block == NULL) {
+       if (newblock == NULL) {
                up(&file->f_sema);
                dprintk("lockd: blocking on this lock (allocating).\n");
-               if (!(block = nlmsvc_create_block(rqstp, file, lock, cookie)))
+               if (!(newblock = nlmsvc_create_block(rqstp, file, lock, cookie)))
                        return nlm_lck_denied_nolocks;
                goto again;
        }
 
        /* Append to list of blocked */
-       nlmsvc_insert_block(block, NLM_NEVER);
-
-       if (list_empty(&block->b_call.a_args.lock.fl.fl_block)) {
-               /* Now add block to block list of the conflicting lock
-                  if we haven't done so. */
-               dprintk("lockd: blocking on this lock.\n");
-               posix_block_lock(conflock, &block->b_call.a_args.lock.fl);
-       }
-
+       nlmsvc_insert_block(newblock, NLM_NEVER);
+out:
        up(&file->f_sema);
-       return nlm_lck_blocked;
+       nlmsvc_release_block(newblock);
+       nlmsvc_release_block(block);
+       dprintk("lockd: nlmsvc_lock returned %u\n", ret);
+       return ret;
 }
 
 /*
@@ -382,8 +439,6 @@ u32
 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
                                       struct nlm_lock *conflock)
 {
-       struct file_lock        *fl;
-
        dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
                                file->f_file->f_dentry->d_inode->i_sb->s_id,
                                file->f_file->f_dentry->d_inode->i_ino,
@@ -391,13 +446,14 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock,
                                (long long)lock->fl.fl_start,
                                (long long)lock->fl.fl_end);
 
-       if ((fl = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
+       if (posix_test_lock(file->f_file, &lock->fl, &conflock->fl)) {
                dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n",
-                               fl->fl_type, (long long)fl->fl_start,
-                               (long long)fl->fl_end);
+                               conflock->fl.fl_type,
+                               (long long)conflock->fl.fl_start,
+                               (long long)conflock->fl.fl_end);
                conflock->caller = "somehost";  /* FIXME */
                conflock->oh.len = 0;           /* don't return OH info */
-               conflock->fl = *fl;
+               conflock->svid = conflock->fl.fl_pid;
                return nlm_lck_denied;
        }
 
@@ -453,9 +509,12 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
                                (long long)lock->fl.fl_end);
 
        down(&file->f_sema);
-       if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-               status = nlmsvc_delete_block(block, 1);
+       block = nlmsvc_lookup_block(file, lock);
        up(&file->f_sema);
+       if (block != NULL) {
+               status = nlmsvc_unlink_block(block);
+               nlmsvc_release_block(block);
+       }
        return status ? nlm_lck_denied : nlm_granted;
 }
 
@@ -473,7 +532,7 @@ nlmsvc_notify_blocked(struct file_lock *fl)
 
        dprintk("lockd: VFS unblock notification for block %p\n", fl);
        for (bp = &nlm_blocked; (block = *bp) != 0; bp = &block->b_next) {
-               if (nlm_compare_locks(&block->b_call.a_args.lock.fl, fl)) {
+               if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
                        nlmsvc_insert_block(block, 0);
                        svc_wake_up(block->b_daemon);
                        return;
@@ -508,17 +567,13 @@ static void
 nlmsvc_grant_blocked(struct nlm_block *block)
 {
        struct nlm_file         *file = block->b_file;
-       struct nlm_lock         *lock = &block->b_call.a_args.lock;
-       struct file_lock        *conflock;
+       struct nlm_lock         *lock = &block->b_call->a_args.lock;
        int                     error;
 
        dprintk("lockd: grant blocked lock %p\n", block);
 
-       /* First thing is lock the file */
-       down(&file->f_sema);
-
        /* Unlink block request from list */
-       nlmsvc_remove_block(block);
+       nlmsvc_unlink_block(block);
 
        /* If b_granted is true this means we've been here before.
         * Just retry the grant callback, possibly refreshing the RPC
@@ -529,24 +584,21 @@ nlmsvc_grant_blocked(struct nlm_block *block)
        }
 
        /* Try the lock operation again */
-       if ((conflock = posix_test_lock(file->f_file, &lock->fl)) != NULL) {
-               /* Bummer, we blocked again */
+       lock->fl.fl_flags |= FL_SLEEP;
+       error = posix_lock_file(file->f_file, &lock->fl);
+       lock->fl.fl_flags &= ~FL_SLEEP;
+
+       switch (error) {
+       case 0:
+               break;
+       case -EAGAIN:
                dprintk("lockd: lock still blocked\n");
                nlmsvc_insert_block(block, NLM_NEVER);
-               posix_block_lock(conflock, &lock->fl);
-               up(&file->f_sema);
                return;
-       }
-
-       /* Alright, no conflicting lock. Now lock it for real. If the
-        * following yields an error, this is most probably due to low
-        * memory. Retry the lock in a few seconds.
-        */
-       if ((error = posix_lock_file(file->f_file, &lock->fl)) < 0) {
+       default:
                printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
                                -error, __FUNCTION__);
                nlmsvc_insert_block(block, 10 * HZ);
-               up(&file->f_sema);
                return;
        }
 
@@ -554,17 +606,15 @@ callback:
        /* Lock was granted by VFS. */
        dprintk("lockd: GRANTing blocked lock.\n");
        block->b_granted = 1;
-       block->b_incall  = 1;
 
        /* Schedule next grant callback in 30 seconds */
        nlmsvc_insert_block(block, 30 * HZ);
 
        /* Call the client */
-       nlm_get_host(block->b_call.a_host);
-       if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
+       kref_get(&block->b_count);
+       if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
                                                &nlmsvc_grant_ops) < 0)
-               nlm_release_host(block->b_call.a_host);
-       up(&file->f_sema);
+               nlmsvc_release_block(block);
 }
 
 /*
@@ -578,20 +628,10 @@ callback:
 static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
        struct nlm_rqst         *call = data;
-       struct nlm_block        *block;
+       struct nlm_block        *block = call->a_block;
        unsigned long           timeout;
-       struct sockaddr_in      *peer_addr = RPC_PEERADDR(task->tk_client);
 
        dprintk("lockd: GRANT_MSG RPC callback\n");
-       dprintk("callback: looking for cookie %s, host (%u.%u.%u.%u)\n",
-               nlmdbg_cookie2a(&call->a_args.cookie),
-               NIPQUAD(peer_addr->sin_addr.s_addr));
-       if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) {
-               dprintk("lockd: no block for cookie %s, host (%u.%u.%u.%u)\n",
-                       nlmdbg_cookie2a(&call->a_args.cookie),
-                       NIPQUAD(peer_addr->sin_addr.s_addr));
-               return;
-       }
 
        /* Technically, we should down the file semaphore here. Since we
         * move the block towards the head of the queue only, no harm
@@ -608,13 +648,18 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
        }
        nlmsvc_insert_block(block, timeout);
        svc_wake_up(block->b_daemon);
-       block->b_incall = 0;
+}
 
-       nlm_release_host(call->a_host);
+void nlmsvc_grant_release(void *data)
+{
+       struct nlm_rqst         *call = data;
+
+       nlmsvc_release_block(call->a_block);
 }
 
 static const struct rpc_call_ops nlmsvc_grant_ops = {
        .rpc_call_done = nlmsvc_grant_callback,
+       .rpc_release = nlmsvc_grant_release,
 };
 
 /*
@@ -634,25 +679,17 @@ nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status
                return;
        file = block->b_file;
 
-       file->f_count++;
-       down(&file->f_sema);
-       block = nlmsvc_find_block(cookie, &rqstp->rq_addr);
        if (block) {
                if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
                        /* Try again in a couple of seconds */
                        nlmsvc_insert_block(block, 10 * HZ);
-                       up(&file->f_sema);
                } else {
                        /* Lock is now held by client, or has been rejected.
                         * In both cases, the block should be removed. */
-                       up(&file->f_sema);
-                       if (status == NLM_LCK_GRANTED)
-                               nlmsvc_delete_block(block, 0);
-                       else
-                               nlmsvc_delete_block(block, 1);
+                       nlmsvc_unlink_block(block);
                }
        }
-       nlm_release_file(file);
+       nlmsvc_release_block(block);
 }
 
 /*
@@ -675,10 +712,12 @@ nlmsvc_retry_blocked(void)
                        break;
                dprintk("nlmsvc_retry_blocked(%p, when=%ld, done=%d)\n",
                        block, block->b_when, block->b_done);
+               kref_get(&block->b_count);
                if (block->b_done)
-                       nlmsvc_delete_block(block, 0);
+                       nlmsvc_unlink_block(block);
                else
                        nlmsvc_grant_blocked(block);
+               nlmsvc_release_block(block);
        }
 
        if ((block = nlm_blocked) && block->b_when != NLM_NEVER)
index 35681d9..d210cf3 100644 (file)
 
 #define NLMDBG_FACILITY                NLMDBG_CLIENT
 
-static u32     nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-
-static const struct rpc_call_ops nlmsvc_callback_ops;
-
 #ifdef CONFIG_LOCKD_V4
 static u32
 cast_to_nlm(u32 status, u32 vers)
@@ -261,84 +257,92 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
        return rpc_success;
 }
 
+/*
+ * This is the generic lockd callback for async RPC calls
+ */
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
+{
+       dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+                       -task->tk_status);
+}
+
+static void nlmsvc_callback_release(void *data)
+{
+       nlm_release_call(data);
+}
+
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+       .rpc_call_done = nlmsvc_callback_exit,
+       .rpc_release = nlmsvc_callback_release,
+};
+
 /*
  * `Async' versions of the above service routines. They aren't really,
  * because we send the callback before the reply proper. I hope this
  * doesn't break any clients.
  */
-static int
-nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
-                                            void            *resp)
+static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp,
+               int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res  *))
 {
-       struct nlm_res  res;
-       u32             stat;
+       struct nlm_host *host;
+       struct nlm_rqst *call;
+       int stat;
 
-       dprintk("lockd: TEST_MSG      called\n");
-       memset(&res, 0, sizeof(res));
+       host = nlmsvc_lookup_host(rqstp);
+       if (host == NULL)
+               return rpc_system_err;
 
-       if ((stat = nlmsvc_proc_test(rqstp, argp, &res)) == 0)
-               stat = nlmsvc_callback(rqstp, NLMPROC_TEST_RES, &res);
-       return stat;
+       call = nlm_alloc_call(host);
+       if (call == NULL)
+               return rpc_system_err;
+
+       stat = func(rqstp, argp, &call->a_res);
+       if (stat != 0) {
+               nlm_release_call(call);
+               return stat;
+       }
+
+       call->a_flags = RPC_TASK_ASYNC;
+       if (nlm_async_reply(call, proc, &nlmsvc_callback_ops) < 0)
+               return rpc_system_err;
+       return rpc_success;
 }
 
-static int
-nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                             void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
+       dprintk("lockd: TEST_MSG      called\n");
+       return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test);
+}
 
+static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+                                            void            *resp)
+{
        dprintk("lockd: LOCK_MSG      called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlmsvc_proc_lock(rqstp, argp, &res)) == 0)
-               stat = nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, &res);
-       return stat;
+       return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock);
 }
 
-static int
-nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
+static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                               void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: CANCEL_MSG    called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlmsvc_proc_cancel(rqstp, argp, &res)) == 0)
-               stat = nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, &res);
-       return stat;
+       return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel);
 }
 
 static int
 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: UNLOCK_MSG    called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlmsvc_proc_unlock(rqstp, argp, &res)) == 0)
-               stat = nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, &res);
-       return stat;
+       return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock);
 }
 
 static int
 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp,
                                                 void            *resp)
 {
-       struct nlm_res  res;
-       u32             stat;
-
        dprintk("lockd: GRANTED_MSG   called\n");
-       memset(&res, 0, sizeof(res));
-
-       if ((stat = nlmsvc_proc_granted(rqstp, argp, &res)) == 0)
-               stat = nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, &res);
-       return stat;
+       return nlmsvc_callback(rqstp, NLMPROC_GRANTED_RES, argp, nlmsvc_proc_granted);
 }
 
 /*
@@ -496,55 +500,6 @@ nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
        return rpc_success;
 }
 
-/*
- * This is the generic lockd callback for async RPC calls
- */
-static u32
-nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
-{
-       struct nlm_host *host;
-       struct nlm_rqst *call;
-
-       if (!(call = nlmclnt_alloc_call()))
-               return rpc_system_err;
-
-       host = nlmclnt_lookup_host(&rqstp->rq_addr,
-                               rqstp->rq_prot, rqstp->rq_vers);
-       if (!host) {
-               kfree(call);
-               return rpc_system_err;
-       }
-
-       call->a_flags = RPC_TASK_ASYNC;
-       call->a_host  = host;
-       memcpy(&call->a_args, resp, sizeof(*resp));
-
-       if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
-               goto error;
-
-       return rpc_success;
- error:
-       nlm_release_host(host);
-       kfree(call);
-       return rpc_system_err;
-}
-
-static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
-{
-       struct nlm_rqst *call = data;
-
-       if (task->tk_status < 0) {
-               dprintk("lockd: %4d callback failed (errno = %d)\n",
-                                       task->tk_pid, -task->tk_status);
-       }
-       nlm_release_host(call->a_host);
-       kfree(call);
-}
-
-static const struct rpc_call_ops nlmsvc_callback_ops = {
-       .rpc_call_done = nlmsvc_callback_exit,
-};
-
 /*
  * NLM Server procedures.
  */
index 4943fb7..27288c8 100644 (file)
@@ -88,7 +88,7 @@ nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file,
  * Traverse all shares for a given file (and host).
  * NLM_ACT_CHECK is handled by nlmsvc_inspect_file.
  */
-int
+void
 nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
 {
        struct nlm_share        *share, **shpp;
@@ -106,6 +106,4 @@ nlmsvc_traverse_shares(struct nlm_host *host, struct nlm_file *file, int action)
                }
                shpp = &share->s_next;
        }
-
-       return 0;
 }
index 62f4a38..c7a6e3a 100644 (file)
@@ -182,7 +182,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, int action)
 again:
        file->f_locks = 0;
        for (fl = inode->i_flock; fl; fl = fl->fl_next) {
-               if (!(fl->fl_flags & FL_LOCKD))
+               if (fl->fl_lmops != &nlmsvc_lock_operations)
                        continue;
 
                /* update current lock count */
@@ -224,9 +224,8 @@ nlm_inspect_file(struct nlm_host *host, struct nlm_file *file, int action)
                if (file->f_count || file->f_blocks || file->f_shares)
                        return 1;
        } else {
-               if (nlmsvc_traverse_blocks(host, file, action)
-                || nlmsvc_traverse_shares(host, file, action))
-                       return 1;
+               nlmsvc_traverse_blocks(host, file, action);
+               nlmsvc_traverse_shares(host, file, action);
        }
        return nlm_traverse_locks(host, file, action);
 }
index 200fbda..766ce06 100644 (file)
@@ -131,10 +131,11 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock)
         || !(p = nlm_decode_fh(p, &lock->fh))
         || !(p = nlm_decode_oh(p, &lock->oh)))
                return NULL;
+       lock->svid  = ntohl(*p++);
 
        locks_init_lock(fl);
        fl->fl_owner = current->files;
-       fl->fl_pid   = ntohl(*p++);
+       fl->fl_pid   = (pid_t)lock->svid;
        fl->fl_flags = FL_POSIX;
        fl->fl_type  = F_RDLCK;         /* as good as anything else */
        start = ntohl(*p++);
@@ -174,7 +175,7 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock)
        else
                len = loff_t_to_s32(fl->fl_end - fl->fl_start + 1);
 
-       *p++ = htonl(fl->fl_pid);
+       *p++ = htonl(lock->svid);
        *p++ = htonl(start);
        *p++ = htonl(len);
 
@@ -197,7 +198,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
                struct file_lock        *fl = &resp->lock.fl;
 
                *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-               *p++ = htonl(fl->fl_pid);
+               *p++ = htonl(resp->lock.svid);
 
                /* Encode owner handle. */
                if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -298,7 +299,8 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
        memset(lock, 0, sizeof(*lock));
        locks_init_lock(&lock->fl);
-       lock->fl.fl_pid = ~(u32) 0;
+       lock->svid = ~(u32) 0;
+       lock->fl.fl_pid = (pid_t)lock->svid;
 
        if (!(p = nlm_decode_cookie(p, &argp->cookie))
         || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -415,7 +417,8 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
                memset(&resp->lock, 0, sizeof(resp->lock));
                locks_init_lock(fl);
                excl = ntohl(*p++);
-               fl->fl_pid = ntohl(*p++);
+               resp->lock.svid = ntohl(*p++);
+               fl->fl_pid = (pid_t)resp->lock.svid;
                if (!(p = nlm_decode_oh(p, &resp->lock.oh)))
                        return -EIO;
 
@@ -543,7 +546,9 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
        .p_proc      = NLMPROC_##proc,                                  \
        .p_encode    = (kxdrproc_t) nlmclt_encode_##argtype,            \
        .p_decode    = (kxdrproc_t) nlmclt_decode_##restype,            \
-       .p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2 \
+       .p_bufsiz    = MAX(NLM_##argtype##_sz, NLM_##restype##_sz) << 2,        \
+       .p_statidx   = NLMPROC_##proc,                                  \
+       .p_name      = #proc,                                           \
        }
 
 static struct rpc_procinfo     nlm_procedures[] = {
index fdcf105..36eb175 100644 (file)
@@ -130,10 +130,11 @@ nlm4_decode_lock(u32 *p, struct nlm_lock *lock)
         || !(p = nlm4_decode_fh(p, &lock->fh))
         || !(p = nlm4_decode_oh(p, &lock->oh)))
                return NULL;
+       lock->svid  = ntohl(*p++);
 
        locks_init_lock(fl);
        fl->fl_owner = current->files;
-       fl->fl_pid   = ntohl(*p++);
+       fl->fl_pid   = (pid_t)lock->svid;
        fl->fl_flags = FL_POSIX;
        fl->fl_type  = F_RDLCK;         /* as good as anything else */
        p = xdr_decode_hyper(p, &start);
@@ -167,7 +168,7 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock)
         || (fl->fl_end > NLM4_OFFSET_MAX && fl->fl_end != OFFSET_MAX))
                return NULL;
 
-       *p++ = htonl(fl->fl_pid);
+       *p++ = htonl(lock->svid);
 
        start = loff_t_to_s64(fl->fl_start);
        if (fl->fl_end == OFFSET_MAX)
@@ -198,7 +199,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
                struct file_lock        *fl = &resp->lock.fl;
 
                *p++ = (fl->fl_type == F_RDLCK)? xdr_zero : xdr_one;
-               *p++ = htonl(fl->fl_pid);
+               *p++ = htonl(resp->lock.svid);
 
                /* Encode owner handle. */
                if (!(p = xdr_encode_netobj(p, &resp->lock.oh)))
@@ -212,8 +213,8 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
                
                p = xdr_encode_hyper(p, start);
                p = xdr_encode_hyper(p, len);
-               dprintk("xdr: encode_testres (status %d pid %d type %d start %Ld end %Ld)\n",
-                       resp->status, fl->fl_pid, fl->fl_type,
+               dprintk("xdr: encode_testres (status %u pid %d type %d start %Ld end %Ld)\n",
+                       resp->status, (int)resp->lock.svid, fl->fl_type,
                        (long long)fl->fl_start,  (long long)fl->fl_end);
        }
 
@@ -303,7 +304,8 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp)
 
        memset(lock, 0, sizeof(*lock));
        locks_init_lock(&lock->fl);
-       lock->fl.fl_pid = ~(u32) 0;
+       lock->svid = ~(u32) 0;
+       lock->fl.fl_pid = (pid_t)lock->svid;
 
        if (!(p = nlm4_decode_cookie(p, &argp->cookie))
         || !(p = xdr_decode_string_inplace(p, &lock->caller,
@@ -420,7 +422,8 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
                memset(&resp->lock, 0, sizeof(resp->lock));
                locks_init_lock(fl);
                excl = ntohl(*p++);
-               fl->fl_pid = ntohl(*p++);
+               resp->lock.svid = ntohl(*p++);
+               fl->fl_pid = (pid_t)resp->lock.svid;
                if (!(p = nlm4_decode_oh(p, &resp->lock.oh)))
                        return -EIO;
 
@@ -548,7 +551,9 @@ nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
        .p_proc      = NLMPROC_##proc,                                  \
        .p_encode    = (kxdrproc_t) nlm4clt_encode_##argtype,           \
        .p_decode    = (kxdrproc_t) nlm4clt_decode_##restype,           \
-       .p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2       \
+       .p_bufsiz    = MAX(NLM4_##argtype##_sz, NLM4_##restype##_sz) << 2,      \
+       .p_statidx   = NLMPROC_##proc,                                  \
+       .p_name      = #proc,                                           \
        }
 
 static struct rpc_procinfo     nlm4_procedures[] = {
index 909eab8..56f996e 100644 (file)
@@ -139,10 +139,7 @@ int lease_break_time = 45;
 #define for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; lockp = &(*lockp)->fl_next)
 
-LIST_HEAD(file_lock_list);
-
-EXPORT_SYMBOL(file_lock_list);
-
+static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
 
 static kmem_cache_t *filelock_cache;
@@ -153,6 +150,21 @@ static struct file_lock *locks_alloc_lock(void)
        return kmem_cache_alloc(filelock_cache, SLAB_KERNEL);
 }
 
+static void locks_release_private(struct file_lock *fl)
+{
+       if (fl->fl_ops) {
+               if (fl->fl_ops->fl_release_private)
+                       fl->fl_ops->fl_release_private(fl);
+               fl->fl_ops = NULL;
+       }
+       if (fl->fl_lmops) {
+               if (fl->fl_lmops->fl_release_private)
+                       fl->fl_lmops->fl_release_private(fl);
+               fl->fl_lmops = NULL;
+       }
+
+}
+
 /* Free a lock which is not in use. */
 static void locks_free_lock(struct file_lock *fl)
 {
@@ -169,18 +181,7 @@ static void locks_free_lock(struct file_lock *fl)
        if (!list_empty(&fl->fl_link))
                panic("Attempting to free lock on active lock list");
 
-       if (fl->fl_ops) {
-               if (fl->fl_ops->fl_release_private)
-                       fl->fl_ops->fl_release_private(fl);
-               fl->fl_ops = NULL;
-       }
-
-       if (fl->fl_lmops) {
-               if (fl->fl_lmops->fl_release_private)
-                       fl->fl_lmops->fl_release_private(fl);
-               fl->fl_lmops = NULL;
-       }
-
+       locks_release_private(fl);
        kmem_cache_free(filelock_cache, fl);
 }
 
@@ -218,24 +219,46 @@ static void init_once(void *foo, kmem_cache_t *cache, unsigned long flags)
        locks_init_lock(lock);
 }
 
+static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
+{
+       if (fl->fl_ops) {
+               if (fl->fl_ops->fl_copy_lock)
+                       fl->fl_ops->fl_copy_lock(new, fl);
+               new->fl_ops = fl->fl_ops;
+       }
+       if (fl->fl_lmops) {
+               if (fl->fl_lmops->fl_copy_lock)
+                       fl->fl_lmops->fl_copy_lock(new, fl);
+               new->fl_lmops = fl->fl_lmops;
+       }
+}
+
 /*
  * Initialize a new lock from an existing file_lock structure.
  */
-void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+static void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl)
 {
        new->fl_owner = fl->fl_owner;
        new->fl_pid = fl->fl_pid;
-       new->fl_file = fl->fl_file;
+       new->fl_file = NULL;
        new->fl_flags = fl->fl_flags;
        new->fl_type = fl->fl_type;
        new->fl_start = fl->fl_start;
        new->fl_end = fl->fl_end;
+       new->fl_ops = NULL;
+       new->fl_lmops = NULL;
+}
+
+void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
+{
+       locks_release_private(new);
+
+       __locks_copy_lock(new, fl);
+       new->fl_file = fl->fl_file;
        new->fl_ops = fl->fl_ops;
        new->fl_lmops = fl->fl_lmops;
-       if (fl->fl_ops && fl->fl_ops->fl_copy_lock)
-               fl->fl_ops->fl_copy_lock(new, fl);
-       if (fl->fl_lmops && fl->fl_lmops->fl_copy_lock)
-               fl->fl_lmops->fl_copy_lock(new, fl);
+
+       locks_copy_private(new, fl);
 }
 
 EXPORT_SYMBOL(locks_copy_lock);
@@ -654,8 +677,9 @@ static int locks_block_on_timeout(struct file_lock *blocker, struct file_lock *w
        return result;
 }
 
-struct file_lock *
-posix_test_lock(struct file *filp, struct file_lock *fl)
+int
+posix_test_lock(struct file *filp, struct file_lock *fl,
+               struct file_lock *conflock)
 {
        struct file_lock *cfl;
 
@@ -666,9 +690,13 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
                if (posix_locks_conflict(cfl, fl))
                        break;
        }
+       if (cfl) {
+               __locks_copy_lock(conflock, cfl);
+               unlock_kernel();
+               return 1;
+       }
        unlock_kernel();
-
-       return (cfl);
+       return 0;
 }
 
 EXPORT_SYMBOL(posix_test_lock);
@@ -904,7 +932,8 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request)
                                fl->fl_start = request->fl_start;
                                fl->fl_end = request->fl_end;
                                fl->fl_type = request->fl_type;
-                               fl->fl_u = request->fl_u;
+                               locks_release_private(fl);
+                               locks_copy_private(fl, request);
                                request = fl;
                                added = 1;
                        }
@@ -1544,7 +1573,7 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
  */
 int fcntl_getlk(struct file *filp, struct flock __user *l)
 {
-       struct file_lock *fl, file_lock;
+       struct file_lock *fl, cfl, file_lock;
        struct flock flock;
        int error;
 
@@ -1568,7 +1597,7 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)
                else
                  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
        } else {
-               fl = posix_test_lock(filp, &file_lock);
+               fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
        }
  
        flock.l_type = F_UNLCK;
@@ -1698,7 +1727,7 @@ out:
  */
 int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
 {
-       struct file_lock *fl, file_lock;
+       struct file_lock *fl, cfl, file_lock;
        struct flock64 flock;
        int error;
 
@@ -1722,7 +1751,7 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)
                else
                  fl = (file_lock.fl_type == F_UNLCK ? NULL : &file_lock);
        } else {
-               fl = posix_test_lock(filp, &file_lock);
+               fl = (posix_test_lock(filp, &file_lock, &cfl) ? &cfl : NULL);
        }
  
        flock.l_type = F_UNLCK;
@@ -1935,21 +1964,6 @@ void locks_remove_flock(struct file *filp)
        unlock_kernel();
 }
 
-/**
- *     posix_block_lock - blocks waiting for a file lock
- *     @blocker: the lock which is blocking
- *     @waiter: the lock which conflicts and has to wait
- *
- * lockd needs to block waiting for locks.
- */
-void
-posix_block_lock(struct file_lock *blocker, struct file_lock *waiter)
-{
-       locks_insert_block(blocker, waiter);
-}
-
-EXPORT_SYMBOL(posix_block_lock);
-
 /**
  *     posix_unblock_lock - stop waiting for a file lock
  *      @filp:   how the file was opened
index 39c81a8..71e75bc 100644 (file)
@@ -399,6 +399,44 @@ struct seq_operations mounts_op = {
        .show   = show_vfsmnt
 };
 
+static int show_vfsstat(struct seq_file *m, void *v)
+{
+       struct vfsmount *mnt = v;
+       int err = 0;
+
+       /* device */
+       if (mnt->mnt_devname) {
+               seq_puts(m, "device ");
+               mangle(m, mnt->mnt_devname);
+       } else
+               seq_puts(m, "no device");
+
+       /* mount point */
+       seq_puts(m, " mounted on ");
+       seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+       seq_putc(m, ' ');
+
+       /* file system type */
+       seq_puts(m, "with fstype ");
+       mangle(m, mnt->mnt_sb->s_type->name);
+
+       /* optional statistics */
+       if (mnt->mnt_sb->s_op->show_stats) {
+               seq_putc(m, ' ');
+               err = mnt->mnt_sb->s_op->show_stats(m, mnt);
+       }
+
+       seq_putc(m, '\n');
+       return err;
+}
+
+struct seq_operations mountstats_op = {
+       .start  = m_start,
+       .next   = m_next,
+       .stop   = m_stop,
+       .show   = show_vfsstat,
+};
+
 /**
  * may_umount_tree - check if a mount tree is busy
  * @mnt: root of mount tree
index fcd9740..99d2cfb 100644 (file)
@@ -55,7 +55,12 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 
        complete(&nfs_callback_info.started);
 
-       while (nfs_callback_info.users != 0 || !signalled()) {
+       for(;;) {
+               if (signalled()) {
+                       if (nfs_callback_info.users == 0)
+                               break;
+                       flush_signals(current);
+               }
                /*
                 * Listen for a request on the socket
                 */
@@ -73,6 +78,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
                svc_process(serv, rqstp);
        }
 
+       svc_exit_thread(rqstp);
        nfs_callback_info.pid = 0;
        complete(&nfs_callback_info.stopped);
        unlock_kernel();
@@ -134,11 +140,13 @@ int nfs_callback_down(void)
 
        lock_kernel();
        down(&nfs_callback_sema);
-       if (--nfs_callback_info.users || nfs_callback_info.pid == 0)
-               goto out;
-       kill_proc(nfs_callback_info.pid, SIGKILL, 1);
-       wait_for_completion(&nfs_callback_info.stopped);
-out:
+       nfs_callback_info.users--;
+       do {
+               if (nfs_callback_info.users != 0 || nfs_callback_info.pid == 0)
+                       break;
+               if (kill_proc(nfs_callback_info.pid, SIGKILL, 1) < 0)
+                       break;
+       } while (wait_for_completion_timeout(&nfs_callback_info.stopped, 5*HZ) == 0);
        up(&nfs_callback_sema);
        unlock_kernel();
        return ret;
index 7c33b9a..05c38cf 100644 (file)
@@ -330,7 +330,7 @@ static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res)
 
 static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res)
 {
-       uint32_t *savep;
+       uint32_t *savep = NULL;
        unsigned status = res->status;
        
        if (unlikely(status != 0))
@@ -358,23 +358,26 @@ static unsigned process_op(struct svc_rqst *rqstp,
                struct xdr_stream *xdr_in, void *argp,
                struct xdr_stream *xdr_out, void *resp)
 {
-       struct callback_op *op;
-       unsigned int op_nr;
+       struct callback_op *op = &callback_ops[0];
+       unsigned int op_nr = OP_CB_ILLEGAL;
        unsigned int status = 0;
        long maxlen;
        unsigned res;
 
        dprintk("%s: start\n", __FUNCTION__);
        status = decode_op_hdr(xdr_in, &op_nr);
-       if (unlikely(status != 0)) {
-               op_nr = OP_CB_ILLEGAL;
-               op = &callback_ops[0];
-       } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) {
-               op_nr = OP_CB_ILLEGAL;
-               op = &callback_ops[0];
-               status = htonl(NFS4ERR_OP_ILLEGAL);
-       } else
-               op = &callback_ops[op_nr];
+       if (likely(status == 0)) {
+               switch (op_nr) {
+                       case OP_CB_GETATTR:
+                       case OP_CB_RECALL:
+                               op = &callback_ops[op_nr];
+                               break;
+                       default:
+                               op_nr = OP_CB_ILLEGAL;
+                               op = &callback_ops[0];
+                               status = htonl(NFS4ERR_OP_ILLEGAL);
+               }
+       }
 
        maxlen = xdr_out->end - xdr_out->p;
        if (maxlen > 0 && maxlen < PAGE_SIZE) {
@@ -416,6 +419,7 @@ static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp
        decode_compound_hdr_arg(&xdr_in, &hdr_arg);
        hdr_res.taglen = hdr_arg.taglen;
        hdr_res.tag = hdr_arg.tag;
+       hdr_res.nops = NULL;
        encode_compound_hdr_res(&xdr_out, &hdr_res);
 
        for (;;) {
index c6f07c1..d3be923 100644 (file)
@@ -421,3 +421,22 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp)
                nfs_free_delegation(delegation);
        }
 }
+
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
+{
+       struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_delegation *delegation;
+       int res = 0;
+
+       if (nfsi->delegation_state == 0)
+               return 0;
+       spin_lock(&clp->cl_lock);
+       delegation = nfsi->delegation;
+       if (delegation != NULL) {
+               memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+               res = 1;
+       }
+       spin_unlock(&clp->cl_lock);
+       return res;
+}
index 7a0b2bf..3858694 100644 (file)
@@ -41,6 +41,7 @@ void nfs_delegation_reap_unclaimed(struct nfs4_client *clp);
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
 int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
index a1554be..06c48b3 100644 (file)
@@ -34,6 +34,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFS_PARANOIA 1
 /* #define NFS_DEBUG_VERBOSE 1 */
@@ -129,6 +130,9 @@ nfs_opendir(struct inode *inode, struct file *filp)
 {
        int res = 0;
 
+       dfprintk(VFS, "NFS: opendir(%s/%ld)\n",
+                       inode->i_sb->s_id, inode->i_ino);
+
        lock_kernel();
        /* Call generic open code in order to cache credentials */
        if (!res)
@@ -172,7 +176,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
        unsigned long   timestamp;
        int             error;
 
-       dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+       dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
+                       __FUNCTION__, (long long)desc->entry->cookie,
+                       page->index);
 
  again:
        timestamp = jiffies;
@@ -244,7 +250,8 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
                        status;
 
        while((status = dir_decode(desc)) == 0) {
-               dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie);
+               dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
+                               __FUNCTION__, (unsigned long long)entry->cookie);
                if (entry->prev_cookie == *desc->dir_cookie)
                        break;
                if (loop_count++ > 200) {
@@ -252,7 +259,6 @@ int find_dirent(nfs_readdir_descriptor_t *desc)
                        schedule();
                }
        }
-       dfprintk(VFS, "NFS: find_dirent() returns %d\n", status);
        return status;
 }
 
@@ -276,7 +282,8 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
                if (status)
                        break;
 
-               dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index);
+               dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
+                               (unsigned long long)entry->cookie, desc->current_index);
 
                if (desc->file->f_pos == desc->current_index) {
                        *desc->dir_cookie = entry->cookie;
@@ -288,7 +295,6 @@ int find_dirent_index(nfs_readdir_descriptor_t *desc)
                        schedule();
                }
        }
-       dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status);
        return status;
 }
 
@@ -303,7 +309,9 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
        struct page     *page;
        int             status;
 
-       dfprintk(VFS, "NFS: find_dirent_page() searching directory page %ld\n", desc->page_index);
+       dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
+                       __FUNCTION__, desc->page_index,
+                       (long long) *desc->dir_cookie);
 
        page = read_cache_page(inode->i_mapping, desc->page_index,
                               (filler_t *)nfs_readdir_filler, desc);
@@ -324,7 +332,7 @@ int find_dirent_page(nfs_readdir_descriptor_t *desc)
        if (status < 0)
                dir_page_release(desc);
  out:
-       dfprintk(VFS, "NFS: find_dirent_page() returns %d\n", status);
+       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, status);
        return status;
  read_error:
        page_cache_release(page);
@@ -346,13 +354,15 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 
        /* Always search-by-index from the beginning of the cache */
        if (*desc->dir_cookie == 0) {
-               dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos);
+               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
+                               (long long)desc->file->f_pos);
                desc->page_index = 0;
                desc->entry->cookie = desc->entry->prev_cookie = 0;
                desc->entry->eof = 0;
                desc->current_index = 0;
        } else
-               dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
+                               (unsigned long long)*desc->dir_cookie);
 
        for (;;) {
                res = find_dirent_page(desc);
@@ -365,7 +375,8 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
                        schedule();
                }
        }
-       dfprintk(VFS, "NFS: readdir_search_pagecache() returned %d\n", res);
+
+       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __FUNCTION__, res);
        return res;
 }
 
@@ -390,7 +401,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
        int             loop_count = 0,
                        res;
 
-       dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie);
+       dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
+                       (unsigned long long)entry->cookie);
 
        for(;;) {
                unsigned d_type = DT_UNKNOWN;
@@ -427,7 +439,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
        dir_page_release(desc);
        if (dentry != NULL)
                dput(dentry);
-       dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res);
+       dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
+                       (unsigned long long)*desc->dir_cookie, res);
        return res;
 }
 
@@ -453,7 +466,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
        struct page     *page = NULL;
        int             status;
 
-       dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie);
+       dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
+                       (unsigned long long)*desc->dir_cookie);
 
        page = alloc_page(GFP_HIGHUSER);
        if (!page) {
@@ -485,7 +499,8 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
        desc->entry->cookie = desc->entry->prev_cookie = 0;
        desc->entry->eof = 0;
  out:
-       dfprintk(VFS, "NFS: uncached_readdir() returns %d\n", status);
+       dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
+                       __FUNCTION__, status);
        return status;
  out_release:
        dir_page_release(desc);
@@ -507,6 +522,11 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct nfs_fattr fattr;
        long            res;
 
+       dfprintk(VFS, "NFS: readdir(%s/%s) starting at cookie %Lu\n",
+                       dentry->d_parent->d_name.name, dentry->d_name.name,
+                       (long long)filp->f_pos);
+       nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
+
        lock_kernel();
 
        res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
@@ -566,9 +586,12 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                }
        }
        unlock_kernel();
-       if (res < 0)
-               return res;
-       return 0;
+       if (res > 0)
+               res = 0;
+       dfprintk(VFS, "NFS: readdir(%s/%s) returns %ld\n",
+                       dentry->d_parent->d_name.name, dentry->d_name.name,
+                       res);
+       return res;
 }
 
 loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
@@ -599,6 +622,10 @@ out:
  */
 int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 {
+       dfprintk(VFS, "NFS: fsync_dir(%s/%s) datasync %d\n",
+                       dentry->d_parent->d_name.name, dentry->d_name.name,
+                       datasync);
+
        return 0;
 }
 
@@ -713,6 +740,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
        parent = dget_parent(dentry);
        lock_kernel();
        dir = parent->d_inode;
+       nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
        inode = dentry->d_inode;
 
        if (!inode) {
@@ -722,8 +750,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
        }
 
        if (is_bad_inode(inode)) {
-               dfprintk(VFS, "nfs_lookup_validate: %s/%s has dud inode\n",
-                       dentry->d_parent->d_name.name, dentry->d_name.name);
+               dfprintk(LOOKUPCACHE, "%s: %s/%s has dud inode\n",
+                               __FUNCTION__, dentry->d_parent->d_name.name,
+                               dentry->d_name.name);
                goto out_bad;
        }
 
@@ -755,6 +784,9 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
  out_valid:
        unlock_kernel();
        dput(parent);
+       dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is valid\n",
+                       __FUNCTION__, dentry->d_parent->d_name.name,
+                       dentry->d_name.name);
        return 1;
 out_zap_parent:
        nfs_zap_caches(dir);
@@ -771,6 +803,9 @@ out_zap_parent:
        d_drop(dentry);
        unlock_kernel();
        dput(parent);
+       dfprintk(LOOKUPCACHE, "NFS: %s(%s/%s) is invalid\n",
+                       __FUNCTION__, dentry->d_parent->d_name.name,
+                       dentry->d_name.name);
        return 0;
 }
 
@@ -844,6 +879,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 
        dfprintk(VFS, "NFS: lookup(%s/%s)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name);
+       nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
 
        res = ERR_PTR(-ENAMETOOLONG);
        if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
@@ -865,9 +901,9 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
                res = ERR_PTR(error);
                goto out_unlock;
        }
-       res = ERR_PTR(-EACCES);
        inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr);
-       if (!inode)
+       res = (struct dentry *)inode;
+       if (IS_ERR(res))
                goto out_unlock;
 no_entry:
        res = d_add_unique(dentry, inode);
@@ -912,6 +948,9 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
        struct dentry *res = NULL;
        int error;
 
+       dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
+
        /* Check that we are indeed trying to open this file */
        if (!is_atomic_open(dir, nd))
                goto no_open;
@@ -1057,7 +1096,7 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
                return NULL;
        dentry->d_op = NFS_PROTO(dir)->dentry_ops;
        inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
-       if (!inode) {
+       if (IS_ERR(inode)) {
                dput(dentry);
                return NULL;
        }
@@ -1095,9 +1134,9 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
                if (error < 0)
                        goto out_err;
        }
-       error = -ENOMEM;
        inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
-       if (inode == NULL)
+       error = PTR_ERR(inode);
+       if (IS_ERR(inode))
                goto out_err;
        d_instantiate(dentry, inode);
        return 0;
@@ -1119,8 +1158,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
        int error;
        int open_flags = 0;
 
-       dfprintk(VFS, "NFS: create(%s/%ld, %s\n", dir->i_sb->s_id, 
-               dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
        attr.ia_mode = mode;
        attr.ia_valid = ATTR_MODE;
@@ -1153,8 +1192,8 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
        struct iattr attr;
        int status;
 
-       dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id,
-               dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: mknod(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
        if (!new_valid_dev(rdev))
                return -EINVAL;
@@ -1186,8 +1225,8 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        struct iattr attr;
        int error;
 
-       dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id,
-               dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: mkdir(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
        attr.ia_valid = ATTR_MODE;
        attr.ia_mode = mode | S_IFDIR;
@@ -1212,8 +1251,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
        int error;
 
-       dfprintk(VFS, "NFS: rmdir(%s/%ld, %s\n", dir->i_sb->s_id,
-               dir->i_ino, dentry->d_name.name);
+       dfprintk(VFS, "NFS: rmdir(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
        lock_kernel();
        nfs_begin_data_update(dir);
@@ -1241,6 +1280,7 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
        dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name, 
                atomic_read(&dentry->d_count));
+       nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
 
 #ifdef NFS_PARANOIA
 if (!dentry->d_inode)
@@ -1268,8 +1308,8 @@ dentry->d_parent->d_name.name, dentry->d_name.name);
                sillycounter++;
                sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
 
-               dfprintk(VFS, "trying to rename %s to %s\n",
-                        dentry->d_name.name, silly);
+               dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+                               dentry->d_name.name, silly);
                
                sdentry = lookup_one_len(silly, dentry->d_parent, slen);
                /*
@@ -1640,6 +1680,8 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
        struct rpc_cred *cred;
        int res = 0;
 
+       nfs_inc_stats(inode, NFSIOS_VFSACCESS);
+
        if (mask == 0)
                goto out;
        /* Is this sys_access() ? */
@@ -1679,13 +1721,15 @@ force_lookup:
                res = PTR_ERR(cred);
        unlock_kernel();
 out:
+       dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
+               inode->i_sb->s_id, inode->i_ino, mask, res);
        return res;
 out_notsup:
        res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
        if (res == 0)
                res = generic_permission(inode, mask, NULL);
        unlock_kernel();
-       return res;
+       goto out;
 }
 
 /*
index 4e9b3a1..cbef57a 100644 (file)
@@ -7,11 +7,11 @@
  *
  * There are important applications whose performance or correctness
  * depends on uncached access to file data.  Database clusters
- * (multiple copies of the same instance running on separate hosts) 
+ * (multiple copies of the same instance running on separate hosts)
  * implement their own cache coherency protocol that subsumes file
- * system cache protocols.  Applications that process datasets 
- * considerably larger than the client's memory do not always benefit 
- * from a local cache.  A streaming video server, for instance, has no 
+ * system cache protocols.  Applications that process datasets
+ * considerably larger than the client's memory do not always benefit
+ * from a local cache.  A streaming video server, for instance, has no
  * need to cache the contents of a file.
  *
  * When an application requests uncached I/O, all read and write requests
@@ -34,6 +34,7 @@
  * 08 Jun 2003 Port to 2.5 APIs  --cel
  * 31 Mar 2004 Handle direct I/O without VFS support  --cel
  * 15 Sep 2004 Parallel async reads  --cel
+ * 04 May 2005 support O_DIRECT with aio  --cel
  *
  */
 
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY                NFSDBG_VFS
-#define MAX_DIRECTIO_SIZE      (4096UL << PAGE_SHIFT)
 
-static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty);
 static kmem_cache_t *nfs_direct_cachep;
 
 /*
@@ -65,38 +66,78 @@ static kmem_cache_t *nfs_direct_cachep;
  */
 struct nfs_direct_req {
        struct kref             kref;           /* release manager */
-       struct list_head        list;           /* nfs_read_data structs */
-       wait_queue_head_t       wait;           /* wait for i/o completion */
+
+       /* I/O parameters */
+       struct list_head        list,           /* nfs_read/write_data structs */
+                               rewrite_list;   /* saved nfs_write_data structs */
+       struct nfs_open_context *ctx;           /* file open context info */
+       struct kiocb *          iocb;           /* controlling i/o request */
+       struct inode *          inode;          /* target file of i/o */
+       unsigned long           user_addr;      /* location of user's buffer */
+       size_t                  user_count;     /* total bytes to move */
+       loff_t                  pos;            /* starting offset in file */
        struct page **          pages;          /* pages in our buffer */
        unsigned int            npages;         /* count of pages */
-       atomic_t                complete,       /* i/os we're waiting for */
-                               count,          /* bytes actually processed */
+
+       /* completion state */
+       spinlock_t              lock;           /* protect completion state */
+       int                     outstanding;    /* i/os we're waiting for */
+       ssize_t                 count,          /* bytes actually processed */
                                error;          /* any reported error */
+       struct completion       completion;     /* wait for i/o completion */
+
+       /* commit state */
+       struct nfs_write_data * commit_data;    /* special write_data for commits */
+       int                     flags;
+#define NFS_ODIRECT_DO_COMMIT          (1)     /* an unstable reply was received */
+#define NFS_ODIRECT_RESCHED_WRITES     (2)     /* write verification failed */
+       struct nfs_writeverf    verf;           /* unstable write verifier */
 };
 
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync);
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode);
 
 /**
- * nfs_get_user_pages - find and set up pages underlying user's buffer
- * rw: direction (read or write)
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * @pages: returned array of page struct pointers underlying user's buffer
+ * nfs_direct_IO - NFS address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of this routine in the address space ops vector means
+ * the NFS client supports direct I/O.  However, we shunt off direct
+ * read and write requests before the VFS gets them, so this method
+ * should never be called.
  */
-static inline int
-nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
-               struct page ***pages)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+{
+       struct dentry *dentry = iocb->ki_filp->f_dentry;
+
+       dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
+                       dentry->d_name.name, (long long) pos, nr_segs);
+
+       return -EINVAL;
+}
+
+static void nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+{
+       int i;
+       for (i = 0; i < npages; i++) {
+               struct page *page = pages[i];
+               if (do_dirty && !PageCompound(page))
+                       set_page_dirty_lock(page);
+               page_cache_release(page);
+       }
+       kfree(pages);
+}
+
+static inline int nfs_get_user_pages(int rw, unsigned long user_addr, size_t size, struct page ***pages)
 {
        int result = -ENOMEM;
        unsigned long page_count;
        size_t array_size;
 
-       /* set an arbitrary limit to prevent type overflow */
-       /* XXX: this can probably be as large as INT_MAX */
-       if (size > MAX_DIRECTIO_SIZE) {
-               *pages = NULL;
-               return -EFBIG;
-       }
-
        page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT;
        page_count -= user_addr >> PAGE_SHIFT;
 
@@ -108,75 +149,117 @@ nfs_get_user_pages(int rw, unsigned long user_addr, size_t size,
                                        page_count, (rw == READ), 0,
                                        *pages, NULL);
                up_read(&current->mm->mmap_sem);
-               /*
-                * If we got fewer pages than expected from get_user_pages(),
-                * the user buffer runs off the end of a mapping; return EFAULT.
-                */
-               if (result >= 0 && result < page_count) {
-                       nfs_free_user_pages(*pages, result, 0);
+               if (result != page_count) {
+                       /*
+                        * If we got fewer pages than expected from
+                        * get_user_pages(), the user buffer runs off the
+                        * end of a mapping; return EFAULT.
+                        */
+                       if (result >= 0) {
+                               nfs_free_user_pages(*pages, result, 0);
+                               result = -EFAULT;
+                       } else
+                               kfree(*pages);
                        *pages = NULL;
-                       result = -EFAULT;
                }
        }
        return result;
 }
 
-/**
- * nfs_free_user_pages - tear down page struct array
- * @pages: array of page struct pointers underlying target buffer
- * @npages: number of pages in the array
- * @do_dirty: dirty the pages as we release them
- */
-static void
-nfs_free_user_pages(struct page **pages, int npages, int do_dirty)
+static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
 {
-       int i;
-       for (i = 0; i < npages; i++) {
-               struct page *page = pages[i];
-               if (do_dirty && !PageCompound(page))
-                       set_page_dirty_lock(page);
-               page_cache_release(page);
-       }
-       kfree(pages);
+       struct nfs_direct_req *dreq;
+
+       dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+       if (!dreq)
+               return NULL;
+
+       kref_init(&dreq->kref);
+       init_completion(&dreq->completion);
+       INIT_LIST_HEAD(&dreq->list);
+       INIT_LIST_HEAD(&dreq->rewrite_list);
+       dreq->iocb = NULL;
+       dreq->ctx = NULL;
+       spin_lock_init(&dreq->lock);
+       dreq->outstanding = 0;
+       dreq->count = 0;
+       dreq->error = 0;
+       dreq->flags = 0;
+
+       return dreq;
 }
 
-/**
- * nfs_direct_req_release - release  nfs_direct_req structure for direct read
- * @kref: kref object embedded in an nfs_direct_req structure
- *
- */
 static void nfs_direct_req_release(struct kref *kref)
 {
        struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+
+       if (dreq->ctx != NULL)
+               put_nfs_open_context(dreq->ctx);
        kmem_cache_free(nfs_direct_cachep, dreq);
 }
 
-/**
- * nfs_direct_read_alloc - allocate nfs_read_data structures for direct read
- * @count: count of bytes for the read request
- * @rsize: local rsize setting
+/*
+ * Collects and returns the final error value/byte-count.
+ */
+static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
+{
+       ssize_t result = -EIOCBQUEUED;
+
+       /* Async requests don't wait here */
+       if (dreq->iocb)
+               goto out;
+
+       result = wait_for_completion_interruptible(&dreq->completion);
+
+       if (!result)
+               result = dreq->error;
+       if (!result)
+               result = dreq->count;
+
+out:
+       kref_put(&dreq->kref, nfs_direct_req_release);
+       return (ssize_t) result;
+}
+
+/*
+ * We must hold a reference to all the pages in this direct read request
+ * until the RPCs complete.  This could be long *after* we are woken up in
+ * nfs_direct_wait (for instance, if someone hits ^C on a slow server).
  *
+ * In addition, synchronous I/O uses a stack-allocated iocb.  Thus we
+ * can't trust the iocb is still valid here if this is a synchronous
+ * request.  If the waiter is woken prematurely, the iocb is long gone.
+ */
+static void nfs_direct_complete(struct nfs_direct_req *dreq)
+{
+       nfs_free_user_pages(dreq->pages, dreq->npages, 1);
+
+       if (dreq->iocb) {
+               long res = (long) dreq->error;
+               if (!res)
+                       res = (long) dreq->count;
+               aio_complete(dreq->iocb, res, 0);
+       }
+       complete_all(&dreq->completion);
+
+       kref_put(&dreq->kref, nfs_direct_req_release);
+}
+
+/*
  * Note we also set the number of requests we have in the dreq when we are
  * done.  This prevents races with I/O completion so we will always wait
  * until all requests have been dispatched and completed.
  */
-static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int rsize)
+static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, size_t rsize)
 {
        struct list_head *list;
        struct nfs_direct_req *dreq;
-       unsigned int reads = 0;
        unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-       dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
+       dreq = nfs_direct_req_alloc();
        if (!dreq)
                return NULL;
 
-       kref_init(&dreq->kref);
-       init_waitqueue_head(&dreq->wait);
-       INIT_LIST_HEAD(&dreq->list);
-       atomic_set(&dreq->count, 0);
-       atomic_set(&dreq->error, 0);
-
        list = &dreq->list;
        for(;;) {
                struct nfs_read_data *data = nfs_readdata_alloc(rpages);
@@ -196,72 +279,70 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
                list_add(&data->pages, list);
 
                data->req = (struct nfs_page *) dreq;
-               reads++;
+               dreq->outstanding++;
                if (nbytes <= rsize)
                        break;
                nbytes -= rsize;
        }
        kref_get(&dreq->kref);
-       atomic_set(&dreq->complete, reads);
        return dreq;
 }
 
-/**
- * nfs_direct_read_result - handle a read reply for a direct read request
- * @data: address of NFS READ operation control block
- * @status: status of this NFS READ operation
- *
- * We must hold a reference to all the pages in this direct read request
- * until the RPCs complete.  This could be long *after* we are woken up in
- * nfs_direct_read_wait (for instance, if someone hits ^C on a slow server).
- */
-static void nfs_direct_read_result(struct nfs_read_data *data, int status)
+static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-       if (likely(status >= 0))
-               atomic_add(data->res.count, &dreq->count);
+       if (nfs_readpage_result(task, data) != 0)
+               return;
+
+       spin_lock(&dreq->lock);
+
+       if (likely(task->tk_status >= 0))
+               dreq->count += data->res.count;
        else
-               atomic_set(&dreq->error, status);
+               dreq->error = task->tk_status;
 
-       if (unlikely(atomic_dec_and_test(&dreq->complete))) {
-               nfs_free_user_pages(dreq->pages, dreq->npages, 1);
-               wake_up(&dreq->wait);
-               kref_put(&dreq->kref, nfs_direct_req_release);
+       if (--dreq->outstanding) {
+               spin_unlock(&dreq->lock);
+               return;
        }
+
+       spin_unlock(&dreq->lock);
+       nfs_direct_complete(dreq);
 }
 
-/**
- * nfs_direct_read_schedule - dispatch NFS READ operations for a direct read
- * @dreq: address of nfs_direct_req struct for this request
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- *
+static const struct rpc_call_ops nfs_read_direct_ops = {
+       .rpc_call_done = nfs_direct_read_result,
+       .rpc_release = nfs_readdata_release,
+};
+
+/*
  * For each nfs_read_data struct that was allocated on the list, dispatch
  * an NFS READ operation
  */
-static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
-               struct inode *inode, struct nfs_open_context *ctx,
-               unsigned long user_addr, size_t count, loff_t file_offset)
+static void nfs_direct_read_schedule(struct nfs_direct_req *dreq)
 {
+       struct nfs_open_context *ctx = dreq->ctx;
+       struct inode *inode = ctx->dentry->d_inode;
        struct list_head *list = &dreq->list;
        struct page **pages = dreq->pages;
+       size_t count = dreq->user_count;
+       loff_t pos = dreq->pos;
+       size_t rsize = NFS_SERVER(inode)->rsize;
        unsigned int curpage, pgbase;
-       unsigned int rsize = NFS_SERVER(inode)->rsize;
 
        curpage = 0;
-       pgbase = user_addr & ~PAGE_MASK;
+       pgbase = dreq->user_addr & ~PAGE_MASK;
        do {
                struct nfs_read_data *data;
-               unsigned int bytes;
+               size_t bytes;
 
                bytes = rsize;
                if (count < rsize)
                        bytes = count;
 
+               BUG_ON(list_empty(list));
                data = list_entry(list->next, struct nfs_read_data, pages);
                list_del_init(&data->pages);
 
@@ -269,7 +350,7 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
                data->cred = ctx->cred;
                data->args.fh = NFS_FH(inode);
                data->args.context = ctx;
-               data->args.offset = file_offset;
+               data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = &pages[curpage];
                data->args.count = bytes;
@@ -277,77 +358,38 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
                data->res.eof = 0;
                data->res.count = bytes;
 
+               rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+                               &nfs_read_direct_ops, data);
                NFS_PROTO(inode)->read_setup(data);
 
                data->task.tk_cookie = (unsigned long) inode;
-               data->complete = nfs_direct_read_result;
 
                lock_kernel();
                rpc_execute(&data->task);
                unlock_kernel();
 
-               dfprintk(VFS, "NFS: %4d initiated direct read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+               dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
                                data->task.tk_pid,
                                inode->i_sb->s_id,
                                (long long)NFS_FILEID(inode),
                                bytes,
                                (unsigned long long)data->args.offset);
 
-               file_offset += bytes;
+               pos += bytes;
                pgbase += bytes;
                curpage += pgbase >> PAGE_SHIFT;
                pgbase &= ~PAGE_MASK;
 
                count -= bytes;
        } while (count != 0);
+       BUG_ON(!list_empty(list));
 }
 
-/**
- * nfs_direct_read_wait - wait for I/O completion for direct reads
- * @dreq: request on which we are to wait
- * @intr: whether or not this wait can be interrupted
- *
- * Collects and returns the final error value/byte-count.
- */
-static ssize_t nfs_direct_read_wait(struct nfs_direct_req *dreq, int intr)
-{
-       int result = 0;
-
-       if (intr) {
-               result = wait_event_interruptible(dreq->wait,
-                                       (atomic_read(&dreq->complete) == 0));
-       } else {
-               wait_event(dreq->wait, (atomic_read(&dreq->complete) == 0));
-       }
-
-       if (!result)
-               result = atomic_read(&dreq->error);
-       if (!result)
-               result = atomic_read(&dreq->count);
-
-       kref_put(&dreq->kref, nfs_direct_req_release);
-       return (ssize_t) result;
-}
-
-/**
- * nfs_direct_read_seg - Read in one iov segment.  Generate separate
- *                        read RPCs for each "rsize" bytes.
- * @inode: target inode
- * @ctx: target file open context
- * @user_addr: starting address of this segment of user's buffer
- * @count: size of this segment
- * @file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * @nr_pages: number of pages in the array
- *
- */
-static ssize_t nfs_direct_read_seg(struct inode *inode,
-               struct nfs_open_context *ctx, unsigned long user_addr,
-               size_t count, loff_t file_offset, struct page **pages,
-               unsigned int nr_pages)
+static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, unsigned int nr_pages)
 {
        ssize_t result;
        sigset_t oldset;
+       struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_direct_req *dreq;
 
@@ -355,284 +397,350 @@ static ssize_t nfs_direct_read_seg(struct inode *inode,
        if (!dreq)
                return -ENOMEM;
 
+       dreq->user_addr = user_addr;
+       dreq->user_count = count;
+       dreq->pos = pos;
        dreq->pages = pages;
        dreq->npages = nr_pages;
+       dreq->inode = inode;
+       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+       if (!is_sync_kiocb(iocb))
+               dreq->iocb = iocb;
 
+       nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, count);
        rpc_clnt_sigmask(clnt, &oldset);
-       nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count,
-                                file_offset);
-       result = nfs_direct_read_wait(dreq, clnt->cl_intr);
+       nfs_direct_read_schedule(dreq);
+       result = nfs_direct_wait(dreq);
        rpc_clnt_sigunmask(clnt, &oldset);
 
        return result;
 }
 
-/**
- * nfs_direct_read - For each iov segment, map the user's buffer
- *                   then generate read RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * We've already pushed out any non-direct writes so that this read
- * will see them when we read from the server.
- */
-static ssize_t
-nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx,
-               const struct iovec *iov, loff_t file_offset,
-               unsigned long nr_segs)
+static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
 {
-       ssize_t tot_bytes = 0;
-       unsigned long seg = 0;
-
-       while ((seg < nr_segs) && (tot_bytes >= 0)) {
-               ssize_t result;
-               int page_count;
-               struct page **pages;
-               const struct iovec *vec = &iov[seg++];
-               unsigned long user_addr = (unsigned long) vec->iov_base;
-               size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(READ, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-                       if (tot_bytes > 0)
-                               break;
-                        return page_count;
-                }
+       list_splice_init(&dreq->rewrite_list, &dreq->list);
+       while (!list_empty(&dreq->list)) {
+               struct nfs_write_data *data = list_entry(dreq->list.next, struct nfs_write_data, pages);
+               list_del(&data->pages);
+               nfs_writedata_release(data);
+       }
+}
 
-               result = nfs_direct_read_seg(inode, ctx, user_addr, size,
-                               file_offset, pages, page_count);
+#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
+{
+       struct list_head *pos;
 
-               if (result <= 0) {
-                       if (tot_bytes > 0)
-                               break;
-                       return result;
-               }
-               tot_bytes += result;
-               file_offset += result;
-               if (result < size)
-                       break;
+       list_splice_init(&dreq->rewrite_list, &dreq->list);
+       list_for_each(pos, &dreq->list)
+               dreq->outstanding++;
+       dreq->count = 0;
+
+       nfs_direct_write_schedule(dreq, FLUSH_STABLE);
+}
+
+static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
+{
+       struct nfs_write_data *data = calldata;
+       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+
+       /* Call the NFS version-specific code */
+       if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+               return;
+       if (unlikely(task->tk_status < 0)) {
+               dreq->error = task->tk_status;
+               dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+       }
+       if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+               dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+               dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
        }
 
-       return tot_bytes;
+       dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+       nfs_direct_write_complete(dreq, data->inode);
 }
 
-/**
- * nfs_direct_write_seg - Write out one iov segment.  Generate separate
- *                        write RPCs for each "wsize" bytes, then commit.
- * @inode: target inode
- * @ctx: target file open context
- * user_addr: starting address of this segment of user's buffer
- * count: size of this segment
- * file_offset: offset in file to begin the operation
- * @pages: array of addresses of page structs defining user's buffer
- * nr_pages: size of pages array
- */
-static ssize_t nfs_direct_write_seg(struct inode *inode,
-               struct nfs_open_context *ctx, unsigned long user_addr,
-               size_t count, loff_t file_offset, struct page **pages,
-               int nr_pages)
+static const struct rpc_call_ops nfs_commit_direct_ops = {
+       .rpc_call_done = nfs_direct_commit_result,
+       .rpc_release = nfs_commit_release,
+};
+
+static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
-       const unsigned int wsize = NFS_SERVER(inode)->wsize;
-       size_t request;
-       int curpage, need_commit;
-       ssize_t result, tot_bytes;
-       struct nfs_writeverf first_verf;
-       struct nfs_write_data *wdata;
-
-       wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
-       if (!wdata)
-               return -ENOMEM;
+       struct nfs_write_data *data = dreq->commit_data;
+       struct rpc_task *task = &data->task;
 
-       wdata->inode = inode;
-       wdata->cred = ctx->cred;
-       wdata->args.fh = NFS_FH(inode);
-       wdata->args.context = ctx;
-       wdata->args.stable = NFS_UNSTABLE;
-       if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-               wdata->args.stable = NFS_FILE_SYNC;
-       wdata->res.fattr = &wdata->fattr;
-       wdata->res.verf = &wdata->verf;
+       data->inode = dreq->inode;
+       data->cred = dreq->ctx->cred;
 
-       nfs_begin_data_update(inode);
-retry:
-       need_commit = 0;
-       tot_bytes = 0;
-       curpage = 0;
-       request = count;
-       wdata->args.pgbase = user_addr & ~PAGE_MASK;
-       wdata->args.offset = file_offset;
-       do {
-               wdata->args.count = request;
-               if (wdata->args.count > wsize)
-                       wdata->args.count = wsize;
-               wdata->args.pages = &pages[curpage];
+       data->args.fh = NFS_FH(data->inode);
+       data->args.offset = dreq->pos;
+       data->args.count = dreq->user_count;
+       data->res.count = 0;
+       data->res.fattr = &data->fattr;
+       data->res.verf = &data->verf;
 
-               dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-                       wdata->args.count, (long long) wdata->args.offset,
-                       user_addr + tot_bytes, wdata->args.pgbase, curpage);
+       rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
+                               &nfs_commit_direct_ops, data);
+       NFS_PROTO(data->inode)->commit_setup(data, 0);
 
-               lock_kernel();
-               result = NFS_PROTO(inode)->write(wdata);
-               unlock_kernel();
+       data->task.tk_priority = RPC_PRIORITY_NORMAL;
+       data->task.tk_cookie = (unsigned long)data->inode;
+       /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
+       dreq->commit_data = NULL;
 
-               if (result <= 0) {
-                       if (tot_bytes > 0)
-                               break;
-                       goto out;
-               }
+       dprintk("NFS: %5u initiated commit call\n", task->tk_pid);
 
-               if (tot_bytes == 0)
-                       memcpy(&first_verf.verifier, &wdata->verf.verifier,
-                                               sizeof(first_verf.verifier));
-               if (wdata->verf.committed != NFS_FILE_SYNC) {
-                       need_commit = 1;
-                       if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
-                                       sizeof(first_verf.verifier)))
-                               goto sync_retry;
-               }
+       lock_kernel();
+       rpc_execute(&data->task);
+       unlock_kernel();
+}
 
-               tot_bytes += result;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+       int flags = dreq->flags;
 
-               /* in case of a short write: stop now, let the app recover */
-               if (result < wdata->args.count)
+       dreq->flags = 0;
+       switch (flags) {
+               case NFS_ODIRECT_DO_COMMIT:
+                       nfs_direct_commit_schedule(dreq);
                        break;
+               case NFS_ODIRECT_RESCHED_WRITES:
+                       nfs_direct_write_reschedule(dreq);
+                       break;
+               default:
+                       nfs_end_data_update(inode);
+                       if (dreq->commit_data != NULL)
+                               nfs_commit_free(dreq->commit_data);
+                       nfs_direct_free_writedata(dreq);
+                       nfs_direct_complete(dreq);
+       }
+}
 
-               wdata->args.offset += result;
-               wdata->args.pgbase += result;
-               curpage += wdata->args.pgbase >> PAGE_SHIFT;
-               wdata->args.pgbase &= ~PAGE_MASK;
-               request -= result;
-       } while (request != 0);
+static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+       dreq->commit_data = nfs_commit_alloc(0);
+       if (dreq->commit_data != NULL)
+               dreq->commit_data->req = (struct nfs_page *) dreq;
+}
+#else
+static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
+{
+       dreq->commit_data = NULL;
+}
 
-       /*
-        * Commit data written so far, even in the event of an error
-        */
-       if (need_commit) {
-               wdata->args.count = tot_bytes;
-               wdata->args.offset = file_offset;
+static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
+{
+       nfs_end_data_update(inode);
+       nfs_direct_free_writedata(dreq);
+       nfs_direct_complete(dreq);
+}
+#endif
 
-               lock_kernel();
-               result = NFS_PROTO(inode)->commit(wdata);
-               unlock_kernel();
+static struct nfs_direct_req *nfs_direct_write_alloc(size_t nbytes, size_t wsize)
+{
+       struct list_head *list;
+       struct nfs_direct_req *dreq;
+       unsigned int wpages = (wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+       dreq = nfs_direct_req_alloc();
+       if (!dreq)
+               return NULL;
+
+       list = &dreq->list;
+       for(;;) {
+               struct nfs_write_data *data = nfs_writedata_alloc(wpages);
 
-               if (result < 0 || memcmp(&first_verf.verifier,
-                                        &wdata->verf.verifier,
-                                        sizeof(first_verf.verifier)) != 0)
-                       goto sync_retry;
+               if (unlikely(!data)) {
+                       while (!list_empty(list)) {
+                               data = list_entry(list->next,
+                                                 struct nfs_write_data, pages);
+                               list_del(&data->pages);
+                               nfs_writedata_free(data);
+                       }
+                       kref_put(&dreq->kref, nfs_direct_req_release);
+                       return NULL;
+               }
+
+               INIT_LIST_HEAD(&data->pages);
+               list_add(&data->pages, list);
+
+               data->req = (struct nfs_page *) dreq;
+               dreq->outstanding++;
+               if (nbytes <= wsize)
+                       break;
+               nbytes -= wsize;
        }
-       result = tot_bytes;
 
-out:
-       nfs_end_data_update(inode);
-       nfs_writedata_free(wdata);
-       return result;
+       nfs_alloc_commit_data(dreq);
 
-sync_retry:
-       wdata->args.stable = NFS_FILE_SYNC;
-       goto retry;
+       kref_get(&dreq->kref);
+       return dreq;
 }
 
-/**
- * nfs_direct_write - For each iov segment, map the user's buffer
- *                    then generate write and commit RPCs.
- * @inode: target inode
- * @ctx: target file open context
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
- * Upon return, generic_file_direct_IO invalidates any cached pages
- * that non-direct readers might access, so they will pick up these
- * writes immediately.
- */
-static ssize_t nfs_direct_write(struct inode *inode,
-               struct nfs_open_context *ctx, const struct iovec *iov,
-               loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
-       ssize_t tot_bytes = 0;
-       unsigned long seg = 0;
-
-       while ((seg < nr_segs) && (tot_bytes >= 0)) {
-               ssize_t result;
-               int page_count;
-               struct page **pages;
-               const struct iovec *vec = &iov[seg++];
-               unsigned long user_addr = (unsigned long) vec->iov_base;
-               size_t size = vec->iov_len;
-
-                page_count = nfs_get_user_pages(WRITE, user_addr, size, &pages);
-                if (page_count < 0) {
-                        nfs_free_user_pages(pages, 0, 0);
-                       if (tot_bytes > 0)
-                               break;
-                        return page_count;
-                }
+       struct nfs_write_data *data = calldata;
+       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+       int status = task->tk_status;
+
+       if (nfs_writeback_done(task, data) != 0)
+               return;
+
+       spin_lock(&dreq->lock);
 
-               result = nfs_direct_write_seg(inode, ctx, user_addr, size,
-                               file_offset, pages, page_count);
-               nfs_free_user_pages(pages, page_count, 0);
+       if (likely(status >= 0))
+               dreq->count += data->res.count;
+       else
+               dreq->error = task->tk_status;
 
-               if (result <= 0) {
-                       if (tot_bytes > 0)
+       if (data->res.verf->committed != NFS_FILE_SYNC) {
+               switch (dreq->flags) {
+                       case 0:
+                               memcpy(&dreq->verf, &data->verf, sizeof(dreq->verf));
+                               dreq->flags = NFS_ODIRECT_DO_COMMIT;
                                break;
-                       return result;
+                       case NFS_ODIRECT_DO_COMMIT:
+                               if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
+                                       dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+                                       dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+                               }
                }
-               tot_bytes += result;
-               file_offset += result;
-               if (result < size)
-                       break;
        }
-       return tot_bytes;
+       /* In case we have to resend */
+       data->args.stable = NFS_FILE_SYNC;
+
+       spin_unlock(&dreq->lock);
 }
 
-/**
- * nfs_direct_IO - NFS address space operation for direct I/O
- * rw: direction (read or write)
- * @iocb: target I/O control block
- * @iov: array of vectors that define I/O buffer
- * file_offset: offset in file to begin the operation
- * nr_segs: size of iovec array
- *
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
  */
-ssize_t
-nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-               loff_t file_offset, unsigned long nr_segs)
+static void nfs_direct_write_release(void *calldata)
 {
-       ssize_t result = -EINVAL;
-       struct file *file = iocb->ki_filp;
-       struct nfs_open_context *ctx;
-       struct dentry *dentry = file->f_dentry;
-       struct inode *inode = dentry->d_inode;
+       struct nfs_write_data *data = calldata;
+       struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-       /*
-        * No support for async yet
-        */
-       if (!is_sync_kiocb(iocb))
-               return result;
-
-       ctx = (struct nfs_open_context *)file->private_data;
-       switch (rw) {
-       case READ:
-               dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n",
-                               dentry->d_name.name, file_offset, nr_segs);
-
-               result = nfs_direct_read(inode, ctx, iov,
-                                               file_offset, nr_segs);
-               break;
-       case WRITE:
-               dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n",
-                               dentry->d_name.name, file_offset, nr_segs);
-
-               result = nfs_direct_write(inode, ctx, iov,
-                                               file_offset, nr_segs);
-               break;
-       default:
-               break;
+       spin_lock(&dreq->lock);
+       if (--dreq->outstanding) {
+               spin_unlock(&dreq->lock);
+               return;
        }
+       spin_unlock(&dreq->lock);
+
+       nfs_direct_write_complete(dreq, data->inode);
+}
+
+static const struct rpc_call_ops nfs_write_direct_ops = {
+       .rpc_call_done = nfs_direct_write_result,
+       .rpc_release = nfs_direct_write_release,
+};
+
+/*
+ * For each nfs_write_data struct that was allocated on the list, dispatch
+ * an NFS WRITE operation
+ */
+static void nfs_direct_write_schedule(struct nfs_direct_req *dreq, int sync)
+{
+       struct nfs_open_context *ctx = dreq->ctx;
+       struct inode *inode = ctx->dentry->d_inode;
+       struct list_head *list = &dreq->list;
+       struct page **pages = dreq->pages;
+       size_t count = dreq->user_count;
+       loff_t pos = dreq->pos;
+       size_t wsize = NFS_SERVER(inode)->wsize;
+       unsigned int curpage, pgbase;
+
+       curpage = 0;
+       pgbase = dreq->user_addr & ~PAGE_MASK;
+       do {
+               struct nfs_write_data *data;
+               size_t bytes;
+
+               bytes = wsize;
+               if (count < wsize)
+                       bytes = count;
+
+               BUG_ON(list_empty(list));
+               data = list_entry(list->next, struct nfs_write_data, pages);
+               list_move_tail(&data->pages, &dreq->rewrite_list);
+
+               data->inode = inode;
+               data->cred = ctx->cred;
+               data->args.fh = NFS_FH(inode);
+               data->args.context = ctx;
+               data->args.offset = pos;
+               data->args.pgbase = pgbase;
+               data->args.pages = &pages[curpage];
+               data->args.count = bytes;
+               data->res.fattr = &data->fattr;
+               data->res.count = bytes;
+               data->res.verf = &data->verf;
+
+               rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
+                               &nfs_write_direct_ops, data);
+               NFS_PROTO(inode)->write_setup(data, sync);
+
+               data->task.tk_priority = RPC_PRIORITY_NORMAL;
+               data->task.tk_cookie = (unsigned long) inode;
+
+               lock_kernel();
+               rpc_execute(&data->task);
+               unlock_kernel();
+
+               dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+                               data->task.tk_pid,
+                               inode->i_sb->s_id,
+                               (long long)NFS_FILEID(inode),
+                               bytes,
+                               (unsigned long long)data->args.offset);
+
+               pos += bytes;
+               pgbase += bytes;
+               curpage += pgbase >> PAGE_SHIFT;
+               pgbase &= ~PAGE_MASK;
+
+               count -= bytes;
+       } while (count != 0);
+       BUG_ON(!list_empty(list));
+}
+
+static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, size_t count, loff_t pos, struct page **pages, int nr_pages)
+{
+       ssize_t result;
+       sigset_t oldset;
+       struct inode *inode = iocb->ki_filp->f_mapping->host;
+       struct rpc_clnt *clnt = NFS_CLIENT(inode);
+       struct nfs_direct_req *dreq;
+       size_t wsize = NFS_SERVER(inode)->wsize;
+       int sync = 0;
+
+       dreq = nfs_direct_write_alloc(count, wsize);
+       if (!dreq)
+               return -ENOMEM;
+       if (dreq->commit_data == NULL || count < wsize)
+               sync = FLUSH_STABLE;
+
+       dreq->user_addr = user_addr;
+       dreq->user_count = count;
+       dreq->pos = pos;
+       dreq->pages = pages;
+       dreq->npages = nr_pages;
+       dreq->inode = inode;
+       dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data);
+       if (!is_sync_kiocb(iocb))
+               dreq->iocb = iocb;
+
+       nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count);
+
+       nfs_begin_data_update(inode);
+
+       rpc_clnt_sigmask(clnt, &oldset);
+       nfs_direct_write_schedule(dreq, sync);
+       result = nfs_direct_wait(dreq);
+       rpc_clnt_sigunmask(clnt, &oldset);
+
        return result;
 }
 
@@ -640,49 +748,40 @@ nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
  * nfs_file_direct_read - file direct read operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer into which to read data
- * count: number of bytes to read
- * pos: byte offset in file where reading starts
+ * @count: number of bytes to read
+ * @pos: byte offset in file where reading starts
  *
  * We use this function for direct reads instead of calling
  * generic_file_aio_read() in order to avoid gfar's check to see if
  * the request starts before the end of the file.  For that check
  * to work, we must generate a GETATTR before each direct read, and
  * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change.  So our preference is simply
+ * READ where the file size could change.  Our preference is simply
  * to do all reads the application wants, and the server will take
  * care of managing the end of file boundary.
- * 
+ *
  * This function also eliminates unnecessarily updating the file's
  * atime locally, as the NFS server sets the file's atime, and this
  * client must read the updated atime from the server back into its
  * cache.
  */
-ssize_t
-nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
 {
        ssize_t retval = -EINVAL;
-       loff_t *ppos = &iocb->ki_pos;
+       int page_count;
+       struct page **pages;
        struct file *file = iocb->ki_filp;
-       struct nfs_open_context *ctx =
-                       (struct nfs_open_context *) file->private_data;
        struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       struct iovec iov = {
-               .iov_base = buf,
-               .iov_len = count,
-       };
 
        dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n",
                file->f_dentry->d_parent->d_name.name,
                file->f_dentry->d_name.name,
                (unsigned long) count, (long long) pos);
 
-       if (!is_sync_kiocb(iocb))
-               goto out;
        if (count < 0)
                goto out;
        retval = -EFAULT;
-       if (!access_ok(VERIFY_WRITE, iov.iov_base, iov.iov_len))
+       if (!access_ok(VERIFY_WRITE, buf, count))
                goto out;
        retval = 0;
        if (!count)
@@ -692,9 +791,16 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
        if (retval)
                goto out;
 
-       retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
+       retval = nfs_get_user_pages(READ, (unsigned long) buf,
+                                               count, &pages);
+       if (retval < 0)
+               goto out;
+       page_count = retval;
+
+       retval = nfs_direct_read(iocb, (unsigned long) buf, count, pos,
+                                               pages, page_count);
        if (retval > 0)
-               *ppos = pos + retval;
+               iocb->ki_pos = pos + retval;
 
 out:
        return retval;
@@ -704,8 +810,8 @@ out:
  * nfs_file_direct_write - file direct write operation for NFS files
  * @iocb: target I/O control block
  * @buf: user's buffer from which to write data
- * count: number of bytes to write
- * pos: byte offset in file where writing starts
+ * @count: number of bytes to write
+ * @pos: byte offset in file where writing starts
  *
  * We use this function for direct writes instead of calling
  * generic_file_aio_write() in order to avoid taking the inode
@@ -725,28 +831,19 @@ out:
  * Note that O_APPEND is not supported for NFS direct writes, as there
  * is no atomic O_APPEND write facility in the NFS protocol.
  */
-ssize_t
-nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos)
 {
        ssize_t retval;
+       int page_count;
+       struct page **pages;
        struct file *file = iocb->ki_filp;
-       struct nfs_open_context *ctx =
-                       (struct nfs_open_context *) file->private_data;
        struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       struct iovec iov = {
-               .iov_base = (char __user *)buf,
-       };
 
        dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
                file->f_dentry->d_parent->d_name.name,
                file->f_dentry->d_name.name,
                (unsigned long) count, (long long) pos);
 
-       retval = -EINVAL;
-       if (!is_sync_kiocb(iocb))
-               goto out;
-
        retval = generic_write_checks(file, &pos, &count, 0);
        if (retval)
                goto out;
@@ -757,19 +854,35 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
        retval = 0;
        if (!count)
                goto out;
-       iov.iov_len = count,
 
        retval = -EFAULT;
-       if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
+       if (!access_ok(VERIFY_READ, buf, count))
                goto out;
 
        retval = nfs_sync_mapping(mapping);
        if (retval)
                goto out;
 
-       retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
+       retval = nfs_get_user_pages(WRITE, (unsigned long) buf,
+                                               count, &pages);
+       if (retval < 0)
+               goto out;
+       page_count = retval;
+
+       retval = nfs_direct_write(iocb, (unsigned long) buf, count,
+                                       pos, pages, page_count);
+
+       /*
+        * XXX: nfs_end_data_update() already ensures this file's
+        *      cached data is subsequently invalidated.  Do we really
+        *      need to call invalidate_inode_pages2() again here?
+        *
+        *      For aio writes, this invalidation will almost certainly
+        *      occur before the writes complete.  Kind of racy.
+        */
        if (mapping->nrpages)
                invalidate_inode_pages2(mapping);
+
        if (retval > 0)
                iocb->ki_pos = pos + retval;
 
@@ -777,6 +890,10 @@ out:
        return retval;
 }
 
+/**
+ * nfs_init_directcache - create a slab cache for nfs_direct_req structures
+ *
+ */
 int nfs_init_directcache(void)
 {
        nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
@@ -789,6 +906,10 @@ int nfs_init_directcache(void)
        return 0;
 }
 
+/**
+ * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
+ *
+ */
 void nfs_destroy_directcache(void)
 {
        if (kmem_cache_destroy(nfs_direct_cachep))
index 7a79fbe..5263b28 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/system.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_FILE
 
@@ -102,18 +103,15 @@ static int nfs_check_flags(int flags)
 static int
 nfs_file_open(struct inode *inode, struct file *filp)
 {
-       struct nfs_server *server = NFS_SERVER(inode);
-       int (*open)(struct inode *, struct file *);
        int res;
 
        res = nfs_check_flags(filp->f_flags);
        if (res)
                return res;
 
+       nfs_inc_stats(inode, NFSIOS_VFSOPEN);
        lock_kernel();
-       /* Do NFSv4 open() call */
-       if ((open = server->rpc_ops->file_open) != NULL)
-               res = open(inode, filp);
+       res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp);
        unlock_kernel();
        return res;
 }
@@ -124,6 +122,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
        /* Ensure that dirty pages are flushed out with the right creds */
        if (filp->f_mode & FMODE_WRITE)
                filemap_fdatawrite(filp->f_mapping);
+       nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
        return NFS_PROTO(inode)->file_release(inode, filp);
 }
 
@@ -199,6 +198,7 @@ nfs_file_flush(struct file *file)
 
        if ((file->f_mode & FMODE_WRITE) == 0)
                return 0;
+       nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
        lock_kernel();
        /* Ensure that data+attribute caches are up to date after close() */
        status = nfs_wb_all(inode);
@@ -229,6 +229,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos)
                (unsigned long) count, (unsigned long) pos);
 
        result = nfs_revalidate_file(inode, iocb->ki_filp);
+       nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
        if (!result)
                result = generic_file_aio_read(iocb, buf, count, pos);
        return result;
@@ -282,6 +283,7 @@ nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 
        dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
 
+       nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
        lock_kernel();
        status = nfs_wb_all(inode);
        if (!status) {
@@ -316,6 +318,17 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
        return status;
 }
 
+static int nfs_invalidate_page(struct page *page, unsigned long offset)
+{
+       /* FIXME: we really should cancel any unstarted writes on this page */
+       return 1;
+}
+
+static int nfs_release_page(struct page *page, gfp_t gfp)
+{
+       return !nfs_wb_page(page->mapping->host, page);
+}
+
 struct address_space_operations nfs_file_aops = {
        .readpage = nfs_readpage,
        .readpages = nfs_readpages,
@@ -324,6 +337,8 @@ struct address_space_operations nfs_file_aops = {
        .writepages = nfs_writepages,
        .prepare_write = nfs_prepare_write,
        .commit_write = nfs_commit_write,
+       .invalidatepage = nfs_invalidate_page,
+       .releasepage = nfs_release_page,
 #ifdef CONFIG_NFS_DIRECTIO
        .direct_IO = nfs_direct_IO,
 #endif
@@ -365,6 +380,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t
        if (!count)
                goto out;
 
+       nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
        result = generic_file_aio_write(iocb, buf, count, pos);
 out:
        return result;
@@ -376,15 +392,17 @@ out_swapfile:
 
 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
 {
-       struct file_lock *cfl;
+       struct file_lock cfl;
        struct inode *inode = filp->f_mapping->host;
        int status = 0;
 
        lock_kernel();
        /* Try local locking first */
-       cfl = posix_test_lock(filp, fl);
-       if (cfl != NULL) {
-               locks_copy_lock(fl, cfl);
+       if (posix_test_lock(filp, fl, &cfl)) {
+               fl->fl_start = cfl.fl_start;
+               fl->fl_end = cfl.fl_end;
+               fl->fl_type = cfl.fl_type;
+               fl->fl_pid = cfl.fl_pid;
                goto out;
        }
 
@@ -425,10 +443,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 {
        struct inode *inode = filp->f_mapping->host;
-       sigset_t oldset;
        int status;
 
-       rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
        /*
         * Flush all pending writes before doing anything
         * with locks..
@@ -446,17 +462,14 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
        else
                status = do_vfs_lock(filp, fl);
        unlock_kernel();
-       rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
        return status;
 }
 
 static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 {
        struct inode *inode = filp->f_mapping->host;
-       sigset_t oldset;
        int status;
 
-       rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset);
        /*
         * Flush all pending writes before doing anything
         * with locks..
@@ -489,7 +502,6 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
        nfs_sync_mapping(filp->f_mapping);
        nfs_zap_caches(inode);
 out:
-       rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
        return status;
 }
 
@@ -504,9 +516,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
                        inode->i_sb->s_id, inode->i_ino,
                        fl->fl_type, fl->fl_flags,
                        (long long)fl->fl_start, (long long)fl->fl_end);
-
-       if (!inode)
-               return -EINVAL;
+       nfs_inc_stats(inode, NFSIOS_VFSLOCK);
 
        /* No mandatory locks over NFS */
        if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
@@ -531,9 +541,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
                        inode->i_sb->s_id, inode->i_ino,
                        fl->fl_type, fl->fl_flags);
 
-       if (!inode)
-               return -EINVAL;
-
        /*
         * No BSD flocks over NFS allowed.
         * Note: we could try to fake a POSIX lock request here by
index 821edd3..3fab5b0 100644 (file)
@@ -35,6 +35,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/slab.h>
@@ -74,8 +75,8 @@ struct idmap {
        struct dentry        *idmap_dentry;
        wait_queue_head_t     idmap_wq;
        struct idmap_msg      idmap_im;
-       struct semaphore      idmap_lock;    /* Serializes upcalls */
-       struct semaphore      idmap_im_lock; /* Protects the hashtable */
+       struct mutex          idmap_lock;    /* Serializes upcalls */
+       struct mutex          idmap_im_lock; /* Protects the hashtable */
        struct idmap_hashtable idmap_user_hash;
        struct idmap_hashtable idmap_group_hash;
 };
@@ -101,11 +102,9 @@ nfs_idmap_new(struct nfs4_client *clp)
 
        if (clp->cl_idmap != NULL)
                return;
-        if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
+        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
                 return;
 
-       memset(idmap, 0, sizeof(*idmap));
-
        snprintf(idmap->idmap_path, sizeof(idmap->idmap_path),
            "%s/idmap", clp->cl_rpcclient->cl_pathname);
 
@@ -116,8 +115,8 @@ nfs_idmap_new(struct nfs4_client *clp)
                return;
        }
 
-        init_MUTEX(&idmap->idmap_lock);
-        init_MUTEX(&idmap->idmap_im_lock);
+        mutex_init(&idmap->idmap_lock);
+        mutex_init(&idmap->idmap_im_lock);
        init_waitqueue_head(&idmap->idmap_wq);
        idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
        idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -132,6 +131,8 @@ nfs_idmap_delete(struct nfs4_client *clp)
 
        if (!idmap)
                return;
+       dput(idmap->idmap_dentry);
+       idmap->idmap_dentry = NULL;
        rpc_unlink(idmap->idmap_path);
        clp->cl_idmap = NULL;
        kfree(idmap);
@@ -232,8 +233,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
        if (namelen >= IDMAP_NAMESZ)
                return -EINVAL;
 
-       down(&idmap->idmap_lock);
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_lock);
+       mutex_lock(&idmap->idmap_im_lock);
 
        he = idmap_lookup_name(h, name, namelen);
        if (he != NULL) {
@@ -259,11 +260,11 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
        }
 
        set_current_state(TASK_UNINTERRUPTIBLE);
-       up(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
        schedule();
        current->state = TASK_RUNNING;
        remove_wait_queue(&idmap->idmap_wq, &wq);
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_im_lock);
 
        if (im->im_status & IDMAP_STATUS_SUCCESS) {
                *id = im->im_id;
@@ -272,8 +273,8 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
        memset(im, 0, sizeof(*im));
-       up(&idmap->idmap_im_lock);
-       up(&idmap->idmap_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_lock);
        return (ret);
 }
 
@@ -293,8 +294,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
        im = &idmap->idmap_im;
 
-       down(&idmap->idmap_lock);
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_lock);
+       mutex_lock(&idmap->idmap_im_lock);
 
        he = idmap_lookup_id(h, id);
        if (he != 0) {
@@ -320,11 +321,11 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
        }
 
        set_current_state(TASK_UNINTERRUPTIBLE);
-       up(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
        schedule();
        current->state = TASK_RUNNING;
        remove_wait_queue(&idmap->idmap_wq, &wq);
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_im_lock);
 
        if (im->im_status & IDMAP_STATUS_SUCCESS) {
                if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0)
@@ -335,8 +336,8 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 
  out:
        memset(im, 0, sizeof(*im));
-       up(&idmap->idmap_im_lock);
-       up(&idmap->idmap_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_lock);
        return ret;
 }
 
@@ -380,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
         if (copy_from_user(&im_in, src, mlen) != 0)
                return (-EFAULT);
 
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_im_lock);
 
        ret = mlen;
        im->im_status = im_in.im_status;
@@ -440,7 +441,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
                idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id);
        ret = mlen;
 out:
-       up(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
        return ret;
 }
 
@@ -452,10 +453,10 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
 
        if (msg->errno >= 0)
                return;
-       down(&idmap->idmap_im_lock);
+       mutex_lock(&idmap->idmap_im_lock);
        im->im_status = IDMAP_STATUS_LOOKUPFAIL;
        wake_up(&idmap->idmap_wq);
-       up(&idmap->idmap_im_lock);
+       mutex_unlock(&idmap->idmap_im_lock);
 }
 
 /* 
index a77ee95..60aac58 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/unistd.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/metrics.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
@@ -42,6 +43,7 @@
 #include "nfs4_fs.h"
 #include "callback.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_VFS
 #define NFS_PARANOIA 1
@@ -65,6 +67,7 @@ static void nfs_clear_inode(struct inode *);
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct super_block *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
+static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static void nfs_zap_acl_cache(struct inode *);
 
 static struct rpc_program      nfs_program;
@@ -78,6 +81,7 @@ static struct super_operations nfs_sops = {
        .clear_inode    = nfs_clear_inode,
        .umount_begin   = nfs_umount_begin,
        .show_options   = nfs_show_options,
+       .show_stats     = nfs_show_stats,
 };
 
 /*
@@ -133,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
 static int
 nfs_write_inode(struct inode *inode, int sync)
 {
-       int flags = sync ? FLUSH_WAIT : 0;
+       int flags = sync ? FLUSH_SYNC : 0;
        int ret;
 
        ret = nfs_commit_inode(inode, flags);
@@ -237,7 +241,6 @@ static struct inode *
 nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
 {
        struct nfs_server       *server = NFS_SB(sb);
-       struct inode *rooti;
        int                     error;
 
        error = server->rpc_ops->getroot(server, rootfh, fsinfo);
@@ -246,10 +249,7 @@ nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *f
                return ERR_PTR(error);
        }
 
-       rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
-       if (!rooti)
-               return ERR_PTR(-ENOMEM);
-       return rooti;
+       return nfs_fhget(sb, rootfh, fsinfo->fattr);
 }
 
 /*
@@ -277,6 +277,10 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 
        sb->s_magic      = NFS_SUPER_MAGIC;
 
+       server->io_stats = nfs_alloc_iostats();
+       if (server->io_stats == NULL)
+               return -ENOMEM;
+
        root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
        /* Did getting the root inode fail? */
        if (IS_ERR(root_inode)) {
@@ -290,6 +294,9 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
        }
        sb->s_root->d_op = server->rpc_ops->dentry_ops;
 
+       /* mount time stamp, in jiffies */
+       server->mount_time = jiffies;
+
        /* Get some general file system info */
        if (server->namelen == 0 &&
            server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
@@ -396,6 +403,9 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 
        nfs_init_timeout_values(&timeparms, proto, data->timeo, data->retrans);
 
+       server->retrans_timeo = timeparms.to_initval;
+       server->retrans_count = timeparms.to_retries;
+
        /* create transport and client */
        xprt = xprt_create_proto(proto, &server->addr, &timeparms);
        if (IS_ERR(xprt)) {
@@ -579,7 +589,7 @@ nfs_statfs(struct super_block *sb, struct kstatfs *buf)
 
 }
 
-static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
 {
        static struct proc_nfs_info {
                int flag;
@@ -588,28 +598,26 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
        } nfs_info[] = {
                { NFS_MOUNT_SOFT, ",soft", ",hard" },
                { NFS_MOUNT_INTR, ",intr", "" },
-               { NFS_MOUNT_POSIX, ",posix", "" },
                { NFS_MOUNT_NOCTO, ",nocto", "" },
                { NFS_MOUNT_NOAC, ",noac", "" },
-               { NFS_MOUNT_NONLM, ",nolock", ",lock" },
+               { NFS_MOUNT_NONLM, ",nolock", "" },
                { NFS_MOUNT_NOACL, ",noacl", "" },
                { 0, NULL, NULL }
        };
        struct proc_nfs_info *nfs_infop;
-       struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
        char buf[12];
        char *proto;
 
-       seq_printf(m, ",v%d", nfss->rpc_ops->version);
+       seq_printf(m, ",vers=%d", nfss->rpc_ops->version);
        seq_printf(m, ",rsize=%d", nfss->rsize);
        seq_printf(m, ",wsize=%d", nfss->wsize);
-       if (nfss->acregmin != 3*HZ)
+       if (nfss->acregmin != 3*HZ || showdefaults)
                seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
-       if (nfss->acregmax != 60*HZ)
+       if (nfss->acregmax != 60*HZ || showdefaults)
                seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
-       if (nfss->acdirmin != 30*HZ)
+       if (nfss->acdirmin != 30*HZ || showdefaults)
                seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
-       if (nfss->acdirmax != 60*HZ)
+       if (nfss->acdirmax != 60*HZ || showdefaults)
                seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
        for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
                if (nfss->flags & nfs_infop->flag)
@@ -629,8 +637,96 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
                        proto = buf;
        }
        seq_printf(m, ",proto=%s", proto);
+       seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ);
+       seq_printf(m, ",retrans=%u", nfss->retrans_count);
+}
+
+static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
+{      /* Emits the mount options, then ",addr=" and the escaped hostname. */
+       struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+
+       nfs_show_mount_options(m, nfss, 0);     /* showdefaults = 0 */
+
        seq_puts(m, ",addr=");
        seq_escape(m, nfss->hostname, " \t\n\\");
+
+       return 0;       /* the only return path */
+}
+
+static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
+{      /* Writes this mount's statistics report to m; always returns 0. */
+       int i, cpu;
+       struct nfs_server *nfss = NFS_SB(mnt->mnt_sb);
+       struct rpc_auth *auth = nfss->client->cl_auth;
+       struct nfs_iostats totals = { };        /* zeroed; receives the per-CPU sums */
+
+       seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS);
+
+       /*
+        * Display superblock flags, then all mount options (showdefaults = 1)
+        */
+       seq_printf(m, "\n\topts:\t");
+       seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw");
+       seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : "");
+       seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : "");
+       seq_puts(m, mnt->mnt_sb->s_flags & MS_NODIRATIME ? ",nodiratime" : "");
+       nfs_show_mount_options(m, nfss, 1);
+
+       seq_printf(m, "\n\tage:\t%lu", (jiffies - nfss->mount_time) / HZ);
+
+       seq_printf(m, "\n\tcaps:\t");
+       seq_printf(m, "caps=0x%x", nfss->caps);
+       seq_printf(m, ",wtmult=%d", nfss->wtmult);
+       seq_printf(m, ",dtsize=%d", nfss->dtsize);
+       seq_printf(m, ",bsize=%d", nfss->bsize);
+       seq_printf(m, ",namelen=%d", nfss->namelen);
+
+#ifdef CONFIG_NFS_V4
+       if (nfss->rpc_ops->version == 4) {
+               seq_printf(m, "\n\tnfsv4:\t");
+               seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
+               seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
+               seq_printf(m, ",acl=0x%x", nfss->acl_bitmask);
+       }
+#endif
+
+       /*
+        * Display security flavor; pseudoflavor only if au_flavor != 0
+        */
+       seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+       if (auth->au_flavor)
+               seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+
+       /*
+        * Sum the per-CPU I/O counters over every possible CPU, then display
+        */
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               struct nfs_iostats *stats;
+
+               if (!cpu_possible(cpu))
+                       continue;
+
+               preempt_disable();
+               stats = per_cpu_ptr(nfss->io_stats, cpu);
+
+               for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+                       totals.events[i] += stats->events[i];
+               for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+                       totals.bytes[i] += stats->bytes[i];
+
+               preempt_enable();
+       }
+
+       seq_printf(m, "\n\tevents:\t");
+       for (i = 0; i < __NFSIOS_COUNTSMAX; i++)
+               seq_printf(m, "%lu ", totals.events[i]);
+       seq_printf(m, "\n\tbytes:\t");
+       for (i = 0; i < __NFSIOS_BYTESMAX; i++)
+               seq_printf(m, "%Lu ", totals.bytes[i]);
+       seq_printf(m, "\n");
+
+       rpc_print_iostats(m, nfss->client);
+
+       return 0;
+}
 
@@ -660,6 +756,8 @@ static void nfs_zap_caches_locked(struct inode *inode)
        struct nfs_inode *nfsi = NFS_I(inode);
        int mode = inode->i_mode;
 
+       nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
+
        NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
        NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
 
@@ -751,7 +849,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                .fh     = fh,
                .fattr  = fattr
        };
-       struct inode *inode = NULL;
+       struct inode *inode = ERR_PTR(-ENOENT);
        unsigned long hash;
 
        if ((fattr->valid & NFS_ATTR_FATTR) == 0)
@@ -764,8 +862,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
        hash = nfs_fattr_to_ino_t(fattr);
 
-       if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc)))
+       inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc);
+       if (inode == NULL) {
+               inode = ERR_PTR(-ENOMEM);
                goto out_no_inode;
+       }
 
        if (inode->i_state & I_NEW) {
                struct nfs_inode *nfsi = NFS_I(inode);
@@ -834,7 +935,7 @@ out:
        return inode;
 
 out_no_inode:
-       printk("nfs_fhget: iget failed\n");
+       dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
        goto out;
 }
 
@@ -847,6 +948,8 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
        struct nfs_fattr fattr;
        int error;
 
+       nfs_inc_stats(inode, NFSIOS_VFSSETATTR);
+
        if (attr->ia_valid & ATTR_SIZE) {
                if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
                        attr->ia_valid &= ~ATTR_SIZE;
@@ -859,11 +962,9 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 
        lock_kernel();
        nfs_begin_data_update(inode);
-       /* Write all dirty data if we're changing file permissions or size */
-       if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-               filemap_write_and_wait(inode->i_mapping);
-               nfs_wb_all(inode);
-       }
+       /* Write all dirty data */
+       filemap_write_and_wait(inode->i_mapping);
+       nfs_wb_all(inode);
        /*
         * Return any delegations if we're going to change ACLs
         */
@@ -902,6 +1003,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
                spin_unlock(&inode->i_lock);
        }
        if ((attr->ia_valid & ATTR_SIZE) != 0) {
+               nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
                inode->i_size = attr->ia_size;
                vmtruncate(inode, attr->ia_size);
        }
@@ -949,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        int err;
 
        /* Flush out writes to the server in order to update c/mtime */
-       nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
+       nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
 
        /*
         * We may force a getattr if the user cares about atime.
@@ -973,7 +1075,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        return err;
 }
 
-struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred)
+static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
 {
        struct nfs_open_context *ctx;
 
@@ -981,6 +1083,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rp
        if (ctx != NULL) {
                atomic_set(&ctx->count, 1);
                ctx->dentry = dget(dentry);
+               ctx->vfsmnt = mntget(mnt);
                ctx->cred = get_rpccred(cred);
                ctx->state = NULL;
                ctx->lockowner = current->files;
@@ -1011,6 +1114,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
                if (ctx->cred != NULL)
                        put_rpccred(ctx->cred);
                dput(ctx->dentry);
+               mntput(ctx->vfsmnt);
                kfree(ctx);
        }
 }
@@ -1019,7 +1123,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
  */
-void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
        struct inode *inode = filp->f_dentry->d_inode;
        struct nfs_inode *nfsi = NFS_I(inode);
@@ -1051,7 +1155,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
        return ctx;
 }
 
-void nfs_file_clear_open_context(struct file *filp)
+static void nfs_file_clear_open_context(struct file *filp)
 {
        struct inode *inode = filp->f_dentry->d_inode;
        struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data;
@@ -1076,7 +1180,7 @@ int nfs_open(struct inode *inode, struct file *filp)
        cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
        if (IS_ERR(cred))
                return PTR_ERR(cred);
-       ctx = alloc_nfs_open_context(filp->f_dentry, cred);
+       ctx = alloc_nfs_open_context(filp->f_vfsmnt, filp->f_dentry, cred);
        put_rpccred(cred);
        if (ctx == NULL)
                return -ENOMEM;
@@ -1185,6 +1289,7 @@ int nfs_attribute_timeout(struct inode *inode)
  */
 int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 {
+       nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
        if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
                        && !nfs_attribute_timeout(inode))
                return NFS_STALE(inode) ? -ESTALE : 0;
@@ -1201,6 +1306,7 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
        struct nfs_inode *nfsi = NFS_I(inode);
 
        if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
+               nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
                if (S_ISREG(inode->i_mode))
                        nfs_sync_mapping(mapping);
                invalidate_inode_pages2(mapping);
@@ -1299,39 +1405,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
        if ((fattr->valid & NFS_ATTR_FATTR) == 0)
                return 0;
 
+       /* Has the inode gone and changed behind our back? */
+       if (nfsi->fileid != fattr->fileid
+                       || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
+               return -EIO;
+       }
+
        /* Are we in the process of updating data on the server? */
        data_unstable = nfs_caches_unstable(inode);
 
        /* Do atomic weak cache consistency updates */
        nfs_wcc_update_inode(inode, fattr);
 
-       if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
-                       nfsi->change_attr != fattr->change_attr) {
+       if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0) {
+               if (nfsi->change_attr == fattr->change_attr)
+                       goto out;
                nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (!data_unstable)
                        nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
        }
 
-       /* Has the inode gone and changed behind our back? */
-       if (nfsi->fileid != fattr->fileid
-                       || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
-               return -EIO;
-       }
-
-       cur_size = i_size_read(inode);
-       new_isize = nfs_size_to_loff_t(fattr->size);
-
        /* Verify a few of the more important attributes */
        if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
                nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
                if (!data_unstable)
                        nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
        }
-       if (cur_size != new_isize) {
-               nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
-               if (nfsi->npages == 0)
-                       nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
-       }
+
+       cur_size = i_size_read(inode);
+       new_isize = nfs_size_to_loff_t(fattr->size);
+       if (cur_size != new_isize && nfsi->npages == 0)
+               nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
 
        /* Have any file permissions changed? */
        if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
@@ -1343,6 +1447,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
        if (inode->i_nlink != fattr->nlink)
                nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
 
+out:
        if (!timespec_equal(&inode->i_atime, &fattr->atime))
                nfsi->cache_validity |= NFS_INO_INVALID_ATIME;
 
@@ -1481,15 +1586,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                nfsi->cache_change_attribute = jiffies;
        }
 
-       if ((fattr->valid & NFS_ATTR_FATTR_V4)
-           && nfsi->change_attr != fattr->change_attr) {
-               dprintk("NFS: change_attr change on server for file %s/%ld\n",
-                      inode->i_sb->s_id, inode->i_ino);
-               nfsi->change_attr = fattr->change_attr;
-               invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-               nfsi->cache_change_attribute = jiffies;
-       }
-
        /* If ctime has changed we should definitely clear access+acl caches */
        if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
                invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
@@ -1519,8 +1615,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                inode->i_blksize = fattr->du.nfs2.blocksize;
        }
 
+       if ((fattr->valid & NFS_ATTR_FATTR_V4)) {
+               if (nfsi->change_attr != fattr->change_attr) {
+                       dprintk("NFS: change_attr change on server for file %s/%ld\n",
+                                       inode->i_sb->s_id, inode->i_ino);
+                       nfsi->change_attr = fattr->change_attr;
+                       invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+                       nfsi->cache_change_attribute = jiffies;
+               } else
+                       invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA);
+       }
+
        /* Update attrtimeo value if we're out of the unstable period */
        if (invalid & NFS_INO_INVALID_ATTR) {
+               nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
                nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
                nfsi->attrtimeo_timestamp = jiffies;
        } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
@@ -1637,10 +1745,9 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type,
 #endif /* CONFIG_NFS_V3 */
 
        s = ERR_PTR(-ENOMEM);
-       server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+       server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
        if (!server)
                goto out_err;
-       memset(server, 0, sizeof(struct nfs_server));
        /* Zero out the NFS state stuff */
        init_nfsv4_state(server);
        server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -1712,6 +1819,7 @@ static void nfs_kill_super(struct super_block *s)
 
        rpciod_down();          /* release rpciod */
 
+       nfs_free_iostats(server->io_stats);
        kfree(server->hostname);
        kfree(server);
 }
@@ -1738,6 +1846,7 @@ static struct super_operations nfs4_sops = {
        .clear_inode    = nfs4_clear_inode,
        .umount_begin   = nfs_umount_begin,
        .show_options   = nfs_show_options,
+       .show_stats     = nfs_show_stats,
 };
 
 /*
@@ -1800,6 +1909,9 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 
        nfs_init_timeout_values(&timeparms, data->proto, data->timeo, data->retrans);
 
+       server->retrans_timeo = timeparms.to_initval;
+       server->retrans_count = timeparms.to_retries;
+
        clp = nfs4_get_client(&server->addr.sin_addr);
        if (!clp) {
                dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__);
@@ -1941,10 +2053,9 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type,
                return ERR_PTR(-EINVAL);
        }
 
-       server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL);
+       server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL);
        if (!server)
                return ERR_PTR(-ENOMEM);
-       memset(server, 0, sizeof(struct nfs_server));
        /* Zero out the NFS state stuff */
        init_nfsv4_state(server);
        server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL);
@@ -2024,10 +2135,12 @@ static void nfs4_kill_super(struct super_block *sb)
 
        if (server->client != NULL && !IS_ERR(server->client))
                rpc_shutdown_client(server->client);
-       rpciod_down();          /* release rpciod */
 
        destroy_nfsv4_state(server);
 
+       rpciod_down();
+
+       nfs_free_iostats(server->io_stats);
        kfree(server->hostname);
        kfree(server);
 }
diff --git a/fs/nfs/iostat.h b/fs/nfs/iostat.h
new file mode 100644 (file)
index 0000000..6350ecb
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ *  linux/fs/nfs/iostat.h
+ *
+ *  Declarations for NFS client per-mount statistics
+ *
+ *  Copyright (C) 2005, 2006 Chuck Lever <cel@netapp.com>
+ *
+ *  NFS client per-mount statistics provide information about the health of
+ *  the NFS client and the health of each NFS mount point.  Generally these
+ *  are not for detailed problem diagnosis, but simply to indicate that there
+ *  is a problem.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ */
+
+#ifndef _NFS_IOSTAT
+#define _NFS_IOSTAT
+
+#define NFS_IOSTAT_VERS                "1.0"
+
+/*
+ * NFS byte counters
+ *
+ * 1.  SERVER - the number of payload bytes read from or written to the
+ *     server by the NFS client via an NFS READ or WRITE request.
+ *
+ * 2.  NORMAL - the number of bytes read or written by applications via
+ *     the read(2) and write(2) system call interfaces.
+ *
+ * 3.  DIRECT - the number of bytes read or written from files opened
+ *     with the O_DIRECT flag.
+ *
+ * These counters give a view of the data throughput into and out of the NFS
+ * client.  Comparing the number of bytes requested by an application with the
+ * number of bytes the client requests from the server can provide an
+ * indication of client efficiency (per-op, cache hits, etc).
+ *
+ * These counters can also help characterize which access methods are in
+ * use.  DIRECT by itself shows whether there is any O_DIRECT traffic.
+ * NORMAL + DIRECT shows how much data is going through the system call
+ * interface.  A large amount of SERVER traffic without much NORMAL or
+ * DIRECT traffic shows that applications are using mapped files.
+ *
+ * NFS page counters
+ *
+ * These count the number of pages read or written via nfs_readpage(),
+ * nfs_readpages(), or their write equivalents.
+ */
+enum nfs_stat_bytecounters {
+       NFSIOS_NORMALREADBYTES = 0,
+       NFSIOS_NORMALWRITTENBYTES,
+       NFSIOS_DIRECTREADBYTES,
+       NFSIOS_DIRECTWRITTENBYTES,
+       NFSIOS_SERVERREADBYTES,
+       NFSIOS_SERVERWRITTENBYTES,
+       NFSIOS_READPAGES,
+       NFSIOS_WRITEPAGES,
+       __NFSIOS_BYTESMAX,
+};
+
+/*
+ * NFS event counters
+ *
+ * These counters provide a low-overhead way of monitoring client activity
+ * without enabling NFS trace debugging.  The counters show the rate at
+ * which VFS requests are made, and how often the client invalidates its
+ * data and attribute caches.  This allows system administrators to monitor
+ * such things as how close-to-open is working, and answer questions such
+ * as "why are there so many GETATTR requests on the wire?"
+ *
+ * They also count anomalous events such as short reads and writes, silly
+ * renames due to close-after-delete, and operations that change the size
+ * of a file (such operations can often be the source of data corruption
+ * if applications aren't using file locking properly).
+ */
+enum nfs_stat_eventcounters {
+       NFSIOS_INODEREVALIDATE = 0,
+       NFSIOS_DENTRYREVALIDATE,
+       NFSIOS_DATAINVALIDATE,
+       NFSIOS_ATTRINVALIDATE,
+       NFSIOS_VFSOPEN,
+       NFSIOS_VFSLOOKUP,
+       NFSIOS_VFSACCESS,
+       NFSIOS_VFSUPDATEPAGE,
+       NFSIOS_VFSREADPAGE,
+       NFSIOS_VFSREADPAGES,
+       NFSIOS_VFSWRITEPAGE,
+       NFSIOS_VFSWRITEPAGES,
+       NFSIOS_VFSGETDENTS,
+       NFSIOS_VFSSETATTR,
+       NFSIOS_VFSFLUSH,
+       NFSIOS_VFSFSYNC,
+       NFSIOS_VFSLOCK,
+       NFSIOS_VFSRELEASE,
+       NFSIOS_CONGESTIONWAIT,
+       NFSIOS_SETATTRTRUNC,
+       NFSIOS_EXTENDWRITE,
+       NFSIOS_SILLYRENAME,
+       NFSIOS_SHORTREAD,
+       NFSIOS_SHORTWRITE,
+       NFSIOS_DELAY,
+       __NFSIOS_COUNTSMAX,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/percpu.h>
+#include <linux/cache.h>
+
+struct nfs_iostats {
+       unsigned long long      bytes[__NFSIOS_BYTESMAX];
+       unsigned long           events[__NFSIOS_COUNTSMAX];
+} ____cacheline_aligned;
+
+static inline void nfs_inc_server_stats(struct nfs_server *server, enum nfs_stat_eventcounters stat)
+{
+       struct nfs_iostats *iostats;
+       int cpu;
+
+       cpu = get_cpu();
+       iostats = per_cpu_ptr(server->io_stats, cpu);
+       iostats->events[stat] ++;
+       put_cpu_no_resched();
+}
+
+static inline void nfs_inc_stats(struct inode *inode, enum nfs_stat_eventcounters stat)
+{
+       nfs_inc_server_stats(NFS_SERVER(inode), stat);
+}
+
+static inline void nfs_add_server_stats(struct nfs_server *server, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+       struct nfs_iostats *iostats;
+       int cpu;
+
+       cpu = get_cpu();
+       iostats = per_cpu_ptr(server->io_stats, cpu);
+       iostats->bytes[stat] += addend;
+       put_cpu_no_resched();
+}
+
+static inline void nfs_add_stats(struct inode *inode, enum nfs_stat_bytecounters stat, unsigned long addend)
+{
+       nfs_add_server_stats(NFS_SERVER(inode), stat, addend);
+}
+
+static inline struct nfs_iostats *nfs_alloc_iostats(void)
+{
+       return alloc_percpu(struct nfs_iostats);
+}
+
+static inline void nfs_free_iostats(struct nfs_iostats *stats)
+{
+       if (stats != NULL)
+               free_percpu(stats);
+}
+
+#endif
+#endif
index db99b8f..c44d87b 100644 (file)
@@ -49,9 +49,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
        struct mnt_fhstatus     result = {
                .fh             = fh
        };
+       struct rpc_message msg  = {
+               .rpc_argp       = path,
+               .rpc_resp       = &result,
+       };
        char                    hostname[32];
        int                     status;
-       int                     call;
 
        dprintk("NFS:      nfs_mount(%08x:%s)\n",
                        (unsigned)ntohl(addr->sin_addr.s_addr), path);
@@ -61,8 +64,12 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
        if (IS_ERR(mnt_clnt))
                return PTR_ERR(mnt_clnt);
 
-       call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT;
-       status = rpc_call(mnt_clnt, call, path, &result, 0);
+       if (version == NFS_MNT3_VERSION)
+               msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
+       else
+               msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+
+       status = rpc_call_sync(mnt_clnt, &msg, 0);
        return status < 0? status : (result.status? -EACCES : 0);
 }
 
@@ -137,6 +144,8 @@ static struct rpc_procinfo  mnt_procedures[] = {
          .p_encode             = (kxdrproc_t) xdr_encode_dirpath,      
          .p_decode             = (kxdrproc_t) xdr_decode_fhstatus,
          .p_bufsiz             = MNT_dirpath_sz << 2,
+         .p_statidx            = MNTPROC_MNT,
+         .p_name               = "MOUNT",
        },
 };
 
@@ -146,6 +155,8 @@ static struct rpc_procinfo mnt3_procedures[] = {
          .p_encode             = (kxdrproc_t) xdr_encode_dirpath,
          .p_decode             = (kxdrproc_t) xdr_decode_fhstatus3,
          .p_bufsiz             = MNT_dirpath_sz << 2,
+         .p_statidx            = MOUNTPROC3_MNT,
+         .p_name               = "MOUNT",
        },
 };
 
index 7fc0560..8cdc792 100644 (file)
@@ -682,7 +682,9 @@ nfs_stat_to_errno(int stat)
        .p_encode   =  (kxdrproc_t) nfs_xdr_##argtype,                  \
        .p_decode   =  (kxdrproc_t) nfs_xdr_##restype,                  \
        .p_bufsiz   =  MAX(NFS_##argtype##_sz,NFS_##restype##_sz) << 2, \
-       .p_timer    =  timer                                            \
+       .p_timer    =  timer,                                           \
+       .p_statidx  =  NFSPROC_##proc,                                  \
+       .p_name     =  #proc,                                           \
        }
 struct rpc_procinfo    nfs_procedures[] = {
     PROC(GETATTR,      fhandle,        attrstat, 1),
index 6a5bbc0..3328787 100644 (file)
@@ -190,6 +190,10 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
        struct nfs3_getaclres res = {
                .fattr =        &fattr,
        };
+       struct rpc_message msg = {
+               .rpc_argp       = &args,
+               .rpc_resp       = &res,
+       };
        struct posix_acl *acl;
        int status, count;
 
@@ -218,8 +222,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
                return NULL;
 
        dprintk("NFS call getacl\n");
-       status = rpc_call(server->client_acl, ACLPROC3_GETACL,
-                         &args, &res, 0);
+       msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+       status = rpc_call_sync(server->client_acl, &msg, 0);
        dprintk("NFS reply getacl: %d\n", status);
 
        /* pages may have been allocated at the xdr layer. */
@@ -286,6 +290,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
                .acl_access = acl,
                .pages = pages,
        };
+       struct rpc_message msg = {
+               .rpc_argp       = &args,
+               .rpc_resp       = &fattr,
+       };
        int status, count;
 
        status = -EOPNOTSUPP;
@@ -306,8 +314,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 
        dprintk("NFS call setacl\n");
        nfs_begin_data_update(inode);
-       status = rpc_call(server->client_acl, ACLPROC3_SETACL,
-                         &args, &fattr, 0);
+       msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+       status = rpc_call_sync(server->client_acl, &msg, 0);
        spin_lock(&inode->i_lock);
        NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS;
        spin_unlock(&inode->i_lock);
index ed67567..cf186f0 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/smp_lock.h>
 #include <linux/nfs_mount.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY                NFSDBG_PROC
 
 extern struct rpc_procinfo nfs3_procedures[];
@@ -41,27 +43,14 @@ nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
        return res;
 }
 
-static inline int
-nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-       struct rpc_message msg = {
-               .rpc_proc       = &clnt->cl_procinfo[proc],
-               .rpc_argp       = argp,
-               .rpc_resp       = resp,
-       };
-       return nfs3_rpc_wrapper(clnt, &msg, flags);
-}
-
-#define rpc_call(clnt, proc, argp, resp, flags) \
-               nfs3_rpc_call_wrapper(clnt, proc, argp, resp, flags)
-#define rpc_call_sync(clnt, msg, flags) \
-               nfs3_rpc_wrapper(clnt, msg, flags)
+#define rpc_call_sync(clnt, msg, flags)        nfs3_rpc_wrapper(clnt, msg, flags)
 
 static int
-nfs3_async_handle_jukebox(struct rpc_task *task)
+nfs3_async_handle_jukebox(struct rpc_task *task, struct inode *inode)
 {
        if (task->tk_status != -EJUKEBOX)
                return 0;
+       nfs_inc_stats(inode, NFSIOS_DELAY);
        task->tk_status = 0;
        rpc_restart_call(task);
        rpc_delay(task, NFS_JUKEBOX_RETRY_TIME);
@@ -72,14 +61,21 @@ static int
 do_proc_get_root(struct rpc_clnt *client, struct nfs_fh *fhandle,
                 struct nfs_fsinfo *info)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_FSINFO],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = info,
+       };
        int     status;
 
        dprintk("%s: call  fsinfo\n", __FUNCTION__);
        nfs_fattr_init(info->fattr);
-       status = rpc_call(client, NFS3PROC_FSINFO, fhandle, info, 0);
+       status = rpc_call_sync(client, &msg, 0);
        dprintk("%s: reply fsinfo: %d\n", __FUNCTION__, status);
        if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
-               status = rpc_call(client, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
+               msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+               msg.rpc_resp = info->fattr;
+               status = rpc_call_sync(client, &msg, 0);
                dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
        }
        return status;
@@ -107,12 +103,16 @@ static int
 nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
                struct nfs_fattr *fattr)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_GETATTR],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = fattr,
+       };
        int     status;
 
        dprintk("NFS call  getattr\n");
        nfs_fattr_init(fattr);
-       status = rpc_call(server->client, NFS3PROC_GETATTR,
-                         fhandle, fattr, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply getattr: %d\n", status);
        return status;
 }
@@ -126,11 +126,16 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                .fh             = NFS_FH(inode),
                .sattr          = sattr,
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_SETATTR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = fattr,
+       };
        int     status;
 
        dprintk("NFS call  setattr\n");
        nfs_fattr_init(fattr);
-       status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        if (status == 0)
                nfs_setattr_update_inode(inode, sattr);
        dprintk("NFS reply setattr: %d\n", status);
@@ -152,15 +157,23 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name,
                .fh             = fhandle,
                .fattr          = fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_LOOKUP],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        dprintk("NFS call  lookup %s\n", name->name);
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_LOOKUP, &arg, &res, 0);
-       if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR))
-               status = rpc_call(NFS_CLIENT(dir), NFS3PROC_GETATTR,
-                        fhandle, fattr, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+       if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) {
+               msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR];
+               msg.rpc_argp = fhandle;
+               msg.rpc_resp = fattr;
+               status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+       }
        dprintk("NFS reply lookup: %d\n", status);
        if (status >= 0)
                status = nfs_refresh_inode(dir, &dir_attr);
@@ -180,7 +193,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
                .rpc_proc       = &nfs3_procedures[NFS3PROC_ACCESS],
                .rpc_argp       = &arg,
                .rpc_resp       = &res,
-               .rpc_cred       = entry->cred
+               .rpc_cred       = entry->cred,
        };
        int mode = entry->mask;
        int status;
@@ -226,12 +239,16 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
                .pglen          = pglen,
                .pages          = &page
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_READLINK],
+               .rpc_argp       = &args,
+               .rpc_resp       = &fattr,
+       };
        int                     status;
 
        dprintk("NFS call  readlink\n");
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(inode), NFS3PROC_READLINK,
-                         &args, &fattr, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        nfs_refresh_inode(inode, &fattr);
        dprintk("NFS reply readlink: %d\n", status);
        return status;
@@ -327,6 +344,11 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                .fh             = &fhandle,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_CREATE],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        mode_t mode = sattr->ia_mode;
        int status;
 
@@ -343,8 +365,8 @@ nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 again:
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_CREATE, &arg, &res, 0);
-       nfs_post_op_update_inode(dir, &dir_attr);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+       nfs_refresh_inode(dir, &dir_attr);
 
        /* If the server doesn't support the exclusive creation semantics,
         * try again with simple 'guarded' mode. */
@@ -447,7 +469,7 @@ nfs3_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
        struct rpc_message *msg = &task->tk_msg;
        struct nfs_fattr        *dir_attr;
 
-       if (nfs3_async_handle_jukebox(task))
+       if (nfs3_async_handle_jukebox(task, dir->d_inode))
                return 1;
        if (msg->rpc_argp) {
                dir_attr = (struct nfs_fattr*)msg->rpc_resp;
@@ -474,12 +496,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
                .fromattr       = &old_dir_attr,
                .toattr         = &new_dir_attr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_RENAME],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
        nfs_fattr_init(&old_dir_attr);
        nfs_fattr_init(&new_dir_attr);
-       status = rpc_call(NFS_CLIENT(old_dir), NFS3PROC_RENAME, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
        nfs_post_op_update_inode(old_dir, &old_dir_attr);
        nfs_post_op_update_inode(new_dir, &new_dir_attr);
        dprintk("NFS reply rename: %d\n", status);
@@ -500,12 +527,17 @@ nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
                .dir_attr       = &dir_attr,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_LINK],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        dprintk("NFS call  link %s\n", name->name);
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(inode), NFS3PROC_LINK, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        nfs_post_op_update_inode(dir, &dir_attr);
        nfs_post_op_update_inode(inode, &fattr);
        dprintk("NFS reply link: %d\n", status);
@@ -531,6 +563,11 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
                .fh             = fhandle,
                .fattr          = fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_SYMLINK],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        if (path->len > NFS3_MAXPATHLEN)
@@ -538,7 +575,7 @@ nfs3_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
        dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SYMLINK, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_post_op_update_inode(dir, &dir_attr);
        dprintk("NFS reply symlink: %d\n", status);
        return status;
@@ -560,6 +597,11 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
                .fh             = &fhandle,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_MKDIR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int mode = sattr->ia_mode;
        int status;
 
@@ -569,7 +611,7 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
 
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_post_op_update_inode(dir, &dir_attr);
        if (status != 0)
                goto out;
@@ -591,11 +633,16 @@ nfs3_proc_rmdir(struct inode *dir, struct qstr *name)
                .name           = name->name,
                .len            = name->len
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_RMDIR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &dir_attr,
+       };
        int                     status;
 
        dprintk("NFS call  rmdir %s\n", name->name);
        nfs_fattr_init(&dir_attr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_RMDIR, &arg, &dir_attr, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_post_op_update_inode(dir, &dir_attr);
        dprintk("NFS reply rmdir: %d\n", status);
        return status;
@@ -672,6 +719,11 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                .fh             = &fh,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_MKNOD],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        mode_t mode = sattr->ia_mode;
        int status;
 
@@ -690,7 +742,7 @@ nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 
        nfs_fattr_init(&dir_attr);
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_post_op_update_inode(dir, &dir_attr);
        if (status != 0)
                goto out;
@@ -707,11 +759,16 @@ static int
 nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
                 struct nfs_fsstat *stat)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_FSSTAT],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = stat,
+       };
        int     status;
 
        dprintk("NFS call  fsstat\n");
        nfs_fattr_init(stat->fattr);
-       status = rpc_call(server->client, NFS3PROC_FSSTAT, fhandle, stat, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply statfs: %d\n", status);
        return status;
 }
@@ -720,11 +777,16 @@ static int
 nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
                 struct nfs_fsinfo *info)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_FSINFO],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = info,
+       };
        int     status;
 
        dprintk("NFS call  fsinfo\n");
        nfs_fattr_init(info->fattr);
-       status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
+       status = rpc_call_sync(server->client_sys, &msg, 0);
        dprintk("NFS reply fsinfo: %d\n", status);
        return status;
 }
@@ -733,40 +795,34 @@ static int
 nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
                   struct nfs_pathconf *info)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs3_procedures[NFS3PROC_PATHCONF],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = info,
+       };
        int     status;
 
        dprintk("NFS call  pathconf\n");
        nfs_fattr_init(info->fattr);
-       status = rpc_call(server->client, NFS3PROC_PATHCONF, fhandle, info, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply pathconf: %d\n", status);
        return status;
 }
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs3_read_done(struct rpc_task *task, void *calldata)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = calldata;
-
-       if (nfs3_async_handle_jukebox(task))
-               return;
+       if (nfs3_async_handle_jukebox(task, data->inode))
+               return -EAGAIN;
        /* Call back common NFS readpage processing */
        if (task->tk_status >= 0)
                nfs_refresh_inode(data->inode, &data->fattr);
-       nfs_readpage_result(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs3_read_ops = {
-       .rpc_call_done = nfs3_read_done,
-       .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data)
 {
-       struct rpc_task         *task = &data->task;
-       struct inode            *inode = data->inode;
-       int                     flags;
        struct rpc_message      msg = {
                .rpc_proc       = &nfs3_procedures[NFS3PROC_READ],
                .rpc_argp       = &data->args,
@@ -774,37 +830,20 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
                .rpc_cred       = data->cred,
        };
 
-       /* N.B. Do we need to test? Never called for swapfile inode */
-       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-       /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_write_done(struct rpc_task *task, void *calldata)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data *data = calldata;
-
-       if (nfs3_async_handle_jukebox(task))
-               return;
+       if (nfs3_async_handle_jukebox(task, data->inode))
+               return -EAGAIN;
        if (task->tk_status >= 0)
                nfs_post_op_update_inode(data->inode, data->res.fattr);
-       nfs_writeback_done(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs3_write_ops = {
-       .rpc_call_done = nfs3_write_done,
-       .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
-       struct rpc_task         *task = &data->task;
-       struct inode            *inode = data->inode;
-       int                     stable;
-       int                     flags;
        struct rpc_message      msg = {
                .rpc_proc       = &nfs3_procedures[NFS3PROC_WRITE],
                .rpc_argp       = &data->args,
@@ -812,45 +851,28 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
                .rpc_cred       = data->cred,
        };
 
+       data->args.stable = NFS_UNSTABLE;
        if (how & FLUSH_STABLE) {
-               if (!NFS_I(inode)->ncommit)
-                       stable = NFS_FILE_SYNC;
-               else
-                       stable = NFS_DATA_SYNC;
-       } else
-               stable = NFS_UNSTABLE;
-       data->args.stable = stable;
-
-       /* Set the initial flags for the task.  */
-       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+               data->args.stable = NFS_FILE_SYNC;
+               if (NFS_I(data->inode)->ncommit)
+                       data->args.stable = NFS_DATA_SYNC;
+       }
 
        /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs3_commit_done(struct rpc_task *task, void *calldata)
+static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data *data = calldata;
-
-       if (nfs3_async_handle_jukebox(task))
-               return;
+       if (nfs3_async_handle_jukebox(task, data->inode))
+               return -EAGAIN;
        if (task->tk_status >= 0)
                nfs_post_op_update_inode(data->inode, data->res.fattr);
-       nfs_commit_done(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs3_commit_ops = {
-       .rpc_call_done = nfs3_commit_done,
-       .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-       struct rpc_task         *task = &data->task;
-       struct inode            *inode = data->inode;
-       int                     flags;
        struct rpc_message      msg = {
                .rpc_proc       = &nfs3_procedures[NFS3PROC_COMMIT],
                .rpc_argp       = &data->args,
@@ -858,12 +880,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
                .rpc_cred       = data->cred,
        };
 
-       /* Set the initial flags for the task.  */
-       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-       /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
 static int
@@ -902,8 +919,11 @@ struct nfs_rpc_ops nfs_v3_clientops = {
        .pathconf       = nfs3_proc_pathconf,
        .decode_dirent  = nfs3_decode_dirent,
        .read_setup     = nfs3_proc_read_setup,
+       .read_done      = nfs3_read_done,
        .write_setup    = nfs3_proc_write_setup,
+       .write_done     = nfs3_write_done,
        .commit_setup   = nfs3_proc_commit_setup,
+       .commit_done    = nfs3_commit_done,
        .file_open      = nfs_open,
        .file_release   = nfs_release,
        .lock           = nfs3_proc_lock,
index b6c0b50..2d8701a 100644 (file)
@@ -1109,7 +1109,9 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr)
        .p_encode    = (kxdrproc_t) nfs3_xdr_##argtype,                 \
        .p_decode    = (kxdrproc_t) nfs3_xdr_##restype,                 \
        .p_bufsiz    = MAX(NFS3_##argtype##_sz,NFS3_##restype##_sz) << 2,       \
-       .p_timer     = timer                                            \
+       .p_timer     = timer,                                           \
+       .p_statidx   = NFS3PROC_##proc,                                 \
+       .p_name      = #proc,                                           \
        }
 
 struct rpc_procinfo    nfs3_procedures[] = {
@@ -1150,6 +1152,7 @@ static struct rpc_procinfo        nfs3_acl_procedures[] = {
                .p_decode = (kxdrproc_t) nfs3_xdr_getaclres,
                .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2,
                .p_timer = 1,
+               .p_name = "GETACL",
        },
        [ACLPROC3_SETACL] = {
                .p_proc = ACLPROC3_SETACL,
@@ -1157,6 +1160,7 @@ static struct rpc_procinfo        nfs3_acl_procedures[] = {
                .p_decode = (kxdrproc_t) nfs3_xdr_setaclres,
                .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2,
                .p_timer = 0,
+               .p_name = "SETACL",
        },
 };
 
index f8c0066..47ece1d 100644 (file)
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PROC
 
@@ -335,7 +336,7 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
        if (!(data->f_attr.valid & NFS_ATTR_FATTR))
                goto out;
        inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
-       if (inode == NULL)
+       if (IS_ERR(inode))
                goto out;
        state = nfs4_get_open_state(inode, data->owner);
        if (state == NULL)
@@ -604,11 +605,14 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
        int status;
 
        atomic_inc(&data->count);
+       /*
+        * If rpc_run_task() ends up calling ->rpc_release(), we
+        * want to ensure that it takes the 'error' code path.
+        */
+       data->rpc_status = -ENOMEM;
        task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
-       if (IS_ERR(task)) {
-               nfs4_opendata_free(data);
+       if (IS_ERR(task))
                return PTR_ERR(task);
-       }
        status = nfs4_wait_for_completion_rpc_task(task);
        if (status != 0) {
                data->cancelled = 1;
@@ -707,11 +711,14 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
        int status;
 
        atomic_inc(&data->count);
+       /*
+        * If rpc_run_task() ends up calling ->rpc_release(), we
+        * want to ensure that it takes the 'error' code path.
+        */
+       data->rpc_status = -ENOMEM;
        task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
-       if (IS_ERR(task)) {
-               nfs4_opendata_free(data);
+       if (IS_ERR(task))
                return PTR_ERR(task);
-       }
        status = nfs4_wait_for_completion_rpc_task(task);
        if (status != 0) {
                data->cancelled = 1;
@@ -908,7 +915,7 @@ out_put_state_owner:
 static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
 {
        struct nfs4_exception exception = { };
-       struct nfs4_state *res;
+       struct nfs4_state *res = ERR_PTR(-EIO);
        int err;
 
        do {
@@ -1017,12 +1024,12 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
        return res;
 }
 
-static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int _nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+       struct nfs_server *server = NFS_SERVER(inode);
         struct nfs_setattrargs  arg = {
-                .fh             = fhandle,
+                .fh             = NFS_FH(inode),
                 .iap            = sattr,
                .server         = server,
                .bitmask = server->attr_bitmask,
@@ -1041,7 +1048,9 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
 
        nfs_fattr_init(fattr);
 
-       if (state != NULL) {
+       if (nfs4_copy_delegation_stateid(&arg.stateid, inode)) {
+               /* Use that stateid */
+       } else if (state != NULL) {
                msg.rpc_cred = state->owner->so_cred;
                nfs4_copy_stateid(&arg.stateid, state, current->files);
        } else
@@ -1053,16 +1062,15 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
        return status;
 }
 
-static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr,
-                struct nfs_fh *fhandle, struct iattr *sattr,
-                struct nfs4_state *state)
+static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
+                struct iattr *sattr, struct nfs4_state *state)
 {
+       struct nfs_server *server = NFS_SERVER(inode);
        struct nfs4_exception exception = { };
        int err;
        do {
                err = nfs4_handle_exception(server,
-                               _nfs4_do_setattr(server, fattr, fhandle, sattr,
-                                       state),
+                               _nfs4_do_setattr(inode, fattr, sattr, state),
                                &exception);
        } while (exception.retry);
        return err;
@@ -1503,8 +1511,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        if (ctx != NULL)
                state = ctx->state;
 
-       status = nfs4_do_setattr(NFS_SERVER(inode), fattr,
-                       NFS_FH(inode), sattr, state);
+       status = nfs4_do_setattr(inode, fattr, sattr, state);
        if (status == 0)
                nfs_setattr_update_inode(inode, sattr);
        if (ctx != NULL)
@@ -1823,8 +1830,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        d_instantiate(dentry, igrab(state->inode));
        if (flags & O_EXCL) {
                struct nfs_fattr fattr;
-               status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
-                                    NFS_FH(state->inode), sattr, state);
+               status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
                if (status == 0)
                        nfs_setattr_update_inode(state->inode, sattr);
        }
@@ -2344,75 +2350,50 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
        return err;
 }
 
-static void nfs4_read_done(struct rpc_task *task, void *calldata)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = calldata;
-       struct inode *inode = data->inode;
+       struct nfs_server *server = NFS_SERVER(data->inode);
 
-       if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+       if (nfs4_async_handle_error(task, server) == -EAGAIN) {
                rpc_restart_call(task);
-               return;
+               return -EAGAIN;
        }
        if (task->tk_status > 0)
-               renew_lease(NFS_SERVER(inode), data->timestamp);
-       /* Call back common NFS readpage processing */
-       nfs_readpage_result(task, calldata);
+               renew_lease(server, data->timestamp);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs4_read_ops = {
-       .rpc_call_done = nfs4_read_done,
-       .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data)
 {
-       struct rpc_task *task = &data->task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
                .rpc_cred = data->cred,
        };
-       struct inode *inode = data->inode;
-       int flags;
 
        data->timestamp   = jiffies;
 
-       /* N.B. Do we need to test? Never called for swapfile inode */
-       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-       /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_write_done(struct rpc_task *task, void *calldata)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data *data = calldata;
        struct inode *inode = data->inode;
        
        if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
                rpc_restart_call(task);
-               return;
+               return -EAGAIN;
        }
        if (task->tk_status >= 0) {
                renew_lease(NFS_SERVER(inode), data->timestamp);
                nfs_post_op_update_inode(inode, data->res.fattr);
        }
-       /* Call back common NFS writeback processing */
-       nfs_writeback_done(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs4_write_ops = {
-       .rpc_call_done = nfs4_write_done,
-       .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
-       struct rpc_task *task = &data->task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
                .rpc_argp = &data->args,
@@ -2422,7 +2403,6 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
        struct inode *inode = data->inode;
        struct nfs_server *server = NFS_SERVER(inode);
        int stable;
-       int flags;
        
        if (how & FLUSH_STABLE) {
                if (!NFS_I(inode)->ncommit)
@@ -2437,57 +2417,37 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 
        data->timestamp   = jiffies;
 
-       /* Set the initial flags for the task.  */
-       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
        /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs4_commit_done(struct rpc_task *task, void *calldata)
+static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data *data = calldata;
        struct inode *inode = data->inode;
        
        if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
                rpc_restart_call(task);
-               return;
+               return -EAGAIN;
        }
        if (task->tk_status >= 0)
                nfs_post_op_update_inode(inode, data->res.fattr);
-       /* Call back common NFS writeback processing */
-       nfs_commit_done(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs4_commit_ops = {
-       .rpc_call_done = nfs4_commit_done,
-       .rpc_release = nfs_commit_release,
-};
-
-static void
-nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
-       struct rpc_task *task = &data->task;
        struct rpc_message msg = {
                .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
                .rpc_cred = data->cred,
        };      
-       struct inode *inode = data->inode;
-       struct nfs_server *server = NFS_SERVER(inode);
-       int flags;
+       struct nfs_server *server = NFS_SERVER(data->inode);
        
        data->args.bitmask = server->attr_bitmask;
        data->res.server = server;
 
-       /* Set the initial flags for the task.  */
-       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
-       /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
-       rpc_call_setup(task, &msg, 0);  
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
 /*
@@ -2755,8 +2715,10 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
                                rpc_wake_up_task(task);
                        task->tk_status = 0;
                        return -EAGAIN;
-               case -NFS4ERR_GRACE:
                case -NFS4ERR_DELAY:
+                       nfs_inc_server_stats((struct nfs_server *) server,
+                                               NFSIOS_DELAY);
+               case -NFS4ERR_GRACE:
                        rpc_delay(task, NFS4_POLL_RETRY_MAX);
                        task->tk_status = 0;
                        return -EAGAIN;
@@ -2893,8 +2855,7 @@ int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short p
        return status;
 }
 
-int
-nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 {
        struct nfs_fsinfo fsinfo;
        struct rpc_message msg = {
@@ -2918,6 +2879,24 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
        return status;
 }
 
+int nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
+{
+       long timeout = 0;       /* in/out state handed to nfs4_delay(); must not start indeterminate */
+       int err;
+       do {    /* re-issue the confirm while it returns -NFS4ERR_RESOURCE or -NFS4ERR_DELAY */
+               err = _nfs4_proc_setclientid_confirm(clp, cred);
+               switch (err) {
+                       case 0:
+                               return err;
+                       case -NFS4ERR_RESOURCE:
+                               /* The IBM lawyers misread another document! */
+                       case -NFS4ERR_DELAY:
+                               err = nfs4_delay(clp->cl_rpcclient, &timeout);
+               }
+       } while (err == 0);     /* err == 0 here means nfs4_delay() succeeded: try again */
+       return err;             /* any other error, or a failed delay, is returned as-is */
+}
+
 struct nfs4_delegreturndata {
        struct nfs4_delegreturnargs args;
        struct nfs4_delegreturnres res;
@@ -2958,7 +2937,7 @@ static void nfs4_delegreturn_release(void *calldata)
        kfree(calldata);
 }
 
-const static struct rpc_call_ops nfs4_delegreturn_ops = {
+static const struct rpc_call_ops nfs4_delegreturn_ops = {
        .rpc_call_prepare = nfs4_delegreturn_prepare,
        .rpc_call_done = nfs4_delegreturn_done,
        .rpc_release = nfs4_delegreturn_release,
@@ -2986,10 +2965,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        data->rpc_status = 0;
 
        task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
-       if (IS_ERR(task)) {
-               nfs4_delegreturn_release(data);
+       if (IS_ERR(task))
                return PTR_ERR(task);
-       }
        status = nfs4_wait_for_completion_rpc_task(task);
        if (status == 0) {
                status = data->rpc_status;
@@ -3209,7 +3186,6 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
                struct nfs_seqid *seqid)
 {
        struct nfs4_unlockdata *data;
-       struct rpc_task *task;
 
        data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
        if (data == NULL) {
@@ -3219,10 +3195,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
        /* Unlock _before_ we do the RPC call */
        do_vfs_lock(fl->fl_file, fl);
-       task = rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
-       if (IS_ERR(task))
-               nfs4_locku_release_calldata(data);
-       return task;
+       return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
 }
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3403,10 +3376,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
                data->arg.reclaim = 1;
        task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
                        &nfs4_lock_ops, data);
-       if (IS_ERR(task)) {
-               nfs4_lock_release(data);
+       if (IS_ERR(task))
                return PTR_ERR(task);
-       }
        ret = nfs4_wait_for_completion_rpc_task(task);
        if (ret == 0) {
                ret = data->rpc_status;
@@ -3588,6 +3559,8 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
 {
        size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1;
 
+       if (!nfs4_server_supports_acls(NFS_SERVER(dentry->d_inode)))
+               return 0;
        if (buf && buflen < len)
                return -ERANGE;
        if (buf)
@@ -3644,8 +3617,11 @@ struct nfs_rpc_ops       nfs_v4_clientops = {
        .pathconf       = nfs4_proc_pathconf,
        .decode_dirent  = nfs4_decode_dirent,
        .read_setup     = nfs4_proc_read_setup,
+       .read_done      = nfs4_read_done,
        .write_setup    = nfs4_proc_write_setup,
+       .write_done     = nfs4_write_done,
        .commit_setup   = nfs4_proc_commit_setup,
+       .commit_done    = nfs4_commit_done,
        .file_open      = nfs_open,
        .file_release   = nfs_release,
        .lock           = nfs4_proc_lock,
index afad025..96e5b82 100644 (file)
@@ -977,6 +977,7 @@ out:
 out_error:
        printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
                                NIPQUAD(clp->cl_addr.s_addr), -status);
+       set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
        goto out;
 }
 
index 4bbf5ef..b956753 100644 (file)
@@ -4344,6 +4344,8 @@ nfs_stat_to_errno(int stat)
        .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,            \
        .p_decode = (kxdrproc_t) nfs4_xdr_##restype,            \
        .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+       .p_statidx = NFSPROC4_CLNT_##proc,                      \
+       .p_name   = #proc,                                      \
     }
 
 struct rpc_procinfo    nfs4_procedures[] = {
index d53857b..106aca3 100644 (file)
@@ -85,6 +85,9 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
        atomic_set(&req->wb_complete, 0);
        req->wb_index   = page->index;
        page_cache_get(page);
+       BUG_ON(PagePrivate(page));
+       BUG_ON(!PageLocked(page));
+       BUG_ON(page->mapping->host != inode);
        req->wb_offset  = offset;
        req->wb_pgbase  = offset;
        req->wb_bytes   = count;
@@ -132,9 +135,11 @@ void nfs_clear_page_writeback(struct nfs_page *req)
 {
        struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
 
-       spin_lock(&nfsi->req_lock);
-       radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
-       spin_unlock(&nfsi->req_lock);
+       if (req->wb_page != NULL) {
+               spin_lock(&nfsi->req_lock);
+               radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
+               spin_unlock(&nfsi->req_lock);
+       }
        nfs_unlock_request(req);
 }
 
@@ -147,8 +152,9 @@ void nfs_clear_page_writeback(struct nfs_page *req)
  */
 void nfs_clear_request(struct nfs_page *req)
 {
-       if (req->wb_page) {
-               page_cache_release(req->wb_page);
+       struct page *page = req->wb_page;
+       if (page != NULL) {
+               page_cache_release(page);
                req->wb_page = NULL;
        }
 }
index f5150d7..9dd85ca 100644 (file)
@@ -58,16 +58,23 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 {
        struct nfs_fattr *fattr = info->fattr;
        struct nfs2_fsstat fsinfo;
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_GETATTR],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = fattr,
+       };
        int status;
 
        dprintk("%s: call getattr\n", __FUNCTION__);
        nfs_fattr_init(fattr);
-       status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
+       status = rpc_call_sync(server->client_sys, &msg, 0);
        dprintk("%s: reply getattr: %d\n", __FUNCTION__, status);
        if (status)
                return status;
        dprintk("%s: call statfs\n", __FUNCTION__);
-       status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+       msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
+       msg.rpc_resp = &fsinfo;
+       status = rpc_call_sync(server->client_sys, &msg, 0);
        dprintk("%s: reply statfs: %d\n", __FUNCTION__, status);
        if (status)
                return status;
@@ -90,12 +97,16 @@ static int
 nfs_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
                struct nfs_fattr *fattr)
 {
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_GETATTR],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = fattr,
+       };
        int     status;
 
        dprintk("NFS call  getattr\n");
        nfs_fattr_init(fattr);
-       status = rpc_call(server->client, NFSPROC_GETATTR,
-                               fhandle, fattr, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply getattr: %d\n", status);
        return status;
 }
@@ -109,6 +120,11 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
                .fh     = NFS_FH(inode),
                .sattr  = sattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_SETATTR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = fattr,
+       };
        int     status;
 
        /* Mask out the non-modebit related stuff from attr->ia_mode */
@@ -116,7 +132,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
        dprintk("NFS call  setattr\n");
        nfs_fattr_init(fattr);
-       status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        if (status == 0)
                nfs_setattr_update_inode(inode, sattr);
        dprintk("NFS reply setattr: %d\n", status);
@@ -136,11 +152,16 @@ nfs_proc_lookup(struct inode *dir, struct qstr *name,
                .fh             = fhandle,
                .fattr          = fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_LOOKUP],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        dprintk("NFS call  lookup %s\n", name->name);
        nfs_fattr_init(fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_LOOKUP, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        dprintk("NFS reply lookup: %d\n", status);
        return status;
 }
@@ -154,10 +175,14 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
                .pglen          = pglen,
                .pages          = &page
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_READLINK],
+               .rpc_argp       = &args,
+       };
        int                     status;
 
        dprintk("NFS call  readlink\n");
-       status = rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &args, NULL, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        dprintk("NFS reply readlink: %d\n", status);
        return status;
 }
@@ -233,11 +258,16 @@ nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                .fh             = &fhandle,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_CREATE],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        nfs_fattr_init(&fattr);
        dprintk("NFS call  create %s\n", dentry->d_name.name);
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        if (status == 0)
                status = nfs_instantiate(dentry, &fhandle, &fattr);
        dprintk("NFS reply create: %d\n", status);
@@ -263,6 +293,11 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                .fh             = &fhandle,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_CREATE],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int status, mode;
 
        dprintk("NFS call  mknod %s\n", dentry->d_name.name);
@@ -277,13 +312,13 @@ nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
        }
 
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_mark_for_revalidate(dir);
 
        if (status == -EINVAL && S_ISFIFO(mode)) {
                sattr->ia_mode = mode;
                nfs_fattr_init(&fattr);
-               status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0);
+               status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        }
        if (status == 0)
                status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -302,8 +337,6 @@ nfs_proc_remove(struct inode *dir, struct qstr *name)
        struct rpc_message      msg = { 
                .rpc_proc       = &nfs_procedures[NFSPROC_REMOVE],
                .rpc_argp       = &arg,
-               .rpc_resp       = NULL,
-               .rpc_cred       = NULL
        };
        int                     status;
 
@@ -355,10 +388,14 @@ nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
                .toname         = new_name->name,
                .tolen          = new_name->len
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_RENAME],
+               .rpc_argp       = &arg,
+       };
        int                     status;
 
        dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
-       status = rpc_call(NFS_CLIENT(old_dir), NFSPROC_RENAME, &arg, NULL, 0);
+       status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
        nfs_mark_for_revalidate(old_dir);
        nfs_mark_for_revalidate(new_dir);
        dprintk("NFS reply rename: %d\n", status);
@@ -374,10 +411,14 @@ nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
                .toname         = name->name,
                .tolen          = name->len
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_LINK],
+               .rpc_argp       = &arg,
+       };
        int                     status;
 
        dprintk("NFS call  link %s\n", name->name);
-       status = rpc_call(NFS_CLIENT(inode), NFSPROC_LINK, &arg, NULL, 0);
+       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
        nfs_mark_for_revalidate(inode);
        nfs_mark_for_revalidate(dir);
        dprintk("NFS reply link: %d\n", status);
@@ -397,6 +438,10 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
                .tolen          = path->len,
                .sattr          = sattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_SYMLINK],
+               .rpc_argp       = &arg,
+       };
        int                     status;
 
        if (path->len > NFS2_MAXPATHLEN)
@@ -404,7 +449,7 @@ nfs_proc_symlink(struct inode *dir, struct qstr *name, struct qstr *path,
        dprintk("NFS call  symlink %s -> %s\n", name->name, path->name);
        nfs_fattr_init(fattr);
        fhandle->size = 0;
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_SYMLINK, &arg, NULL, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_mark_for_revalidate(dir);
        dprintk("NFS reply symlink: %d\n", status);
        return status;
@@ -425,11 +470,16 @@ nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
                .fh             = &fhandle,
                .fattr          = &fattr
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_MKDIR],
+               .rpc_argp       = &arg,
+               .rpc_resp       = &res,
+       };
        int                     status;
 
        dprintk("NFS call  mkdir %s\n", dentry->d_name.name);
        nfs_fattr_init(&fattr);
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_mark_for_revalidate(dir);
        if (status == 0)
                status = nfs_instantiate(dentry, &fhandle, &fattr);
@@ -445,10 +495,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
                .name           = name->name,
                .len            = name->len
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_RMDIR],
+               .rpc_argp       = &arg,
+       };
        int                     status;
 
        dprintk("NFS call  rmdir %s\n", name->name);
-       status = rpc_call(NFS_CLIENT(dir), NFSPROC_RMDIR, &arg, NULL, 0);
+       status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_mark_for_revalidate(dir);
        dprintk("NFS reply rmdir: %d\n", status);
        return status;
@@ -470,13 +524,12 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                .fh             = NFS_FH(dir),
                .cookie         = cookie,
                .count          = count,
-               .pages          = &page
+               .pages          = &page,
        };
        struct rpc_message      msg = {
                .rpc_proc       = &nfs_procedures[NFSPROC_READDIR],
                .rpc_argp       = &arg,
-               .rpc_resp       = NULL,
-               .rpc_cred       = cred
+               .rpc_cred       = cred,
        };
        int                     status;
 
@@ -495,11 +548,16 @@ nfs_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
                        struct nfs_fsstat *stat)
 {
        struct nfs2_fsstat fsinfo;
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_STATFS],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = &fsinfo,
+       };
        int     status;
 
        dprintk("NFS call  statfs\n");
        nfs_fattr_init(stat->fattr);
-       status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply statfs: %d\n", status);
        if (status)
                goto out;
@@ -518,11 +576,16 @@ nfs_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
                        struct nfs_fsinfo *info)
 {
        struct nfs2_fsstat fsinfo;
+       struct rpc_message msg = {
+               .rpc_proc       = &nfs_procedures[NFSPROC_STATFS],
+               .rpc_argp       = fhandle,
+               .rpc_resp       = &fsinfo,
+       };
        int     status;
 
        dprintk("NFS call  fsinfo\n");
        nfs_fattr_init(info->fattr);
-       status = rpc_call(server->client, NFSPROC_STATFS, fhandle, &fsinfo, 0);
+       status = rpc_call_sync(server->client, &msg, 0);
        dprintk("NFS reply fsinfo: %d\n", status);
        if (status)
                goto out;
@@ -550,10 +613,8 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void nfs_read_done(struct rpc_task *task, void *calldata)
+static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = calldata;
-
        if (task->tk_status >= 0) {
                nfs_refresh_inode(data->inode, data->res.fattr);
                /* Emulate the eof flag, which isn't normally needed in NFSv2
@@ -562,20 +623,11 @@ static void nfs_read_done(struct rpc_task *task, void *calldata)
                if (data->args.offset + data->args.count >= data->res.fattr->size)
                        data->res.eof = 1;
        }
-       nfs_readpage_result(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs_read_ops = {
-       .rpc_call_done = nfs_read_done,
-       .rpc_release = nfs_readdata_release,
-};
-
-static void
-nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data)
 {
-       struct rpc_task         *task = &data->task;
-       struct inode            *inode = data->inode;
-       int                     flags;
        struct rpc_message      msg = {
                .rpc_proc       = &nfs_procedures[NFSPROC_READ],
                .rpc_argp       = &data->args,
@@ -583,34 +635,18 @@ nfs_proc_read_setup(struct nfs_read_data *data)
                .rpc_cred       = data->cred,
        };
 
-       /* N.B. Do we need to test? Never called for swapfile inode */
-       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-
-       /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
-static void nfs_write_done(struct rpc_task *task, void *calldata)
+static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data *data = calldata;
-
        if (task->tk_status >= 0)
                nfs_post_op_update_inode(data->inode, data->res.fattr);
-       nfs_writeback_done(task, calldata);
+       return 0;
 }
 
-static const struct rpc_call_ops nfs_write_ops = {
-       .rpc_call_done = nfs_write_done,
-       .rpc_release = nfs_writedata_release,
-};
-
-static void
-nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
-       struct rpc_task         *task = &data->task;
-       struct inode            *inode = data->inode;
-       int                     flags;
        struct rpc_message      msg = {
                .rpc_proc       = &nfs_procedures[NFSPROC_WRITE],
                .rpc_argp       = &data->args,
@@ -621,12 +657,8 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
        /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
        data->args.stable = NFS_FILE_SYNC;
 
-       /* Set the initial flags for the task.  */
-       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-
        /* Finalize the task. */
-       rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
-       rpc_call_setup(task, &msg, 0);
+       rpc_call_setup(&data->task, &msg, 0);
 }
 
 static void
@@ -672,7 +704,9 @@ struct nfs_rpc_ops  nfs_v2_clientops = {
        .pathconf       = nfs_proc_pathconf,
        .decode_dirent  = nfs_decode_dirent,
        .read_setup     = nfs_proc_read_setup,
+       .read_done      = nfs_read_done,
        .write_setup    = nfs_proc_write_setup,
+       .write_done     = nfs_write_done,
        .commit_setup   = nfs_proc_commit_setup,
        .file_open      = nfs_open,
        .file_release   = nfs_release,
index 05eb43f..3961524 100644 (file)
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ  (32)
 
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+       struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+
+       if (p) {
+               memset(p, 0, sizeof(*p));
+               INIT_LIST_HEAD(&p->pages);
+               if (pagecount < NFS_PAGEVEC_SIZE)
+                       p->pagevec = &p->page_array[0];
+               else {
+                       size_t size = ++pagecount * sizeof(struct page *);
+                       p->pagevec = kmalloc(size, GFP_NOFS);
+                       if (p->pagevec) {
+                               memset(p->pagevec, 0, size);
+                       } else {
+                               mempool_free(p, nfs_rdata_mempool);
+                               p = NULL;
+                       }
+               }
+       }
+       return p;
+}
+
+void nfs_readdata_free(struct nfs_read_data *p)
+{
+       if (p && (p->pagevec != &p->page_array[0]))
+               kfree(p->pagevec);
+       mempool_free(p, nfs_rdata_mempool);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
@@ -133,6 +165,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
                }
                count -= result;
                rdata->args.pgbase += result;
+               nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
                /* Note: result == 0 should only happen if we're caching
                 * a write that extends the file and punches a hole.
                 */
@@ -196,9 +230,11 @@ static void nfs_readpage_release(struct nfs_page *req)
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset)
 {
        struct inode            *inode;
+       int flags;
 
        data->req         = req;
        data->inode       = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +252,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
        data->res.eof     = 0;
        nfs_fattr_init(&data->fattr);
 
+       /* Set up the initial task struct. */
+       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+       rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
        NFS_PROTO(inode)->read_setup(data);
 
        data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +342,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
                list_del_init(&data->pages);
 
                data->pagevec[0] = page;
-               data->complete = nfs_readpage_result_partial;
 
                if (nbytes > rsize) {
-                       nfs_read_rpcsetup(req, data, rsize, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       rsize, offset);
                        offset += rsize;
                        nbytes -= rsize;
                } else {
-                       nfs_read_rpcsetup(req, data, nbytes, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       nbytes, offset);
                        nbytes = 0;
                }
                nfs_execute_read(data);
@@ -356,8 +396,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
        }
        req = nfs_list_entry(data->pages.next);
 
-       data->complete = nfs_readpage_result_full;
-       nfs_read_rpcsetup(req, data, count, 0);
+       nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
        nfs_execute_read(data);
        return 0;
@@ -391,12 +430,15 @@ nfs_pagein_list(struct list_head *head, int rpages)
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
  
-       if (status >= 0) {
+       if (nfs_readpage_result(task, data) != 0)
+               return;
+       if (task->tk_status >= 0) {
                unsigned int request = data->args.count;
                unsigned int result = data->res.count;
 
@@ -415,20 +457,28 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
        }
 }
 
+static const struct rpc_call_ops nfs_read_partial_ops = {
+       .rpc_call_done = nfs_readpage_result_partial,
+       .rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        unsigned int count = data->res.count;
 
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
                struct page *page = req->wb_page;
                nfs_list_remove_request(req);
 
-               if (status >= 0) {
+               if (task->tk_status >= 0) {
                        if (count < PAGE_CACHE_SIZE) {
                                if (count < req->wb_bytes)
                                        memclear_highpage_flush(page,
@@ -444,22 +494,33 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
        }
 }
 
+static const struct rpc_call_ops nfs_read_full_ops = {
+       .rpc_call_done = nfs_readpage_result_full,
+       .rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = calldata;
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
-       int status = task->tk_status;
+       int status;
 
        dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-               task->tk_pid, status);
+               task->tk_pid, task->tk_status);
+
+       status = NFS_PROTO(data->inode)->read_done(task, data);
+       if (status != 0)
+               return status;
+
+       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
        /* Is this a short read? */
        if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+               nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Yes, so retry the read at the end of the data */
@@ -467,14 +528,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
                        argp->pgbase += resp->count;
                        argp->count -= resp->count;
                        rpc_restart_call(task);
-                       return;
+                       return -EAGAIN;
                }
                task->tk_status = -EIO;
        }
        spin_lock(&data->inode->i_lock);
        NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&data->inode->i_lock);
-       data->complete(data, status);
+       return 0;
 }
 
 /*
@@ -491,6 +552,9 @@ int nfs_readpage(struct file *file, struct page *page)
 
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
        /*
         * Try to flush any pending writes to the file..
         *
@@ -570,6 +634,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,6 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (!list_empty(&head)) {
                int err = nfs_pagein_list(&head, server->rpages);
                if (!ret)
+                       nfs_add_stats(inode, NFSIOS_READPAGES, err);
                        ret = err;
        }
        put_nfs_open_context(desc.ctx);
index a65c7b5..0e28189 100644 (file)
@@ -163,10 +163,9 @@ nfs_async_unlink(struct dentry *dentry)
        struct rpc_clnt *clnt = NFS_CLIENT(dir->d_inode);
        int             status = -ENOMEM;
 
-       data = kmalloc(sizeof(*data), GFP_KERNEL);
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data)
                goto out;
-       memset(data, 0, sizeof(*data));
 
        data->cred = rpcauth_lookupcred(clnt->cl_auth, 0);
        if (IS_ERR(data->cred)) {
index 9449b68..3f52254 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
@@ -76,23 +77,52 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
                                            struct inode *,
                                            struct page *,
                                            unsigned int, unsigned int);
-static void nfs_writeback_done_partial(struct nfs_write_data *, int);
-static void nfs_writeback_done_full(struct nfs_write_data *, int);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
                           unsigned int npages, int how);
+static const struct rpc_call_ops nfs_write_partial_ops;
+static const struct rpc_call_ops nfs_write_full_ops;
+static const struct rpc_call_ops nfs_commit_ops;
 
 static kmem_cache_t *nfs_wdata_cachep;
-mempool_t *nfs_wdata_mempool;
+static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
        struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
 
+       if (p) {
+               memset(p, 0, sizeof(*p));
+               INIT_LIST_HEAD(&p->pages);
+               if (pagecount < NFS_PAGEVEC_SIZE)
+                       p->pagevec = &p->page_array[0];
+               else {
+                       size_t size = ++pagecount * sizeof(struct page *);
+                       p->pagevec = kzalloc(size, GFP_NOFS);
+                       if (!p->pagevec) {
+                               mempool_free(p, nfs_commit_mempool);
+                               p = NULL;
+                       }
+               }
+       }
+       return p;
+}
+
+void nfs_commit_free(struct nfs_write_data *p)
+{
+       if (p && (p->pagevec != &p->page_array[0]))
+               kfree(p->pagevec);
+       mempool_free(p, nfs_commit_mempool);
+}
+
+struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
+{
+       struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
        if (p) {
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
@@ -104,7 +134,7 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
                        if (p->pagevec) {
                                memset(p->pagevec, 0, size);
                        } else {
-                               mempool_free(p, nfs_commit_mempool);
+                               mempool_free(p, nfs_wdata_mempool);
                                p = NULL;
                        }
                }
@@ -112,11 +142,11 @@ static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
        return p;
 }
 
-static inline void nfs_commit_free(struct nfs_write_data *p)
+void nfs_writedata_free(struct nfs_write_data *p)
 {
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
-       mempool_free(p, nfs_commit_mempool);
+       mempool_free(p, nfs_wdata_mempool);
 }
 
 void nfs_writedata_release(void *wdata)
@@ -136,6 +166,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
        end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
        if (i_size >= end)
                return;
+       nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
        i_size_write(inode, end);
 }
 
@@ -225,6 +256,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
                wdata->args.pgbase += result;
                written += result;
                count -= result;
+               nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
        } while (count);
        /* Update file length */
        nfs_grow_file(page, offset, written);
@@ -281,6 +313,9 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
        int priority = wb_priority(wbc);
        int err;
 
+       nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
+       nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
+
        /*
         * Note: We need to ensure that we have a reference to the inode
         *       if we are to do asynchronous writes. If not, waiting
@@ -345,6 +380,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
        struct inode *inode = mapping->host;
        int err;
 
+       nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
+
        err = generic_writepages(mapping, wbc);
        if (err)
                return err;
@@ -356,6 +393,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
        err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
        if (err < 0)
                goto out;
+       nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
        wbc->nr_to_write -= err;
        if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
                err = nfs_wait_on_requests(inode, 0, 0);
@@ -391,6 +429,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
                if (nfs_have_delegation(inode, FMODE_WRITE))
                        nfsi->change_attr++;
        }
+       SetPagePrivate(req->wb_page);
        nfsi->npages++;
        atomic_inc(&req->wb_count);
        return 0;
@@ -407,6 +446,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
        BUG_ON (!NFS_WBACK_BUSY(req));
 
        spin_lock(&nfsi->req_lock);
+       ClearPagePrivate(req->wb_page);
        radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
        nfsi->npages--;
        if (!nfsi->npages) {
@@ -499,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
  *
  * Interruptible by signals only if mounted with intr flag.
  */
-static int
-nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *req;
@@ -513,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
        else
                idx_end = idx_start + npages - 1;
 
-       spin_lock(&nfsi->req_lock);
        next = idx_start;
        while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
                if (req->wb_index > idx_end)
@@ -526,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
                spin_unlock(&nfsi->req_lock);
                error = nfs_wait_on_request(req);
                nfs_release_request(req);
+               spin_lock(&nfsi->req_lock);
                if (error < 0)
                        return error;
-               spin_lock(&nfsi->req_lock);
                res++;
        }
-       spin_unlock(&nfsi->req_lock);
        return res;
 }
 
+static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+{
+       struct nfs_inode *nfsi = NFS_I(inode);
+       int ret;
+
+       spin_lock(&nfsi->req_lock);
+       ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+       spin_unlock(&nfsi->req_lock);
+       return ret;
+}
+
 /*
  * nfs_scan_dirty - Scan an inode for dirty requests
  * @inode: NFS inode to scan
@@ -586,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
        }
        return res;
 }
+#else
+static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+{
+       return 0;
+}
 #endif
 
 static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
@@ -598,6 +651,9 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 
        if (!bdi_write_congested(bdi))
                return 0;
+
+       nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+
        if (intr) {
                struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
                sigset_t oldset;
@@ -653,8 +709,11 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
                                spin_unlock(&nfsi->req_lock);
                                error = nfs_wait_on_request(req);
                                nfs_release_request(req);
-                               if (error < 0)
+                               if (error < 0) {
+                                       if (new)
+                                               nfs_release_request(new);
                                        return ERR_PTR(error);
+                               }
                                continue;
                        }
                        spin_unlock(&nfsi->req_lock);
@@ -748,6 +807,8 @@ int nfs_updatepage(struct file *file, struct page *page,
        struct nfs_page *req;
        int             status = 0;
 
+       nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
+
        dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
                file->f_dentry->d_parent->d_name.name,
                file->f_dentry->d_name.name, count,
@@ -857,10 +918,12 @@ static inline int flush_task_priority(int how)
  */
 static void nfs_write_rpcsetup(struct nfs_page *req,
                struct nfs_write_data *data,
+               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset,
                int how)
 {
        struct inode            *inode;
+       int flags;
 
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
@@ -881,6 +944,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
 
+       /* Set up the initial task struct.  */
+       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+       rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
        NFS_PROTO(inode)->write_setup(data, how);
 
        data->task.tk_priority = flush_task_priority(how);
@@ -910,7 +976,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
 {
        struct nfs_page *req = nfs_list_entry(head->next);
        struct page *page = req->wb_page;
@@ -944,14 +1010,15 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
                list_del_init(&data->pages);
 
                data->pagevec[0] = page;
-               data->complete = nfs_writeback_done_partial;
 
                if (nbytes > wsize) {
-                       nfs_write_rpcsetup(req, data, wsize, offset, how);
+                       nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+                                       wsize, offset, how);
                        offset += wsize;
                        nbytes -= wsize;
                } else {
-                       nfs_write_rpcsetup(req, data, nbytes, offset, how);
+                       nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+                                       nbytes, offset, how);
                        nbytes = 0;
                }
                nfs_execute_write(data);
@@ -978,16 +1045,13 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
+static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_write_data   *data;
        unsigned int            count;
 
-       if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
-               return nfs_flush_multi(head, inode, how);
-
        data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
        if (!data)
                goto out_bad;
@@ -1005,9 +1069,8 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
        }
        req = nfs_list_entry(data->pages.next);
 
-       data->complete = nfs_writeback_done_full;
        /* Set up the argument struct */
-       nfs_write_rpcsetup(req, data, count, 0, how);
+       nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 
        nfs_execute_write(data);
        return 0;
@@ -1021,24 +1084,32 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
        return -ENOMEM;
 }
 
-static int
-nfs_flush_list(struct list_head *head, int wpages, int how)
+static int nfs_flush_list(struct inode *inode, struct list_head *head, int npages, int how)
 {
        LIST_HEAD(one_request);
-       struct nfs_page         *req;
-       int                     error = 0;
-       unsigned int            pages = 0;
+       int (*flush_one)(struct inode *, struct list_head *, int);
+       struct nfs_page *req;
+       int wpages = NFS_SERVER(inode)->wpages;
+       int wsize = NFS_SERVER(inode)->wsize;
+       int error;
 
-       while (!list_empty(head)) {
-               pages += nfs_coalesce_requests(head, &one_request, wpages);
+       flush_one = nfs_flush_one;
+       if (wsize < PAGE_CACHE_SIZE)
+               flush_one = nfs_flush_multi;
+       /* For single writes, FLUSH_STABLE is more efficient */
+       if (npages <= wpages && npages == NFS_I(inode)->npages
+                       && nfs_list_entry(head->next)->wb_bytes <= wsize)
+               how |= FLUSH_STABLE;
+
+       do {
+               nfs_coalesce_requests(head, &one_request, wpages);
                req = nfs_list_entry(one_request.next);
-               error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how);
+               error = flush_one(inode, &one_request, how);
                if (error < 0)
-                       break;
-       }
-       if (error >= 0)
-               return pages;
-
+                       goto out_err;
+       } while (!list_empty(head));
+       return 0;
+out_err:
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
@@ -1051,8 +1122,9 @@ nfs_flush_list(struct list_head *head, int wpages, int how)
 /*
  * Handle a write reply that flushed part of a page.
  */
-static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 {
+       struct nfs_write_data   *data = calldata;
        struct nfs_page         *req = data->req;
        struct page             *page = req->wb_page;
 
@@ -1062,11 +1134,14 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
                req->wb_bytes,
                (long long)req_offset(req));
 
-       if (status < 0) {
+       if (nfs_writeback_done(task, data) != 0)
+               return;
+
+       if (task->tk_status < 0) {
                ClearPageUptodate(page);
                SetPageError(page);
-               req->wb_context->error = status;
-               dprintk(", error = %d\n", status);
+               req->wb_context->error = task->tk_status;
+               dprintk(", error = %d\n", task->tk_status);
        } else {
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
                if (data->verf.committed < NFS_FILE_SYNC) {
@@ -1087,6 +1162,11 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
                nfs_writepage_release(req);
 }
 
+static const struct rpc_call_ops nfs_write_partial_ops = {
+       .rpc_call_done = nfs_writeback_done_partial,
+       .rpc_release = nfs_writedata_release,
+};
+
 /*
  * Handle a write reply that flushes a whole page.
  *
@@ -1094,11 +1174,15 @@ static void nfs_writeback_done_partial(struct nfs_write_data *data, int status)
  *       writebacks since the page->count is kept > 1 for as long
  *       as the page has a write request pending.
  */
-static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
+static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 {
+       struct nfs_write_data   *data = calldata;
        struct nfs_page         *req;
        struct page             *page;
 
+       if (nfs_writeback_done(task, data) != 0)
+               return;
+
        /* Update attributes as result of writeback. */
        while (!list_empty(&data->pages)) {
                req = nfs_list_entry(data->pages.next);
@@ -1111,13 +1195,13 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
                        req->wb_bytes,
                        (long long)req_offset(req));
 
-               if (status < 0) {
+               if (task->tk_status < 0) {
                        ClearPageUptodate(page);
                        SetPageError(page);
-                       req->wb_context->error = status;
+                       req->wb_context->error = task->tk_status;
                        end_page_writeback(page);
                        nfs_inode_remove_request(req);
-                       dprintk(", error = %d\n", status);
+                       dprintk(", error = %d\n", task->tk_status);
                        goto next;
                }
                end_page_writeback(page);
@@ -1139,18 +1223,30 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
        }
 }
 
+static const struct rpc_call_ops nfs_write_full_ops = {
+       .rpc_call_done = nfs_writeback_done_full,
+       .rpc_release = nfs_writedata_release,
+};
+
+
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task, void *calldata)
+int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 {
-       struct nfs_write_data   *data = calldata;
        struct nfs_writeargs    *argp = &data->args;
        struct nfs_writeres     *resp = &data->res;
+       int status;
 
        dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
                task->tk_pid, task->tk_status);
 
+       /* Call the NFS version-specific code */
+       status = NFS_PROTO(data->inode)->write_done(task, data);
+       if (status != 0)
+               return status;
+       nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
        if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
                /* We tried a write call, but the server did not
@@ -1176,6 +1272,8 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
        if (task->tk_status >= 0 && resp->count < argp->count) {
                static unsigned long    complain;
 
+               nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);
+
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Was this an NFSv2 write or an NFSv3 stable write? */
@@ -1191,7 +1289,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
                                argp->stable = NFS_FILE_SYNC;
                        }
                        rpc_restart_call(task);
-                       return;
+                       return -EAGAIN;
                }
                if (time_before(complain, jiffies)) {
                        printk(KERN_WARNING
@@ -1202,11 +1300,7 @@ void nfs_writeback_done(struct rpc_task *task, void *calldata)
                /* Can't do anything about it except throw an error. */
                task->tk_status = -EIO;
        }
-
-       /*
-        * Process the nfs_page list
-        */
-       data->complete(data, task->tk_status);
+       return 0;
 }
 
 
@@ -1220,10 +1314,12 @@ void nfs_commit_release(void *wdata)
  * Set up the argument/result storage required for the RPC call.
  */
 static void nfs_commit_rpcsetup(struct list_head *head,
-               struct nfs_write_data *data, int how)
+               struct nfs_write_data *data,
+               int how)
 {
        struct nfs_page         *first;
        struct inode            *inode;
+       int flags;
 
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
@@ -1243,7 +1339,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
        data->res.fattr   = &data->fattr;
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
-       
+
+       /* Set up the initial task struct.  */
+       flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+       rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
        NFS_PROTO(inode)->commit_setup(data, how);
 
        data->task.tk_priority = flush_task_priority(how);
@@ -1284,7 +1383,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 /*
  * COMMIT call returned
  */
-void nfs_commit_done(struct rpc_task *task, void *calldata)
+static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
        struct nfs_write_data   *data = calldata;
        struct nfs_page         *req;
@@ -1293,6 +1392,10 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
         dprintk("NFS: %4d nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
 
+       /* Call the NFS version-specific code */
+       if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
+               return;
+
        while (!list_empty(&data->pages)) {
                req = nfs_list_entry(data->pages.next);
                nfs_list_remove_request(req);
@@ -1326,6 +1429,16 @@ void nfs_commit_done(struct rpc_task *task, void *calldata)
        }
        sub_page_state(nr_unstable,res);
 }
+
+static const struct rpc_call_ops nfs_commit_ops = {
+       .rpc_call_done = nfs_commit_done,
+       .rpc_release = nfs_commit_release,
+};
+#else
+static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
+{
+       return 0;
+}
 #endif
 
 static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
@@ -1333,24 +1446,16 @@ static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        LIST_HEAD(head);
-       int                     res,
-                               error = 0;
+       int res;
 
        spin_lock(&nfsi->req_lock);
        res = nfs_scan_dirty(inode, &head, idx_start, npages);
        spin_unlock(&nfsi->req_lock);
        if (res) {
-               struct nfs_server *server = NFS_SERVER(inode);
-
-               /* For single writes, FLUSH_STABLE is more efficient */
-               if (res == nfsi->npages && nfsi->npages <= server->wpages) {
-                       if (res > 1 || nfs_list_entry(head.next)->wb_bytes <= server->wsize)
-                               how |= FLUSH_STABLE;
-               }
-               error = nfs_flush_list(&head, server->wpages, how);
+               int error = nfs_flush_list(inode, &head, res, how);
+               if (error < 0)
+                       return error;
        }
-       if (error < 0)
-               return error;
        return res;
 }
 
@@ -1359,14 +1464,13 @@ int nfs_commit_inode(struct inode *inode, int how)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        LIST_HEAD(head);
-       int                     res,
-                               error = 0;
+       int res;
 
        spin_lock(&nfsi->req_lock);
        res = nfs_scan_commit(inode, &head, 0, 0);
        spin_unlock(&nfsi->req_lock);
        if (res) {
-               error = nfs_commit_list(inode, &head, how);
+               int error = nfs_commit_list(inode, &head, how);
                if (error < 0)
                        return error;
        }
@@ -1374,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
-                 unsigned int npages, int how)
+int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
+               unsigned int npages, int how)
 {
+       struct nfs_inode *nfsi = NFS_I(inode);
+       LIST_HEAD(head);
        int nocommit = how & FLUSH_NOCOMMIT;
-       int wait = how & FLUSH_WAIT;
-       int error;
-
-       how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
+       int pages, ret;
 
+       how &= ~FLUSH_NOCOMMIT;
+       spin_lock(&nfsi->req_lock);
        do {
-               if (wait) {
-                       error = nfs_wait_on_requests(inode, idx_start, npages);
-                       if (error != 0)
-                               continue;
-               }
-               error = nfs_flush_inode(inode, idx_start, npages, how);
-               if (error != 0)
+               ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
+               if (ret != 0)
                        continue;
-               if (!nocommit)
-                       error = nfs_commit_inode(inode, how);
-       } while (error > 0);
-       return error;
+               pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+               if (pages != 0) {
+                       spin_unlock(&nfsi->req_lock);
+                       ret = nfs_flush_list(inode, &head, pages, how);
+                       spin_lock(&nfsi->req_lock);
+                       continue;
+               }
+               if (nocommit)
+                       break;
+               pages = nfs_scan_commit(inode, &head, 0, 0);
+               if (pages == 0)
+                       break;
+               spin_unlock(&nfsi->req_lock);
+               ret = nfs_commit_list(inode, &head, how);
+               spin_lock(&nfsi->req_lock);
+       } while (ret >= 0);
+       spin_unlock(&nfsi->req_lock);
+       return ret;
 }
 
 int nfs_init_writepagecache(void)
index d828662..4f391cb 100644 (file)
@@ -326,6 +326,8 @@ out:
         .p_encode = (kxdrproc_t) nfs4_xdr_##argtype,                    \
         .p_decode = (kxdrproc_t) nfs4_xdr_##restype,                    \
         .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
+        .p_statidx = NFSPROC4_CB_##call,                               \
+       .p_name   = #proc,                                              \
 }
 
 static struct rpc_procinfo     nfs4_cb_procedures[] = {
index 1143cfb..f6ab762 100644 (file)
@@ -2639,7 +2639,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        struct nfs4_stateid *lock_stp;
        struct file *filp;
        struct file_lock file_lock;
-       struct file_lock *conflock;
+       struct file_lock conflock;
        int status = 0;
        unsigned int strhashval;
 
@@ -2775,11 +2775,11 @@ conflicting_lock:
        /* XXX There is a race here. Future patch needed to provide 
         * an atomic posix_lock_and_test_file
         */
-       if (!(conflock = posix_test_lock(filp, &file_lock))) {
+       if (!posix_test_lock(filp, &file_lock, &conflock)) {
                status = nfserr_serverfault;
                goto out;
        }
-       nfs4_set_lock_denied(conflock, &lock->lk_denied);
+       nfs4_set_lock_denied(&conflock, &lock->lk_denied);
 out:
        if (status && lock->lk_is_new && lock_sop)
                release_stateowner(lock_sop);
@@ -2800,7 +2800,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        struct inode *inode;
        struct file file;
        struct file_lock file_lock;
-       struct file_lock *conflicting_lock;
+       struct file_lock conflock;
        int status;
 
        if (nfs4_in_grace())
@@ -2864,10 +2864,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
        file.f_dentry = current_fh->fh_dentry;
 
        status = nfs_ok;
-       conflicting_lock = posix_test_lock(&file, &file_lock);
-       if (conflicting_lock) {
+       if (posix_test_lock(&file, &file_lock, &conflock)) {
                status = nfserr_denied;
-               nfs4_set_lock_denied(conflicting_lock, &lockt->lt_denied);
+               nfs4_set_lock_denied(&conflock, &lockt->lt_denied);
        }
 out:
        nfs4_unlock_state();
index 20feb75..8f1f49c 100644 (file)
@@ -104,6 +104,7 @@ enum pid_directory_inos {
        PROC_TGID_MAPS,
        PROC_TGID_NUMA_MAPS,
        PROC_TGID_MOUNTS,
+       PROC_TGID_MOUNTSTATS,
        PROC_TGID_WCHAN,
 #ifdef CONFIG_MMU
        PROC_TGID_SMAPS,
@@ -144,6 +145,7 @@ enum pid_directory_inos {
        PROC_TID_MAPS,
        PROC_TID_NUMA_MAPS,
        PROC_TID_MOUNTS,
+       PROC_TID_MOUNTSTATS,
        PROC_TID_WCHAN,
 #ifdef CONFIG_MMU
        PROC_TID_SMAPS,
@@ -201,6 +203,7 @@ static struct pid_entry tgid_base_stuff[] = {
        E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
        E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
        E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
+       E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
 #ifdef CONFIG_MMU
        E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
 #endif
@@ -732,6 +735,38 @@ static struct file_operations proc_mounts_operations = {
        .poll           = mounts_poll,
 };
 
+extern struct seq_operations mountstats_op;
+static int mountstats_open(struct inode *inode, struct file *file)
+{
+       struct task_struct *task = proc_task(inode);
+       int ret = seq_open(file, &mountstats_op);
+
+       if (!ret) {
+               struct seq_file *m = file->private_data;
+               struct namespace *namespace;
+               task_lock(task);
+               namespace = task->namespace;
+               if (namespace)
+                       get_namespace(namespace);
+               task_unlock(task);
+
+               if (namespace)
+                       m->private = namespace;
+               else {
+                       seq_release(inode, file);
+                       ret = -EINVAL;
+               }
+       }
+       return ret;
+}
+
+static struct file_operations proc_mountstats_operations = {
+       .open           = mountstats_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = mounts_release,
+};
+
 #define PROC_BLOCK_SIZE        (3*1024)                /* 4K page size but our output routines use some slack for overruns */
 
 static ssize_t proc_info_read(struct file * file, char __user * buf,
@@ -1730,6 +1765,10 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
                        inode->i_fop = &proc_smaps_operations;
                        break;
 #endif
+               case PROC_TID_MOUNTSTATS:
+               case PROC_TGID_MOUNTSTATS:
+                       inode->i_fop = &proc_mountstats_operations;
+                       break;
 #ifdef CONFIG_SECURITY
                case PROC_TID_ATTR:
                        inode->i_nlink = 2;
index 9b34a1b..404d391 100644 (file)
@@ -671,7 +671,6 @@ extern spinlock_t files_lock;
 #define FL_POSIX       1
 #define FL_FLOCK       2
 #define FL_ACCESS      8       /* not trying to lock, just looking */
-#define FL_LOCKD       16      /* lock held by rpc.lockd */
 #define FL_LEASE       32      /* lease held on this file */
 #define FL_SLEEP       128     /* A blocking lock */
 
@@ -735,8 +734,6 @@ struct file_lock {
 #define OFFT_OFFSET_MAX        INT_LIMIT(off_t)
 #endif
 
-extern struct list_head file_lock_list;
-
 #include <linux/fcntl.h>
 
 extern int fcntl_getlk(struct file *, struct flock __user *);
@@ -758,10 +755,9 @@ extern void locks_init_lock(struct file_lock *);
 extern void locks_copy_lock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_flock(struct file *);
-extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
+extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern void posix_block_lock(struct file_lock *, struct file_lock *);
 extern int posix_unblock_lock(struct file *, struct file_lock *);
 extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
@@ -1090,6 +1086,7 @@ struct super_operations {
        void (*umount_begin) (struct super_block *);
 
        int (*show_options)(struct seq_file *, struct vfsmount *);
+       int (*show_stats)(struct seq_file *, struct vfsmount *);
 
        ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
index ef21ed2..995f89d 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/config.h>
 #include <linux/in.h>
 #include <linux/fs.h>
+#include <linux/kref.h>
 #include <linux/utsname.h>
 #include <linux/nfsd/nfsfh.h>
 #include <linux/lockd/bind.h>
@@ -58,6 +59,8 @@ struct nlm_host {
        unsigned long           h_expires;      /* eligible for GC */
        struct list_head        h_lockowners;   /* Lockowners for the client */
        spinlock_t              h_lock;
+       struct list_head        h_granted;      /* Locks in GRANTED state */
+       struct list_head        h_reclaim;      /* Locks in RECLAIM state */
 };
 
 /*
@@ -83,9 +86,9 @@ struct nlm_rqst {
        struct nlm_host *       a_host;         /* host handle */
        struct nlm_args         a_args;         /* arguments */
        struct nlm_res          a_res;          /* result */
-       struct nlm_wait *       a_block;
+       struct nlm_block *      a_block;
        unsigned int            a_retries;      /* Retry count */
-       char                    a_owner[NLMCLNT_OHSIZE];
+       u8                      a_owner[NLMCLNT_OHSIZE];
 };
 
 /*
@@ -110,16 +113,16 @@ struct nlm_file {
  */
 #define NLM_NEVER              (~(unsigned long) 0)
 struct nlm_block {
+       struct kref             b_count;        /* Reference count */
        struct nlm_block *      b_next;         /* linked list (all blocks) */
        struct nlm_block *      b_fnext;        /* linked list (per file) */
-       struct nlm_rqst         b_call;         /* RPC args & callback info */
+       struct nlm_rqst *       b_call;         /* RPC args & callback info */
        struct svc_serv *       b_daemon;       /* NLM service */
        struct nlm_host *       b_host;         /* host handle for RPC clnt */
        unsigned long           b_when;         /* next re-xmit */
        unsigned int            b_id;           /* block id */
        unsigned char           b_queued;       /* re-queued */
        unsigned char           b_granted;      /* VFS granted lock */
-       unsigned char           b_incall;       /* doing callback */
        unsigned char           b_done;         /* callback complete */
        struct nlm_file *       b_file;         /* file in question */
 };
@@ -145,15 +148,16 @@ extern unsigned long              nlmsvc_timeout;
 /*
  * Lockd client functions
  */
-struct nlm_rqst * nlmclnt_alloc_call(void);
-int              nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl);
-void             nlmclnt_finish_block(struct nlm_rqst *req);
-long             nlmclnt_block(struct nlm_rqst *req, long timeout);
+struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
+void             nlm_release_call(struct nlm_rqst *);
+int              nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+int              nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
+struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
+void             nlmclnt_finish_block(struct nlm_wait *block);
+int              nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
 u32              nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *);
 void             nlmclnt_recovery(struct nlm_host *, u32);
 int              nlmclnt_reclaim(struct nlm_host *, struct file_lock *);
-int              nlmclnt_setgrantargs(struct nlm_rqst *, struct nlm_lock *);
-void             nlmclnt_freegrantargs(struct nlm_rqst *);
 
 /*
  * Host cache
@@ -172,7 +176,6 @@ extern struct nlm_host *nlm_find_client(void);
 /*
  * Server-side lock handling
  */
-int              nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 u32              nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
                                        struct nlm_lock *, int, struct nlm_cookie *);
 u32              nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
@@ -180,7 +183,7 @@ u32           nlmsvc_testlock(struct nlm_file *, struct nlm_lock *,
                                        struct nlm_lock *);
 u32              nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *);
 unsigned long    nlmsvc_retry_blocked(void);
-int              nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
+void             nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *,
                                        int action);
 void     nlmsvc_grant_reply(struct svc_rqst *, struct nlm_cookie *, u32);
 
index 5d8aa32..c75a424 100644 (file)
@@ -25,6 +25,6 @@ u32   nlmsvc_share_file(struct nlm_host *, struct nlm_file *,
                                               struct nlm_args *);
 u32    nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *,
                                               struct nlm_args *);
-int    nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
+void   nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, int);
 
 #endif /* LINUX_LOCKD_SHARE_H */
index d7a5cc4..bb0a0f1 100644 (file)
@@ -28,6 +28,7 @@ struct nlm_lock {
        int                     len;    /* length of "caller" */
        struct nfs_fh           fh;
        struct xdr_netobj       oh;
+       u32                     svid;
        struct file_lock        fl;
 };
 
index b4dc6e2..cbebd7d 100644 (file)
@@ -56,9 +56,7 @@
  * When flushing a cluster of dirty pages, there can be different
  * strategies:
  */
-#define FLUSH_AGING            0       /* only flush old buffers */
 #define FLUSH_SYNC             1       /* file being synced, or contention */
-#define FLUSH_WAIT             2       /* wait for completion */
 #define FLUSH_STABLE           4       /* commit to stable storage */
 #define FLUSH_LOWPRI           8       /* low priority background flush */
 #define FLUSH_HIGHPRI          16      /* high priority memory reclaim flush */
@@ -78,6 +76,7 @@ struct nfs_access_entry {
 struct nfs4_state;
 struct nfs_open_context {
        atomic_t count;
+       struct vfsmount *vfsmnt;
        struct dentry *dentry;
        struct rpc_cred *cred;
        struct nfs4_state *state;
@@ -118,8 +117,7 @@ struct nfs_inode {
        unsigned long           cache_validity;         /* bit mask */
 
        /*
-        * read_cache_jiffies is when we started read-caching this inode,
-        * and read_cache_mtime is the mtime of the inode at that time.
+        * read_cache_jiffies is when we started read-caching this inode.
         * attrtimeo is for how long the cached information is assumed
         * to be valid. A successful attribute revalidation doubles
         * attrtimeo (up to acregmax/acdirmax), a failure resets it to
@@ -128,11 +126,6 @@ struct nfs_inode {
         * We need to revalidate the cached attrs for this inode if
         *
         *      jiffies - read_cache_jiffies > attrtimeo
-        *
-        * and invalidate any cached data/flush out any dirty pages if
-        * we find that
-        *
-        *      mtime != read_cache_mtime
         */
        unsigned long           read_cache_jiffies;
        unsigned long           attrtimeo;
@@ -311,12 +304,9 @@ extern void nfs_begin_attr_update(struct inode *);
 extern void nfs_end_attr_update(struct inode *);
 extern void nfs_begin_data_update(struct inode *);
 extern void nfs_end_data_update(struct inode *);
-extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
-extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
-extern void nfs_file_clear_open_context(struct file *filp);
 
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern u32 root_nfs_parse_addr(char *name); /*__init*/
@@ -415,21 +405,22 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *task, void *data);
-extern void nfs_writedata_release(void *data);
+extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
+extern void nfs_writedata_release(void *);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern void nfs_commit_done(struct rpc_task *, void *data);
-extern void nfs_commit_release(void *data);
+struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount);
+void nfs_commit_free(struct nfs_write_data *p);
 #endif
 
 /*
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern int  nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
+extern int  nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
+extern void nfs_commit_release(void *wdata);
 #else
 static inline int
 nfs_commit_inode(struct inode *inode, int how)
@@ -447,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
 static inline int
 nfs_wb_all(struct inode *inode)
 {
-       int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT);
+       int error = nfs_sync_inode_wait(inode, 0, 0, 0);
        return (error < 0) ? error : 0;
 }
 
@@ -456,8 +447,8 @@ nfs_wb_all(struct inode *inode)
  */
 static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
 {
-       int error = nfs_sync_inode(inode, page->index, 1,
-                       how | FLUSH_WAIT | FLUSH_STABLE);
+       int error = nfs_sync_inode_wait(inode, page->index, 1,
+                       how | FLUSH_STABLE);
        return (error < 0) ? error : 0;
 }
 
@@ -469,37 +460,8 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
 /*
  * Allocate and free nfs_write_data structures
  */
-extern mempool_t *nfs_wdata_mempool;
-
-static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
-{
-       struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-
-       if (p) {
-               memset(p, 0, sizeof(*p));
-               INIT_LIST_HEAD(&p->pages);
-               if (pagecount < NFS_PAGEVEC_SIZE)
-                       p->pagevec = &p->page_array[0];
-               else {
-                       size_t size = ++pagecount * sizeof(struct page *);
-                       p->pagevec = kmalloc(size, GFP_NOFS);
-                       if (p->pagevec) {
-                               memset(p->pagevec, 0, size);
-                       } else {
-                               mempool_free(p, nfs_wdata_mempool);
-                               p = NULL;
-                       }
-               }
-       }
-       return p;
-}
-
-static inline void nfs_writedata_free(struct nfs_write_data *p)
-{
-       if (p && (p->pagevec != &p->page_array[0]))
-               kfree(p->pagevec);
-       mempool_free(p, nfs_wdata_mempool);
-}
+extern struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount);
+extern void nfs_writedata_free(struct nfs_write_data *p);
 
 /*
  * linux/fs/nfs/read.c
@@ -507,44 +469,14 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
                struct list_head *, unsigned);
-extern void nfs_readpage_result(struct rpc_task *, void *);
-extern void  nfs_readdata_release(void *data);
-
+extern int  nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
+extern void nfs_readdata_release(void *data);
 
 /*
  * Allocate and free nfs_read_data structures
  */
-extern mempool_t *nfs_rdata_mempool;
-
-static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
-{
-       struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-
-       if (p) {
-               memset(p, 0, sizeof(*p));
-               INIT_LIST_HEAD(&p->pages);
-               if (pagecount < NFS_PAGEVEC_SIZE)
-                       p->pagevec = &p->page_array[0];
-               else {
-                       size_t size = ++pagecount * sizeof(struct page *);
-                       p->pagevec = kmalloc(size, GFP_NOFS);
-                       if (p->pagevec) {
-                               memset(p->pagevec, 0, size);
-                       } else {
-                               mempool_free(p, nfs_rdata_mempool);
-                               p = NULL;
-                       }
-               }
-       }
-       return p;
-}
-
-static inline void nfs_readdata_free(struct nfs_read_data *p)
-{
-       if (p && (p->pagevec != &p->page_array[0]))
-               kfree(p->pagevec);
-       mempool_free(p, nfs_rdata_mempool);
-}
+extern struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount);
+extern void nfs_readdata_free(struct nfs_read_data *p);
 
 /*
  * linux/fs/nfs3proc.c
index e2c18da..8617302 100644 (file)
@@ -12,8 +12,8 @@ struct nlm_lockowner;
  */
 struct nfs_lock_info {
        u32             state;
-       u32             flags;
        struct nlm_lockowner *owner;
+       struct list_head list;
 };
 
 struct nfs4_lock_state;
@@ -21,10 +21,4 @@ struct nfs4_lock_info {
        struct nfs4_lock_state *owner;
 };
 
-/*
- * Lock flag values
- */
-#define NFS_LCK_GRANTED                0x0001          /* lock has been granted */
-#define NFS_LCK_RECLAIM                0x0002          /* lock marked for reclaiming */
-
 #endif
index 3d3a305..65dec21 100644 (file)
@@ -4,6 +4,8 @@
 #include <linux/list.h>
 #include <linux/backing-dev.h>
 
+struct nfs_iostats;
+
 /*
  * NFS client parameters stored in the superblock.
  */
@@ -12,6 +14,7 @@ struct nfs_server {
        struct rpc_clnt *       client_sys;     /* 2nd handle for FSINFO */
        struct rpc_clnt *       client_acl;     /* ACL RPC client handle */
        struct nfs_rpc_ops *    rpc_ops;        /* NFS protocol vector */
+       struct nfs_iostats *    io_stats;       /* I/O statistics */
        struct backing_dev_info backing_dev_info;
        int                     flags;          /* various flags */
        unsigned int            caps;           /* server capabilities */
@@ -26,10 +29,13 @@ struct nfs_server {
        unsigned int            acregmax;
        unsigned int            acdirmin;
        unsigned int            acdirmax;
+       unsigned long           retrans_timeo;  /* retransmit timeout */
+       unsigned int            retrans_count;  /* number of retransmit tries */
        unsigned int            namelen;
        char *                  hostname;       /* remote hostname */
        struct nfs_fh           fh;
        struct sockaddr_in      addr;
+       unsigned long           mount_time;     /* when this fs was mounted */
 #ifdef CONFIG_NFS_V4
        /* Our own IP address, as a null-terminated string.
         * This is used to generate the clientid, and the callback address.
index 6d6f69e..7fafc4c 100644 (file)
@@ -695,7 +695,6 @@ struct nfs_read_data {
 #ifdef CONFIG_NFS_V4
        unsigned long           timestamp;      /* For lease renewal */
 #endif
-       void (*complete) (struct nfs_read_data *, int);
        struct page             *page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
@@ -714,7 +713,6 @@ struct nfs_write_data {
 #ifdef CONFIG_NFS_V4
        unsigned long           timestamp;      /* For lease renewal */
 #endif
-       void (*complete) (struct nfs_write_data *, int);
        struct page             *page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
@@ -769,8 +767,11 @@ struct nfs_rpc_ops {
                             struct nfs_pathconf *);
        u32 *   (*decode_dirent)(u32 *, struct nfs_entry *, int plus);
        void    (*read_setup)   (struct nfs_read_data *);
+       int     (*read_done)  (struct rpc_task *, struct nfs_read_data *);
        void    (*write_setup)  (struct nfs_write_data *, int how);
+       int     (*write_done)  (struct rpc_task *, struct nfs_write_data *);
        void    (*commit_setup) (struct nfs_write_data *, int how);
+       int     (*commit_done) (struct rpc_task *, struct nfs_write_data *);
        int     (*file_open)   (struct inode *, struct file *);
        int     (*file_release) (struct inode *, struct file *);
        int     (*lock)(struct file *, int, struct file_lock *);
index f147e6b..8fe9f35 100644 (file)
@@ -45,7 +45,8 @@ struct rpc_clnt {
        char *                  cl_server;      /* server machine name */
        char *                  cl_protname;    /* protocol name */
        struct rpc_auth *       cl_auth;        /* authenticator */
-       struct rpc_stat *       cl_stats;       /* statistics */
+       struct rpc_stat *       cl_stats;       /* per-program statistics */
+       struct rpc_iostats *    cl_metrics;     /* per-client statistics */
 
        unsigned int            cl_softrtry : 1,/* soft timeouts */
                                cl_intr     : 1,/* interruptible */
@@ -59,6 +60,7 @@ struct rpc_clnt {
        int                     cl_nodelen;     /* nodename length */
        char                    cl_nodename[UNX_MAXNODENAME];
        char                    cl_pathname[30];/* Path in rpc_pipe_fs */
+       struct vfsmount *       cl_vfsmnt;
        struct dentry *         cl_dentry;      /* inode */
        struct rpc_clnt *       cl_parent;      /* Points to parent of clones */
        struct rpc_rtt          cl_rtt_default;
@@ -100,6 +102,8 @@ struct rpc_procinfo {
        unsigned int            p_bufsiz;       /* req. buffer size */
        unsigned int            p_count;        /* call count */
        unsigned int            p_timer;        /* Which RTT timer to use */
+       u32                     p_statidx;      /* Which procedure to account */
+       char *                  p_name;         /* name of procedure */
 };
 
 #define RPC_CONGESTED(clnt)    (RPCXPRT_CONGESTED((clnt)->cl_xprt))
@@ -137,20 +141,6 @@ size_t             rpc_max_payload(struct rpc_clnt *);
 void           rpc_force_rebind(struct rpc_clnt *);
 int            rpc_ping(struct rpc_clnt *clnt, int flags);
 
-static __inline__
-int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags)
-{
-       struct rpc_message msg = {
-               .rpc_proc       = &clnt->cl_procinfo[proc],
-               .rpc_argp       = argp,
-               .rpc_resp       = resp,
-               .rpc_cred       = NULL
-       };
-       return rpc_call_sync(clnt, &msg, flags);
-}
-               
-extern void rpciod_wake_up(void);
-
 /*
  * Helper function for NFSroot support
  */
index 2c3601d..1279280 100644 (file)
@@ -53,6 +53,8 @@ struct krb5_ctx {
        struct xdr_netobj       mech_used;
 };
 
+extern spinlock_t krb5_seq_lock;
+
 #define KG_TOK_MIC_MSG    0x0101
 #define KG_TOK_WRAP_MSG   0x0201
 
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
new file mode 100644 (file)
index 0000000..8f96e9d
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ *  linux/include/linux/sunrpc/metrics.h
+ *
+ *  Declarations for RPC client per-operation metrics
+ *
+ *  Copyright (C) 2005 Chuck Lever <cel@netapp.com>
+ *
+ *  RPC client per-operation statistics provide latency and retry
+ *  information about each type of RPC procedure in a given RPC program.
+ *  These statistics are not for detailed problem diagnosis, but simply
+ *  to indicate whether the problem is local or remote.
+ *
+ *  These counters are not meant to be human-readable, but are meant to be
+ *  integrated into system monitoring tools such as "sar" and "iostat".  As
+ *  such, the counters are sampled by the tools over time, and are never
+ *  zeroed after a file system is mounted.  Moving averages can be computed
+ *  by the tools by taking the difference between two instantaneous samples
+ *  and dividing that by the time between the samples.
+ *
+ *  The counters are maintained in a single array per RPC client, indexed
+ *  by procedure number.  There is no need to maintain separate counter
+ *  arrays per-CPU because these counters are always modified behind locks.
+ */
+
+#ifndef _LINUX_SUNRPC_METRICS_H
+#define _LINUX_SUNRPC_METRICS_H
+
+#include <linux/seq_file.h>
+
+#define RPC_IOSTATS_VERS       "1.0"
+
+struct rpc_iostats {
+       /*
+        * Transmission counters.  Together they show how many request
+        * transmissions it takes, on average, to complete a given
+        * procedure.  More than one transmission may be needed when
+        * the server is unresponsive, the client retransmits too
+        * aggressively, or large requests meet a congested network.
+        */
+       unsigned long           om_ops;         /* count of operations */
+       unsigned long           om_ntrans;      /* count of RPC transmissions */
+       unsigned long           om_timeouts;    /* count of major timeouts */
+
+       /*
+        * Byte counters.  They record how much is sent and received
+        * for a given RPC procedure type, i.e. how much load that
+        * procedure puts on the network.  The RPC and ULP headers and
+        * the request payload are all included.
+        */
+       unsigned long long      om_bytes_sent;  /* count of bytes out */
+       unsigned long long      om_bytes_recv;  /* count of bytes in */
+
+       /*
+        * Timing counters.  They measure how long a request waits in
+        * queue before transmission, the network + server latency of
+        * the request, and the total time from init to release.
+        */
+       unsigned long long      om_queue;       /* jiffies queued for xmit */
+       unsigned long long      om_rtt;         /* jiffies for RPC RTT */
+       unsigned long long      om_execute;     /* jiffies for RPC execution */
+} ____cacheline_aligned;
+
+struct rpc_task;
+struct rpc_clnt;
+
+/*
+ * EXPORTed functions for managing rpc_iostats structures
+ *
+ *  rpc_alloc_iostats - returns a zero-filled array with one rpc_iostats
+ *                      entry per procedure (clnt->cl_maxproc entries),
+ *                      or NULL if the allocation fails
+ *  rpc_count_iostats - adds a task's counters and timings to the entry
+ *                      selected by its procedure's p_statidx; the caller
+ *                      is responsible for serialization
+ *  rpc_print_iostats - definition not visible from here; presumably
+ *                      writes a client's counters to the seq_file
+ *                      (TODO confirm)
+ *  rpc_free_iostats  - kfree()s an array obtained from rpc_alloc_iostats
+ *
+ * NOTE(review): in the stats.c hunk of this change only the alloc and
+ * free definitions are directly followed by EXPORT_SYMBOL(); confirm
+ * that rpc_count_iostats is exported before using it from a module.
+ */
+struct rpc_iostats *   rpc_alloc_iostats(struct rpc_clnt *);
+void                   rpc_count_iostats(struct rpc_task *);
+void                   rpc_print_iostats(struct seq_file *, struct rpc_clnt *);
+void                   rpc_free_iostats(struct rpc_iostats *);
+
+#endif /* _LINUX_SUNRPC_METRICS_H */
index 6392934..2c2189c 100644 (file)
@@ -45,6 +45,8 @@ extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *);
 extern int rpc_rmdir(char *);
 extern struct dentry *rpc_mkpipe(char *, void *, struct rpc_pipe_ops *, int flags);
 extern int rpc_unlink(char *);
+extern struct vfsmount *rpc_get_mount(void);
+extern void rpc_put_mount(void);
 
 #endif
 #endif
index 8b25629..82a91bb 100644 (file)
@@ -86,6 +86,12 @@ struct rpc_task {
                struct work_struct      tk_work;        /* Async task work queue */
                struct rpc_wait         tk_wait;        /* RPC wait */
        } u;
+
+       unsigned short          tk_timeouts;    /* maj timeouts */
+       size_t                  tk_bytes_sent;  /* total bytes sent */
+       unsigned long           tk_start;       /* RPC task init timestamp */
+       long                    tk_rtt;         /* round-trip time (jiffies) */
+
 #ifdef RPC_DEBUG
        unsigned short          tk_pid;         /* debugging aid */
 #endif
@@ -203,6 +209,7 @@ struct rpc_wait_queue {
        unsigned char           priority;               /* current priority */
        unsigned char           count;                  /* # task groups remaining serviced so far */
        unsigned char           nr;                     /* # tasks remaining for cookie */
+       unsigned short          qlen;                   /* total # tasks waiting in queue */
 #ifdef RPC_DEBUG
        const char *            name;
 #endif
@@ -269,13 +276,13 @@ void *            rpc_malloc(struct rpc_task *, size_t);
 void           rpc_free(struct rpc_task *);
 int            rpciod_up(void);
 void           rpciod_down(void);
-void           rpciod_wake_up(void);
 int            __rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
 #ifdef RPC_DEBUG
 void           rpc_show_tasks(void);
 #endif
 int            rpc_init_mempool(void);
 void           rpc_destroy_mempool(void);
+extern struct workqueue_struct *rpciod_workqueue;
 
 static inline void rpc_exit(struct rpc_task *task, int status)
 {
index 6ef99b1..7eebbab 100644 (file)
@@ -114,6 +114,7 @@ struct rpc_xprt_ops {
        void            (*release_request)(struct rpc_task *task);
        void            (*close)(struct rpc_xprt *xprt);
        void            (*destroy)(struct rpc_xprt *xprt);
+       void            (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq);
 };
 
 struct rpc_xprt {
@@ -187,6 +188,18 @@ struct rpc_xprt {
 
        struct list_head        recv;
 
+       struct {
+               unsigned long           bind_count,     /* total number of binds */
+                                       connect_count,  /* total number of connects */
+                                       connect_start,  /* connect start timestamp */
+                                       connect_time,   /* jiffies waiting for connect */
+                                       sends,          /* how many complete requests */
+                                       recvs,          /* how many complete requests */
+                                       bad_xids;       /* lookup_rqst didn't find XID */
+
+               unsigned long long      req_u,          /* average requests on the wire */
+                                       bklog_u;        /* backlog queue utilization */
+       } stat;
 
        void                    (*old_data_ready)(struct sock *, int);
        void                    (*old_state_change)(struct sock *);
index 8d6f1a1..55163af 100644 (file)
@@ -64,14 +64,26 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
        struct rpc_authops      *ops;
        u32                     flavor = pseudoflavor_to_flavor(pseudoflavor);
 
-       if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor]))
-               return ERR_PTR(-EINVAL);
+       auth = ERR_PTR(-EINVAL);
+       if (flavor >= RPC_AUTH_MAXFLAVOR)
+               goto out;
+
+       /* FIXME - auth_flavors[] really needs an rw lock,
+        * and module refcounting. */
+#ifdef CONFIG_KMOD
+       if ((ops = auth_flavors[flavor]) == NULL)
+               request_module("rpc-auth-%u", flavor);
+#endif
+       if ((ops = auth_flavors[flavor]) == NULL)
+               goto out;
        auth = ops->create(clnt, pseudoflavor);
        if (IS_ERR(auth))
                return auth;
        if (clnt->cl_auth)
                rpcauth_destroy(clnt->cl_auth);
        clnt->cl_auth = auth;
+
+out:
        return auth;
 }
 
index bb46efd..900ef31 100644 (file)
@@ -721,6 +721,8 @@ gss_destroy(struct rpc_auth *auth)
 
        gss_auth = container_of(auth, struct gss_auth, rpc_auth);
        rpc_unlink(gss_auth->path);
+       dput(gss_auth->dentry);
+       gss_auth->dentry = NULL;
        gss_mech_put(gss_auth->mech);
 
        rpcauth_free_credcache(auth);
index d0dfdfd..f433112 100644 (file)
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
+/*
+ * Guards the read-and-increment of a krb5 context's seq_send counter in
+ * the get_mic and wrap paths.  Defined with DEFINE_SPINLOCK() rather than
+ * the deprecated SPIN_LOCK_UNLOCKED initializer.
+ */
+DEFINE_SPINLOCK(krb5_seq_lock);
+
 u32
 gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
                struct xdr_netobj *token)
 {
        struct krb5_ctx         *ctx = gss_ctx->internal_ctx_id;
        s32                     checksum_type;
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        unsigned char           *ptr, *krb5_hdr, *msg_start;
        s32                     now;
+       u32                     seq_send;
 
        dprintk("RPC:     gss_krb5_seal\n");
 
@@ -133,16 +137,15 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
                BUG();
        }
 
-       kfree(md5cksum.data);
+       spin_lock(&krb5_seq_lock);
+       seq_send = ctx->seq_send++;
+       spin_unlock(&krb5_seq_lock);
 
        if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-                              ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+                              seq_send, krb5_hdr + 16, krb5_hdr + 8)))
                goto out_err;
 
-       ctx->seq_send++;
-
        return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
 out_err:
-       kfree(md5cksum.data);
        return GSS_S_FAILURE;
 }
index db055fd..0828cf6 100644 (file)
@@ -79,7 +79,8 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
        int                     signalg;
        int                     sealalg;
        s32                     checksum_type;
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        s32                     now;
        int                     direction;
        s32                     seqnum;
@@ -176,6 +177,5 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 
        ret = GSS_S_COMPLETE;
 out:
-       kfree(md5cksum.data);
        return ret;
 }
index af777cf..89d1f3e 100644 (file)
@@ -121,12 +121,14 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 {
        struct krb5_ctx         *kctx = ctx->internal_ctx_id;
        s32                     checksum_type;
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        int                     blocksize = 0, plainlen;
        unsigned char           *ptr, *krb5_hdr, *msg_start;
        s32                     now;
        int                     headlen;
        struct page             **tmp_pages;
+       u32                     seq_send;
 
        dprintk("RPC:     gss_wrap_kerberos\n");
 
@@ -205,23 +207,22 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
                BUG();
        }
 
-       kfree(md5cksum.data);
+       spin_lock(&krb5_seq_lock);
+       seq_send = kctx->seq_send++;
+       spin_unlock(&krb5_seq_lock);
 
        /* XXX would probably be more efficient to compute checksum
         * and encrypt at the same time: */
        if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
-                              kctx->seq_send, krb5_hdr + 16, krb5_hdr + 8)))
+                              seq_send, krb5_hdr + 16, krb5_hdr + 8)))
                goto out_err;
 
        if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
                                                                        pages))
                goto out_err;
 
-       kctx->seq_send++;
-
        return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
 out_err:
-       if (md5cksum.data) kfree(md5cksum.data);
        return GSS_S_FAILURE;
 }
 
@@ -232,7 +233,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
        int                     signalg;
        int                     sealalg;
        s32                     checksum_type;
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        s32                     now;
        int                     direction;
        s32                     seqnum;
@@ -358,6 +360,5 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 
        ret = GSS_S_COMPLETE;
 out:
-       if (md5cksum.data) kfree(md5cksum.data);
        return ret;
 }
index 5840080..5bf11cc 100644 (file)
@@ -102,6 +102,12 @@ get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg)
                        alg_mode = CRYPTO_TFM_MODE_CBC;
                        setkey = 1;
                        break;
+               case NID_cast5_cbc:
+                       /* XXXX here in name only, not used */
+                       alg_name = "cast5";
+                       alg_mode = CRYPTO_TFM_MODE_CBC;
+                       setkey = 0; /* XXX will need to set to 1 */
+                       break;
                case NID_md5:
                        if (key.len == 0) {
                                dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
index 86fbf7c..18c7862 100644 (file)
@@ -57,7 +57,8 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 {
        s32                     checksum_type;
        char                    tokhdrbuf[25];
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        struct xdr_netobj       mic_hdr = {.len = 0, .data = tokhdrbuf};
        int                     tokenlen = 0;
        unsigned char           *ptr;
@@ -115,13 +116,11 @@ spkm3_make_token(struct spkm3_ctx *ctx,
                dprintk("RPC: gss_spkm3_seal: SPKM_WRAP_TOK not supported\n");
                goto out_err;
        }
-       kfree(md5cksum.data);
 
        /* XXX need to implement sequence numbers, and ctx->expired */
 
        return  GSS_S_COMPLETE;
 out_err:
-       kfree(md5cksum.data);
        token->data = NULL;
        token->len = 0;
        return GSS_S_FAILURE;
index 96851b0..8537f58 100644 (file)
@@ -56,7 +56,8 @@ spkm3_read_token(struct spkm3_ctx *ctx,
 {
        s32                     code;
        struct xdr_netobj       wire_cksum = {.len =0, .data = NULL};
-       struct xdr_netobj       md5cksum = {.len = 0, .data = NULL};
+       char                    cksumdata[16];
+       struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        unsigned char           *ptr = (unsigned char *)read_token->data;
        unsigned char           *cksum;
        int                     bodysize, md5elen;
@@ -120,7 +121,6 @@ spkm3_read_token(struct spkm3_ctx *ctx,
        /* XXX: need to add expiration and sequencing */
        ret = GSS_S_COMPLETE;
 out:
-       kfree(md5cksum.data);
        kfree(wire_cksum.data);
        return ret;
 }
index d784797..aa8965e 100644 (file)
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/utsname.h>
+#include <linux/workqueue.h>
 
 #include <linux/sunrpc/clnt.h>
-#include <linux/workqueue.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
-
-#include <linux/nfs.h>
+#include <linux/sunrpc/metrics.h>
 
 
 #define RPC_SLACK_SPACE                (1024)  /* total overkill */
@@ -71,8 +70,15 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
        static uint32_t clntid;
        int error;
 
+       clnt->cl_vfsmnt = ERR_PTR(-ENOENT);
+       clnt->cl_dentry = ERR_PTR(-ENOENT);
        if (dir_name == NULL)
                return 0;
+
+       clnt->cl_vfsmnt = rpc_get_mount();
+       if (IS_ERR(clnt->cl_vfsmnt))
+               return PTR_ERR(clnt->cl_vfsmnt);
+
        for (;;) {
                snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname),
                                "%s/clnt%x", dir_name,
@@ -85,6 +91,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
                if (error != -EEXIST) {
                        printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n",
                                        clnt->cl_pathname, error);
+                       rpc_put_mount();
                        return error;
                }
        }
@@ -147,6 +154,7 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
        clnt->cl_vers     = version->number;
        clnt->cl_prot     = xprt->prot;
        clnt->cl_stats    = program->stats;
+       clnt->cl_metrics  = rpc_alloc_iostats(clnt);
        rpc_init_wait_queue(&clnt->cl_pmap_default.pm_bindwait, "bindwait");
 
        if (!clnt->cl_port)
@@ -175,7 +183,11 @@ rpc_new_client(struct rpc_xprt *xprt, char *servname,
        return clnt;
 
 out_no_auth:
-       rpc_rmdir(clnt->cl_pathname);
+       if (!IS_ERR(clnt->cl_dentry)) {
+               rpc_rmdir(clnt->cl_pathname);
+               dput(clnt->cl_dentry);
+               rpc_put_mount();
+       }
 out_no_path:
        if (clnt->cl_server != clnt->cl_inline_name)
                kfree(clnt->cl_server);
@@ -240,11 +252,15 @@ rpc_clone_client(struct rpc_clnt *clnt)
        new->cl_autobind = 0;
        new->cl_oneshot = 0;
        new->cl_dead = 0;
+       if (!IS_ERR(new->cl_dentry)) {
+               dget(new->cl_dentry);
+               rpc_get_mount();
+       }
        rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
        if (new->cl_auth)
                atomic_inc(&new->cl_auth->au_count);
        new->cl_pmap            = &new->cl_pmap_default;
-       rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait");
+       new->cl_metrics         = rpc_alloc_iostats(clnt);
        return new;
 out_no_clnt:
        printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
@@ -314,6 +330,12 @@ rpc_destroy_client(struct rpc_clnt *clnt)
        if (clnt->cl_server != clnt->cl_inline_name)
                kfree(clnt->cl_server);
 out_free:
+       rpc_free_iostats(clnt->cl_metrics);
+       clnt->cl_metrics = NULL;
+       if (!IS_ERR(clnt->cl_dentry)) {
+               dput(clnt->cl_dentry);
+               rpc_put_mount();
+       }
        kfree(clnt);
        return 0;
 }
@@ -473,15 +495,16 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
        int             status;
 
        /* If this client is slain all further I/O fails */
+       status = -EIO;
        if (clnt->cl_dead) 
-               return -EIO;
+               goto out_release;
 
        flags |= RPC_TASK_ASYNC;
 
        /* Create/initialize a new RPC task */
        status = -ENOMEM;
        if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
-               goto out;
+               goto out_release;
 
        /* Mask signals on GSS_AUTH upcalls */
        rpc_task_sigmask(task, &oldset);                
@@ -496,7 +519,10 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
                rpc_release_task(task);
 
        rpc_restore_sigmask(&oldset);           
-out:
+       return status;
+out_release:
+       if (tk_ops->rpc_release != NULL)
+               tk_ops->rpc_release(data);
        return status;
 }
 
@@ -993,6 +1019,8 @@ call_timeout(struct rpc_task *task)
        }
 
        dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
+       task->tk_timeouts++;
+
        if (RPC_IS_SOFT(task)) {
                printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
                                clnt->cl_protname, clnt->cl_server);
@@ -1045,6 +1073,11 @@ call_decode(struct rpc_task *task)
                return;
        }
 
+       /*
+        * Ensure that we see all writes made by xprt_complete_rqst()
+        * before it changed req->rq_received.
+        */
+       smp_rmb();
        req->rq_rcv_buf.len = req->rq_private_buf.len;
 
        /* Check that the softirq receive buffer is valid */
@@ -1194,8 +1227,8 @@ call_verify(struct rpc_task *task)
                        task->tk_action = call_bind;
                        goto out_retry;
                case RPC_AUTH_TOOWEAK:
-                       printk(KERN_NOTICE "call_verify: server requires stronger "
-                              "authentication.\n");
+                       printk(KERN_NOTICE "call_verify: server %s requires stronger "
+                              "authentication.\n", task->tk_client->cl_server);
                        break;
                default:
                        printk(KERN_WARNING "call_verify: unknown auth error: %x\n", n);
index 8139ce6..d25b054 100644 (file)
@@ -82,6 +82,7 @@ rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt)
        rpc_call_setup(child, &msg, 0);
 
        /* ... and run the child task */
+       task->tk_xprt->stat.bind_count++;
        rpc_run_child(task, child, pmap_getport_done);
        return;
 
@@ -103,6 +104,11 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
                .pm_prot        = prot,
                .pm_port        = 0
        };
+       struct rpc_message msg = {
+               .rpc_proc       = &pmap_procedures[PMAP_GETPORT],
+               .rpc_argp       = &map,
+               .rpc_resp       = &map.pm_port,
+       };
        struct rpc_clnt *pmap_clnt;
        char            hostname[32];
        int             status;
@@ -116,7 +122,7 @@ rpc_getport_external(struct sockaddr_in *sin, __u32 prog, __u32 vers, int prot)
                return PTR_ERR(pmap_clnt);
 
        /* Setup the call info struct */
-       status = rpc_call(pmap_clnt, PMAP_GETPORT, &map, &map.pm_port, 0);
+       status = rpc_call_sync(pmap_clnt, &msg, 0);
 
        if (status >= 0) {
                if (map.pm_port != 0)
@@ -161,16 +167,27 @@ pmap_getport_done(struct rpc_task *task)
 int
 rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 {
-       struct sockaddr_in      sin;
-       struct rpc_portmap      map;
+       struct sockaddr_in      sin = {
+               .sin_family     = AF_INET,
+               .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+       };
+       struct rpc_portmap      map = {
+               .pm_prog        = prog,
+               .pm_vers        = vers,
+               .pm_prot        = prot,
+               .pm_port        = port,
+       };
+       struct rpc_message msg = {
+               .rpc_proc       = &pmap_procedures[port ? PMAP_SET : PMAP_UNSET],
+               .rpc_argp       = &map,
+               .rpc_resp       = okay,
+       };
        struct rpc_clnt         *pmap_clnt;
        int error = 0;
 
        dprintk("RPC: registering (%d, %d, %d, %d) with portmapper.\n",
                        prog, vers, prot, port);
 
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        pmap_clnt = pmap_create("localhost", &sin, IPPROTO_UDP, 1);
        if (IS_ERR(pmap_clnt)) {
                error = PTR_ERR(pmap_clnt);
@@ -178,13 +195,7 @@ rpc_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
                return error;
        }
 
-       map.pm_prog = prog;
-       map.pm_vers = vers;
-       map.pm_prot = prot;
-       map.pm_port = port;
-
-       error = rpc_call(pmap_clnt, port? PMAP_SET : PMAP_UNSET,
-                                       &map, okay, 0);
+       error = rpc_call_sync(pmap_clnt, &msg, 0);
 
        if (error < 0) {
                printk(KERN_WARNING
@@ -260,6 +271,8 @@ static struct rpc_procinfo  pmap_procedures[] = {
          .p_decode             = (kxdrproc_t) xdr_decode_bool,
          .p_bufsiz             = 4,
          .p_count              = 1,
+         .p_statidx            = PMAP_SET,
+         .p_name               = "SET",
        },
 [PMAP_UNSET] = {
          .p_proc               = PMAP_UNSET,
@@ -267,6 +280,8 @@ static struct rpc_procinfo  pmap_procedures[] = {
          .p_decode             = (kxdrproc_t) xdr_decode_bool,
          .p_bufsiz             = 4,
          .p_count              = 1,
+         .p_statidx            = PMAP_UNSET,
+         .p_name               = "UNSET",
        },
 [PMAP_GETPORT] = {
          .p_proc               = PMAP_GETPORT,
@@ -274,6 +289,8 @@ static struct rpc_procinfo  pmap_procedures[] = {
          .p_decode             = (kxdrproc_t) xdr_decode_port,
          .p_bufsiz             = 4,
          .p_count              = 1,
+         .p_statidx            = PMAP_GETPORT,
+         .p_name               = "GETPORT",
        },
 };
 
index a5c0c7b..391d2bf 100644 (file)
@@ -91,7 +91,8 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
                res = 0;
        } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) {
                if (list_empty(&rpci->pipe))
-                       schedule_delayed_work(&rpci->queue_timeout,
+                       queue_delayed_work(rpciod_workqueue,
+                                       &rpci->queue_timeout,
                                        RPC_UPCALL_TIMEOUT);
                list_add_tail(&msg->list, &rpci->pipe);
                rpci->pipelen += msg->len;
@@ -132,7 +133,7 @@ rpc_close_pipes(struct inode *inode)
                if (ops->release_pipe)
                        ops->release_pipe(inode);
                cancel_delayed_work(&rpci->queue_timeout);
-               flush_scheduled_work();
+               flush_workqueue(rpciod_workqueue);
        }
        rpc_inode_setowner(inode, NULL);
        mutex_unlock(&inode->i_mutex);
@@ -434,14 +435,17 @@ static struct rpc_filelist authfiles[] = {
        },
 };
 
-static int
-rpc_get_mount(void)
+struct vfsmount *rpc_get_mount(void)
 {
-       return simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+       int err;
+
+       err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count);
+       if (err != 0)
+               return ERR_PTR(err);
+       return rpc_mount;
 }
 
-static void
-rpc_put_mount(void)
+void rpc_put_mount(void)
 {
        simple_release_fs(&rpc_mount, &rpc_mount_count);
 }
@@ -451,12 +455,13 @@ rpc_lookup_parent(char *path, struct nameidata *nd)
 {
        if (path[0] == '\0')
                return -ENOENT;
-       if (rpc_get_mount()) {
+       nd->mnt = rpc_get_mount();
+       if (IS_ERR(nd->mnt)) {
                printk(KERN_WARNING "%s: %s failed to mount "
                               "pseudofilesystem \n", __FILE__, __FUNCTION__);
-               return -ENODEV;
+               return PTR_ERR(nd->mnt);
        }
-       nd->mnt = mntget(rpc_mount);
+       mntget(nd->mnt);
        nd->dentry = dget(rpc_mount->mnt_root);
        nd->last_type = LAST_ROOT;
        nd->flags = LOOKUP_PARENT;
@@ -593,7 +598,6 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry)
        d_instantiate(dentry, inode);
        dir->i_nlink++;
        inode_dir_notify(dir, DN_CREATE);
-       rpc_get_mount();
        return 0;
 out_err:
        printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
@@ -614,7 +618,6 @@ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
        if (!error) {
                inode_dir_notify(dir, DN_DELETE);
                d_drop(dentry);
-               rpc_put_mount();
        }
        return 0;
 }
@@ -668,7 +671,7 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
 out:
        mutex_unlock(&dir->i_mutex);
        rpc_release_path(&nd);
-       return dentry;
+       return dget(dentry);
 err_depopulate:
        rpc_depopulate(dentry);
        __rpc_rmdir(dir, dentry);
@@ -732,7 +735,7 @@ rpc_mkpipe(char *path, void *private, struct rpc_pipe_ops *ops, int flags)
 out:
        mutex_unlock(&dir->i_mutex);
        rpc_release_path(&nd);
-       return dentry;
+       return dget(dentry);
 err_dput:
        dput(dentry);
        dentry = ERR_PTR(-ENOMEM);
index dff0779..b9969b9 100644 (file)
@@ -65,7 +65,7 @@ static LIST_HEAD(all_tasks);
  */
 static DEFINE_MUTEX(rpciod_mutex);
 static unsigned int            rpciod_users;
-static struct workqueue_struct *rpciod_workqueue;
+struct workqueue_struct *rpciod_workqueue;
 
 /*
  * Spinlock for other critical sections of code.
@@ -182,6 +182,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *
        else
                list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
        task->u.tk_wait.rpc_waitq = queue;
+       queue->qlen++;
        rpc_set_queued(task);
 
        dprintk("RPC: %4d added to queue %p \"%s\"\n",
@@ -216,6 +217,7 @@ static void __rpc_remove_wait_queue(struct rpc_task *task)
                __rpc_remove_wait_queue_priority(task);
        else
                list_del(&task->u.tk_wait.list);
+       queue->qlen--;
        dprintk("RPC: %4d removed from queue %p \"%s\"\n",
                                task->tk_pid, queue, rpc_qname(queue));
 }
@@ -816,6 +818,9 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 
        BUG_ON(task->tk_ops == NULL);
 
+       /* starting timestamp */
+       task->tk_start = jiffies;
+
        dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
                                current->pid);
 }
@@ -917,8 +922,11 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 {
        struct rpc_task *task;
        task = rpc_new_task(clnt, flags, ops, data);
-       if (task == NULL)
+       if (task == NULL) {
+               if (ops->rpc_release != NULL)
+                       ops->rpc_release(data);
                return ERR_PTR(-ENOMEM);
+       }
        atomic_inc(&task->tk_count);
        rpc_execute(task);
        return task;
index 4979f22..790941e 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/seq_file.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/metrics.h>
 
 #define RPCDBG_FACILITY        RPCDBG_MISC
 
@@ -106,6 +107,120 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
        }
 }
 
+/**
+ * rpc_alloc_iostats - allocate an rpc_iostats structure
+ * @clnt: RPC program, version, and xprt
+ *
+ */
+struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
+{
+       unsigned int ops = clnt->cl_maxproc;
+       size_t size = ops * sizeof(struct rpc_iostats);
+       struct rpc_iostats *new;
+
+       new = kmalloc(size, GFP_KERNEL);
+       if (new)
+               memset(new, 0 , size);
+       return new;
+}
+EXPORT_SYMBOL(rpc_alloc_iostats);
+
+/**
+ * rpc_free_iostats - release an rpc_iostats structure
+ * @stats: doomed rpc_iostats structure
+ *
+ */
+void rpc_free_iostats(struct rpc_iostats *stats)
+{
+       kfree(stats);
+}
+EXPORT_SYMBOL(rpc_free_iostats);
+
+/**
+ * rpc_count_iostats - tally up per-task stats
+ * @task: completed rpc_task
+ *
+ * Relies on the caller for serialization.
+ */
+void rpc_count_iostats(struct rpc_task *task)
+{
+       struct rpc_rqst *req = task->tk_rqstp;
+       struct rpc_iostats *stats = task->tk_client->cl_metrics;
+       struct rpc_iostats *op_metrics;
+       long rtt, execute, queue;
+
+       if (!stats || !req)
+               return;
+       op_metrics = &stats[task->tk_msg.rpc_proc->p_statidx];
+
+       op_metrics->om_ops++;
+       op_metrics->om_ntrans += req->rq_ntrans;
+       op_metrics->om_timeouts += task->tk_timeouts;
+
+       op_metrics->om_bytes_sent += task->tk_bytes_sent;
+       op_metrics->om_bytes_recv += req->rq_received;
+
+       queue = (long)req->rq_xtime - task->tk_start;
+       if (queue < 0)
+               queue = -queue;
+       op_metrics->om_queue += queue;
+
+       rtt = task->tk_rtt;
+       if (rtt < 0)
+               rtt = -rtt;
+       op_metrics->om_rtt += rtt;
+
+       execute = (long)jiffies - task->tk_start;
+       if (execute < 0)
+               execute = -execute;
+       op_metrics->om_execute += execute;
+}
+
+void _print_name(struct seq_file *seq, unsigned int op, struct rpc_procinfo *procs)
+{
+       if (procs[op].p_name)
+               seq_printf(seq, "\t%12s: ", procs[op].p_name);
+       else if (op == 0)
+               seq_printf(seq, "\t        NULL: ");
+       else
+               seq_printf(seq, "\t%12u: ", op);
+}
+
+#define MILLISECS_PER_JIFFY    (1000 / HZ)
+
+void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
+{
+       struct rpc_iostats *stats = clnt->cl_metrics;
+       struct rpc_xprt *xprt = clnt->cl_xprt;
+       unsigned int op, maxproc = clnt->cl_maxproc;
+
+       if (!stats)
+               return;
+
+       seq_printf(seq, "\tRPC iostats version: %s  ", RPC_IOSTATS_VERS);
+       seq_printf(seq, "p/v: %u/%u (%s)\n",
+                       clnt->cl_prog, clnt->cl_vers, clnt->cl_protname);
+
+       if (xprt)
+               xprt->ops->print_stats(xprt, seq);
+
+       seq_printf(seq, "\tper-op statistics\n");
+       for (op = 0; op < maxproc; op++) {
+               struct rpc_iostats *metrics = &stats[op];
+               _print_name(seq, op, clnt->cl_procinfo);
+               seq_printf(seq, "%lu %lu %lu %Lu %Lu %Lu %Lu %Lu\n",
+                               metrics->om_ops,
+                               metrics->om_ntrans,
+                               metrics->om_timeouts,
+                               metrics->om_bytes_sent,
+                               metrics->om_bytes_recv,
+                               metrics->om_queue * MILLISECS_PER_JIFFY,
+                               metrics->om_rtt * MILLISECS_PER_JIFFY,
+                               metrics->om_execute * MILLISECS_PER_JIFFY);
+       }
+}
+EXPORT_SYMBOL(rpc_print_iostats);
+
 /*
  * Register/unregister RPC proc files
  */
index 8ff2c8a..4dd5b3c 100644 (file)
 #include <linux/random.h>
 
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/metrics.h>
 
 /*
  * Local variables
  */
 
 #ifdef RPC_DEBUG
-# undef  RPC_DEBUG_DATA
 # define RPCDBG_FACILITY       RPCDBG_XPRT
 #endif
 
@@ -548,6 +548,7 @@ void xprt_connect(struct rpc_task *task)
 
                task->tk_timeout = xprt->connect_timeout;
                rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+               xprt->stat.connect_start = jiffies;
                xprt->ops->connect(task);
        }
        return;
@@ -558,6 +559,8 @@ static void xprt_connect_status(struct rpc_task *task)
        struct rpc_xprt *xprt = task->tk_xprt;
 
        if (task->tk_status >= 0) {
+               xprt->stat.connect_count++;
+               xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
                dprintk("RPC: %4d xprt_connect_status: connection established\n",
                                task->tk_pid);
                return;
@@ -601,16 +604,14 @@ static void xprt_connect_status(struct rpc_task *task)
 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
 {
        struct list_head *pos;
-       struct rpc_rqst *req = NULL;
 
        list_for_each(pos, &xprt->recv) {
                struct rpc_rqst *entry = list_entry(pos, struct rpc_rqst, rq_list);
-               if (entry->rq_xid == xid) {
-                       req = entry;
-                       break;
-               }
+               if (entry->rq_xid == xid)
+                       return entry;
        }
-       return req;
+       xprt->stat.bad_xids++;
+       return NULL;
 }
 
 /**
@@ -646,7 +647,12 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
        dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
                        task->tk_pid, ntohl(req->rq_xid), copied);
 
+       task->tk_xprt->stat.recvs++;
+       task->tk_rtt = (long)jiffies - req->rq_xtime;
+
        list_del_init(&req->rq_list);
+       /* Ensure all writes are done before we update req->rq_received */
+       smp_wmb();
        req->rq_received = req->rq_private_buf.len = copied;
        rpc_wake_up_task(task);
 }
@@ -723,7 +729,6 @@ void xprt_transmit(struct rpc_task *task)
 
        dprintk("RPC: %4d xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
-       smp_rmb();
        if (!req->rq_received) {
                if (list_empty(&req->rq_list)) {
                        spin_lock_bh(&xprt->transport_lock);
@@ -744,12 +749,19 @@ void xprt_transmit(struct rpc_task *task)
        if (status == 0) {
                dprintk("RPC: %4d xmit complete\n", task->tk_pid);
                spin_lock_bh(&xprt->transport_lock);
+
                xprt->ops->set_retrans_timeout(task);
+
+               xprt->stat.sends++;
+               xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
+               xprt->stat.bklog_u += xprt->backlog.qlen;
+
                /* Don't race with disconnect */
                if (!xprt_connected(xprt))
                        task->tk_status = -ENOTCONN;
                else if (!req->rq_received)
                        rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+
                xprt->ops->release_xprt(xprt, task);
                spin_unlock_bh(&xprt->transport_lock);
                return;
@@ -848,6 +860,7 @@ void xprt_release(struct rpc_task *task)
 
        if (!(req = task->tk_rqstp))
                return;
+       rpc_count_iostats(task);
        spin_lock_bh(&xprt->transport_lock);
        xprt->ops->release_xprt(xprt, task);
        if (xprt->ops->release_request)
index c458f8d..4b4e7df 100644 (file)
@@ -382,6 +382,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
                /* If we've sent the entire packet, immediately
                 * reset the count of bytes sent. */
                req->rq_bytes_sent += status;
+               task->tk_bytes_sent += status;
                if (likely(req->rq_bytes_sent >= req->rq_slen)) {
                        req->rq_bytes_sent = 0;
                        return 0;
@@ -1114,6 +1115,8 @@ static void xs_tcp_connect_worker(void *args)
        }
 
        /* Tell the socket layer to start connecting... */
+       xprt->stat.connect_count++;
+       xprt->stat.connect_start = jiffies;
        status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
                        sizeof(xprt->addr), O_NONBLOCK);
        dprintk("RPC: %p  connect status %d connected %d sock state %d\n",
@@ -1177,6 +1180,50 @@ static void xs_connect(struct rpc_task *task)
        }
 }
 
+/**
+ * xs_udp_print_stats - display UDP socket-specific stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+       seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
+                       xprt->port,
+                       xprt->stat.bind_count,
+                       xprt->stat.sends,
+                       xprt->stat.recvs,
+                       xprt->stat.bad_xids,
+                       xprt->stat.req_u,
+                       xprt->stat.bklog_u);
+}
+
+/**
+ * xs_tcp_print_stats - display TCP socket-specific stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+       long idle_time = 0;
+
+       if (xprt_connected(xprt))
+               idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+       seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
+                       xprt->port,
+                       xprt->stat.bind_count,
+                       xprt->stat.connect_count,
+                       xprt->stat.connect_time,
+                       idle_time,
+                       xprt->stat.sends,
+                       xprt->stat.recvs,
+                       xprt->stat.bad_xids,
+                       xprt->stat.req_u,
+                       xprt->stat.bklog_u);
+}
+
 static struct rpc_xprt_ops xs_udp_ops = {
        .set_buffer_size        = xs_udp_set_buffer_size,
        .reserve_xprt           = xprt_reserve_xprt_cong,
@@ -1191,6 +1238,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
        .release_request        = xprt_release_rqst_cong,
        .close                  = xs_close,
        .destroy                = xs_destroy,
+       .print_stats            = xs_udp_print_stats,
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
@@ -1204,6 +1252,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
        .close                  = xs_close,
        .destroy                = xs_destroy,
+       .print_stats            = xs_tcp_print_stats,
 };
 
 /**