Merge branch 'nfs-for-2.6.37' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Oct 2010 20:48:29 +0000 (13:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Oct 2010 20:48:29 +0000 (13:48 -0700)
* 'nfs-for-2.6.37' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (67 commits)
  SUNRPC: Cleanup duplicate assignment in rpcauth_refreshcred
  nfs: fix unchecked value
  Ask for time_delta during fsinfo probe
  Revalidate caches on lock
  SUNRPC: After calling xprt_release(), we must restart from call_reserve
  NFSv4: Fix up the 'dircount' hint in encode_readdir
  NFSv4: Clean up nfs4_decode_dirent
  NFSv4: nfs4_decode_dirent must clear entry->fattr->valid
  NFSv4: Fix a regression in decode_getfattr
  NFSv4: Fix up decode_attr_filehandle() to handle the case of empty fh pointer
  NFS: Ensure we check all allocation return values in new readdir code
  NFS: Readdir plus in v4
  NFS: introduce generic decode_getattr function
  NFS: check xdr_decode for errors
  NFS: nfs_readdir_filler catch all errors
  NFS: readdir with vmapped pages
  NFS: remove page size checking code
  NFS: decode_dirent should use an xdr_stream
  SUNRPC: Add a helper function xdr_inline_peek
  NFS: remove readdir plus limit
  ...

41 files changed:
Documentation/filesystems/nfs/00-INDEX
Documentation/filesystems/nfs/idmapper.txt [new file with mode: 0644]
Documentation/filesystems/nfs/nfsroot.txt
Documentation/kernel-parameters.txt
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/nfs/Kconfig
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/file.c
fs/nfs/idmap.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/mount_clnt.c
fs/nfs/nfs2xdr.c
fs/nfs/nfs3proc.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/nfsroot.c
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/sysctl.c
fs/nfs/unlink.c
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/nfs_idmap.h
include/linux/nfs_mount.h
include/linux/nfs_xdr.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/xdr.h
init/do_mounts.c
net/sunrpc/auth.c
net/sunrpc/clnt.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/sched.c
net/sunrpc/xdr.c

index 2f68cd6..3225a56 100644 (file)
@@ -14,3 +14,5 @@ nfsroot.txt
        - short guide on setting up a diskless box with NFS root filesystem.
 rpc-cache.txt
        - introduction to the caching mechanisms in the sunrpc layer.
+idmapper.txt
+       - information for configuring request-keys to be used by idmapper
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt
new file mode 100644 (file)
index 0000000..c385204
--- /dev/null
@@ -0,0 +1,67 @@
+
+=========
+ID Mapper
+=========
+Id mapper is used by NFS to translate user and group ids into names, and to
+translate user and group names into ids.  Part of this translation involves
+performing an upcall to userspace to request the information.  Id mapper will
+use request-key to perform this upcall and cache the result.  The program
+/usr/sbin/nfs.upcall should be called by request-key, and will perform the
+translation and initialize a key with the resulting information.
+
+ NFS_USE_NEW_IDMAPPER must be selected when configuring the kernel to use this
+ feature.
+
+===========
+Configuring
+===========
+The file /etc/request-key.conf will need to be modified so /sbin/request-key can
+direct the upcall.  The following line should be added:
+
+#OP    TYPE    DESCRIPTION     CALLOUT INFO    PROGRAM ARG1 ARG2 ARG3 ...
+#======        ======= =============== =============== ===============================
+create id_resolver     *       *               /usr/sbin/nfs.upcall %k %d 600
+
+This will direct all id_resolver requests to the program /usr/sbin/nfs.upcall.
+The last parameter, 600, defines how many seconds into the future the key will
+expire.  This parameter is optional for /usr/sbin/nfs.upcall.  When the timeout
+is not specified, nfs.upcall will default to 600 seconds.
+
+Id mapper uses four key descriptions:
+         uid:  Find the UID for the given user
+         gid:  Find the GID for the given group
+        user:  Find the user  name for the given UID
+       group:  Find the group name for the given GID
+
+You can handle any of these individually, rather than using the generic upcall
+program.  If you would like to use your own program for a uid lookup then you
+would edit your request-key.conf so it looks similar to this:
+
+#OP    TYPE    DESCRIPTION     CALLOUT INFO    PROGRAM ARG1 ARG2 ARG3 ...
+#======        ======= =============== =============== ===============================
+create id_resolver     uid:*   *               /some/other/program  %k %d 600
+create id_resolver     *       *               /usr/sbin/nfs.upcall %k %d 600
+
+Notice that the new line was added above the line for the generic program.
+request-key will find the first matching line and corresponding program.  In
+this case, /some/other/program will handle all uid lookups and
+/usr/sbin/nfs.upcall will handle gid, user, and group lookups.
+
+See <file:Documentation/keys-request-key.txt> for more information about the
+request-key function.
+
+
+==========
+nfs.upcall
+==========
+nfs.upcall is designed to be called by request-key, and should not be run "by
+hand".  This program takes two arguments, a serialized key and a key
+description.  The serialized key is first converted into a key_serial_t, and
+then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
+
+The actual lookups are performed by functions found in nfsidmap.h.  nfs.upcall
+determines the correct function to call by looking at the first part of the
+description string.  For example, a uid lookup description will appear as
+"uid:user@domain".
+
+nfs.upcall will return 0 if the key was instantiated, and non-zero otherwise.
index f2430a7..90c71c6 100644 (file)
@@ -159,6 +159,28 @@ ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>
                 Default: any
 
 
+nfsrootdebug
+
+  This parameter enables debugging messages to appear in the kernel
+  log at boot time so that administrators can verify that the correct
+  NFS mount options, server address, and root path are passed to the
+  NFS client.
+
+
+rdinit=<executable file>
+
+  To specify which file contains the program that starts system
+  initialization, administrators can use this command line parameter.
+  The default value of this parameter is "/init".  If the specified
+  file exists and the kernel can execute it, root filesystem related
+  kernel command line parameters, including `nfsroot=', are ignored.
+
+  A description of the process of mounting the root file system can be
+  found in:
+
+    Documentation/early-userspace/README
+
+
 
 
 3.) Boot Loader
index b660085..4bc2f3c 100644 (file)
@@ -1541,12 +1541,15 @@ and is between 256 and 4096 characters. It is defined in the file
                        1 to enable accounting
                        Default value is 0.
 
-       nfsaddrs=       [NFS]
+       nfsaddrs=       [NFS] Deprecated.  Use ip= instead.
                        See Documentation/filesystems/nfs/nfsroot.txt.
 
        nfsroot=        [NFS] nfs root filesystem for disk-less boxes.
                        See Documentation/filesystems/nfs/nfsroot.txt.
 
+       nfsrootdebug    [NFS] enable nfsroot debugging messages.
+                       See Documentation/filesystems/nfs/nfsroot.txt.
+
        nfs.callback_tcpport=
                        [NFS] set the TCP port on which the NFSv4 callback
                        channel should listen.
index 64fd427..d5bb868 100644 (file)
@@ -42,6 +42,7 @@ struct nlm_wait {
 };
 
 static LIST_HEAD(nlm_blocked);
+static DEFINE_SPINLOCK(nlm_blocked_lock);
 
 /**
  * nlmclnt_init - Set up per-NFS mount point lockd data structures
@@ -97,7 +98,10 @@ struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *
                block->b_lock = fl;
                init_waitqueue_head(&block->b_wait);
                block->b_status = nlm_lck_blocked;
+
+               spin_lock(&nlm_blocked_lock);
                list_add(&block->b_list, &nlm_blocked);
+               spin_unlock(&nlm_blocked_lock);
        }
        return block;
 }
@@ -106,7 +110,9 @@ void nlmclnt_finish_block(struct nlm_wait *block)
 {
        if (block == NULL)
                return;
+       spin_lock(&nlm_blocked_lock);
        list_del(&block->b_list);
+       spin_unlock(&nlm_blocked_lock);
        kfree(block);
 }
 
@@ -154,6 +160,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
         * Look up blocked request based on arguments. 
         * Warning: must not use cookie to match it!
         */
+       spin_lock(&nlm_blocked_lock);
        list_for_each_entry(block, &nlm_blocked, b_list) {
                struct file_lock *fl_blocked = block->b_lock;
 
@@ -178,6 +185,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
                wake_up(&block->b_wait);
                res = nlm_granted;
        }
+       spin_unlock(&nlm_blocked_lock);
        return res;
 }
 
@@ -216,10 +224,6 @@ reclaimer(void *ptr)
        allow_signal(SIGKILL);
 
        down_write(&host->h_rwsem);
-
-       /* This one ensures that our parent doesn't terminate while the
-        * reclaim is in progress */
-       lock_kernel();
        lockd_up();     /* note: this cannot fail as lockd is already running */
 
        dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
@@ -260,16 +264,17 @@ restart:
        dprintk("NLM: done reclaiming locks for host %s\n", host->h_name);
 
        /* Now, wake up all processes that sleep on a blocked lock */
+       spin_lock(&nlm_blocked_lock);
        list_for_each_entry(block, &nlm_blocked, b_list) {
                if (block->b_host == host) {
                        block->b_status = nlm_lck_denied_grace_period;
                        wake_up(&block->b_wait);
                }
        }
+       spin_unlock(&nlm_blocked_lock);
 
        /* Release host handle after use */
        nlm_release_host(host);
        lockd_down();
-       unlock_kernel();
        return 0;
 }
index 7932c39..47ea1e1 100644 (file)
@@ -166,7 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
        /* Set up the argument struct */
        nlmclnt_setlockargs(call, fl);
 
-       lock_kernel();
        if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
                if (fl->fl_type != F_UNLCK) {
                        call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -177,10 +176,8 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
                status = nlmclnt_test(call, fl);
        else
                status = -EINVAL;
-
        fl->fl_ops->fl_release_private(fl);
        fl->fl_ops = NULL;
-       unlock_kernel();
 
        dprintk("lockd: clnt proc returns %d\n", status);
        return status;
@@ -226,9 +223,7 @@ void nlm_release_call(struct nlm_rqst *call)
 
 static void nlmclnt_rpc_release(void *data)
 {
-       lock_kernel();
        nlm_release_call(data);
-       unlock_kernel();
 }
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -448,14 +443,18 @@ out:
 
 static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
+       spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
        new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
        new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
        list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
+       spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
 }
 
 static void nlmclnt_locks_release_private(struct file_lock *fl)
 {
+       spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
        list_del(&fl->fl_u.nfs_fl.list);
+       spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
        nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
 }
 
@@ -721,9 +720,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 die:
        return;
  retry_rebind:
-       lock_kernel();
        nlm_rebind_host(req->a_host);
-       unlock_kernel();
  retry_unlock:
        rpc_restart_call(task);
 }
@@ -801,9 +798,7 @@ retry_cancel:
        /* Don't ever retry more than 3 times */
        if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
                goto die;
-       lock_kernel();
        nlm_rebind_host(req->a_host);
-       unlock_kernel();
        rpc_restart_call(task);
        rpc_delay(task, 30 * HZ);
 }
index b950415..5c55c26 100644 (file)
@@ -118,3 +118,14 @@ config NFS_USE_KERNEL_DNS
        select DNS_RESOLVER
        select KEYS
        default y
+
+config NFS_USE_NEW_IDMAPPER
+       bool "Use the new idmapper upcall routine"
+       depends on NFS_V4 && KEYS
+       help
+         Say Y here if you want NFS to use the new idmapper upcall functions.
+         You will need /sbin/request-key (usually provided by the keyutils
+         package).  For details, read
+         <file:Documentation/filesystems/nfs/idmapper.txt>.
+
+         If you are unsure, say N.
index e734072..a882785 100644 (file)
@@ -635,7 +635,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
  */
 static void nfs_destroy_server(struct nfs_server *server)
 {
-       if (!(server->flags & NFS_MOUNT_NONLM))
+       if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
+                       !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
                nlmclnt_done(server->nlm_host);
 }
 
@@ -657,7 +658,8 @@ static int nfs_start_lockd(struct nfs_server *server)
 
        if (nlm_init.nfs_version > 3)
                return 0;
-       if (server->flags & NFS_MOUNT_NONLM)
+       if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
+                       (server->flags & NFS_MOUNT_LOCAL_FCNTL))
                return 0;
 
        switch (clp->cl_proto) {
@@ -901,8 +903,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
        server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
        server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-       if (server->dtsize > PAGE_CACHE_SIZE)
-               server->dtsize = PAGE_CACHE_SIZE;
+       if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
+               server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
        if (server->dtsize > server->rsize)
                server->dtsize = server->rsize;
 
@@ -913,6 +915,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
 
        server->maxfilesize = fsinfo->maxfilesize;
 
+       server->time_delta = fsinfo->time_delta;
+
        /* We're airborne Set socket buffersize */
        rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
 }
@@ -1356,8 +1360,9 @@ static int nfs4_init_server(struct nfs_server *server,
 
        /* Initialise the client representation from the mount data */
        server->flags = data->flags;
-       server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
-               NFS_CAP_POSIX_LOCK;
+       server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
+       if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+                       server->caps |= NFS_CAP_READDIRPLUS;
        server->options = data->options;
 
        /* Get a client record */
index e257172..257e405 100644 (file)
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/vmalloc.h>
 
-#include "nfs4_fs.h"
 #include "delegation.h"
 #include "iostat.h"
 #include "internal.h"
+#include "fscache.h"
 
 /* #define NFS_DEBUG_VERBOSE 1 */
 
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
                      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
+static int nfs_readdir_clear_array(struct page*, gfp_t);
 
 const struct file_operations nfs_dir_operations = {
        .llseek         = nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
        .setattr        = nfs_setattr,
 };
 
+const struct address_space_operations nfs_dir_addr_space_ops = {
+       .releasepage = nfs_readdir_clear_array,
+};
+
 #ifdef CONFIG_NFS_V3
 const struct inode_operations nfs3_dir_inode_operations = {
        .create         = nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
 #ifdef CONFIG_NFS_V4
 
 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
 const struct inode_operations nfs4_dir_inode_operations = {
-       .create         = nfs_create,
+       .create         = nfs_open_create,
        .lookup         = nfs_atomic_lookup,
        .link           = nfs_link,
        .unlink         = nfs_unlink,
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp)
        return res;
 }
 
-typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int);
+struct nfs_cache_array_entry {
+       u64 cookie;
+       u64 ino;
+       struct qstr string;
+};
+
+struct nfs_cache_array {
+       unsigned int size;
+       int eof_index;
+       u64 last_cookie;
+       struct nfs_cache_array_entry array[0];
+};
+
+#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
+
+typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
        struct file     *file;
        struct page     *page;
        unsigned long   page_index;
-       __be32          *ptr;
        u64             *dir_cookie;
        loff_t          current_index;
-       struct nfs_entry *entry;
        decode_dirent_t decode;
-       int             plus;
+
        unsigned long   timestamp;
        unsigned long   gencount;
-       int             timestamp_valid;
+       unsigned int    cache_entry_index;
+       unsigned int    plus:1;
+       unsigned int    eof:1;
 } nfs_readdir_descriptor_t;
 
-/* Now we cache directories properly, by stuffing the dirent
- * data directly in the page cache.
- *
- * Inode invalidation due to refresh etc. takes care of
- * _everything_, no sloppy entry flushing logic, no extraneous
- * copying, network direct to page cache, the way it was meant
- * to be.
- *
- * NOTE: Dirent information verification is done always by the
- *      page-in of the RPC reply, nowhere else, this simplies
- *      things substantially.
+/*
+ * The caller is responsible for calling nfs_readdir_release_array(page)
  */
 static
-int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
+struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
+{
+       if (page == NULL)
+               return ERR_PTR(-EIO);
+       return (struct nfs_cache_array *)kmap(page);
+}
+
+static
+void nfs_readdir_release_array(struct page *page)
+{
+       kunmap(page);
+}
+
+/*
+ * we are freeing strings created by nfs_readdir_add_to_array()
+ */
+static
+int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+{
+       struct nfs_cache_array *array = nfs_readdir_get_array(page);
+       int i;
+       for (i = 0; i < array->size; i++)
+               kfree(array->array[i].string.name);
+       nfs_readdir_release_array(page);
+       return 0;
+}
+
+/*
+ * the caller is responsible for freeing qstr.name
+ * when called by nfs_readdir_add_to_array, the strings will be freed in
+ * nfs_readdir_clear_array()
+ */
+static
+int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+{
+       string->len = len;
+       string->name = kmemdup(name, len, GFP_KERNEL);
+       if (string->name == NULL)
+               return -ENOMEM;
+       string->hash = full_name_hash(name, len);
+       return 0;
+}
+
+static
+int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
+{
+       struct nfs_cache_array *array = nfs_readdir_get_array(page);
+       struct nfs_cache_array_entry *cache_entry;
+       int ret;
+
+       if (IS_ERR(array))
+               return PTR_ERR(array);
+       ret = -EIO;
+       if (array->size >= MAX_READDIR_ARRAY)
+               goto out;
+
+       cache_entry = &array->array[array->size];
+       cache_entry->cookie = entry->prev_cookie;
+       cache_entry->ino = entry->ino;
+       ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
+       if (ret)
+               goto out;
+       array->last_cookie = entry->cookie;
+       if (entry->eof == 1)
+               array->eof_index = array->size;
+       array->size++;
+out:
+       nfs_readdir_release_array(page);
+       return ret;
+}
+
+static
+int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+       loff_t diff = desc->file->f_pos - desc->current_index;
+       unsigned int index;
+
+       if (diff < 0)
+               goto out_eof;
+       if (diff >= array->size) {
+               if (array->eof_index > 0)
+                       goto out_eof;
+               desc->current_index += array->size;
+               return -EAGAIN;
+       }
+
+       index = (unsigned int)diff;
+       *desc->dir_cookie = array->array[index].cookie;
+       desc->cache_entry_index = index;
+       if (index == array->eof_index)
+               desc->eof = 1;
+       return 0;
+out_eof:
+       desc->eof = 1;
+       return -EBADCOOKIE;
+}
+
+static
+int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+{
+       int i;
+       int status = -EAGAIN;
+
+       for (i = 0; i < array->size; i++) {
+               if (i == array->eof_index) {
+                       desc->eof = 1;
+                       status = -EBADCOOKIE;
+               }
+               if (array->array[i].cookie == *desc->dir_cookie) {
+                       desc->cache_entry_index = i;
+                       status = 0;
+                       break;
+               }
+       }
+
+       return status;
+}
+
+static
+int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+{
+       struct nfs_cache_array *array;
+       int status = -EBADCOOKIE;
+
+       if (desc->dir_cookie == NULL)
+               goto out;
+
+       array = nfs_readdir_get_array(desc->page);
+       if (IS_ERR(array)) {
+               status = PTR_ERR(array);
+               goto out;
+       }
+
+       if (*desc->dir_cookie == 0)
+               status = nfs_readdir_search_for_pos(array, desc);
+       else
+               status = nfs_readdir_search_for_cookie(array, desc);
+
+       nfs_readdir_release_array(desc->page);
+out:
+       return status;
+}
+
+/* Fill a page with xdr information before transferring to the cache page */
+static
+int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
+                       struct nfs_entry *entry, struct file *file, struct inode *inode)
 {
-       struct file     *file = desc->file;
-       struct inode    *inode = file->f_path.dentry->d_inode;
        struct rpc_cred *cred = nfs_file_cred(file);
        unsigned long   timestamp, gencount;
        int             error;
 
-       dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
-                       __func__, (long long)desc->entry->cookie,
-                       page->index);
-
  again:
        timestamp = jiffies;
        gencount = nfs_inc_attr_generation_counter();
-       error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
+       error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
                                          NFS_SERVER(inode)->dtsize, desc->plus);
        if (error < 0) {
                /* We requested READDIRPLUS, but the server doesn't grok it */
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
        }
        desc->timestamp = timestamp;
        desc->gencount = gencount;
-       desc->timestamp_valid = 1;
-       SetPageUptodate(page);
-       /* Ensure consistent page alignment of the data.
-        * Note: assumes we have exclusive access to this mapping either
-        *       through inode->i_mutex or some other mechanism.
-        */
-       if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
-               /* Should never happen */
-               nfs_zap_mapping(inode, inode->i_mapping);
-       }
-       unlock_page(page);
-       return 0;
- error:
-       unlock_page(page);
-       return -EIO;
+error:
+       return error;
 }
 
-static inline
-int dir_decode(nfs_readdir_descriptor_t *desc)
+/* Fill in an entry by decoding the next dirent from the xdr stream */
+static
+int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
 {
-       __be32  *p = desc->ptr;
-       p = desc->decode(p, desc->entry, desc->plus);
+       __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
        if (IS_ERR(p))
                return PTR_ERR(p);
-       desc->ptr = p;
-       if (desc->timestamp_valid) {
-               desc->entry->fattr->time_start = desc->timestamp;
-               desc->entry->fattr->gencount = desc->gencount;
-       } else
-               desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
+
+       entry->fattr->time_start = desc->timestamp;
+       entry->fattr->gencount = desc->gencount;
        return 0;
 }
 
-static inline
-void dir_page_release(nfs_readdir_descriptor_t *desc)
+static
+int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
-       kunmap(desc->page);
-       page_cache_release(desc->page);
-       desc->page = NULL;
-       desc->ptr = NULL;
+       struct nfs_inode *node;
+       if (dentry->d_inode == NULL)
+               goto different;
+       node = NFS_I(dentry->d_inode);
+       if (node->fh.size != entry->fh->size)
+               goto different;
+       if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+               goto different;
+       return 1;
+different:
+       return 0;
 }
 
-/*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the next entry with cookie '*desc->dir_cookie'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
- */
-static inline
-int find_dirent(nfs_readdir_descriptor_t *desc)
+static
+void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
 {
-       struct nfs_entry *entry = desc->entry;
-       int             loop_count = 0,
-                       status;
+       struct qstr filename = {
+               .len = entry->len,
+               .name = entry->name,
+       };
+       struct dentry *dentry;
+       struct dentry *alias;
+       struct inode *dir = parent->d_inode;
+       struct inode *inode;
 
-       while((status = dir_decode(desc)) == 0) {
-               dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n",
-                               __func__, (unsigned long long)entry->cookie);
-               if (entry->prev_cookie == *desc->dir_cookie)
-                       break;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
+       if (filename.name[0] == '.') {
+               if (filename.len == 1)
+                       return;
+               if (filename.len == 2 && filename.name[1] == '.')
+                       return;
+       }
+       filename.hash = full_name_hash(filename.name, filename.len);
+
+       dentry = d_lookup(parent, &filename);
+       if (dentry != NULL) {
+               if (nfs_same_file(dentry, entry)) {
+                       nfs_refresh_inode(dentry->d_inode, entry->fattr);
+                       goto out;
+               } else {
+                       d_drop(dentry);
+                       dput(dentry);
                }
        }
-       return status;
+
+       dentry = d_alloc(parent, &filename);
+       if (dentry == NULL)
+               return;
+
+       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+       inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
+       if (IS_ERR(inode))
+               goto out;
+
+       alias = d_materialise_unique(dentry, inode);
+       if (IS_ERR(alias))
+               goto out;
+       else if (alias) {
+               nfs_set_verifier(alias, nfs_save_change_attribute(dir));
+               dput(alias);
+       } else
+               nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+out:
+       dput(dentry);
+}
+
+/* Perform conversion from xdr to cache array */
+static
+void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+                               void *xdr_page, struct page *page, unsigned int buflen)
+{
+       struct xdr_stream stream;
+       struct xdr_buf buf;
+       __be32 *ptr = xdr_page;
+       int status;
+       struct nfs_cache_array *array;
+
+       buf.head->iov_base = xdr_page;
+       buf.head->iov_len = buflen;
+       buf.tail->iov_len = 0;
+       buf.page_base = 0;
+       buf.page_len = 0;
+       buf.buflen = buf.head->iov_len;
+       buf.len = buf.head->iov_len;
+
+       xdr_init_decode(&stream, &buf, ptr);
+
+
+       do {
+               status = xdr_decode(desc, entry, &stream);
+               if (status != 0)
+                       break;
+
+               if (nfs_readdir_add_to_array(entry, page) == -1)
+                       break;
+               if (desc->plus == 1)
+                       nfs_prime_dcache(desc->file->f_path.dentry, entry);
+       } while (!entry->eof);
+
+       if (status == -EBADCOOKIE && entry->eof) {
+               array = nfs_readdir_get_array(page);
+               array->eof_index = array->size - 1;
+               status = 0;
+               nfs_readdir_release_array(page);
+       }
+}
+
+static
+void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
+{
+       unsigned int i;
+       for (i = 0; i < npages; i++)
+               put_page(pages[i]);
+}
+
+static
+void nfs_readdir_free_large_page(void *ptr, struct page **pages,
+               unsigned int npages)
+{
+       vm_unmap_ram(ptr, npages);
+       nfs_readdir_free_pagearray(pages, npages);
 }
 
 /*
- * Given a pointer to a buffer that has already been filled by a call
- * to readdir, find the entry at offset 'desc->file->f_pos'.
- *
- * If the end of the buffer has been reached, return -EAGAIN, if not,
- * return the offset within the buffer of the next entry to be
- * read.
+ * nfs_readdir_large_page will allocate pages that must be freed with a call
+ * to nfs_readdir_free_large_page
  */
-static inline
-int find_dirent_index(nfs_readdir_descriptor_t *desc)
+static
+void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
 {
-       struct nfs_entry *entry = desc->entry;
-       int             loop_count = 0,
-                       status;
+       void *ptr;
+       unsigned int i;
+
+       for (i = 0; i < npages; i++) {
+               struct page *page = alloc_page(GFP_KERNEL);
+               if (page == NULL)
+                       goto out_freepages;
+               pages[i] = page;
+       }
 
-       for(;;) {
-               status = dir_decode(desc);
-               if (status)
-                       break;
+       ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
+       if (!IS_ERR_OR_NULL(ptr))
+               return ptr;
+out_freepages:
+       nfs_readdir_free_pagearray(pages, i);
+       return NULL;
+}
+
+/*
+ * Fill the cache array stored in 'page' with directory entries, starting
+ * at cookie *desc->dir_cookie.  Replies are fetched by
+ * nfs_readdir_xdr_filler() into a temporary vmapped buffer of
+ * NFS_MAX_READDIR_PAGES pages and handed to nfs_readdir_page_filler(),
+ * until the array records an eof_index or holds MAX_READDIR_ARRAY entries.
+ *
+ * Returns the last nfs_readdir_xdr_filler() status (negative on failure),
+ * or -ENOMEM if the file handle, the attributes or the receive buffer
+ * cannot be allocated.
+ */
+static
+int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+{
+       struct page *pages[NFS_MAX_READDIR_PAGES];
+       void *pages_ptr = NULL;
+       struct nfs_entry entry;
+       struct file     *file = desc->file;
+       struct nfs_cache_array *array;
+       /*
+        * Every early exit before the first xdr_filler call is an
+        * allocation failure, so it must not be reported as success.
+        */
+       int status = -ENOMEM;
+       unsigned int array_size = ARRAY_SIZE(pages);
+
+       entry.prev_cookie = 0;
+       entry.cookie = *desc->dir_cookie;
+       entry.eof = 0;
+       entry.fh = nfs_alloc_fhandle();
+       entry.fattr = nfs_alloc_fattr();
+       if (entry.fh == NULL || entry.fattr == NULL)
+               goto out;
 
-               dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n",
-                               (unsigned long long)entry->cookie, desc->current_index);
+       array = nfs_readdir_get_array(page);
+       memset(array, 0, sizeof(struct nfs_cache_array));
+       /* a negative eof_index means the end of directory is not known yet */
+       array->eof_index = -1;
 
-               if (desc->file->f_pos == desc->current_index) {
-                       *desc->dir_cookie = entry->cookie;
+       pages_ptr = nfs_readdir_large_page(pages, array_size);
+       if (!pages_ptr)
+               goto out_release_array;
+       do {
+               status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
+
+               if (status < 0)
                        break;
-               }
-               desc->current_index++;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
-       }
+               nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
+       } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+
+       nfs_readdir_free_large_page(pages_ptr, pages, array_size);
+out_release_array:
+       nfs_readdir_release_array(page);
+out:
+       nfs_free_fattr(entry.fattr);
+       nfs_free_fhandle(entry.fh);
        return status;
 }
 
 /*
- * Find the given page, and call find_dirent() or find_dirent_index in
- * order to try to return the next entry.
+ * Now we cache directories properly, by converting xdr information
+ * to an array that can be used for lookups later.  This results in
+ * fewer cache pages, since we can store more information on each page.
+ * We only need to convert from xdr once so future lookups are much simpler
  */
-static inline
-int find_dirent_page(nfs_readdir_descriptor_t *desc)
+static
+int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
        struct inode    *inode = desc->file->f_path.dentry->d_inode;
-       struct page     *page;
-       int             status;
 
-       dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n",
-                       __func__, desc->page_index,
-                       (long long) *desc->dir_cookie);
+       if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+               goto error;
+       SetPageUptodate(page);
 
-       /* If we find the page in the page_cache, we cannot be sure
-        * how fresh the data is, so we will ignore readdir_plus attributes.
-        */
-       desc->timestamp_valid = 0;
-       page = read_cache_page(inode->i_mapping, desc->page_index,
-                              (filler_t *)nfs_readdir_filler, desc);
-       if (IS_ERR(page)) {
-               status = PTR_ERR(page);
-               goto out;
+       if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
+               /* Should never happen */
+               nfs_zap_mapping(inode, inode->i_mapping);
        }
+       unlock_page(page);
+       return 0;
+ error:
+       unlock_page(page);
+       return -EIO;
+}
 
-       /* NOTE: Someone else may have changed the READDIRPLUS flag */
-       desc->page = page;
-       desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
-       if (*desc->dir_cookie != 0)
-               status = find_dirent(desc);
-       else
-               status = find_dirent_index(desc);
-       if (status < 0)
-               dir_page_release(desc);
- out:
-       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
-       return status;
+/*
+ * Hand the page held in desc->page to page_cache_release() and clear
+ * the descriptor's pointer to it.
+ */
+static
+void cache_page_release(nfs_readdir_descriptor_t *desc)
+{
+       struct page *held = desc->page;
+
+       desc->page = NULL;
+       page_cache_release(held);
+}
+
+/*
+ * Fetch page desc->page_index of the directory inode's mapping through
+ * read_cache_page(), with nfs_readdir_filler as the filler.  If the
+ * result is an error pointer, desc->eof is set.  The read_cache_page()
+ * result is returned either way.
+ */
+static
+struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+{
+       struct inode *dir = desc->file->f_path.dentry->d_inode;
+       struct page *cached;
+
+       cached = read_cache_page(dir->i_mapping, desc->page_index,
+                       (filler_t *)nfs_readdir_filler, desc);
+       if (IS_ERR(cached))
+               desc->eof = 1;
+       return cached;
 }
 
 /*
- * Recurse through the page cache pages, and return a
- * filled nfs_entry structure of the next directory entry if possible.
- *
- * The target for the search is '*desc->dir_cookie' if non-0,
- * 'desc->file->f_pos' otherwise
+ * Returns 0 if desc->dir_cookie was found on page desc->page_index
  */
+static
+int find_cache_page(nfs_readdir_descriptor_t *desc)
+{
+       struct page *cached = get_cache_page(desc);
+       int status;
+
+       /* desc->page is assigned even when it is an error pointer */
+       desc->page = cached;
+       if (IS_ERR(cached))
+               return PTR_ERR(cached);
+
+       status = nfs_readdir_search_array(desc);
+       /* the page stays in desc->page only when the search returned 0 */
+       if (status != 0)
+               cache_page_release(desc);
+       return status;
+}
+
+/*
+ * Search the page cache for desc->dir_cookie, starting at page
+ * desc->page_index.  Each time find_cache_page() returns -EAGAIN the
+ * search moves on to the next page; any other result (0 or an error)
+ * ends the loop and is returned.  desc->page_index is left at the last
+ * page examined.
+ */
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-       int             loop_count = 0;
-       int             res;
-
-       /* Always search-by-index from the beginning of the cache */
-       if (*desc->dir_cookie == 0) {
-               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
-                               (long long)desc->file->f_pos);
-               desc->page_index = 0;
-               desc->entry->cookie = desc->entry->prev_cookie = 0;
-               desc->entry->eof = 0;
-               desc->current_index = 0;
-       } else
-               dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
-                               (unsigned long long)*desc->dir_cookie);
+       int res = -EAGAIN;
 
-       for (;;) {
-               res = find_dirent_page(desc);
+       while (1) {
+               res = find_cache_page(desc);
                if (res != -EAGAIN)
                        break;
-               /* Align to beginning of next page */
-               desc->page_index ++;
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
+               /* not on this page: try the following one */
+               desc->page_index++;
        }
-
-       dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
        return res;
 }
 
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
        return (inode->i_mode >> 12) & 15;
 }
 
-static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
-
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
                   filldir_t filldir)
 {
        struct file     *file = desc->file;
-       struct nfs_entry *entry = desc->entry;
-       struct dentry   *dentry = NULL;
-       u64             fileid;
-       int             loop_count = 0,
-                       res;
-
-       dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
-                       (unsigned long long)entry->cookie);
-
-       for(;;) {
-               unsigned d_type = DT_UNKNOWN;
-               /* Note: entry->prev_cookie contains the cookie for
-                *       retrieving the current dirent on the server */
-               fileid = entry->ino;
-
-               /* Get a dentry if we have one */
-               if (dentry != NULL)
-                       dput(dentry);
-               dentry = nfs_readdir_lookup(desc);
+       int i = 0;
+       int res = 0;
+       struct nfs_cache_array *array = NULL;
+       unsigned int d_type = DT_UNKNOWN;
+       struct dentry *dentry = NULL;
 
-               /* Use readdirplus info */
-               if (dentry != NULL && dentry->d_inode != NULL) {
-                       d_type = dt_type(dentry->d_inode);
-                       fileid = NFS_FILEID(dentry->d_inode);
-               }
+       array = nfs_readdir_get_array(desc->page);
 
-               res = filldir(dirent, entry->name, entry->len, 
-                             file->f_pos, nfs_compat_user_ino64(fileid),
-                             d_type);
+       for (i = desc->cache_entry_index; i < array->size; i++) {
+               d_type = DT_UNKNOWN;
+
+               res = filldir(dirent, array->array[i].string.name,
+                       array->array[i].string.len, file->f_pos,
+                       nfs_compat_user_ino64(array->array[i].ino), d_type);
                if (res < 0)
                        break;
                file->f_pos++;
-               *desc->dir_cookie = entry->cookie;
-               if (dir_decode(desc) != 0) {
-                       desc->page_index ++;
+               desc->cache_entry_index = i;
+               if (i < (array->size-1))
+                       *desc->dir_cookie = array->array[i+1].cookie;
+               else
+                       *desc->dir_cookie = array->last_cookie;
+               if (i == array->eof_index) {
+                       desc->eof = 1;
                        break;
                }
-               if (loop_count++ > 200) {
-                       loop_count = 0;
-                       schedule();
-               }
        }
-       dir_page_release(desc);
+
+       nfs_readdir_release_array(desc->page);
+       cache_page_release(desc);
        if (dentry != NULL)
                dput(dentry);
        dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -476,12 +716,9 @@ static inline
 int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                     filldir_t filldir)
 {
-       struct file     *file = desc->file;
-       struct inode    *inode = file->f_path.dentry->d_inode;
-       struct rpc_cred *cred = nfs_file_cred(file);
        struct page     *page = NULL;
        int             status;
-       unsigned long   timestamp, gencount;
+       struct inode *inode = desc->file->f_path.dentry->d_inode;
 
        dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
                        (unsigned long long)*desc->dir_cookie);
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
                status = -ENOMEM;
                goto out;
        }
-       timestamp = jiffies;
-       gencount = nfs_inc_attr_generation_counter();
-       status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
-                                               *desc->dir_cookie, page,
-                                               NFS_SERVER(inode)->dtsize,
-                                               desc->plus);
-       desc->page = page;
-       desc->ptr = kmap(page);         /* matching kunmap in nfs_do_filldir */
-       if (status >= 0) {
-               desc->timestamp = timestamp;
-               desc->gencount = gencount;
-               desc->timestamp_valid = 1;
-               if ((status = dir_decode(desc)) == 0)
-                       desc->entry->prev_cookie = *desc->dir_cookie;
-       } else
+
+       if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
                status = -EIO;
-       if (status < 0)
                goto out_release;
+       }
 
+       desc->page_index = 0;
+       desc->page = page;
        status = nfs_do_filldir(desc, dirent, filldir);
 
-       /* Reset read descriptor so it searches the page cache from
-        * the start upon the next call to readdir_search_pagecache() */
-       desc->page_index = 0;
-       desc->entry->cookie = desc->entry->prev_cookie = 0;
-       desc->entry->eof = 0;
  out:
        dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
                        __func__, status);
        return status;
  out_release:
-       dir_page_release(desc);
+       cache_page_release(desc);
        goto out;
 }
 
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode    *inode = dentry->d_inode;
        nfs_readdir_descriptor_t my_desc,
                        *desc = &my_desc;
-       struct nfs_entry my_entry;
        int res = -ENOMEM;
 
        dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
        desc->decode = NFS_PROTO(inode)->decode_dirent;
        desc->plus = NFS_USE_READDIRPLUS(inode);
 
-       my_entry.cookie = my_entry.prev_cookie = 0;
-       my_entry.eof = 0;
-       my_entry.fh = nfs_alloc_fhandle();
-       my_entry.fattr = nfs_alloc_fattr();
-       if (my_entry.fh == NULL || my_entry.fattr == NULL)
-               goto out_alloc_failed;
-
-       desc->entry = &my_entry;
-
        nfs_block_sillyrename(dentry);
        res = nfs_revalidate_mapping(inode, filp->f_mapping);
        if (res < 0)
                goto out;
 
-       while(!desc->entry->eof) {
+       while (desc->eof != 1) {
                res = readdir_search_pagecache(desc);
 
                if (res == -EBADCOOKIE) {
                        /* This means either end of directory */
-                       if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) {
+                       if (*desc->dir_cookie && desc->eof == 0) {
                                /* Or that the server has 'lost' a cookie */
                                res = uncached_readdir(desc, dirent, filldir);
                                if (res >= 0)
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
                if (res == -ETOOSMALL && desc->plus) {
                        clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        nfs_zap_caches(inode);
+                       desc->page_index = 0;
                        desc->plus = 0;
-                       desc->entry->eof = 0;
+                       desc->eof = 0;
                        continue;
                }
                if (res < 0)
@@ -605,9 +817,6 @@ out:
        nfs_unblock_sillyrename(dentry);
        if (res > 0)
                res = 0;
-out_alloc_failed:
-       nfs_free_fattr(my_entry.fattr);
-       nfs_free_fhandle(my_entry.fh);
        dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
                        dentry->d_parent->d_name.name, dentry->d_name.name,
                        res);
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
        return 1;
 }
 
+/*
+ * Build an nfs_open_context for 'dentry' on the mount found in nd->path.
+ * The mode is the READ/WRITE/EXEC subset of the open intent flags and the
+ * credential comes from rpc_lookup_cred().  Returns the new context, or an
+ * ERR_PTR: the credential lookup error, or -ENOMEM when
+ * alloc_nfs_open_context() returns NULL.
+ */
+static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
+{
+       struct path target = {
+               .mnt = nd->path.mnt,
+               .dentry = dentry,
+       };
+       fmode_t open_mode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+       struct rpc_cred *rpc_cred = rpc_lookup_cred();
+       struct nfs_open_context *new_ctx;
+
+       if (IS_ERR(rpc_cred))
+               return ERR_CAST(rpc_cred);
+
+       new_ctx = alloc_nfs_open_context(&target, rpc_cred, open_mode);
+       /* the looked-up credential is put whether or not the allocation worked */
+       put_rpccred(rpc_cred);
+       if (new_ctx != NULL)
+               return new_ctx;
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Open callback handed to lookup_instantiate_filp() by
+ * nfs_intent_set_file(): calls nfs_fscache_set_inode_cookie() for the
+ * inode/file pair and always returns 0.
+ */
+static int do_open(struct inode *inode, struct file *filp)
+{
+       nfs_fscache_set_inode_cookie(inode, filp);
+       return 0;
+}
+
+/*
+ * Instantiate the file for the open intent in 'nd' and attach 'ctx' to it.
+ * For FMODE_EXEC contexts nfs_may_open() is called first and a negative
+ * result aborts the open.  put_nfs_open_context(ctx) is called on every
+ * path.  Returns a negative error from nfs_may_open() or
+ * lookup_instantiate_filp(), otherwise 0 (or nfs_may_open()'s
+ * non-negative result).
+ */
+static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
+{
+       struct file *opened;
+       int status = 0;
+
+       /* If the open_intent is for execute, we have an extra check to make */
+       if (ctx->mode & FMODE_EXEC)
+               status = nfs_may_open(ctx->path.dentry->d_inode,
+                               ctx->cred,
+                               nd->intent.open.flags);
+
+       if (status >= 0) {
+               opened = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
+               if (IS_ERR(opened))
+                       status = PTR_ERR(opened);
+               else
+                       nfs_file_set_open_context(opened, ctx);
+       }
+
+       put_nfs_open_context(ctx);
+       return status;
+}
+
 static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+       struct nfs_open_context *ctx;
+       struct iattr attr;
        struct dentry *res = NULL;
-       int error;
+       struct inode *inode;
+       int open_flags;
+       int err;
 
        dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
                goto out;
        }
 
+       ctx = nameidata_to_nfs_open_context(dentry, nd);
+       res = ERR_CAST(ctx);
+       if (IS_ERR(ctx))
+               goto out;
+
+       open_flags = nd->intent.open.flags;
+       if (nd->flags & LOOKUP_CREATE) {
+               attr.ia_mode = nd->intent.open.create_mode;
+               attr.ia_valid = ATTR_MODE;
+               if (!IS_POSIXACL(dir))
+                       attr.ia_mode &= ~current_umask();
+       } else {
+               open_flags &= ~(O_EXCL | O_CREAT);
+               attr.ia_valid = 0;
+       }
+
        /* Open the file on the server */
-       res = nfs4_atomic_open(dir, dentry, nd);
-       if (IS_ERR(res)) {
-               error = PTR_ERR(res);
-               switch (error) {
+       nfs_block_sillyrename(dentry->d_parent);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
+       if (IS_ERR(inode)) {
+               nfs_unblock_sillyrename(dentry->d_parent);
+               put_nfs_open_context(ctx);
+               switch (PTR_ERR(inode)) {
                        /* Make a negative dentry */
                        case -ENOENT:
+                               d_add(dentry, NULL);
                                res = NULL;
                                goto out;
                        /* This turned out not to be a regular file */
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
                                        goto no_open;
                        /* case -EINVAL: */
                        default:
+                               res = ERR_CAST(inode);
                                goto out;
                }
-       } else if (res != NULL)
+       }
+       res = d_add_unique(dentry, inode);
+       nfs_unblock_sillyrename(dentry->d_parent);
+       if (res != NULL) {
+               dput(ctx->path.dentry);
+               ctx->path.dentry = dget(res);
                dentry = res;
+       }
+       err = nfs_intent_set_file(nd, ctx);
+       if (err < 0) {
+               if (res != NULL)
+                       dput(res);
+               return ERR_PTR(err);
+       }
 out:
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
        return res;
 no_open:
        return nfs_lookup(dir, dentry, nd);
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        struct dentry *parent = NULL;
        struct inode *inode = dentry->d_inode;
        struct inode *dir;
+       struct nfs_open_context *ctx;
        int openflags, ret = 0;
 
        if (!is_atomic_open(nd) || d_mountpoint(dentry))
                goto no_open;
+
        parent = dget_parent(dentry);
        dir = parent->d_inode;
+
        /* We can't create new files in nfs_open_revalidate(), so we
         * optimize away revalidation of negative dentries.
         */
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
        /* We can't create new files, or truncate existing ones here */
        openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
 
+       ctx = nameidata_to_nfs_open_context(dentry, nd);
+       ret = PTR_ERR(ctx);
+       if (IS_ERR(ctx))
+               goto out;
        /*
         * Note: we're not holding inode->i_mutex and so may be racing with
         * operations that change the directory. We therefore save the
         * change attribute *before* we do the RPC call.
         */
-       ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
+       inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
+       if (IS_ERR(inode)) {
+               ret = PTR_ERR(inode);
+               switch (ret) {
+               case -EPERM:
+               case -EACCES:
+               case -EDQUOT:
+               case -ENOSPC:
+               case -EROFS:
+                       goto out_put_ctx;
+               default:
+                       goto out_drop;
+               }
+       }
+       iput(inode);
+       if (inode != dentry->d_inode)
+               goto out_drop;
+
+       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+       ret = nfs_intent_set_file(nd, ctx);
+       if (ret >= 0)
+               ret = 1;
 out:
        dput(parent);
-       if (!ret)
-               d_drop(dentry);
        return ret;
+out_drop:
+       d_drop(dentry);
+       ret = 0;
+out_put_ctx:
+       put_nfs_open_context(ctx);
+       goto out;
+
 no_open_dput:
        dput(parent);
 no_open:
        return nfs_lookup_revalidate(dentry, nd);
 }
-#endif /* CONFIG_NFSV4 */
 
-static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
+/*
+ * Create 'dentry' in 'dir' with mode 'mode'.  When nd->flags has
+ * LOOKUP_CREATE set, an open context is built from the open intent and
+ * passed, together with the intent's open flags, to the protocol's
+ * ->create(); on success that context is handed to nfs_intent_set_file().
+ * Otherwise ->create() is called with open_flags 0 and a NULL context.
+ *
+ * Returns 0 on success or a non-zero error.  The dentry is dropped with
+ * d_drop() when building the context or ->create() fails.
+ */
+static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
+               struct nameidata *nd)
 {
-       struct dentry *parent = desc->file->f_path.dentry;
-       struct inode *dir = parent->d_inode;
-       struct nfs_entry *entry = desc->entry;
-       struct dentry *dentry, *alias;
-       struct qstr name = {
-               .name = entry->name,
-               .len = entry->len,
-       };
-       struct inode *inode;
-       unsigned long verf = nfs_save_change_attribute(dir);
+       struct nfs_open_context *ctx = NULL;
+       struct iattr attr;
+       int error;
+       int open_flags = 0;
 
-       switch (name.len) {
-               case 2:
-                       if (name.name[0] == '.' && name.name[1] == '.')
-                               return dget_parent(parent);
-                       break;
-               case 1:
-                       if (name.name[0] == '.')
-                               return dget(parent);
-       }
+       dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
+                       dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
 
-       spin_lock(&dir->i_lock);
-       if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) {
-               spin_unlock(&dir->i_lock);
-               return NULL;
-       }
-       spin_unlock(&dir->i_lock);
+       attr.ia_mode = mode;
+       attr.ia_valid = ATTR_MODE;
 
-       name.hash = full_name_hash(name.name, name.len);
-       dentry = d_lookup(parent, &name);
-       if (dentry != NULL) {
-               /* Is this a positive dentry that matches the readdir info? */
-               if (dentry->d_inode != NULL &&
-                               (NFS_FILEID(dentry->d_inode) == entry->ino ||
-                               d_mountpoint(dentry))) {
-                       if (!desc->plus || entry->fh->size == 0)
-                               return dentry;
-                       if (nfs_compare_fh(NFS_FH(dentry->d_inode),
-                                               entry->fh) == 0)
-                               goto out_renew;
-               }
-               /* No, so d_drop to allow one to be created */
-               d_drop(dentry);
-               dput(dentry);
-       }
-       if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
-               return NULL;
-       if (name.len > NFS_SERVER(dir)->namelen)
-               return NULL;
-       /* Note: caller is already holding the dir->i_mutex! */
-       dentry = d_alloc(parent, &name);
-       if (dentry == NULL)
-               return NULL;
-       dentry->d_op = NFS_PROTO(dir)->dentry_ops;
-       inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
-       if (IS_ERR(inode)) {
-               dput(dentry);
-               return NULL;
-       }
+       /* an open context is only built when the create comes with an open intent */
+       if ((nd->flags & LOOKUP_CREATE) != 0) {
+               open_flags = nd->intent.open.flags;
 
-       alias = d_materialise_unique(dentry, inode);
-       if (alias != NULL) {
-               dput(dentry);
-               if (IS_ERR(alias))
-                       return NULL;
-               dentry = alias;
+               ctx = nameidata_to_nfs_open_context(dentry, nd);
+               error = PTR_ERR(ctx);
+               if (IS_ERR(ctx))
+                       goto out_err_drop;
        }
 
-out_renew:
-       nfs_set_verifier(dentry, verf);
-       return dentry;
+       error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
+       if (error != 0)
+               goto out_put_ctx;
+       if (ctx != NULL) {
+               /*
+                * nfs_intent_set_file() puts ctx itself on every path, so
+                * its failure goes straight to out_err.
+                */
+               error = nfs_intent_set_file(nd, ctx);
+               if (error < 0)
+                       goto out_err;
+       }
+       return 0;
+out_put_ctx:
+       if (ctx != NULL)
+               put_nfs_open_context(ctx);
+out_err_drop:
+       d_drop(dentry);
+out_err:
+       return error;
 }
 
+#endif /* CONFIG_NFSV4 */
+
 /*
  * Code common to create, mkdir, and mknod.
  */
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 {
        struct iattr attr;
        int error;
-       int open_flags = 0;
 
        dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
                        dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
        attr.ia_mode = mode;
        attr.ia_valid = ATTR_MODE;
 
-       if ((nd->flags & LOOKUP_CREATE) != 0)
-               open_flags = nd->intent.open.flags;
-
-       error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
+       error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
        if (error != 0)
                goto out_err;
        return 0;
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
        return error;
 }
 
-static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
-{
-       static unsigned int sillycounter;
-       const int      fileidsize  = sizeof(NFS_FILEID(dentry->d_inode))*2;
-       const int      countersize = sizeof(sillycounter)*2;
-       const int      slen        = sizeof(".nfs")+fileidsize+countersize-1;
-       char           silly[slen+1];
-       struct qstr    qsilly;
-       struct dentry *sdentry;
-       int            error = -EIO;
-
-       dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
-               dentry->d_parent->d_name.name, dentry->d_name.name, 
-               atomic_read(&dentry->d_count));
-       nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
-
-       /*
-        * We don't allow a dentry to be silly-renamed twice.
-        */
-       error = -EBUSY;
-       if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
-               goto out;
-
-       sprintf(silly, ".nfs%*.*Lx",
-               fileidsize, fileidsize,
-               (unsigned long long)NFS_FILEID(dentry->d_inode));
-
-       /* Return delegation in anticipation of the rename */
-       nfs_inode_return_delegation(dentry->d_inode);
-
-       sdentry = NULL;
-       do {
-               char *suffix = silly + slen - countersize;
-
-               dput(sdentry);
-               sillycounter++;
-               sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
-
-               dfprintk(VFS, "NFS: trying to rename %s to %s\n",
-                               dentry->d_name.name, silly);
-               
-               sdentry = lookup_one_len(silly, dentry->d_parent, slen);
-               /*
-                * N.B. Better to return EBUSY here ... it could be
-                * dangerous to delete the file while it's in use.
-                */
-               if (IS_ERR(sdentry))
-                       goto out;
-       } while(sdentry->d_inode != NULL); /* need negative lookup */
-
-       qsilly.name = silly;
-       qsilly.len  = strlen(silly);
-       if (dentry->d_inode) {
-               error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
-                               dir, &qsilly);
-               nfs_mark_for_revalidate(dentry->d_inode);
-       } else
-               error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
-                               dir, &qsilly);
-       if (!error) {
-               nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-               d_move(dentry, sdentry);
-               error = nfs_async_unlink(dir, dentry);
-               /* If we return 0 we don't unlink */
-       }
-       dput(sdentry);
-out:
-       return error;
-}
-
 /*
  * Remove a file after making sure there are no pending writes,
  * and after checking that the file has only one user. 
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
 int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
        LIST_HEAD(head);
-       struct nfs_inode *nfsi;
+       struct nfs_inode *nfsi, *next;
        struct nfs_access_entry *cache;
 
        if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
                return (nr_to_scan == 0) ? 0 : -1;
 
        spin_lock(&nfs_access_lru_lock);
-       list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+       list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
                struct inode *inode;
 
                if (nr_to_scan-- == 0)
index 05bf3c0..e18c31e 100644 (file)
@@ -551,7 +551,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct file *filp = vma->vm_file;
        struct dentry *dentry = filp->f_path.dentry;
        unsigned pagelen;
-       int ret = -EINVAL;
+       int ret = VM_FAULT_NOPAGE;
        struct address_space *mapping;
 
        dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
@@ -567,21 +567,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
        if (mapping != dentry->d_inode->i_mapping)
                goto out_unlock;
 
-       ret = 0;
        pagelen = nfs_page_length(page);
        if (pagelen == 0)
                goto out_unlock;
 
-       ret = nfs_flush_incompatible(filp, page);
-       if (ret != 0)
-               goto out_unlock;
+       ret = VM_FAULT_LOCKED;
+       if (nfs_flush_incompatible(filp, page) == 0 &&
+           nfs_updatepage(filp, page, 0, pagelen) == 0)
+               goto out;
 
-       ret = nfs_updatepage(filp, page, 0, pagelen);
+       ret = VM_FAULT_SIGBUS;
 out_unlock:
-       if (!ret)
-               return VM_FAULT_LOCKED;
        unlock_page(page);
-       return VM_FAULT_SIGBUS;
+out:
+       return ret;
 }
 
 static const struct vm_operations_struct nfs_file_vm_ops = {
@@ -684,7 +683,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
        return ret;
 }
 
-static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+static int
+do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
        struct inode *inode = filp->f_mapping->host;
        int status = 0;
@@ -699,7 +699,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
        if (nfs_have_delegation(inode, FMODE_READ))
                goto out_noconflict;
 
-       if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+       if (is_local)
                goto out_noconflict;
 
        status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@ -726,7 +726,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
        return res;
 }
 
-static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
+static int
+do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
        struct inode *inode = filp->f_mapping->host;
        int status;
@@ -741,15 +742,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
         *      If we're signalled while cleaning up locks on process exit, we
         *      still need to complete the unlock.
         */
-       /* Use local locking if mounted with "-onolock" */
-       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+       /*
+        * Use local locking if mounted with "-onolock" or with appropriate
+        * "-olocal_lock="
+        */
+       if (!is_local)
                status = NFS_PROTO(inode)->lock(filp, cmd, fl);
        else
                status = do_vfs_lock(filp, fl);
        return status;
 }
 
-static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
+/*
+ * Return non-zero when @ts describes an interval with no whole seconds
+ * and at most 1000 nanoseconds (tv_sec == 0 and tv_nsec <= 1000);
+ * return zero otherwise.
+ */
+static int
+is_time_granular(struct timespec *ts)
+{
+       if (ts->tv_sec != 0)
+               return 0;
+       return ts->tv_nsec <= 1000;
+}
+
+static int
+do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
        struct inode *inode = filp->f_mapping->host;
        int status;
@@ -762,20 +772,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
        if (status != 0)
                goto out;
 
-       /* Use local locking if mounted with "-onolock" */
-       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+       /*
+        * Use local locking if mounted with "-onolock" or with appropriate
+        * "-olocal_lock="
+        */
+       if (!is_local)
                status = NFS_PROTO(inode)->lock(filp, cmd, fl);
        else
                status = do_vfs_lock(filp, fl);
        if (status < 0)
                goto out;
+
        /*
-        * Make sure we clear the cache whenever we try to get the lock.
+        * Revalidate the cache if the server has time stamps granular
+        * enough to detect subsecond changes.  Otherwise, clear the
+        * cache to prevent missing any changes.
+        *
         * This makes locking act as a cache coherency point.
         */
        nfs_sync_mapping(filp->f_mapping);
-       if (!nfs_have_delegation(inode, FMODE_READ))
-               nfs_zap_caches(inode);
+       if (!nfs_have_delegation(inode, FMODE_READ)) {
+               if (is_time_granular(&NFS_SERVER(inode)->time_delta))
+                       __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+               else
+                       nfs_zap_caches(inode);
+       }
 out:
        return status;
 }
@@ -787,6 +808,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
        struct inode *inode = filp->f_mapping->host;
        int ret = -ENOLCK;
+       int is_local = 0;
 
        dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
                        filp->f_path.dentry->d_parent->d_name.name,
@@ -800,6 +822,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
        if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
                goto out_err;
 
+       if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
+               is_local = 1;
+
        if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
                ret = NFS_PROTO(inode)->lock_check_bounds(fl);
                if (ret < 0)
@@ -807,11 +832,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
        }
 
        if (IS_GETLK(cmd))
-               ret = do_getlk(filp, cmd, fl);
+               ret = do_getlk(filp, cmd, fl, is_local);
        else if (fl->fl_type == F_UNLCK)
-               ret = do_unlk(filp, cmd, fl);
+               ret = do_unlk(filp, cmd, fl, is_local);
        else
-               ret = do_setlk(filp, cmd, fl);
+               ret = do_setlk(filp, cmd, fl, is_local);
 out_err:
        return ret;
 }
@@ -821,6 +846,9 @@ out_err:
  */
 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 {
+       struct inode *inode = filp->f_mapping->host;
+       int is_local = 0;
+
        dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
                        filp->f_path.dentry->d_parent->d_name.name,
                        filp->f_path.dentry->d_name.name,
@@ -829,14 +857,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;
 
+       if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
+               is_local = 1;
+
        /* We're simulating flock() locks using posix locks on the server */
        fl->fl_owner = (fl_owner_t)filp;
        fl->fl_start = 0;
        fl->fl_end = OFFSET_MAX;
 
        if (fl->fl_type == F_UNLCK)
-               return do_unlk(filp, cmd, fl);
-       return do_setlk(filp, cmd, fl);
+               return do_unlk(filp, cmd, fl, is_local);
+       return do_setlk(filp, cmd, fl, is_local);
 }
 
 /*
index 21a84d4..dec47ed 100644 (file)
  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
+
+#include <linux/slab.h>
+#include <linux/cred.h>
+#include <linux/nfs_idmap.h>
+#include <linux/keyctl.h>
+#include <linux/key-type.h>
+#include <linux/rcupdate.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+
+#include <keys/user-type.h>
+
+#define NFS_UINT_MAXLEN 11
+
+const struct cred *id_resolver_cache;
+
+/*
+ * Key type named "id_resolver".  All of its operations are delegated to
+ * the generic user_* key helpers.
+ */
+struct key_type key_type_id_resolver = {
+       .name           = "id_resolver",
+       .instantiate    = user_instantiate,
+       .match          = user_match,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = user_describe,
+       .read           = user_read,
+};
+
+/*
+ * Prepare the credentials and the ".id_resolver" keyring used for
+ * id_resolver key requests, and register the id_resolver key type.
+ *
+ * Returns 0 on success.  On failure everything obtained so far is
+ * released and an error is returned: -ENOMEM when no credentials could
+ * be prepared, otherwise the error reported by key_alloc(),
+ * key_instantiate_and_link() or register_key_type().
+ */
+int nfs_idmap_init(void)
+{
+       struct cred *resolver_cred;
+       struct key *resolver_ring;
+       int err;
+
+       printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
+
+       resolver_cred = prepare_kernel_cred(NULL);
+       if (resolver_cred == NULL)
+               return -ENOMEM;
+
+       resolver_ring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0,
+                                 resolver_cred,
+                                 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+                                 KEY_USR_VIEW | KEY_USR_READ,
+                                 KEY_ALLOC_NOT_IN_QUOTA);
+       if (IS_ERR(resolver_ring)) {
+               err = PTR_ERR(resolver_ring);
+               goto out_drop_cred;
+       }
+
+       err = key_instantiate_and_link(resolver_ring, NULL, 0, NULL, NULL);
+       if (err < 0)
+               goto out_drop_keyring;
+
+       err = register_key_type(&key_type_id_resolver);
+       if (err < 0)
+               goto out_drop_keyring;
+
+       /* Publish the credentials only once every step above succeeded */
+       resolver_cred->thread_keyring = resolver_ring;
+       resolver_cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
+       id_resolver_cache = resolver_cred;
+       return 0;
+
+out_drop_keyring:
+       key_put(resolver_ring);
+out_drop_cred:
+       put_cred(resolver_cred);
+       return err;
+}
+
+/*
+ * Tear down the id_resolver state: revoke the keyring attached to the
+ * cached credentials, unregister the id_resolver key type and drop the
+ * reference on the credentials.
+ */
+void nfs_idmap_quit(void)
+{
+       const struct cred *resolver_cred = id_resolver_cache;
+
+       key_revoke(resolver_cred->thread_keyring);
+       unregister_key_type(&key_type_id_resolver);
+       put_cred(resolver_cred);
+}
+
+/*
+ * Assemble the description to pass to request_key()
+ * This function will allocate a new string of the form "<type>:<name>"
+ * and update *desc to point at it.  The caller is responsible for
+ * freeing *desc with kfree().
+ *
+ * On allocation failure -ENOMEM is returned.  Otherwise, the size of the
+ * allocated buffer is returned: typelen + namelen + 2, which counts the
+ * ':' separator and the trailing NUL.
+ */
+static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
+                               const char *type, size_t typelen, char **desc)
+{
+       char *cp;
+       size_t desclen = typelen + namelen + 2;
+
+       *desc = kmalloc(desclen, GFP_KERNEL);
+       /* Test the allocation itself, not the caller's out-pointer */
+       if (!*desc)
+               return -ENOMEM;
+
+       cp = *desc;
+       memcpy(cp, type, typelen);
+       cp += typelen;
+       *cp++ = ':';
+
+       memcpy(cp, name, namelen);
+       cp += namelen;
+       *cp = '\0';
+       return desclen;
+}
+
+/*
+ * Request the id_resolver key described by "<type>:<name>" and copy its
+ * payload into @data.
+ *
+ * The request is issued with the id_resolver_cache credentials in
+ * effect; the caller's credentials are restored afterwards.
+ *
+ * Returns the payload length on success.  Returns a negative error if
+ * the description cannot be allocated, the key request or validation
+ * fails, the key carries no payload, or the payload is empty or does
+ * not fit in @data_size bytes.
+ */
+static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
+               const char *type, void *data, size_t data_size)
+{
+       const struct cred *saved_cred;
+       struct key *rkey;
+       char *desc;
+       struct user_key_payload *payload;
+       ssize_t ret;
+
+       ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
+       if (ret <= 0)
+               goto out;
+
+       saved_cred = override_creds(id_resolver_cache);
+       rkey = request_key(&key_type_id_resolver, desc, "");
+       revert_creds(saved_cred);
+       kfree(desc);
+       if (IS_ERR(rkey)) {
+               ret = PTR_ERR(rkey);
+               goto out;
+       }
+
+       rcu_read_lock();
+       rkey->perm |= KEY_USR_VIEW;
+
+       ret = key_validate(rkey);
+       if (ret < 0)
+               goto out_up;
+
+       payload = rcu_dereference(rkey->payload.data);
+       if (IS_ERR_OR_NULL(payload)) {
+               /*
+                * PTR_ERR(NULL) is 0, which would be reported as a
+                * successful zero-length result; treat a missing
+                * payload like an empty one instead.
+                */
+               ret = payload ? PTR_ERR(payload) : -EINVAL;
+               goto out_up;
+       }
+
+       ret = payload->datalen;
+       if (ret > 0 && ret <= data_size)
+               memcpy(data, payload->data, ret);
+       else
+               ret = -EINVAL;
+
+out_up:
+       rcu_read_unlock();
+       key_put(rkey);
+out:
+       return ret;
+}
+
+
+/*
+ * ID -> Name
+ *
+ * Format @id as a decimal string and resolve it through an id_resolver
+ * key of the given @type, copying the result into @buf (at most @buflen
+ * bytes).  Returns the length reported by nfs_idmap_request_key(), or
+ * -EINVAL if that lookup fails for any reason.
+ */
+static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
+{
+       char digits[NFS_UINT_MAXLEN];
+       int ndigits;
+       ssize_t len;
+
+       ndigits = snprintf(digits, sizeof(digits), "%u", id);
+       len = nfs_idmap_request_key(digits, ndigits, type, buf, buflen);
+       return (len < 0) ? -EINVAL : len;
+}
+
+/*
+ * Name -> ID
+ *
+ * Resolve @name (@namelen bytes) through an id_resolver key of the given
+ * @type and parse the key payload as a base-10 number.
+ *
+ * Returns 0 and stores the number in *id on success.  Returns -EINVAL if
+ * the key lookup fails, or the strict_strtol() error if the payload is
+ * not a valid number; *id is left untouched on failure.
+ */
+static int nfs_idmap_lookup_id(const char *name, size_t namelen,
+                               const char *type, __u32 *id)
+{
+       /* One spare byte so the payload can always be NUL-terminated */
+       char id_str[NFS_UINT_MAXLEN + 1];
+       long id_long;
+       ssize_t data_size;
+       int ret;
+
+       data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
+       if (data_size <= 0)
+               return -EINVAL;
+
+       /* The payload is copied as raw bytes; terminate it before parsing */
+       id_str[data_size] = '\0';
+       ret = strict_strtol(id_str, 10, &id_long);
+       /* Only publish a value that was actually parsed */
+       if (ret == 0)
+               *id = (__u32)id_long;
+       return ret;
+}
+
+/*
+ * Map the user name @name (@namelen bytes) to a numeric id stored in
+ * *uid, using an id_resolver key of type "uid".  @clp is not used by
+ * this implementation.  Returns the result of nfs_idmap_lookup_id().
+ */
+int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
+{
+       return nfs_idmap_lookup_id(name, namelen, "uid", uid);
+}
+
+/*
+ * Map the group name @name (@namelen bytes) to a numeric id stored in
+ * *gid, using an id_resolver key of type "gid".  @clp is not used by
+ * this implementation.  Returns the result of nfs_idmap_lookup_id().
+ */
+int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
+{
+       return nfs_idmap_lookup_id(name, namelen, "gid", gid);
+}
+
+/*
+ * Map the numeric @uid to a name written into @buf (at most @buflen
+ * bytes), using an id_resolver key of type "user".  @clp is not used by
+ * this implementation.  Returns the result of nfs_idmap_lookup_name().
+ */
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
+{
+       return nfs_idmap_lookup_name(uid, "user", buf, buflen);
+}
+/*
+ * Map the numeric @gid to a name written into @buf (at most @buflen
+ * bytes), using an id_resolver key of type "group".  @clp is not used by
+ * this implementation.  Returns the result of nfs_idmap_lookup_name().
+ */
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
+{
+       return nfs_idmap_lookup_name(gid, "group", buf, buflen);
+}
+
+#else  /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
+
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/init.h>
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele
        return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
 }
 
-int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf)
+int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
 {
        struct idmap *idmap = clp->cl_idmap;
 
        return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
 }
-int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf)
+int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
 {
        struct idmap *idmap = clp->cl_idmap;
 
        return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
 }
 
+#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
index 7d2d6c7..6eec286 100644 (file)
@@ -234,9 +234,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
        return 0;
 }
 
-/* Don't use READDIRPLUS on directories that we believe are too large */
-#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
-
 /*
  * This is our front-end to iget that looks up inodes by file handle
  * instead of inode number.
@@ -291,8 +288,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                } else if (S_ISDIR(inode->i_mode)) {
                        inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
                        inode->i_fop = &nfs_dir_operations;
-                       if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
-                           && fattr->size <= NFS_LIMIT_READDIRPLUS)
+                       if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
                                set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        /* Deal with crossing mountpoints */
                        if ((fattr->valid & NFS_ATTR_FATTR_FSID)
@@ -623,7 +619,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
        nfs_revalidate_inode(server, inode);
 }
 
-static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred)
+struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
 {
        struct nfs_open_context *ctx;
 
@@ -633,11 +629,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
                path_get(&ctx->path);
                ctx->cred = get_rpccred(cred);
                ctx->state = NULL;
+               ctx->mode = f_mode;
                ctx->flags = 0;
                ctx->error = 0;
                ctx->dir_cookie = 0;
                nfs_init_lock_context(&ctx->lock_context);
                ctx->lock_context.open_context = ctx;
+               INIT_LIST_HEAD(&ctx->list);
        }
        return ctx;
 }
@@ -653,11 +651,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 {
        struct inode *inode = ctx->path.dentry->d_inode;
 
-       if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+       if (!list_empty(&ctx->list)) {
+               if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
+                       return;
+               list_del(&ctx->list);
+               spin_unlock(&inode->i_lock);
+       } else if (!atomic_dec_and_test(&ctx->lock_context.count))
                return;
-       list_del(&ctx->list);
-       spin_unlock(&inode->i_lock);
-       NFS_PROTO(inode)->close_context(ctx, is_sync);
+       if (inode != NULL)
+               NFS_PROTO(inode)->close_context(ctx, is_sync);
        if (ctx->cred != NULL)
                put_rpccred(ctx->cred);
        path_put(&ctx->path);
@@ -673,7 +675,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
  */
-static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
+void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
 {
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct nfs_inode *nfsi = NFS_I(inode);
@@ -730,11 +732,10 @@ int nfs_open(struct inode *inode, struct file *filp)
        cred = rpc_lookup_cred();
        if (IS_ERR(cred))
                return PTR_ERR(cred);
-       ctx = alloc_nfs_open_context(&filp->f_path, cred);
+       ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
        put_rpccred(cred);
        if (ctx == NULL)
                return -ENOMEM;
-       ctx->mode = filp->f_mode;
        nfs_file_set_open_context(filp, ctx);
        put_nfs_open_context(ctx);
        nfs_fscache_set_inode_cookie(inode, filp);
@@ -1493,7 +1494,7 @@ static int nfsiod_start(void)
 {
        struct workqueue_struct *wq;
        dprintk("RPC:       creating workqueue nfsiod\n");
-       wq = create_singlethread_workqueue("nfsiod");
+       wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
        if (wq == NULL)
                return -ENOMEM;
        nfsiod_workqueue = wq;
@@ -1521,6 +1522,10 @@ static int __init init_nfs_fs(void)
 {
        int err;
 
+       err = nfs_idmap_init();
+       if (err < 0)
+               goto out9;
+
        err = nfs_dns_resolver_init();
        if (err < 0)
                goto out8;
@@ -1585,6 +1590,8 @@ out6:
 out7:
        nfs_dns_resolver_destroy();
 out8:
+       nfs_idmap_quit();
+out9:
        return err;
 }
 
@@ -1597,6 +1604,7 @@ static void __exit exit_nfs_fs(void)
        nfs_destroy_nfspagecache();
        nfs_fscache_unregister();
        nfs_dns_resolver_destroy();
+       nfs_idmap_quit();
 #ifdef CONFIG_PROC_FS
        rpc_proc_unregister("nfs");
 #endif
index c961bc9..db08ff3 100644 (file)
@@ -62,6 +62,12 @@ struct nfs_clone_mount {
  */
 #define NFS_UNSPEC_PORT                (-1)
 
+/*
+ * Maximum number of pages that readdir can use for creating
+ * a vmapped array of pages.
+ */
+#define NFS_MAX_READDIR_PAGES 8
+
 /*
  * In-kernel mount arguments
  */
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void);
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(int);
 extern struct rpc_procinfo nfs_procedures[];
-extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int);
+extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 
 /* nfs3xdr.c */
 extern struct rpc_procinfo nfs3_procedures[];
-extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int);
+extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 
 /* nfs4xdr.c */
 #ifdef CONFIG_NFS_V4
-extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 #endif
 #ifdef CONFIG_NFS_V4_1
 extern const u32 nfs41_maxread_overhead;
index 59047f8..d610203 100644 (file)
@@ -436,7 +436,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
 
        for (i = 0; i < entries; i++) {
                flavors[i] = ntohl(*p++);
-               dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]);
+               dprintk("NFS:   auth flavor[%u]: %d\n", i, flavors[i]);
        }
        *count = i;
 
index db8846a..e6bf457 100644 (file)
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
 static int
 nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
 {
-       p = xdr_encode_fhandle(p, args->fromfh);
-       p = xdr_encode_array(p, args->fromname, args->fromlen);
-       p = xdr_encode_fhandle(p, args->tofh);
-       p = xdr_encode_array(p, args->toname, args->tolen);
+       p = xdr_encode_fhandle(p, args->old_dir);
+       p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
+       p = xdr_encode_fhandle(p, args->new_dir);
+       p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
        req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
        return 0;
 }
@@ -423,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
        struct page **page;
        size_t hdrlen;
        unsigned int pglen, recvd;
-       u32 len;
        int status, nr = 0;
-       __be32 *end, *entry, *kaddr;
 
        if ((status = ntohl(*p++)))
                return nfs_stat_to_errno(status);
@@ -445,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
        if (pglen > recvd)
                pglen = recvd;
        page = rcvbuf->pages;
-       kaddr = p = kmap_atomic(*page, KM_USER0);
-       end = (__be32 *)((char *)p + pglen);
-       entry = p;
-
-       /* Make sure the packet actually has a value_follows and EOF entry */
-       if ((entry + 1) > end)
-               goto short_pkt;
-
-       for (; *p++; nr++) {
-               if (p + 2 > end)
-                       goto short_pkt;
-               p++; /* fileid */
-               len = ntohl(*p++);
-               p += XDR_QUADLEN(len) + 1;      /* name plus cookie */
-               if (len > NFS2_MAXNAMLEN) {
-                       dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
-                                               len);
-                       goto err_unmap;
-               }
-               if (p + 2 > end)
-                       goto short_pkt;
-               entry = p;
-       }
-
-       /*
-        * Apparently some server sends responses that are a valid size, but
-        * contain no entries, and have value_follows==0 and EOF==0. For
-        * those, just set the EOF marker.
-        */
-       if (!nr && entry[1] == 0) {
-               dprintk("NFS: readdir reply truncated!\n");
-               entry[1] = 1;
-       }
- out:
-       kunmap_atomic(kaddr, KM_USER0);
        return nr;
- short_pkt:
-       /*
-        * When we get a short packet there are 2 possibilities. We can
-        * return an error, or fix up the response to look like a valid
-        * response and return what we have so far. If there are no
-        * entries and the packet was short, then return -EIO. If there
-        * are valid entries in the response, return them and pretend that
-        * the call was successful, but incomplete. The caller can retry the
-        * readdir starting at the last cookie.
-        */
-       entry[0] = entry[1] = 0;
-       if (!nr)
-               nr = -errno_NFSERR_IO;
-       goto out;
-err_unmap:
-       nr = -errno_NFSERR_IO;
-       goto out;
+}
+
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+       dprintk("nfs: %s: prematurely hit end of receive buffer. "
+               "Remaining buffer length is %tu words.\n",
+               func, xdr->end - xdr->p);
 }
 
 __be32 *
-nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
 {
-       if (!*p++) {
-               if (!*p)
+       __be32 *p;
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       if (!ntohl(*p++)) {
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
+               if (!ntohl(*p++))
                        return ERR_PTR(-EAGAIN);
                entry->eof = 1;
                return ERR_PTR(-EBADCOOKIE);
        }
 
+       p = xdr_inline_decode(xdr, 8);
+       if (unlikely(!p))
+               goto out_overflow;
+
        entry->ino        = ntohl(*p++);
        entry->len        = ntohl(*p++);
+
+       p = xdr_inline_decode(xdr, entry->len + 4);
+       if (unlikely(!p))
+               goto out_overflow;
        entry->name       = (const char *) p;
        p                += XDR_QUADLEN(entry->len);
        entry->prev_cookie        = entry->cookie;
        entry->cookie     = ntohl(*p++);
-       entry->eof        = !p[0] && p[1];
+
+       p = xdr_inline_peek(xdr, 8);
+       if (p != NULL)
+               entry->eof = !p[0] && p[1];
+       else
+               entry->eof = 0;
 
        return p;
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return ERR_PTR(-EIO);
 }
 
 /*
@@ -596,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
        struct kvec *iov = rcvbuf->head;
        size_t hdrlen;
        u32 len, recvd;
-       char    *kaddr;
        int     status;
 
        if ((status = ntohl(*p++)))
@@ -623,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
                return -EIO;
        }
 
-       /* NULL terminate the string we got */
-       kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
-       kaddr[len+rcvbuf->page_base] = '\0';
-       kunmap_atomic(kaddr, KM_USER0);
+       xdr_terminate_string(rcvbuf, len);
        return 0;
 }
 
index fabb4f2..ce939c0 100644 (file)
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
  */
 static int
 nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                int flags, struct nameidata *nd)
+                int flags, struct nfs_open_context *ctx)
 {
        struct nfs3_createdata *data;
        mode_t mode = sattr->ia_mode;
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
        return 1;
 }
 
+/*
+ * Point @msg at the NFS3PROC_RENAME entry of nfs3_procedures.
+ * @dir is not used here.
+ */
+static void
+nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+{
+       msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
+}
+
+/*
+ * Completion step for a RENAME call.
+ *
+ * Returns 0, leaving both inodes alone, when nfs3_async_handle_jukebox()
+ * returns non-zero for @task.  Otherwise updates @old_dir and @new_dir
+ * from the attributes carried in the reply and returns 1.
+ */
+static int
+nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
+                     struct inode *new_dir)
+{
+       struct nfs_renameres *reply;
+
+       if (nfs3_async_handle_jukebox(task, old_dir) != 0)
+               return 0;
+
+       reply = task->tk_msg.rpc_resp;
+       nfs_post_op_update_inode(old_dir, reply->old_fattr);
+       nfs_post_op_update_inode(new_dir, reply->new_fattr);
+
+       return 1;
+}
+
 static int
 nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
                 struct inode *new_dir, struct qstr *new_name)
 {
-       struct nfs3_renameargs  arg = {
-               .fromfh         = NFS_FH(old_dir),
-               .fromname       = old_name->name,
-               .fromlen        = old_name->len,
-               .tofh           = NFS_FH(new_dir),
-               .toname         = new_name->name,
-               .tolen          = new_name->len
+       struct nfs_renameargs   arg = {
+               .old_dir        = NFS_FH(old_dir),
+               .old_name       = old_name,
+               .new_dir        = NFS_FH(new_dir),
+               .new_name       = new_name,
        };
-       struct nfs3_renameres res;
+       struct nfs_renameres res;
        struct rpc_message msg = {
                .rpc_proc       = &nfs3_procedures[NFS3PROC_RENAME],
                .rpc_argp       = &arg,
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
 
        dprintk("NFS call  rename %s -> %s\n", old_name->name, new_name->name);
 
-       res.fromattr = nfs_alloc_fattr();
-       res.toattr = nfs_alloc_fattr();
-       if (res.fromattr == NULL || res.toattr == NULL)
+       res.old_fattr = nfs_alloc_fattr();
+       res.new_fattr = nfs_alloc_fattr();
+       if (res.old_fattr == NULL || res.new_fattr == NULL)
                goto out;
 
        status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
-       nfs_post_op_update_inode(old_dir, res.fromattr);
-       nfs_post_op_update_inode(new_dir, res.toattr);
+       nfs_post_op_update_inode(old_dir, res.old_fattr);
+       nfs_post_op_update_inode(new_dir, res.new_fattr);
 out:
-       nfs_free_fattr(res.toattr);
-       nfs_free_fattr(res.fromattr);
+       nfs_free_fattr(res.old_fattr);
+       nfs_free_fattr(res.new_fattr);
        dprintk("NFS reply rename: %d\n", status);
        return status;
 }
@@ -611,7 +630,7 @@ out:
  */
 static int
 nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-                 u64 cookie, struct page *page, unsigned int count, int plus)
+                 u64 cookie, struct page **pages, unsigned int count, int plus)
 {
        struct inode            *dir = dentry->d_inode;
        __be32                  *verf = NFS_COOKIEVERF(dir);
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                .verf           = {verf[0], verf[1]},
                .plus           = plus,
                .count          = count,
-               .pages          = &page
+               .pages          = pages
        };
        struct nfs3_readdirres  res = {
                .verf           = verf,
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 
        nfs_free_fattr(res.dir_attr);
 out:
-       dprintk("NFS reply readdir: %d\n", status);
+       dprintk("NFS reply readdir%s: %d\n",
+                       plus? "plus" : "", status);
        return status;
 }
 
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
        dprintk("NFS call  fsstat\n");
        nfs_fattr_init(stat->fattr);
        status = rpc_call_sync(server->client, &msg, 0);
-       dprintk("NFS reply statfs: %d\n", status);
+       dprintk("NFS reply fsstat: %d\n", status);
        return status;
 }
 
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .unlink_setup   = nfs3_proc_unlink_setup,
        .unlink_done    = nfs3_proc_unlink_done,
        .rename         = nfs3_proc_rename,
+       .rename_setup   = nfs3_proc_rename_setup,
+       .rename_done    = nfs3_proc_rename_done,
        .link           = nfs3_proc_link,
        .symlink        = nfs3_proc_symlink,
        .mkdir          = nfs3_proc_mkdir,
index 9769704..d9a5e83 100644 (file)
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = {
        [NF3FIFO] = S_IFIFO,
 };
 
+/*
+ * Emit a debug message naming @func and reporting the distance between
+ * the current position of @xdr and its end (xdr->end - xdr->p).
+ */
+static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
+{
+       dprintk("nfs: %s: prematurely hit end of receive buffer. "
+               "Remaining buffer length is %tu words.\n",
+               func, xdr->end - xdr->p);
+}
+
 /*
  * Common NFS XDR functions as inlines
  */
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
        return NULL;
 }
 
+/*
+ * Pull a file handle out of @xdr into @fh: a 32-bit length word followed
+ * by that many bytes of handle data.
+ *
+ * Returns a pointer just past the handle data (rounded up with
+ * XDR_QUADLEN), NULL when the advertised length exceeds NFS3_FHSIZE, or
+ * ERR_PTR(-EIO) when xdr_inline_decode() cannot supply the requested
+ * bytes.
+ */
+static inline __be32 *
+xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+       __be32 *p = xdr_inline_decode(xdr, 4);
+
+       if (unlikely(!p))
+               goto out_overflow;
+
+       fh->size = ntohl(*p++);
+       /* Oversized handle: nothing more is consumed from the stream */
+       if (fh->size > NFS3_FHSIZE)
+               return NULL;
+
+       p = xdr_inline_decode(xdr, fh->size);
+       if (unlikely(!p))
+               goto out_overflow;
+       memcpy(fh->data, p, fh->size);
+       return p + XDR_QUADLEN(fh->size);
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return ERR_PTR(-EIO);
+}
+
 /*
  * Encode/decode time.
  */
@@ -240,6 +270,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
        return p;
 }
 
+/*
+ * Decode an optional attribute block from @xdr: one 32-bit flag word
+ * and, when that flag is non-zero, 84 bytes handed to xdr_decode_fattr()
+ * to fill @fattr.
+ *
+ * Returns the pointer just past the flag word when no attributes follow,
+ * the value returned by xdr_decode_fattr() when they do, or
+ * ERR_PTR(-EIO) when xdr_inline_decode() cannot supply the requested
+ * bytes.
+ */
+static inline __be32 *
+xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
+{
+       __be32 *p = xdr_inline_decode(xdr, 4);
+
+       if (unlikely(!p))
+               goto out_overflow;
+       if (!ntohl(*p++))
+               return p;
+
+       /* NOTE(review): 84 is presumably the encoded attribute size - confirm */
+       p = xdr_inline_decode(xdr, 84);
+       if (unlikely(!p))
+               goto out_overflow;
+       return xdr_decode_fattr(p, fattr);
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return ERR_PTR(-EIO);
+}
+
 static inline __be32 *
 xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
 {
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
  * Encode RENAME arguments
  */
 static int
-nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args)
+nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
 {
-       p = xdr_encode_fhandle(p, args->fromfh);
-       p = xdr_encode_array(p, args->fromname, args->fromlen);
-       p = xdr_encode_fhandle(p, args->tofh);
-       p = xdr_encode_array(p, args->toname, args->tolen);
+       p = xdr_encode_fhandle(p, args->old_dir);
+       p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
+       p = xdr_encode_fhandle(p, args->new_dir);
+       p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
        req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
        return 0;
 }
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
        struct kvec *iov = rcvbuf->head;
        struct page **page;
        size_t hdrlen;
-       u32 len, recvd, pglen;
+       u32 recvd, pglen;
        int status, nr = 0;
-       __be32 *entry, *end, *kaddr;
 
        status = ntohl(*p++);
        /* Decode post_op_attrs */
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
        if (pglen > recvd)
                pglen = recvd;
        page = rcvbuf->pages;
-       kaddr = p = kmap_atomic(*page, KM_USER0);
-       end = (__be32 *)((char *)p + pglen);
-       entry = p;
-
-       /* Make sure the packet actually has a value_follows and EOF entry */
-       if ((entry + 1) > end)
-               goto short_pkt;
-
-       for (; *p++; nr++) {
-               if (p + 3 > end)
-                       goto short_pkt;
-               p += 2;                         /* inode # */
-               len = ntohl(*p++);              /* string length */
-               p += XDR_QUADLEN(len) + 2;      /* name + cookie */
-               if (len > NFS3_MAXNAMLEN) {
-                       dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
-                                               len);
-                       goto err_unmap;
-               }
 
-               if (res->plus) {
-                       /* post_op_attr */
-                       if (p + 2 > end)
-                               goto short_pkt;
-                       if (*p++) {
-                               p += 21;
-                               if (p + 1 > end)
-                                       goto short_pkt;
-                       }
-                       /* post_op_fh3 */
-                       if (*p++) {
-                               if (p + 1 > end)
-                                       goto short_pkt;
-                               len = ntohl(*p++);
-                               if (len > NFS3_FHSIZE) {
-                                       dprintk("NFS: giant filehandle in "
-                                               "readdir (len 0x%x)!\n", len);
-                                       goto err_unmap;
-                               }
-                               p += XDR_QUADLEN(len);
-                       }
-               }
-
-               if (p + 2 > end)
-                       goto short_pkt;
-               entry = p;
-       }
-
-       /*
-        * Apparently some server sends responses that are a valid size, but
-        * contain no entries, and have value_follows==0 and EOF==0. For
-        * those, just set the EOF marker.
-        */
-       if (!nr && entry[1] == 0) {
-               dprintk("NFS: readdir reply truncated!\n");
-               entry[1] = 1;
-       }
- out:
-       kunmap_atomic(kaddr, KM_USER0);
        return nr;
- short_pkt:
-       /*
-        * When we get a short packet there are 2 possibilities. We can
-        * return an error, or fix up the response to look like a valid
-        * response and return what we have so far. If there are no
-        * entries and the packet was short, then return -EIO. If there
-        * are valid entries in the response, return them and pretend that
-        * the call was successful, but incomplete. The caller can retry the
-        * readdir starting at the last cookie.
-        */
-       entry[0] = entry[1] = 0;
-       if (!nr)
-               nr = -errno_NFSERR_IO;
-       goto out;
-err_unmap:
-       nr = -errno_NFSERR_IO;
-       goto out;
 }
 
 __be32 *
-nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
 {
+       __be32 *p;
        struct nfs_entry old = *entry;
 
-       if (!*p++) {
-               if (!*p)
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       if (!ntohl(*p++)) {
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
+               if (!ntohl(*p++))
                        return ERR_PTR(-EAGAIN);
                entry->eof = 1;
                return ERR_PTR(-EBADCOOKIE);
        }
 
+       p = xdr_inline_decode(xdr, 12);
+       if (unlikely(!p))
+               goto out_overflow;
        p = xdr_decode_hyper(p, &entry->ino);
        entry->len  = ntohl(*p++);
+
+       p = xdr_inline_decode(xdr, entry->len + 8);
+       if (unlikely(!p))
+               goto out_overflow;
        entry->name = (const char *) p;
        p += XDR_QUADLEN(entry->len);
        entry->prev_cookie = entry->cookie;
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
 
        if (plus) {
                entry->fattr->valid = 0;
-               p = xdr_decode_post_op_attr(p, entry->fattr);
+               p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
+               if (IS_ERR(p))
+                       goto out_overflow_exit;
                /* In fact, a post_op_fh3: */
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
                if (*p++) {
-                       p = xdr_decode_fhandle(p, entry->fh);
+                       p = xdr_decode_fhandle_stream(xdr, entry->fh);
+                       if (IS_ERR(p))
+                               goto out_overflow_exit;
                        /* Ugh -- server reply was truncated */
                        if (p == NULL) {
                                dprintk("NFS: FH truncated\n");
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
                        memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
        }
 
-       entry->eof = !p[0] && p[1];
+       p = xdr_inline_peek(xdr, 8);
+       if (p != NULL)
+               entry->eof = !p[0] && p[1];
+       else
+               entry->eof = 0;
+
        return p;
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+out_overflow_exit:
+       return ERR_PTR(-EIO);
 }
 
 /*
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
        struct kvec *iov = rcvbuf->head;
        size_t hdrlen;
        u32 len, recvd;
-       char    *kaddr;
        int     status;
 
        status = ntohl(*p++);
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
                return -EIO;
        }
 
-       /* NULL terminate the string we got */
-       kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
-       kaddr[len+rcvbuf->page_base] = '\0';
-       kunmap_atomic(kaddr, KM_USER0);
+       xdr_terminate_string(rcvbuf, len);
        return 0;
 }
 
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
  * Decode RENAME reply
  */
 static int
-nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res)
+nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
 {
        int     status;
 
        if ((status = ntohl(*p++)) != 0)
                status = nfs_stat_to_errno(status);
-       p = xdr_decode_wcc_data(p, res->fromattr);
-       p = xdr_decode_wcc_data(p, res->toattr);
+       p = xdr_decode_wcc_data(p, res->old_fattr);
+       p = xdr_decode_wcc_data(p, res->new_fattr);
        return status;
 }
 
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
        res->wtmult = ntohl(*p++);
        res->dtpref = ntohl(*p++);
        p = xdr_decode_hyper(p, &res->maxfilesize);
+       p = xdr_decode_time3(p, &res->time_delta);
 
-       /* ignore time_delta and properties */
+       /* ignore properties */
        res->lease_time = 0;
        return 0;
 }
index 311e15c..9fa4963 100644 (file)
@@ -242,8 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
-extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
-extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
                struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -333,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
 extern const nfs4_stateid zero_stateid;
 
 /* nfs4xdr.c */
-extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
+extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 extern struct rpc_procinfo nfs4_procedures[];
 
 struct nfs4_mount_data;
index 089da5b..e87fe61 100644 (file)
@@ -129,7 +129,7 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
                        | FATTR4_WORD0_MAXREAD
                        | FATTR4_WORD0_MAXWRITE
                        | FATTR4_WORD0_LEASE_TIME,
-                       0
+                       FATTR4_WORD1_TIME_DELTA
 };
 
 const u32 nfs4_fs_locations_bitmap[2] = {
@@ -255,9 +255,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
                        nfs4_state_mark_reclaim_nograce(clp, state);
                        goto do_state_recovery;
                case -NFS4ERR_STALE_STATEID:
-                       if (state == NULL)
-                               break;
-                       nfs4_state_mark_reclaim_reboot(clp, state);
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_EXPIRED:
                        goto do_state_recovery;
@@ -334,10 +331,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
  * Must be called while holding tbl->slot_tbl_lock
  */
 static void
-nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
+nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
 {
+       int free_slotid = free_slot - tbl->slots;
        int slotid = free_slotid;
 
+       BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
        /* clear used bit in bitmap */
        __clear_bit(slotid, tbl->used_slots);
 
@@ -379,7 +378,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
        struct nfs4_slot_table *tbl;
 
        tbl = &res->sr_session->fc_slot_table;
-       if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) {
+       if (!res->sr_slot) {
                /* just wake up the next guy waiting since
                 * we may have not consumed a slot after all */
                dprintk("%s: No slot\n", __func__);
@@ -387,17 +386,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
        }
 
        spin_lock(&tbl->slot_tbl_lock);
-       nfs4_free_slot(tbl, res->sr_slotid);
+       nfs4_free_slot(tbl, res->sr_slot);
        nfs41_check_drain_session_complete(res->sr_session);
        spin_unlock(&tbl->slot_tbl_lock);
-       res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+       res->sr_slot = NULL;
 }
 
 static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
 {
        unsigned long timestamp;
-       struct nfs4_slot_table *tbl;
-       struct nfs4_slot *slot;
        struct nfs_client *clp;
 
        /*
@@ -410,17 +407,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
                res->sr_status = NFS_OK;
 
        /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
-       if (res->sr_slotid == NFS4_MAX_SLOT_TABLE)
+       if (!res->sr_slot)
                goto out;
 
-       tbl = &res->sr_session->fc_slot_table;
-       slot = tbl->slots + res->sr_slotid;
-
        /* Check the SEQUENCE operation status */
        switch (res->sr_status) {
        case 0:
                /* Update the slot's sequence and clientid lease timer */
-               ++slot->seq_nr;
+               ++res->sr_slot->seq_nr;
                timestamp = res->sr_renewal_time;
                clp = res->sr_session->clp;
                do_renew_lease(clp, timestamp);
@@ -433,12 +427,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
                 * returned NFS4ERR_DELAY as per Section 2.10.6.2
                 * of RFC5661.
                 */
-               dprintk("%s: slot=%d seq=%d: Operation in progress\n",
-                               __func__, res->sr_slotid, slot->seq_nr);
+               dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
+                       __func__,
+                       res->sr_slot - res->sr_session->fc_slot_table.slots,
+                       res->sr_slot->seq_nr);
                goto out_retry;
        default:
                /* Just update the slot sequence no. */
-               ++slot->seq_nr;
+               ++res->sr_slot->seq_nr;
        }
 out:
        /* The session may be reset by one of the error handlers. */
@@ -505,10 +501,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
 
        dprintk("--> %s\n", __func__);
        /* slot already allocated? */
-       if (res->sr_slotid != NFS4_MAX_SLOT_TABLE)
+       if (res->sr_slot != NULL)
                return 0;
 
-       res->sr_slotid = NFS4_MAX_SLOT_TABLE;
        tbl = &session->fc_slot_table;
 
        spin_lock(&tbl->slot_tbl_lock);
@@ -550,7 +545,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
        dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
 
        res->sr_session = session;
-       res->sr_slotid = slotid;
+       res->sr_slot = slot;
        res->sr_renewal_time = jiffies;
        res->sr_status_flags = 0;
        /*
@@ -576,8 +571,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
                goto out;
        }
 
-       dprintk("--> %s clp %p session %p sr_slotid %d\n",
-               __func__, session->clp, session, res->sr_slotid);
+       dprintk("--> %s clp %p session %p sr_slot %ld\n",
+               __func__, session->clp, session, res->sr_slot ?
+                       res->sr_slot - session->fc_slot_table.slots : -1);
 
        ret = nfs41_setup_sequence(session, args, res, cache_reply,
                                   task);
@@ -650,7 +646,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
                .callback_data = &data
        };
 
-       res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+       res->sr_slot = NULL;
        if (privileged)
                task_setup.callback_ops = &nfs41_call_priv_sync_ops;
        task = rpc_run_task(&task_setup);
@@ -735,7 +731,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
        p->o_res.server = p->o_arg.server;
        nfs_fattr_init(&p->f_attr);
        nfs_fattr_init(&p->dir_attr);
-       p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 }
 
 static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -1120,6 +1115,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
        clear_bit(NFS_DELEGATED_STATE, &state->flags);
        smp_rmb();
        if (state->n_rdwr != 0) {
+               clear_bit(NFS_O_RDWR_STATE, &state->flags);
                ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
                if (ret != 0)
                        return ret;
@@ -1127,6 +1123,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
                        return -ESTALE;
        }
        if (state->n_wronly != 0) {
+               clear_bit(NFS_O_WRONLY_STATE, &state->flags);
                ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
                if (ret != 0)
                        return ret;
@@ -1134,6 +1131,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
                        return -ESTALE;
        }
        if (state->n_rdonly != 0) {
+               clear_bit(NFS_O_RDONLY_STATE, &state->flags);
                ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
                if (ret != 0)
                        return ret;
@@ -1188,7 +1186,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
        int err;
        do {
                err = _nfs4_do_open_reclaim(ctx, state);
-               if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
+               if (err != -NFS4ERR_DELAY)
                        break;
                nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
@@ -1258,6 +1256,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_BAD_STATEID:
                                nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
+                       case -EKEYEXPIRED:
+                               /*
+                                * User RPCSEC_GSS context has expired.
+                                * We cannot recover this stateid now, so
+                                * skip it and allow recovery thread to
+                                * proceed.
+                                */
                        case -ENOMEM:
                                err = 0;
                                goto out;
@@ -1605,7 +1610,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
                        goto out;
                case -NFS4ERR_GRACE:
                case -NFS4ERR_DELAY:
-               case -EKEYEXPIRED:
                        nfs4_handle_exception(server, err, &exception);
                        err = 0;
                }
@@ -1975,7 +1979,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
        calldata->res.fattr = &calldata->fattr;
        calldata->res.seqid = calldata->arg.seqid;
        calldata->res.server = server;
-       calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        path_get(path);
        calldata->path = *path;
 
@@ -1998,120 +2001,17 @@ out:
        return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode)
-{
-       struct file *filp;
-       int ret;
-
-       /* If the open_intent is for execute, we have an extra check to make */
-       if (fmode & FMODE_EXEC) {
-               ret = nfs_may_open(state->inode,
-                               state->owner->so_cred,
-                               nd->intent.open.flags);
-               if (ret < 0)
-                       goto out_close;
-       }
-       filp = lookup_instantiate_filp(nd, path->dentry, NULL);
-       if (!IS_ERR(filp)) {
-               struct nfs_open_context *ctx;
-               ctx = nfs_file_open_context(filp);
-               ctx->state = state;
-               return 0;
-       }
-       ret = PTR_ERR(filp);
-out_close:
-       nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
-       return ret;
-}
-
-struct dentry *
-nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+static struct inode *
+nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
 {
-       struct path path = {
-               .mnt = nd->path.mnt,
-               .dentry = dentry,
-       };
-       struct dentry *parent;
-       struct iattr attr;
-       struct rpc_cred *cred;
        struct nfs4_state *state;
-       struct dentry *res;
-       int open_flags = nd->intent.open.flags;
-       fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
-
-       if (nd->flags & LOOKUP_CREATE) {
-               attr.ia_mode = nd->intent.open.create_mode;
-               attr.ia_valid = ATTR_MODE;
-               if (!IS_POSIXACL(dir))
-                       attr.ia_mode &= ~current_umask();
-       } else {
-               open_flags &= ~O_EXCL;
-               attr.ia_valid = 0;
-               BUG_ON(open_flags & O_CREAT);
-       }
 
-       cred = rpc_lookup_cred();
-       if (IS_ERR(cred))
-               return (struct dentry *)cred;
-       parent = dentry->d_parent;
        /* Protect against concurrent sillydeletes */
-       nfs_block_sillyrename(parent);
-       state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred);
-       put_rpccred(cred);
-       if (IS_ERR(state)) {
-               if (PTR_ERR(state) == -ENOENT) {
-                       d_add(dentry, NULL);
-                       nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-               }
-               nfs_unblock_sillyrename(parent);
-               return (struct dentry *)state;
-       }
-       res = d_add_unique(dentry, igrab(state->inode));
-       if (res != NULL)
-               path.dentry = res;
-       nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
-       nfs_unblock_sillyrename(parent);
-       nfs4_intent_set_file(nd, &path, state, fmode);
-       return res;
-}
-
-int
-nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
-{
-       struct path path = {
-               .mnt = nd->path.mnt,
-               .dentry = dentry,
-       };
-       struct rpc_cred *cred;
-       struct nfs4_state *state;
-       fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
-
-       cred = rpc_lookup_cred();
-       if (IS_ERR(cred))
-               return PTR_ERR(cred);
-       state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
-       put_rpccred(cred);
-       if (IS_ERR(state)) {
-               switch (PTR_ERR(state)) {
-                       case -EPERM:
-                       case -EACCES:
-                       case -EDQUOT:
-                       case -ENOSPC:
-                       case -EROFS:
-                               return PTR_ERR(state);
-                       default:
-                               goto out_drop;
-               }
-       }
-       if (state->inode == dentry->d_inode) {
-               nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-               nfs4_intent_set_file(nd, &path, state, fmode);
-               return 1;
-       }
-       nfs4_close_sync(&path, state, fmode);
-out_drop:
-       d_drop(dentry);
-       return 0;
+       state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
+       if (IS_ERR(state))
+               return ERR_CAST(state);
+       ctx->state = state;
+       return igrab(state->inode);
 }
 
 static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2568,36 +2468,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
 
 static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-                 int flags, struct nameidata *nd)
+                 int flags, struct nfs_open_context *ctx)
 {
-       struct path path = {
-               .mnt = nd->path.mnt,
+       struct path my_path = {
                .dentry = dentry,
        };
+       struct path *path = &my_path;
        struct nfs4_state *state;
-       struct rpc_cred *cred;
-       fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE);
+       struct rpc_cred *cred = NULL;
+       fmode_t fmode = 0;
        int status = 0;
 
-       cred = rpc_lookup_cred();
-       if (IS_ERR(cred)) {
-               status = PTR_ERR(cred);
-               goto out;
+       if (ctx != NULL) {
+               cred = ctx->cred;
+               path = &ctx->path;
+               fmode = ctx->mode;
        }
-       state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred);
+       state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
        d_drop(dentry);
        if (IS_ERR(state)) {
                status = PTR_ERR(state);
-               goto out_putcred;
+               goto out;
        }
        d_add(dentry, igrab(state->inode));
        nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
-       if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
-               status = nfs4_intent_set_file(nd, &path, state, fmode);
+       if (ctx != NULL)
+               ctx->state = state;
        else
-               nfs4_close_sync(&path, state, fmode);
-out_putcred:
-       put_rpccred(cred);
+               nfs4_close_sync(path, state, fmode);
 out:
        return status;
 }
@@ -2655,6 +2553,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
 
        args->bitmask = server->cache_consistency_bitmask;
        res->server = server;
+       res->seq_res.sr_slot = NULL;
        msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
 }
 
@@ -2671,18 +2570,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
        return 1;
 }
 
+static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+{
+       struct nfs_server *server = NFS_SERVER(dir);
+       struct nfs_renameargs *arg = msg->rpc_argp;
+       struct nfs_renameres *res = msg->rpc_resp;
+
+       msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
+       arg->bitmask = server->attr_bitmask;
+       res->server = server;
+}
+
+static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
+                                struct inode *new_dir)
+{
+       struct nfs_renameres *res = task->tk_msg.rpc_resp;
+
+       if (!nfs4_sequence_done(task, &res->seq_res))
+               return 0;
+       if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
+               return 0;
+
+       update_changeattr(old_dir, &res->old_cinfo);
+       nfs_post_op_update_inode(old_dir, res->old_fattr);
+       update_changeattr(new_dir, &res->new_cinfo);
+       nfs_post_op_update_inode(new_dir, res->new_fattr);
+       return 1;
+}
+
 static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
                struct inode *new_dir, struct qstr *new_name)
 {
        struct nfs_server *server = NFS_SERVER(old_dir);
-       struct nfs4_rename_arg arg = {
+       struct nfs_renameargs arg = {
                .old_dir = NFS_FH(old_dir),
                .new_dir = NFS_FH(new_dir),
                .old_name = old_name,
                .new_name = new_name,
                .bitmask = server->attr_bitmask,
        };
-       struct nfs4_rename_res res = {
+       struct nfs_renameres res = {
                .server = server,
        };
        struct rpc_message msg = {
@@ -2896,15 +2823,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 }
 
 static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-                  u64 cookie, struct page *page, unsigned int count, int plus)
+               u64 cookie, struct page **pages, unsigned int count, int plus)
 {
        struct inode            *dir = dentry->d_inode;
        struct nfs4_readdir_arg args = {
                .fh = NFS_FH(dir),
-               .pages = &page,
+               .pages = pages,
                .pgbase = 0,
                .count = count,
                .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
+               .plus = plus,
        };
        struct nfs4_readdir_res res;
        struct rpc_message msg = {
@@ -2932,14 +2860,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 }
 
 static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-                  u64 cookie, struct page *page, unsigned int count, int plus)
+               u64 cookie, struct page **pages, unsigned int count, int plus)
 {
        struct nfs4_exception exception = { };
        int err;
        do {
                err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
                                _nfs4_proc_readdir(dentry, cred, cookie,
-                                       page, count, plus),
+                                       pages, count, plus),
                                &exception);
        } while (exception.retry);
        return err;
@@ -3490,9 +3418,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
                        nfs4_state_mark_reclaim_nograce(clp, state);
                        goto do_state_recovery;
                case -NFS4ERR_STALE_STATEID:
-                       if (state == NULL)
-                               break;
-                       nfs4_state_mark_reclaim_reboot(clp, state);
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_EXPIRED:
                        goto do_state_recovery;
@@ -3626,7 +3551,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
                        case -NFS4ERR_RESOURCE:
                                /* The IBM lawyers misread another document! */
                        case -NFS4ERR_DELAY:
-                       case -EKEYEXPIRED:
                                err = nfs4_delay(clp->cl_rpcclient, &timeout);
                }
        } while (err == 0);
@@ -3721,7 +3645,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
        memcpy(&data->stateid, stateid, sizeof(data->stateid));
        data->res.fattr = &data->fattr;
        data->res.server = server;
-       data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        nfs_fattr_init(data->res.fattr);
        data->timestamp = jiffies;
        data->rpc_status = 0;
@@ -3874,7 +3797,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
        p->arg.fl = &p->fl;
        p->arg.seqid = seqid;
        p->res.seqid = seqid;
-       p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        p->arg.stateid = &lsp->ls_stateid;
        p->lsp = lsp;
        atomic_inc(&lsp->ls_count);
@@ -4054,7 +3976,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
        p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
        p->arg.lock_owner.id = lsp->ls_id.id;
        p->res.lock_seqid = p->arg.lock_seqid;
-       p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        p->lsp = lsp;
        p->server = server;
        atomic_inc(&lsp->ls_count);
@@ -4241,7 +4162,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
                if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
                        return 0;
                err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
-               if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED)
+               if (err != -NFS4ERR_DELAY)
                        break;
                nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
@@ -4266,7 +4187,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
                        goto out;
                case -NFS4ERR_GRACE:
                case -NFS4ERR_DELAY:
-               case -EKEYEXPIRED:
                        nfs4_handle_exception(server, err, &exception);
                        err = 0;
                }
@@ -4412,13 +4332,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                                nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
                                err = 0;
                                goto out;
+                       case -EKEYEXPIRED:
+                               /*
+                                * User RPCSEC_GSS context has expired.
+                                * We cannot recover this stateid now, so
+                                * skip it and allow recovery thread to
+                                * proceed.
+                                */
+                               err = 0;
+                               goto out;
                        case -ENOMEM:
                        case -NFS4ERR_DENIED:
                                /* kill_proc(fl->fl_pid, SIGLOST, 1); */
                                err = 0;
                                goto out;
                        case -NFS4ERR_DELAY:
-                       case -EKEYEXPIRED:
                                break;
                }
                err = nfs4_handle_exception(server, err, &exception);
@@ -4647,7 +4575,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
        switch (task->tk_status) {
        case -NFS4ERR_DELAY:
        case -NFS4ERR_GRACE:
-       case -EKEYEXPIRED:
                dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
                rpc_delay(task, NFS4_POLL_RETRY_MIN);
                task->tk_status = 0;
@@ -4687,7 +4614,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
        };
        int status;
 
-       res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        dprintk("--> %s\n", __func__);
        task = rpc_run_task(&task_setup);
 
@@ -5111,7 +5037,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
 {
        switch(task->tk_status) {
        case -NFS4ERR_DELAY:
-       case -EKEYEXPIRED:
                rpc_delay(task, NFS4_POLL_RETRY_MAX);
                return -EAGAIN;
        default:
@@ -5180,12 +5105,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
 
        if (!atomic_inc_not_zero(&clp->cl_count))
                return ERR_PTR(-EIO);
-       calldata = kmalloc(sizeof(*calldata), GFP_NOFS);
+       calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
        if (calldata == NULL) {
                nfs_put_client(clp);
                return ERR_PTR(-ENOMEM);
        }
-       calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        msg.rpc_argp = &calldata->args;
        msg.rpc_resp = &calldata->res;
        calldata->clp = clp;
@@ -5254,7 +5178,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
        case -NFS4ERR_WRONG_CRED: /* What to do here? */
                break;
        case -NFS4ERR_DELAY:
-       case -EKEYEXPIRED:
                rpc_delay(task, NFS4_POLL_RETRY_MAX);
                return -EAGAIN;
        default:
@@ -5317,7 +5240,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
                goto out;
        calldata->clp = clp;
        calldata->arg.one_fs = 0;
-       calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
 
        msg.rpc_argp = &calldata->arg;
        msg.rpc_resp = &calldata->res;
@@ -5443,6 +5365,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .unlink_setup   = nfs4_proc_unlink_setup,
        .unlink_done    = nfs4_proc_unlink_done,
        .rename         = nfs4_proc_rename,
+       .rename_setup   = nfs4_proc_rename_setup,
+       .rename_done    = nfs4_proc_rename_done,
        .link           = nfs4_proc_link,
        .symlink        = nfs4_proc_symlink,
        .mkdir          = nfs4_proc_mkdir,
@@ -5463,6 +5387,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .lock           = nfs4_proc_lock,
        .clear_acl_cache = nfs4_zap_acl_attr,
        .close_context  = nfs4_close_context,
+       .open_context   = nfs4_atomic_open,
 };
 
 /*
index 96524c5..aa0b02a 100644 (file)
@@ -46,6 +46,7 @@
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
+#include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
@@ -1063,6 +1064,14 @@ restart:
                                /* Mark the file as being 'closed' */
                                state->state = 0;
                                break;
+                       case -EKEYEXPIRED:
+                               /*
+                                * User RPCSEC_GSS context has expired.
+                                * We cannot recover this stateid now, so
+                                * skip it and allow recovery thread to
+                                * proceed.
+                                */
+                               break;
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_STALE_STATEID:
                        case -NFS4ERR_BAD_STATEID:
@@ -1138,16 +1147,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
                (void)ops->reclaim_complete(clp);
 }
 
-static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
+static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
 {
        struct nfs4_state_owner *sp;
        struct rb_node *pos;
        struct nfs4_state *state;
 
        if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
-               return;
-
-       nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+               return 0;
 
        for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1161,6 +1168,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
        }
 
        nfs_delegation_reap_unclaimed(clp);
+       return 1;
+}
+
+/*
+ * Finish reboot reclaim.  nfs4_state_clear_reclaim_reboot() returns
+ * non-zero only when NFS4CLNT_RECLAIM_REBOOT was set and has now been
+ * cleared; only in that case is nfs4_reclaim_complete() invoked with
+ * the client's reboot recovery ops.
+ */
+static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
+{
+       if (nfs4_state_clear_reclaim_reboot(clp) != 0)
+               nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
 }
 
 static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1175,6 +1190,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
        nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
 }
 
+/*
+ * Emit a rate-limited KERN_WARNING saying that the state manager ran
+ * into an expired RPCSEC_GSS session.  @s is printed as the server
+ * name (callers in this file pass clp->cl_hostname).
+ */
+static void nfs4_warn_keyexpired(const char *s)
+{
+       printk_ratelimited(KERN_WARNING
+                       "Error: state manager encountered RPCSEC_GSS session "
+                       "expired against NFSv4 server %s.\n", s);
+}
+
 static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
 {
        switch (error) {
@@ -1187,7 +1210,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_LEASE_MOVED:
                        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-                       nfs4_state_end_reclaim_reboot(clp);
+                       nfs4_state_clear_reclaim_reboot(clp);
                        nfs4_state_start_reclaim_reboot(clp);
                        break;
                case -NFS4ERR_EXPIRED:
@@ -1204,6 +1227,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
                        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
                        /* Zero session reset errors */
                        return 0;
+               case -EKEYEXPIRED:
+                       /* Nothing we can do */
+                       nfs4_warn_keyexpired(clp->cl_hostname);
+                       return 0;
        }
        return error;
 }
@@ -1414,9 +1441,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
                case -NFS4ERR_DELAY:
                case -NFS4ERR_CLID_INUSE:
                case -EAGAIN:
-               case -EKEYEXPIRED:
                        break;
 
+               case -EKEYEXPIRED:
+                       nfs4_warn_keyexpired(clp->cl_hostname);
                case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
                                         * in nfs4_exchange_id */
                default:
index 08ef912..bd2101d 100644 (file)
@@ -816,7 +816,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
        if (iap->ia_valid & ATTR_MODE)
                len += 4;
        if (iap->ia_valid & ATTR_UID) {
-               owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name);
+               owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
                if (owner_namelen < 0) {
                        dprintk("nfs: couldn't resolve uid %d to string\n",
                                        iap->ia_uid);
@@ -828,7 +828,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
                len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
        }
        if (iap->ia_valid & ATTR_GID) {
-               owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group);
+               owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
                if (owner_grouplen < 0) {
                        dprintk("nfs: couldn't resolve gid %d to string\n",
                                        iap->ia_gid);
@@ -1385,24 +1385,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
 
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
 {
-       uint32_t attrs[2] = {
-               FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID,
-               FATTR4_WORD1_MOUNTED_ON_FILEID,
-       };
+       uint32_t attrs[2] = {0, 0};
+       uint32_t dircount = readdir->count >> 1;
        __be32 *p;
 
+       if (readdir->plus) {
+               attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
+                       FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
+               attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
+                       FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
+                       FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
+                       FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+               dircount >>= 1;
+       }
+       attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
+       attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+       /* Switch to mounted_on_fileid if the server supports it */
+       if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+               attrs[0] &= ~FATTR4_WORD0_FILEID;
+       else
+               attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+
        p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
        *p++ = cpu_to_be32(OP_READDIR);
        p = xdr_encode_hyper(p, readdir->cookie);
        p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
-       *p++ = cpu_to_be32(readdir->count >> 1);  /* We're not doing readdirplus */
+       *p++ = cpu_to_be32(dircount);
        *p++ = cpu_to_be32(readdir->count);
        *p++ = cpu_to_be32(2);
-       /* Switch to mounted_on_fileid if the server supports it */
-       if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
-               attrs[0] &= ~FATTR4_WORD0_FILEID;
-       else
-               attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
+
        *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
        *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
        hdr->nops++;
@@ -1823,7 +1834,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
 /*
  * Encode RENAME request
  */
-static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args)
+static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
 {
        struct xdr_stream xdr;
        struct compound_hdr hdr = {
@@ -2676,7 +2687,10 @@ out_overflow:
 static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
 {
        if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
-               decode_attr_bitmap(xdr, bitmask);
+               int ret;
+               ret = decode_attr_bitmap(xdr, bitmask);
+               if (unlikely(ret < 0))
+                       return ret;
                bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
        } else
                bitmask[0] = bitmask[1] = 0;
@@ -2848,6 +2862,56 @@ out_overflow:
        return -EIO;
 }
 
+/*
+ * Consume the rdattr_error attribute if FATTR4_WORD0_RDATTR_ERROR is set
+ * in bitmap[0].  The 4-byte value is skipped, not returned to the caller.
+ *
+ * Returns 0 on success (also when the attribute is absent), or -EIO if a
+ * lower-numbered attribute bit is still set in bitmap[0] or the stream
+ * is too short.
+ */
+static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+       const uint32_t earlier_attrs = FATTR4_WORD0_RDATTR_ERROR - 1U;
+
+       if (unlikely(bitmap[0] & earlier_attrs))
+               return -EIO;
+       if (!(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR))
+               return 0;
+
+       if (unlikely(xdr_inline_decode(xdr, 4) == NULL)) {
+               print_overflow_msg(__func__, xdr);
+               return -EIO;
+       }
+       bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
+       return 0;
+}
+
+/*
+ * Decode the filehandle attribute if FATTR4_WORD0_FILEHANDLE is set in
+ * bitmap[0].
+ *
+ * @fh may be NULL, in which case the filehandle is consumed from the
+ * stream but not stored.  A non-NULL @fh is always zeroed first, so it
+ * is left empty when the attribute is absent or decoding fails.
+ *
+ * Returns 0 on success (also when the attribute is absent), or -EIO if a
+ * lower-numbered attribute bit is still set in bitmap[0], the encoded
+ * length exceeds NFS4_FHSIZE, or the stream is too short.
+ */
+static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
+{
+       __be32 *p;
+       /*
+        * The length comes straight off the wire.  Keep it unsigned so a
+        * value with the top bit set fails the NFS4_FHSIZE check below
+        * instead of slipping past it as a negative number and being
+        * handed to xdr_inline_decode() and memcpy() as a huge size.
+        */
+       uint32_t len;
+
+       if (fh != NULL)
+               memset(fh, 0, sizeof(*fh));
+
+       if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U)))
+               return -EIO;
+       if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
+               len = be32_to_cpup(p);
+               if (len > NFS4_FHSIZE)
+                       return -EIO;
+               p = xdr_inline_decode(xdr, len);
+               if (unlikely(!p))
+                       goto out_overflow;
+               if (fh != NULL) {
+                       memcpy(fh->data, p, len);
+                       fh->size = len;
+               }
+               bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
+       }
+       return 0;
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
+
 static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
 {
        __be32 *p;
@@ -3521,6 +3585,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
        return status;
 }
 
+/*
+ * Decode the time_delta attribute into @time if FATTR4_WORD1_TIME_DELTA
+ * is set in bitmap[1].  @time is zeroed before anything else, so it reads
+ * as 0/0 when the attribute is absent.
+ *
+ * Returns -EIO if a lower-numbered attribute bit is still set in
+ * bitmap[1]; otherwise the status of decode_attr_time(), or 0 when the
+ * attribute is not present.
+ */
+static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
+                                 struct timespec *time)
+{
+       const uint32_t earlier_attrs = FATTR4_WORD1_TIME_DELTA - 1U;
+       int ret = 0;
+
+       time->tv_sec = 0;
+       time->tv_nsec = 0;
+       if (unlikely(bitmap[1] & earlier_attrs))
+               return -EIO;
+       if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) {
+               ret = decode_attr_time(xdr, time);
+               bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
+       }
+       dprintk("%s: time_delta=%ld %ld\n", __func__,
+               (long)time->tv_sec, (long)time->tv_nsec);
+       return ret;
+}
+
 static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
 {
        int status = 0;
@@ -3744,29 +3826,14 @@ xdr_error:
        return status;
 }
 
-static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
+               struct nfs_fattr *fattr, struct nfs_fh *fh,
                const struct nfs_server *server, int may_sleep)
 {
-       __be32 *savep;
-       uint32_t attrlen,
-                bitmap[2] = {0},
-                type;
        int status;
        umode_t fmode = 0;
        uint64_t fileid;
-
-       status = decode_op_hdr(xdr, OP_GETATTR);
-       if (status < 0)
-               goto xdr_error;
-
-       status = decode_attr_bitmap(xdr, bitmap);
-       if (status < 0)
-               goto xdr_error;
-
-       status = decode_attr_length(xdr, &attrlen, &savep);
-       if (status < 0)
-               goto xdr_error;
-
+       uint32_t type;
 
        status = decode_attr_type(xdr, bitmap, &type);
        if (status < 0)
@@ -3792,6 +3859,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
                goto xdr_error;
        fattr->valid |= status;
 
+       status = decode_attr_error(xdr, bitmap);
+       if (status < 0)
+               goto xdr_error;
+
+       status = decode_attr_filehandle(xdr, bitmap, fh);
+       if (status < 0)
+               goto xdr_error;
+
        status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
        if (status < 0)
                goto xdr_error;
@@ -3862,12 +3937,46 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
                fattr->valid |= status;
        }
 
+xdr_error:
+       dprintk("%s: xdr returned %d\n", __func__, -status);
+       return status;
+}
+
+/*
+ * Decode a GETATTR reply: the op header, the attribute bitmap and the
+ * attribute length, then the attributes themselves through
+ * decode_getfattr_attrs(), and finally check the consumed length with
+ * verify_attr_len().
+ *
+ * @fh is handed unchanged to decode_getfattr_attrs(); decode_getfattr()
+ * passes NULL for it.
+ *
+ * Returns the first negative status hit along the way, otherwise the
+ * result of verify_attr_len().
+ */
+static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
+{
+       __be32 *savep;
+       uint32_t attrlen,
+                bitmap[2] = {0};
+       int status;
+
+       status = decode_op_hdr(xdr, OP_GETATTR);
+       if (status < 0)
+               goto xdr_error;
+
+       status = decode_attr_bitmap(xdr, bitmap);
+       if (status < 0)
+               goto xdr_error;
+
+       status = decode_attr_length(xdr, &attrlen, &savep);
+       if (status < 0)
+               goto xdr_error;
+
+       status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
+       if (status < 0)
+               goto xdr_error;
+
        status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
        dprintk("%s: xdr returned %d\n", __func__, -status);
        return status;
 }
 
+/*
+ * Decode a GETATTR reply without returning a filehandle: forwards to
+ * decode_getfattr_generic() with a NULL fh and returns its status.
+ */
+static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
+               const struct nfs_server *server, int may_sleep)
+{
+       return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
+}
 
 static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 {
@@ -3894,6 +4003,9 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
        if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
                goto xdr_error;
        fsinfo->wtpref = fsinfo->wtmax;
+       status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
+       if (status != 0)
+               goto xdr_error;
 
        status = verify_attr_len(xdr, savep, attrlen);
 xdr_error:
@@ -3950,13 +4062,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
        __be32 *p;
        uint32_t namelen, type;
 
-       p = xdr_inline_decode(xdr, 32);
+       p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
        if (unlikely(!p))
                goto out_overflow;
-       p = xdr_decode_hyper(p, &offset);
+       p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
        p = xdr_decode_hyper(p, &length);
-       type = be32_to_cpup(p++);
-       if (fl != NULL) {
+       type = be32_to_cpup(p++); /* 4 byte read */
+       if (fl != NULL) { /* manipulate file lock */
                fl->fl_start = (loff_t)offset;
                fl->fl_end = fl->fl_start + (loff_t)length - 1;
                if (length == ~(uint64_t)0)
@@ -3966,9 +4078,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
                        fl->fl_type = F_RDLCK;
                fl->fl_pid = 0;
        }
-       p = xdr_decode_hyper(p, &clientid);
-       namelen = be32_to_cpup(p);
-       p = xdr_inline_decode(xdr, namelen);
+       p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
+       namelen = be32_to_cpup(p); /* read 4 bytes */  /* have read all 32 bytes now */
+       p = xdr_inline_decode(xdr, namelen); /* variable size field */
        if (likely(p))
                return -NFS4ERR_DENIED;
 out_overflow:
@@ -4200,12 +4312,9 @@ out_overflow:
 static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
 {
        struct xdr_buf  *rcvbuf = &req->rq_rcv_buf;
-       struct page     *page = *rcvbuf->pages;
        struct kvec     *iov = rcvbuf->head;
        size_t          hdrlen;
        u32             recvd, pglen = rcvbuf->page_len;
-       __be32          *end, *entry, *p, *kaddr;
-       unsigned int    nr = 0;
        int             status;
 
        status = decode_op_hdr(xdr, OP_READDIR);
@@ -4225,71 +4334,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
                pglen = recvd;
        xdr_read_pages(xdr, pglen);
 
-       BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
-       kaddr = p = kmap_atomic(page, KM_USER0);
-       end = p + ((pglen + readdir->pgbase) >> 2);
-       entry = p;
-
-       /* Make sure the packet actually has a value_follows and EOF entry */
-       if ((entry + 1) > end)
-               goto short_pkt;
-
-       for (; *p++; nr++) {
-               u32 len, attrlen, xlen;
-               if (end - p < 3)
-                       goto short_pkt;
-               dprintk("cookie = %Lu, ", *((unsigned long long *)p));
-               p += 2;                 /* cookie */
-               len = ntohl(*p++);      /* filename length */
-               if (len > NFS4_MAXNAMLEN) {
-                       dprintk("NFS: giant filename in readdir (len 0x%x)\n",
-                                       len);
-                       goto err_unmap;
-               }
-               xlen = XDR_QUADLEN(len);
-               if (end - p < xlen + 1)
-                       goto short_pkt;
-               dprintk("filename = %*s\n", len, (char *)p);
-               p += xlen;
-               len = ntohl(*p++);      /* bitmap length */
-               if (end - p < len + 1)
-                       goto short_pkt;
-               p += len;
-               attrlen = XDR_QUADLEN(ntohl(*p++));
-               if (end - p < attrlen + 2)
-                       goto short_pkt;
-               p += attrlen;           /* attributes */
-               entry = p;
-       }
-       /*
-        * Apparently some server sends responses that are a valid size, but
-        * contain no entries, and have value_follows==0 and EOF==0. For
-        * those, just set the EOF marker.
-        */
-       if (!nr && entry[1] == 0) {
-               dprintk("NFS: readdir reply truncated!\n");
-               entry[1] = 1;
-       }
-out:
-       kunmap_atomic(kaddr, KM_USER0);
+
        return 0;
-short_pkt:
-       /*
-        * When we get a short packet there are 2 possibilities. We can
-        * return an error, or fix up the response to look like a valid
-        * response and return what we have so far. If there are no
-        * entries and the packet was short, then return -EIO. If there
-        * are valid entries in the response, return them and pretend that
-        * the call was successful, but incomplete. The caller can retry the
-        * readdir starting at the last cookie.
-        */
-       dprintk("%s: short packet at entry %d\n", __func__, nr);
-       entry[0] = entry[1] = 0;
-       if (nr)
-               goto out;
-err_unmap:
-       kunmap_atomic(kaddr, KM_USER0);
-       return -errno_NFSERR_IO;
 }
 
 static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -4299,7 +4345,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
        size_t hdrlen;
        u32 len, recvd;
        __be32 *p;
-       char *kaddr;
        int status;
 
        status = decode_op_hdr(xdr, OP_READLINK);
@@ -4330,9 +4375,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
         * and and null-terminate the text (the VFS expects
         * null-termination).
         */
-       kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
-       kaddr[len+rcvbuf->page_base] = '\0';
-       kunmap_atomic(kaddr, KM_USER0);
+       xdr_terminate_string(rcvbuf, len);
        return 0;
 out_overflow:
        print_overflow_msg(__func__, xdr);
@@ -4668,7 +4711,6 @@ static int decode_sequence(struct xdr_stream *xdr,
                           struct rpc_rqst *rqstp)
 {
 #if defined(CONFIG_NFS_V4_1)
-       struct nfs4_slot *slot;
        struct nfs4_sessionid id;
        u32 dummy;
        int status;
@@ -4700,15 +4742,14 @@ static int decode_sequence(struct xdr_stream *xdr,
                goto out_overflow;
 
        /* seqid */
-       slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
        dummy = be32_to_cpup(p++);
-       if (dummy != slot->seq_nr) {
+       if (dummy != res->sr_slot->seq_nr) {
                dprintk("%s Invalid sequence number\n", __func__);
                goto out_err;
        }
        /* slot id */
        dummy = be32_to_cpup(p++);
-       if (dummy != res->sr_slotid) {
+       if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
                dprintk("%s Invalid slot id\n", __func__);
                goto out_err;
        }
@@ -4873,7 +4914,7 @@ out:
 /*
  * Decode RENAME response
  */
-static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res)
+static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
 {
        struct xdr_stream xdr;
        struct compound_hdr hdr;
@@ -5760,23 +5801,35 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
 }
 #endif /* CONFIG_NFS_V4_1 */
 
-__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
+__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
+                          struct nfs_server *server, int plus)
 {
        uint32_t bitmap[2] = {0};
        uint32_t len;
-
-       if (!*p++) {
-               if (!*p)
+       __be32 *p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               goto out_overflow;
+       if (!ntohl(*p++)) {
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(!p))
+                       goto out_overflow;
+               if (!ntohl(*p++))
                        return ERR_PTR(-EAGAIN);
                entry->eof = 1;
                return ERR_PTR(-EBADCOOKIE);
        }
 
+       p = xdr_inline_decode(xdr, 12);
+       if (unlikely(!p))
+               goto out_overflow;
        entry->prev_cookie = entry->cookie;
        p = xdr_decode_hyper(p, &entry->cookie);
        entry->len = ntohl(*p++);
+
+       p = xdr_inline_decode(xdr, entry->len);
+       if (unlikely(!p))
+               goto out_overflow;
        entry->name = (const char *) p;
-       p += XDR_QUADLEN(entry->len);
 
        /*
         * In case the server doesn't return an inode number,
@@ -5784,32 +5837,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
         * since glibc seems to choke on it...)
         */
        entry->ino = 1;
+       entry->fattr->valid = 0;
 
-       len = ntohl(*p++);              /* bitmap length */
-       if (len-- > 0) {
-               bitmap[0] = ntohl(*p++);
-               if (len-- > 0) {
-                       bitmap[1] = ntohl(*p++);
-                       p += len;
-               }
-       }
-       len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */
-       if (len > 0) {
-               if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) {
-                       bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
-                       /* Ignore the return value of rdattr_error for now */
-                       p++;
-                       len--;
-               }
-               if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID)
-                       xdr_decode_hyper(p, &entry->ino);
-               else if (bitmap[0] == FATTR4_WORD0_FILEID)
-                       xdr_decode_hyper(p, &entry->ino);
-               p += len;
-       }
+       if (decode_attr_bitmap(xdr, bitmap) < 0)
+               goto out_overflow;
+
+       if (decode_attr_length(xdr, &len, &p) < 0)
+               goto out_overflow;
+
+       if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
+               goto out_overflow;
+       if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
+               entry->ino = entry->fattr->fileid;
+
+       if (verify_attr_len(xdr, p, len) < 0)
+               goto out_overflow;
+
+       p = xdr_inline_peek(xdr, 8);
+       if (p != NULL)
+               entry->eof = !p[0] && p[1];
+       else
+               entry->eof = 0;
 
-       entry->eof = !p[0] && p[1];
        return p;
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return ERR_PTR(-EIO);
 }
 
 /*
index df101d9..460df36 100644 (file)
@@ -3,9 +3,10 @@
  *
  *  Allow an NFS filesystem to be mounted as root. The way this works is:
  *     (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
- *     (2) Handle RPC negotiation with the system which replied to RARP or
- *         was reported as a boot server by BOOTP or manually.
- *     (3) The actual mounting is done later, when init() is running.
+ *     (2) Construct the device string and the options string using DHCP
+ *         option 17 and/or kernel command line options.
+ *     (3) When mount_root() sets up the root file system, pass these strings
+ *         to the NFS client's regular mount interface via sys_mount().
  *
  *
  *     Changes:
  *     Hua Qin         :       Support for mounting root file system via
  *                             NFS over TCP.
  *     Fabian Frederick:       Option parser rebuilt (using parser lib)
-*/
+ *     Chuck Lever     :       Use super.c's text-based mount option parsing
+ *     Chuck Lever     :       Add "nfsrootdebug".
+ */
 
 #include <linux/types.h>
 #include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/fs.h>
 #include <linux/init.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xprtsock.h>
 #include <linux/nfs.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_mount.h>
-#include <linux/in.h>
-#include <linux/major.h>
 #include <linux/utsname.h>
-#include <linux/inet.h>
 #include <linux/root_dev.h>
 #include <net/ipconfig.h>
-#include <linux/parser.h>
 
 #include "internal.h"
 
-/* Define this to allow debugging output */
-#undef NFSROOT_DEBUG
 #define NFSDBG_FACILITY NFSDBG_ROOT
 
-/* Default port to use if server is not running a portmapper */
-#define NFS_MNT_PORT   627
-
 /* Default path we try to mount. "%s" gets replaced by our IP address */
 #define NFS_ROOT               "/tftpboot/%s"
 
 /* Parameters passed from the kernel command line */
-static char nfs_root_name[256] __initdata = "";
+static char nfs_root_parms[256] __initdata = "";
+
+/* Text-based mount options passed to super.c */
+static char nfs_root_options[256] __initdata = "";
 
 /* Address of NFS server */
-static __be32 servaddr __initdata = 0;
+static __be32 servaddr __initdata = htonl(INADDR_NONE);
 
 /* Name of directory to mount */
-static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, };
-
-/* NFS-related data */
-static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
-static int nfs_port __initdata = 0;            /* Port to connect to for NFS */
-static int mount_port __initdata = 0;          /* Mount daemon port number */
-
-
-/***************************************************************************
-
-                            Parsing of options
-
- ***************************************************************************/
-
-enum {
-       /* Options that take integer arguments */
-       Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
-       Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
-       /* Options that take no arguments */
-       Opt_soft, Opt_hard, Opt_intr,
-       Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, 
-       Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
-       Opt_acl, Opt_noacl,
-       /* Error token */
-       Opt_err
-};
-
-static const match_table_t tokens __initconst = {
-       {Opt_port, "port=%u"},
-       {Opt_rsize, "rsize=%u"},
-       {Opt_wsize, "wsize=%u"},
-       {Opt_timeo, "timeo=%u"},
-       {Opt_retrans, "retrans=%u"},
-       {Opt_acregmin, "acregmin=%u"},
-       {Opt_acregmax, "acregmax=%u"},
-       {Opt_acdirmin, "acdirmin=%u"},
-       {Opt_acdirmax, "acdirmax=%u"},
-       {Opt_soft, "soft"},
-       {Opt_hard, "hard"},
-       {Opt_intr, "intr"},
-       {Opt_nointr, "nointr"},
-       {Opt_posix, "posix"},
-       {Opt_noposix, "noposix"},
-       {Opt_cto, "cto"},
-       {Opt_nocto, "nocto"},
-       {Opt_ac, "ac"},
-       {Opt_noac, "noac"},
-       {Opt_lock, "lock"},
-       {Opt_nolock, "nolock"},
-       {Opt_v2, "nfsvers=2"},
-       {Opt_v2, "v2"},
-       {Opt_v3, "nfsvers=3"},
-       {Opt_v3, "v3"},
-       {Opt_udp, "proto=udp"},
-       {Opt_udp, "udp"},
-       {Opt_tcp, "proto=tcp"},
-       {Opt_tcp, "tcp"},
-       {Opt_acl, "acl"},
-       {Opt_noacl, "noacl"},
-       {Opt_err, NULL}
-       
-};
+static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
+
+/* server:export path string passed to super.c */
+static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
 
 /*
- *  Parse option string.
+ * When the "nfsrootdebug" kernel command line option is specified,
+ * enable debugging messages for NFSROOT.
  */
-
-static int __init root_nfs_parse(char *name, char *buf)
+static int __init nfs_root_debug(char *__unused)
 {
-
-       char *p;
-       substring_t args[MAX_OPT_ARGS];
-       int option;
-
-       if (!name)
-               return 1;
-
-       /* Set the NFS remote path */
-       p = strsep(&name, ",");
-       if (p[0] != '\0' && strcmp(p, "default") != 0)
-               strlcpy(buf, p, NFS_MAXPATHLEN);
-
-       while ((p = strsep (&name, ",")) != NULL) {
-               int token; 
-               if (!*p)
-                       continue;
-               token = match_token(p, tokens, args);
-
-               /* %u tokens only. Beware if you add new tokens! */
-               if (token < Opt_soft && match_int(&args[0], &option))
-                       return 0;
-               switch (token) {
-                       case Opt_port:
-                               nfs_port = option;
-                               break;
-                       case Opt_rsize:
-                               nfs_data.rsize = option;
-                               break;
-                       case Opt_wsize:
-                               nfs_data.wsize = option;
-                               break;
-                       case Opt_timeo:
-                               nfs_data.timeo = option;
-                               break;
-                       case Opt_retrans:
-                               nfs_data.retrans = option;
-                               break;
-                       case Opt_acregmin:
-                               nfs_data.acregmin = option;
-                               break;
-                       case Opt_acregmax:
-                               nfs_data.acregmax = option;
-                               break;
-                       case Opt_acdirmin:
-                               nfs_data.acdirmin = option;
-                               break;
-                       case Opt_acdirmax:
-                               nfs_data.acdirmax = option;
-                               break;
-                       case Opt_soft:
-                               nfs_data.flags |= NFS_MOUNT_SOFT;
-                               break;
-                       case Opt_hard:
-                               nfs_data.flags &= ~NFS_MOUNT_SOFT;
-                               break;
-                       case Opt_intr:
-                       case Opt_nointr:
-                               break;
-                       case Opt_posix:
-                               nfs_data.flags |= NFS_MOUNT_POSIX;
-                               break;
-                       case Opt_noposix:
-                               nfs_data.flags &= ~NFS_MOUNT_POSIX;
-                               break;
-                       case Opt_cto:
-                               nfs_data.flags &= ~NFS_MOUNT_NOCTO;
-                               break;
-                       case Opt_nocto:
-                               nfs_data.flags |= NFS_MOUNT_NOCTO;
-                               break;
-                       case Opt_ac:
-                               nfs_data.flags &= ~NFS_MOUNT_NOAC;
-                               break;
-                       case Opt_noac:
-                               nfs_data.flags |= NFS_MOUNT_NOAC;
-                               break;
-                       case Opt_lock:
-                               nfs_data.flags &= ~NFS_MOUNT_NONLM;
-                               break;
-                       case Opt_nolock:
-                               nfs_data.flags |= NFS_MOUNT_NONLM;
-                               break;
-                       case Opt_v2:
-                               nfs_data.flags &= ~NFS_MOUNT_VER3;
-                               break;
-                       case Opt_v3:
-                               nfs_data.flags |= NFS_MOUNT_VER3;
-                               break;
-                       case Opt_udp:
-                               nfs_data.flags &= ~NFS_MOUNT_TCP;
-                               break;
-                       case Opt_tcp:
-                               nfs_data.flags |= NFS_MOUNT_TCP;
-                               break;
-                       case Opt_acl:
-                               nfs_data.flags &= ~NFS_MOUNT_NOACL;
-                               break;
-                       case Opt_noacl:
-                               nfs_data.flags |= NFS_MOUNT_NOACL;
-                               break;
-                       default:
-                               printk(KERN_WARNING "Root-NFS: unknown "
-                                       "option: %s\n", p);
-                               return 0;
-               }
-       }
-
+       /* OR the NFSROOT and mount debugging bits into the nfs_debug mask. */
+       nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT;
+       /* NOTE(review): presumably 1 reports the option as handled to __setup() -- confirm. */
        return 1;
 }
 
+__setup("nfsrootdebug", nfs_root_debug);
+
 /*
- *  Prepare the NFS data structure and parse all options.
+ *  Parse NFS server and directory information passed on the kernel
+ *  command line.
+ *
+ *  nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+ *
+ *  If there is a "%s" token in the <root-dir> string, it is replaced
+ *  by the ASCII-representation of the client's IP address.
  */
-static int __init root_nfs_name(char *name)
+static int __init nfs_root_setup(char *line)
 {
-       static char buf[NFS_MAXPATHLEN] __initdata;
-       char *cp;
-
-       /* Set some default values */
-       memset(&nfs_data, 0, sizeof(nfs_data));
-       nfs_port          = -1;
-       nfs_data.version  = NFS_MOUNT_VERSION;
-       nfs_data.flags    = NFS_MOUNT_NONLM;    /* No lockd in nfs root yet */
-       nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
-       nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
-       nfs_data.acregmin = NFS_DEF_ACREGMIN;
-       nfs_data.acregmax = NFS_DEF_ACREGMAX;
-       nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
-       nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
-       strcpy(buf, NFS_ROOT);
-
-       /* Process options received from the remote server */
-       root_nfs_parse(root_server_path, buf);
-
-       /* Override them by options set on kernel command-line */
-       root_nfs_parse(name, buf);
-
-       cp = utsname()->nodename;
-       if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
-               printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
-               return -1;
+       /* Seeing "nfsroot=" at all selects NFS as the root device. */
+       ROOT_DEV = Root_NFS;
+
+       /*
+        * A leading '/', ',' or digit means the argument is stored as
+        * given (silently truncated by strlcpy() if it is too long).
+        * Anything else is substituted for the "%s" in NFS_ROOT.
+        */
+       if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
+               strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
+       } else {
+               /*
+                * Clip @line in place first, so that the unbounded
+                * sprintf() below cannot run past nfs_root_parms.
+                */
+               size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
+               if (n >= sizeof(nfs_root_parms))
+                       line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
+               sprintf(nfs_root_parms, NFS_ROOT, line);
        }
-       sprintf(nfs_export_path, buf, cp);
+
+       /*
+        * Extract the IP address of the NFS server containing our
+        * root file system, if one was specified.
+        *
+        * Note: root_nfs_parse_addr() removes the server-ip from
+        *       nfs_root_parms, if it exists.
+        */
+       root_server_addr = root_nfs_parse_addr(nfs_root_parms);
 
        return 1;
 }
 
+__setup("nfsroot=", nfs_root_setup);
 
-/*
- *  Get NFS server address.
- */
-static int __init root_nfs_addr(void)
+/*
+ * Copy the string @src into @dest, a buffer of @destlen bytes.
+ *
+ * Returns 0 on success, or -1 if @src and its terminating NUL do not
+ * fit in @dest (in which case @dest holds a truncated copy).
+ */
+static int __init root_nfs_copy(char *dest, const char *src,
+                                    const size_t destlen)
 {
-       if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) {
-               printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
+       /*
+        * strlcpy() returns strlen(@src); a result equal to @destlen
+        * already means the last character was dropped, so test with
+        * ">=" rather than ">".
+        */
+       if (strlcpy(dest, src, destlen) >= destlen)
                return -1;
-       }
+       return 0;
+}
 
-       snprintf(nfs_data.hostname, sizeof(nfs_data.hostname),
-                "%pI4", &servaddr);
+/*
+ * Append the string @src to the string already in @dest, a buffer of
+ * @destlen bytes.
+ *
+ * Returns 0 on success, or -1 if the concatenation and its terminating
+ * NUL do not fit in @dest (in which case @dest holds a truncated result).
+ */
+static int __init root_nfs_cat(char *dest, const char *src,
+                                 const size_t destlen)
+{
+       /*
+        * strlcat() returns the length of the string it tried to
+        * create; a result equal to @destlen already means truncation,
+        * so test with ">=" rather than ">".
+        */
+       if (strlcat(dest, src, destlen) >= destlen)
+               return -1;
        return 0;
 }
 
 /*
- *  Tell the user what's going on.
+ * Parse out root export path and mount options from
+ * passed-in string @incoming.
+ *
+ * Copy the export path into @exppath.
  */
-#ifdef NFSROOT_DEBUG
-static void __init root_nfs_print(void)
+static int __init root_nfs_parse_options(char *incoming, char *exppath,
+                                        const size_t exppathlen)
 {
-       printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n",
-               nfs_export_path, nfs_data.hostname);
-       printk(KERN_NOTICE "Root-NFS:     rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
-               nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
-       printk(KERN_NOTICE "Root-NFS:     acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
-               nfs_data.acregmin, nfs_data.acregmax,
-               nfs_data.acdirmin, nfs_data.acdirmax);
-       printk(KERN_NOTICE "Root-NFS:     nfsd port = %d, mountd port = %d, flags = %08x\n",
-               nfs_port, mount_port, nfs_data.flags);
-}
-#endif
-
+       /*
+        * Returns 0 on success, or -1 when the export path does not
+        * fit in @exppath or the options overflow nfs_root_options.
+        * @incoming is modified in place by strsep().
+        */
+       char *p;
 
-static int __init root_nfs_init(void)
-{
-#ifdef NFSROOT_DEBUG
-       nfs_debug |= NFSDBG_ROOT;
-#endif
+       /*
+        * Set the NFS remote path; an empty first field or the word
+        * "default" leaves @exppath as the caller set it up
+        */
+       p = strsep(&incoming, ",");
+       if (*p != '\0' && strcmp(p, "default") != 0)
+               if (root_nfs_copy(exppath, p, exppathlen))
+                       return -1;
 
        /*
-        * Decode the root directory path name and NFS options from
-        * the kernel command line. This has to go here in order to
-        * be able to use the client IP address for the remote root
-        * directory (necessary for pure RARP booting).
+        * @incoming now points to the rest of the string; if it
+        * contains something, append it to our root options buffer
         */
-       if (root_nfs_name(nfs_root_name) < 0 ||
-           root_nfs_addr() < 0)
-               return -1;
+       if (incoming != NULL && *incoming != '\0')
+               if (root_nfs_cat(nfs_root_options, incoming,
+                                               sizeof(nfs_root_options)))
+                       return -1;
 
-#ifdef NFSROOT_DEBUG
-       root_nfs_print();
-#endif
+       /*
+        * Possibly prepare for more options to be appended: add a
+        * separating ',' unless the buffer is empty or already ends
+        * in one.  The last character lives at index strlen() - 1;
+        * index strlen() is the terminating NUL and never a ','.
+        */
+       if (nfs_root_options[0] != '\0' &&
+           nfs_root_options[strlen(nfs_root_options) - 1] != ',')
+               if (root_nfs_cat(nfs_root_options, ",",
+                                               sizeof(nfs_root_options)))
+                       return -1;
 
        return 0;
 }
 
-
 /*
- *  Parse NFS server and directory information passed on the kernel
- *  command line.
+ *  Decode the export directory path name and NFS options from
+ *  the kernel command line.  This has to be done late in order to
+ *  use a dynamically acquired client IP address for the remote
+ *  root directory path.
+ *
+ *  Returns zero if successful; otherwise -1 is returned.
  */
-static int __init nfs_root_setup(char *line)
+static int __init root_nfs_data(char *cmdline)
 {
-       ROOT_DEV = Root_NFS;
-       if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
-               strlcpy(nfs_root_name, line, sizeof(nfs_root_name));
-       } else {
-               int n = strlen(line) + sizeof(NFS_ROOT) - 1;
-               if (n >= sizeof(nfs_root_name))
-                       line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0';
-               sprintf(nfs_root_name, NFS_ROOT, line);
+       char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
+       int len, retval = -1;
+       char *tmp = NULL;
+       const size_t tmplen = sizeof(nfs_export_path);
+
+       /*
+        * Scratch buffer for the export path template, seeded with
+        * the NFS_ROOT default; the parsers below may overwrite it.
+        */
+       tmp = kzalloc(tmplen, GFP_KERNEL);
+       if (tmp == NULL)
+               goto out_nomem;
+       strcpy(tmp, NFS_ROOT);
+
+       /* Parse root_server_path first, then the command line string. */
+       if (root_server_path[0] != '\0') {
+               dprintk("Root-NFS: DHCPv4 option 17: %s\n",
+                       root_server_path);
+               if (root_nfs_parse_options(root_server_path, tmp, tmplen))
+                       goto out_optionstoolong;
        }
-       root_server_addr = root_nfs_parse_addr(nfs_root_name);
-       return 1;
-}
 
-__setup("nfsroot=", nfs_root_setup);
-
-/***************************************************************************
-
-              Routines to actually mount the root directory
+       if (cmdline[0] != '\0') {
+               dprintk("Root-NFS: nfsroot=%s\n", cmdline);
+               if (root_nfs_parse_options(cmdline, tmp, tmplen))
+                       goto out_optionstoolong;
+       }
 
- ***************************************************************************/
+       /*
+        * Append mandatory options for nfsroot so they override
+        * what has come before
+        */
+       snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
+                       &servaddr);
+       if (root_nfs_cat(nfs_root_options, addr_option,
+                                               sizeof(nfs_root_options)))
+               goto out_optionstoolong;
 
-/*
- *  Construct sockaddr_in from address and port number.
- */
-static inline void
-set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port)
-{
-       sin->sin_family = AF_INET;
-       sin->sin_addr.s_addr = addr;
-       sin->sin_port = port;
-}
+       /*
+        * Set up nfs_root_device.  For NFS mounts, this looks like
+        *
+        *      server:/path
+        *
+        * At this point, utsname()->nodename contains our local
+        * IP address or hostname, set by ipconfig.  If "%s" exists
+        * in tmp, substitute the nodename, then shovel the whole
+        * mess into nfs_root_device.
+        *
+        * snprintf() returns the length the full string would have
+        * had, not counting the NUL, so a result equal to the buffer
+        * size already means truncation: test with ">=", not ">".
+        */
+       len = snprintf(nfs_export_path, sizeof(nfs_export_path),
+                               tmp, utsname()->nodename);
+       if (len >= (int)sizeof(nfs_export_path))
+               goto out_devnametoolong;
+       len = snprintf(nfs_root_device, sizeof(nfs_root_device),
+                               "%pI4:%s", &servaddr, nfs_export_path);
+       if (len >= (int)sizeof(nfs_root_device))
+               goto out_devnametoolong;
 
-/*
- *  Query server portmapper for the port of a daemon program.
- */
-static int __init root_nfs_getport(int program, int version, int proto)
-{
-       struct sockaddr_in sin;
+       retval = 0;
 
-       printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n",
-               program, version, &servaddr);
-       set_sockaddr(&sin, servaddr, 0);
-       return rpcb_getport_sync(&sin, program, version, proto);
+       /* Single exit: tmp is freed on every path (still NULL on out_nomem). */
+out:
+       kfree(tmp);
+       return retval;
+out_nomem:
+       printk(KERN_ERR "Root-NFS: could not allocate memory\n");
+       goto out;
+out_optionstoolong:
+       printk(KERN_ERR "Root-NFS: mount options string too long\n");
+       goto out;
+out_devnametoolong:
+       printk(KERN_ERR "Root-NFS: root device name too long.\n");
+       goto out;
 }
 
-
-/*
- *  Use portmapper to find mountd and nfsd port numbers if not overriden
- *  by the user. Use defaults if portmapper is not available.
- *  XXX: Is there any nfs server with no portmapper?
+/**
+ * nfs_root_data - Return prepared 'data' for NFSROOT mount
+ * @root_device: OUT: address of string containing NFSROOT device
+ * @root_data: OUT: address of string containing NFSROOT mount options
+ *
+ * Returns zero and sets @root_device and @root_data if successful,
+ * otherwise -1 is returned.
  */
-static int __init root_nfs_ports(void)
+int __init nfs_root_data(char **root_device, char **root_data)
 {
-       int port;
-       int nfsd_ver, mountd_ver;
-       int nfsd_port, mountd_port;
-       int proto;
-
-       if (nfs_data.flags & NFS_MOUNT_VER3) {
-               nfsd_ver = NFS3_VERSION;
-               mountd_ver = NFS_MNT3_VERSION;
-               nfsd_port = NFS_PORT;
-               mountd_port = NFS_MNT_PORT;
-       } else {
-               nfsd_ver = NFS2_VERSION;
-               mountd_ver = NFS_MNT_VERSION;
-               nfsd_port = NFS_PORT;
-               mountd_port = NFS_MNT_PORT;
-       }
-
-       proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
-
-       if (nfs_port < 0) {
-               if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
-                       printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
-                                       "number from server, using default\n");
-                       port = nfsd_port;
-               }
-               nfs_port = port;
-               dprintk("Root-NFS: Portmapper on server returned %d "
-                       "as nfsd port\n", port);
+       /*
+        * Latch the server address currently held in root_server_addr;
+        * INADDR_NONE means no server address is known.
+        */
+       servaddr = root_server_addr;
+       if (servaddr == htonl(INADDR_NONE)) {
+               printk(KERN_ERR "Root-NFS: no NFS server address\n");
+               return -1;
        }
 
-       if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) {
-               printk(KERN_ERR "Root-NFS: Unable to get mountd port "
-                               "number from server, using default\n");
-               port = mountd_port;
-       }
-       mount_port = port;
-       dprintk("Root-NFS: mountd port is %d\n", port);
+       /* Fill nfs_root_device and nfs_root_options from nfs_root_parms. */
+       if (root_nfs_data(nfs_root_parms) < 0)
+               return -1;
 
+       /* Hand back pointers to the file-scope buffers; nothing is copied. */
+       *root_device = nfs_root_device;
+       *root_data = nfs_root_options;
        return 0;
 }
-
-
-/*
- *  Get a file handle from the server for the directory which is to be
- *  mounted.
- */
-static int __init root_nfs_get_handle(void)
-{
-       struct sockaddr_in sin;
-       unsigned int auth_flav_len = 0;
-       struct nfs_mount_request request = {
-               .sap            = (struct sockaddr *)&sin,
-               .salen          = sizeof(sin),
-               .dirpath        = nfs_export_path,
-               .version        = (nfs_data.flags & NFS_MOUNT_VER3) ?
-                                       NFS_MNT3_VERSION : NFS_MNT_VERSION,
-               .protocol       = (nfs_data.flags & NFS_MOUNT_TCP) ?
-                                       XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
-               .auth_flav_len  = &auth_flav_len,
-       };
-       int status = -ENOMEM;
-
-       request.fh = nfs_alloc_fhandle();
-       if (!request.fh)
-               goto out;
-       set_sockaddr(&sin, servaddr, htons(mount_port));
-       status = nfs_mount(&request);
-       if (status < 0)
-               printk(KERN_ERR "Root-NFS: Server returned error %d "
-                               "while mounting %s\n", status, nfs_export_path);
-       else {
-               nfs_data.root.size = request.fh->size;
-               memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
-       }
-       nfs_free_fhandle(request.fh);
-out:
-       return status;
-}
-
-/*
- *  Get the NFS port numbers and file handle, and return the prepared 'data'
- *  argument for mount() if everything went OK. Return NULL otherwise.
- */
-void * __init nfs_root_data(void)
-{
-       if (root_nfs_init() < 0
-        || root_nfs_ports() < 0
-        || root_nfs_get_handle() < 0)
-               return NULL;
-       set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
-       return (void*)&nfs_data;
-}
index 611bec2..58e7f84 100644 (file)
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
 
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
-               int flags, struct nameidata *nd)
+               int flags, struct nfs_open_context *ctx)
 {
        struct nfs_createdata *data;
        struct rpc_message msg = {
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
        return 1;
 }
 
+/*
+ * Point @msg->rpc_proc at the NFSPROC_RENAME entry of nfs_procedures.
+ * @dir is not referenced.
+ */
+static void
+nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
+{
+       msg->rpc_proc = nfs_procedures + NFSPROC_RENAME;
+}
+
+/*
+ * Completion step for a RENAME call.
+ *
+ * Returns 0, touching neither directory, when
+ * nfs_async_handle_expired_key() reports non-zero for @task; otherwise
+ * marks both @old_dir and @new_dir for revalidation and returns 1.
+ */
+static int
+nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
+                    struct inode *new_dir)
+{
+       if (!nfs_async_handle_expired_key(task)) {
+               nfs_mark_for_revalidate(old_dir);
+               nfs_mark_for_revalidate(new_dir);
+               return 1;
+       }
+       return 0;
+}
+
 static int
 nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
                struct inode *new_dir, struct qstr *new_name)
 {
        struct nfs_renameargs   arg = {
-               .fromfh         = NFS_FH(old_dir),
-               .fromname       = old_name->name,
-               .fromlen        = old_name->len,
-               .tofh           = NFS_FH(new_dir),
-               .toname         = new_name->name,
-               .tolen          = new_name->len
+               .old_dir        = NFS_FH(old_dir),
+               .old_name       = old_name,
+               .new_dir        = NFS_FH(new_dir),
+               .new_name       = new_name,
        };
        struct rpc_message msg = {
                .rpc_proc       = &nfs_procedures[NFSPROC_RENAME],
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
  */
 static int
 nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
-                u64 cookie, struct page *page, unsigned int count, int plus)
+                u64 cookie, struct page **pages, unsigned int count, int plus)
 {
        struct inode            *dir = dentry->d_inode;
        struct nfs_readdirargs  arg = {
                .fh             = NFS_FH(dir),
                .cookie         = cookie,
                .count          = count,
-               .pages          = &page,
+               .pages          = pages,
        };
        struct rpc_message      msg = {
                .rpc_proc       = &nfs_procedures[NFSPROC_READDIR],
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .unlink_setup   = nfs_proc_unlink_setup,
        .unlink_done    = nfs_proc_unlink_done,
        .rename         = nfs_proc_rename,
+       .rename_setup   = nfs_proc_rename_setup,
+       .rename_done    = nfs_proc_rename_done,
        .link           = nfs_proc_link,
        .symlink        = nfs_proc_symlink,
        .mkdir          = nfs_proc_mkdir,
index 87adc27..79859c8 100644 (file)
@@ -46,7 +46,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
                p->npages = pagecount;
-               p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
                if (pagecount <= ARRAY_SIZE(p->page_array))
                        p->pagevec = p->page_array;
                else {
index f4cbf0c..3600ec7 100644 (file)
@@ -100,6 +100,7 @@ enum {
        Opt_addr, Opt_mountaddr, Opt_clientaddr,
        Opt_lookupcache,
        Opt_fscache_uniq,
+       Opt_local_lock,
 
        /* Special mount options */
        Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = {
 
        { Opt_lookupcache, "lookupcache=%s" },
        { Opt_fscache_uniq, "fsc=%s" },
+       { Opt_local_lock, "local_lock=%s" },
 
        { Opt_err, NULL }
 };
@@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = {
        { Opt_lookupcache_err, NULL }
 };
 
+/* Tokens for the argument of the "local_lock=%s" mount option. */
+enum {
+       Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
+       Opt_local_lock_none,
+
+       /* Paired with the NULL pattern that terminates the table below */
+       Opt_local_lock_err
+};
+
+/*
+ * Argument strings accepted for "local_lock=", matched with
+ * match_token() in nfs_parse_mount_options().
+ * NOTE(review): presumably match_token() yields Opt_local_lock_err for
+ * any string not listed here -- confirm against the parser library.
+ */
+static match_table_t nfs_local_lock_tokens = {
+       { Opt_local_lock_all, "all" },
+       { Opt_local_lock_flock, "flock" },
+       { Opt_local_lock_posix, "posix" },
+       { Opt_local_lock_none, "none" },
+
+       { Opt_local_lock_err, NULL }
+};
+
 
 static void nfs_umount_begin(struct super_block *);
 static int  nfs_statfs(struct dentry *, struct kstatfs *);
@@ -622,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
        const struct proc_nfs_info *nfs_infop;
        struct nfs_client *clp = nfss->nfs_client;
        u32 version = clp->rpc_ops->version;
+       int local_flock, local_fcntl;
 
        seq_printf(m, ",vers=%u", version);
        seq_printf(m, ",rsize=%u", nfss->rsize);
@@ -670,6 +689,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                else
                        seq_printf(m, ",lookupcache=pos");
        }
+
+       local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
+       local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
+
+       if (!local_flock && !local_fcntl)
+               seq_printf(m, ",local_lock=none");
+       else if (local_flock && local_fcntl)
+               seq_printf(m, ",local_lock=all");
+       else if (local_flock)
+               seq_printf(m, ",local_lock=flock");
+       else
+               seq_printf(m, ",local_lock=posix");
 }
 
 /*
@@ -1017,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw,
                        break;
                case Opt_lock:
                        mnt->flags &= ~NFS_MOUNT_NONLM;
+                       mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+                                       NFS_MOUNT_LOCAL_FCNTL);
                        break;
                case Opt_nolock:
                        mnt->flags |= NFS_MOUNT_NONLM;
+                       mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+                                      NFS_MOUNT_LOCAL_FCNTL);
                        break;
                case Opt_v2:
                        mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1420,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw,
                        mnt->fscache_uniq = string;
                        mnt->options |= NFS_OPTION_FSCACHE;
                        break;
+               case Opt_local_lock:
+                       string = match_strdup(args);
+                       if (string == NULL)
+                               goto out_nomem;
+                       token = match_token(string, nfs_local_lock_tokens,
+                                       args);
+                       kfree(string);
+                       switch (token) {
+                       case Opt_local_lock_all:
+                               mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+                                              NFS_MOUNT_LOCAL_FCNTL);
+                               break;
+                       case Opt_local_lock_flock:
+                               mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
+                               break;
+                       case Opt_local_lock_posix:
+                               mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
+                               break;
+                       case Opt_local_lock_none:
+                               mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+                                               NFS_MOUNT_LOCAL_FCNTL);
+                               break;
+                       default:
+                               dfprintk(MOUNT, "NFS:   invalid "
+                                               "local_lock argument\n");
+                               return 0;
+                       };
+                       break;
 
                /*
                 * Special options
@@ -1825,6 +1888,12 @@ static int nfs_validate_mount_data(void *options,
                if (!args->nfs_server.hostname)
                        goto out_nomem;
 
+               if (!(data->flags & NFS_MOUNT_NONLM))
+                       args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
+                                        NFS_MOUNT_LOCAL_FCNTL);
+               else
+                       args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
+                                       NFS_MOUNT_LOCAL_FCNTL);
                /*
                 * The legacy version 6 binary mount data from userspace has a
                 * field used only to transport selinux information into the
@@ -2441,7 +2510,8 @@ static void nfs4_fill_super(struct super_block *sb)
 
 static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
 {
-       args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
+       args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
+                        NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
 }
 
 static int nfs4_validate_text_mount_data(void *options,
index ad4d2e7..978aaeb 100644 (file)
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = {
                .extra1 = (int *)&nfs_set_port_min,
                .extra2 = (int *)&nfs_set_port_max,
        },
+#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
        {
                .procname = "idmap_cache_timeout",
                .data = &nfs_idmap_cache_timeout,
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = {
                .mode = 0644,
                .proc_handler = proc_dointvec_jiffies,
        },
+#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
 #endif
        {
                .procname       = "nfs_mountpoint_timeout",
index 2f84ada..9a16bad 100644 (file)
 #include <linux/nfs_fs.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/namei.h>
 
 #include "internal.h"
 #include "nfs4_fs.h"
+#include "iostat.h"
+#include "delegation.h"
 
 struct nfs_unlinkdata {
        struct hlist_node list;
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
  * @dir: parent directory of dentry
  * @dentry: dentry to unlink
  */
-int
+static int
 nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct nfs_unlinkdata *data;
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
                status = PTR_ERR(data->cred);
                goto out_free;
        }
-       data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        data->res.dir_attr = &data->dir_attr;
 
        status = -EBUSY;
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
        if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
                nfs_free_unlinkdata(data);
 }
+
+/*
+ * Drop the deferred unlink attached to a silly-renamed dentry.
+ * Used on the paths where the sillyrename itself did not go through.
+ */
+static void
+nfs_cancel_async_unlink(struct dentry *dentry)
+{
+       struct nfs_unlinkdata *pending;
+
+       spin_lock(&dentry->d_lock);
+       if (!(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
+               /* nothing is queued for this dentry */
+               spin_unlock(&dentry->d_lock);
+               return;
+       }
+       pending = dentry->d_fsdata;
+       dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
+       spin_unlock(&dentry->d_lock);
+
+       /* the unlink data is freed after d_lock has been released */
+       nfs_free_unlinkdata(pending);
+}
+
+struct nfs_renamedata {        /* call data of one asynchronous rename RPC */
+       struct nfs_renameargs   args;           /* passed to the RPC as msg.rpc_argp */
+       struct nfs_renameres    res;            /* passed to the RPC as msg.rpc_resp */
+       struct rpc_cred         *cred;          /* from rpc_lookup_cred(), put on release */
+       struct inode            *old_dir;       /* i_count held until release (iput) */
+       struct dentry           *old_dentry;    /* dget() reference, dput() on release */
+       struct nfs_fattr        old_fattr;      /* storage behind res.old_fattr */
+       struct inode            *new_dir;       /* i_count held until release (iput) */
+       struct dentry           *new_dentry;    /* dget() reference, dput() on release */
+       struct nfs_fattr        new_fattr;      /* storage behind res.new_fattr */
+};
+
+/**
+ * nfs_async_rename_done - completion handler for the sillyrename RPC
+ * @task: the rpc_task that carried the rename
+ * @calldata: the struct nfs_renamedata attached to @task
+ *
+ * If the protocol's rename_done hook returns zero, the RPC is restarted.
+ * A rename that finished with a non-zero status cancels the unlink that
+ * was queued for the old dentry. Otherwise the verifier of the old dentry
+ * is refreshed and the old dentry is d_move()d onto the new one.
+ */
+static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
+{
+       struct nfs_renamedata *rd = calldata;
+       struct inode *src_dir = rd->old_dir;
+       struct inode *dst_dir = rd->new_dir;
+
+       if (!NFS_PROTO(src_dir)->rename_done(task, src_dir, dst_dir)) {
+               nfs_restart_rpc(task, NFS_SERVER(src_dir)->nfs_client);
+       } else if (task->tk_status != 0) {
+               nfs_cancel_async_unlink(rd->old_dentry);
+       } else {
+               nfs_set_verifier(rd->old_dentry,
+                                nfs_save_change_attribute(src_dir));
+               d_move(rd->old_dentry, rd->new_dentry);
+       }
+}
+
+/**
+ * nfs_async_rename_release - tear down the state of a finished sillyrename
+ * @calldata: the struct nfs_renamedata owned by the RPC task
+ *
+ * Drops every reference recorded in the nfs_renamedata and frees it.
+ */
+static void nfs_async_rename_release(void *calldata)
+{
+       struct nfs_renamedata *rd = calldata;
+       struct dentry *victim = rd->old_dentry;
+       /* fetch the superblock before the directory reference is dropped */
+       struct super_block *sb = rd->old_dir->i_sb;
+
+       if (victim->d_inode)
+               nfs_mark_for_revalidate(victim->d_inode);
+
+       dput(victim);
+       dput(rd->new_dentry);
+       iput(rd->old_dir);
+       iput(rd->new_dir);
+       nfs_sb_deactive(sb);
+       put_rpccred(rd->cred);
+       kfree(rd);
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * rpc_call_prepare hook for the rename: the call is started only once
+ * nfs4_setup_sequence() has returned zero.
+ */
+static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
+{
+       struct nfs_renamedata *rd = calldata;
+
+       if (nfs4_setup_sequence(NFS_SERVER(rd->old_dir), &rd->args.seq_args,
+                               &rd->res.seq_res, 1, task) == 0)
+               rpc_call_start(task);
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/* Callbacks of the asynchronous rename RPC, listed in the order they run. */
+static const struct rpc_call_ops nfs_rename_ops = {
+#if defined(CONFIG_NFS_V4_1)
+       .rpc_call_prepare       = nfs_rename_prepare,
+#endif /* CONFIG_NFS_V4_1 */
+       .rpc_call_done          = nfs_async_rename_done,
+       .rpc_release            = nfs_async_rename_release,
+};
+
+/**
+ * nfs_async_rename - perform an asynchronous rename operation
+ * @old_dir: directory that currently holds the dentry to be renamed
+ * @new_dir: target directory for the rename
+ * @old_dentry: original dentry to be renamed
+ * @new_dentry: dentry to which the old_dentry should be renamed
+ *
+ * It's expected that valid references to the dentries and inodes are held
+ * by the caller. For the lifetime of the RPC the function takes its own
+ * references (inode counts, dentries, credential, active superblock);
+ * nfs_async_rename_release() is the callback that drops them again.
+ *
+ * Returns the result of rpc_run_task(), or an ERR_PTR() when the call
+ * data cannot be allocated or the credential lookup fails.
+ */
+static struct rpc_task *
+nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
+                struct dentry *old_dentry, struct dentry *new_dentry)
+{
+       struct nfs_renamedata *data;
+       struct rpc_message msg = { };
+       struct rpc_task_setup task_setup_data = {
+               .rpc_message = &msg,
+               .callback_ops = &nfs_rename_ops,
+               .workqueue = nfsiod_workqueue,
+               .rpc_client = NFS_CLIENT(old_dir),
+               .flags = RPC_TASK_ASYNC,
+       };
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (data == NULL)
+               return ERR_PTR(-ENOMEM);
+       task_setup_data.callback_data = data;
+
+       data->cred = rpc_lookup_cred();
+       if (IS_ERR(data->cred)) {
+               /* no references taken yet: only the allocation to undo */
+               struct rpc_task *task = ERR_CAST(data->cred);
+               kfree(data);
+               return task;
+       }
+
+       msg.rpc_argp = &data->args;
+       msg.rpc_resp = &data->res;
+       msg.rpc_cred = data->cred;
+
+       /* set up nfs_renamedata */
+       data->old_dir = old_dir;
+       atomic_inc(&old_dir->i_count);
+       data->new_dir = new_dir;
+       atomic_inc(&new_dir->i_count);
+       data->old_dentry = dget(old_dentry);
+       data->new_dentry = dget(new_dentry);
+       nfs_fattr_init(&data->old_fattr);
+       nfs_fattr_init(&data->new_fattr);
+
+       /* set up nfs_renameargs */
+       data->args.old_dir = NFS_FH(old_dir);
+       data->args.old_name = &old_dentry->d_name;
+       data->args.new_dir = NFS_FH(new_dir);
+       data->args.new_name = &new_dentry->d_name;
+
+       /* set up nfs_renameres */
+       data->res.old_fattr = &data->old_fattr;
+       data->res.new_fattr = &data->new_fattr;
+
+       /* paired with nfs_sb_deactive() in nfs_async_rename_release() */
+       nfs_sb_active(old_dir->i_sb);
+
+       NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
+
+       return rpc_run_task(&task_setup_data);
+}
+
+/**
+ * nfs_sillyrename - Perform a silly-rename of a dentry
+ * @dir: inode of directory that contains dentry
+ * @dentry: dentry to be sillyrenamed
+ *
+ * NFSv2/3 is stateless and the server doesn't know when the client is
+ * holding a file open. To prevent application problems when a file is
+ * unlinked while it's still open, the client performs a "silly-rename".
+ * That is, it renames the file to a hidden file in the same directory,
+ * and only performs the unlink once the last reference to it is put.
+ *
+ * The final cleanup is done during dentry_iput.
+ *
+ * Returns 0 when the rename RPC completed with status 0. Returns -EBUSY
+ * when the dentry is already marked DCACHE_NFSFS_RENAMED, when looking up
+ * a candidate name fails, or when the rename task cannot be started.
+ * Otherwise returns the error from nfs_async_unlink(), from
+ * rpc_wait_for_completion_task(), or the status of the finished task.
+ */
+int
+nfs_sillyrename(struct inode *dir, struct dentry *dentry)
+{
+       static unsigned int sillycounter;
+       const int      fileidsize  = sizeof(NFS_FILEID(dentry->d_inode))*2;
+       const int      countersize = sizeof(sillycounter)*2;
+       const int      slen        = sizeof(".nfs")+fileidsize+countersize-1;
+       char           silly[slen+1]; /* ".nfs" + hex fileid + hex counter */
+       struct dentry *sdentry;
+       struct rpc_task *task;
+       int            error = -EIO; /* replaced by -EBUSY before first use */
+
+       dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
+               dentry->d_parent->d_name.name, dentry->d_name.name,
+               atomic_read(&dentry->d_count));
+       nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
+
+       /*
+        * We don't allow a dentry to be silly-renamed twice.
+        */
+       error = -EBUSY;
+       if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
+               goto out;
+
+       sprintf(silly, ".nfs%*.*Lx",
+               fileidsize, fileidsize,
+               (unsigned long long)NFS_FILEID(dentry->d_inode));
+
+       /* Return delegation in anticipation of the rename */
+       nfs_inode_return_delegation(dentry->d_inode);
+
+       sdentry = NULL;
+       do {
+               char *suffix = silly + slen - countersize; /* tail of the name */
+
+               dput(sdentry); /* previous candidate; NULL on the first pass */
+               sillycounter++;
+               sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
+
+               dfprintk(VFS, "NFS: trying to rename %s to %s\n",
+                               dentry->d_name.name, silly);
+
+               sdentry = lookup_one_len(silly, dentry->d_parent, slen);
+               /*
+                * N.B. Better to return EBUSY here ... it could be
+                * dangerous to delete the file while it's in use.
+                * (error still holds the -EBUSY assigned above.)
+                */
+               if (IS_ERR(sdentry))
+                       goto out;
+       } while (sdentry->d_inode != NULL); /* need negative lookup */
+
+       /* queue unlink first. Can't do this from rpc_release as it
+        * has to allocate memory
+        */
+       error = nfs_async_unlink(dir, dentry);
+       if (error)
+               goto out_dput;
+
+       /* run the rename task, undo unlink if it fails */
+       task = nfs_async_rename(dir, dir, dentry, sdentry);
+       if (IS_ERR(task)) {
+               error = -EBUSY;
+               nfs_cancel_async_unlink(dentry);
+               goto out_dput;
+       }
+
+       /* wait for the RPC task to complete, unless a SIGKILL intervenes */
+       error = rpc_wait_for_completion_task(task);
+       if (error == 0)
+               error = task->tk_status;
+       rpc_put_task(task);
+out_dput:
+       dput(sdentry);
+out:
+       return error;
+}
index 874972d..605e292 100644 (file)
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
        if (p) {
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
-               p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
        }
        return p;
 }
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
                p->npages = pagecount;
-               p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
                if (pagecount <= ARRAY_SIZE(p->page_array))
                        p->pagevec = p->page_array;
                else {
@@ -1433,15 +1431,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
        int flags = FLUSH_SYNC;
        int ret = 0;
 
-       /* Don't commit yet if this is a non-blocking flush and there are
-        * lots of outstanding writes for this mapping.
-        */
-       if (wbc->sync_mode == WB_SYNC_NONE &&
-           nfsi->ncommit <= (nfsi->npages >> 1))
-               goto out_mark_dirty;
+       if (wbc->sync_mode == WB_SYNC_NONE) {
+               /* Don't commit yet if this is a non-blocking flush and there
+                * are a lot of outstanding writes for this mapping.
+                */
+               if (nfsi->ncommit <= (nfsi->npages >> 1))
+                       goto out_mark_dirty;
 
-       if (wbc->nonblocking || wbc->for_background)
+               /* don't wait for the COMMIT response */
                flags = 0;
+       }
+
        ret = nfs_commit_inode(inode, flags);
        if (ret >= 0) {
                if (wbc->sync_mode == WB_SYNC_NONE) {
index d0edf7d..a46e430 100644 (file)
@@ -360,10 +360,13 @@ extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr);
 extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
+extern struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode);
+extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
 extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
 extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
+extern unsigned long nfs_inc_attr_generation_counter(void);
 
 extern struct nfs_fattr *nfs_alloc_fattr(void);
 
@@ -379,9 +382,12 @@ static inline void nfs_free_fhandle(const struct nfs_fh *fh)
        kfree(fh);
 }
 
+/*
+ * linux/fs/nfs/nfsroot.c
+ */
+extern int  nfs_root_data(char **root_device, char **root_data); /*__init*/
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
-extern unsigned long nfs_inc_attr_generation_counter(void);
 
 /*
  * linux/fs/nfs/file.c
@@ -479,10 +485,10 @@ extern void nfs_release_automount_timer(void);
 /*
  * linux/fs/nfs/unlink.c
  */
-extern int  nfs_async_unlink(struct inode *dir, struct dentry *dentry);
 extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
 extern void nfs_block_sillyrename(struct dentry *dentry);
 extern void nfs_unblock_sillyrename(struct dentry *dentry);
+extern int  nfs_sillyrename(struct inode *dir, struct dentry *dentry);
 
 /*
  * linux/fs/nfs/write.c
@@ -584,10 +590,6 @@ nfs_fileid_to_ino_t(u64 fileid)
        return ino;
 }
 
-/* NFS root */
-
-extern void * nfs_root_data(void);
-
 #define nfs_wait_event(clnt, wq, condition)                            \
 ({                                                                     \
        int __retval = wait_event_killable(wq, condition);              \
index c82ee7c..5eef862 100644 (file)
@@ -124,6 +124,7 @@ struct nfs_server {
 
        struct nfs_fsid         fsid;
        __u64                   maxfilesize;    /* maximum file size */
+       struct timespec         time_delta;     /* smallest time granularity */
        unsigned long           mount_time;     /* when this fs was mounted */
        dev_t                   s_dev;          /* superblock dev numbers */
 
index 91a1c24..e8352dc 100644 (file)
@@ -66,13 +66,40 @@ struct idmap_msg {
 /* Forward declaration to make this header independent of others */
 struct nfs_client;
 
+#ifdef CONFIG_NFS_USE_NEW_IDMAPPER /* init/quit are real, new/delete are stubs */
+
+int nfs_idmap_init(void);
+void nfs_idmap_quit(void);
+
+static inline int nfs_idmap_new(struct nfs_client *clp) /* stub: always returns 0 */
+{
+       return 0;
+}
+
+static inline void nfs_idmap_delete(struct nfs_client *clp) /* stub: does nothing */
+{
+}
+
+#else /* CONFIG_NFS_USE_NEW_IDMAPPER not set */
+
+static inline int nfs_idmap_init(void) /* stub: always returns 0 */
+{
+       return 0;
+}
+
+static inline void nfs_idmap_quit(void) /* stub: does nothing */
+{
+}
+
 int nfs_idmap_new(struct nfs_client *);
 void nfs_idmap_delete(struct nfs_client *);
 
+#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
+
 int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *);
 int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *);
-int nfs_map_uid_to_name(struct nfs_client *, __u32, char *);
-int nfs_map_gid_to_group(struct nfs_client *, __u32, char *);
+int nfs_map_uid_to_name(struct nfs_client *, __u32, char *, size_t);
+int nfs_map_gid_to_group(struct nfs_client *, __u32, char *, size_t);
 
 extern unsigned int nfs_idmap_cache_timeout;
 #endif /* __KERNEL__ */
index 5d59ae8..576bddd 100644 (file)
@@ -71,4 +71,7 @@ struct nfs_mount_data {
 #define NFS_MOUNT_NORESVPORT           0x40000
 #define NFS_MOUNT_LEGACY_INTERFACE     0x80000
 
+#define NFS_MOUNT_LOCAL_FLOCK  0x100000
+#define NFS_MOUNT_LOCAL_FCNTL  0x200000
+
 #endif
index fc46192..da7a130 100644 (file)
@@ -112,6 +112,7 @@ struct nfs_fsinfo {
        __u32                   wtmult; /* writes should be multiple of this */
        __u32                   dtpref; /* pref. readdir transfer size */
        __u64                   maxfilesize;
+       struct timespec         time_delta; /* server time granularity */
        __u32                   lease_time; /* in seconds */
 };
 
@@ -170,7 +171,7 @@ struct nfs4_sequence_args {
 
 struct nfs4_sequence_res {
        struct nfs4_session     *sr_session;
-       u8                      sr_slotid;      /* slot used to send request */
+       struct nfs4_slot        *sr_slot;       /* slot used to send request */
        int                     sr_status;      /* sequence operation status */
        unsigned long           sr_renewal_time;
        u32                     sr_status_flags;
@@ -399,6 +400,27 @@ struct nfs_removeres {
        struct nfs4_sequence_res        seq_res;
 };
 
+/*
+ * Common arguments to the rename call
+ *
+ * nfs_async_rename() fills in the two directory handles and the two
+ * names; seq_args is what nfs_rename_prepare() passes on to
+ * nfs4_setup_sequence().
+ */
+struct nfs_renameargs {
+       const struct nfs_fh             *old_dir;       /* handle of the source directory */
+       const struct nfs_fh             *new_dir;       /* handle of the target directory */
+       const struct qstr               *old_name;      /* current name (d_name of old dentry) */
+       const struct qstr               *new_name;      /* new name (d_name of new dentry) */
+       const u32                       *bitmask;       /* NOTE(review): not set by nfs_async_rename() - confirm who fills it */
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs_renameres { /* result area of the rename call */
+       const struct nfs_server         *server;        /* NOTE(review): not set by nfs_async_rename() - confirm who fills it */
+       struct nfs4_change_info         old_cinfo;
+       struct nfs_fattr                *old_fattr;     /* caller-provided attribute storage */
+       struct nfs4_change_info         new_cinfo;
+       struct nfs_fattr                *new_fattr;     /* caller-provided attribute storage */
+       struct nfs4_sequence_res        seq_res;        /* passed to nfs4_setup_sequence() */
+};
+
 /*
  * Argument struct for decode_entry function
  */
@@ -434,15 +456,6 @@ struct nfs_createargs {
        struct iattr *          sattr;
 };
 
-struct nfs_renameargs {
-       struct nfs_fh *         fromfh;
-       const char *            fromname;
-       unsigned int            fromlen;
-       struct nfs_fh *         tofh;
-       const char *            toname;
-       unsigned int            tolen;
-};
-
 struct nfs_setattrargs {
        struct nfs_fh *                 fh;
        nfs4_stateid                    stateid;
@@ -586,15 +599,6 @@ struct nfs3_mknodargs {
        dev_t                   rdev;
 };
 
-struct nfs3_renameargs {
-       struct nfs_fh *         fromfh;
-       const char *            fromname;
-       unsigned int            fromlen;
-       struct nfs_fh *         tofh;
-       const char *            toname;
-       unsigned int            tolen;
-};
-
 struct nfs3_linkargs {
        struct nfs_fh *         fromfh;
        struct nfs_fh *         tofh;
@@ -629,11 +633,6 @@ struct nfs3_readlinkargs {
        struct page **          pages;
 };
 
-struct nfs3_renameres {
-       struct nfs_fattr *      fromattr;
-       struct nfs_fattr *      toattr;
-};
-
 struct nfs3_linkres {
        struct nfs_fattr *      dir_attr;
        struct nfs_fattr *      fattr;
@@ -780,6 +779,7 @@ struct nfs4_readdir_arg {
        struct page **                  pages;  /* zero-copy data */
        unsigned int                    pgbase; /* zero-copy data */
        const u32 *                     bitmask;
+       int                             plus;
        struct nfs4_sequence_args       seq_args;
 };
 
@@ -801,24 +801,6 @@ struct nfs4_readlink_res {
        struct nfs4_sequence_res        seq_res;
 };
 
-struct nfs4_rename_arg {
-       const struct nfs_fh *           old_dir;
-       const struct nfs_fh *           new_dir;
-       const struct qstr *             old_name;
-       const struct qstr *             new_name;
-       const u32 *                     bitmask;
-       struct nfs4_sequence_args       seq_args;
-};
-
-struct nfs4_rename_res {
-       const struct nfs_server *       server;
-       struct nfs4_change_info         old_cinfo;
-       struct nfs_fattr *              old_fattr;
-       struct nfs4_change_info         new_cinfo;
-       struct nfs_fattr *              new_fattr;
-       struct nfs4_sequence_res        seq_res;
-};
-
 #define NFS4_SETCLIENTID_NAMELEN       (127)
 struct nfs4_setclientid {
        const nfs4_verifier *           sc_verifier;
@@ -1032,19 +1014,21 @@ struct nfs_rpc_ops {
        int     (*readlink)(struct inode *, struct page *, unsigned int,
                            unsigned int);
        int     (*create)  (struct inode *, struct dentry *,
-                           struct iattr *, int, struct nameidata *);
+                           struct iattr *, int, struct nfs_open_context *);
        int     (*remove)  (struct inode *, struct qstr *);
        void    (*unlink_setup)  (struct rpc_message *, struct inode *dir);
        int     (*unlink_done) (struct rpc_task *, struct inode *);
        int     (*rename)  (struct inode *, struct qstr *,
                            struct inode *, struct qstr *);
+       void    (*rename_setup)  (struct rpc_message *msg, struct inode *dir);
+       int     (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir);
        int     (*link)    (struct inode *, struct inode *, struct qstr *);
        int     (*symlink) (struct inode *, struct dentry *, struct page *,
                            unsigned int, struct iattr *);
        int     (*mkdir)   (struct inode *, struct dentry *, struct iattr *);
        int     (*rmdir)   (struct inode *, struct qstr *);
        int     (*readdir) (struct dentry *, struct rpc_cred *,
-                           u64, struct page *, unsigned int, int);
+                           u64, struct page **, unsigned int, int);
        int     (*mknod)   (struct inode *, struct dentry *, struct iattr *,
                            dev_t);
        int     (*statfs)  (struct nfs_server *, struct nfs_fh *,
@@ -1054,7 +1038,7 @@ struct nfs_rpc_ops {
        int     (*pathconf) (struct nfs_server *, struct nfs_fh *,
                             struct nfs_pathconf *);
        int     (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
-       __be32 *(*decode_dirent)(__be32 *, struct nfs_entry *, int plus);
+       __be32 *(*decode_dirent)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int plus);
        void    (*read_setup)   (struct nfs_read_data *, struct rpc_message *);
        int     (*read_done)  (struct rpc_task *, struct nfs_read_data *);
        void    (*write_setup)  (struct nfs_write_data *, struct rpc_message *);
@@ -1065,6 +1049,10 @@ struct nfs_rpc_ops {
        int     (*lock_check_bounds)(const struct file_lock *);
        void    (*clear_acl_cache)(struct inode *);
        void    (*close_context)(struct nfs_open_context *ctx, int);
+       struct inode * (*open_context) (struct inode *dir,
+                               struct nfs_open_context *ctx,
+                               int open_flags,
+                               struct iattr *iattr);
 };
 
 /*
index 85f38a6..c83df09 100644 (file)
@@ -137,7 +137,6 @@ int         rpcb_register(u32, u32, int, unsigned short);
 int            rpcb_v4_register(const u32 program, const u32 version,
                                 const struct sockaddr *address,
                                 const char *netid);
-int            rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
 void           rpcb_getport_async(struct rpc_task *);
 
 void           rpc_call_start(struct rpc_task *);
index 35cf2e8..ab91d86 100644 (file)
@@ -108,6 +108,7 @@ void        xdr_encode_pages(struct xdr_buf *, struct page **, unsigned int,
                         unsigned int);
 void   xdr_inline_pages(struct xdr_buf *, unsigned int,
                         struct page **, unsigned int, unsigned int);
+void   xdr_terminate_string(struct xdr_buf *, const u32);
 
 static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
 {
@@ -200,6 +201,7 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
                unsigned int base, unsigned int len);
 extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern __be32 *xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes);
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
index 42db055..62a47ea 100644 (file)
@@ -361,13 +361,13 @@ out:
 #ifdef CONFIG_ROOT_NFS
 static int __init mount_nfs_root(void)
 {
-       void *data = nfs_root_data();
+       char *root_dev, *root_data;
 
-       create_dev("/dev/root", ROOT_DEV);
-       if (data &&
-           do_mount_root("/dev/root", "nfs", root_mountflags, data) == 0)
-               return 1;
-       return 0;
+       if (nfs_root_data(&root_dev, &root_data) != 0)
+               return 0;
+       if (do_mount_root(root_dev, "nfs", root_mountflags, root_data) != 0)
+               return 0;
+       return 1;
 }
 #endif
 
index e9eaaf7..68192e5 100644 (file)
@@ -595,7 +595,7 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp,
 int
 rpcauth_refreshcred(struct rpc_task *task)
 {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       struct rpc_cred *cred;
        int err;
 
        cred = task->tk_rqstp->rq_cred;
index fa55490..cbc5b8c 100644 (file)
@@ -1675,7 +1675,7 @@ rpc_verify_header(struct rpc_task *task)
                        rpcauth_invalcred(task);
                        /* Ensure we obtain a new XID! */
                        xprt_release(task);
-                       task->tk_action = call_refresh;
+                       task->tk_action = call_reserve;
                        goto out_retry;
                case RPC_AUTH_BADCRED:
                case RPC_AUTH_BADVERF:
index dac219a..63ec116 100644 (file)
@@ -211,8 +211,9 @@ static int rpcb_create_local(void)
         */
        clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
        if (IS_ERR(clnt4)) {
-               dprintk("RPC:       failed to create local rpcbind v4 "
-                               "cleint (errno %ld).\n", PTR_ERR(clnt4));
+               dprintk("RPC:       failed to bind second program to "
+                               "rpcbind v4 client (errno %ld).\n",
+                               PTR_ERR(clnt4));
                clnt4 = NULL;
        }
 
@@ -475,57 +476,6 @@ int rpcb_v4_register(const u32 program, const u32 version,
        return -EAFNOSUPPORT;
 }
 
-/**
- * rpcb_getport_sync - obtain the port for an RPC service on a given host
- * @sin: address of remote peer
- * @prog: RPC program number to bind
- * @vers: RPC version number to bind
- * @prot: transport protocol to use to make this request
- *
- * Return value is the requested advertised port number,
- * or a negative errno value.
- *
- * Called from outside the RPC client in a synchronous task context.
- * Uses default timeout parameters specified by underlying transport.
- *
- * XXX: Needs to support IPv6
- */
-int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
-{
-       struct rpcbind_args map = {
-               .r_prog         = prog,
-               .r_vers         = vers,
-               .r_prot         = prot,
-               .r_port         = 0,
-       };
-       struct rpc_message msg = {
-               .rpc_proc       = &rpcb_procedures2[RPCBPROC_GETPORT],
-               .rpc_argp       = &map,
-               .rpc_resp       = &map,
-       };
-       struct rpc_clnt *rpcb_clnt;
-       int status;
-
-       dprintk("RPC:       %s(%pI4, %u, %u, %d)\n",
-               __func__, &sin->sin_addr.s_addr, prog, vers, prot);
-
-       rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
-                               sizeof(*sin), prot, RPCBVERS_2);
-       if (IS_ERR(rpcb_clnt))
-               return PTR_ERR(rpcb_clnt);
-
-       status = rpc_call_sync(rpcb_clnt, &msg, 0);
-       rpc_shutdown_client(rpcb_clnt);
-
-       if (status >= 0) {
-               if (map.r_port != 0)
-                       return map.r_port;
-               status = -EACCES;
-       }
-       return status;
-}
-EXPORT_SYMBOL_GPL(rpcb_getport_sync);
-
 static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, struct rpc_procinfo *proc)
 {
        struct rpc_message msg = {
index aa5dbda..243fc09 100644 (file)
@@ -908,7 +908,7 @@ static int rpciod_start(void)
         * Create the rpciod thread and wait for it to start.
         */
        dprintk("RPC:       creating workqueue rpciod\n");
-       wq = create_workqueue("rpciod");
+       wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
        rpciod_workqueue = wq;
        return rpciod_workqueue != NULL;
 }
index a1f82a8..cd9e841 100644 (file)
@@ -111,6 +111,23 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
 }
 EXPORT_SYMBOL_GPL(xdr_decode_string_inplace);
 
+/**
+ * xdr_terminate_string - '\0'-terminate a string residing in an xdr_buf
+ * @buf: XDR buffer where string resides
+ * @len: length of string, in bytes
+ *
+ * Stores a '\0' at offset page_base + @len of the first page of @buf.
+ * NOTE(review): only pages[0] is mapped, so the terminator is assumed to
+ * fall inside that page - confirm against the callers.
+ */
+void
+xdr_terminate_string(struct xdr_buf *buf, const u32 len)
+{
+       char *page_addr = kmap_atomic(buf->pages[0], KM_USER0);
+
+       *(page_addr + buf->page_base + len) = '\0';
+       kunmap_atomic(page_addr, KM_USER0);
+}
+EXPORT_SYMBOL(xdr_terminate_string);
+
 void
 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
                 unsigned int len)
@@ -395,24 +412,29 @@ xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
 {
        struct kvec *tail;
        size_t copy;
-       char *p;
        unsigned int pglen = buf->page_len;
+       unsigned int tailbuf_len;
 
        tail = buf->tail;
        BUG_ON (len > pglen);
 
+       tailbuf_len = buf->buflen - buf->head->iov_len - buf->page_len;
+
        /* Shift the tail first */
-       if (tail->iov_len != 0) {
-               p = (char *)tail->iov_base + len;
+       if (tailbuf_len != 0) {
+               unsigned int free_space = tailbuf_len - tail->iov_len;
+
+               if (len < free_space)
+                       free_space = len;
+               tail->iov_len += free_space;
+
+               copy = len;
                if (tail->iov_len > len) {
-                       copy = tail->iov_len - len;
-                       memmove(p, tail->iov_base, copy);
+                       char *p = (char *)tail->iov_base + len;
+                       memmove(p, tail->iov_base, tail->iov_len - len);
                } else
-                       buf->buflen -= len;
-               /* Copy from the inlined pages into the tail */
-               copy = len;
-               if (copy > tail->iov_len)
                        copy = tail->iov_len;
+               /* Copy from the inlined pages into the tail */
                _copy_from_pages((char *)tail->iov_base,
                                buf->pages, buf->page_base + pglen - len,
                                copy);
@@ -550,6 +572,27 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
 }
 EXPORT_SYMBOL_GPL(xdr_init_decode);
 
+/**
+ * xdr_inline_peek - look ahead in the XDR data stream without consuming it
+ * @xdr: pointer to xdr_stream struct
+ * @nbytes: number of bytes of data the caller wants to decode
+ *
+ * Returns the current decode position if @nbytes (rounded up to whole
+ * XDR quads) are available between it and the end of the stream, and
+ * NULL otherwise. The position stored in @xdr is left untouched.
+ */
+__be32 * xdr_inline_peek(struct xdr_stream *xdr, size_t nbytes)
+{
+       __be32 *cur = xdr->p;
+       __be32 *next = cur + XDR_QUADLEN(nbytes);
+
+       /* a wrapped pointer or one past the end both mean "not enough data" */
+       if (unlikely(next < cur || next > xdr->end))
+               return NULL;
+       return cur;
+}
+EXPORT_SYMBOL_GPL(xdr_inline_peek);
+
 /**
  * xdr_inline_decode - Retrieve non-page XDR data to decode
  * @xdr: pointer to xdr_stream struct