Merge branch 'master' into devel and apply fixup from Stephen Rothwell:
author Stephen Rothwell <sfr@canb.auug.org.au>
Mon, 25 Jul 2011 17:59:46 +0000 (13:59 -0400)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Mon, 25 Jul 2011 18:53:52 +0000 (14:53 -0400)
vfs/nfs: fixup for nfs_open_context change

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
42 files changed:
fs/lockd/clntproc.c
fs/nfs/Kconfig
fs/nfs/callback_proc.c
fs/nfs/client.c
fs/nfs/internal.h
fs/nfs/namespace.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4filelayout.c
fs/nfs/nfs4filelayout.h
fs/nfs/nfs4filelayoutdev.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/nfs4xdr.c
fs/nfs/objlayout/objio_osd.c
fs/nfs/pagelist.c
fs/nfs/pnfs.c
fs/nfs/pnfs.h
fs/nfs/pnfs_dev.c
fs/nfs/read.c
fs/nfs/unlink.c
fs/nfs/write.c
include/linux/nfs4.h
include/linux/nfs_fs_sb.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/pnfs_osd_xdr.h
include/linux/sunrpc/bc_xprt.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/xprt.h
net/sunrpc/Kconfig
net/sunrpc/Makefile
net/sunrpc/backchannel_rqst.c
net/sunrpc/bc_svc.c
net/sunrpc/clnt.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/svcsock.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtrdma/transport.c
net/sunrpc/xprtsock.c

index e374050..8392cb8 100644 (file)
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
                                /* We appear to be out of the grace period */
                                wake_up_all(&host->h_gracewait);
                        }
-                       dprintk("lockd: server returns status %d\n", resp->status);
+                       dprintk("lockd: server returns status %d\n",
+                               ntohl(resp->status));
                        return 0;       /* Okay, call complete */
                }
 
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
                goto out;
 
        if (resp->status != nlm_lck_denied_nolocks)
-               printk("lockd: unexpected unlock status: %d\n", resp->status);
+               printk("lockd: unexpected unlock status: %d\n",
+                       ntohl(resp->status));
        /* What to do now? I'm out of my depth... */
        status = -ENOLCK;
 out:
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
                return -ENOLCK;
 #endif
        }
-       printk(KERN_NOTICE "lockd: unexpected server status %d\n", status);
+       printk(KERN_NOTICE "lockd: unexpected server status %d\n",
+                ntohl(status));
        return -ENOLCK;
 }
index 8151554..2cde5d9 100644 (file)
@@ -77,6 +77,7 @@ config NFS_V4
 config NFS_V4_1
        bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
        depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+       select SUNRPC_BACKCHANNEL
        select PNFS_FILE_LAYOUT
        help
          This option enables support for minor version 1 of the NFSv4 protocol
index d4d1954..74780f9 100644 (file)
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
 static u32 initiate_file_draining(struct nfs_client *clp,
                                  struct cb_layoutrecallargs *args)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        struct inode *ino;
        bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
        LIST_HEAD(free_me_list);
 
        spin_lock(&clp->cl_lock);
-       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
-               if (nfs_compare_fh(&args->cbl_fh,
-                                  &NFS_I(lo->plh_inode)->fh))
-                       continue;
-               ino = igrab(lo->plh_inode);
-               if (!ino)
-                       continue;
-               found = true;
-               /* Without this, layout can be freed as soon
-                * as we release cl_lock.
-                */
-               get_layout_hdr(lo);
-               break;
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               list_for_each_entry(lo, &server->layouts, plh_layouts) {
+                       if (nfs_compare_fh(&args->cbl_fh,
+                                          &NFS_I(lo->plh_inode)->fh))
+                               continue;
+                       ino = igrab(lo->plh_inode);
+                       if (!ino)
+                               continue;
+                       found = true;
+                       /* Without this, layout can be freed as soon
+                        * as we release cl_lock.
+                        */
+                       get_layout_hdr(lo);
+                       break;
+               }
+               if (found)
+                       break;
        }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
+
        if (!found)
                return NFS4ERR_NOMATCHING_LAYOUT;
 
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 static u32 initiate_bulk_draining(struct nfs_client *clp,
                                  struct cb_layoutrecallargs *args)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        struct inode *ino;
        u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
        };
 
        spin_lock(&clp->cl_lock);
-       list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                if ((args->cbl_recall_type == RETURN_FSID) &&
-                   memcmp(&NFS_SERVER(lo->plh_inode)->fsid,
-                          &args->cbl_fsid, sizeof(struct nfs_fsid)))
-                       continue;
-               if (!igrab(lo->plh_inode))
+                   memcmp(&server->fsid, &args->cbl_fsid,
+                          sizeof(struct nfs_fsid)))
                        continue;
-               get_layout_hdr(lo);
-               BUG_ON(!list_empty(&lo->plh_bulk_recall));
-               list_add(&lo->plh_bulk_recall, &recall_list);
+
+               list_for_each_entry(lo, &server->layouts, plh_layouts) {
+                       if (!igrab(lo->plh_inode))
+                               continue;
+                       get_layout_hdr(lo);
+                       BUG_ON(!list_empty(&lo->plh_bulk_recall));
+                       list_add(&lo->plh_bulk_recall, &recall_list);
+               }
        }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
+
        list_for_each_entry_safe(lo, tmp,
                                 &recall_list, plh_bulk_recall) {
                ino = lo->plh_inode;
index b3dc2b8..19ea7d9 100644 (file)
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
        cred = rpc_lookup_machine_cred();
        if (!IS_ERR(cred))
                clp->cl_machine_cred = cred;
-#if defined(CONFIG_NFS_V4_1)
-       INIT_LIST_HEAD(&clp->cl_layouts);
-#endif
        nfs_fscache_get_client_cookie(clp);
 
        return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
        nfs4_deviceid_purge_client(clp);
 
        kfree(clp->cl_hostname);
+       kfree(clp->server_scope);
        kfree(clp);
 
        dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
        INIT_LIST_HEAD(&server->client_link);
        INIT_LIST_HEAD(&server->master_link);
        INIT_LIST_HEAD(&server->delegations);
+       INIT_LIST_HEAD(&server->layouts);
 
        atomic_set(&server->active, 0);
 
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
        dprintk("<-- %s %p\n", __func__, clp);
        return clp;
 }
-EXPORT_SYMBOL(nfs4_set_ds_client);
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
 /*
  * Session has been established, and the client marked ready.
index 2a55347..ab12913 100644 (file)
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
 extern char *nfs_path(char **p, struct dentry *dentry,
                      char *buffer, ssize_t buflen);
 extern struct vfsmount *nfs_d_automount(struct path *path);
+#ifdef CONFIG_NFS_V4
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
+#endif
 
 /* getroot.c */
 extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
 extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
 #endif
 
+struct nfs_pageio_descriptor;
 /* read.c */
 extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
                             const struct rpc_call_ops *call_ops);
 extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
+extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
+               struct list_head *head);
+
+extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_readdata_release(struct nfs_read_data *rdata);
 
 /* write.c */
+extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
+               struct list_head *head);
+extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
+extern void nfs_writedata_release(struct nfs_write_data *wdata);
 extern void nfs_commit_free(struct nfs_write_data *p);
 extern int nfs_initiate_write(struct nfs_write_data *data,
                              struct rpc_clnt *clnt,
index 1f063ba..8102391 100644 (file)
@@ -119,7 +119,7 @@ Elong:
 }
 
 #ifdef CONFIG_NFS_V4
-static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
+rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
 {
        struct gss_api_mech *mech;
        struct xdr_netobj oid;
index b788f2e..1909ee8 100644 (file)
@@ -48,6 +48,7 @@ enum nfs4_client_state {
        NFS4CLNT_SESSION_RESET,
        NFS4CLNT_RECALL_SLOT,
        NFS4CLNT_LEASE_CONFIRM,
+       NFS4CLNT_SERVER_SCOPE_MISMATCH,
 };
 
 enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
                        int cache_reply);
        int     (*validate_stateid)(struct nfs_delegation *,
                        const nfs4_stateid *);
+       int     (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
+                       struct nfs_fsinfo *);
        const struct nfs4_state_recovery_ops *reboot_recovery_ops;
        const struct nfs4_state_recovery_ops *nograce_recovery_ops;
        const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
 extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
 extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
 extern void nfs41_handle_recall_slot(struct nfs_client *clp);
+extern void nfs41_handle_server_scope(struct nfs_client *,
+                                     struct server_scope **);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
 extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
 extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
index f9d03ab..be93a62 100644 (file)
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
                __func__, data->inode->i_ino,
                data->args.pgbase, (size_t)data->args.count, offset);
 
+       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+               return PNFS_NOT_ATTEMPTED;
+
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
        }
-       dprintk("%s USE DS:ip %x %hu\n", __func__,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+       dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
 
        /* No multipath support. Use first DS */
        data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
        struct nfs_fh *fh;
        int status;
 
+       if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
+               return PNFS_NOT_ATTEMPTED;
+
        /* Retrieve the correct rpc_client for the byte range */
        j = nfs4_fl_calc_j_index(lseg, offset);
        idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
                set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
                return PNFS_NOT_ATTEMPTED;
        }
-       dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
+       dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
                data->inode->i_ino, sync, (size_t) data->args.count, offset,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+               ds->ds_remotestr);
 
        data->write_done_cb = filelayout_write_done_cb;
        data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
 
        dprintk("--> %s\n", __func__);
 
+       /* FIXME: remove this check when layout segment support is added */
+       if (lgr->range.offset != 0 ||
+           lgr->range.length != NFS4_MAX_UINT64) {
+               dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
+                       __func__);
+               goto out;
+       }
+
        if (fl->pattern_offset > lgr->range.offset) {
                dprintk("%s pattern_offset %lld too large\n",
                                __func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
                        goto out;
        } else
                dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
+       /* Found deviceid is being reaped */
+       if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+                       goto out_put;
+
        fl->dsaddr = dsaddr;
 
        if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
  * return true  : coalesce page
  * return false : don't coalesce page
  */
-bool
+static bool
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
                   struct nfs_page *req)
 {
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
            !nfs_generic_pg_test(pgio, prev, req))
                return false;
 
-       if (!pgio->pg_lseg)
-               return 1;
        p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
        r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
        stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
        return (p_stripe == r_stripe);
 }
 
+void
+filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
+                       struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          0,
+                                          NFS4_MAX_UINT64,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_read_mds(pgio);
+}
+
+void
+filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
+                        struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          0,
+                                          NFS4_MAX_UINT64,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_write_mds(pgio);
+}
+
+static const struct nfs_pageio_ops filelayout_pg_read_ops = {
+       .pg_init = filelayout_pg_init_read,
+       .pg_test = filelayout_pg_test,
+       .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops filelayout_pg_write_ops = {
+       .pg_init = filelayout_pg_init_write,
+       .pg_test = filelayout_pg_test,
+       .pg_doio = pnfs_generic_pg_writepages,
+};
+
 static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
 {
        return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
        .owner                  = THIS_MODULE,
        .alloc_lseg             = filelayout_alloc_lseg,
        .free_lseg              = filelayout_free_lseg,
-       .pg_test                = filelayout_pg_test,
+       .pg_read_ops            = &filelayout_pg_read_ops,
+       .pg_write_ops           = &filelayout_pg_write_ops,
        .mark_pnfs_commit       = filelayout_mark_pnfs_commit,
        .choose_commit_list     = filelayout_choose_commit_list,
        .commit_pagelist        = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
        pnfs_unregister_layoutdriver(&filelayout_type);
 }
 
+MODULE_ALIAS("nfs-layouttype4-1");
+
 module_init(nfs4filelayout_init);
 module_exit(nfs4filelayout_exit);
index cebe01e..2e42284 100644 (file)
@@ -47,10 +47,17 @@ enum stripetype4 {
 };
 
 /* Individual ip address */
+struct nfs4_pnfs_ds_addr {
+       struct sockaddr_storage da_addr;
+       size_t                  da_addrlen;
+       struct list_head        da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
+       char                    *da_remotestr;  /* human readable addr+port */
+};
+
 struct nfs4_pnfs_ds {
        struct list_head        ds_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
-       u32                     ds_ip_addr;
-       u32                     ds_port;
+       char                    *ds_remotestr;  /* comma sep list of addrs */
+       struct list_head        ds_addrs;
        struct nfs_client       *ds_clp;
        atomic_t                ds_count;
 };
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
                            generic_hdr);
 }
 
+static inline struct nfs4_deviceid_node *
+FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
+{
+       return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
+}
+
 extern struct nfs_fh *
 nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
 
index 3b7bf13..ed388aa 100644 (file)
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
                printk("%s NULL device\n", __func__);
                return;
        }
-       printk("        ip_addr %x port %hu\n"
+       printk("        ds %s\n"
                "        ref count %d\n"
                "        client %p\n"
                "        cl_exchange_flags %x\n",
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+               ds->ds_remotestr,
                atomic_read(&ds->ds_count), ds->ds_clp,
                ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
 }
 
-/* nfs4_ds_cache_lock is held */
-static struct nfs4_pnfs_ds *
-_data_server_lookup_locked(u32 ip_addr, u32 port)
+static bool
+same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
 {
-       struct nfs4_pnfs_ds *ds;
+       struct sockaddr_in *a, *b;
+       struct sockaddr_in6 *a6, *b6;
+
+       if (addr1->sa_family != addr2->sa_family)
+               return false;
+
+       switch (addr1->sa_family) {
+       case AF_INET:
+               a = (struct sockaddr_in *)addr1;
+               b = (struct sockaddr_in *)addr2;
+
+               if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
+                   a->sin_port == b->sin_port)
+                       return true;
+               break;
+
+       case AF_INET6:
+               a6 = (struct sockaddr_in6 *)addr1;
+               b6 = (struct sockaddr_in6 *)addr2;
+
+               /* LINKLOCAL addresses must have matching scope_id */
+               if (ipv6_addr_scope(&a6->sin6_addr) ==
+                   IPV6_ADDR_SCOPE_LINKLOCAL &&
+                   a6->sin6_scope_id != b6->sin6_scope_id)
+                       return false;
+
+               if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
+                   a6->sin6_port == b6->sin6_port)
+                       return true;
+               break;
+
+       default:
+               dprintk("%s: unhandled address family: %u\n",
+                       __func__, addr1->sa_family);
+               return false;
+       }
 
-       dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
-                       ntohl(ip_addr), ntohs(port));
+       return false;
+}
 
-       list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
-               if (ds->ds_ip_addr == ip_addr &&
-                   ds->ds_port == port) {
-                       return ds;
+/*
+ * Lookup DS by addresses.  The first DS with a matching address is returned.
+ * nfs4_ds_cache_lock is held
+ */
+static struct nfs4_pnfs_ds *
+_data_server_lookup_locked(struct list_head *dsaddrs)
+{
+       struct nfs4_pnfs_ds *ds;
+       struct nfs4_pnfs_ds_addr *da1, *da2;
+
+       list_for_each_entry(da1, dsaddrs, da_node) {
+               list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
+                       list_for_each_entry(da2, &ds->ds_addrs, da_node) {
+                               if (same_sockaddr(
+                                       (struct sockaddr *)&da1->da_addr,
+                                       (struct sockaddr *)&da2->da_addr))
+                                       return ds;
+                       }
                }
        }
        return NULL;
 }
 
+/*
+ * Compare two lists of addresses.
+ */
+static bool
+_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
+                                   struct list_head *dsaddrs2)
+{
+       struct nfs4_pnfs_ds_addr *da1, *da2;
+       size_t count1 = 0,
+              count2 = 0;
+
+       list_for_each_entry(da1, dsaddrs1, da_node)
+               count1++;
+
+       list_for_each_entry(da2, dsaddrs2, da_node) {
+               bool found = false;
+               count2++;
+               list_for_each_entry(da1, dsaddrs1, da_node) {
+                       if (same_sockaddr((struct sockaddr *)&da1->da_addr,
+                               (struct sockaddr *)&da2->da_addr)) {
+                               found = true;
+                               break;
+                       }
+               }
+               if (!found)
+                       return false;
+       }
+
+       return (count1 == count2);
+}
+
 /*
  * Create an rpc connection to the nfs4_pnfs_ds data server
- * Currently only support IPv4
+ * Currently only supports IPv4 and IPv6 addresses
  */
 static int
 nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
 {
-       struct nfs_client *clp;
-       struct sockaddr_in sin;
+       struct nfs_client *clp = ERR_PTR(-EIO);
+       struct nfs4_pnfs_ds_addr *da;
        int status = 0;
 
-       dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
-               ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
+       dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
                mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
 
-       sin.sin_family = AF_INET;
-       sin.sin_addr.s_addr = ds->ds_ip_addr;
-       sin.sin_port = ds->ds_port;
+       BUG_ON(list_empty(&ds->ds_addrs));
+
+       list_for_each_entry(da, &ds->ds_addrs, da_node) {
+               dprintk("%s: DS %s: trying address %s\n",
+                       __func__, ds->ds_remotestr, da->da_remotestr);
+
+               clp = nfs4_set_ds_client(mds_srv->nfs_client,
+                                (struct sockaddr *)&da->da_addr,
+                                da->da_addrlen, IPPROTO_TCP);
+               if (!IS_ERR(clp))
+                       break;
+       }
 
-       clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
-                                sizeof(sin), IPPROTO_TCP);
        if (IS_ERR(clp)) {
                status = PTR_ERR(clp);
                goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                        goto out_put;
                }
                ds->ds_clp = clp;
-               dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
-                       ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
+               dprintk("%s [existing] server=%s\n", __func__,
+                       ds->ds_remotestr);
                goto out;
        }
 
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
                goto out_put;
 
        ds->ds_clp = clp;
-       dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
-               ntohs(ds->ds_port));
+       dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
 out:
        return status;
 out_put:
@@ -147,12 +231,25 @@ out_put:
 static void
 destroy_ds(struct nfs4_pnfs_ds *ds)
 {
+       struct nfs4_pnfs_ds_addr *da;
+
        dprintk("--> %s\n", __func__);
        ifdebug(FACILITY)
                print_ds(ds);
 
        if (ds->ds_clp)
                nfs_put_client(ds->ds_clp);
+
+       while (!list_empty(&ds->ds_addrs)) {
+               da = list_first_entry(&ds->ds_addrs,
+                                     struct nfs4_pnfs_ds_addr,
+                                     da_node);
+               list_del_init(&da->da_node);
+               kfree(da->da_remotestr);
+               kfree(da);
+       }
+
+       kfree(ds->ds_remotestr);
        kfree(ds);
 }
 
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
        kfree(dsaddr);
 }
 
+/*
+ * Create a string with a human readable address and port to avoid
+ * complicated setup around many dprintks.
+ */
+static char *
+nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
+{
+       struct nfs4_pnfs_ds_addr *da;
+       char *remotestr;
+       size_t len;
+       char *p;
+
+       len = 3;        /* '{', '}' and eol */
+       list_for_each_entry(da, dsaddrs, da_node) {
+               len += strlen(da->da_remotestr) + 1;    /* string plus comma */
+       }
+
+       remotestr = kzalloc(len, gfp_flags);
+       if (!remotestr)
+               return NULL;
+
+       p = remotestr;
+       *(p++) = '{';
+       len--;
+       list_for_each_entry(da, dsaddrs, da_node) {
+               size_t ll = strlen(da->da_remotestr);
+
+               if (ll > len)
+                       goto out_err;
+
+               memcpy(p, da->da_remotestr, ll);
+               p += ll;
+               len -= ll;
+
+               if (len < 1)
+                       goto out_err;
+               (*p++) = ',';
+               len--;
+       }
+       if (len < 2)
+               goto out_err;
+       *(p++) = '}';
+       *p = '\0';
+       return remotestr;
+out_err:
+       kfree(remotestr);
+       return NULL;
+}
+
 static struct nfs4_pnfs_ds *
-nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags)
+nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
 {
-       struct nfs4_pnfs_ds *tmp_ds, *ds;
+       struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
+       char *remotestr;
 
-       ds = kzalloc(sizeof(*tmp_ds), gfp_flags);
+       if (list_empty(dsaddrs)) {
+               dprintk("%s: no addresses defined\n", __func__);
+               goto out;
+       }
+
+       ds = kzalloc(sizeof(*ds), gfp_flags);
        if (!ds)
                goto out;
 
+       /* this is only used for debugging, so it's ok if it's NULL */
+       remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
+
        spin_lock(&nfs4_ds_cache_lock);
-       tmp_ds = _data_server_lookup_locked(ip_addr, port);
+       tmp_ds = _data_server_lookup_locked(dsaddrs);
        if (tmp_ds == NULL) {
-               ds->ds_ip_addr = ip_addr;
-               ds->ds_port = port;
+               INIT_LIST_HEAD(&ds->ds_addrs);
+               list_splice_init(dsaddrs, &ds->ds_addrs);
+               ds->ds_remotestr = remotestr;
                atomic_set(&ds->ds_count, 1);
                INIT_LIST_HEAD(&ds->ds_node);
                ds->ds_clp = NULL;
                list_add(&ds->ds_node, &nfs4_data_server_cache);
-               dprintk("%s add new data server ip 0x%x\n", __func__,
-                       ds->ds_ip_addr);
+               dprintk("%s add new data server %s\n", __func__,
+                       ds->ds_remotestr);
        } else {
+               if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
+                                                        dsaddrs)) {
+                       dprintk("%s:  multipath address mismatch: %s != %s",
+                               __func__, tmp_ds->ds_remotestr, remotestr);
+               }
+               kfree(remotestr);
                kfree(ds);
                atomic_inc(&tmp_ds->ds_count);
-               dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
-                       __func__, tmp_ds->ds_ip_addr,
+               dprintk("%s data server %s found, inc'ed ds_count to %d\n",
+                       __func__, tmp_ds->ds_remotestr,
                        atomic_read(&tmp_ds->ds_count));
                ds = tmp_ds;
        }
@@ -213,18 +375,22 @@ out:
 }
 
 /*
- * Currently only support ipv4, and one multi-path address.
+ * Currently only supports ipv4, ipv6 and one multi-path address.
  */
-static struct nfs4_pnfs_ds *
-decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags)
+static struct nfs4_pnfs_ds_addr *
+decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
 {
-       struct nfs4_pnfs_ds *ds = NULL;
-       char *buf;
-       const char *ipend, *pstr;
-       u32 ip_addr, port;
-       int nlen, rlen, i;
+       struct nfs4_pnfs_ds_addr *da = NULL;
+       char *buf, *portstr;
+       u32 port;
+       int nlen, rlen;
        int tmp[2];
        __be32 *p;
+       char *netid, *match_netid;
+       size_t len, match_netid_len;
+       char *startsep = "";
+       char *endsep = "";
+
 
        /* r_netid */
        p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
        if (unlikely(!p))
                goto out_err;
 
-       /* Check that netid is "tcp" */
-       if (nlen != 3 ||  memcmp((char *)p, "tcp", 3)) {
-               dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
+       netid = kmalloc(nlen+1, gfp_flags);
+       if (unlikely(!netid))
                goto out_err;
-       }
 
-       /* r_addr */
+       netid[nlen] = '\0';
+       memcpy(netid, p, nlen);
+
+       /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
        p = xdr_inline_decode(streamp, 4);
        if (unlikely(!p))
-               goto out_err;
+               goto out_free_netid;
        rlen = be32_to_cpup(p);
 
        p = xdr_inline_decode(streamp, rlen);
        if (unlikely(!p))
-               goto out_err;
+               goto out_free_netid;
 
-       /* ipv6 length plus port is legal */
-       if (rlen > INET6_ADDRSTRLEN + 8) {
+       /* port is ".ABC.DEF", 8 chars max */
+       if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
                dprintk("%s: Invalid address, length %d\n", __func__,
                        rlen);
-               goto out_err;
+               goto out_free_netid;
        }
        buf = kmalloc(rlen + 1, gfp_flags);
        if (!buf) {
                dprintk("%s: Not enough memory\n", __func__);
-               goto out_err;
+               goto out_free_netid;
        }
        buf[rlen] = '\0';
        memcpy(buf, p, rlen);
 
-       /* replace the port dots with dashes for the in4_pton() delimiter*/
-       for (i = 0; i < 2; i++) {
-               char *res = strrchr(buf, '.');
-               if (!res) {
-                       dprintk("%s: Failed finding expected dots in port\n",
-                               __func__);
-                       goto out_free;
-               }
-               *res = '-';
+       /* replace port '.' with '-' */
+       portstr = strrchr(buf, '.');
+       if (!portstr) {
+               dprintk("%s: Failed finding expected dot in port\n",
+                       __func__);
+               goto out_free_buf;
+       }
+       *portstr = '-';
+
+       /* find '.' between address and port */
+       portstr = strrchr(buf, '.');
+       if (!portstr) {
+               dprintk("%s: Failed finding expected dot between address and "
+                       "port\n", __func__);
+               goto out_free_buf;
        }
+       *portstr = '\0';
 
-       /* Currently only support ipv4 address */
-       if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
-               dprintk("%s: Only ipv4 addresses supported\n", __func__);
-               goto out_free;
+       da = kzalloc(sizeof(*da), gfp_flags);
+       if (unlikely(!da))
+               goto out_free_buf;
+
+       INIT_LIST_HEAD(&da->da_node);
+
+       if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
+                     sizeof(da->da_addr))) {
+               dprintk("%s: error parsing address %s\n", __func__, buf);
+               goto out_free_da;
        }
 
-       /* port */
-       pstr = ipend;
-       sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
+       portstr++;
+       sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
        port = htons((tmp[0] << 8) | (tmp[1]));
 
-       ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags);
-       dprintk("%s: Decoded address and port %s\n", __func__, buf);
-out_free:
+       switch (da->da_addr.ss_family) {
+       case AF_INET:
+               ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
+               da->da_addrlen = sizeof(struct sockaddr_in);
+               match_netid = "tcp";
+               match_netid_len = 3;
+               break;
+
+       case AF_INET6:
+               ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
+               da->da_addrlen = sizeof(struct sockaddr_in6);
+               match_netid = "tcp6";
+               match_netid_len = 4;
+               startsep = "[";
+               endsep = "]";
+               break;
+
+       default:
+               dprintk("%s: unsupported address family: %u\n",
+                       __func__, da->da_addr.ss_family);
+               goto out_free_da;
+       }
+
+       if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
+               dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
+                       __func__, netid, match_netid);
+               goto out_free_da;
+       }
+
+       /* save human readable address */
+       len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
+       da->da_remotestr = kzalloc(len, gfp_flags);
+
+       /* NULL is ok, only used for dprintk */
+       if (da->da_remotestr)
+               snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
+                        buf, endsep, ntohs(port));
+
+       dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
        kfree(buf);
+       kfree(netid);
+       return da;
+
+out_free_da:
+       kfree(da);
+out_free_buf:
+       dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
+       kfree(buf);
+out_free_netid:
+       kfree(netid);
 out_err:
-       return ds;
+       return NULL;
 }
 
 /* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
        struct xdr_stream stream;
        struct xdr_buf buf;
        struct page *scratch;
+       struct list_head dsaddrs;
+       struct nfs4_pnfs_ds_addr *da;
 
        /* set up xdr stream */
        scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
                                NFS_SERVER(ino)->nfs_client,
                                &pdev->dev_id);
 
+       INIT_LIST_HEAD(&dsaddrs);
+
        for (i = 0; i < dsaddr->ds_num; i++) {
                int j;
                u32 mp_count;
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
                        goto out_err_free_deviceid;
 
                mp_count = be32_to_cpup(p); /* multipath count */
-               if (mp_count > 1) {
-                       printk(KERN_WARNING
-                              "%s: Multipath count %d not supported, "
-                              "skipping all greater than 1\n", __func__,
-                               mp_count);
-               }
                for (j = 0; j < mp_count; j++) {
-                       if (j == 0) {
-                               dsaddr->ds_list[i] = decode_and_add_ds(&stream,
-                                       ino, gfp_flags);
-                               if (dsaddr->ds_list[i] == NULL)
-                                       goto out_err_free_deviceid;
-                       } else {
-                               u32 len;
-                               /* skip extra multipath */
-
-                               /* read len, skip */
-                               p = xdr_inline_decode(&stream, 4);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                               len = be32_to_cpup(p);
-
-                               p = xdr_inline_decode(&stream, len);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-
-                               /* read len, skip */
-                               p = xdr_inline_decode(&stream, 4);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                               len = be32_to_cpup(p);
-
-                               p = xdr_inline_decode(&stream, len);
-                               if (unlikely(!p))
-                                       goto out_err_free_deviceid;
-                       }
+                       da = decode_ds_addr(&stream, gfp_flags);
+                       if (da)
+                               list_add_tail(&da->da_node, &dsaddrs);
+               }
+               if (list_empty(&dsaddrs)) {
+                       dprintk("%s: no suitable DS addresses found\n",
+                               __func__);
+                       goto out_err_free_deviceid;
+               }
+
+               dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
+               if (!dsaddr->ds_list[i])
+                       goto out_err_drain_dsaddrs;
+
+               /* If DS was already in cache, free ds addrs */
+               while (!list_empty(&dsaddrs)) {
+                       da = list_first_entry(&dsaddrs,
+                                             struct nfs4_pnfs_ds_addr,
+                                             da_node);
+                       list_del_init(&da->da_node);
+                       kfree(da->da_remotestr);
+                       kfree(da);
                }
        }
 
        __free_page(scratch);
        return dsaddr;
 
+out_err_drain_dsaddrs:
+       while (!list_empty(&dsaddrs)) {
+               da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
+                                     da_node);
+               list_del_init(&da->da_node);
+               kfree(da->da_remotestr);
+               kfree(da);
+       }
 out_err_free_deviceid:
        nfs4_fl_free_deviceid(dsaddr);
        /* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
 
 static void
 filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
-                              int err, u32 ds_addr)
+                              int err, const char *ds_remotestr)
 {
        u32 *p = (u32 *)&dsaddr->id_node.deviceid;
 
-       printk(KERN_ERR "NFS: data server %x connection error %d."
+       printk(KERN_ERR "NFS: data server %s connection error %d."
                " Deviceid [%x%x%x%x] marked out of use.\n",
-               ds_addr, err, p[0], p[1], p[2], p[3]);
+               ds_remotestr, err, p[0], p[1], p[2], p[3]);
 
        spin_lock(&nfs4_ds_cache_lock);
        dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
                err = nfs4_ds_connect(s, ds);
                if (err) {
                        filelayout_mark_devid_negative(dsaddr, err,
-                                                      ntohl(ds->ds_ip_addr));
+                                                      ds->ds_remotestr);
                        return NULL;
                }
        }
index 26bece8..079614d 100644 (file)
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
                            struct nfs_fattr *fattr, struct iattr *sattr,
                            struct nfs4_state *state);
-
+#ifdef CONFIG_NFS_V4_1
+static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
+static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
+#endif
 /* Prevent leaks of NFSv4 errors into userland */
 static int nfs4_map_errors(int err)
 {
@@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
        return ret;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 open recovery: probe the stateid with nfs41_test_stateid().
+ * When that reports NFS_OK there is nothing to recover and 0 is returned.
+ * Any other result releases the stateid with nfs41_free_stateid() and
+ * hands the recovery over to nfs4_open_expired().
+ */
+static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+       struct nfs_server *server = NFS_SERVER(state->inode);
+
+       if (nfs41_test_stateid(server, state) != NFS_OK) {
+               /* the result of the free is not examined */
+               nfs41_free_stateid(server, state);
+               return nfs4_open_expired(sp, state);
+       }
+       return 0;
+}
+#endif
+
 /*
  * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
  * fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
                              struct nfs_fsinfo *info)
 {
+       int minor_version = server->nfs_client->cl_minorversion;
        int status = nfs4_lookup_root(server, fhandle, info);
        if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
                /*
                 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
                 * by nfs4_map_errors() as this function exits.
                 */
-               status = nfs4_find_root_sec(server, fhandle, info);
+               status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
        if (status == 0)
                status = nfs4_server_capabilities(server, fhandle);
        if (status == 0)
@@ -4441,6 +4459,20 @@ out:
        return err;
 }
 
+#if defined(CONFIG_NFS_V4_1)
+/*
+ * NFSv4.1 lock recovery: probe the stateid with nfs41_test_stateid().
+ * NFS_OK means nothing has to be recovered and 0 is returned; any other
+ * result frees the stateid via nfs41_free_stateid() and falls back to
+ * nfs4_lock_expired().
+ */
+static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
+{
+       struct nfs_server *server = NFS_SERVER(state->inode);
+
+       if (nfs41_test_stateid(server, state) != NFS_OK) {
+               /* the result of the free is not examined */
+               nfs41_free_stateid(server, state);
+               return nfs4_lock_expired(state, request);
+       }
+       return 0;
+}
+#endif
+
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
        struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@ out_inval:
        return -NFS4ERR_INVAL;
 }
 
+/*
+ * Two server scopes match when their sizes are equal and the first
+ * server_scope_sz bytes of their server_scope buffers are identical.
+ */
+static bool
+nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
+{
+       if (a->server_scope_sz != b->server_scope_sz)
+               return false;
+
+       return !memcmp(a->server_scope, b->server_scope, a->server_scope_sz);
+}
+
 /*
  * nfs4_proc_exchange_id()
  *
@@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                                init_utsname()->domainname,
                                clp->cl_rpcclient->cl_auth->au_flavor);
 
+       res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
+       if (unlikely(!res.server_scope))
+               return -ENOMEM;
+
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (!status)
                status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
+
+       if (!status) {
+               if (clp->server_scope &&
+                   !nfs41_same_server_scope(clp->server_scope,
+                                            res.server_scope)) {
+                       dprintk("%s: server_scope mismatch detected\n",
+                               __func__);
+                       set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
+                       kfree(clp->server_scope);
+                       clp->server_scope = NULL;
+               }
+
+               if (!clp->server_scope)
+                       clp->server_scope = res.server_scope;
+               else
+                       kfree(res.server_scope);
+       }
+
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
@@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 {
        struct nfs4_layoutreturn *lrp = calldata;
        struct nfs_server *server;
-       struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+       struct pnfs_layout_hdr *lo = lrp->args.layout;
 
        dprintk("--> %s\n", __func__);
 
@@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata)
        struct nfs4_layoutreturn *lrp = calldata;
 
        dprintk("--> %s\n", __func__);
-       put_layout_hdr(NFS_I(lrp->args.inode)->layout);
+       put_layout_hdr(lrp->args.layout);
        kfree(calldata);
        dprintk("<-- %s\n", __func__);
 }
@@ -5901,6 +5965,143 @@ out:
        rpc_put_task(task);
        return status;
 }
+
+/*
+ * Perform one synchronous SECINFO_NO_NAME call using
+ * SECINFO_STYLE_CURRENT_FH.  @flavors is attached to the reply structure
+ * handed to the RPC layer; @fhandle and @info are not used here.
+ */
+static int
+_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+                   struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+       struct nfs41_secinfo_no_name_args sn_args = {
+               .style = SECINFO_STYLE_CURRENT_FH,
+       };
+       struct nfs4_secinfo_res sn_res = {
+               .flavors = flavors,
+       };
+       struct rpc_message rpc = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
+               .rpc_argp = &sn_args,
+               .rpc_resp = &sn_res,
+       };
+
+       return nfs4_call_sync(server->client, server, &rpc,
+                             &sn_args.seq_args, &sn_res.seq_res, 0);
+}
+
+/*
+ * Issue SECINFO_NO_NAME, going through nfs4_handle_exception() for any
+ * error that is not a final answer for the caller.
+ *
+ * Success, -NFS4ERR_WRONGSEC and -NFS4ERR_NOTSUPP are returned straight
+ * away.  Returning (rather than merely leaving the switch) matters:
+ * nothing else in this loop updates exception.retry, so if an earlier
+ * iteration asked for a retry and the retried call then produced one of
+ * these results, the loop condition would still see the stale retry
+ * flag and send the request again.
+ *
+ * Returns 0 on success or a negative error value.
+ */
+static int
+nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
+                          struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
+{
+       struct nfs4_exception exception = { };
+       int err;
+
+       do {
+               err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+               switch (err) {
+               case 0:
+               case -NFS4ERR_WRONGSEC:
+               case -NFS4ERR_NOTSUPP:
+                       /* final result: do not consult a stale retry flag */
+                       return err;
+               default:
+                       err = nfs4_handle_exception(server, err, &exception);
+               }
+       } while (exception.retry);
+       return err;
+}
+
+/*
+ * Pick a security flavor for the root filehandle on NFSv4.1.
+ *
+ * SECINFO_NO_NAME is tried first, with a freshly allocated page serving
+ * as the flavor array.  When it answers -NFS4ERR_WRONGSEC or
+ * -NFS4ERR_NOTSUPP the "guess and check" nfs4_find_root_sec() is used
+ * instead; on success the flavor chosen by nfs_find_best_sec() is fed to
+ * nfs4_lookup_root_sec().  Any other error is passed through.
+ *
+ * Returns -ENOMEM if the page cannot be allocated; a final -EACCES is
+ * reported to the caller as -EPERM.
+ */
+static int
+nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
+                   struct nfs_fsinfo *info)
+{
+       struct nfs4_secinfo_flavors *flavors;
+       rpc_authflavor_t flavor;
+       struct page *page;
+       int err;
+
+       page = alloc_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+
+       flavors = page_address(page);
+       err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
+       switch (err) {
+       case -NFS4ERR_WRONGSEC:
+       case -NFS4ERR_NOTSUPP:
+               /* server can't do SECINFO_NO_NAME: guess and check */
+               err = nfs4_find_root_sec(server, fhandle, info);
+               break;
+       case 0:
+               flavor = nfs_find_best_sec(flavors);
+               err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
+               break;
+       default:
+               break;
+       }
+
+       put_page(page);
+       return (err == -EACCES) ? -EPERM : err;
+}
+/*
+ * Send a single synchronous TEST_STATEID for state->stateid and return
+ * whatever nfs4_call_sync_sequence() reports.  Both session pointers in
+ * the sequence args/res are cleared before the call.
+ */
+static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs41_test_stateid_args ts_args = {
+               .stateid = &state->stateid,
+       };
+       struct nfs41_test_stateid_res ts_res;
+       struct rpc_message rpc = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
+               .rpc_argp = &ts_args,
+               .rpc_resp = &ts_res,
+       };
+
+       ts_res.seq_res.sr_session = NULL;
+       ts_args.seq_args.sa_session = NULL;
+       return nfs4_call_sync_sequence(server->client, server, &rpc,
+                                      &ts_args.seq_args, &ts_res.seq_res,
+                                      0, 1);
+}
+
+/*
+ * TEST_STATEID with exception handling: every attempt's result is run
+ * through nfs4_handle_exception(), and the call is repeated for as long
+ * as that sets exception.retry.  Returns the last handled result.
+ */
+static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs4_exception exception = { };
+       int err;
+
+       do {
+               err = _nfs41_test_stateid(server, state);
+               err = nfs4_handle_exception(server, err, &exception);
+       } while (exception.retry);
+
+       return err;
+}
+
+/*
+ * Send a single synchronous FREE_STATEID for state->stateid and return
+ * whatever nfs4_call_sync_sequence() reports.  Both session pointers in
+ * the sequence args/res are cleared before the call.
+ */
+static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs41_free_stateid_args fs_args = {
+               .stateid = &state->stateid,
+       };
+       struct nfs41_free_stateid_res fs_res;
+       struct rpc_message rpc = {
+               .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
+               .rpc_argp = &fs_args,
+               .rpc_resp = &fs_res,
+       };
+
+       fs_res.seq_res.sr_session = NULL;
+       fs_args.seq_args.sa_session = NULL;
+       return nfs4_call_sync_sequence(server->client, server, &rpc,
+                                      &fs_args.seq_args, &fs_res.seq_res,
+                                      0, 1);
+}
+
+/*
+ * FREE_STATEID with exception handling: every attempt's result is run
+ * through nfs4_handle_exception(), and the call is repeated for as long
+ * as that sets exception.retry.  Returns the last handled result.
+ */
+static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
+{
+       struct nfs4_exception exception = { };
+       int err;
+
+       do {
+               err = _nfs4_free_stateid(server, state);
+               err = nfs4_handle_exception(server, err, &exception);
+       } while (exception.retry);
+
+       return err;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
 struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
        .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
        .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
-       .recover_open   = nfs4_open_expired,
-       .recover_lock   = nfs4_lock_expired,
+       .recover_open   = nfs41_open_expired,
+       .recover_lock   = nfs41_lock_expired,
        .establish_clid = nfs41_init_clientid,
        .get_clid_cred  = nfs4_get_exchange_id_cred,
 };
@@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
        .minor_version = 0,
        .call_sync = _nfs4_call_sync,
        .validate_stateid = nfs4_validate_delegation_stateid,
+       .find_root_sec = nfs4_find_root_sec,
        .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
        .state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
        .minor_version = 1,
        .call_sync = _nfs4_call_sync_session,
        .validate_stateid = nfs41_validate_delegation_stateid,
+       .find_root_sec = nfs41_find_root_sec,
        .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
        .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
        .state_renewal_ops = &nfs41_state_renewal_ops,
index 7acfe88..72ab97e 100644 (file)
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
                                goto out_error;
                        }
                        clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
-                       set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+
+                       if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
+                                              &clp->cl_state))
+                               nfs4_state_start_reclaim_nograce(clp);
+                       else
+                               set_bit(NFS4CLNT_RECLAIM_REBOOT,
+                                       &clp->cl_state);
+
                        pnfs_destroy_all_layouts(clp);
                }
 
index e6e8f3b..c191a9b 100644 (file)
@@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int);
                                1 /* FIXME: opaque lrf_body always empty at the moment */)
 #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
                                1 + decode_stateid_maxsz)
+#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
+#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
+#define encode_test_stateid_maxsz      (op_encode_hdr_maxsz + 2 + \
+                                        XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_test_stateid_maxsz      (op_decode_hdr_maxsz + 2 + 1)
+#define encode_free_stateid_maxsz      (op_encode_hdr_maxsz + 1 + \
+                                        XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define decode_free_stateid_maxsz      (op_decode_hdr_maxsz + 1)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz  0
 #define decode_sequence_maxsz  0
@@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int);
                                decode_sequence_maxsz + \
                                decode_putfh_maxsz + \
                                decode_layoutreturn_maxsz)
+#define NFS4_enc_secinfo_no_name_sz    (compound_encode_hdr_maxsz + \
+                                       encode_sequence_maxsz + \
+                                       encode_putrootfh_maxsz +\
+                                       encode_secinfo_no_name_maxsz)
+#define NFS4_dec_secinfo_no_name_sz    (compound_decode_hdr_maxsz + \
+                                       decode_sequence_maxsz + \
+                                       decode_putrootfh_maxsz + \
+                                       decode_secinfo_no_name_maxsz)
+#define NFS4_enc_test_stateid_sz       (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_test_stateid_maxsz)
+#define NFS4_dec_test_stateid_sz       (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_test_stateid_maxsz)
+#define NFS4_enc_free_stateid_sz       (compound_encode_hdr_maxsz + \
+                                        encode_sequence_maxsz + \
+                                        encode_free_stateid_maxsz)
+#define NFS4_dec_free_stateid_sz       (compound_decode_hdr_maxsz + \
+                                        decode_sequence_maxsz + \
+                                        decode_free_stateid_maxsz)
 
 const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
                                      compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr,
        hdr->nops++;
        hdr->replen += decode_layoutreturn_maxsz;
 }
+
+/*
+ * Append a SECINFO_NO_NAME operation: the opcode followed by args->style,
+ * two 32-bit words in total.  Bumps the compound's op count and adds the
+ * expected reply size to hdr->replen.  Always returns 0.
+ */
+static int
+encode_secinfo_no_name(struct xdr_stream *xdr,
+                      const struct nfs41_secinfo_no_name_args *args,
+                      struct compound_hdr *hdr)
+{
+       __be32 *words = reserve_space(xdr, 8);
+
+       words[0] = cpu_to_be32(OP_SECINFO_NO_NAME);
+       words[1] = cpu_to_be32(args->style);
+       hdr->replen += decode_secinfo_no_name_maxsz;
+       hdr->nops++;
+       return 0;
+}
+
+/*
+ * Append a TEST_STATEID operation: the opcode, a count of 1, then the
+ * NFS4_STATEID_SIZE bytes of args->stateid.  Bumps the compound's op
+ * count and adds the expected reply size to hdr->replen.
+ */
+static void encode_test_stateid(struct xdr_stream *xdr,
+                               struct nfs41_test_stateid_args *args,
+                               struct compound_hdr *hdr)
+{
+       __be32 *words = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
+
+       words[0] = cpu_to_be32(OP_TEST_STATEID);
+       words[1] = cpu_to_be32(1);
+       xdr_encode_opaque_fixed(words + 2, args->stateid->data,
+                               NFS4_STATEID_SIZE);
+       hdr->replen += decode_test_stateid_maxsz;
+       hdr->nops++;
+}
+
+/*
+ * Append a FREE_STATEID operation: the opcode followed by the
+ * NFS4_STATEID_SIZE bytes of args->stateid.  Bumps the compound's op
+ * count and adds the expected reply size to hdr->replen.
+ */
+static void encode_free_stateid(struct xdr_stream *xdr,
+                               struct nfs41_free_stateid_args *args,
+                               struct compound_hdr *hdr)
+{
+       __be32 *words = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
+
+       words[0] = cpu_to_be32(OP_FREE_STATEID);
+       xdr_encode_opaque_fixed(words + 1, args->stateid->data,
+                               NFS4_STATEID_SIZE);
+       hdr->replen += decode_free_stateid_maxsz;
+       hdr->nops++;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
        encode_layoutreturn(xdr, args, &hdr);
        encode_nops(&hdr);
 }
+
+/*
+ * Encode a SECINFO_NO_NAME compound:
+ *   SEQUENCE, PUTROOTFH, SECINFO_NO_NAME
+ * Always returns 0.
+ */
+static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
+                                       struct xdr_stream *xdr,
+                                       struct nfs41_secinfo_no_name_args *args)
+{
+       struct compound_hdr compound = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &compound);
+       encode_sequence(xdr, &args->seq_args, &compound);
+       encode_putrootfh(xdr, &compound);
+       encode_secinfo_no_name(xdr, args, &compound);
+       encode_nops(&compound);
+
+       return 0;
+}
+
+/*
+ * Encode a TEST_STATEID compound:
+ *   SEQUENCE, TEST_STATEID
+ */
+static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
+                                     struct xdr_stream *xdr,
+                                     struct nfs41_test_stateid_args *args)
+{
+       struct compound_hdr compound = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &compound);
+       encode_sequence(xdr, &args->seq_args, &compound);
+       encode_test_stateid(xdr, args, &compound);
+       encode_nops(&compound);
+}
+
+/*
+ * Encode a FREE_STATEID compound:
+ *   SEQUENCE, FREE_STATEID
+ */
+static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_free_stateid_args *args)
+{
+       struct compound_hdr compound = {
+               .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+       };
+
+       encode_compound_hdr(xdr, req, &compound);
+       encode_sequence(xdr, &args->seq_args, &compound);
+       encode_free_stateid(xdr, args, &compound);
+       encode_nops(&compound);
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
        if (unlikely(status))
                return status;
 
-       /* Throw away server_scope */
+       /* Save server_scope */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
                return status;
 
+       if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
+               return -EIO;
+
+       memcpy(res->server_scope->server_scope, dummy_str, dummy);
+       res->server_scope->server_scope_sz = dummy;
+
        /* Throw away Implementation id array */
        status = decode_opaque_inline(xdr, &dummy, &dummy_str);
        if (unlikely(status))
@@ -5322,6 +5449,55 @@ out_overflow:
        print_overflow_msg(__func__, xdr);
        return -EIO;
 }
+
+/*
+ * Decode a TEST_STATEID result.
+ *
+ * After the op header, the reply carries a 32-bit result count, which
+ * must be exactly 1, followed by one 32-bit status word.  That word is
+ * stored in res->status and is also the return value.  Returns the
+ * decode_op_hdr() error if the header is bad, or -EIO when the stream is
+ * too short or the count is not 1.
+ */
+static int decode_test_stateid(struct xdr_stream *xdr,
+                              struct nfs41_test_stateid_res *res)
+{
+       __be32 *word;
+       int err;
+
+       err = decode_op_hdr(xdr, OP_TEST_STATEID);
+       if (err)
+               return err;
+
+       /* result count: only a single result is accepted */
+       word = xdr_inline_decode(xdr, 4);
+       if (unlikely(!word))
+               goto out_overflow;
+       if (be32_to_cpup(word) != 1)
+               return -EIO;
+
+       word = xdr_inline_decode(xdr, 4);
+       if (unlikely(!word))
+               goto out_overflow;
+       res->status = be32_to_cpup(word);
+       return res->status;
+
+out_overflow:
+       print_overflow_msg(__func__, xdr);
+       return -EIO;
+}
+
+/*
+ * Decode a FREE_STATEID result.
+ *
+ * After the op header, one 32-bit status word is read, stored in
+ * res->status and returned.  Returns the decode_op_hdr() error if the
+ * header is bad, or -EIO when the stream is too short.
+ */
+static int decode_free_stateid(struct xdr_stream *xdr,
+                              struct nfs41_free_stateid_res *res)
+{
+       __be32 *word;
+       int err;
+
+       err = decode_op_hdr(xdr, OP_FREE_STATEID);
+       if (err)
+               return err;
+
+       word = xdr_inline_decode(xdr, 4);
+       if (unlikely(!word)) {
+               print_overflow_msg(__func__, xdr);
+               return -EIO;
+       }
+
+       res->status = be32_to_cpup(word);
+       return res->status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /*
@@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
 out:
        return status;
 }
+
+/*
+ * Decode SECINFO_NO_NAME response
+ */
+static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
+                                       struct xdr_stream *xdr,
+                                       struct nfs4_secinfo_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_putrootfh(xdr);
+       if (status)
+               goto out;
+       status = decode_secinfo(xdr, res);
+out:
+       return status;
+}
+
+/*
+ * Decode TEST_STATEID response
+ */
+static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_test_stateid_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_test_stateid(xdr, res);
+out:
+       return status;
+}
+
+/*
+ * Decode FREE_STATEID response
+ */
+static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
+                                    struct xdr_stream *xdr,
+                                    struct nfs41_free_stateid_res *res)
+{
+       struct compound_hdr hdr;
+       int status;
+
+       status = decode_compound_hdr(xdr, &hdr);
+       if (status)
+               goto out;
+       status = decode_sequence(xdr, &res->seq_res, rqstp);
+       if (status)
+               goto out;
+       status = decode_free_stateid(xdr, res);
+out:
+       return status;
+}
 #endif /* CONFIG_NFS_V4_1 */
 
 /**
@@ -6663,6 +6905,9 @@ struct rpc_procinfo       nfs4_procedures[] = {
        PROC(LAYOUTGET,         enc_layoutget,          dec_layoutget),
        PROC(LAYOUTCOMMIT,      enc_layoutcommit,       dec_layoutcommit),
        PROC(LAYOUTRETURN,      enc_layoutreturn,       dec_layoutreturn),
+       PROC(SECINFO_NO_NAME,   enc_secinfo_no_name,    dec_secinfo_no_name),
+       PROC(TEST_STATEID,      enc_test_stateid,       dec_test_stateid),
+       PROC(FREE_STATEID,      enc_free_stateid,       dec_free_stateid),
 #endif /* CONFIG_NFS_V4_1 */
 };
 
index 8ff2ea3..9383ca7 100644 (file)
@@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
        if (!pnfs_generic_pg_test(pgio, prev, req))
                return false;
 
-       if (pgio->pg_lseg == NULL)
-               return true;
-
        return pgio->pg_count + req->wb_bytes <=
                        OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
 }
 
+static const struct nfs_pageio_ops objio_pg_read_ops = {
+       .pg_init = pnfs_generic_pg_init_read,
+       .pg_test = objio_pg_test,
+       .pg_doio = pnfs_generic_pg_readpages,
+};
+
+static const struct nfs_pageio_ops objio_pg_write_ops = {
+       .pg_init = pnfs_generic_pg_init_write,
+       .pg_test = objio_pg_test,
+       .pg_doio = pnfs_generic_pg_writepages,
+};
+
 static struct pnfs_layoutdriver_type objlayout_type = {
        .id = LAYOUT_OSD2_OBJECTS,
        .name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
 
        .read_pagelist           = objlayout_read_pagelist,
        .write_pagelist          = objlayout_write_pagelist,
-       .pg_test                 = objio_pg_test,
+       .pg_read_ops             = &objio_pg_read_ops,
+       .pg_write_ops            = &objio_pg_write_ops,
 
        .free_deviceid_node      = objio_free_deviceid_node,
 
@@ -1055,5 +1065,7 @@ objlayout_exit(void)
               __func__);
 }
 
+MODULE_ALIAS("nfs-layouttype4-2");
+
 module_init(objlayout_init);
 module_exit(objlayout_exit);
index 18449f4..b60970c 100644 (file)
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
  */
 void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                     struct inode *inode,
-                    int (*doio)(struct nfs_pageio_descriptor *),
+                    const struct nfs_pageio_ops *pg_ops,
                     size_t bsize,
                     int io_flags)
 {
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
        desc->pg_bsize = bsize;
        desc->pg_base = 0;
        desc->pg_moreio = 0;
+       desc->pg_recoalesce = 0;
        desc->pg_inode = inode;
-       desc->pg_doio = doio;
+       desc->pg_ops = pg_ops;
        desc->pg_ioflags = io_flags;
        desc->pg_error = 0;
        desc->pg_lseg = NULL;
-       desc->pg_test = nfs_generic_pg_test;
-       pnfs_pageio_init(desc, inode);
 }
 
 /**
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
                return false;
        if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
                return false;
-       return pgio->pg_test(pgio, prev, req);
+       return pgio->pg_ops->pg_test(pgio, prev, req);
 }
 
 /**
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
                if (!nfs_can_coalesce_requests(prev, req, desc))
                        return 0;
        } else {
+               if (desc->pg_ops->pg_init)
+                       desc->pg_ops->pg_init(desc, req);
                desc->pg_base = req->wb_pgbase;
        }
        nfs_list_remove_request(req);
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
 static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
 {
        if (!list_empty(&desc->pg_list)) {
-               int error = desc->pg_doio(desc);
+               int error = desc->pg_ops->pg_doio(desc);
                if (error < 0)
                        desc->pg_error = error;
                else
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
  * Returns true if the request 'req' was successfully coalesced into the
  * existing list of pages 'desc'.
  */
-int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                           struct nfs_page *req)
 {
        while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
                if (desc->pg_error < 0)
                        return 0;
                desc->pg_moreio = 0;
+               if (desc->pg_recoalesce)
+                       return 0;
        }
        return 1;
 }
 
+static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+
+       do {
+               list_splice_init(&desc->pg_list, &head);
+               desc->pg_bytes_written -= desc->pg_count;
+               desc->pg_count = 0;
+               desc->pg_base = 0;
+               desc->pg_recoalesce = 0;
+
+               while (!list_empty(&head)) {
+                       struct nfs_page *req;
+
+                       req = list_first_entry(&head, struct nfs_page, wb_list);
+                       nfs_list_remove_request(req);
+                       if (__nfs_pageio_add_request(desc, req))
+                               continue;
+                       if (desc->pg_error < 0)
+                               return 0;
+                       break;
+               }
+       } while (desc->pg_recoalesce);
+       return 1;
+}
+
+int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
+               struct nfs_page *req)
+{
+       int ret;
+
+       do {
+               ret = __nfs_pageio_add_request(desc, req);
+               if (ret)
+                       break;
+               if (desc->pg_error < 0)
+                       break;
+               ret = nfs_do_recoalesce(desc);
+       } while (ret);
+       return ret;
+}
+
 /**
  * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
  * @desc: pointer to io descriptor
  */
 void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
 {
-       nfs_pageio_doio(desc);
+       for (;;) {
+               nfs_pageio_doio(desc);
+               if (!desc->pg_recoalesce)
+                       break;
+               if (!nfs_do_recoalesce(desc))
+                       break;
+       }
 }
 
 /**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
        if (!list_empty(&desc->pg_list)) {
                struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
                if (index != prev->wb_index + 1)
-                       nfs_pageio_doio(desc);
+                       nfs_pageio_complete(desc);
        }
 }
 
index 29c0ca7..38e5508 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
@@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 void
 pnfs_destroy_all_layouts(struct nfs_client *clp)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        LIST_HEAD(tmp_list);
 
+       nfs4_deviceid_mark_client_invalid(clp);
+       nfs4_deviceid_purge_client(clp);
+
        spin_lock(&clp->cl_lock);
-       list_splice_init(&clp->cl_layouts, &tmp_list);
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               if (!list_empty(&server->layouts))
+                       list_splice_init(&server->layouts, &tmp_list);
+       }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
 
        while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino)
        lrp->args.stateid = stateid;
        lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
        lrp->args.inode = ino;
+       lrp->args.layout = lo;
        lrp->clp = NFS_SERVER(ino)->nfs_client;
 
        status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
        };
        unsigned pg_offset;
        struct nfs_inode *nfsi = NFS_I(ino);
-       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+       struct nfs_server *server = NFS_SERVER(ino);
+       struct nfs_client *clp = server->nfs_client;
        struct pnfs_layout_hdr *lo;
        struct pnfs_layout_segment *lseg = NULL;
        bool first = false;
@@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
                 */
                spin_lock(&clp->cl_lock);
                BUG_ON(!list_empty(&lo->plh_layouts));
-               list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+               list_add_tail(&lo->plh_layouts, &server->layouts);
                spin_unlock(&clp->cl_lock);
        }
 
@@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
                arg.offset -= pg_offset;
                arg.length += pg_offset;
        }
-       arg.length = PAGE_CACHE_ALIGN(arg.length);
+       if (arg.length != NFS4_MAX_UINT64)
+               arg.length = PAGE_CACHE_ALIGN(arg.length);
 
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
        if (!lseg && first) {
@@ -991,6 +1004,7 @@ out_unlock:
        spin_unlock(&ino->i_lock);
        goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
 
 int
 pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@ out_forget_reply:
        goto out;
 }
 
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_read_mds(pgio);
+
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_write_mds(pgio);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
 bool
-pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
-                    struct nfs_page *req)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
-       enum pnfs_iomode access_type;
-       gfp_t gfp_flags;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
-       /* We assume that pg_ioflags == 0 iff we're reading a page */
-       if (pgio->pg_ioflags == 0) {
-               access_type = IOMODE_READ;
-               gfp_flags = GFP_KERNEL;
-       } else {
-               access_type = IOMODE_RW;
-               gfp_flags = GFP_NOFS;
-       }
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+       return true;
+}
 
-       if (pgio->pg_lseg == NULL) {
-               if (pgio->pg_count != prev->wb_bytes)
-                       return true;
-               /* This is first coelesce call for a series of nfs_pages */
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  prev->wb_context,
-                                                  req_offset(prev),
-                                                  pgio->pg_count,
-                                                  access_type,
-                                                  gfp_flags);
-               if (pgio->pg_lseg == NULL)
-                       return true;
-       }
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+       return true;
+}
+
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                    struct nfs_page *req)
+{
+       if (pgio->pg_lseg == NULL)
+               return nfs_generic_pg_test(pgio, prev, req);
 
        /*
         * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_write_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_write_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
-                       const struct rpc_call_ops *call_ops, int how)
+                       const struct rpc_call_ops *call_ops,
+                       struct pnfs_layout_segment *lseg,
+                       int how)
 {
        struct inode *inode = wdata->inode;
        enum pnfs_try_status trypnfs;
        struct nfs_server *nfss = NFS_SERVER(inode);
 
        wdata->mds_ops = call_ops;
+       wdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
                inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+       struct nfs_write_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_write_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_write_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_flush(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_read_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_read_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_readdata_release(data);
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
-enum pnfs_try_status
+static enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *rdata,
-                      const struct rpc_call_ops *call_ops)
+                      const struct rpc_call_ops *call_ops,
+                      struct pnfs_layout_segment *lseg)
 {
        struct inode *inode = rdata->inode;
        struct nfs_server *nfss = NFS_SERVER(inode);
        enum pnfs_try_status trypnfs;
 
        rdata->mds_ops = call_ops;
+       rdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Reading ino:%lu %u@%llu\n",
                __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       struct nfs_read_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_read_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_reads(desc, &head);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
 /*
  * Currently there is only one (whole file) write lseg.
  */
index 96bf4e6..078670d 100644 (file)
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
        void (*free_lseg) (struct pnfs_layout_segment *lseg);
 
        /* test for nfs page cache coalescing */
-       bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+       const struct nfs_pageio_ops *pg_read_ops;
+       const struct nfs_pageio_ops *pg_write_ops;
 
        /* Returns true if layoutdriver wants to divert this request to
         * driver's commit routine.
@@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
 /* pnfs.c */
 void get_layout_hdr(struct pnfs_layout_hdr *lo);
 void put_lseg(struct pnfs_layout_segment *lseg);
-struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                  loff_t pos, u64 count, enum pnfs_iomode access_type,
-                  gfp_t gfp_flags);
+
+bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
+
 void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
 void unset_pnfs_layoutdriver(struct nfs_server *);
-enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
-                                            const struct rpc_call_ops *, int);
-enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
-                                           const struct rpc_call_ops *);
+void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
+void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
+int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
 bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
 int pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
 int _pnfs_return_layout(struct inode *);
 int pnfs_ld_write_done(struct nfs_write_data *);
 int pnfs_ld_read_done(struct nfs_read_data *);
+struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
+                                              struct nfs_open_context *ctx,
+                                              loff_t pos,
+                                              u64 count,
+                                              enum pnfs_iomode iomode,
+                                              gfp_t gfp_flags);
+
+void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
+
+/* nfs4_deviceid_flags */
+enum {
+       NFS_DEVICEID_INVALID = 0,       /* set when MDS clientid recalled */
+};
 
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@ struct nfs4_deviceid_node {
        struct hlist_node               tmpnode;
        const struct pnfs_layoutdriver_type *ld;
        const struct nfs_client         *nfs_client;
+       unsigned long                   flags;
        struct nfs4_deviceid            deviceid;
        atomic_t                        ref;
 };
 
 void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
 struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
-struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
 void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
                             const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino)
        return 0;
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-                                   struct inode *inode)
-{
-       struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
-
-       if (ld)
-               pgio->pg_test = ld->pg_test;
-}
-
 #else  /* CONFIG_NFS_V4_1 */
 
 static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
 {
 }
 
-static inline struct pnfs_layout_segment *
-pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
-                  loff_t pos, u64 count, enum pnfs_iomode access_type,
-                  gfp_t gfp_flags)
-{
-       return NULL;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *data,
-                     const struct rpc_call_ops *call_ops)
-{
-       return PNFS_NOT_ATTEMPTED;
-}
-
-static inline enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *data,
-                      const struct rpc_call_ops *call_ops, int how)
-{
-       return PNFS_NOT_ATTEMPTED;
-}
-
 static inline int pnfs_return_layout(struct inode *ino)
 {
        return 0;
@@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
 {
 }
 
-static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
-                                   struct inode *inode)
+static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
+       return false;
+}
+
+static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+       return false;
 }
 
 static inline void
index f0f8e1e..6fda522 100644 (file)
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 
        rcu_read_lock();
        d = _lookup_deviceid(ld, clp, id, hash);
-       if (d && !atomic_inc_not_zero(&d->ref))
-               d = NULL;
+       if (d != NULL)
+               atomic_inc(&d->ref);
        rcu_read_unlock();
        return d;
 }
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
 /*
- * Unhash and put deviceid
+ * Remove a deviceid from cache
  *
  * @clp nfs_client associated with deviceid
  * @id the deviceid to unhash
  *
  * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
  */
-struct nfs4_deviceid_node *
-nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
+void
+nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
                         const struct nfs_client *clp, const struct nfs4_deviceid *id)
 {
        struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
        rcu_read_unlock();
        if (!d) {
                spin_unlock(&nfs4_deviceid_lock);
-               return NULL;
+               return;
        }
        hlist_del_init_rcu(&d->node);
        spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
 
        /* balance the initial ref set in pnfs_insert_deviceid */
        if (atomic_dec_and_test(&d->ref))
-               return d;
-
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
-
-/*
- * Delete a deviceid from cache
- *
- * @clp struct nfs_client qualifying the deviceid
- * @id deviceid to delete
- */
-void
-nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
-                    const struct nfs_client *clp, const struct nfs4_deviceid *id)
-{
-       struct nfs4_deviceid_node *d;
-
-       d = nfs4_unhash_put_deviceid(ld, clp, id);
-       if (!d)
-               return;
-       d->ld->free_deviceid_node(d);
+               d->ld->free_deviceid_node(d);
 }
 EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
 
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
        INIT_HLIST_NODE(&d->tmpnode);
        d->ld = ld;
        d->nfs_client = nfs_client;
+       d->flags = 0;
        d->deviceid = *id;
        atomic_set(&d->ref, 1);
 }
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
  *
  * @d deviceid node to put
  *
- * @ret true iff the node was deleted
+ * Returns true iff the node was deleted.
+ * Note that the test for d->ref == 0 is sufficient to establish
+ * that the node is no longer hashed in the global device id cache.
  */
 bool
 nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 {
-       if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock))
+       if (!atomic_dec_and_test(&d->ref))
                return false;
-       hlist_del_init_rcu(&d->node);
-       spin_unlock(&nfs4_deviceid_lock);
-       synchronize_rcu();
        d->ld->free_deviceid_node(d);
        return true;
 }
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
        for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
                _deviceid_purge_client(clp, h);
 }
+
+/*
+ * Stop use of all deviceids associated with an nfs_client
+ */
+void
+nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
+{
+       struct nfs4_deviceid_node *d;
+       struct hlist_node *n;
+       int i;
+
+       rcu_read_lock();
+       for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
+               hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
+                       if (d->nfs_client == clp)
+                               set_bit(NFS_DEVICEID_INVALID, &d->flags);
+       }
+       rcu_read_unlock();
+}
index a68679f..2171c04 100644 (file)
@@ -30,8 +30,7 @@
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
+static const struct nfs_pageio_ops nfs_pageio_read_ops;
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
        mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_release(struct nfs_read_data *rdata)
+void nfs_readdata_release(struct nfs_read_data *rdata)
 {
        put_lseg(rdata->lseg);
        put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
        }
 }
 
+static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+               struct inode *inode)
+{
+       nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
+                       NFS_SERVER(inode)->rsize, 0);
+}
+
+void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
+{
+       pgio->pg_ops = &nfs_pageio_read_ops;
+       pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
+
+static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+               struct inode *inode)
+{
+       if (!pnfs_pageio_init_read(pgio, inode))
+               nfs_pageio_init_read_mds(pgio, inode);
+}
+
 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                       struct page *page)
 {
@@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        if (len < PAGE_CACHE_SIZE)
                zero_user_segment(page, len, PAGE_CACHE_SIZE);
 
-       nfs_pageio_init(&pgio, inode, NULL, 0, 0);
-       nfs_list_add_request(new, &pgio.pg_list);
-       pgio.pg_count = len;
-
-       if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
-               nfs_pagein_multi(&pgio);
-       else
-               nfs_pagein_one(&pgio);
+       nfs_pageio_init_read(&pgio, inode);
+       nfs_pageio_add_request(&pgio, new);
+       nfs_pageio_complete(&pgio);
        return 0;
 }
 
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
 /*
  * Set up the NFS read request struct
  */
-static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
-               const struct rpc_call_ops *call_ops,
-               unsigned int count, unsigned int offset,
-               struct pnfs_layout_segment *lseg)
+static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+               unsigned int count, unsigned int offset)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
 
        data->req         = req;
        data->inode       = inode;
        data->cred        = req->wb_context->cred;
-       data->lseg        = get_lseg(lseg);
 
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
        data->res.count   = count;
        data->res.eof     = 0;
        nfs_fattr_init(&data->fattr);
+}
 
-       if (data->lseg &&
-           (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
-               return 0;
+static int nfs_do_read(struct nfs_read_data *data,
+               const struct rpc_call_ops *call_ops)
+{
+       struct inode *inode = data->args.context->dentry->d_inode;
 
        return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
 }
 
+static int
+nfs_do_multiple_reads(struct list_head *head,
+               const struct rpc_call_ops *call_ops)
+{
+       struct nfs_read_data *data;
+       int ret = 0;
+
+       while (!list_empty(head)) {
+               int ret2;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               ret2 = nfs_do_read(data, call_ops);
+               if (ret == 0)
+                       ret = ret2;
+       }
+       return ret;
+}
+
 static void
 nfs_async_read_error(struct list_head *head)
 {
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head)
  * won't see the new data until our attribute cache is updated.  This is more
  * or less conventional NFS client behavior.
  */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
-       size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
+       size_t rsize = desc->pg_bsize, nbytes;
        unsigned int offset;
        int requests = 0;
        int ret = 0;
-       struct pnfs_layout_segment *lseg;
-       LIST_HEAD(list);
 
        nfs_list_remove_request(req);
 
+       offset = 0;
        nbytes = desc->pg_count;
        do {
                size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
                data = nfs_readdata_alloc(1);
                if (!data)
                        goto out_bad;
-               list_add(&data->pages, &list);
+               data->pagevec[0] = page;
+               nfs_read_rpcsetup(req, data, len, offset);
+               list_add(&data->list, res);
                requests++;
                nbytes -= len;
+               offset += len;
        } while(nbytes != 0);
        atomic_set(&req->wb_complete, requests);
-
-       BUG_ON(desc->pg_lseg != NULL);
-       lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                 req_offset(req), desc->pg_count,
-                                 IOMODE_READ, GFP_KERNEL);
        ClearPageError(page);
-       offset = 0;
-       nbytes = desc->pg_count;
-       do {
-               int ret2;
-
-               data = list_entry(list.next, struct nfs_read_data, pages);
-               list_del_init(&data->pages);
-
-               data->pagevec[0] = page;
-
-               if (nbytes < rsize)
-                       rsize = nbytes;
-               ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
-                                        rsize, offset, lseg);
-               if (ret == 0)
-                       ret = ret2;
-               offset += rsize;
-               nbytes -= rsize;
-       } while (nbytes != 0);
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
-
+       desc->pg_rpc_callops = &nfs_read_partial_ops;
        return ret;
-
 out_bad:
-       while (!list_empty(&list)) {
-               data = list_entry(list.next, struct nfs_read_data, pages);
-               list_del(&data->pages);
+       while (!list_empty(res)) {
+               data = list_entry(res->next, struct nfs_read_data, list);
+               list_del(&data->list);
                nfs_readdata_free(data);
        }
        SetPageError(page);
@@ -327,19 +336,19 @@ out_bad:
        return -ENOMEM;
 }
 
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
+static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_read_data    *data;
        struct list_head *head = &desc->pg_list;
-       struct pnfs_layout_segment *lseg = desc->pg_lseg;
-       int ret = -ENOMEM;
+       int ret = 0;
 
        data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
                                                     desc->pg_count));
        if (!data) {
                nfs_async_read_error(head);
+               ret = -ENOMEM;
                goto out;
        }
 
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
                *pages++ = req->wb_page;
        }
        req = nfs_list_entry(data->pages.next);
-       if ((!lseg) && list_is_singular(&data->pages))
-               lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                         req_offset(req), desc->pg_count,
-                                         IOMODE_READ, GFP_KERNEL);
 
-       ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
-                               0, lseg);
+       nfs_read_rpcsetup(req, data, desc->pg_count, 0);
+       list_add(&data->list, res);
+       desc->pg_rpc_callops = &nfs_read_full_ops;
 out:
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
        return ret;
 }
 
+int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       if (desc->pg_bsize < PAGE_CACHE_SIZE)
+               return nfs_pagein_multi(desc, head);
+       return nfs_pagein_one(desc, head);
+}
+
+static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret == 0)
+               ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
+       return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_read_ops = {
+       .pg_test = nfs_generic_pg_test,
+       .pg_doio = nfs_generic_pg_readpages,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
                .pgio = &pgio,
        };
        struct inode *inode = mapping->host;
-       struct nfs_server *server = NFS_SERVER(inode);
-       size_t rsize = server->rsize;
        unsigned long npages;
        int ret = -ESTALE;
 
@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (ret == 0)
                goto read_complete; /* all pages were read */
 
-       if (rsize < PAGE_CACHE_SIZE)
-               nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
-       else
-               nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
+       nfs_pageio_init_read(&pgio, inode);
 
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
 
index 8d6864c..981298c 100644 (file)
@@ -501,6 +501,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
  * and only performs the unlink once the last reference to it is put.
  *
  * The final cleanup is done during dentry_iput.
+ *
+ * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
+ * could take responsibility for keeping open files referenced.  The server
+ * would also need to ensure that opened-but-deleted files were kept over
+ * reboots.  However, we may not assume a server does so.  (RFC 5661
+ * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
+ * use to advertise that it does this; some day we may take advantage of
+ * it.))
  */
 int
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
index 0857931..ebed518 100644 (file)
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
        mempool_free(p, nfs_wdata_mempool);
 }
 
-static void nfs_writedata_release(struct nfs_write_data *wdata)
+void nfs_writedata_release(struct nfs_write_data *wdata)
 {
        put_lseg(wdata->lseg);
        put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static int nfs_write_rpcsetup(struct nfs_page *req,
+static void nfs_write_rpcsetup(struct nfs_page *req,
                struct nfs_write_data *data,
-               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset,
-               struct pnfs_layout_segment *lseg,
                int how)
 {
        struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
        data->req = req;
        data->inode = inode = req->wb_context->dentry->d_inode;
        data->cred = req->wb_context->cred;
-       data->lseg = get_lseg(lseg);
 
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
        data->args.context = get_nfs_open_context(req->wb_context);
        data->args.lock_context = req->wb_lock_context;
        data->args.stable  = NFS_UNSTABLE;
-       if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
-               data->args.stable = NFS_DATA_SYNC;
-               if (!nfs_need_commit(NFS_I(inode)))
-                       data->args.stable = NFS_FILE_SYNC;
+       switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+       case 0:
+               break;
+       case FLUSH_COND_STABLE:
+               if (nfs_need_commit(NFS_I(inode)))
+                       break;
+       default:
+               data->args.stable = NFS_FILE_SYNC;
        }
 
        data->res.fattr   = &data->fattr;
        data->res.count   = count;
        data->res.verf    = &data->verf;
        nfs_fattr_init(&data->fattr);
+}
 
-       if (data->lseg &&
-           (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
-               return 0;
+static int nfs_do_write(struct nfs_write_data *data,
+               const struct rpc_call_ops *call_ops,
+               int how)
+{
+       struct inode *inode = data->args.context->dentry->d_inode;
 
        return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
 }
 
+static int nfs_do_multiple_writes(struct list_head *head,
+               const struct rpc_call_ops *call_ops,
+               int how)
+{
+       struct nfs_write_data *data;
+       int ret = 0;
+
+       while (!list_empty(head)) {
+               int ret2;
+
+               data = list_entry(head->next, struct nfs_write_data, list);
+               list_del_init(&data->list);
+
+               ret2 = nfs_do_write(data, call_ops, how);
+               if (ret == 0)
+                       ret = ret2;
+       }
+       return ret;
+}
+
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
  * call this on each, which will prepare them to be retried on next
  * writeback using standard nfs.
@@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req)
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
  */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
        struct page *page = req->wb_page;
        struct nfs_write_data *data;
-       size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
+       size_t wsize = desc->pg_bsize, nbytes;
        unsigned int offset;
        int requests = 0;
        int ret = 0;
-       struct pnfs_layout_segment *lseg;
-       LIST_HEAD(list);
 
        nfs_list_remove_request(req);
 
@@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
 
+       offset = 0;
        nbytes = desc->pg_count;
        do {
                size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
                data = nfs_writedata_alloc(1);
                if (!data)
                        goto out_bad;
-               list_add(&data->pages, &list);
+               data->pagevec[0] = page;
+               nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
+               list_add(&data->list, res);
                requests++;
                nbytes -= len;
+               offset += len;
        } while (nbytes != 0);
        atomic_set(&req->wb_complete, requests);
-
-       BUG_ON(desc->pg_lseg);
-       lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                 req_offset(req), desc->pg_count,
-                                 IOMODE_RW, GFP_NOFS);
-       ClearPageError(page);
-       offset = 0;
-       nbytes = desc->pg_count;
-       do {
-               int ret2;
-
-               data = list_entry(list.next, struct nfs_write_data, pages);
-               list_del_init(&data->pages);
-
-               data->pagevec[0] = page;
-
-               if (nbytes < wsize)
-                       wsize = nbytes;
-               ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
-                                         wsize, offset, lseg, desc->pg_ioflags);
-               if (ret == 0)
-                       ret = ret2;
-               offset += wsize;
-               nbytes -= wsize;
-       } while (nbytes != 0);
-
-       put_lseg(lseg);
-       desc->pg_lseg = NULL;
+       desc->pg_rpc_callops = &nfs_write_partial_ops;
        return ret;
 
 out_bad:
-       while (!list_empty(&list)) {
-               data = list_entry(list.next, struct nfs_write_data, pages);
-               list_del(&data->pages);
+       while (!list_empty(res)) {
+               data = list_entry(res->next, struct nfs_write_data, list);
+               list_del(&data->list);
                nfs_writedata_free(data);
        }
        nfs_redirty_request(req);
@@ -987,14 +986,13 @@ out_bad:
  * This is the case if nfs_updatepage detects a conflicting request
  * that has been written but not committed.
  */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
+static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
 {
        struct nfs_page         *req;
        struct page             **pages;
        struct nfs_write_data   *data;
        struct list_head *head = &desc->pg_list;
-       struct pnfs_layout_segment *lseg = desc->pg_lseg;
-       int ret;
+       int ret = 0;
 
        data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
                                                      desc->pg_count));
@@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
                *pages++ = req->wb_page;
        }
        req = nfs_list_entry(data->pages.next);
-       if ((!lseg) && list_is_singular(&data->pages))
-               lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
-                                         req_offset(req), desc->pg_count,
-                                         IOMODE_RW, GFP_NOFS);
 
        if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
            (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
                desc->pg_ioflags &= ~FLUSH_COND_STABLE;
 
        /* Set up the argument struct */
-       ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
+       nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
+       list_add(&data->list, res);
+       desc->pg_rpc_callops = &nfs_write_full_ops;
 out:
-       put_lseg(lseg); /* Cleans any gotten in ->pg_test */
-       desc->pg_lseg = NULL;
        return ret;
 }
 
-static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       if (desc->pg_bsize < PAGE_CACHE_SIZE)
+               return nfs_flush_multi(desc, head);
+       return nfs_flush_one(desc, head);
+}
+
+static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_flush(desc, &head);
+       if (ret == 0)
+               ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
+                               desc->pg_ioflags);
+       return ret;
+}
+
+static const struct nfs_pageio_ops nfs_pageio_write_ops = {
+       .pg_test = nfs_generic_pg_test,
+       .pg_doio = nfs_generic_pg_writepages,
+};
+
+static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
                                  struct inode *inode, int ioflags)
 {
-       size_t wsize = NFS_SERVER(inode)->wsize;
+       nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
+                               NFS_SERVER(inode)->wsize, ioflags);
+}
+
+void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
+{
+       pgio->pg_ops = &nfs_pageio_write_ops;
+       pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
+}
+EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
 
-       if (wsize < PAGE_CACHE_SIZE)
-               nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
-       else
-               nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
+static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
+                                 struct inode *inode, int ioflags)
+{
+       if (!pnfs_pageio_init_write(pgio, inode, ioflags))
+               nfs_pageio_init_write_mds(pgio, inode, ioflags);
 }
 
 /*
index 504b289..a3c4bc8 100644 (file)
@@ -563,6 +563,9 @@ enum {
        NFSPROC4_CLNT_GETDEVICEINFO,
        NFSPROC4_CLNT_LAYOUTCOMMIT,
        NFSPROC4_CLNT_LAYOUTRETURN,
+       NFSPROC4_CLNT_SECINFO_NO_NAME,
+       NFSPROC4_CLNT_TEST_STATEID,
+       NFSPROC4_CLNT_FREE_STATEID,
 };
 
 /* nfs41 types */
index 87694ca..4faeac8 100644 (file)
@@ -16,6 +16,7 @@ struct nfs4_sequence_args;
 struct nfs4_sequence_res;
 struct nfs_server;
 struct nfs4_minor_version_ops;
+struct server_scope;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -77,12 +78,13 @@ struct nfs_client {
        /* The flags used for obtaining the clientid during EXCHANGE_ID */
        u32                     cl_exchange_flags;
        struct nfs4_session     *cl_session;    /* sharred session */
-       struct list_head        cl_layouts;
 #endif /* CONFIG_NFS_V4 */
 
 #ifdef CONFIG_NFS_FSCACHE
        struct fscache_cookie   *fscache;       /* client index cache cookie */
 #endif
+
+       struct server_scope     *server_scope;  /* from exchange_id */
 };
 
 /*
@@ -149,6 +151,7 @@ struct nfs_server {
        struct rb_root          openowner_id;
        struct rb_root          lockowner_id;
 #endif
+       struct list_head        layouts;
        struct list_head        delegations;
        void (*destroy)(struct nfs_server *);
 
index 25311b3..e2791a2 100644 (file)
@@ -55,20 +55,28 @@ struct nfs_page {
        struct nfs_writeverf    wb_verf;        /* Commit cookie */
 };
 
+struct nfs_pageio_descriptor;
+struct nfs_pageio_ops {
+       void    (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
+       bool    (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+       int     (*pg_doio)(struct nfs_pageio_descriptor *);
+};
+
 struct nfs_pageio_descriptor {
        struct list_head        pg_list;
        unsigned long           pg_bytes_written;
        size_t                  pg_count;
        size_t                  pg_bsize;
        unsigned int            pg_base;
-       char                    pg_moreio;
+       unsigned char           pg_moreio : 1,
+                               pg_recoalesce : 1;
 
        struct inode            *pg_inode;
-       int                     (*pg_doio)(struct nfs_pageio_descriptor *);
+       const struct nfs_pageio_ops *pg_ops;
        int                     pg_ioflags;
        int                     pg_error;
+       const struct rpc_call_ops *pg_rpc_callops;
        struct pnfs_layout_segment *pg_lseg;
-       bool                    (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 };
 
 #define NFS_WBACK_BUSY(req)    (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,7 +93,7 @@ extern        int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
                          pgoff_t idx_start, unsigned int npages, int tag);
 extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
                             struct inode *inode,
-                            int (*doio)(struct nfs_pageio_descriptor *desc),
+                            const struct nfs_pageio_ops *pg_ops,
                             size_t bsize,
                             int how);
 extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -100,7 +108,6 @@ extern      void nfs_unlock_request(struct nfs_page *req);
 extern int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
-
 /*
  * Lock the page of an asynchronous request without getting a new reference
  */
index 00848d8..5b11595 100644 (file)
@@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data {
 };
 
 struct nfs4_layoutreturn_args {
-       __u32   layout_type;
+       struct pnfs_layout_hdr *layout;
        struct inode *inode;
        nfs4_stateid stateid;
+       __u32   layout_type;
        struct nfs4_sequence_args seq_args;
 };
 
@@ -1060,6 +1061,7 @@ struct server_scope {
 struct nfs41_exchange_id_res {
        struct nfs_client               *client;
        u32                             flags;
+       struct server_scope             *server_scope;
 };
 
 struct nfs41_create_session_args {
@@ -1083,6 +1085,34 @@ struct nfs41_reclaim_complete_args {
 struct nfs41_reclaim_complete_res {
        struct nfs4_sequence_res        seq_res;
 };
+
+#define SECINFO_STYLE_CURRENT_FH 0
+#define SECINFO_STYLE_PARENT 1
+struct nfs41_secinfo_no_name_args {
+       int                             style;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_test_stateid_args {
+       nfs4_stateid                    *stateid;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_test_stateid_res {
+       unsigned int                    status;
+       struct nfs4_sequence_res        seq_res;
+};
+
+struct nfs41_free_stateid_args {
+       nfs4_stateid                    *stateid;
+       struct nfs4_sequence_args       seq_args;
+};
+
+struct nfs41_free_stateid_res {
+       unsigned int                    status;
+       struct nfs4_sequence_res        seq_res;
+};
+
 #endif /* CONFIG_NFS_V4_1 */
 
 struct nfs_page;
@@ -1096,6 +1126,7 @@ struct nfs_read_data {
        struct rpc_cred         *cred;
        struct nfs_fattr        fattr;  /* fattr storage */
        struct list_head        pages;  /* Coalesced read requests */
+       struct list_head        list;   /* lists of struct nfs_read_data */
        struct nfs_page         *req;   /* multi ops per nfs_page */
        struct page             **pagevec;
        unsigned int            npages; /* Max length of pagevec */
@@ -1119,6 +1150,7 @@ struct nfs_write_data {
        struct nfs_fattr        fattr;
        struct nfs_writeverf    verf;
        struct list_head        pages;          /* Coalesced requests we wish to flush */
+       struct list_head        list;           /* lists of struct nfs_write_data */
        struct nfs_page         *req;           /* multi ops per nfs_page */
        struct page             **pagevec;
        unsigned int            npages;         /* Max length of pagevec */
index 76efbdd..435dd5f 100644 (file)
@@ -41,9 +41,6 @@
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_page.h>
-#include <scsi/osd_protocol.h>
-
-#define PNFS_OSD_OSDNAME_MAXSIZE 256
 
 /*
  * draft-ietf-nfsv4-minorversion-22
@@ -99,12 +96,6 @@ struct pnfs_osd_objid {
 #define _DEVID_HI(oid_device_id) \
        (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
 
-static inline int
-pnfs_osd_objid_xdr_sz(void)
-{
-       return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
-}
-
 enum pnfs_osd_version {
        PNFS_OSD_MISSING              = 0,
        PNFS_OSD_VERSION_1            = 1,
@@ -189,8 +180,6 @@ struct pnfs_osd_targetid {
        struct nfs4_string              oti_scsi_device_id;
 };
 
-enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
-
 /*   struct netaddr4 {
  *       // see struct rpcb in RFC1833
  *       string r_netid<>;    // network id
@@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr {
        struct pnfs_osd_net_addr        ota_netaddr;
 };
 
-enum {
-       NETWORK_ID_MAX = 16 / 4,
-       UNIVERSAL_ADDRESS_MAX = 64 / 4,
-       PNFS_OSD_TARGETADDR_MAX = 3 +  NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
-};
-
 struct pnfs_osd_deviceaddr {
        struct pnfs_osd_targetid        oda_targetid;
        struct pnfs_osd_targetaddr      oda_targetaddr;
@@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr {
        struct nfs4_string              oda_osdname;
 };
 
-enum {
-       ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
-       PNFS_OSD_DEVICEADDR_MAX =
-               PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
-               2 /*oda_lun*/ +
-               1 + OSD_SYSTEMID_LEN +
-               1 + ODA_OSDNAME_MAX,
-};
-
 /* LAYOUTCOMMIT: layoutupdate */
 
 /*   union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@@ -279,7 +253,7 @@ struct pnfs_osd_ioerr {
        u32                     oer_errno;
 };
 
-/* OSD XDR API */
+/* OSD XDR Client API */
 /* Layout helpers */
 /* Layout decoding is done in two parts:
  * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@@ -337,8 +311,7 @@ extern int
 pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
                                 struct pnfs_osd_layoutupdate *lou);
 
-/* osd_ioerror encoding/decoding (layout_return) */
-/* Client */
+/* osd_ioerror encoding (layout_return) */
 extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
 extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
 
index 0828842..f7f3ce3 100644 (file)
@@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <linux/sunrpc/xprt.h>
 #include <linux/sunrpc/sched.h>
 
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
                return 1;
        return 0;
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
                                         unsigned int min_reqs)
 {
@@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
 static inline void xprt_free_bc_request(struct rpc_rqst *req)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 #endif /* _LINUX_SUNRPC_BC_XPRT_H */
 
index fe2d8e6..e775689 100644 (file)
@@ -227,6 +227,10 @@ void               rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
 void           rpc_destroy_wait_queue(struct rpc_wait_queue *);
 void           rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
                                        rpc_action action);
+void           rpc_sleep_on_priority(struct rpc_wait_queue *,
+                                       struct rpc_task *,
+                                       rpc_action action,
+                                       int priority);
 void           rpc_wake_up_queued_task(struct rpc_wait_queue *,
                                        struct rpc_task *);
 void           rpc_wake_up(struct rpc_wait_queue *);
index ea29330..9a3fa7b 100644 (file)
@@ -92,7 +92,7 @@ struct svc_serv {
        struct module *         sv_module;      /* optional module to count when
                                                 * adding threads */
        svc_thread_fn           sv_function;    /* main function for threads */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct list_head        sv_cb_list;     /* queue for callback requests
                                                 * that arrive over the same
                                                 * connection */
@@ -100,7 +100,7 @@ struct svc_serv {
        wait_queue_head_t       sv_cb_waitq;    /* sleep here if there are no
                                                 * entries in the svc_cb_list */
        struct svc_xprt         *sv_bc_xprt;    /* callback on fore channel */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 };
 
 /*
index 81cce3b..15518a1 100644 (file)
@@ -22,6 +22,7 @@
 #define RPC_MIN_SLOT_TABLE     (2U)
 #define RPC_DEF_SLOT_TABLE     (16U)
 #define RPC_MAX_SLOT_TABLE     (128U)
+#define RPC_MAX_SLOT_TABLE_LIMIT       (65536U)
 
 /*
  * This describes a timeout strategy
@@ -100,18 +101,18 @@ struct rpc_rqst {
        ktime_t                 rq_xtime;       /* transmit time stamp */
        int                     rq_ntrans;
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct list_head        rq_bc_list;     /* Callback service list */
        unsigned long           rq_bc_pa_state; /* Backchannel prealloc state */
        struct list_head        rq_bc_pa_list;  /* Backchannel prealloc list */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANEL */
 };
 #define rq_svec                        rq_snd_buf.head
 #define rq_slen                        rq_snd_buf.len
 
 struct rpc_xprt_ops {
        void            (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
-       int             (*reserve_xprt)(struct rpc_task *task);
+       int             (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*rpcbind)(struct rpc_task *task);
        void            (*set_port)(struct rpc_xprt *xprt, unsigned short port);
@@ -164,12 +165,12 @@ struct rpc_xprt {
 
        struct rpc_wait_queue   binding;        /* requests waiting on rpcbind */
        struct rpc_wait_queue   sending;        /* requests waiting to send */
-       struct rpc_wait_queue   resend;         /* requests waiting to resend */
        struct rpc_wait_queue   pending;        /* requests in flight */
        struct rpc_wait_queue   backlog;        /* waiting for slot */
        struct list_head        free;           /* free slots */
-       struct rpc_rqst *       slot;           /* slot table storage */
-       unsigned int            max_reqs;       /* total slots */
+       unsigned int            max_reqs;       /* max number of slots */
+       unsigned int            min_reqs;       /* min number of slots */
+       atomic_t                num_reqs;       /* total slots */
        unsigned long           state;          /* transport state */
        unsigned char           shutdown   : 1, /* being shut down */
                                resvport   : 1; /* use a reserved port */
@@ -200,7 +201,7 @@ struct rpc_xprt {
        u32                     xid;            /* Next XID value to use */
        struct rpc_task *       snd_task;       /* Task blocked in send */
        struct svc_xprt         *bc_xprt;       /* NFSv4.1 backchannel */
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
        struct svc_serv         *bc_serv;       /* The RPC service which will */
                                                /* process the callback */
        unsigned int            bc_alloc_count; /* Total number of preallocs */
@@ -208,7 +209,7 @@ struct rpc_xprt {
                                                 * items */
        struct list_head        bc_pa_list;     /* List of preallocated
                                                 * backchannel rpc_rqst's */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
        struct list_head        recv;
 
        struct {
@@ -228,15 +229,15 @@ struct rpc_xprt {
        const char              *address_strings[RPC_DISPLAY_MAX];
 };
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Backchannel flags
  */
 #define        RPC_BC_PA_IN_USE        0x0001          /* Preallocated backchannel */
                                                /* buffer in use */
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static inline int bc_prealloc(struct rpc_rqst *req)
 {
        return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -246,7 +247,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
 {
        return 0;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 struct xprt_create {
        int                     ident;          /* XPRT_TRANSPORT identifier */
@@ -271,8 +272,8 @@ struct xprt_class {
 struct rpc_xprt                *xprt_create_transport(struct xprt_create *args);
 void                   xprt_connect(struct rpc_task *task);
 void                   xprt_reserve(struct rpc_task *task);
-int                    xprt_reserve_xprt(struct rpc_task *task);
-int                    xprt_reserve_xprt_cong(struct rpc_task *task);
+int                    xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
+int                    xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 int                    xprt_prepare_transmit(struct rpc_task *task);
 void                   xprt_transmit(struct rpc_task *task);
 void                   xprt_end_transmit(struct rpc_task *task);
@@ -282,7 +283,9 @@ void                        xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void                   xprt_release(struct rpc_task *task);
 struct rpc_xprt *      xprt_get(struct rpc_xprt *xprt);
 void                   xprt_put(struct rpc_xprt *xprt);
-struct rpc_xprt *      xprt_alloc(struct net *net, int size, int max_req);
+struct rpc_xprt *      xprt_alloc(struct net *net, size_t size,
+                               unsigned int num_prealloc,
+                               unsigned int max_req);
 void                   xprt_free(struct rpc_xprt *);
 
 static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@@ -321,7 +324,6 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_CLOSING           (6)
 #define XPRT_CONNECTION_ABORT  (7)
 #define XPRT_CONNECTION_CLOSE  (8)
-#define XPRT_INITIALIZED       (9)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
index b2198e6..ffd243d 100644 (file)
@@ -4,6 +4,10 @@ config SUNRPC
 config SUNRPC_GSS
        tristate
 
+config SUNRPC_BACKCHANNEL
+       bool
+       depends on SUNRPC
+
 config SUNRPC_XPRT_RDMA
        tristate
        depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
index 9d2fca5..8209a04 100644 (file)
@@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
            addr.o rpcb_clnt.o timer.o xdr.o \
            sunrpc_syms.o cache.o rpc_pipe.o \
            svc_xprt.o
-sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o
+sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
 sunrpc-$(CONFIG_PROC_FS) += stats.o
 sunrpc-$(CONFIG_SYSCTL) += sysctl.o
index cf06af3..91eaa26 100644 (file)
@@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define RPCDBG_FACILITY        RPCDBG_TRANS
 #endif
 
-#if defined(CONFIG_NFS_V4_1)
-
 /*
  * Helper routines that track the number of preallocation elements
  * on the transport.
@@ -174,7 +172,7 @@ out_free:
        dprintk("RPC:       setup backchannel transport failed\n");
        return -1;
 }
-EXPORT_SYMBOL(xprt_setup_backchannel);
+EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
 
 /*
  * Destroys the backchannel preallocated structures.
@@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
        dprintk("RPC:        backchannel list empty= %s\n",
                list_empty(&xprt->bc_pa_list) ? "true" : "false");
 }
-EXPORT_SYMBOL(xprt_destroy_backchannel);
+EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
 
 /*
  * One or more rpc_rqst structure have been preallocated during the
@@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req)
        spin_unlock_bh(&xprt->bc_pa_lock);
 }
 
-#endif /* CONFIG_NFS_V4_1 */
index 1dd1a68..0b2eb38 100644 (file)
@@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  * reply over an existing open connection previously established by the client.
  */
 
-#if defined(CONFIG_NFS_V4_1)
-
 #include <linux/module.h>
 
 #include <linux/sunrpc/xprt.h>
@@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req)
        return ret;
 }
 
-#endif /* CONFIG_NFS_V4_1 */
index c50818f..c5347d2 100644 (file)
@@ -64,9 +64,9 @@ static void   call_decode(struct rpc_task *task);
 static void    call_bind(struct rpc_task *task);
 static void    call_bind_status(struct rpc_task *task);
 static void    call_transmit(struct rpc_task *task);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static void    call_bc_transmit(struct rpc_task *task);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 static void    call_status(struct rpc_task *task);
 static void    call_transmit_status(struct rpc_task *task);
 static void    call_refresh(struct rpc_task *task);
@@ -715,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /**
  * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
  * rpc_execute against it
@@ -758,7 +758,7 @@ out:
        dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
        return task;
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 void
 rpc_call_start(struct rpc_task *task)
@@ -1361,7 +1361,7 @@ call_transmit_status(struct rpc_task *task)
        }
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * 5b. Send the backchannel RPC reply.  On error, drop the reply.  In
  * addition, disconnect on connectivity errors.
@@ -1425,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task)
        }
        rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * 6.  Sort out the RPC call status
@@ -1550,8 +1550,7 @@ call_decode(struct rpc_task *task)
        kxdrdproc_t     decode = task->tk_msg.rpc_proc->p_decode;
        __be32          *p;
 
-       dprintk("RPC: %5u call_decode (status %d)\n",
-                       task->tk_pid, task->tk_status);
+       dprint_status(task);
 
        if (task->tk_flags & RPC_CALL_MAJORSEEN) {
                if (clnt->cl_chatty)
index 4814e24..d12ffa5 100644 (file)
@@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 /*
  * Add new request to a priority queue.
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+               struct rpc_task *task,
+               unsigned char queue_priority)
 {
        struct list_head *q;
        struct rpc_task *t;
 
        INIT_LIST_HEAD(&task->u.tk_wait.links);
-       q = &queue->tasks[task->tk_priority];
-       if (unlikely(task->tk_priority > queue->maxpriority))
+       q = &queue->tasks[queue_priority];
+       if (unlikely(queue_priority > queue->maxpriority))
                q = &queue->tasks[queue->maxpriority];
        list_for_each_entry(t, q, u.tk_wait.list) {
                if (t->tk_owner == task->tk_owner) {
@@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
  * improve overall performance.
  * Everyone else gets appended to the queue to ensure proper FIFO behavior.
  */
-static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
+static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
+               struct rpc_task *task,
+               unsigned char queue_priority)
 {
        BUG_ON (RPC_IS_QUEUED(task));
 
        if (RPC_IS_PRIORITY(queue))
-               __rpc_add_wait_queue_priority(queue, task);
+               __rpc_add_wait_queue_priority(queue, task, queue_priority);
        else if (RPC_IS_SWAPPER(task))
                list_add(&task->u.tk_wait.list, &queue->tasks[0]);
        else
@@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task)
  * NB: An RPC task will only receive interrupt-driven events as long
  * as it's on a wait queue.
  */
-static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
-                       rpc_action action)
+static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
+               struct rpc_task *task,
+               rpc_action action,
+               unsigned char queue_priority)
 {
        dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
                        task->tk_pid, rpc_qname(q), jiffies);
 
-       __rpc_add_wait_queue(q, task);
+       __rpc_add_wait_queue(q, task, queue_priority);
 
        BUG_ON(task->tk_callback != NULL);
        task->tk_callback = action;
@@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
         * Protect the queue operations.
         */
        spin_lock_bh(&q->lock);
-       __rpc_sleep_on(q, task, action);
+       __rpc_sleep_on_priority(q, task, action, task->tk_priority);
        spin_unlock_bh(&q->lock);
 }
 EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
+void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
+               rpc_action action, int priority)
+{
+       /* We shouldn't ever put an inactive task to sleep */
+       BUG_ON(!RPC_IS_ACTIVATED(task));
+
+       /*
+        * Protect the queue operations.
+        */
+       spin_lock_bh(&q->lock);
+       __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
+       spin_unlock_bh(&q->lock);
+}
+
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
  * @queue: wait queue
index 2b90292..6a69a11 100644 (file)
@@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp)
        }
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Process a backchannel RPC request that arrived over an existing
  * outbound connection
@@ -1300,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
                return 0;
        }
 }
-EXPORT_SYMBOL(bc_svc_process);
-#endif /* CONFIG_NFS_V4_1 */
+EXPORT_SYMBOL_GPL(bc_svc_process);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Return (transport-specific) limit on the rpc payload.
index af04f77..a1812a2 100644 (file)
@@ -66,12 +66,12 @@ static void         svc_sock_free(struct svc_xprt *);
 static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
                                          struct net *, struct sockaddr *,
                                          int, int);
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
                                             struct net *, struct sockaddr *,
                                             int, int);
 static void svc_bc_sock_free(struct svc_xprt *xprt);
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key svc_key[2];
@@ -1241,7 +1241,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
        return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
                                             struct net *, struct sockaddr *,
                                             int, int);
@@ -1282,7 +1282,7 @@ static void svc_cleanup_bc_xprt_sock(void)
 {
        svc_unreg_xprt_class(&svc_tcp_bc_class);
 }
-#else /* CONFIG_NFS_V4_1 */
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
 static void svc_init_bc_xprt_sock(void)
 {
 }
@@ -1290,7 +1290,7 @@ static void svc_init_bc_xprt_sock(void)
 static void svc_cleanup_bc_xprt_sock(void)
 {
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 static struct svc_xprt_ops svc_tcp_ops = {
        .xpo_create = svc_tcp_create,
@@ -1621,7 +1621,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
        kfree(svsk);
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Create a back channel svc_xprt which shares the fore channel socket.
  */
@@ -1660,4 +1660,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt)
        if (xprt)
                kfree(container_of(xprt, struct svc_sock, sk_xprt));
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
index f008c14..277ebd4 100644 (file)
@@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
        kaddr[buf->page_base + len] = '\0';
        kunmap_atomic(kaddr, KM_USER0);
 }
-EXPORT_SYMBOL(xdr_terminate_string);
+EXPORT_SYMBOL_GPL(xdr_terminate_string);
 
 void
 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
index ce5eb68..9b6a4d1 100644 (file)
@@ -62,6 +62,7 @@
 /*
  * Local functions
  */
+static void     xprt_init(struct rpc_xprt *xprt, struct net *net);
 static void    xprt_request_init(struct rpc_task *, struct rpc_xprt *);
 static void    xprt_connect_status(struct rpc_task *task);
 static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
  * transport connects from colliding with writes.  No congestion control
  * is provided.
  */
-int xprt_reserve_xprt(struct rpc_task *task)
+int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
-       struct rpc_xprt *xprt = req->rq_xprt;
+       int priority;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                if (task == xprt->snd_task)
@@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task)
                goto out_sleep;
        }
        xprt->snd_task = task;
-       req->rq_bytes_sent = 0;
-       req->rq_ntrans++;
+       if (req != NULL) {
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
+       }
 
        return 1;
 
@@ -212,10 +215,13 @@ out_sleep:
                        task->tk_pid, xprt);
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
-       if (req->rq_ntrans)
-               rpc_sleep_on(&xprt->resend, task, NULL);
+       if (req == NULL)
+               priority = RPC_PRIORITY_LOW;
+       else if (!req->rq_ntrans)
+               priority = RPC_PRIORITY_NORMAL;
        else
-               rpc_sleep_on(&xprt->sending, task, NULL);
+               priority = RPC_PRIORITY_HIGH;
+       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
  */
-int xprt_reserve_xprt_cong(struct rpc_task *task)
+int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-       struct rpc_xprt *xprt = task->tk_xprt;
        struct rpc_rqst *req = task->tk_rqstp;
+       int priority;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
                if (task == xprt->snd_task)
                        return 1;
                goto out_sleep;
        }
+       if (req == NULL) {
+               xprt->snd_task = task;
+               return 1;
+       }
        if (__xprt_get_cong(xprt, task)) {
                xprt->snd_task = task;
-               if (req) {
-                       req->rq_bytes_sent = 0;
-                       req->rq_ntrans++;
-               }
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
                return 1;
        }
        xprt_clear_locked(xprt);
@@ -262,10 +270,13 @@ out_sleep:
        dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
        task->tk_timeout = 0;
        task->tk_status = -EAGAIN;
-       if (req && req->rq_ntrans)
-               rpc_sleep_on(&xprt->resend, task, NULL);
+       if (req == NULL)
+               priority = RPC_PRIORITY_LOW;
+       else if (!req->rq_ntrans)
+               priority = RPC_PRIORITY_NORMAL;
        else
-               rpc_sleep_on(&xprt->sending, task, NULL);
+               priority = RPC_PRIORITY_HIGH;
+       rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
        return 0;
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
        int retval;
 
        spin_lock_bh(&xprt->transport_lock);
-       retval = xprt->ops->reserve_xprt(task);
+       retval = xprt->ops->reserve_xprt(xprt, task);
        spin_unlock_bh(&xprt->transport_lock);
        return retval;
 }
@@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                return;
 
-       task = rpc_wake_up_next(&xprt->resend);
-       if (!task) {
-               task = rpc_wake_up_next(&xprt->sending);
-               if (!task)
-                       goto out_unlock;
-       }
+       task = rpc_wake_up_next(&xprt->sending);
+       if (task == NULL)
+               goto out_unlock;
 
        req = task->tk_rqstp;
        xprt->snd_task = task;
@@ -310,24 +318,25 @@ out_unlock:
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
        struct rpc_task *task;
+       struct rpc_rqst *req;
 
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                return;
        if (RPCXPRT_CONGESTED(xprt))
                goto out_unlock;
-       task = rpc_wake_up_next(&xprt->resend);
-       if (!task) {
-               task = rpc_wake_up_next(&xprt->sending);
-               if (!task)
-                       goto out_unlock;
+       task = rpc_wake_up_next(&xprt->sending);
+       if (task == NULL)
+               goto out_unlock;
+
+       req = task->tk_rqstp;
+       if (req == NULL) {
+               xprt->snd_task = task;
+               return;
        }
        if (__xprt_get_cong(xprt, task)) {
-               struct rpc_rqst *req = task->tk_rqstp;
                xprt->snd_task = task;
-               if (req) {
-                       req->rq_bytes_sent = 0;
-                       req->rq_ntrans++;
-               }
+               req->rq_bytes_sent = 0;
+               req->rq_ntrans++;
                return;
        }
 out_unlock:
@@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task)
                err = req->rq_reply_bytes_recvd;
                goto out_unlock;
        }
-       if (!xprt->ops->reserve_xprt(task))
+       if (!xprt->ops->reserve_xprt(xprt, task))
                err = -EAGAIN;
 out_unlock:
        spin_unlock_bh(&xprt->transport_lock);
@@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task)
        spin_unlock_bh(&xprt->transport_lock);
 }
 
+static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
+{
+       struct rpc_rqst *req = ERR_PTR(-EAGAIN);
+
+       if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
+               goto out;
+       req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
+       if (req != NULL)
+               goto out;
+       atomic_dec(&xprt->num_reqs);
+       req = ERR_PTR(-ENOMEM);
+out:
+       return req;
+}
+
+static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+       if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
+               kfree(req);
+               return true;
+       }
+       return false;
+}
+
 static void xprt_alloc_slot(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
+       struct rpc_rqst *req;
 
-       task->tk_status = 0;
-       if (task->tk_rqstp)
-               return;
        if (!list_empty(&xprt->free)) {
-               struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
-               list_del_init(&req->rq_list);
-               task->tk_rqstp = req;
-               xprt_request_init(task, xprt);
-               return;
+               req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
+               list_del(&req->rq_list);
+               goto out_init_req;
+       }
+       req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
+       if (!IS_ERR(req))
+               goto out_init_req;
+       switch (PTR_ERR(req)) {
+       case -ENOMEM:
+               rpc_delay(task, HZ >> 2);
+               dprintk("RPC:       dynamic allocation of request slot "
+                               "failed! Retrying\n");
+               break;
+       case -EAGAIN:
+               rpc_sleep_on(&xprt->backlog, task, NULL);
+               dprintk("RPC:       waiting for request slot\n");
        }
-       dprintk("RPC:       waiting for request slot\n");
        task->tk_status = -EAGAIN;
-       task->tk_timeout = 0;
-       rpc_sleep_on(&xprt->backlog, task, NULL);
+       return;
+out_init_req:
+       task->tk_status = 0;
+       task->tk_rqstp = req;
+       xprt_request_init(task, xprt);
 }
 
 static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
+       if (xprt_dynamic_free_slot(xprt, req))
+               return;
+
        memset(req, 0, sizeof(*req));   /* mark unused */
 
        spin_lock(&xprt->reserve_lock);
@@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
        spin_unlock(&xprt->reserve_lock);
 }
 
-struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
+static void xprt_free_all_slots(struct rpc_xprt *xprt)
+{
+       struct rpc_rqst *req;
+       while (!list_empty(&xprt->free)) {
+               req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
+               list_del(&req->rq_list);
+               kfree(req);
+       }
+}
+
+struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
+               unsigned int num_prealloc,
+               unsigned int max_alloc)
 {
        struct rpc_xprt *xprt;
+       struct rpc_rqst *req;
+       int i;
 
        xprt = kzalloc(size, GFP_KERNEL);
        if (xprt == NULL)
                goto out;
-       atomic_set(&xprt->count, 1);
 
-       xprt->max_reqs = max_req;
-       xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
-       if (xprt->slot == NULL)
+       xprt_init(xprt, net);
+
+       for (i = 0; i < num_prealloc; i++) {
+               req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
+               if (!req)
+                       break;
+               list_add(&req->rq_list, &xprt->free);
+       }
+       if (i < num_prealloc)
                goto out_free;
+       if (max_alloc > num_prealloc)
+               xprt->max_reqs = max_alloc;
+       else
+               xprt->max_reqs = num_prealloc;
+       xprt->min_reqs = num_prealloc;
+       atomic_set(&xprt->num_reqs, num_prealloc);
 
-       xprt->xprt_net = get_net(net);
        return xprt;
 
 out_free:
-       kfree(xprt);
+       xprt_free(xprt);
 out:
        return NULL;
 }
@@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
 void xprt_free(struct rpc_xprt *xprt)
 {
        put_net(xprt->xprt_net);
-       kfree(xprt->slot);
+       xprt_free_all_slots(xprt);
        kfree(xprt);
 }
 EXPORT_SYMBOL_GPL(xprt_free);
@@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
 
-       task->tk_status = -EIO;
+       task->tk_status = 0;
+       if (task->tk_rqstp != NULL)
+               return;
+
+       /* Note: grabbing the xprt_lock_write() here is not strictly needed,
+        * but ensures that we throttle new slot allocation if the transport
+        * is congested (e.g. if reconnecting or if we're out of socket
+        * write buffer space).
+        */
+       task->tk_timeout = 0;
+       task->tk_status = -EAGAIN;
+       if (!xprt_lock_write(xprt, task))
+               return;
+
        spin_lock(&xprt->reserve_lock);
        xprt_alloc_slot(task);
        spin_unlock(&xprt->reserve_lock);
+       xprt_release_write(xprt, task);
 }
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 {
        struct rpc_rqst *req = task->tk_rqstp;
 
+       INIT_LIST_HEAD(&req->rq_list);
        req->rq_timeout = task->tk_client->cl_timeout->to_initval;
        req->rq_task    = task;
        req->rq_xprt    = xprt;
@@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task)
                xprt_free_bc_request(req);
 }
 
+static void xprt_init(struct rpc_xprt *xprt, struct net *net)
+{
+       atomic_set(&xprt->count, 1);
+
+       spin_lock_init(&xprt->transport_lock);
+       spin_lock_init(&xprt->reserve_lock);
+
+       INIT_LIST_HEAD(&xprt->free);
+       INIT_LIST_HEAD(&xprt->recv);
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+       spin_lock_init(&xprt->bc_pa_lock);
+       INIT_LIST_HEAD(&xprt->bc_pa_list);
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+       xprt->last_used = jiffies;
+       xprt->cwnd = RPC_INITCWND;
+       xprt->bind_index = 0;
+
+       rpc_init_wait_queue(&xprt->binding, "xprt_binding");
+       rpc_init_wait_queue(&xprt->pending, "xprt_pending");
+       rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+       rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
+
+       xprt_init_xid(xprt);
+
+       xprt->xprt_net = get_net(net);
+}
+
 /**
  * xprt_create_transport - create an RPC transport
  * @args: rpc transport creation arguments
@@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task)
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
        struct rpc_xprt *xprt;
-       struct rpc_rqst *req;
        struct xprt_class *t;
 
        spin_lock(&xprt_list_lock);
@@ -1100,46 +1213,17 @@ found:
        if (IS_ERR(xprt)) {
                dprintk("RPC:       xprt_create_transport: failed, %ld\n",
                                -PTR_ERR(xprt));
-               return xprt;
+               goto out;
        }
-       if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
-               /* ->setup returned a pre-initialized xprt: */
-               return xprt;
-
-       spin_lock_init(&xprt->transport_lock);
-       spin_lock_init(&xprt->reserve_lock);
-
-       INIT_LIST_HEAD(&xprt->free);
-       INIT_LIST_HEAD(&xprt->recv);
-#if defined(CONFIG_NFS_V4_1)
-       spin_lock_init(&xprt->bc_pa_lock);
-       INIT_LIST_HEAD(&xprt->bc_pa_list);
-#endif /* CONFIG_NFS_V4_1 */
-
        INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
        if (xprt_has_timer(xprt))
                setup_timer(&xprt->timer, xprt_init_autodisconnect,
                            (unsigned long)xprt);
        else
                init_timer(&xprt->timer);
-       xprt->last_used = jiffies;
-       xprt->cwnd = RPC_INITCWND;
-       xprt->bind_index = 0;
-
-       rpc_init_wait_queue(&xprt->binding, "xprt_binding");
-       rpc_init_wait_queue(&xprt->pending, "xprt_pending");
-       rpc_init_wait_queue(&xprt->sending, "xprt_sending");
-       rpc_init_wait_queue(&xprt->resend, "xprt_resend");
-       rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
-
-       /* initialize free list */
-       for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
-               list_add(&req->rq_list, &xprt->free);
-
-       xprt_init_xid(xprt);
-
        dprintk("RPC:       created transport %p with %u slots\n", xprt,
                        xprt->max_reqs);
+out:
        return xprt;
 }
 
@@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt)
        rpc_destroy_wait_queue(&xprt->binding);
        rpc_destroy_wait_queue(&xprt->pending);
        rpc_destroy_wait_queue(&xprt->sending);
-       rpc_destroy_wait_queue(&xprt->resend);
        rpc_destroy_wait_queue(&xprt->backlog);
        cancel_work_sync(&xprt->task_cleanup);
        /*
index 0867070..b446e10 100644 (file)
@@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args)
        }
 
        xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
+                       xprt_rdma_slot_table_entries,
                        xprt_rdma_slot_table_entries);
        if (xprt == NULL) {
                dprintk("RPC:       %s: couldn't allocate rpcrdma_xprt\n",
@@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task)
 }
 
 static int
-xprt_rdma_reserve_xprt(struct rpc_task *task)
+xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-       struct rpc_xprt *xprt = task->tk_xprt;
        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
        int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
 
@@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task)
                BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
        }
        xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
-       return xprt_reserve_xprt_cong(task);
+       return xprt_reserve_xprt_cong(xprt, task);
 }
 
 /*
index 72abb73..d7f97ef 100644 (file)
@@ -37,7 +37,7 @@
 #include <linux/sunrpc/svcsock.h>
 #include <linux/sunrpc/xprtsock.h>
 #include <linux/file.h>
-#ifdef CONFIG_NFS_V4_1
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
 #include <linux/sunrpc/bc_xprt.h>
 #endif
 
@@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt);
  * xprtsock tunables
  */
 unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
-unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
+unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
+unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
 
 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
 
 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
 
@@ -103,6 +105,15 @@ static ctl_table xs_tunables_table[] = {
                .extra1         = &min_slot_table_size,
                .extra2         = &max_slot_table_size
        },
+       {
+               .procname       = "tcp_max_slot_table_entries",
+               .data           = &xprt_max_tcp_slot_table_entries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = &min_slot_table_size,
+               .extra2         = &max_tcp_slot_table_limit
+       },
        {
                .procname       = "min_resvport",
                .data           = &xprt_min_resvport,
@@ -755,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
        if (task == NULL)
                goto out_release;
        req = task->tk_rqstp;
+       if (req == NULL)
+               goto out_release;
        if (req->rq_bytes_sent == 0)
                goto out_release;
        if (req->rq_bytes_sent == req->rq_snd_buf.len)
@@ -1236,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
        return 0;
 }
 
-#if defined(CONFIG_NFS_V4_1)
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
 /*
  * Obtains an rpc_rqst previously allocated and invokes the common
  * tcp read code to read the data.  The result is placed in the callback
@@ -1299,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 {
        return xs_tcp_read_reply(xprt, desc);
 }
-#endif /* CONFIG_NFS_V4_1 */
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 /*
  * Read data off the transport.  This can be either an RPC_CALL or an
@@ -2489,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
 }
 
 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
-                                     unsigned int slot_table_size)
+                                     unsigned int slot_table_size,
+                                     unsigned int max_slot_table_size)
 {
        struct rpc_xprt *xprt;
        struct sock_xprt *new;
@@ -2499,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
                return ERR_PTR(-EBADF);
        }
 
-       xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size);
+       xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
+                       max_slot_table_size);
        if (xprt == NULL) {
                dprintk("RPC:       xs_setup_xprt: couldn't allocate "
                                "rpc_xprt\n");
@@ -2541,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
        struct rpc_xprt *xprt;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_max_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2605,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
        struct sock_xprt *transport;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
+                       xprt_udp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2681,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
        struct sock_xprt *transport;
        struct rpc_xprt *ret;
 
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_max_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2760,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
                 */
                 return args->bc_xprt->xpt_bc_xprt;
        }
-       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+       xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
+                       xprt_tcp_slot_table_entries);
        if (IS_ERR(xprt))
                return xprt;
        transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2947,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = {
 #define param_check_slot_table_size(name, p) \
        __param_check(name, p, unsigned int);
 
+static int param_set_max_slot_table_size(const char *val,
+                                    const struct kernel_param *kp)
+{
+       return param_set_uint_minmax(val, kp,
+                       RPC_MIN_SLOT_TABLE,
+                       RPC_MAX_SLOT_TABLE_LIMIT);
+}
+
+static struct kernel_param_ops param_ops_max_slot_table_size = {
+       .set = param_set_max_slot_table_size,
+       .get = param_get_uint,
+};
+
+#define param_check_max_slot_table_size(name, p) \
+       __param_check(name, p, unsigned int);
+
 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
                   slot_table_size, 0644);
+module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
+                  max_slot_table_size, 0644);
 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
                   slot_table_size, 0644);