Merge branch 'depends/rmk/devel-stable' into next/board
[pandora-kernel.git] / fs / nfs / pnfs.c
index 29c0ca7..e550e88 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 #include "internal.h"
 #include "pnfs.h"
 #include "iostat.h"
@@ -75,8 +76,11 @@ find_pnfs_driver(u32 id)
 void
 unset_pnfs_layoutdriver(struct nfs_server *nfss)
 {
-       if (nfss->pnfs_curr_ld)
+       if (nfss->pnfs_curr_ld) {
+               if (nfss->pnfs_curr_ld->clear_layoutdriver)
+                       nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
                module_put(nfss->pnfs_curr_ld->owner);
+       }
        nfss->pnfs_curr_ld = NULL;
 }
 
@@ -87,7 +91,8 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
  * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
  */
 void
-set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
+set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
+                     u32 id)
 {
        struct pnfs_layoutdriver_type *ld_type = NULL;
 
@@ -114,6 +119,13 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
                goto out_no_driver;
        }
        server->pnfs_curr_ld = ld_type;
+       if (ld_type->set_layoutdriver
+           && ld_type->set_layoutdriver(server, mntfh)) {
+               printk(KERN_ERR "%s: Error initializing pNFS layout driver %u.\n",
+                               __func__, id);
+               module_put(ld_type->owner);
+               goto out_no_driver;
+       }
 
        dprintk("%s: pNFS module for %u set\n", __func__, id);
        return;
@@ -189,6 +201,7 @@ static void
 pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
 {
        struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
+       put_rpccred(lo->plh_lc_cred);
        return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
 }
 
@@ -223,6 +236,7 @@ static void
 init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
 {
        INIT_LIST_HEAD(&lseg->pls_list);
+       INIT_LIST_HEAD(&lseg->pls_lc_list);
        atomic_set(&lseg->pls_refcount, 1);
        smp_mb();
        set_bit(NFS_LSEG_VALID, &lseg->pls_flags);
@@ -448,11 +462,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 void
 pnfs_destroy_all_layouts(struct nfs_client *clp)
 {
+       struct nfs_server *server;
        struct pnfs_layout_hdr *lo;
        LIST_HEAD(tmp_list);
 
+       nfs4_deviceid_mark_client_invalid(clp);
+       nfs4_deviceid_purge_client(clp);
+
        spin_lock(&clp->cl_lock);
-       list_splice_init(&clp->cl_layouts, &tmp_list);
+       rcu_read_lock();
+       list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+               if (!list_empty(&server->layouts))
+                       list_splice_init(&server->layouts, &tmp_list);
+       }
+       rcu_read_unlock();
        spin_unlock(&clp->cl_lock);
 
        while (!list_empty(&tmp_list)) {
@@ -661,6 +684,7 @@ _pnfs_return_layout(struct inode *ino)
        lrp->args.stateid = stateid;
        lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
        lrp->args.inode = ino;
+       lrp->args.layout = lo;
        lrp->clp = NFS_SERVER(ino)->nfs_client;
 
        status = nfs4_proc_layoutreturn(lrp);
@@ -805,7 +829,9 @@ out:
 }
 
 static struct pnfs_layout_hdr *
-alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
+alloc_init_layout_hdr(struct inode *ino,
+                     struct nfs_open_context *ctx,
+                     gfp_t gfp_flags)
 {
        struct pnfs_layout_hdr *lo;
 
@@ -817,11 +843,14 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags)
        INIT_LIST_HEAD(&lo->plh_segs);
        INIT_LIST_HEAD(&lo->plh_bulk_recall);
        lo->plh_inode = ino;
+       lo->plh_lc_cred = get_rpccred(ctx->state->owner->so_cred);
        return lo;
 }
 
 static struct pnfs_layout_hdr *
-pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
+pnfs_find_alloc_layout(struct inode *ino,
+                      struct nfs_open_context *ctx,
+                      gfp_t gfp_flags)
 {
        struct nfs_inode *nfsi = NFS_I(ino);
        struct pnfs_layout_hdr *new = NULL;
@@ -836,7 +865,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags)
                        return nfsi->layout;
        }
        spin_unlock(&ino->i_lock);
-       new = alloc_init_layout_hdr(ino, gfp_flags);
+       new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
        spin_lock(&ino->i_lock);
 
        if (likely(nfsi->layout == NULL))       /* Won the race? */
@@ -920,7 +949,8 @@ pnfs_update_layout(struct inode *ino,
        };
        unsigned pg_offset;
        struct nfs_inode *nfsi = NFS_I(ino);
-       struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+       struct nfs_server *server = NFS_SERVER(ino);
+       struct nfs_client *clp = server->nfs_client;
        struct pnfs_layout_hdr *lo;
        struct pnfs_layout_segment *lseg = NULL;
        bool first = false;
@@ -928,7 +958,7 @@ pnfs_update_layout(struct inode *ino,
        if (!pnfs_enabled_sb(NFS_SERVER(ino)))
                return NULL;
        spin_lock(&ino->i_lock);
-       lo = pnfs_find_alloc_layout(ino, gfp_flags);
+       lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
        if (lo == NULL) {
                dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
                goto out_unlock;
@@ -964,7 +994,7 @@ pnfs_update_layout(struct inode *ino,
                 */
                spin_lock(&clp->cl_lock);
                BUG_ON(!list_empty(&lo->plh_layouts));
-               list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
+               list_add_tail(&lo->plh_layouts, &server->layouts);
                spin_unlock(&clp->cl_lock);
        }
 
@@ -973,7 +1003,8 @@ pnfs_update_layout(struct inode *ino,
                arg.offset -= pg_offset;
                arg.length += pg_offset;
        }
-       arg.length = PAGE_CACHE_ALIGN(arg.length);
+       if (arg.length != NFS4_MAX_UINT64)
+               arg.length = PAGE_CACHE_ALIGN(arg.length);
 
        lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
        if (!lseg && first) {
@@ -991,6 +1022,7 @@ out_unlock:
        spin_unlock(&ino->i_lock);
        goto out;
 }
+EXPORT_SYMBOL_GPL(pnfs_update_layout);
 
 int
 pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1080,71 @@ out_forget_reply:
        goto out;
 }
 
+void
+pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_READ,
+                                          GFP_KERNEL);
+       /* If no lseg, fall back to read through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_read_mds(pgio);
+
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
+
+void
+pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+       BUG_ON(pgio->pg_lseg != NULL);
+
+       pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+                                          req->wb_context,
+                                          req_offset(req),
+                                          req->wb_bytes,
+                                          IOMODE_RW,
+                                          GFP_NOFS);
+       /* If no lseg, fall back to write through mds */
+       if (pgio->pg_lseg == NULL)
+               nfs_pageio_reset_write_mds(pgio);
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
+
 bool
-pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
-                    struct nfs_page *req)
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
 {
-       enum pnfs_iomode access_type;
-       gfp_t gfp_flags;
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
 
-       /* We assume that pg_ioflags == 0 iff we're reading a page */
-       if (pgio->pg_ioflags == 0) {
-               access_type = IOMODE_READ;
-               gfp_flags = GFP_KERNEL;
-       } else {
-               access_type = IOMODE_RW;
-               gfp_flags = GFP_NOFS;
-       }
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
+       return true;
+}
 
-       if (pgio->pg_lseg == NULL) {
-               if (pgio->pg_count != prev->wb_bytes)
-                       return true;
-               /* This is first coelesce call for a series of nfs_pages */
-               pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
-                                                  prev->wb_context,
-                                                  req_offset(prev),
-                                                  pgio->pg_count,
-                                                  access_type,
-                                                  gfp_flags);
-               if (pgio->pg_lseg == NULL)
-                       return true;
-       }
+bool
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
+{
+       struct nfs_server *server = NFS_SERVER(inode);
+       struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+       if (ld == NULL)
+               return false;
+       nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
+       return true;
+}
+
+bool
+pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+                    struct nfs_page *req)
+{
+       if (pgio->pg_lseg == NULL)
+               return nfs_generic_pg_test(pgio, prev, req);
 
        /*
         * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1188,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
 
-enum pnfs_try_status
+static void
+pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_write_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_write_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_writedata_release(data);
+}
+
+static enum pnfs_try_status
 pnfs_try_to_write_data(struct nfs_write_data *wdata,
-                       const struct rpc_call_ops *call_ops, int how)
+                       const struct rpc_call_ops *call_ops,
+                       struct pnfs_layout_segment *lseg,
+                       int how)
 {
        struct inode *inode = wdata->inode;
        enum pnfs_try_status trypnfs;
        struct nfs_server *nfss = NFS_SERVER(inode);
 
        wdata->mds_ops = call_ops;
+       wdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
                inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1227,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+{
+       struct nfs_write_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_write_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_write_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_flush(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
+
 /*
  * Called by non rpc-based layout drivers
  */
@@ -1167,18 +1288,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
 }
 EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
 
+static void
+pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
+               struct nfs_read_data *data)
+{
+       list_splice_tail_init(&data->pages, &desc->pg_list);
+       if (data->req && list_empty(&data->req->wb_list))
+               nfs_list_add_request(data->req, &desc->pg_list);
+       nfs_pageio_reset_read_mds(desc);
+       desc->pg_recoalesce = 1;
+       nfs_readdata_release(data);
+}
+
 /*
  * Call the appropriate parallel I/O subsystem read function.
  */
-enum pnfs_try_status
+static enum pnfs_try_status
 pnfs_try_to_read_data(struct nfs_read_data *rdata,
-                      const struct rpc_call_ops *call_ops)
+                      const struct rpc_call_ops *call_ops,
+                      struct pnfs_layout_segment *lseg)
 {
        struct inode *inode = rdata->inode;
        struct nfs_server *nfss = NFS_SERVER(inode);
        enum pnfs_try_status trypnfs;
 
        rdata->mds_ops = call_ops;
+       rdata->lseg = get_lseg(lseg);
 
        dprintk("%s: Reading ino:%lu %u@%llu\n",
                __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,17 +1329,56 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
        return trypnfs;
 }
 
+static void
+pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+{
+       struct nfs_read_data *data;
+       const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
+       struct pnfs_layout_segment *lseg = desc->pg_lseg;
+
+       desc->pg_lseg = NULL;
+       while (!list_empty(head)) {
+               enum pnfs_try_status trypnfs;
+
+               data = list_entry(head->next, struct nfs_read_data, list);
+               list_del_init(&data->list);
+
+               trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+               if (trypnfs == PNFS_NOT_ATTEMPTED)
+                       pnfs_read_through_mds(desc, data);
+       }
+       put_lseg(lseg);
+}
+
+int
+pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
+{
+       LIST_HEAD(head);
+       int ret;
+
+       ret = nfs_generic_pagein(desc, &head);
+       if (ret != 0) {
+               put_lseg(desc->pg_lseg);
+               desc->pg_lseg = NULL;
+               return ret;
+       }
+       pnfs_do_multiple_reads(desc, &head);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
+
 /*
- * Currently there is only one (whole file) write lseg.
+ * There can be multiple RW segments.
  */
-static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
+static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
 {
-       struct pnfs_layout_segment *lseg, *rv = NULL;
+       struct pnfs_layout_segment *lseg;
 
-       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
-               if (lseg->pls_range.iomode == IOMODE_RW)
-                       rv = lseg;
-       return rv;
+       list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
+               if (lseg->pls_range.iomode == IOMODE_RW &&
+                   test_bit(NFS_LSEG_LAYOUTCOMMIT, &lseg->pls_flags))
+                       list_add(&lseg->pls_lc_list, listp);
+       }
 }
 
 void
@@ -1216,17 +1390,19 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 
        spin_lock(&nfsi->vfs_inode.i_lock);
        if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
-               /* references matched in nfs4_layoutcommit_release */
-               get_lseg(wdata->lseg);
-               wdata->lseg->pls_lc_cred =
-                       get_rpccred(wdata->args.context->state->owner->so_cred);
                mark_as_dirty = true;
                dprintk("%s: Set layoutcommit for inode %lu ",
                        __func__, wdata->inode->i_ino);
        }
-       if (end_pos > wdata->lseg->pls_end_pos)
-               wdata->lseg->pls_end_pos = end_pos;
+       if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &wdata->lseg->pls_flags)) {
+               /* references matched in nfs4_layoutcommit_release */
+               get_lseg(wdata->lseg);
+       }
+       if (end_pos > nfsi->layout->plh_lwb)
+               nfsi->layout->plh_lwb = end_pos;
        spin_unlock(&nfsi->vfs_inode.i_lock);
+       dprintk("%s: lseg %p end_pos %llu\n",
+               __func__, wdata->lseg, nfsi->layout->plh_lwb);
 
        /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
         * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
@@ -1235,6 +1411,14 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
 }
 EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
 
+void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data)
+{
+       struct nfs_server *nfss = NFS_SERVER(data->args.inode);
+
+       if (nfss->pnfs_curr_ld->cleanup_layoutcommit)
+               nfss->pnfs_curr_ld->cleanup_layoutcommit(data);
+}
+
 /*
  * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
  * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
@@ -1248,8 +1432,6 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
 {
        struct nfs4_layoutcommit_data *data;
        struct nfs_inode *nfsi = NFS_I(inode);
-       struct pnfs_layout_segment *lseg;
-       struct rpc_cred *cred;
        loff_t end_pos;
        int status = 0;
 
@@ -1266,30 +1448,25 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync)
                goto out;
        }
 
+       INIT_LIST_HEAD(&data->lseg_list);
        spin_lock(&inode->i_lock);
        if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
                spin_unlock(&inode->i_lock);
                kfree(data);
                goto out;
        }
-       /*
-        * Currently only one (whole file) write lseg which is referenced
-        * in pnfs_set_layoutcommit and will be found.
-        */
-       lseg = pnfs_list_write_lseg(inode);
 
-       end_pos = lseg->pls_end_pos;
-       cred = lseg->pls_lc_cred;
-       lseg->pls_end_pos = 0;
-       lseg->pls_lc_cred = NULL;
+       pnfs_list_write_lseg(inode, &data->lseg_list);
+
+       end_pos = nfsi->layout->plh_lwb;
+       nfsi->layout->plh_lwb = 0;
 
        memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
                sizeof(nfsi->layout->plh_stateid.data));
        spin_unlock(&inode->i_lock);
 
        data->args.inode = inode;
-       data->lseg = lseg;
-       data->cred = cred;
+       data->cred = get_rpccred(nfsi->layout->plh_lc_cred);
        nfs_fattr_init(&data->fattr);
        data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
        data->res.fattr = &data->fattr;