Merge branch 'fixes' of git://git.linux-nfs.org/pub/linux/nfs-2.6
[pandora-kernel.git] / fs / nfs / read.c
index 05eb43f..da9cf11 100644 (file)
@@ -15,7 +15,6 @@
  * within the RPC code when root squashing is suspected.
  */
 
-#include <linux/config.h>
 #include <linux/time.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
 
 #include <asm/system.h>
 
+#include "iostat.h"
+
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
 
 static int nfs_pagein_one(struct list_head *, struct inode *);
-static void nfs_readpage_result_partial(struct nfs_read_data *, int);
-static void nfs_readpage_result_full(struct nfs_read_data *, int);
+static const struct rpc_call_ops nfs_read_partial_ops;
+static const struct rpc_call_ops nfs_read_full_ops;
 
 static kmem_cache_t *nfs_rdata_cachep;
-mempool_t *nfs_rdata_mempool;
+static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ  (32)
 
+/*
+ * Allocate a zeroed nfs_read_data from the read mempool and attach a
+ * page vector with room for "pagecount" page pointers.  The embedded
+ * page_array is used when it is big enough; otherwise a separate
+ * vector is obtained from kcalloc().
+ *
+ * Returns NULL if either allocation fails.
+ */
+struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
+{
+       struct nfs_read_data *rdata;
+
+       rdata = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
+       if (rdata == NULL)
+               return NULL;
+
+       memset(rdata, 0, sizeof(*rdata));
+       INIT_LIST_HEAD(&rdata->pages);
+
+       if (pagecount > ARRAY_SIZE(rdata->page_array)) {
+               /* Too many pages for the embedded array. */
+               rdata->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+               if (rdata->pagevec == NULL) {
+                       mempool_free(rdata, nfs_rdata_mempool);
+                       return NULL;
+               }
+       } else {
+               rdata->pagevec = rdata->page_array;
+       }
+       return rdata;
+}
+
+/*
+ * Release an nfs_read_data: free the page vector if it was allocated
+ * separately from the embedded page_array, then hand the structure
+ * back to the read mempool.
+ *
+ * A NULL pointer is accepted and ignored, so that NULL is handled the
+ * same way for both the page vector and the mempool element.
+ */
+static void nfs_readdata_free(struct nfs_read_data *p)
+{
+       if (p == NULL)
+               return;
+       if (p->pagevec != &p->page_array[0])
+               kfree(p->pagevec);
+       mempool_free(p, nfs_rdata_mempool);
+}
+
 void nfs_readdata_release(void *data)
 {
         nfs_readdata_free(data);
@@ -72,6 +100,35 @@ int nfs_return_empty_page(struct page *page)
        return 0;
 }
 
+/*
+ * If the server signalled EOF and returned fewer bytes than were
+ * requested, zero the tail of the request (from args.pgbase + res.count
+ * up to args.pgbase + args.count), walking across as many pages of
+ * args.pages as that range covers.
+ */
+static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
+{
+       unsigned int todo = data->args.count - data->res.count;
+       unsigned int offset = data->args.pgbase + data->res.count;
+       unsigned int chunk;
+       struct page **pagep;
+
+       /* Nothing to clear unless the read was cut short by EOF. */
+       if (!data->res.eof || !todo)
+               return;
+       /*
+        * Note: "todo" can never be negative, since this is checked
+        *      for in the XDR code.
+        */
+       pagep = &data->args.pages[offset >> PAGE_CACHE_SHIFT];
+       offset &= ~PAGE_CACHE_MASK;
+       /* The first page is only cleared from "offset" to its end. */
+       chunk = PAGE_CACHE_SIZE - offset;
+       while (todo > chunk) {
+               memclear_highpage_flush(*pagep, offset, chunk);
+               pagep++;
+               todo -= chunk;
+               chunk = PAGE_CACHE_SIZE;
+               offset = 0;
+       }
+       /* Whatever is left fits inside the current page. */
+       memclear_highpage_flush(*pagep, offset, todo);
+}
+
 /*
  * Read a page synchronously.
  */
@@ -133,6 +190,8 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
                }
                count -= result;
                rdata->args.pgbase += result;
+               nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
+
                /* Note: result == 0 should only happen if we're caching
                 * a write that extends the file and punches a hole.
                 */
@@ -143,11 +202,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
        NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&inode->i_lock);
 
-       if (count)
-               memclear_highpage_flush(page, rdata->args.pgbase, count);
-       SetPageUptodate(page);
-       if (PageError(page))
-               ClearPageError(page);
+       nfs_readpage_truncate_uninitialised_page(rdata);
+       if (rdata->res.eof || rdata->res.count == rdata->args.count)
+               SetPageUptodate(page);
        result = 0;
 
 io_error:
@@ -196,9 +253,11 @@ static void nfs_readpage_release(struct nfs_page *req)
  * Set up the NFS read request struct
  */
 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+               const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset)
 {
        struct inode            *inode;
+       int flags;
 
        data->req         = req;
        data->inode       = inode = req->wb_context->dentry->d_inode;
@@ -216,6 +275,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
        data->res.eof     = 0;
        nfs_fattr_init(&data->fattr);
 
+       /* Set up the initial task struct. */
+       flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
+       rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
        NFS_PROTO(inode)->read_setup(data);
 
        data->task.tk_cookie = (unsigned long)inode;
@@ -303,14 +365,15 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
                list_del_init(&data->pages);
 
                data->pagevec[0] = page;
-               data->complete = nfs_readpage_result_partial;
 
                if (nbytes > rsize) {
-                       nfs_read_rpcsetup(req, data, rsize, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       rsize, offset);
                        offset += rsize;
                        nbytes -= rsize;
                } else {
-                       nfs_read_rpcsetup(req, data, nbytes, offset);
+                       nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+                                       nbytes, offset);
                        nbytes = 0;
                }
                nfs_execute_read(data);
@@ -356,8 +419,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
        }
        req = nfs_list_entry(data->pages.next);
 
-       data->complete = nfs_readpage_result_full;
-       nfs_read_rpcsetup(req, data, count, 0);
+       nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 
        nfs_execute_read(data);
        return 0;
@@ -391,23 +453,18 @@ nfs_pagein_list(struct list_head *head, int rpages)
 /*
  * Handle a read reply that fills part of a page.
  */
-static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
+       struct nfs_read_data *data = calldata;
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
  
-       if (status >= 0) {
-               unsigned int request = data->args.count;
-               unsigned int result = data->res.count;
-
-               if (result < request) {
-                       memclear_highpage_flush(page,
-                                               data->args.pgbase + result,
-                                               request - result);
-               }
-       } else
+       if (likely(task->tk_status >= 0))
+               nfs_readpage_truncate_uninitialised_page(data);
+       else
                SetPageError(page);
-
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
@@ -415,51 +472,101 @@ static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
        }
 }
 
+/*
+ * RPC callbacks for the reads issued by nfs_pagein_multi(), where each
+ * RPC covers only part of a page.
+ */
+static const struct rpc_call_ops nfs_read_partial_ops = {
+       .rpc_call_done = nfs_readpage_result_partial,
+       .rpc_release = nfs_readdata_release,
+};
+
+/*
+ * Mark up to date every page of args.pages that is touched by the data
+ * just read, starting at args.pgbase.  The length used is res.count,
+ * or the full args.count when the server reported EOF.  A trailing
+ * partially covered page is marked as well.
+ */
+static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
+{
+       unsigned int len = data->res.eof ? data->args.count : data->res.count;
+       unsigned int offset = data->args.pgbase;
+       struct page **pagep;
+
+       if (unlikely(len == 0))
+               return;
+       pagep = &data->args.pages[offset >> PAGE_CACHE_SHIFT];
+       /* Measure the length from the start of the first page. */
+       len += offset & ~PAGE_CACHE_MASK;
+       while (len >= PAGE_CACHE_SIZE) {
+               SetPageUptodate(*pagep);
+               pagep++;
+               len -= PAGE_CACHE_SIZE;
+       }
+       if (len)
+               SetPageUptodate(*pagep);
+}
+
+/*
+ * Flag an error on every page of args.pages covered by the request,
+ * i.e. the args.count bytes starting at args.pgbase.  A trailing
+ * partially covered page is flagged as well.
+ */
+static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
+{
+       unsigned int offset = data->args.pgbase;
+       struct page **pagep = &data->args.pages[offset >> PAGE_CACHE_SHIFT];
+       /* Length measured from the start of the first page. */
+       unsigned int len = data->args.count + (offset & ~PAGE_CACHE_MASK);
+
+       while (len >= PAGE_CACHE_SIZE) {
+               SetPageError(*pagep);
+               pagep++;
+               len -= PAGE_CACHE_SIZE;
+       }
+       if (len)
+               SetPageError(*pagep);
+}
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
+static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
-       unsigned int count = data->res.count;
+       struct nfs_read_data *data = calldata;
 
+       /*
+        * Note: nfs_readpage_result() may change the values of
+        * data->args. In the multi-page case, we therefore need
+        * to ensure that we call nfs_readpage_set_pages_uptodate()
+        * before calling nfs_readpage_result().
+        */
+       if (likely(task->tk_status >= 0)) {
+               nfs_readpage_truncate_uninitialised_page(data);
+               nfs_readpage_set_pages_uptodate(data);
+       } else
+               nfs_readpage_set_pages_error(data);
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
-               struct page *page = req->wb_page;
-               nfs_list_remove_request(req);
 
-               if (status >= 0) {
-                       if (count < PAGE_CACHE_SIZE) {
-                               if (count < req->wb_bytes)
-                                       memclear_highpage_flush(page,
-                                                       req->wb_pgbase + count,
-                                                       req->wb_bytes - count);
-                               count = 0;
-                       } else
-                               count -= PAGE_CACHE_SIZE;
-                       SetPageUptodate(page);
-               } else
-                       SetPageError(page);
+               nfs_list_remove_request(req);
                nfs_readpage_release(req);
        }
 }
 
+/*
+ * RPC callbacks for the reads issued by nfs_pagein_one(), where a
+ * single RPC covers the whole list of page requests.
+ */
+static const struct rpc_call_ops nfs_read_full_ops = {
+       .rpc_call_done = nfs_readpage_result_full,
+       .rpc_release = nfs_readdata_release,
+};
+
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task, void *calldata)
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 {
-       struct nfs_read_data *data = calldata;
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
-       int status = task->tk_status;
+       int status;
 
        dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-               task->tk_pid, status);
+               task->tk_pid, task->tk_status);
+
+       status = NFS_PROTO(data->inode)->read_done(task, data);
+       if (status != 0)
+               return status;
+
+       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
 
        /* Is this a short read? */
        if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
+               nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Yes, so retry the read at the end of the data */
@@ -467,14 +574,14 @@ void nfs_readpage_result(struct rpc_task *task, void *calldata)
                        argp->pgbase += resp->count;
                        argp->count -= resp->count;
                        rpc_restart_call(task);
-                       return;
+                       return -EAGAIN;
                }
                task->tk_status = -EIO;
        }
        spin_lock(&data->inode->i_lock);
        NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&data->inode->i_lock);
-       data->complete(data, status);
+       return 0;
 }
 
 /*
@@ -491,6 +598,9 @@ int nfs_readpage(struct file *file, struct page *page)
 
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
+       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
+
        /*
         * Try to flush any pending writes to the file..
         *
@@ -570,6 +680,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);
+       nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
@@ -582,13 +693,14 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (!list_empty(&head)) {
                int err = nfs_pagein_list(&head, server->rpages);
                if (!ret)
+                       nfs_add_stats(inode, NFSIOS_READPAGES, err);
                        ret = err;
        }
        put_nfs_open_context(desc.ctx);
        return ret;
 }
 
-int nfs_init_readpagecache(void)
+int __init nfs_init_readpagecache(void)
 {
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                                             sizeof(struct nfs_read_data),
@@ -597,10 +709,8 @@ int nfs_init_readpagecache(void)
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;
 
-       nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
-                                          mempool_alloc_slab,
-                                          mempool_free_slab,
-                                          nfs_rdata_cachep);
+       nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
+                                                    nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL)
                return -ENOMEM;