Merge branch 'master' into upstream
[pandora-kernel.git] / fs / nfs / read.c
index 56f66f0..6ab4d5a 100644 (file)
@@ -5,14 +5,6 @@
  *
  * Partial copy of Linus' read cache modifications to fs/nfs/file.c
  * modified for async RPC by okir@monad.swb.de
- *
- * We do an ugly hack here in order to return proper error codes to the
- * user program when a read request failed: since generic_file_read
- * only checks the return value of inode->i_op->readpage() which is always 0
- * for async RPC, we set the error bit of the page to 1 when an error occurs,
- * and make nfs_readpage transmit requests synchronously when encountering this.
- * This is only a small problem, though, since we now retry all operations
- * within the RPC code when root squashing is suspected.
  */
 
 #include <linux/time.h>
@@ -30,6 +22,7 @@
 
 #include <asm/system.h>
 
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
@@ -38,7 +31,7 @@ static int nfs_pagein_one(struct list_head *, struct inode *);
 static const struct rpc_call_ops nfs_read_partial_ops;
 static const struct rpc_call_ops nfs_read_full_ops;
 
-static kmem_cache_t *nfs_rdata_cachep;
+static struct kmem_cache *nfs_rdata_cachep;
 static mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ  (32)
@@ -65,32 +58,22 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
        return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
 {
+       struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
 }
 
-void nfs_readdata_release(void *data)
+static void nfs_readdata_free(struct nfs_read_data *rdata)
 {
-        nfs_readdata_free(data);
+       call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
 }
 
-static
-unsigned int nfs_page_length(struct inode *inode, struct page *page)
+void nfs_readdata_release(void *data)
 {
-       loff_t i_size = i_size_read(inode);
-       unsigned long idx;
-
-       if (i_size <= 0)
-               return 0;
-       idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index > idx)
-               return 0;
-       if (page->index != idx)
-               return PAGE_CACHE_SIZE;
-       return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+        nfs_readdata_free(data);
 }
 
 static
@@ -131,92 +114,6 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
        }
 }
 
-/*
- * Read a page synchronously.
- */
-static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
-               struct page *page)
-{
-       unsigned int    rsize = NFS_SERVER(inode)->rsize;
-       unsigned int    count = PAGE_CACHE_SIZE;
-       int             result;
-       struct nfs_read_data *rdata;
-
-       rdata = nfs_readdata_alloc(count);
-       if (!rdata)
-               return -ENOMEM;
-
-       memset(rdata, 0, sizeof(*rdata));
-       rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-       rdata->cred = ctx->cred;
-       rdata->inode = inode;
-       INIT_LIST_HEAD(&rdata->pages);
-       rdata->args.fh = NFS_FH(inode);
-       rdata->args.context = ctx;
-       rdata->args.pages = &page;
-       rdata->args.pgbase = 0UL;
-       rdata->args.count = rsize;
-       rdata->res.fattr = &rdata->fattr;
-
-       dprintk("NFS: nfs_readpage_sync(%p)\n", page);
-
-       /*
-        * This works now because the socket layer never tries to DMA
-        * into this buffer directly.
-        */
-       do {
-               if (count < rsize)
-                       rdata->args.count = count;
-               rdata->res.count = rdata->args.count;
-               rdata->args.offset = page_offset(page) + rdata->args.pgbase;
-
-               dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
-                       NFS_SERVER(inode)->nfs_client->cl_hostname,
-                       inode->i_sb->s_id,
-                       (long long)NFS_FILEID(inode),
-                       (unsigned long long)rdata->args.pgbase,
-                       rdata->args.count);
-
-               lock_kernel();
-               result = NFS_PROTO(inode)->read(rdata);
-               unlock_kernel();
-
-               /*
-                * Even if we had a partial success we can't mark the page
-                * cache valid.
-                */
-               if (result < 0) {
-                       if (result == -EISDIR)
-                               result = -EINVAL;
-                       goto io_error;
-               }
-               count -= result;
-               rdata->args.pgbase += result;
-               nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
-
-               /* Note: result == 0 should only happen if we're caching
-                * a write that extends the file and punches a hole.
-                */
-               if (rdata->res.eof != 0 || result == 0)
-                       break;
-       } while (count);
-       spin_lock(&inode->i_lock);
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-       spin_unlock(&inode->i_lock);
-
-       if (rdata->res.eof || rdata->res.count == rdata->args.count) {
-               SetPageUptodate(page);
-               if (rdata->res.eof && count != 0)
-                       memclear_highpage_flush(page, rdata->args.pgbase, count);
-       }
-       result = 0;
-
-io_error:
-       unlock_page(page);
-       nfs_readdata_free(rdata);
-       return result;
-}
-
 static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
 {
@@ -224,7 +121,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        struct nfs_page *new;
        unsigned int len;
 
-       len = nfs_page_length(inode, page);
+       len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
@@ -286,7 +183,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 
        data->task.tk_cookie = (unsigned long)inode;
 
-       dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+       dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
@@ -316,9 +213,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
        sigset_t oldset;
 
        rpc_clnt_sigmask(clnt, &oldset);
-       lock_kernel();
        rpc_execute(&data->task);
-       unlock_kernel();
        rpc_clnt_sigunmask(clnt, &oldset);
 }
 
@@ -454,6 +349,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
        return error;
 }
 
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ *
+ * The protocol-specific read_done() handler runs first; a non-zero
+ * status from it is returned to the caller unchanged and nothing else
+ * is done.  Otherwise the byte count in data->res is added to the
+ * NFSIOS_SERVERREADBYTES statistic, an -ESTALE task status flags the
+ * inode as stale and marks it for revalidation, and
+ * NFS_INO_INVALID_ATIME is set in the inode's cache_validity under
+ * i_lock.  Returns 0 in that case.
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+       int status;
+
+       dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
+                       task->tk_status);
+
+       status = NFS_PROTO(data->inode)->read_done(task, data);
+       if (status != 0)
+               return status;
+
+       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+       if (task->tk_status == -ESTALE) {
+               set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+               nfs_mark_for_revalidate(data->inode);
+       }
+       spin_lock(&data->inode->i_lock);
+       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&data->inode->i_lock);
+       return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+       struct nfs_readargs *argp = &data->args;
+       struct nfs_readres *resp = &data->res;
+
+       if (resp->eof || resp->count == argp->count)
+               return 0;
+
+       /* This is a short read: no EOF, and the count is not what we asked for. */
+       nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+       /* Has the server at least made some progress? If not, do not retry. */
+       if (resp->count == 0)
+               return 0;
+
+       /*
+        * Yes, so retry the read at the end of the data: advance offset
+        * and pgbase past the bytes received, shrink count to match, and
+        * restart the RPC call.
+        */
+       argp->offset += resp->count;
+       argp->pgbase += resp->count;
+       argp->count -= resp->count;
+       rpc_restart_call(task);
+       return -EAGAIN;
+}
+
 /*
  * Handle a read reply that fills part of a page.
  */
@@ -463,12 +407,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
  
-       if (likely(task->tk_status >= 0))
-               nfs_readpage_truncate_uninitialised_page(data);
-       else
-               SetPageError(page);
        if (nfs_readpage_result(task, data) != 0)
                return;
+
+       if (likely(task->tk_status >= 0)) {
+               nfs_readpage_truncate_uninitialised_page(data);
+               if (nfs_readpage_retry(task, data) != 0)
+                       return;
+       }
+       if (unlikely(task->tk_status < 0))
+               SetPageError(page);
        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
@@ -496,25 +444,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
        count += base;
        for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
                SetPageUptodate(*pages);
-       if (count != 0)
+       if (count == 0)
+               return;
+       /* Was this a short read? */
+       if (data->res.eof || data->res.count == data->args.count)
                SetPageUptodate(*pages);
 }
 
-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-       unsigned int count = data->args.count;
-       unsigned int base = data->args.pgbase;
-       struct page **pages;
-
-       pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-       base &= ~PAGE_CACHE_MASK;
-       count += base;
-       for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-               SetPageError(*pages);
-       if (count != 0)
-               SetPageError(*pages);
-}
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -523,19 +459,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
        struct nfs_read_data *data = calldata;
 
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        /*
-        * Note: nfs_readpage_result may change the values of
+        * Note: nfs_readpage_retry may change the values of
         * data->args. In the multi-page case, we therefore need
-        * to ensure that we call the next nfs_readpage_set_page_uptodate()
-        * first in the multi-page case.
+        * to ensure that we call nfs_readpage_set_pages_uptodate()
+        * first.
         */
        if (likely(task->tk_status >= 0)) {
                nfs_readpage_truncate_uninitialised_page(data);
                nfs_readpage_set_pages_uptodate(data);
-       } else
-               nfs_readpage_set_pages_error(data);
-       if (nfs_readpage_result(task, data) != 0)
-               return;
+               if (nfs_readpage_retry(task, data) != 0)
+                       return;
+       }
        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
 
@@ -549,50 +486,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
        .rpc_release = nfs_readdata_release,
 };
 
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-       struct nfs_readargs *argp = &data->args;
-       struct nfs_readres *resp = &data->res;
-       int status;
-
-       dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-               task->tk_pid, task->tk_status);
-
-       status = NFS_PROTO(data->inode)->read_done(task, data);
-       if (status != 0)
-               return status;
-
-       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-       if (task->tk_status < 0) {
-               if (task->tk_status == -ESTALE) {
-                       set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-                       nfs_mark_for_revalidate(data->inode);
-               }
-       } else if (resp->count < argp->count && !resp->eof) {
-               /* This is a short read! */
-               nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-               /* Has the server at least made some progress? */
-               if (resp->count != 0) {
-                       /* Yes, so retry the read at the end of the data */
-                       argp->offset += resp->count;
-                       argp->pgbase += resp->count;
-                       argp->count -= resp->count;
-                       rpc_restart_call(task);
-                       return -EAGAIN;
-               }
-               task->tk_status = -EIO;
-       }
-       spin_lock(&data->inode->i_lock);
-       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-       spin_unlock(&data->inode->i_lock);
-       return 0;
-}
-
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -626,21 +519,16 @@ int nfs_readpage(struct file *file, struct page *page)
                goto out_error;
 
        if (file == NULL) {
+               error = -EBADF;
                ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (ctx == NULL)
-                       return -EBADF;
+                       goto out_error;
        } else
                ctx = get_nfs_open_context((struct nfs_open_context *)
                                file->private_data);
-       if (!IS_SYNC(inode)) {
-               error = nfs_readpage_async(ctx, inode, page);
-               goto out;
-       }
 
-       error = nfs_readpage_sync(ctx, inode, page);
-       if (error < 0 && IS_SWAPFILE(inode))
-               printk("Aiee.. nfs swap-in of page failed!\n");
-out:
+       error = nfs_readpage_async(ctx, inode, page);
+
        put_nfs_open_context(ctx);
        return error;
 
@@ -663,7 +551,7 @@ readpage_async_filler(void *data, struct page *page)
        unsigned int len;
 
        nfs_wb_page(inode, page);
-       len = nfs_page_length(inode, page);
+       len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(desc->ctx, inode, page, 0, len);