Merge branch 'nfs-for-2.6.37' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Oct 2010 20:48:29 +0000 (13:48 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 25 Oct 2010 20:48:29 +0000 (13:48 -0700)
* 'nfs-for-2.6.37' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (67 commits)
  SUNRPC: Cleanup duplicate assignment in rpcauth_refreshcred
  nfs: fix unchecked value
  Ask for time_delta during fsinfo probe
  Revalidate caches on lock
  SUNRPC: After calling xprt_release(), we must restart from call_reserve
  NFSv4: Fix up the 'dircount' hint in encode_readdir
  NFSv4: Clean up nfs4_decode_dirent
  NFSv4: nfs4_decode_dirent must clear entry->fattr->valid
  NFSv4: Fix a regression in decode_getfattr
  NFSv4: Fix up decode_attr_filehandle() to handle the case of empty fh pointer
  NFS: Ensure we check all allocation return values in new readdir code
  NFS: Readdir plus in v4
  NFS: introduce generic decode_getattr function
  NFS: check xdr_decode for errors
  NFS: nfs_readdir_filler catch all errors
  NFS: readdir with vmapped pages
  NFS: remove page size checking code
  NFS: decode_dirent should use an xdr_stream
  SUNRPC: Add a helper function xdr_inline_peek
  NFS: remove readdir plus limit
  ...

12 files changed:
1  2 
Documentation/kernel-parameters.txt
fs/nfs/Kconfig
fs/nfs/client.c
fs/nfs/file.c
fs/nfs/nfs4state.c
fs/nfs/super.c
include/linux/nfs_fs.h
include/linux/sunrpc/clnt.h
init/do_mounts.c
net/sunrpc/auth.c
net/sunrpc/clnt.c
net/sunrpc/sched.c

@@@ -43,11 -43,10 +43,11 @@@ parameter is applicable
        AVR32   AVR32 architecture is enabled.
        AX25    Appropriate AX.25 support is enabled.
        BLACKFIN Blackfin architecture is enabled.
 -      DRM     Direct Rendering Management support is enabled.
        EDD     BIOS Enhanced Disk Drive Services (EDD) is enabled
        EFI     EFI Partitioning (GPT) is enabled
        EIDE    EIDE/ATAPI support is enabled.
 +      DRM     Direct Rendering Management support is enabled.
 +      DYNAMIC_DEBUG Build in debug messages and enable them at runtime
        FB      The frame buffer device is enabled.
        GCOV    GCOV profiling is enabled.
        HW      Appropriate hardware is enabled.
@@@ -456,7 -455,7 +456,7 @@@ and is between 256 and 4096 characters
                        [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
                                pxa_timer,timer3,32k_counter,timer0_1
                        [AVR32] avr32
 -                      [X86-32] pit,hpet,tsc,vmi-timer;
 +                      [X86-32] pit,hpet,tsc;
                                scx200_hrt on Geode; cyclone on IBM x440
                        [MIPS] MIPS
                        [PARISC] cr16
                        Format: <port#>,<type>
                        See also Documentation/input/joystick-parport.txt
  
 +      ddebug_query=   [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
 +                      time. See Documentation/dynamic-debug-howto.txt for
 +                      details.
 +
        debug           [KNL] Enable kernel debugging (events log level).
  
        debug_locks_verbose=
        kvm.oos_shadow= [KVM] Disable out-of-sync shadow paging.
                        Default is 1 (enabled)
  
 -      kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
 +      kvm.mmu_audit=  [KVM] This is a R/W parameter which allows audit
 +                      KVM MMU at runtime.
                        Default is 0 (off)
  
 +      kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
 +                      Default is 1 (enabled)
 +
        kvm-amd.npt=    [KVM,AMD] Disable nested paging (virtualized MMU)
                        for all guests.
                        Default is 1 (enabled) if in 64bit or 32bit-PAE mode
                        1 to enable accounting
                        Default value is 0.
  
-       nfsaddrs=       [NFS]
+       nfsaddrs=       [NFS] Deprecated.  Use ip= instead.
                        See Documentation/filesystems/nfs/nfsroot.txt.
  
        nfsroot=        [NFS] nfs root filesystem for disk-less boxes.
                        See Documentation/filesystems/nfs/nfsroot.txt.
  
+       nfsrootdebug    [NFS] enable nfsroot debugging messages.
+                       See Documentation/filesystems/nfs/nfsroot.txt.
        nfs.callback_tcpport=
                        [NFS] set the TCP port on which the NFSv4 callback
                        channel should listen.
  
        nojitter        [IA64] Disables jitter checking for ITC timers.
  
 +      no-kvmclock     [X86,KVM] Disable paravirtualized KVM clock driver
 +
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
  
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
        norandmaps      Don't use address space randomization.  Equivalent to
                        echo 0 > /proc/sys/kernel/randomize_va_space
  
 -      noreplace-paravirt      [X86-32,PV_OPS] Don't patch paravirt_ops
 +      noreplace-paravirt      [X86,IA-64,PV_OPS] Don't patch paravirt_ops
  
        noreplace-smp   [X86-32,SMP] Don't replace SMP instructions
                        with UP alternatives
                force   Enable ASPM even on devices that claim not to support it.
                        WARNING: Forcing ASPM on may cause system lockups.
  
 +      pcie_ports=     [PCIE] PCIe ports handling:
 +              auto    Ask the BIOS whether or not to use native PCIe services
 +                      associated with PCIe ports (PME, hot-plug, AER).  Use
 +                      them only if that is allowed by the BIOS.
 +              native  Use native PCIe services associated with PCIe ports
 +                      unconditionally.
 +              compat  Treat PCIe ports as PCI-to-PCI bridges, disable the PCIe
 +                      ports driver.
 +
        pcie_pme=       [PCIE,PM] Native PCIe PME signaling options:
 -                      Format: {auto|force}[,nomsi]
 -              auto    Use native PCIe PME signaling if the BIOS allows the
 -                      kernel to control PCIe config registers of root ports.
 -              force   Use native PCIe PME signaling even if the BIOS refuses
 -                      to allow the kernel to control the relevant PCIe config
 -                      registers.
                nomsi   Do not use MSI for native PCIe PME signaling (this makes
 -                      all PCIe root ports use INTx for everything).
 +                      all PCIe root ports use INTx for all services).
  
        pcmv=           [HW,PCMCIA] BadgePAD 4
  
                        Reserves a hole at the top of the kernel virtual
                        address space.
  
 +      reservelow=     [X86]
 +                      Format: nn[K]
 +                      Set the amount of memory to reserve for BIOS at
 +                      the bottom of the address space.
 +
        reset_devices   [KNL] Force drivers to reset the underlying device
                        during initialization.
  
                        in <PAGE_SIZE> units (needed only for swap files).
                        See  Documentation/power/swsusp-and-swap-files.txt
  
 +      hibernate=      [HIBERNATION]
 +              noresume        Don't check if there's a hibernation image
 +                              present during boot.
 +              nocompress      Don't compress/decompress hibernation images.
 +
        retain_initrd   [RAM] Keep initrd memory after extraction
  
        rhash_entries=  [KNL,NET]
  
        switches=       [HW,M68k]
  
 +      sysfs.deprecated=0|1 [KNL]
 +                      Enable/disable old style sysfs layout for old udev
 +                      on older distributions. When this option is enabled
 +                      very new udev will not work anymore. When this option
 +                      is disabled (or CONFIG_SYSFS_DEPRECATED not compiled)
 +                      in older udev will not work anymore.
 +                      Default depends on CONFIG_SYSFS_DEPRECATED_V2 set in
 +                      the kernel configuration.
 +
        sysrq_always_enabled
                        [KNL]
                        Ignore sysrq setting - this boot parameter will
                        topology informations if the hardware supports these.
                        The scheduler will make use of these informations and
                        e.g. base its process migration decisions on it.
 -                      Default is off.
 +                      Default is on.
  
        tp720=          [HW,PS2]
  
                        disables clocksource verification at runtime.
                        Used to enable high-resolution timer mode on older
                        hardware, and in virtualized environment.
 +                      [x86] noirqtime: Do not use TSC to do irq accounting.
 +                      Used to run time disable IRQ_TIME_ACCOUNTING on any
 +                      platforms where RDTSC is slow and this accounting
 +                      can add overhead.
  
        turbografx.map[2|3]=    [HW,JOY]
                        TurboGraFX parallel port interface
diff --combined fs/nfs/Kconfig
@@@ -1,7 -1,6 +1,7 @@@
  config NFS_FS
        tristate "NFS client support"
        depends on INET && FILE_LOCKING
 +      depends on BKL # fix as soon as lockd is done
        select LOCKD
        select SUNRPC
        select NFS_ACL_SUPPORT if NFS_V3_ACL
@@@ -64,7 -63,6 +64,7 @@@ config NFS_V3_AC
  config NFS_V4
        bool "NFS client support for NFS version 4"
        depends on NFS_FS
 +      select SUNRPC_GSS
        help
          This option enables support for version 4 of the NFS protocol
          (RFC 3530) in the kernel's NFS client.
@@@ -118,3 -116,14 +118,14 @@@ config NFS_USE_KERNEL_DN
        select DNS_RESOLVER
        select KEYS
        default y
+ config NFS_USE_NEW_IDMAPPER
+       bool "Use the new idmapper upcall routine"
+       depends on NFS_V4 && KEYS
+       help
+         Say Y here if you want NFS to use the new idmapper upcall functions.
+         You will need /sbin/request-key (usually provided by the keyutils
+         package).  For details, read
+         <file:Documentation/filesystems/nfs/idmapper.txt>.
+         If you are unsure, say N.
diff --combined fs/nfs/client.c
@@@ -275,7 -275,7 +275,7 @@@ static int nfs_sockaddr_match_ipaddr6(c
            sin1->sin6_scope_id != sin2->sin6_scope_id)
                return 0;
  
 -      return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
 +      return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
  }
  #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
  static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
@@@ -635,7 -635,8 +635,8 @@@ static int nfs_create_rpc_client(struc
   */
  static void nfs_destroy_server(struct nfs_server *server)
  {
-       if (!(server->flags & NFS_MOUNT_NONLM))
+       if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
+                       !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
                nlmclnt_done(server->nlm_host);
  }
  
@@@ -657,7 -658,8 +658,8 @@@ static int nfs_start_lockd(struct nfs_s
  
        if (nlm_init.nfs_version > 3)
                return 0;
-       if (server->flags & NFS_MOUNT_NONLM)
+       if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
+                       (server->flags & NFS_MOUNT_LOCAL_FCNTL))
                return 0;
  
        switch (clp->cl_proto) {
@@@ -901,8 -903,8 +903,8 @@@ static void nfs_server_set_fsinfo(struc
        server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
  
        server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-       if (server->dtsize > PAGE_CACHE_SIZE)
-               server->dtsize = PAGE_CACHE_SIZE;
+       if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
+               server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
        if (server->dtsize > server->rsize)
                server->dtsize = server->rsize;
  
  
        server->maxfilesize = fsinfo->maxfilesize;
  
+       server->time_delta = fsinfo->time_delta;
        /* We're airborne Set socket buffersize */
        rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
  }
@@@ -1356,8 -1360,9 +1360,9 @@@ static int nfs4_init_server(struct nfs_
  
        /* Initialise the client representation from the mount data */
        server->flags = data->flags;
-       server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|
-               NFS_CAP_POSIX_LOCK;
+       server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
+       if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+                       server->caps |= NFS_CAP_READDIRPLUS;
        server->options = data->options;
  
        /* Get a client record */
diff --combined fs/nfs/file.c
@@@ -551,7 -551,7 +551,7 @@@ static int nfs_vm_page_mkwrite(struct v
        struct file *filp = vma->vm_file;
        struct dentry *dentry = filp->f_path.dentry;
        unsigned pagelen;
-       int ret = -EINVAL;
+       int ret = VM_FAULT_NOPAGE;
        struct address_space *mapping;
  
        dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
        if (mapping != dentry->d_inode->i_mapping)
                goto out_unlock;
  
-       ret = 0;
        pagelen = nfs_page_length(page);
        if (pagelen == 0)
                goto out_unlock;
  
-       ret = nfs_flush_incompatible(filp, page);
-       if (ret != 0)
-               goto out_unlock;
+       ret = VM_FAULT_LOCKED;
+       if (nfs_flush_incompatible(filp, page) == 0 &&
+           nfs_updatepage(filp, page, 0, pagelen) == 0)
+               goto out;
  
-       ret = nfs_updatepage(filp, page, 0, pagelen);
+       ret = VM_FAULT_SIGBUS;
  out_unlock:
-       if (!ret)
-               return VM_FAULT_LOCKED;
        unlock_page(page);
-       return VM_FAULT_SIGBUS;
+ out:
+       return ret;
  }
  
  static const struct vm_operations_struct nfs_file_vm_ops = {
@@@ -684,7 -683,8 +683,8 @@@ static ssize_t nfs_file_splice_write(st
        return ret;
  }
  
- static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
+ static int
+ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
  {
        struct inode *inode = filp->f_mapping->host;
        int status = 0;
        if (nfs_have_delegation(inode, FMODE_READ))
                goto out_noconflict;
  
-       if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
+       if (is_local)
                goto out_noconflict;
  
        status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@@ -723,10 -723,15 +723,11 @@@ static int do_vfs_lock(struct file *fil
                default:
                        BUG();
        }
 -      if (res < 0)
 -              dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
 -                      " - error %d!\n",
 -                              __func__, res);
        return res;
  }
  
- static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
+ static int
+ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
  {
        struct inode *inode = filp->f_mapping->host;
        int status;
         *      If we're signalled while cleaning up locks on process exit, we
         *      still need to complete the unlock.
         */
-       /* Use local locking if mounted with "-onolock" */
-       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+       /*
+        * Use local locking if mounted with "-onolock" or with appropriate
+        * "-olocal_lock="
+        */
+       if (!is_local)
                status = NFS_PROTO(inode)->lock(filp, cmd, fl);
        else
                status = do_vfs_lock(filp, fl);
        return status;
  }
  
- static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
+ static int
+ is_time_granular(struct timespec *ts) {
+       return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
+ }
+ static int
+ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
  {
        struct inode *inode = filp->f_mapping->host;
        int status;
        if (status != 0)
                goto out;
  
-       /* Use local locking if mounted with "-onolock" */
-       if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
+       /*
+        * Use local locking if mounted with "-onolock" or with appropriate
+        * "-olocal_lock="
+        */
+       if (!is_local)
                status = NFS_PROTO(inode)->lock(filp, cmd, fl);
        else
                status = do_vfs_lock(filp, fl);
        if (status < 0)
                goto out;
        /*
-        * Make sure we clear the cache whenever we try to get the lock.
+        * Revalidate the cache if the server has time stamps granular
+        * enough to detect subsecond changes.  Otherwise, clear the
+        * cache to prevent missing any changes.
+        *
         * This makes locking act as a cache coherency point.
         */
        nfs_sync_mapping(filp->f_mapping);
-       if (!nfs_have_delegation(inode, FMODE_READ))
-               nfs_zap_caches(inode);
+       if (!nfs_have_delegation(inode, FMODE_READ)) {
+               if (is_time_granular(&NFS_SERVER(inode)->time_delta))
+                       __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+               else
+                       nfs_zap_caches(inode);
+       }
  out:
        return status;
  }
@@@ -787,6 -812,7 +808,7 @@@ static int nfs_lock(struct file *filp, 
  {
        struct inode *inode = filp->f_mapping->host;
        int ret = -ENOLCK;
+       int is_local = 0;
  
        dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
                        filp->f_path.dentry->d_parent->d_name.name,
        if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
                goto out_err;
  
+       if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
+               is_local = 1;
        if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
                ret = NFS_PROTO(inode)->lock_check_bounds(fl);
                if (ret < 0)
        }
  
        if (IS_GETLK(cmd))
-               ret = do_getlk(filp, cmd, fl);
+               ret = do_getlk(filp, cmd, fl, is_local);
        else if (fl->fl_type == F_UNLCK)
-               ret = do_unlk(filp, cmd, fl);
+               ret = do_unlk(filp, cmd, fl, is_local);
        else
-               ret = do_setlk(filp, cmd, fl);
+               ret = do_setlk(filp, cmd, fl, is_local);
  out_err:
        return ret;
  }
   */
  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
  {
+       struct inode *inode = filp->f_mapping->host;
+       int is_local = 0;
        dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
                        filp->f_path.dentry->d_parent->d_name.name,
                        filp->f_path.dentry->d_name.name,
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;
  
+       if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
+               is_local = 1;
        /* We're simulating flock() locks using posix locks on the server */
        fl->fl_owner = (fl_owner_t)filp;
        fl->fl_start = 0;
        fl->fl_end = OFFSET_MAX;
  
        if (fl->fl_type == F_UNLCK)
-               return do_unlk(filp, cmd, fl);
-       return do_setlk(filp, cmd, fl);
+               return do_unlk(filp, cmd, fl, is_local);
+       return do_setlk(filp, cmd, fl, is_local);
  }
  
  /*
diff --combined fs/nfs/nfs4state.c
  
  #include <linux/kernel.h>
  #include <linux/slab.h>
 -#include <linux/smp_lock.h>
 +#include <linux/fs.h>
  #include <linux/nfs_fs.h>
  #include <linux/nfs_idmap.h>
  #include <linux/kthread.h>
  #include <linux/module.h>
  #include <linux/random.h>
+ #include <linux/ratelimit.h>
  #include <linux/workqueue.h>
  #include <linux/bitops.h>
  
@@@ -970,13 -971,13 +971,13 @@@ static int nfs4_reclaim_locks(struct nf
        /* Guard against delegation returns and new lock/unlock calls */
        down_write(&nfsi->rwsem);
        /* Protect inode->i_flock using the BKL */
 -      lock_kernel();
 +      lock_flocks();
        for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
                if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
                        continue;
                if (nfs_file_open_context(fl->fl_file)->state != state)
                        continue;
 -              unlock_kernel();
 +              unlock_flocks();
                status = ops->recover_lock(state, fl);
                switch (status) {
                        case 0:
                                /* kill_proc(fl->fl_pid, SIGLOST, 1); */
                                status = 0;
                }
 -              lock_kernel();
 +              lock_flocks();
        }
 -      unlock_kernel();
 +      unlock_flocks();
  out:
        up_write(&nfsi->rwsem);
        return status;
@@@ -1063,6 -1064,14 +1064,14 @@@ restart
                                /* Mark the file as being 'closed' */
                                state->state = 0;
                                break;
+                       case -EKEYEXPIRED:
+                               /*
+                                * User RPCSEC_GSS context has expired.
+                                * We cannot recover this stateid now, so
+                                * skip it and allow recovery thread to
+                                * proceed.
+                                */
+                               break;
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_STALE_STATEID:
                        case -NFS4ERR_BAD_STATEID:
@@@ -1138,16 -1147,14 +1147,14 @@@ static void nfs4_reclaim_complete(struc
                (void)ops->reclaim_complete(clp);
  }
  
- static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
+ static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
  {
        struct nfs4_state_owner *sp;
        struct rb_node *pos;
        struct nfs4_state *state;
  
        if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
-               return;
-       nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
+               return 0;
  
        for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
                sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
        }
  
        nfs_delegation_reap_unclaimed(clp);
+       return 1;
+ }
+ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
+ {
+       if (!nfs4_state_clear_reclaim_reboot(clp))
+               return;
+       nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
  }
  
  static void nfs_delegation_clear_all(struct nfs_client *clp)
@@@ -1175,6 -1190,14 +1190,14 @@@ static void nfs4_state_start_reclaim_no
        nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
  }
  
+ static void nfs4_warn_keyexpired(const char *s)
+ {
+       printk_ratelimited(KERN_WARNING "Error: state manager"
+                       " encountered RPCSEC_GSS session"
+                       " expired against NFSv4 server %s.\n",
+                       s);
+ }
  static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
  {
        switch (error) {
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_LEASE_MOVED:
                        set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
-                       nfs4_state_end_reclaim_reboot(clp);
+                       nfs4_state_clear_reclaim_reboot(clp);
                        nfs4_state_start_reclaim_reboot(clp);
                        break;
                case -NFS4ERR_EXPIRED:
                        set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
                        /* Zero session reset errors */
                        return 0;
+               case -EKEYEXPIRED:
+                       /* Nothing we can do */
+                       nfs4_warn_keyexpired(clp->cl_hostname);
+                       return 0;
        }
        return error;
  }
@@@ -1414,9 -1441,10 +1441,10 @@@ static void nfs4_set_lease_expired(stru
                case -NFS4ERR_DELAY:
                case -NFS4ERR_CLID_INUSE:
                case -EAGAIN:
-               case -EKEYEXPIRED:
                        break;
  
+               case -EKEYEXPIRED:
+                       nfs4_warn_keyexpired(clp->cl_hostname);
                case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
                                         * in nfs4_exchange_id */
                default:
diff --combined fs/nfs/super.c
@@@ -100,6 -100,7 +100,7 @@@ enum 
        Opt_addr, Opt_mountaddr, Opt_clientaddr,
        Opt_lookupcache,
        Opt_fscache_uniq,
+       Opt_local_lock,
  
        /* Special mount options */
        Opt_userspace, Opt_deprecated, Opt_sloppy,
@@@ -171,6 -172,7 +172,7 @@@ static const match_table_t nfs_mount_op
  
        { Opt_lookupcache, "lookupcache=%s" },
        { Opt_fscache_uniq, "fsc=%s" },
+       { Opt_local_lock, "local_lock=%s" },
  
        { Opt_err, NULL }
  };
@@@ -236,6 -238,22 +238,22 @@@ static match_table_t nfs_lookupcache_to
        { Opt_lookupcache_err, NULL }
  };
  
+ enum {
+       Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
+       Opt_local_lock_none,
+       Opt_local_lock_err
+ };
+ static match_table_t nfs_local_lock_tokens = {
+       { Opt_local_lock_all, "all" },
+       { Opt_local_lock_flock, "flock" },
+       { Opt_local_lock_posix, "posix" },
+       { Opt_local_lock_none, "none" },
+       { Opt_local_lock_err, NULL }
+ };
  
  static void nfs_umount_begin(struct super_block *);
  static int  nfs_statfs(struct dentry *, struct kstatfs *);
@@@ -431,15 -449,7 +449,15 @@@ static int nfs_statfs(struct dentry *de
                goto out_err;
  
        error = server->nfs_client->rpc_ops->statfs(server, fh, &res);
 -
 +      if (unlikely(error == -ESTALE)) {
 +              struct dentry *pd_dentry;
 +
 +              pd_dentry = dget_parent(dentry);
 +              if (pd_dentry != NULL) {
 +                      nfs_zap_caches(pd_dentry->d_inode);
 +                      dput(pd_dentry);
 +              }
 +      }
        nfs_free_fattr(res.fattr);
        if (error < 0)
                goto out_err;
@@@ -622,6 -632,7 +640,7 @@@ static void nfs_show_mount_options(stru
        const struct proc_nfs_info *nfs_infop;
        struct nfs_client *clp = nfss->nfs_client;
        u32 version = clp->rpc_ops->version;
+       int local_flock, local_fcntl;
  
        seq_printf(m, ",vers=%u", version);
        seq_printf(m, ",rsize=%u", nfss->rsize);
                else
                        seq_printf(m, ",lookupcache=pos");
        }
+       local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
+       local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
+       if (!local_flock && !local_fcntl)
+               seq_printf(m, ",local_lock=none");
+       else if (local_flock && local_fcntl)
+               seq_printf(m, ",local_lock=all");
+       else if (local_flock)
+               seq_printf(m, ",local_lock=flock");
+       else
+               seq_printf(m, ",local_lock=posix");
  }
  
  /*
@@@ -1017,9 -1040,13 +1048,13 @@@ static int nfs_parse_mount_options(cha
                        break;
                case Opt_lock:
                        mnt->flags &= ~NFS_MOUNT_NONLM;
+                       mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+                                       NFS_MOUNT_LOCAL_FCNTL);
                        break;
                case Opt_nolock:
                        mnt->flags |= NFS_MOUNT_NONLM;
+                       mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+                                      NFS_MOUNT_LOCAL_FCNTL);
                        break;
                case Opt_v2:
                        mnt->flags &= ~NFS_MOUNT_VER3;
                        mnt->fscache_uniq = string;
                        mnt->options |= NFS_OPTION_FSCACHE;
                        break;
+               case Opt_local_lock:
+                       string = match_strdup(args);
+                       if (string == NULL)
+                               goto out_nomem;
+                       token = match_token(string, nfs_local_lock_tokens,
+                                       args);
+                       kfree(string);
+                       switch (token) {
+                       case Opt_local_lock_all:
+                               mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
+                                              NFS_MOUNT_LOCAL_FCNTL);
+                               break;
+                       case Opt_local_lock_flock:
+                               mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
+                               break;
+                       case Opt_local_lock_posix:
+                               mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
+                               break;
+                       case Opt_local_lock_none:
+                               mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
+                                               NFS_MOUNT_LOCAL_FCNTL);
+                               break;
+                       default:
+                               dfprintk(MOUNT, "NFS:   invalid "
+                                               "local_lock argument\n");
+                               return 0;
+                       };
+                       break;
  
                /*
                 * Special options
@@@ -1825,6 -1880,12 +1888,12 @@@ static int nfs_validate_mount_data(voi
                if (!args->nfs_server.hostname)
                        goto out_nomem;
  
+               if (!(data->flags & NFS_MOUNT_NONLM))
+                       args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
+                                        NFS_MOUNT_LOCAL_FCNTL);
+               else
+                       args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
+                                       NFS_MOUNT_LOCAL_FCNTL);
                /*
                 * The legacy version 6 binary mount data from userspace has a
                 * field used only to transport selinux information into the
@@@ -2441,7 -2502,8 +2510,8 @@@ static void nfs4_fill_super(struct supe
  
  static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
  {
-       args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
+       args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
+                        NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
  }
  
  static int nfs4_validate_text_mount_data(void *options,
diff --combined include/linux/nfs_fs.h
@@@ -185,7 -185,7 +185,7 @@@ struct nfs_inode 
        struct nfs4_cached_acl  *nfs4_acl;
          /* NFSv4 state */
        struct list_head        open_states;
 -      struct nfs_delegation   *delegation;
 +      struct nfs_delegation __rcu *delegation;
        fmode_t                  delegation_state;
        struct rw_semaphore     rwsem;
  #endif /* CONFIG_NFS_V4*/
@@@ -360,10 -360,13 +360,13 @@@ extern void nfs_setattr_update_inode(st
  extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx);
  extern void put_nfs_open_context(struct nfs_open_context *ctx);
  extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, fmode_t mode);
+ extern struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode);
+ extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx);
  extern struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx);
  extern void nfs_put_lock_context(struct nfs_lock_context *l_ctx);
  extern u64 nfs_compat_user_ino64(u64 fileid);
  extern void nfs_fattr_init(struct nfs_fattr *fattr);
+ extern unsigned long nfs_inc_attr_generation_counter(void);
  
  extern struct nfs_fattr *nfs_alloc_fattr(void);
  
@@@ -379,9 -382,12 +382,12 @@@ static inline void nfs_free_fhandle(con
        kfree(fh);
  }
  
+ /*
+  * linux/fs/nfs/nfsroot.c
+  */
+ extern int  nfs_root_data(char **root_device, char **root_data); /*__init*/
  /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
  extern __be32 root_nfs_parse_addr(char *name); /*__init*/
- extern unsigned long nfs_inc_attr_generation_counter(void);
  
  /*
   * linux/fs/nfs/file.c
@@@ -479,10 -485,10 +485,10 @@@ extern void nfs_release_automount_timer
  /*
   * linux/fs/nfs/unlink.c
   */
- extern int  nfs_async_unlink(struct inode *dir, struct dentry *dentry);
  extern void nfs_complete_unlink(struct dentry *dentry, struct inode *);
  extern void nfs_block_sillyrename(struct dentry *dentry);
  extern void nfs_unblock_sillyrename(struct dentry *dentry);
+ extern int  nfs_sillyrename(struct inode *dir, struct dentry *dentry);
  
  /*
   * linux/fs/nfs/write.c
@@@ -584,10 -590,6 +590,6 @@@ nfs_fileid_to_ino_t(u64 fileid
        return ino;
  }
  
- /* NFS root */
- extern void * nfs_root_data(void);
  #define nfs_wait_event(clnt, wq, condition)                           \
  ({                                                                    \
        int __retval = wait_event_killable(wq, condition);              \
@@@ -30,7 -30,7 +30,7 @@@ struct rpc_inode
   * The high-level client handle
   */
  struct rpc_clnt {
 -      struct kref             cl_kref;        /* Number of references */
 +      atomic_t                cl_count;       /* Number of references */
        struct list_head        cl_clients;     /* Global list of clients */
        struct list_head        cl_tasks;       /* List of tasks */
        spinlock_t              cl_lock;        /* spinlock */
@@@ -137,7 -137,6 +137,6 @@@ int                rpcb_register(u32, u32, int, unsig
  int           rpcb_v4_register(const u32 program, const u32 version,
                                 const struct sockaddr *address,
                                 const char *netid);
- int           rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
  void          rpcb_getport_async(struct rpc_task *);
  
  void          rpc_call_start(struct rpc_task *);
diff --combined init/do_mounts.c
@@@ -58,62 -58,6 +58,62 @@@ static int __init readwrite(char *str
  __setup("ro", readonly);
  __setup("rw", readwrite);
  
 +#ifdef CONFIG_BLOCK
 +/**
 + * match_dev_by_uuid - callback for finding a partition using its uuid
 + * @dev:      device passed in by the caller
 + * @data:     opaque pointer to the packed 16 byte UUID to compare against
 + *
 + * Returns 1 if the device matches, and 0 otherwise.
 + */
 +static int match_dev_by_uuid(struct device *dev, void *data)
 +{
 +      u8 *uuid = data;
 +      struct hd_struct *part = dev_to_part(dev);
 +
 +      if (!part->info)
 +              goto no_match;
 +
 +      if (memcmp(uuid, part->info->uuid, sizeof(part->info->uuid)))
 +                      goto no_match;
 +
 +      return 1;
 +no_match:
 +      return 0;
 +}
 +
 +
 +/**
 + * devt_from_partuuid - looks up the dev_t of a partition by its UUID
 + * @uuid_str: 36 byte char array containing a hex ascii UUID
 + *
 + * The function will return the first partition which contains a matching
 + * UUID value in its partition_meta_info struct.  This does not search
 + * by filesystem UUIDs.
 + *
 + * Returns the matching dev_t on success or 0 on failure.
 + */
 +static dev_t __init devt_from_partuuid(char *uuid_str)
 +{
 +      dev_t res = 0;
 +      struct device *dev = NULL;
 +      u8 uuid[16];
 +
 +      /* Pack the requested UUID in the expected format. */
 +      part_pack_uuid(uuid_str, uuid);
 +
 +      dev = class_find_device(&block_class, NULL, uuid, &match_dev_by_uuid);
 +      if (!dev)
 +              goto done;
 +
 +      res = dev->devt;
 +      put_device(dev);
 +
 +done:
 +      return res;
 +}
 +#endif
 +
  /*
   *    Convert a name into device number.  We accept the following variants:
   *
   *         of partition - device number of disk plus the partition number
   *    5) /dev/<disk_name>p<decimal> - same as the above, that form is
   *       used when disk name of partitioned disk ends on a digit.
 + *    6) PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF representing the
 + *       unique id of a partition if the partition table provides it.
   *
   *    If name doesn't fall into the categories above, we return (0,0).
   *    block_class is used to check if something is a disk name. If the disk
@@@ -140,18 -82,6 +140,18 @@@ dev_t name_to_dev_t(char *name
        dev_t res = 0;
        int part;
  
 +#ifdef CONFIG_BLOCK
 +      if (strncmp(name, "PARTUUID=", 9) == 0) {
 +              name += 9;
 +              if (strlen(name) != 36)
 +                      goto fail;
 +              res = devt_from_partuuid(name);
 +              if (!res)
 +                      goto fail;
 +              goto done;
 +      }
 +#endif
 +
        if (strncmp(name, "/dev/", 5) != 0) {
                unsigned maj, min;
  
@@@ -361,13 -291,13 +361,13 @@@ out
  #ifdef CONFIG_ROOT_NFS
  static int __init mount_nfs_root(void)
  {
-       void *data = nfs_root_data();
+       char *root_dev, *root_data;
  
-       create_dev("/dev/root", ROOT_DEV);
-       if (data &&
-           do_mount_root("/dev/root", "nfs", root_mountflags, data) == 0)
-               return 1;
-       return 0;
+       if (nfs_root_data(&root_dev, &root_data) != 0)
+               return 0;
+       if (do_mount_root(root_dev, "nfs", root_mountflags, root_data) != 0)
+               return 0;
+       return 1;
  }
  #endif
  
diff --combined net/sunrpc/auth.c
@@@ -38,7 -38,7 +38,7 @@@ static const struct rpc_authops *auth_f
  static LIST_HEAD(cred_unused);
  static unsigned long number_cred_unused;
  
 -#define MAX_HASHTABLE_BITS (10) 
 +#define MAX_HASHTABLE_BITS (14)
  static int param_set_hashtbl_sz(const char *val, const struct kernel_param *kp)
  {
        unsigned long num;
@@@ -595,7 -595,7 +595,7 @@@ rpcauth_unwrap_resp(struct rpc_task *ta
  int
  rpcauth_refreshcred(struct rpc_task *task)
  {
-       struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+       struct rpc_cred *cred;
        int err;
  
        cred = task->tk_rqstp->rq_cred;
diff --combined net/sunrpc/clnt.c
@@@ -226,7 -226,7 +226,7 @@@ static struct rpc_clnt * rpc_new_client
                        goto out_no_principal;
        }
  
 -      kref_init(&clnt->cl_kref);
 +      atomic_set(&clnt->cl_count, 1);
  
        err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
        if (err < 0)
@@@ -390,14 -390,14 +390,14 @@@ rpc_clone_client(struct rpc_clnt *clnt
                if (new->cl_principal == NULL)
                        goto out_no_principal;
        }
 -      kref_init(&new->cl_kref);
 +      atomic_set(&new->cl_count, 1);
        err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
        if (err != 0)
                goto out_no_path;
        if (new->cl_auth)
                atomic_inc(&new->cl_auth->au_count);
        xprt_get(clnt->cl_xprt);
 -      kref_get(&clnt->cl_kref);
 +      atomic_inc(&clnt->cl_count);
        rpc_register_client(new);
        rpciod_up();
        return new;
@@@ -465,8 -465,10 +465,8 @@@ EXPORT_SYMBOL_GPL(rpc_shutdown_client)
   * Free an RPC client
   */
  static void
 -rpc_free_client(struct kref *kref)
 +rpc_free_client(struct rpc_clnt *clnt)
  {
 -      struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
 -
        dprintk("RPC:       destroying %s client for %s\n",
                        clnt->cl_protname, clnt->cl_server);
        if (!IS_ERR(clnt->cl_path.dentry)) {
@@@ -493,10 -495,12 +493,10 @@@ out_free
   * Free an RPC client, first releasing its auth handle if one is attached
   */
  static void
 -rpc_free_auth(struct kref *kref)
 +rpc_free_auth(struct rpc_clnt *clnt)
  {
 -      struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
 -
        if (clnt->cl_auth == NULL) {
 -              rpc_free_client(kref);
 +              rpc_free_client(clnt);
                return;
        }
  
         *       release remaining GSS contexts. This mechanism ensures
         *       that it can do so safely.
         */
 -      kref_init(kref);
 +      atomic_inc(&clnt->cl_count);
        rpcauth_release(clnt->cl_auth);
        clnt->cl_auth = NULL;
 -      kref_put(kref, rpc_free_client);
 +      if (atomic_dec_and_test(&clnt->cl_count))
 +              rpc_free_client(clnt);
  }
  
  /*
@@@ -522,8 -525,7 +522,8 @@@ rpc_release_client(struct rpc_clnt *cln
  
        if (list_empty(&clnt->cl_tasks))
                wake_up(&destroy_wait);
 -      kref_put(&clnt->cl_kref, rpc_free_auth);
 +      if (atomic_dec_and_test(&clnt->cl_count))
 +              rpc_free_auth(clnt);
  }
  
  /**
@@@ -586,7 -588,7 +586,7 @@@ void rpc_task_set_client(struct rpc_tas
        if (clnt != NULL) {
                rpc_task_release_client(task);
                task->tk_client = clnt;
 -              kref_get(&clnt->cl_kref);
 +              atomic_inc(&clnt->cl_count);
                if (clnt->cl_softrtry)
                        task->tk_flags |= RPC_TASK_SOFT;
                /* Add to the client's list of all tasks */
@@@ -929,7 -931,7 +929,7 @@@ call_reserveresult(struct rpc_task *tas
        task->tk_status = 0;
        if (status >= 0) {
                if (task->tk_rqstp) {
 -                      task->tk_action = call_allocate;
 +                      task->tk_action = call_refresh;
                        return;
                }
  
  }
  
  /*
 - * 2. Allocate the buffer. For details, see sched.c:rpc_malloc.
 + * 2. Bind and/or refresh the credentials
 + */
 +static void
 +call_refresh(struct rpc_task *task)
 +{
 +      dprint_status(task);
 +
 +      task->tk_action = call_refreshresult;
 +      task->tk_status = 0;
 +      task->tk_client->cl_stats->rpcauthrefresh++;
 +      rpcauth_refreshcred(task);
 +}
 +
 +/*
 + * 2a.        Process the results of a credential refresh
 + */
 +static void
 +call_refreshresult(struct rpc_task *task)
 +{
 +      int status = task->tk_status;
 +
 +      dprint_status(task);
 +
 +      task->tk_status = 0;
 +      task->tk_action = call_allocate;
 +      if (status >= 0 && rpcauth_uptodatecred(task))
 +              return;
 +      switch (status) {
 +      case -EACCES:
 +              rpc_exit(task, -EACCES);
 +              return;
 +      case -ENOMEM:
 +              rpc_exit(task, -ENOMEM);
 +              return;
 +      case -ETIMEDOUT:
 +              rpc_delay(task, 3*HZ);
 +      }
 +      task->tk_action = call_refresh;
 +}
 +
 +/*
 + * 2b.        Allocate the buffer. For details, see sched.c:rpc_malloc.
   *    (Note: buffer memory is freed in xprt_release).
   */
  static void
  call_allocate(struct rpc_task *task)
  {
 -      unsigned int slack = task->tk_client->cl_auth->au_cslack;
 +      unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = task->tk_xprt;
        struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
        dprint_status(task);
  
        task->tk_status = 0;
 -      task->tk_action = call_refresh;
 +      task->tk_action = call_bind;
  
        if (req->rq_buffer)
                return;
        rpc_exit(task, -ERESTARTSYS);
  }
  
 -/*
 - * 2a.        Bind and/or refresh the credentials
 - */
 -static void
 -call_refresh(struct rpc_task *task)
 -{
 -      dprint_status(task);
 -
 -      task->tk_action = call_refreshresult;
 -      task->tk_status = 0;
 -      task->tk_client->cl_stats->rpcauthrefresh++;
 -      rpcauth_refreshcred(task);
 -}
 -
 -/*
 - * 2b.        Process the results of a credential refresh
 - */
 -static void
 -call_refreshresult(struct rpc_task *task)
 -{
 -      int status = task->tk_status;
 -
 -      dprint_status(task);
 -
 -      task->tk_status = 0;
 -      task->tk_action = call_bind;
 -      if (status >= 0 && rpcauth_uptodatecred(task))
 -              return;
 -      switch (status) {
 -      case -EACCES:
 -              rpc_exit(task, -EACCES);
 -              return;
 -      case -ENOMEM:
 -              rpc_exit(task, -ENOMEM);
 -              return;
 -      case -ETIMEDOUT:
 -              rpc_delay(task, 3*HZ);
 -      }
 -      task->tk_action = call_refresh;
 -}
 -
  static inline int
  rpc_task_need_encode(struct rpc_task *task)
  {
@@@ -1675,7 -1677,7 +1675,7 @@@ rpc_verify_header(struct rpc_task *task
                        rpcauth_invalcred(task);
                        /* Ensure we obtain a new XID! */
                        xprt_release(task);
-                       task->tk_action = call_refresh;
+                       task->tk_action = call_reserve;
                        goto out_retry;
                case RPC_AUTH_BADCRED:
                case RPC_AUTH_BADVERF:
diff --combined net/sunrpc/sched.c
@@@ -376,7 -376,7 +376,7 @@@ int rpc_queue_empty(struct rpc_wait_que
        spin_lock_bh(&queue->lock);
        res = queue->qlen;
        spin_unlock_bh(&queue->lock);
 -      return (res == 0);
 +      return res == 0;
  }
  EXPORT_SYMBOL_GPL(rpc_queue_empty);
  
@@@ -908,7 -908,7 +908,7 @@@ static int rpciod_start(void
         * Create the rpciod thread and wait for it to start.
         */
        dprintk("RPC:       creating workqueue rpciod\n");
-       wq = create_workqueue("rpciod");
+       wq = alloc_workqueue("rpciod", WQ_RESCUER, 0);
        rpciod_workqueue = wq;
        return rpciod_workqueue != NULL;
  }