Merge branch 'locks' of git://linux-nfs.org/~bfields/linux
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Mon, 15 Oct 2007 23:07:40 +0000 (16:07 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Mon, 15 Oct 2007 23:07:40 +0000 (16:07 -0700)
* 'locks' of git://linux-nfs.org/~bfields/linux:
  nfsd: remove IS_ISMNDLCK macro
  Rework /proc/locks via seq_files and seq_list helpers
  fs/locks.c: use list_for_each_entry() instead of list_for_each()
  NFS: clean up explicit check for mandatory locks
  AFS: clean up explicit check for mandatory locks
  9PFS: clean up explicit check for mandatory locks
  GFS2: clean up explicit check for mandatory locks
  Cleanup macros for distinguishing mandatory locks
  Documentation: move locks.txt in filesystems/
  locks: add warning about mandatory locking races
  Documentation: move mandatory locking documentation to filesystems/
  locks: Fix potential OOPS in generic_setlease()
  Use list_first_entry in locks_wake_up_blocks
  locks: fix flock_lock_file() comment
  Memory shortage can result in inconsistent flocks state
  locks: kill redundant local variable
  locks: reverse order of posix_locks_conflict() arguments

1  2 
fs/gfs2/ops_file.c
fs/nfs/file.c
fs/nfsd/nfs4state.c
fs/nfsd/vfs.c
fs/proc/proc_misc.c
include/linux/fs.h

diff --combined fs/gfs2/ops_file.c
@@@ -535,7 -535,7 +535,7 @@@ static int gfs2_lock(struct file *file
  
        if (!(fl->fl_flags & FL_POSIX))
                return -ENOLCK;
-       if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+       if (__mandatory_lock(&ip->i_inode))
                return -ENOLCK;
  
        if (sdp->sd_args.ar_localflocks) {
@@@ -571,8 -571,7 +571,8 @@@ static int do_flock(struct file *file, 
        int error = 0;
  
        state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
 -      flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;
 +      flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE 
 +              | GL_FLOCK;
  
        mutex_lock(&fp->f_fl_mutex);
  
        if (gl) {
                if (fl_gh->gh_state == state)
                        goto out;
 -              gfs2_glock_hold(gl);
                flock_lock_file_wait(file,
                                     &(struct file_lock){.fl_type = F_UNLCK});
 -              gfs2_glock_dq_uninit(fl_gh);
 +              gfs2_glock_dq_wait(fl_gh);
 +              gfs2_holder_reinit(state, flags, fl_gh);
        } else {
                error = gfs2_glock_get(GFS2_SB(&ip->i_inode),
                                      ip->i_no_addr, &gfs2_flock_glops,
                                      CREATE, &gl);
                if (error)
                        goto out;
 +              gfs2_holder_init(gl, state, flags, fl_gh);
 +              gfs2_glock_put(gl);
        }
 -
 -      gfs2_holder_init(gl, state, flags, fl_gh);
 -      gfs2_glock_put(gl);
 -
        error = gfs2_glock_nq(fl_gh);
        if (error) {
                gfs2_holder_uninit(fl_gh);
@@@ -636,7 -637,7 +636,7 @@@ static int gfs2_flock(struct file *file
  
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;
-       if ((ip->i_inode.i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+       if (__mandatory_lock(&ip->i_inode))
                return -ENOLCK;
  
        if (sdp->sd_args.ar_localflocks)
diff --combined fs/nfs/file.c
@@@ -33,7 -33,6 +33,7 @@@
  #include <asm/system.h>
  
  #include "delegation.h"
 +#include "internal.h"
  #include "iostat.h"
  
  #define NFSDBG_FACILITY               NFSDBG_FILE
@@@ -56,8 -55,6 +56,8 @@@ static int nfs_lock(struct file *filp, 
  static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
  static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);
  
 +static struct vm_operations_struct nfs_file_vm_ops;
 +
  const struct file_operations nfs_file_operations = {
        .llseek         = nfs_file_llseek,
        .read           = do_sync_read,
@@@ -176,31 -173,6 +176,31 @@@ static loff_t nfs_file_llseek(struct fi
        return remote_llseek(filp, offset, origin);
  }
  
 +/*
 + * Helper for nfs_file_flush() and nfs_fsync()
 + *
 + * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
 + * disk, but it retrieves and clears ctx->error after synching, despite
 + * the two being set at the same time in nfs_context_set_write_error().
 + * This is because the former is used to notify the _next_ call to
 + * nfs_file_write() that a write error occurred, and hence cause it to
 + * fall back to doing a synchronous write.
 + */
 +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
 +{
 +      int have_error, status;
 +      int ret = 0;
 +
 +      have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 +      status = nfs_wb_all(inode);
 +      have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
 +      if (have_error)
 +              ret = xchg(&ctx->error, 0);
 +      if (!ret)
 +              ret = status;
 +      return ret;
 +}
 +
  /*
   * Flush all dirty pages, and check for write errors.
   *
  static int
  nfs_file_flush(struct file *file, fl_owner_t id)
  {
 -      struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
 +      struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct inode    *inode = file->f_path.dentry->d_inode;
        int             status;
  
        if ((file->f_mode & FMODE_WRITE) == 0)
                return 0;
        nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
 -      lock_kernel();
 +
        /* Ensure that data+attribute caches are up to date after close() */
 -      status = nfs_wb_all(inode);
 -      if (!status) {
 -              status = ctx->error;
 -              ctx->error = 0;
 -              if (!status)
 -                      nfs_revalidate_inode(NFS_SERVER(inode), inode);
 -      }
 -      unlock_kernel();
 +      status = nfs_do_fsync(ctx, inode);
 +      if (!status)
 +              nfs_revalidate_inode(NFS_SERVER(inode), inode);
        return status;
  }
  
@@@ -280,11 -257,8 +280,11 @@@ nfs_file_mmap(struct file * file, struc
                dentry->d_parent->d_name.name, dentry->d_name.name);
  
        status = nfs_revalidate_mapping(inode, file->f_mapping);
 -      if (!status)
 -              status = generic_file_mmap(file, vma);
 +      if (!status) {
 +              vma->vm_ops = &nfs_file_vm_ops;
 +              vma->vm_flags |= VM_CAN_NONLINEAR;
 +              file_accessed(file);
 +      }
        return status;
  }
  
  static int
  nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
  {
 -      struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
 +      struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct inode *inode = dentry->d_inode;
 -      int status;
  
        dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
  
        nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
 -      lock_kernel();
 -      status = nfs_wb_all(inode);
 -      if (!status) {
 -              status = ctx->error;
 -              ctx->error = 0;
 -      }
 -      unlock_kernel();
 -      return status;
 +      return nfs_do_fsync(ctx, inode);
  }
  
  /*
@@@ -351,7 -333,7 +351,7 @@@ static int nfs_launder_page(struct pag
  const struct address_space_operations nfs_file_aops = {
        .readpage = nfs_readpage,
        .readpages = nfs_readpages,
 -      .set_page_dirty = nfs_set_page_dirty,
 +      .set_page_dirty = __set_page_dirty_nobuffers,
        .writepage = nfs_writepage,
        .writepages = nfs_writepages,
        .prepare_write = nfs_prepare_write,
        .launder_page = nfs_launder_page,
  };
  
 +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 +{
 +      struct file *filp = vma->vm_file;
 +      unsigned pagelen;
 +      int ret = -EINVAL;
 +
 +      lock_page(page);
 +      if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
 +              goto out_unlock;
 +      pagelen = nfs_page_length(page);
 +      if (pagelen == 0)
 +              goto out_unlock;
 +      ret = nfs_prepare_write(filp, page, 0, pagelen);
 +      if (!ret)
 +              ret = nfs_commit_write(filp, page, 0, pagelen);
 +out_unlock:
 +      unlock_page(page);
 +      return ret;
 +}
 +
 +static struct vm_operations_struct nfs_file_vm_ops = {
 +      .fault = filemap_fault,
 +      .page_mkwrite = nfs_vm_page_mkwrite,
 +};
 +
 +static int nfs_need_sync_write(struct file *filp, struct inode *inode)
 +{
 +      struct nfs_open_context *ctx;
 +
 +      if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
 +              return 1;
 +      ctx = nfs_file_open_context(filp);
 +      if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
 +              return 1;
 +      return 0;
 +}
 +
  static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
                                unsigned long nr_segs, loff_t pos)
  {
        nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
        result = generic_file_aio_write(iocb, iov, nr_segs, pos);
        /* Return error values for O_SYNC and IS_SYNC() */
 -      if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
 -              int err = nfs_fsync(iocb->ki_filp, dentry, 1);
 +      if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
 +              int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
                if (err < 0)
                        result = err;
        }
@@@ -577,8 -522,7 +577,7 @@@ static int nfs_lock(struct file *filp, 
        nfs_inc_stats(inode, NFSIOS_VFSLOCK);
  
        /* No mandatory locks over NFS */
-       if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID &&
-           fl->fl_type != F_UNLCK)
+       if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
                return -ENOLCK;
  
        if (IS_GETLK(cmd))
diff --combined fs/nfsd/nfs4state.c
@@@ -358,22 -358,9 +358,22 @@@ alloc_client(struct xdr_netobj name
        return clp;
  }
  
 +static void
 +shutdown_callback_client(struct nfs4_client *clp)
 +{
 +      struct rpc_clnt *clnt = clp->cl_callback.cb_client;
 +
 +      /* shutdown rpc client, ending any outstanding recall rpcs */
 +      if (clnt) {
 +              clp->cl_callback.cb_client = NULL;
 +              rpc_shutdown_client(clnt);
 +      }
 +}
 +
  static inline void
  free_client(struct nfs4_client *clp)
  {
 +      shutdown_callback_client(clp);
        if (clp->cl_cred.cr_group_info)
                put_group_info(clp->cl_cred.cr_group_info);
        kfree(clp->cl_name.data);
@@@ -387,6 -374,18 +387,6 @@@ put_nfs4_client(struct nfs4_client *clp
                free_client(clp);
  }
  
 -static void
 -shutdown_callback_client(struct nfs4_client *clp)
 -{
 -      struct rpc_clnt *clnt = clp->cl_callback.cb_client;
 -
 -      /* shutdown rpc client, ending any outstanding recall rpcs */
 -      if (clnt) {
 -              clp->cl_callback.cb_client = NULL;
 -              rpc_shutdown_client(clnt);
 -      }
 -}
 -
  static void
  expire_client(struct nfs4_client *clp)
  {
        dprintk("NFSD: expire_client cl_count %d\n",
                            atomic_read(&clp->cl_count));
  
 -      shutdown_callback_client(clp);
 -
        INIT_LIST_HEAD(&reaplist);
        spin_lock(&recall_lock);
        while (!list_empty(&clp->cl_delegations)) {
@@@ -461,28 -462,26 +461,28 @@@ copy_cred(struct svc_cred *target, stru
  }
  
  static inline int
 -same_name(const char *n1, const char *n2) {
 +same_name(const char *n1, const char *n2)
 +{
        return 0 == memcmp(n1, n2, HEXDIR_LEN);
  }
  
  static int
 -cmp_verf(nfs4_verifier *v1, nfs4_verifier *v2) {
 -      return(!memcmp(v1->data,v2->data,sizeof(v1->data)));
 +same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
 +{
 +      return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
  }
  
  static int
 -cmp_clid(clientid_t * cl1, clientid_t * cl2) {
 -      return((cl1->cl_boot == cl2->cl_boot) &&
 -              (cl1->cl_id == cl2->cl_id));
 +same_clid(clientid_t *cl1, clientid_t *cl2)
 +{
 +      return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
  }
  
  /* XXX what about NGROUP */
  static int
 -cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){
 -      return(cr1->cr_uid == cr2->cr_uid);
 -
 +same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 +{
 +      return cr1->cr_uid == cr2->cr_uid;
  }
  
  static void
@@@ -508,7 -507,7 +508,7 @@@ check_name(struct xdr_netobj name) 
        if (name.len == 0) 
                return 0;
        if (name.len > NFS4_OPAQUE_LIMIT) {
 -              printk("NFSD: check_name: name too long(%d)!\n", name.len);
 +              dprintk("NFSD: check_name: name too long(%d)!\n", name.len);
                return 0;
        }
        return 1;
@@@ -547,7 -546,7 +547,7 @@@ find_confirmed_client(clientid_t *clid
        unsigned int idhashval = clientid_hashval(clid->cl_id);
  
        list_for_each_entry(clp, &conf_id_hashtbl[idhashval], cl_idhash) {
 -              if (cmp_clid(&clp->cl_clientid, clid))
 +              if (same_clid(&clp->cl_clientid, clid))
                        return clp;
        }
        return NULL;
@@@ -560,7 -559,7 +560,7 @@@ find_unconfirmed_client(clientid_t *cli
        unsigned int idhashval = clientid_hashval(clid->cl_id);
  
        list_for_each_entry(clp, &unconf_id_hashtbl[idhashval], cl_idhash) {
 -              if (cmp_clid(&clp->cl_clientid, clid))
 +              if (same_clid(&clp->cl_clientid, clid))
                        return clp;
        }
        return NULL;
@@@ -754,7 -753,7 +754,7 @@@ nfsd4_setclientid(struct svc_rqst *rqst
                 * or different ip_address
                 */
                status = nfserr_clid_inuse;
 -              if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
 +              if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)
                                || conf->cl_addr != sin->sin_addr.s_addr) {
                        dprintk("NFSD: setclientid: string in use by client"
                                "at %u.%u.%u.%u\n", NIPQUAD(conf->cl_addr));
                new = create_client(clname, dname);
                if (new == NULL)
                        goto out;
 -              copy_verf(new, &clverifier);
 -              new->cl_addr = sin->sin_addr.s_addr;
 -              copy_cred(&new->cl_cred,&rqstp->rq_cred);
                gen_clid(new);
 -              gen_confirm(new);
 -              gen_callback(new, setclid);
 -              add_to_unconfirmed(new, strhashval);
 -      } else if (cmp_verf(&conf->cl_verifier, &clverifier)) {
 +      } else if (same_verf(&conf->cl_verifier, &clverifier)) {
                /*
                 * CASE 1:
                 * cl_name match, confirmed, principal match
                new = create_client(clname, dname);
                if (new == NULL)
                        goto out;
 -              copy_verf(new,&conf->cl_verifier);
 -              new->cl_addr = sin->sin_addr.s_addr;
 -              copy_cred(&new->cl_cred,&rqstp->rq_cred);
                copy_clid(new, conf);
 -              gen_confirm(new);
 -              gen_callback(new, setclid);
 -              add_to_unconfirmed(new,strhashval);
        } else if (!unconf) {
                /*
                 * CASE 2:
                new = create_client(clname, dname);
                if (new == NULL)
                        goto out;
 -              copy_verf(new,&clverifier);
 -              new->cl_addr = sin->sin_addr.s_addr;
 -              copy_cred(&new->cl_cred,&rqstp->rq_cred);
                gen_clid(new);
 -              gen_confirm(new);
 -              gen_callback(new, setclid);
 -              add_to_unconfirmed(new, strhashval);
 -      } else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
 +      } else if (!same_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
                /*      
                 * CASE3:
                 * confirmed found (name, principal match)
                new = create_client(clname, dname);
                if (new == NULL)
                        goto out;
 -              copy_verf(new,&clverifier);
 -              new->cl_addr = sin->sin_addr.s_addr;
 -              copy_cred(&new->cl_cred,&rqstp->rq_cred);
                gen_clid(new);
 -              gen_confirm(new);
 -              gen_callback(new, setclid);
 -              add_to_unconfirmed(new, strhashval);
        } else {
                /* No cases hit !!! */
                status = nfserr_inval;
                goto out;
  
        }
 +      copy_verf(new, &clverifier);
 +      new->cl_addr = sin->sin_addr.s_addr;
 +      copy_cred(&new->cl_cred, &rqstp->rq_cred);
 +      gen_confirm(new);
 +      gen_callback(new, setclid);
 +      add_to_unconfirmed(new, strhashval);
        setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
        setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
        memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
@@@ -893,16 -910,16 +893,16 @@@ nfsd4_setclientid_confirm(struct svc_rq
                goto out;
  
        if ((conf && unconf) && 
 -          (cmp_verf(&unconf->cl_confirm, &confirm)) &&
 -          (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
 +          (same_verf(&unconf->cl_confirm, &confirm)) &&
 +          (same_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
            (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
 -          (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
 +          (!same_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
                /* CASE 1:
                * unconf record that matches input clientid and input confirm.
                * conf record that matches input clientid.
                * conf and unconf records match names, verifiers
                */
 -              if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
 +              if (!same_creds(&conf->cl_cred, &unconf->cl_cred))
                        status = nfserr_clid_inuse;
                else {
                        /* XXX: We just turn off callbacks until we can handle
                }
        } else if ((conf && !unconf) ||
            ((conf && unconf) && 
 -           (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
 +           (!same_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
              !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
                /* CASE 2:
                 * conf record that matches input clientid.
                 * unconf->cl_name or unconf->cl_verifier don't match the
                 * conf record.
                 */
 -              if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
 +              if (!same_creds(&conf->cl_cred, &rqstp->rq_cred))
                        status = nfserr_clid_inuse;
                else
                        status = nfs_ok;
        } else if (!conf && unconf
 -                      && cmp_verf(&unconf->cl_confirm, &confirm)) {
 +                      && same_verf(&unconf->cl_confirm, &confirm)) {
                /* CASE 3:
                 * conf record not found.
                 * unconf record found.
                 * unconf->cl_confirm matches input confirm
                 */
 -              if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
 +              if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
                        status = nfserr_clid_inuse;
                } else {
                        unsigned int hash =
                        conf = unconf;
                        status = nfs_ok;
                }
 -      } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
 -          && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
 +      } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
 +          && (!unconf || (unconf && !same_verf(&unconf->cl_confirm,
                                                                &confirm)))) {
                /* CASE 4:
                 * conf record not found, or if conf, conf->cl_confirm does not
@@@ -1002,7 -1019,7 +1002,7 @@@ nfsd4_free_slab(struct kmem_cache **sla
        *slab = NULL;
  }
  
 -static void
 +void
  nfsd4_free_slabs(void)
  {
        nfsd4_free_slab(&stateowner_slab);
@@@ -1190,12 -1207,10 +1190,12 @@@ move_to_close_lru(struct nfs4_stateowne
  }
  
  static int
 -cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) {
 -      return ((sop->so_owner.len == owner->len) && 
 -       !memcmp(sop->so_owner.data, owner->data, owner->len) && 
 -        (sop->so_client->cl_clientid.cl_id == clid->cl_id));
 +same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner,
 +                                                      clientid_t *clid)
 +{
 +      return (sop->so_owner.len == owner->len) &&
 +              0 == memcmp(sop->so_owner.data, owner->data, owner->len) &&
 +              (sop->so_client->cl_clientid.cl_id == clid->cl_id);
  }
  
  static struct nfs4_stateowner *
@@@ -1204,7 -1219,7 +1204,7 @@@ find_openstateowner_str(unsigned int ha
        struct nfs4_stateowner *so = NULL;
  
        list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
 -              if (cmp_owner_str(so, &open->op_owner, &open->op_clientid))
 +              if (same_owner_str(so, &open->op_owner, &open->op_clientid))
                        return so;
        }
        return NULL;
@@@ -1345,7 -1360,6 +1345,7 @@@ void nfsd_break_deleg_cb(struct file_lo
         * lock) we know the server hasn't removed the lease yet, we know
         * it's safe to take a reference: */
        atomic_inc(&dp->dl_count);
 +      atomic_inc(&dp->dl_client->cl_count);
  
        spin_lock(&recall_lock);
        list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
        /* only place dl_time is set. protected by lock_kernel*/
        dp->dl_time = get_seconds();
  
 -      /* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
 -      fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
 +      /*
 +       * We don't want the locks code to timeout the lease for us;
 +       * we'll remove it ourselves if the delegation isn't returned
 +       * in time.
 +       */
 +      fl->fl_break_time = 0;
  
        t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
        if (IS_ERR(t)) {
                printk(KERN_INFO "NFSD: Callback thread failed for "
                        "for client (clientid %08x/%08x)\n",
                        clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
 +              put_nfs4_client(dp->dl_client);
                nfs4_put_delegation(dp);
        }
  }
@@@ -1729,7 -1738,7 +1729,7 @@@ out
        if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
                        && flag == NFS4_OPEN_DELEGATE_NONE
                        && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
 -              printk("NFSD: WARNING: refusing delegation reclaim\n");
 +              dprintk("NFSD: WARNING: refusing delegation reclaim\n");
        open->op_delegate_type = flag;
  }
  
@@@ -2035,7 -2044,7 +2035,7 @@@ static inline in
  io_during_grace_disallowed(struct inode *inode, int flags)
  {
        return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE))
-               && MANDATORY_LOCK(inode);
+               && mandatory_lock(inode);
  }
  
  /*
@@@ -2138,7 -2147,7 +2138,7 @@@ nfs4_preprocess_seqid_op(struct svc_fh 
        *sopp = NULL;
  
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
 -              printk("NFSD: preprocess_seqid_op: magic stateid!\n");
 +              dprintk("NFSD: preprocess_seqid_op: magic stateid!\n");
                return nfserr_bad_stateid;
        }
  
                lkflg = setlkflg(lock->lk_type);
  
                if (lock->lk_is_new) {
 -                       if (!sop->so_is_open_owner)
 -                             return nfserr_bad_stateid;
 -                       if (!cmp_clid(&clp->cl_clientid, lockclid))
 +                      if (!sop->so_is_open_owner)
 +                              return nfserr_bad_stateid;
 +                      if (!same_clid(&clp->cl_clientid, lockclid))
                               return nfserr_bad_stateid;
 -                       /* stp is the open stateid */
 -                       status = nfs4_check_openmode(stp, lkflg);
 -                       if (status)
 -                             return status;
 -               } else {
 -                       /* stp is the lock stateid */
 -                       status = nfs4_check_openmode(stp->st_openstp, lkflg);
 -                       if (status)
 -                             return status;
 +                      /* stp is the open stateid */
 +                      status = nfs4_check_openmode(stp, lkflg);
 +                      if (status)
 +                              return status;
 +              } else {
 +                      /* stp is the lock stateid */
 +                      status = nfs4_check_openmode(stp->st_openstp, lkflg);
 +                      if (status)
 +                              return status;
                 }
 -
        }
  
        if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
 -              printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
 +              dprintk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
                return nfserr_bad_stateid;
        }
  
                goto check_replay;
  
        if (sop->so_confirmed && flags & CONFIRM) {
 -              printk("NFSD: preprocess_seqid_op: expected"
 +              dprintk("NFSD: preprocess_seqid_op: expected"
                                " unconfirmed stateowner!\n");
                return nfserr_bad_stateid;
        }
        if (!sop->so_confirmed && !(flags & CONFIRM)) {
 -              printk("NFSD: preprocess_seqid_op: stateowner not"
 +              dprintk("NFSD: preprocess_seqid_op: stateowner not"
                                " confirmed yet!\n");
                return nfserr_bad_stateid;
        }
        if (stateid->si_generation > stp->st_stateid.si_generation) {
 -              printk("NFSD: preprocess_seqid_op: future stateid?!\n");
 +              dprintk("NFSD: preprocess_seqid_op: future stateid?!\n");
                return nfserr_bad_stateid;
        }
  
        if (stateid->si_generation < stp->st_stateid.si_generation) {
 -              printk("NFSD: preprocess_seqid_op: old stateid!\n");
 +              dprintk("NFSD: preprocess_seqid_op: old stateid!\n");
                return nfserr_old_stateid;
        }
        renew_client(sop->so_client);
@@@ -2232,7 -2242,7 +2232,7 @@@ check_replay
                /* indicate replay to calling function */
                return nfserr_replay_me;
        }
 -      printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
 +      dprintk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n",
                        sop->so_seqid, seqid);
        *sopp = NULL;
        return nfserr_bad_seqid;
@@@ -2551,7 -2561,7 +2551,7 @@@ find_lockstateowner_str(struct inode *i
        struct nfs4_stateowner *op;
  
        list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
 -              if (cmp_owner_str(op, owner, clid))
 +              if (same_owner_str(op, owner, clid))
                        return op;
        }
        return NULL;
@@@ -2845,7 -2855,7 +2845,7 @@@ nfsd4_lockt(struct svc_rqst *rqstp, str
                        file_lock.fl_type = F_WRLCK;
                break;
                default:
 -                      printk("NFSD: nfs4_lockt: bad lock type!\n");
 +                      dprintk("NFSD: nfs4_lockt: bad lock type!\n");
                        status = nfserr_inval;
                goto out;
        }
@@@ -3015,7 -3025,7 +3015,7 @@@ nfsd4_release_lockowner(struct svc_rqs
        INIT_LIST_HEAD(&matches);
        for (i = 0; i < LOCK_HASH_SIZE; i++) {
                list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
 -                      if (!cmp_owner_str(sop, owner, clid))
 +                      if (!same_owner_str(sop, owner, clid))
                                continue;
                        list_for_each_entry(stp, &sop->so_stateids,
                                        st_perstateowner) {
@@@ -3139,14 -3149,11 +3139,14 @@@ nfs4_check_open_reclaim(clientid_t *cli
  
  /* initialization to perform at module load time: */
  
 -void
 +int
  nfs4_state_init(void)
  {
 -      int i;
 +      int i, status;
  
 +      status = nfsd4_init_slabs();
 +      if (status)
 +              return status;
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
        for (i = 0; i < CLIENT_HASH_SIZE; i++)
                INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
        reclaim_str_hashtbl_size = 0;
 +      return 0;
  }
  
  static void
@@@ -3236,15 -3242,20 +3236,15 @@@ __nfs4_state_start(void
        set_max_delegations();
  }
  
 -int
 +void
  nfs4_state_start(void)
  {
 -      int status;
 -
        if (nfs4_init)
 -              return 0;
 -      status = nfsd4_init_slabs();
 -      if (status)
 -              return status;
 +              return;
        nfsd4_load_reboot_recovery_data();
        __nfs4_state_start();
        nfs4_init = 1;
 -      return 0;
 +      return;
  }
  
  int
@@@ -3302,6 -3313,7 +3302,6 @@@ nfs4_state_shutdown(void
        nfs4_lock_state();
        nfs4_release_reclaim();
        __nfs4_state_shutdown();
 -      nfsd4_free_slabs();
        nfs4_unlock_state();
  }
  
diff --combined fs/nfsd/vfs.c
  #define NFSDDBG_FACILITY              NFSDDBG_FILEOP
  
  
- /* We must ignore files (but only files) which might have mandatory
-  * locks on them because there is no way to know if the accesser has
-  * the lock.
-  */
- #define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
  /*
   * This is a cache of readahead params that help us choose the proper
   * readahead strategy. Initially, we set all readahead parameters to 0
@@@ -295,8 -289,7 +289,8 @@@ nfsd_setattr(struct svc_rqst *rqstp, st
        if (!iap->ia_valid)
                goto out;
  
 -      /* NFSv2 does not differentiate between "set-[ac]time-to-now"
 +      /*
 +       * NFSv2 does not differentiate between "set-[ac]time-to-now"
         * which only requires access, and "set-[ac]time-to-X" which
         * requires ownership.
         * So if it looks like it might be "set both to the same time which
         */
  #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
  #define       MAX_TOUCH_TIME_ERROR (30*60)
 -      if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
 -          && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec
 -          ) {
 -          /* Looks probable.  Now just make sure time is in the right ballpark.
 -           * Solaris, at least, doesn't seem to care what the time request is.
 -           * We require it be within 30 minutes of now.
 -           */
 -          time_t delta = iap->ia_atime.tv_sec - get_seconds();
 -          if (delta<0) delta = -delta;
 -          if (delta < MAX_TOUCH_TIME_ERROR &&
 -              inode_change_ok(inode, iap) != 0) {
 -              /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
 -               * this will cause notify_change to set these times to "now"
 +      if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
 +          iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
 +              /*
 +               * Looks probable.
 +               *
 +               * Now just make sure time is in the right ballpark.
 +               * Solaris, at least, doesn't seem to care what the time
 +               * request is.  We require it be within 30 minutes of now.
                 */
 -              iap->ia_valid &= ~BOTH_TIME_SET;
 -          }
 +              time_t delta = iap->ia_atime.tv_sec - get_seconds();
 +              if (delta < 0)
 +                      delta = -delta;
 +              if (delta < MAX_TOUCH_TIME_ERROR &&
 +                  inode_change_ok(inode, iap) != 0) {
 +                      /*
 +                       * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
 +                       * This will cause notify_change to set these times
 +                       * to "now"
 +                       */
 +                      iap->ia_valid &= ~BOTH_TIME_SET;
 +              }
        }
            
 -      /* The size case is special. It changes the file as well as the attributes.  */
 +      /*
 +       * The size case is special.
 +       * It changes the file as well as the attributes.
 +       */
        if (iap->ia_valid & ATTR_SIZE) {
                if (iap->ia_size < inode->i_size) {
                        err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
@@@ -689,7 -674,12 +683,12 @@@ nfsd_open(struct svc_rqst *rqstp, struc
        err = nfserr_perm;
        if (IS_APPEND(inode) && (access & MAY_WRITE))
                goto out;
-       if (IS_ISMNDLK(inode))
+       /*
+        * We must ignore files (but only files) which might have mandatory
+        * locks on them because there is no way to know if the accessor has
+        * the lock.
+        */
+       if (S_ISREG((inode)->i_mode) && mandatory_lock(inode))
                goto out;
  
        if (!inode->i_fop)
diff --combined fs/proc/proc_misc.c
@@@ -66,7 -66,6 +66,6 @@@ extern int get_stram_list(char *)
  extern int get_filesystem_list(char *);
  extern int get_exec_domain_list(char *);
  extern int get_dma_list(char *);
- extern int get_locks_status (char *, char **, off_t, int);
  
  static int proc_calc_metrics(char *page, char **start, off_t off,
                                 int count, int *eof, int len)
@@@ -443,7 -442,6 +442,7 @@@ static int show_stat(struct seq_file *p
        int i;
        unsigned long jif;
        cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
 +      cputime64_t guest;
        u64 sum = 0;
        struct timespec boottime;
        unsigned int *per_irq_sum;
  
        user = nice = system = idle = iowait =
                irq = softirq = steal = cputime64_zero;
 +      guest = cputime64_zero;
        getboottime(&boottime);
        jif = boottime.tv_sec;
  
                irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq);
                softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq);
                steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal);
 +              guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
                for (j = 0; j < NR_IRQS; j++) {
                        unsigned int temp = kstat_cpu(i).irqs[j];
                        sum += temp;
                }
        }
  
 -      seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu\n",
 +      seq_printf(p, "cpu  %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
                (unsigned long long)cputime64_to_clock_t(user),
                (unsigned long long)cputime64_to_clock_t(nice),
                (unsigned long long)cputime64_to_clock_t(system),
                (unsigned long long)cputime64_to_clock_t(iowait),
                (unsigned long long)cputime64_to_clock_t(irq),
                (unsigned long long)cputime64_to_clock_t(softirq),
 -              (unsigned long long)cputime64_to_clock_t(steal));
 +              (unsigned long long)cputime64_to_clock_t(steal),
 +              (unsigned long long)cputime64_to_clock_t(guest));
        for_each_online_cpu(i) {
  
                /* Copy values here to work around gcc-2.95.3, gcc-2.96 */
                irq = kstat_cpu(i).cpustat.irq;
                softirq = kstat_cpu(i).cpustat.softirq;
                steal = kstat_cpu(i).cpustat.steal;
 -              seq_printf(p, "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu\n",
 +              guest = kstat_cpu(i).cpustat.guest;
 +              seq_printf(p,
 +                      "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n",
                        i,
                        (unsigned long long)cputime64_to_clock_t(user),
                        (unsigned long long)cputime64_to_clock_t(nice),
                        (unsigned long long)cputime64_to_clock_t(iowait),
                        (unsigned long long)cputime64_to_clock_t(irq),
                        (unsigned long long)cputime64_to_clock_t(softirq),
 -                      (unsigned long long)cputime64_to_clock_t(steal));
 +                      (unsigned long long)cputime64_to_clock_t(steal),
 +                      (unsigned long long)cputime64_to_clock_t(guest));
        }
        seq_printf(p, "intr %llu", (unsigned long long)sum);
  
@@@ -624,16 -616,18 +623,18 @@@ static int cmdline_read_proc(char *page
        return proc_calc_metrics(page, start, off, count, eof, len);
  }
  
- static int locks_read_proc(char *page, char **start, off_t off,
-                                int count, int *eof, void *data)
+ static int locks_open(struct inode *inode, struct file *filp)
  {
-       int len = get_locks_status(page, start, off, count);
-       if (len < count)
-               *eof = 1;
-       return len;
+       return seq_open(filp, &locks_seq_operations);
  }
  
+ static const struct file_operations proc_locks_operations = {
+       .open           = locks_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = seq_release,
+ };
  static int execdomains_read_proc(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
  {
@@@ -691,7 -685,6 +692,6 @@@ void __init proc_misc_init(void
  #endif
                {"filesystems", filesystems_read_proc},
                {"cmdline",     cmdline_read_proc},
-               {"locks",       locks_read_proc},
                {"execdomains", execdomains_read_proc},
                {NULL,}
        };
                        entry->proc_fops = &proc_kmsg_operations;
        }
  #endif
+       create_seq_entry("locks", 0, &proc_locks_operations);
        create_seq_entry("devices", 0, &proc_devinfo_operations);
        create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
  #ifdef CONFIG_BLOCK
diff --combined include/linux/fs.h
@@@ -883,6 -883,7 +883,7 @@@ extern int vfs_setlease(struct file *, 
  extern int lease_modify(struct file_lock **, int);
  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
+ extern struct seq_operations locks_seq_operations;
  
  struct fasync_struct {
        int     magic;
@@@ -1302,14 -1303,8 +1303,14 @@@ struct file_system_type 
        struct module *owner;
        struct file_system_type * next;
        struct list_head fs_supers;
 +
        struct lock_class_key s_lock_key;
        struct lock_class_key s_umount_key;
 +
 +      struct lock_class_key i_lock_key;
 +      struct lock_class_key i_mutex_key;
 +      struct lock_class_key i_mutex_dir_key;
 +      struct lock_class_key i_alloc_sem_key;
  };
  
  extern int get_sb_bdev(struct file_system_type *fs_type,
@@@ -1375,12 -1370,25 +1376,25 @@@ extern int locks_mandatory_area(int, st
   * Candidates for mandatory locking have the setgid bit set
   * but no group execute bit -  an otherwise meaningless combination.
   */
- #define MANDATORY_LOCK(inode) \
-       (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID)
+ static inline int __mandatory_lock(struct inode *ino)
+ {
+       return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID;
+ }
+ /*
+  * ... and these candidates must be on a filesystem mounted with
+  * MS_MANDLOCK; otherwise the locks are only advisory.
+  */
+ static inline int mandatory_lock(struct inode *ino)
+ {
+       return IS_MANDLOCK(ino) && __mandatory_lock(ino);
+ }
  
  static inline int locks_verify_locked(struct inode *inode)
  {
-       if (MANDATORY_LOCK(inode))
+       if (mandatory_lock(inode))
                return locks_mandatory_locked(inode);
        return 0;
  }
@@@ -1391,7 -1399,7 +1405,7 @@@ static inline int locks_verify_truncate
                                    struct file *filp,
                                    loff_t size)
  {
-       if (inode->i_flock && MANDATORY_LOCK(inode))
+       if (inode->i_flock && mandatory_lock(inode))
                return locks_mandatory_area(
                        FLOCK_VERIFY_WRITE, inode, filp,
                        size < inode->i_size ? size : inode->i_size,