Merge git://oss.sgi.com:8090/oss/git/xfs-2.6
author Linus Torvalds <torvalds@g5.osdl.org>
Thu, 8 Sep 2005 00:23:52 +0000 (17:23 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Thu, 8 Sep 2005 00:23:52 +0000 (17:23 -0700)
52 files changed:
fs/xfs/Makefile
fs/xfs/Makefile-linux-2.6 [new file with mode: 0644]
fs/xfs/linux-2.6/kmem.c
fs/xfs/linux-2.6/kmem.h
fs/xfs/linux-2.6/spin.h
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_aops.h [new file with mode: 0644]
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/linux-2.6/xfs_ioctl32.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_linux.h
fs/xfs/linux-2.6/xfs_lrw.c
fs/xfs/linux-2.6/xfs_lrw.h
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_vfs.c
fs/xfs/linux-2.6/xfs_vfs.h
fs/xfs/linux-2.6/xfs_vnode.c
fs/xfs/linux-2.6/xfs_vnode.h
fs/xfs/quota/Makefile [new file with mode: 0644]
fs/xfs/quota/Makefile-linux-2.6 [new file with mode: 0644]
fs/xfs/quota/xfs_dquot.c
fs/xfs/quota/xfs_dquot.h
fs/xfs/quota/xfs_dquot_item.c
fs/xfs/quota/xfs_qm.c
fs/xfs/quota/xfs_qm.h
fs/xfs/quota/xfs_qm_bhv.c
fs/xfs/quota/xfs_qm_syscalls.c
fs/xfs/support/debug.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_dmapi.h
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_qmops.c
fs/xfs/xfs_quota.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_ail.c
fs/xfs/xfs_trans_buf.c
fs/xfs/xfs_vfsops.c
fs/xfs/xfs_vnodeops.c

index d3ff783..49e3e7e 100644 (file)
@@ -1,150 +1 @@
-#
-# Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like.  Any license provided herein, whether implied or
-# otherwise, applies only to this software file.  Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write the Free Software Foundation, Inc., 59
-# Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
-# Mountain View, CA  94043, or:
-#
-# http://www.sgi.com
-#
-# For further information regarding this notice, see:
-#
-# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
-#
-
-EXTRA_CFLAGS +=         -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
-
-ifeq ($(CONFIG_XFS_DEBUG),y)
-       EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
-       EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
-endif
-ifeq ($(CONFIG_XFS_TRACE),y)
-       EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
-       EXTRA_CFLAGS += -DXFS_ATTR_TRACE
-       EXTRA_CFLAGS += -DXFS_BLI_TRACE
-       EXTRA_CFLAGS += -DXFS_BMAP_TRACE
-       EXTRA_CFLAGS += -DXFS_BMBT_TRACE
-       EXTRA_CFLAGS += -DXFS_DIR_TRACE
-       EXTRA_CFLAGS += -DXFS_DIR2_TRACE
-       EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
-       EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
-       EXTRA_CFLAGS += -DXFS_LOG_TRACE
-       EXTRA_CFLAGS += -DXFS_RW_TRACE
-       EXTRA_CFLAGS += -DPAGEBUF_TRACE
-       EXTRA_CFLAGS += -DXFS_VNODE_TRACE
-endif
-
-obj-$(CONFIG_XFS_FS)           += xfs.o
-
-xfs-$(CONFIG_XFS_QUOTA)                += $(addprefix quota/, \
-                                  xfs_dquot.o \
-                                  xfs_dquot_item.o \
-                                  xfs_trans_dquot.o \
-                                  xfs_qm_syscalls.o \
-                                  xfs_qm_bhv.o \
-                                  xfs_qm.o)
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS)          += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
-xfs-$(CONFIG_PROC_FS)          += linux-2.6/xfs_stats.o
-xfs-$(CONFIG_SYSCTL)           += linux-2.6/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT)           += linux-2.6/xfs_ioctl32.o
-xfs-$(CONFIG_XFS_EXPORT)       += linux-2.6/xfs_export.o
-
-
-xfs-y                          += xfs_alloc.o \
-                                  xfs_alloc_btree.o \
-                                  xfs_attr.o \
-                                  xfs_attr_leaf.o \
-                                  xfs_behavior.o \
-                                  xfs_bit.o \
-                                  xfs_bmap.o \
-                                  xfs_bmap_btree.o \
-                                  xfs_btree.o \
-                                  xfs_buf_item.o \
-                                  xfs_da_btree.o \
-                                  xfs_dir.o \
-                                  xfs_dir2.o \
-                                  xfs_dir2_block.o \
-                                  xfs_dir2_data.o \
-                                  xfs_dir2_leaf.o \
-                                  xfs_dir2_node.o \
-                                  xfs_dir2_sf.o \
-                                  xfs_dir_leaf.o \
-                                  xfs_error.o \
-                                  xfs_extfree_item.o \
-                                  xfs_fsops.o \
-                                  xfs_ialloc.o \
-                                  xfs_ialloc_btree.o \
-                                  xfs_iget.o \
-                                  xfs_inode.o \
-                                  xfs_inode_item.o \
-                                  xfs_iocore.o \
-                                  xfs_iomap.o \
-                                  xfs_itable.o \
-                                  xfs_dfrag.o \
-                                  xfs_log.o \
-                                  xfs_log_recover.o \
-                                  xfs_macros.o \
-                                  xfs_mount.o \
-                                  xfs_rename.o \
-                                  xfs_trans.o \
-                                  xfs_trans_ail.o \
-                                  xfs_trans_buf.o \
-                                  xfs_trans_extfree.o \
-                                  xfs_trans_inode.o \
-                                  xfs_trans_item.o \
-                                  xfs_utils.o \
-                                  xfs_vfsops.o \
-                                  xfs_vnodeops.o \
-                                  xfs_rw.o \
-                                  xfs_dmops.o \
-                                  xfs_qmops.o
-
-xfs-$(CONFIG_XFS_TRACE)                += xfs_dir2_trace.o
-
-# Objects in linux-2.6/
-xfs-y                          += $(addprefix linux-2.6/, \
-                                  kmem.o \
-                                  xfs_aops.o \
-                                  xfs_buf.o \
-                                  xfs_file.o \
-                                  xfs_fs_subr.o \
-                                  xfs_globals.o \
-                                  xfs_ioctl.o \
-                                  xfs_iops.o \
-                                  xfs_lrw.o \
-                                  xfs_super.o \
-                                  xfs_vfs.o \
-                                  xfs_vnode.o)
-
-# Objects in support/
-xfs-y                          += $(addprefix support/, \
-                                  debug.o \
-                                  move.o \
-                                  qsort.o \
-                                  uuid.o)
-
-xfs-$(CONFIG_XFS_TRACE)                += support/ktrace.o
-
+include $(TOPDIR)/fs/xfs/Makefile-linux-$(VERSION).$(PATCHLEVEL)
diff --git a/fs/xfs/Makefile-linux-2.6 b/fs/xfs/Makefile-linux-2.6
new file mode 100644 (file)
index 0000000..fbfcbe5
--- /dev/null
@@ -0,0 +1,141 @@
+#
+# Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like.  Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+#
+# http://www.sgi.com
+#
+# For further information regarding this notice, see:
+#
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+EXTRA_CFLAGS +=         -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
+
+XFS_LINUX := linux-2.6
+
+ifeq ($(CONFIG_XFS_DEBUG),y)
+       EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
+       EXTRA_CFLAGS += -DPAGEBUF_LOCK_TRACKING
+endif
+ifeq ($(CONFIG_XFS_TRACE),y)
+       EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
+       EXTRA_CFLAGS += -DXFS_ATTR_TRACE
+       EXTRA_CFLAGS += -DXFS_BLI_TRACE
+       EXTRA_CFLAGS += -DXFS_BMAP_TRACE
+       EXTRA_CFLAGS += -DXFS_BMBT_TRACE
+       EXTRA_CFLAGS += -DXFS_DIR_TRACE
+       EXTRA_CFLAGS += -DXFS_DIR2_TRACE
+       EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
+       EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
+       EXTRA_CFLAGS += -DXFS_LOG_TRACE
+       EXTRA_CFLAGS += -DXFS_RW_TRACE
+       EXTRA_CFLAGS += -DPAGEBUF_TRACE
+       EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+endif
+
+obj-$(CONFIG_XFS_FS)           += xfs.o
+obj-$(CONFIG_XFS_QUOTA)                += quota/
+
+xfs-$(CONFIG_XFS_RT)           += xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL)    += xfs_acl.o
+xfs-$(CONFIG_PROC_FS)          += $(XFS_LINUX)/xfs_stats.o
+xfs-$(CONFIG_SYSCTL)           += $(XFS_LINUX)/xfs_sysctl.o
+xfs-$(CONFIG_COMPAT)           += $(XFS_LINUX)/xfs_ioctl32.o
+xfs-$(CONFIG_XFS_EXPORT)       += $(XFS_LINUX)/xfs_export.o
+
+
+xfs-y                          += xfs_alloc.o \
+                                  xfs_alloc_btree.o \
+                                  xfs_attr.o \
+                                  xfs_attr_leaf.o \
+                                  xfs_behavior.o \
+                                  xfs_bit.o \
+                                  xfs_bmap.o \
+                                  xfs_bmap_btree.o \
+                                  xfs_btree.o \
+                                  xfs_buf_item.o \
+                                  xfs_da_btree.o \
+                                  xfs_dir.o \
+                                  xfs_dir2.o \
+                                  xfs_dir2_block.o \
+                                  xfs_dir2_data.o \
+                                  xfs_dir2_leaf.o \
+                                  xfs_dir2_node.o \
+                                  xfs_dir2_sf.o \
+                                  xfs_dir_leaf.o \
+                                  xfs_error.o \
+                                  xfs_extfree_item.o \
+                                  xfs_fsops.o \
+                                  xfs_ialloc.o \
+                                  xfs_ialloc_btree.o \
+                                  xfs_iget.o \
+                                  xfs_inode.o \
+                                  xfs_inode_item.o \
+                                  xfs_iocore.o \
+                                  xfs_iomap.o \
+                                  xfs_itable.o \
+                                  xfs_dfrag.o \
+                                  xfs_log.o \
+                                  xfs_log_recover.o \
+                                  xfs_macros.o \
+                                  xfs_mount.o \
+                                  xfs_rename.o \
+                                  xfs_trans.o \
+                                  xfs_trans_ail.o \
+                                  xfs_trans_buf.o \
+                                  xfs_trans_extfree.o \
+                                  xfs_trans_inode.o \
+                                  xfs_trans_item.o \
+                                  xfs_utils.o \
+                                  xfs_vfsops.o \
+                                  xfs_vnodeops.o \
+                                  xfs_rw.o \
+                                  xfs_dmops.o \
+                                  xfs_qmops.o
+
+xfs-$(CONFIG_XFS_TRACE)                += xfs_dir2_trace.o
+
+# Objects in linux/
+xfs-y                          += $(addprefix $(XFS_LINUX)/, \
+                                  kmem.o \
+                                  xfs_aops.o \
+                                  xfs_buf.o \
+                                  xfs_file.o \
+                                  xfs_fs_subr.o \
+                                  xfs_globals.o \
+                                  xfs_ioctl.o \
+                                  xfs_iops.o \
+                                  xfs_lrw.o \
+                                  xfs_super.o \
+                                  xfs_vfs.o \
+                                  xfs_vnode.o)
+
+# Objects in support/
+xfs-y                          += $(addprefix support/, \
+                                  debug.o \
+                                  move.o \
+                                  uuid.o)
+
+xfs-$(CONFIG_XFS_TRACE)                += support/ktrace.o
+
index 364ea8c..4b18455 100644 (file)
 
 
 void *
-kmem_alloc(size_t size, int flags)
+kmem_alloc(size_t size, unsigned int __nocast flags)
 {
-       int     retries = 0;
-       int     lflags = kmem_flags_convert(flags);
-       void    *ptr;
+       int             retries = 0;
+       unsigned int    lflags = kmem_flags_convert(flags);
+       void            *ptr;
 
        do {
                if (size < MAX_SLAB_SIZE || retries > MAX_VMALLOCS)
@@ -67,7 +67,7 @@ kmem_alloc(size_t size, int flags)
 }
 
 void *
-kmem_zalloc(size_t size, int flags)
+kmem_zalloc(size_t size, unsigned int __nocast flags)
 {
        void    *ptr;
 
@@ -89,7 +89,8 @@ kmem_free(void *ptr, size_t size)
 }
 
 void *
-kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
+kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
+            unsigned int __nocast flags)
 {
        void    *new;
 
@@ -104,11 +105,11 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, int flags)
 }
 
 void *
-kmem_zone_alloc(kmem_zone_t *zone, int flags)
+kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
-       int     retries = 0;
-       int     lflags = kmem_flags_convert(flags);
-       void    *ptr;
+       int             retries = 0;
+       unsigned int    lflags = kmem_flags_convert(flags);
+       void            *ptr;
 
        do {
                ptr = kmem_cache_alloc(zone, lflags);
@@ -123,7 +124,7 @@ kmem_zone_alloc(kmem_zone_t *zone, int flags)
 }
 
 void *
-kmem_zone_zalloc(kmem_zone_t *zone, int flags)
+kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
 {
        void    *ptr;
 
index 1397b66..109fcf2 100644 (file)
 /*
  * memory management routines
  */
-#define KM_SLEEP       0x0001
-#define KM_NOSLEEP     0x0002
-#define KM_NOFS                0x0004
-#define KM_MAYFAIL     0x0008
+#define KM_SLEEP       0x0001u
+#define KM_NOSLEEP     0x0002u
+#define KM_NOFS                0x0004u
+#define KM_MAYFAIL     0x0008u
 
 #define        kmem_zone       kmem_cache_s
 #define kmem_zone_t    kmem_cache_t
@@ -81,9 +81,9 @@ typedef unsigned long xfs_pflags_t;
        *(NSTATEP) = *(OSTATEP);        \
 } while (0)
 
-static __inline unsigned int kmem_flags_convert(int flags)
+static __inline unsigned int kmem_flags_convert(unsigned int __nocast flags)
 {
-       int     lflags = __GFP_NOWARN;  /* we'll report problems, if need be */
+       unsigned int    lflags = __GFP_NOWARN;  /* we'll report problems, if need be */
 
 #ifdef DEBUG
        if (unlikely(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL))) {
@@ -125,12 +125,13 @@ kmem_zone_destroy(kmem_zone_t *zone)
                BUG();
 }
 
-extern void        *kmem_zone_zalloc(kmem_zone_t *, int);
-extern void        *kmem_zone_alloc(kmem_zone_t *, int);
+extern void        *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
+extern void        *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 
-extern void        *kmem_alloc(size_t, int);
-extern void        *kmem_realloc(void *, size_t, size_t, int);
-extern void        *kmem_zalloc(size_t, int);
+extern void        *kmem_alloc(size_t, unsigned int __nocast);
+extern void        *kmem_realloc(void *, size_t, size_t,
+                                 unsigned int __nocast);
+extern void        *kmem_zalloc(size_t, unsigned int __nocast);
 extern void         kmem_free(void *, size_t);
 
 typedef struct shrinker *kmem_shaker_t;
index bcf60a0..0039504 100644 (file)
@@ -45,6 +45,9 @@
 typedef spinlock_t lock_t;
 
 #define SPLDECL(s)                     unsigned long s
+#ifndef DEFINE_SPINLOCK
+#define DEFINE_SPINLOCK(s)             spinlock_t s = SPIN_LOCK_UNLOCKED
+#endif
 
 #define spinlock_init(lock, name)      spin_lock_init(lock)
 #define        spinlock_destroy(lock)
index a3a4b5a..c6c0779 100644 (file)
@@ -104,66 +104,114 @@ xfs_page_trace(
 #define xfs_page_trace(tag, inode, page, mask)
 #endif
 
-void
-linvfs_unwritten_done(
-       struct buffer_head      *bh,
-       int                     uptodate)
+/*
+ * Schedule IO completion handling on a xfsdatad if this was
+ * the final hold on this ioend.
+ */
+STATIC void
+xfs_finish_ioend(
+       xfs_ioend_t             *ioend)
 {
-       xfs_buf_t               *pb = (xfs_buf_t *)bh->b_private;
+       if (atomic_dec_and_test(&ioend->io_remaining))
+               queue_work(xfsdatad_workqueue, &ioend->io_work);
+}
 
-       ASSERT(buffer_unwritten(bh));
-       bh->b_end_io = NULL;
-       clear_buffer_unwritten(bh);
-       if (!uptodate)
-               pagebuf_ioerror(pb, EIO);
-       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-               pagebuf_iodone(pb, 1, 1);
-       }
-       end_buffer_async_write(bh, uptodate);
+STATIC void
+xfs_destroy_ioend(
+       xfs_ioend_t             *ioend)
+{
+       vn_iowake(ioend->io_vnode);
+       mempool_free(ioend, xfs_ioend_pool);
 }
 
 /*
  * Issue transactions to convert a buffer range from unwritten
- * to written extents (buffered IO).
+ * to written extents.
  */
 STATIC void
-linvfs_unwritten_convert(
-       xfs_buf_t       *bp)
+xfs_end_bio_unwritten(
+       void                    *data)
 {
-       vnode_t         *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
-       int             error;
+       xfs_ioend_t             *ioend = data;
+       vnode_t                 *vp = ioend->io_vnode;
+       xfs_off_t               offset = ioend->io_offset;
+       size_t                  size = ioend->io_size;
+       struct buffer_head      *bh, *next;
+       int                     error;
+
+       if (ioend->io_uptodate)
+               VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
+
+       /* ioend->io_buffer_head is only non-NULL for buffered I/O */
+       for (bh = ioend->io_buffer_head; bh; bh = next) {
+               next = bh->b_private;
+
+               bh->b_end_io = NULL;
+               clear_buffer_unwritten(bh);
+               end_buffer_async_write(bh, ioend->io_uptodate);
+       }
 
-       BUG_ON(atomic_read(&bp->pb_hold) < 1);
-       VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
-                       BMAPI_UNWRITTEN, NULL, NULL, error);
-       XFS_BUF_SET_FSPRIVATE(bp, NULL);
-       XFS_BUF_CLR_IODONE_FUNC(bp);
-       XFS_BUF_UNDATAIO(bp);
-       iput(LINVFS_GET_IP(vp));
-       pagebuf_iodone(bp, 0, 0);
+       xfs_destroy_ioend(ioend);
 }
 
 /*
- * Issue transactions to convert a buffer range from unwritten
- * to written extents (direct IO).
+ * Allocate and initialise an IO completion structure.
+ * We need to track unwritten extent write completion here initially.
+ * We'll need to extend this for updating the ondisk inode size later
+ * (vs. incore size).
  */
-STATIC void
-linvfs_unwritten_convert_direct(
-       struct kiocb    *iocb,
-       loff_t          offset,
-       ssize_t         size,
-       void            *private)
+STATIC xfs_ioend_t *
+xfs_alloc_ioend(
+       struct inode            *inode)
 {
-       struct inode    *inode = iocb->ki_filp->f_dentry->d_inode;
-       ASSERT(!private || inode == (struct inode *)private);
+       xfs_ioend_t             *ioend;
 
-       /* private indicates an unwritten extent lay beneath this IO */
-       if (private && size > 0) {
-               vnode_t *vp = LINVFS_GET_VP(inode);
-               int     error;
+       ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
 
-               VOP_BMAP(vp, offset, size, BMAPI_UNWRITTEN, NULL, NULL, error);
-       }
+       /*
+        * Set the count to 1 initially, which will prevent an I/O
+        * completion callback that happens before we have started
+        * all the I/O from calling the completion routine too early.
+        */
+       atomic_set(&ioend->io_remaining, 1);
+       ioend->io_uptodate = 1; /* cleared if any I/O fails */
+       ioend->io_vnode = LINVFS_GET_VP(inode);
+       ioend->io_buffer_head = NULL;
+       atomic_inc(&ioend->io_vnode->v_iocount);
+       ioend->io_offset = 0;
+       ioend->io_size = 0;
+
+       INIT_WORK(&ioend->io_work, xfs_end_bio_unwritten, ioend);
+
+       return ioend;
+}
+
+void
+linvfs_unwritten_done(
+       struct buffer_head      *bh,
+       int                     uptodate)
+{
+       xfs_ioend_t             *ioend = bh->b_private;
+       static spinlock_t       unwritten_done_lock = SPIN_LOCK_UNLOCKED;
+       unsigned long           flags;
+
+       ASSERT(buffer_unwritten(bh));
+       bh->b_end_io = NULL;
+
+       if (!uptodate)
+               ioend->io_uptodate = 0;
+
+       /*
+        * Deep magic here.  We reuse b_private in the buffer_heads to build
+        * a chain for completing the I/O from user context after we've issued
+        * a transaction to convert the unwritten extent.
+        */
+       spin_lock_irqsave(&unwritten_done_lock, flags);
+       bh->b_private = ioend->io_buffer_head;
+       ioend->io_buffer_head = bh;
+       spin_unlock_irqrestore(&unwritten_done_lock, flags);
+
+       xfs_finish_ioend(ioend);
 }
 
 STATIC int
@@ -255,7 +303,7 @@ xfs_probe_unwritten_page(
        struct address_space    *mapping,
        pgoff_t                 index,
        xfs_iomap_t             *iomapp,
-       xfs_buf_t               *pb,
+       xfs_ioend_t             *ioend,
        unsigned long           max_offset,
        unsigned long           *fsbs,
        unsigned int            bbits)
@@ -283,7 +331,7 @@ xfs_probe_unwritten_page(
                                break;
                        xfs_map_at_offset(page, bh, p_offset, bbits, iomapp);
                        set_buffer_unwritten_io(bh);
-                       bh->b_private = pb;
+                       bh->b_private = ioend;
                        p_offset += bh->b_size;
                        (*fsbs)++;
                } while ((bh = bh->b_this_page) != head);
@@ -434,34 +482,15 @@ xfs_map_unwritten(
 {
        struct buffer_head      *bh = curr;
        xfs_iomap_t             *tmp;
-       xfs_buf_t               *pb;
-       loff_t                  offset, size;
+       xfs_ioend_t             *ioend;
+       loff_t                  offset;
        unsigned long           nblocks = 0;
 
        offset = start_page->index;
        offset <<= PAGE_CACHE_SHIFT;
        offset += p_offset;
 
-       /* get an "empty" pagebuf to manage IO completion
-        * Proper values will be set before returning */
-       pb = pagebuf_lookup(iomapp->iomap_target, 0, 0, 0);
-       if (!pb)
-               return -EAGAIN;
-
-       /* Take a reference to the inode to prevent it from
-        * being reclaimed while we have outstanding unwritten
-        * extent IO on it.
-        */
-       if ((igrab(inode)) != inode) {
-               pagebuf_free(pb);
-               return -EAGAIN;
-       }
-
-       /* Set the count to 1 initially, this will stop an I/O
-        * completion callout which happens before we have started
-        * all the I/O from calling pagebuf_iodone too early.
-        */
-       atomic_set(&pb->pb_io_remaining, 1);
+       ioend = xfs_alloc_ioend(inode);
 
        /* First map forwards in the page consecutive buffers
         * covering this unwritten extent
@@ -474,12 +503,12 @@ xfs_map_unwritten(
                        break;
                xfs_map_at_offset(start_page, bh, p_offset, block_bits, iomapp);
                set_buffer_unwritten_io(bh);
-               bh->b_private = pb;
+               bh->b_private = ioend;
                p_offset += bh->b_size;
                nblocks++;
        } while ((bh = bh->b_this_page) != head);
 
-       atomic_add(nblocks, &pb->pb_io_remaining);
+       atomic_add(nblocks, &ioend->io_remaining);
 
        /* If we reached the end of the page, map forwards in any
         * following pages which are also covered by this extent.
@@ -496,13 +525,13 @@ xfs_map_unwritten(
                tloff = min(tlast, tloff);
                for (tindex = start_page->index + 1; tindex < tloff; tindex++) {
                        page = xfs_probe_unwritten_page(mapping,
-                                               tindex, iomapp, pb,
+                                               tindex, iomapp, ioend,
                                                PAGE_CACHE_SIZE, &bs, bbits);
                        if (!page)
                                break;
                        nblocks += bs;
-                       atomic_add(bs, &pb->pb_io_remaining);
-                       xfs_convert_page(inode, page, iomapp, wbc, pb,
+                       atomic_add(bs, &ioend->io_remaining);
+                       xfs_convert_page(inode, page, iomapp, wbc, ioend,
                                                        startio, all_bh);
                        /* stop if converting the next page might add
                         * enough blocks that the corresponding byte
@@ -514,12 +543,12 @@ xfs_map_unwritten(
                if (tindex == tlast &&
                    (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) {
                        page = xfs_probe_unwritten_page(mapping,
-                                                       tindex, iomapp, pb,
+                                                       tindex, iomapp, ioend,
                                                        pg_offset, &bs, bbits);
                        if (page) {
                                nblocks += bs;
-                               atomic_add(bs, &pb->pb_io_remaining);
-                               xfs_convert_page(inode, page, iomapp, wbc, pb,
+                               atomic_add(bs, &ioend->io_remaining);
+                               xfs_convert_page(inode, page, iomapp, wbc, ioend,
                                                        startio, all_bh);
                                if (nblocks >= ((ULONG_MAX - PAGE_SIZE) >> block_bits))
                                        goto enough;
@@ -528,21 +557,9 @@ xfs_map_unwritten(
        }
 
 enough:
-       size = nblocks;         /* NB: using 64bit number here */
-       size <<= block_bits;    /* convert fsb's to byte range */
-
-       XFS_BUF_DATAIO(pb);
-       XFS_BUF_ASYNC(pb);
-       XFS_BUF_SET_SIZE(pb, size);
-       XFS_BUF_SET_COUNT(pb, size);
-       XFS_BUF_SET_OFFSET(pb, offset);
-       XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
-       XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
-
-       if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
-               pagebuf_iodone(pb, 1, 1);
-       }
-
+       ioend->io_size = (xfs_off_t)nblocks << block_bits;
+       ioend->io_offset = offset;
+       xfs_finish_ioend(ioend);
        return 0;
 }
 
@@ -787,7 +804,7 @@ xfs_page_state_convert(
                                continue;
                        if (!iomp) {
                                err = xfs_map_blocks(inode, offset, len, &iomap,
-                                               BMAPI_READ|BMAPI_IGNSTATE);
+                                               BMAPI_WRITE|BMAPI_IGNSTATE);
                                if (err) {
                                        goto error;
                                }
@@ -1028,6 +1045,44 @@ linvfs_get_blocks_direct(
                                        create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
+STATIC void
+linvfs_end_io_direct(
+       struct kiocb    *iocb,
+       loff_t          offset,
+       ssize_t         size,
+       void            *private)
+{
+       xfs_ioend_t     *ioend = iocb->private;
+
+       /*
+        * Non-NULL private data means we need to issue a transaction to
+        * convert a range from unwritten to written extents.  This needs
+        * to happen from process context but aio+dio I/O completion
+        * happens from irq context so we need to defer it to a workqueue.
+        * This is not necessary for synchronous direct I/O, but we do
+        * it anyway to keep the code uniform and simpler.
+        *
+        * The core direct I/O code might be changed to always call the
+        * completion handler in the future, in which case all this can
+        * go away.
+        */
+       if (private && size > 0) {
+               ioend->io_offset = offset;
+               ioend->io_size = size;
+               xfs_finish_ioend(ioend);
+       } else {
+               ASSERT(size >= 0);
+               xfs_destroy_ioend(ioend);
+       }
+
+       /*
+        * blockdev_direct_IO can return an error even after the I/O
+        * completion handler was called.  Thus we need to protect
+        * against double-freeing.
+        */
+       iocb->private = NULL;
+}
+
 STATIC ssize_t
 linvfs_direct_IO(
        int                     rw,
@@ -1042,16 +1097,23 @@ linvfs_direct_IO(
        xfs_iomap_t     iomap;
        int             maps = 1;
        int             error;
+       ssize_t         ret;
 
        VOP_BMAP(vp, offset, 0, BMAPI_DEVICE, &iomap, &maps, error);
        if (error)
                return -error;
 
-       return blockdev_direct_IO_own_locking(rw, iocb, inode,
+       iocb->private = xfs_alloc_ioend(inode);
+
+       ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
                iomap.iomap_target->pbr_bdev,
                iov, offset, nr_segs,
                linvfs_get_blocks_direct,
-               linvfs_unwritten_convert_direct);
+               linvfs_end_io_direct);
+
+       if (unlikely(ret <= 0 && iocb->private))
+               xfs_destroy_ioend(iocb->private);
+       return ret;
 }
 
 
@@ -1202,6 +1264,16 @@ out_unlock:
        return error;
 }
 
+STATIC int
+linvfs_invalidate_page(
+       struct page             *page,
+       unsigned long           offset)
+{
+       xfs_page_trace(XFS_INVALIDPAGE_ENTER,
+                       page->mapping->host, page, offset);
+       return block_invalidatepage(page, offset);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. Possibly the page is already clean. We always
@@ -1279,6 +1351,7 @@ struct address_space_operations linvfs_aops = {
        .writepage              = linvfs_writepage,
        .sync_page              = block_sync_page,
        .releasepage            = linvfs_release_page,
+       .invalidatepage         = linvfs_invalidate_page,
        .prepare_write          = linvfs_prepare_write,
        .commit_write           = generic_commit_write,
        .bmap                   = linvfs_bmap,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
new file mode 100644 (file)
index 0000000..2fa6297
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2005 Silicon Graphics, Inc.  All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like.  Any license provided herein, whether implied or
+ * otherwise, applies only to this software file.  Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA  94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+#ifndef __XFS_AOPS_H__
+#define __XFS_AOPS_H__
+
+extern struct workqueue_struct *xfsdatad_workqueue;
+extern mempool_t *xfs_ioend_pool;
+
+typedef void (*xfs_ioend_func_t)(void *);
+
+typedef struct xfs_ioend {
+       unsigned int            io_uptodate;    /* I/O status register */
+       atomic_t                io_remaining;   /* hold count */
+       struct vnode            *io_vnode;      /* file being written to */
+       struct buffer_head      *io_buffer_head;/* buffer linked list head */
+       size_t                  io_size;        /* size of the extent */
+       xfs_off_t               io_offset;      /* offset in the file */
+       struct work_struct      io_work;        /* xfsdatad work queue */
+} xfs_ioend_t;
+
+#endif /* __XFS_AOPS_H__ */
index df0cba2..655bf4a 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -54,6 +54,7 @@
 #include <linux/percpu.h>
 #include <linux/blkdev.h>
 #include <linux/hash.h>
+#include <linux/kthread.h>
 
 #include "xfs_linux.h"
 
@@ -67,7 +68,7 @@ STATIC int xfsbufd_wakeup(int, unsigned int);
 STATIC void pagebuf_delwri_queue(xfs_buf_t *, int);
 
 STATIC struct workqueue_struct *xfslogd_workqueue;
-STATIC struct workqueue_struct *xfsdatad_workqueue;
+struct workqueue_struct *xfsdatad_workqueue;
 
 /*
  * Pagebuf debugging
@@ -590,8 +591,10 @@ found:
                PB_SET_OWNER(pb);
        }
 
-       if (pb->pb_flags & PBF_STALE)
+       if (pb->pb_flags & PBF_STALE) {
+               ASSERT((pb->pb_flags & _PBF_DELWRI_Q) == 0);
                pb->pb_flags &= PBF_MAPPED;
+       }
        PB_TRACE(pb, "got_lock", 0);
        XFS_STATS_INC(pb_get_locked);
        return (pb);
@@ -699,25 +702,6 @@ xfs_buf_read_flags(
        return NULL;
 }
 
-/*
- * Create a skeletal pagebuf (no pages associated with it).
- */
-xfs_buf_t *
-pagebuf_lookup(
-       xfs_buftarg_t           *target,
-       loff_t                  ioff,
-       size_t                  isize,
-       page_buf_flags_t        flags)
-{
-       xfs_buf_t               *pb;
-
-       pb = pagebuf_allocate(flags);
-       if (pb) {
-               _pagebuf_initialize(pb, target, ioff, isize, flags);
-       }
-       return pb;
-}
-
 /*
  * If we are not low on memory then do the readahead in a deadlock
  * safe manner.
@@ -913,22 +897,23 @@ pagebuf_rele(
                        do_free = 0;
                }
 
-               if (pb->pb_flags & PBF_DELWRI) {
-                       pb->pb_flags |= PBF_ASYNC;
-                       atomic_inc(&pb->pb_hold);
-                       pagebuf_delwri_queue(pb, 0);
-                       do_free = 0;
-               } else if (pb->pb_flags & PBF_FS_MANAGED) {
+               if (pb->pb_flags & PBF_FS_MANAGED) {
                        do_free = 0;
                }
 
                if (do_free) {
+                       ASSERT((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == 0);
                        list_del_init(&pb->pb_hash_list);
                        spin_unlock(&hash->bh_lock);
                        pagebuf_free(pb);
                } else {
                        spin_unlock(&hash->bh_lock);
                }
+       } else {
+               /*
+                * Catch reference count leaks
+                */
+               ASSERT(atomic_read(&pb->pb_hold) >= 0);
        }
 }
 
@@ -1006,13 +991,24 @@ pagebuf_lock(
  *     pagebuf_unlock
  *
  *     pagebuf_unlock releases the lock on the buffer object created by
- *     pagebuf_lock or pagebuf_cond_lock (not any
- *     pinning of underlying pages created by pagebuf_pin).
+ *     pagebuf_lock or pagebuf_cond_lock (not any pinning of underlying pages
+ *     created by pagebuf_pin).
+ *
+ *     If the buffer is marked delwri but is not queued, queue it before we
+ *     unlock the buffer as we need to set flags correctly. We also need to
+ *     take a reference for the delwri queue because the unlocker is going to
+ *     drop theirs and they don't know we just queued it.
  */
 void
 pagebuf_unlock(                                /* unlock buffer                */
        xfs_buf_t               *pb)    /* buffer to unlock             */
 {
+       if ((pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q)) == PBF_DELWRI) {
+               atomic_inc(&pb->pb_hold);
+               pb->pb_flags |= PBF_ASYNC;
+               pagebuf_delwri_queue(pb, 0);
+       }
+
        PB_CLEAR_OWNER(pb);
        up(&pb->pb_sema);
        PB_TRACE(pb, "unlock", 0);
@@ -1249,8 +1245,8 @@ bio_end_io_pagebuf(
        int                     error)
 {
        xfs_buf_t               *pb = (xfs_buf_t *)bio->bi_private;
-       unsigned int            i, blocksize = pb->pb_target->pbr_bsize;
-       struct bio_vec          *bvec = bio->bi_io_vec;
+       unsigned int            blocksize = pb->pb_target->pbr_bsize;
+       struct bio_vec          *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
 
        if (bio->bi_size)
                return 1;
@@ -1258,10 +1254,12 @@ bio_end_io_pagebuf(
        if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                pb->pb_error = EIO;
 
-       for (i = 0; i < bio->bi_vcnt; i++, bvec++) {
+       do {
                struct page     *page = bvec->bv_page;
 
-               if (pb->pb_error) {
+               if (unlikely(pb->pb_error)) {
+                       if (pb->pb_flags & PBF_READ)
+                               ClearPageUptodate(page);
                        SetPageError(page);
                } else if (blocksize == PAGE_CACHE_SIZE) {
                        SetPageUptodate(page);
@@ -1270,10 +1268,13 @@ bio_end_io_pagebuf(
                        set_page_region(page, bvec->bv_offset, bvec->bv_len);
                }
 
+               if (--bvec >= bio->bi_io_vec)
+                       prefetchw(&bvec->bv_page->flags);
+
                if (_pagebuf_iolocked(pb)) {
                        unlock_page(page);
                }
-       }
+       } while (bvec >= bio->bi_io_vec);
 
        _pagebuf_iodone(pb, 1);
        bio_put(bio);
@@ -1511,6 +1512,11 @@ again:
                        ASSERT(btp == bp->pb_target);
                        if (!(bp->pb_flags & PBF_FS_MANAGED)) {
                                spin_unlock(&hash->bh_lock);
+                               /*
+                                * Catch superblock reference count leaks
+                                * immediately
+                                */
+                               BUG_ON(bp->pb_bn == 0);
                                delay(100);
                                goto again;
                        }
@@ -1686,17 +1692,20 @@ pagebuf_delwri_queue(
        int                     unlock)
 {
        PB_TRACE(pb, "delwri_q", (long)unlock);
-       ASSERT(pb->pb_flags & PBF_DELWRI);
+       ASSERT((pb->pb_flags & (PBF_DELWRI|PBF_ASYNC)) ==
+                                       (PBF_DELWRI|PBF_ASYNC));
 
        spin_lock(&pbd_delwrite_lock);
        /* If already in the queue, dequeue and place at tail */
        if (!list_empty(&pb->pb_list)) {
+               ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
                if (unlock) {
                        atomic_dec(&pb->pb_hold);
                }
                list_del(&pb->pb_list);
        }
 
+       pb->pb_flags |= _PBF_DELWRI_Q;
        list_add_tail(&pb->pb_list, &pbd_delwrite_queue);
        pb->pb_queuetime = jiffies;
        spin_unlock(&pbd_delwrite_lock);
@@ -1713,10 +1722,11 @@ pagebuf_delwri_dequeue(
 
        spin_lock(&pbd_delwrite_lock);
        if ((pb->pb_flags & PBF_DELWRI) && !list_empty(&pb->pb_list)) {
+               ASSERT(pb->pb_flags & _PBF_DELWRI_Q);
                list_del_init(&pb->pb_list);
                dequeued = 1;
        }
-       pb->pb_flags &= ~PBF_DELWRI;
+       pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
        spin_unlock(&pbd_delwrite_lock);
 
        if (dequeued)
@@ -1733,9 +1743,7 @@ pagebuf_runall_queues(
 }
 
 /* Defines for pagebuf daemon */
-STATIC DECLARE_COMPLETION(xfsbufd_done);
 STATIC struct task_struct *xfsbufd_task;
-STATIC int xfsbufd_active;
 STATIC int xfsbufd_force_flush;
 STATIC int xfsbufd_force_sleep;
 
@@ -1761,14 +1769,8 @@ xfsbufd(
        xfs_buftarg_t           *target;
        xfs_buf_t               *pb, *n;
 
-       /*  Set up the thread  */
-       daemonize("xfsbufd");
        current->flags |= PF_MEMALLOC;
 
-       xfsbufd_task = current;
-       xfsbufd_active = 1;
-       barrier();
-
        INIT_LIST_HEAD(&tmp);
        do {
                if (unlikely(freezing(current))) {
@@ -1795,7 +1797,7 @@ xfsbufd(
                                        break;
                                }
 
-                               pb->pb_flags &= ~PBF_DELWRI;
+                               pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
                                pb->pb_flags |= PBF_WRITE;
                                list_move(&pb->pb_list, &tmp);
                        }
@@ -1816,9 +1818,9 @@ xfsbufd(
                        purge_addresses();
 
                xfsbufd_force_flush = 0;
-       } while (xfsbufd_active);
+       } while (!kthread_should_stop());
 
-       complete_and_exit(&xfsbufd_done, 0);
+       return 0;
 }
 
 /*
@@ -1845,15 +1847,13 @@ xfs_flush_buftarg(
                if (pb->pb_target != target)
                        continue;
 
-               ASSERT(pb->pb_flags & PBF_DELWRI);
+               ASSERT(pb->pb_flags & (PBF_DELWRI|_PBF_DELWRI_Q));
                PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
                if (pagebuf_ispin(pb)) {
                        pincount++;
                        continue;
                }
 
-               pb->pb_flags &= ~PBF_DELWRI;
-               pb->pb_flags |= PBF_WRITE;
                list_move(&pb->pb_list, &tmp);
        }
        spin_unlock(&pbd_delwrite_lock);
@@ -1862,12 +1862,14 @@ xfs_flush_buftarg(
         * Dropped the delayed write list lock, now walk the temporary list
         */
        list_for_each_entry_safe(pb, n, &tmp, pb_list) {
+               pagebuf_lock(pb);
+               pb->pb_flags &= ~(PBF_DELWRI|_PBF_DELWRI_Q);
+               pb->pb_flags |= PBF_WRITE;
                if (wait)
                        pb->pb_flags &= ~PBF_ASYNC;
                else
                        list_del_init(&pb->pb_list);
 
-               pagebuf_lock(pb);
                pagebuf_iostrategy(pb);
        }
 
@@ -1901,9 +1903,11 @@ xfs_buf_daemons_start(void)
        if (!xfsdatad_workqueue)
                goto out_destroy_xfslogd_workqueue;
 
-       error = kernel_thread(xfsbufd, NULL, CLONE_FS|CLONE_FILES);
-       if (error < 0)
+       xfsbufd_task = kthread_run(xfsbufd, NULL, "xfsbufd");
+       if (IS_ERR(xfsbufd_task)) {
+               error = PTR_ERR(xfsbufd_task);
                goto out_destroy_xfsdatad_workqueue;
+       }
        return 0;
 
  out_destroy_xfsdatad_workqueue:
@@ -1920,10 +1924,7 @@ xfs_buf_daemons_start(void)
 STATIC void
 xfs_buf_daemons_stop(void)
 {
-       xfsbufd_active = 0;
-       barrier();
-       wait_for_completion(&xfsbufd_done);
-
+       kthread_stop(xfsbufd_task);
        destroy_workqueue(xfslogd_workqueue);
        destroy_workqueue(xfsdatad_workqueue);
 }
index 3f8f69a..67c19f7 100644 (file)
@@ -89,6 +89,7 @@ typedef enum page_buf_flags_e {               /* pb_flags values */
        _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache                 */
        _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc()              */
        _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue         */
+       _PBF_DELWRI_Q = (1 << 21),   /* buffer on delwri queue             */
 } page_buf_flags_t;
 
 #define PBF_UPDATE (PBF_READ | PBF_WRITE)
@@ -206,13 +207,6 @@ extern xfs_buf_t *xfs_buf_read_flags(      /* allocate and read a buffer   */
 #define xfs_buf_read(target, blkno, len, flags) \
        xfs_buf_read_flags((target), (blkno), (len), PBF_LOCK | PBF_MAPPED)
 
-extern xfs_buf_t *pagebuf_lookup(
-               xfs_buftarg_t *,
-               loff_t,                 /* starting offset of range     */
-               size_t,                 /* length of range              */
-               page_buf_flags_t);      /* PBF_READ, PBF_WRITE,         */
-                                       /* PBF_FORCEIO,                 */
-
 extern xfs_buf_t *pagebuf_get_empty(   /* allocate pagebuf struct with */
                                        /*  no memory or disk address   */
                size_t len,
@@ -344,8 +338,6 @@ extern void pagebuf_trace(
 
 
 
-
-
 /* These are just for xfs_syncsub... it sets an internal variable
  * then passes it to VOP_FLUSH_PAGES or adds the flags to a newly gotten buf_t
  */
@@ -452,7 +444,7 @@ extern void pagebuf_trace(
 
 #define XFS_BUF_PTR(bp)                (xfs_caddr_t)((bp)->pb_addr)
 
-extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
+static inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
 {
        if (bp->pb_flags & PBF_MAPPED)
                return XFS_BUF_PTR(bp) + offset;
index f1ce432..3881622 100644 (file)
@@ -311,6 +311,31 @@ linvfs_fsync(
 
 #define nextdp(dp)      ((struct xfs_dirent *)((char *)(dp) + (dp)->d_reclen))
 
+#ifdef CONFIG_XFS_DMAPI
+
+STATIC struct page *
+linvfs_filemap_nopage(
+       struct vm_area_struct   *area,
+       unsigned long           address,
+       int                     *type)
+{
+       struct inode    *inode = area->vm_file->f_dentry->d_inode;
+       vnode_t         *vp = LINVFS_GET_VP(inode);
+       xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
+       int             error;
+
+       ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
+
+       error = XFS_SEND_MMAP(mp, area, 0);
+       if (error)
+               return NULL;
+
+       return filemap_nopage(area, address, type);
+}
+
+#endif /* CONFIG_XFS_DMAPI */
+
+
 STATIC int
 linvfs_readdir(
        struct file     *filp,
@@ -390,14 +415,6 @@ done:
        return -error;
 }
 
-#ifdef CONFIG_XFS_DMAPI
-STATIC void
-linvfs_mmap_close(
-       struct vm_area_struct   *vma)
-{
-       xfs_dm_mm_put(vma);
-}
-#endif /* CONFIG_XFS_DMAPI */
 
 STATIC int
 linvfs_file_mmap(
@@ -411,16 +428,11 @@ linvfs_file_mmap(
 
        vma->vm_ops = &linvfs_file_vm_ops;
 
-       if (vp->v_vfsp->vfs_flag & VFS_DMI) {
-               xfs_mount_t     *mp = XFS_VFSTOM(vp->v_vfsp);
-
-               error = -XFS_SEND_MMAP(mp, vma, 0);
-               if (error)
-                       return error;
 #ifdef CONFIG_XFS_DMAPI
+       if (vp->v_vfsp->vfs_flag & VFS_DMI) {
                vma->vm_ops = &linvfs_dmapi_file_vm_ops;
-#endif
        }
+#endif /* CONFIG_XFS_DMAPI */
 
        VOP_SETATTR(vp, &va, XFS_AT_UPDATIME, NULL, error);
        if (!error)
@@ -474,6 +486,7 @@ linvfs_ioctl_invis(
        return error;
 }
 
+#ifdef CONFIG_XFS_DMAPI
 #ifdef HAVE_VMOP_MPROTECT
 STATIC int
 linvfs_mprotect(
@@ -494,6 +507,7 @@ linvfs_mprotect(
        return error;
 }
 #endif /* HAVE_VMOP_MPROTECT */
+#endif /* CONFIG_XFS_DMAPI */
 
 #ifdef HAVE_FOP_OPEN_EXEC
 /* If the user is attempting to execute a file that is offline then
@@ -528,49 +542,10 @@ open_exec_out:
 }
 #endif /* HAVE_FOP_OPEN_EXEC */
 
-/*
- * Temporary workaround to the AIO direct IO write problem.
- * This code can go and we can revert to do_sync_write once
- * the writepage(s) rework is merged.
- */
-STATIC ssize_t
-linvfs_write(
-       struct file     *filp,
-       const char      __user *buf,
-       size_t          len,
-       loff_t          *ppos)
-{
-       struct kiocb    kiocb;
-       ssize_t         ret;
-
-       init_sync_kiocb(&kiocb, filp);
-       kiocb.ki_pos = *ppos;
-       ret = __linvfs_write(&kiocb, buf, 0, len, kiocb.ki_pos);
-       *ppos = kiocb.ki_pos;
-       return ret;
-}
-STATIC ssize_t
-linvfs_write_invis(
-       struct file     *filp,
-       const char      __user *buf,
-       size_t          len,
-       loff_t          *ppos)
-{
-       struct kiocb    kiocb;
-       ssize_t         ret;
-
-       init_sync_kiocb(&kiocb, filp);
-       kiocb.ki_pos = *ppos;
-       ret = __linvfs_write(&kiocb, buf, IO_INVIS, len, kiocb.ki_pos);
-       *ppos = kiocb.ki_pos;
-       return ret;
-}
-
-
 struct file_operations linvfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
-       .write          = linvfs_write,
+       .write          = do_sync_write,
        .readv          = linvfs_readv,
        .writev         = linvfs_writev,
        .aio_read       = linvfs_aio_read,
@@ -592,7 +567,7 @@ struct file_operations linvfs_file_operations = {
 struct file_operations linvfs_invis_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
-       .write          = linvfs_write_invis,
+       .write          = do_sync_write,
        .readv          = linvfs_readv_invis,
        .writev         = linvfs_writev_invis,
        .aio_read       = linvfs_aio_read_invis,
@@ -626,8 +601,7 @@ static struct vm_operations_struct linvfs_file_vm_ops = {
 
 #ifdef CONFIG_XFS_DMAPI
 static struct vm_operations_struct linvfs_dmapi_file_vm_ops = {
-       .close          = linvfs_mmap_close,
-       .nopage         = filemap_nopage,
+       .nopage         = linvfs_filemap_nopage,
        .populate       = filemap_populate,
 #ifdef HAVE_VMOP_MPROTECT
        .mprotect       = linvfs_mprotect,
index 05a447e..6a3326b 100644 (file)
@@ -141,13 +141,19 @@ xfs_find_handle(
                return -XFS_ERROR(EINVAL);
        }
 
-       /* we need the vnode */
-       vp = LINVFS_GET_VP(inode);
-       if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
+       case S_IFDIR:
+       case S_IFLNK:
+               break;
+       default:
                iput(inode);
                return -XFS_ERROR(EBADF);
        }
 
+       /* we need the vnode */
+       vp = LINVFS_GET_VP(inode);
+
        /* now we can grab the fsid */
        memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t));
        hsize = sizeof(xfs_fsid_t);
@@ -386,7 +392,7 @@ xfs_readlink_by_handle(
                return -error;
 
        /* Restrict this handle operation to symlinks only. */
-       if (vp->v_type != VLNK) {
+       if (!S_ISLNK(inode->i_mode)) {
                VN_RELE(vp);
                return -XFS_ERROR(EINVAL);
        }
@@ -982,10 +988,10 @@ xfs_ioc_space(
        if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
                return -XFS_ERROR(EPERM);
 
-       if (!(filp->f_flags & FMODE_WRITE))
+       if (!(filp->f_mode & FMODE_WRITE))
                return -XFS_ERROR(EBADF);
 
-       if (vp->v_type != VREG)
+       if (!VN_ISREG(vp))
                return -XFS_ERROR(EINVAL);
 
        if (copy_from_user(&bf, arg, sizeof(bf)))
index 0f8f138..4636b7f 100644 (file)
 #include "xfs_vnode.h"
 #include "xfs_dfrag.h"
 
+#define  _NATIVE_IOC(cmd, type) \
+         _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
+
 #if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
 #define BROKEN_X86_ALIGNMENT
+/* on ia32 l_start is on a 32-bit boundary */
+typedef struct xfs_flock64_32 {
+       __s16           l_type;
+       __s16           l_whence;
+       __s64           l_start __attribute__((packed));
+                       /* len == 0 means until end of file */
+       __s64           l_len __attribute__((packed));
+       __s32           l_sysid;
+       __u32           l_pid;
+       __s32           l_pad[4];       /* reserve area */
+} xfs_flock64_32_t;
+
+#define XFS_IOC_ALLOCSP_32     _IOW ('X', 10, struct xfs_flock64_32)
+#define XFS_IOC_FREESP_32      _IOW ('X', 11, struct xfs_flock64_32)
+#define XFS_IOC_ALLOCSP64_32   _IOW ('X', 36, struct xfs_flock64_32)
+#define XFS_IOC_FREESP64_32    _IOW ('X', 37, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP_32      _IOW ('X', 40, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP_32    _IOW ('X', 41, struct xfs_flock64_32)
+#define XFS_IOC_RESVSP64_32    _IOW ('X', 42, struct xfs_flock64_32)
+#define XFS_IOC_UNRESVSP64_32  _IOW ('X', 43, struct xfs_flock64_32)
+
+/* just account for different alignment */
+STATIC unsigned long
+xfs_ioctl32_flock(
+       unsigned long           arg)
+{
+       xfs_flock64_32_t        __user *p32 = (void __user *)arg;
+       xfs_flock64_t           __user *p = compat_alloc_user_space(sizeof(*p));
+
+       if (copy_in_user(&p->l_type,    &p32->l_type,   sizeof(s16)) ||
+           copy_in_user(&p->l_whence,  &p32->l_whence, sizeof(s16)) ||
+           copy_in_user(&p->l_start,   &p32->l_start,  sizeof(s64)) ||
+           copy_in_user(&p->l_len,     &p32->l_len,    sizeof(s64)) ||
+           copy_in_user(&p->l_sysid,   &p32->l_sysid,  sizeof(s32)) ||
+           copy_in_user(&p->l_pid,     &p32->l_pid,    sizeof(u32)) ||
+           copy_in_user(&p->l_pad,     &p32->l_pad,    4*sizeof(u32)))
+               return -EFAULT;
+       
+       return (unsigned long)p;
+}
+
 #else
 
 typedef struct xfs_fsop_bulkreq32 {
@@ -103,7 +147,6 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 /* not handled
        case XFS_IOC_FD_TO_HANDLE:
        case XFS_IOC_PATH_TO_HANDLE:
-       case XFS_IOC_PATH_TO_HANDLE:
        case XFS_IOC_PATH_TO_FSHANDLE:
        case XFS_IOC_OPEN_BY_HANDLE:
        case XFS_IOC_FSSETDM_BY_HANDLE:
@@ -124,8 +167,21 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
        case XFS_IOC_ERROR_CLEARALL:
                break;
 
-#ifndef BROKEN_X86_ALIGNMENT
-       /* xfs_flock_t and xfs_bstat_t have wrong u32 vs u64 alignment */
+#ifdef BROKEN_X86_ALIGNMENT
+       /* xfs_flock_t has wrong u32 vs u64 alignment */
+       case XFS_IOC_ALLOCSP_32:
+       case XFS_IOC_FREESP_32:
+       case XFS_IOC_ALLOCSP64_32:
+       case XFS_IOC_FREESP64_32:
+       case XFS_IOC_RESVSP_32:
+       case XFS_IOC_UNRESVSP_32:
+       case XFS_IOC_RESVSP64_32:
+       case XFS_IOC_UNRESVSP64_32:
+               arg = xfs_ioctl32_flock(arg);
+               cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
+               break;
+
+#else /* These are handled fine if no alignment issues */
        case XFS_IOC_ALLOCSP:
        case XFS_IOC_FREESP:
        case XFS_IOC_RESVSP:
@@ -134,6 +190,9 @@ __linvfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
        case XFS_IOC_FREESP64:
        case XFS_IOC_RESVSP64:
        case XFS_IOC_UNRESVSP64:
+               break;
+
+       /* xfs_bstat_t still has wrong u32 vs u64 alignment */
        case XFS_IOC_SWAPEXT:
                break;
 
index f252605..77708a8 100644 (file)
@@ -140,7 +140,6 @@ linvfs_mknod(
 
        memset(&va, 0, sizeof(va));
        va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
-       va.va_type = IFTOVT(mode);
        va.va_mode = mode;
 
        switch (mode & S_IFMT) {
@@ -308,14 +307,13 @@ linvfs_symlink(
        cvp = NULL;
 
        memset(&va, 0, sizeof(va));
-       va.va_type = VLNK;
-       va.va_mode = irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO;
+       va.va_mode = S_IFLNK |
+               (irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
        va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
 
        error = 0;
        VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
        if (!error && cvp) {
-               ASSERT(cvp->v_type == VLNK);
                ip = LINVFS_GET_IP(cvp);
                d_instantiate(dentry, ip);
                validate_fields(dir);
@@ -425,9 +423,14 @@ linvfs_follow_link(
        return NULL;
 }
 
-static void linvfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+STATIC void
+linvfs_put_link(
+       struct dentry   *dentry,
+       struct nameidata *nd,
+       void            *p)
 {
-       char *s = nd_get_link(nd);
+       char            *s = nd_get_link(nd);
+
        if (!IS_ERR(s))
                kfree(s);
 }
index 42dc5e4..68c5d88 100644 (file)
@@ -64,7 +64,6 @@
 #include <sema.h>
 #include <time.h>
 
-#include <support/qsort.h>
 #include <support/ktrace.h>
 #include <support/debug.h>
 #include <support/move.h>
 #include <xfs_stats.h>
 #include <xfs_sysctl.h>
 #include <xfs_iops.h>
+#include <xfs_aops.h>
 #include <xfs_super.h>
 #include <xfs_globals.h>
 #include <xfs_fs_subr.h>
@@ -254,11 +254,18 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh)
 #define MAX(a,b)       (max(a,b))
 #define howmany(x, y)  (((x)+((y)-1))/(y))
 #define roundup(x, y)  ((((x)+((y)-1))/(y))*(y))
+#define qsort(a,n,s,fn)        sort(a,n,s,fn,NULL)
 
+/*
+ * Various platform dependent calls that don't fit anywhere else
+ */
 #define xfs_stack_trace()      dump_stack()
-
 #define xfs_itruncate_data(ip, off)    \
        (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off)))
+#define xfs_statvfs_fsid(statp, mp)    \
+       ({ u64 id = huge_encode_dev((mp)->m_dev);       \
+          __kernel_fsid_t *fsid = &(statp)->f_fsid;    \
+       (fsid->val[0] = (u32)id, fsid->val[1] = (u32)(id >> 32)); })
 
 
 /* Move the kernel do_div definition off to one side */
@@ -371,6 +378,4 @@ static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
        return(x * y);
 }
 
-#define qsort(a, n, s, cmp) sort(a, n, s, cmp, NULL)
-
 #endif /* __XFS_LINUX__ */
index acab58c..3b5fabe 100644 (file)
@@ -660,9 +660,6 @@ xfs_write(
                        (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
                                mp->m_rtdev_targp : mp->m_ddev_targp;
 
-               if (ioflags & IO_ISAIO)
-                       return XFS_ERROR(-ENOSYS);
-
                if ((pos & target->pbr_smask) || (count & target->pbr_smask))
                        return XFS_ERROR(-EINVAL);
 
index f197a72..6294dcd 100644 (file)
@@ -70,9 +70,10 @@ struct xfs_iomap;
 #define        XFS_SENDFILE_ENTER      21
 #define        XFS_WRITEPAGE_ENTER     22
 #define        XFS_RELEASEPAGE_ENTER   23
-#define        XFS_IOMAP_ALLOC_ENTER   24
-#define        XFS_IOMAP_ALLOC_MAP     25
-#define        XFS_IOMAP_UNWRITTEN     26
+#define        XFS_INVALIDPAGE_ENTER   24
+#define        XFS_IOMAP_ALLOC_ENTER   25
+#define        XFS_IOMAP_ALLOC_MAP     26
+#define        XFS_IOMAP_UNWRITTEN     27
 extern void xfs_rw_enter_trace(int, struct xfs_iocore *,
                                void *, size_t, loff_t, int);
 extern void xfs_inval_cached_trace(struct xfs_iocore *,
index f6dd7de..0da87bf 100644 (file)
 #include <linux/namei.h>
 #include <linux/init.h>
 #include <linux/mount.h>
+#include <linux/mempool.h>
 #include <linux/writeback.h>
+#include <linux/kthread.h>
 
 STATIC struct quotactl_ops linvfs_qops;
 STATIC struct super_operations linvfs_sops;
-STATIC kmem_zone_t *linvfs_inode_zone;
+STATIC kmem_zone_t *xfs_vnode_zone;
+STATIC kmem_zone_t *xfs_ioend_zone;
+mempool_t *xfs_ioend_pool;
 
 STATIC struct xfs_mount_args *
 xfs_args_allocate(
@@ -138,24 +142,25 @@ STATIC __inline__ void
 xfs_set_inodeops(
        struct inode            *inode)
 {
-       vnode_t                 *vp = LINVFS_GET_VP(inode);
-
-       if (vp->v_type == VNON) {
-               vn_mark_bad(vp);
-       } else if (S_ISREG(inode->i_mode)) {
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFREG:
                inode->i_op = &linvfs_file_inode_operations;
                inode->i_fop = &linvfs_file_operations;
                inode->i_mapping->a_ops = &linvfs_aops;
-       } else if (S_ISDIR(inode->i_mode)) {
+               break;
+       case S_IFDIR:
                inode->i_op = &linvfs_dir_inode_operations;
                inode->i_fop = &linvfs_dir_operations;
-       } else if (S_ISLNK(inode->i_mode)) {
+               break;
+       case S_IFLNK:
                inode->i_op = &linvfs_symlink_inode_operations;
                if (inode->i_blocks)
                        inode->i_mapping->a_ops = &linvfs_aops;
-       } else {
+               break;
+       default:
                inode->i_op = &linvfs_file_inode_operations;
                init_special_inode(inode, inode->i_mode, inode->i_rdev);
+               break;
        }
 }
 
@@ -167,16 +172,23 @@ xfs_revalidate_inode(
 {
        struct inode            *inode = LINVFS_GET_IP(vp);
 
-       inode->i_mode   = (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
+       inode->i_mode   = ip->i_d.di_mode;
        inode->i_nlink  = ip->i_d.di_nlink;
        inode->i_uid    = ip->i_d.di_uid;
        inode->i_gid    = ip->i_d.di_gid;
-       if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+
+       switch (inode->i_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               inode->i_rdev =
+                       MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
+                             sysv_minor(ip->i_df.if_u2.if_rdev));
+               break;
+       default:
                inode->i_rdev = 0;
-       } else {
-               xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
-               inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
+               break;
        }
+
        inode->i_blksize = PAGE_CACHE_SIZE;
        inode->i_generation = ip->i_d.di_gen;
        i_size_write(inode, ip->i_d.di_size);
@@ -231,7 +243,6 @@ xfs_initialize_vnode(
         * finish our work.
         */
        if (ip->i_d.di_mode != 0 && unlock && (inode->i_state & I_NEW)) {
-               vp->v_type = IFTOVT(ip->i_d.di_mode);
                xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
                xfs_set_inodeops(inode);
        
@@ -274,8 +285,7 @@ linvfs_alloc_inode(
 {
        vnode_t                 *vp;
 
-       vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_zone, 
-                kmem_flags_convert(KM_SLEEP));
+       vp = kmem_cache_alloc(xfs_vnode_zone, kmem_flags_convert(KM_SLEEP));
        if (!vp)
                return NULL;
        return LINVFS_GET_IP(vp);
@@ -285,11 +295,11 @@ STATIC void
 linvfs_destroy_inode(
        struct inode            *inode)
 {
-       kmem_cache_free(linvfs_inode_zone, LINVFS_GET_VP(inode));
+       kmem_zone_free(xfs_vnode_zone, LINVFS_GET_VP(inode));
 }
 
 STATIC void
-init_once(
+linvfs_inode_init_once(
        void                    *data,
        kmem_cache_t            *cachep,
        unsigned long           flags)
@@ -302,21 +312,41 @@ init_once(
 }
 
 STATIC int
-init_inodecache( void )
+linvfs_init_zones(void)
 {
-       linvfs_inode_zone = kmem_cache_create("linvfs_icache",
+       xfs_vnode_zone = kmem_cache_create("xfs_vnode",
                                sizeof(vnode_t), 0, SLAB_RECLAIM_ACCOUNT,
-                               init_once, NULL);
-       if (linvfs_inode_zone == NULL)
-               return -ENOMEM;
+                               linvfs_inode_init_once, NULL);
+       if (!xfs_vnode_zone)
+               goto out;
+
+       xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
+       if (!xfs_ioend_zone)
+               goto out_destroy_vnode_zone;
+
+       xfs_ioend_pool = mempool_create(4 * MAX_BUF_PER_PAGE,
+                       mempool_alloc_slab, mempool_free_slab,
+                       xfs_ioend_zone);
+       if (!xfs_ioend_pool)
+               goto out_free_ioend_zone;
+
        return 0;
+
+
+ out_free_ioend_zone:
+       kmem_zone_destroy(xfs_ioend_zone);
+ out_destroy_vnode_zone:
+       kmem_zone_destroy(xfs_vnode_zone);
+ out:
+       return -ENOMEM;
 }
 
 STATIC void
-destroy_inodecache( void )
+linvfs_destroy_zones(void)
 {
-       if (kmem_cache_destroy(linvfs_inode_zone))
-               printk(KERN_WARNING "%s: cache still in use!\n", __FUNCTION__);
+       mempool_destroy(xfs_ioend_pool);
+       kmem_zone_destroy(xfs_vnode_zone);
+       kmem_zone_destroy(xfs_ioend_zone);
 }
 
 /*
@@ -354,17 +384,38 @@ linvfs_clear_inode(
        struct inode            *inode)
 {
        vnode_t                 *vp = LINVFS_GET_VP(inode);
+       int                     error, cache;
 
-       if (vp) {
-               vn_rele(vp);
-               vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-               /*
-                * Do all our cleanup, and remove this vnode.
-                */
-               vn_remove(vp);
+       vn_trace_entry(vp, "clear_inode", (inst_t *)__return_address);
+
+       XFS_STATS_INC(vn_rele);
+       XFS_STATS_INC(vn_remove);
+       XFS_STATS_INC(vn_reclaim);
+       XFS_STATS_DEC(vn_active);
+
+       /*
+        * This can happen because xfs_iget_core calls xfs_idestroy if we
+        * find an inode with di_mode == 0 but without IGET_CREATE set.
+        */
+       if (vp->v_fbhv)
+               VOP_INACTIVE(vp, NULL, cache);
+
+       VN_LOCK(vp);
+       vp->v_flag &= ~VMODIFIED;
+       VN_UNLOCK(vp, 0);
+
+       if (vp->v_fbhv) {
+               VOP_RECLAIM(vp, error);
+               if (error)
+                       panic("vn_purge: cannot reclaim");
        }
-}
 
+       ASSERT(vp->v_fbhv == NULL);
+
+#ifdef XFS_VNODE_TRACE
+       ktrace_free(vp->v_trace);
+#endif
+}
 
 /*
  * Enqueue a work item to be picked up by the vfs xfssyncd thread.
@@ -466,25 +517,16 @@ xfssyncd(
 {
        long                    timeleft;
        vfs_t                   *vfsp = (vfs_t *) arg;
-       struct list_head        tmp;
        struct vfs_sync_work    *work, *n;
+       LIST_HEAD               (tmp);
 
-       daemonize("xfssyncd");
-
-       vfsp->vfs_sync_work.w_vfs = vfsp;
-       vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
-       vfsp->vfs_sync_task = current;
-       wmb();
-       wake_up(&vfsp->vfs_wait_sync_task);
-
-       INIT_LIST_HEAD(&tmp);
        timeleft = (xfs_syncd_centisecs * HZ) / 100;
        for (;;) {
                set_current_state(TASK_INTERRUPTIBLE);
                timeleft = schedule_timeout(timeleft);
                /* swsusp */
                try_to_freeze();
-               if (vfsp->vfs_flag & VFS_UMOUNT)
+               if (kthread_should_stop())
                        break;
 
                spin_lock(&vfsp->vfs_sync_lock);
@@ -513,10 +555,6 @@ xfssyncd(
                }
        }
 
-       vfsp->vfs_sync_task = NULL;
-       wmb();
-       wake_up(&vfsp->vfs_wait_sync_task);
-
        return 0;
 }
 
@@ -524,13 +562,11 @@ STATIC int
 linvfs_start_syncd(
        vfs_t                   *vfsp)
 {
-       int                     pid;
-
-       pid = kernel_thread(xfssyncd, (void *) vfsp,
-                       CLONE_VM | CLONE_FS | CLONE_FILES);
-       if (pid < 0)
-               return -pid;
-       wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task);
+       vfsp->vfs_sync_work.w_syncer = vfs_sync_worker;
+       vfsp->vfs_sync_work.w_vfs = vfsp;
+       vfsp->vfs_sync_task = kthread_run(xfssyncd, vfsp, "xfssyncd");
+       if (IS_ERR(vfsp->vfs_sync_task))
+               return -PTR_ERR(vfsp->vfs_sync_task);
        return 0;
 }
 
@@ -538,11 +574,7 @@ STATIC void
 linvfs_stop_syncd(
        vfs_t                   *vfsp)
 {
-       vfsp->vfs_flag |= VFS_UMOUNT;
-       wmb();
-
-       wake_up_process(vfsp->vfs_sync_task);
-       wait_event(vfsp->vfs_wait_sync_task, !vfsp->vfs_sync_task);
+       kthread_stop(vfsp->vfs_sync_task);
 }
 
 STATIC void
@@ -866,9 +898,9 @@ init_xfs_fs( void )
 
        ktrace_init(64);
 
-       error = init_inodecache();
+       error = linvfs_init_zones();
        if (error < 0)
-               goto undo_inodecache;
+               goto undo_zones;
 
        error = pagebuf_init();
        if (error < 0)
@@ -889,9 +921,9 @@ undo_register:
        pagebuf_terminate();
 
 undo_pagebuf:
-       destroy_inodecache();
+       linvfs_destroy_zones();
 
-undo_inodecache:
+undo_zones:
        return error;
 }
 
@@ -903,7 +935,7 @@ exit_xfs_fs( void )
        unregister_filesystem(&xfs_fs_type);
        xfs_cleanup();
        pagebuf_terminate();
-       destroy_inodecache();
+       linvfs_destroy_zones();
        ktrace_uninit();
 }
 
index 669c616..34cc902 100644 (file)
@@ -251,7 +251,6 @@ vfs_allocate( void )
        bhv_head_init(VFS_BHVHEAD(vfsp), "vfs");
        INIT_LIST_HEAD(&vfsp->vfs_sync_list);
        spin_lock_init(&vfsp->vfs_sync_lock);
-       init_waitqueue_head(&vfsp->vfs_wait_sync_task);
        init_waitqueue_head(&vfsp->vfs_wait_single_sync_task);
        return vfsp;
 }
index 7ee1f71..f0ab574 100644 (file)
@@ -65,7 +65,6 @@ typedef struct vfs {
        spinlock_t              vfs_sync_lock;  /* work item list lock */
        int                     vfs_sync_seq;   /* sync thread generation no. */
        wait_queue_head_t       vfs_wait_single_sync_task;
-       wait_queue_head_t       vfs_wait_sync_task;
 } vfs_t;
 
 #define vfs_fbhv               vfs_bh.bh_first /* 1st on vfs behavior chain */
@@ -96,7 +95,6 @@ typedef enum {
 #define VFS_RDONLY             0x0001  /* read-only vfs */
 #define VFS_GRPID              0x0002  /* group-ID assigned from directory */
 #define VFS_DMI                        0x0004  /* filesystem has the DMI enabled */
-#define VFS_UMOUNT             0x0008  /* unmount in progress */
 #define VFS_END                        0x0008  /* max flag */
 
 #define SYNC_ATTR              0x0001  /* sync attributes */
index 250cad5..268f45b 100644 (file)
@@ -42,93 +42,33 @@ DEFINE_SPINLOCK(vnumber_lock);
  */
 #define NVSYNC                  37
 #define vptosync(v)             (&vsync[((unsigned long)v) % NVSYNC])
-sv_t vsync[NVSYNC];
-
-/*
- * Translate stat(2) file types to vnode types and vice versa.
- * Aware of numeric order of S_IFMT and vnode type values.
- */
-enum vtype iftovt_tab[] = {
-       VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
-       VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
-};
-
-u_short vttoif_tab[] = {
-       0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO, 0, S_IFSOCK
-};
+STATIC wait_queue_head_t vsync[NVSYNC];
 
 
 void
 vn_init(void)
 {
-       register sv_t *svp;
-       register int i;
+       int i;
 
-       for (svp = vsync, i = 0; i < NVSYNC; i++, svp++)
-               init_sv(svp, SV_DEFAULT, "vsy", i);
+       for (i = 0; i < NVSYNC; i++)
+               init_waitqueue_head(&vsync[i]);
 }
 
-/*
- * Clean a vnode of filesystem-specific data and prepare it for reuse.
- */
-STATIC int
-vn_reclaim(
+void
+vn_iowait(
        struct vnode    *vp)
 {
-       int             error;
+       wait_queue_head_t *wq = vptosync(vp);
 
-       XFS_STATS_INC(vn_reclaim);
-       vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address);
-
-       /*
-        * Only make the VOP_RECLAIM call if there are behaviors
-        * to call.
-        */
-       if (vp->v_fbhv) {
-               VOP_RECLAIM(vp, error);
-               if (error)
-                       return -error;
-       }
-       ASSERT(vp->v_fbhv == NULL);
-
-       VN_LOCK(vp);
-       vp->v_flag &= (VRECLM|VWAIT);
-       VN_UNLOCK(vp, 0);
-
-       vp->v_type = VNON;
-       vp->v_fbhv = NULL;
-
-#ifdef XFS_VNODE_TRACE
-       ktrace_free(vp->v_trace);
-       vp->v_trace = NULL;
-#endif
-
-       return 0;
-}
-
-STATIC void
-vn_wakeup(
-       struct vnode    *vp)
-{
-       VN_LOCK(vp);
-       if (vp->v_flag & VWAIT)
-               sv_broadcast(vptosync(vp));
-       vp->v_flag &= ~(VRECLM|VWAIT|VMODIFIED);
-       VN_UNLOCK(vp, 0);
+       wait_event(*wq, (atomic_read(&vp->v_iocount) == 0));
 }
 
-int
-vn_wait(
+void
+vn_iowake(
        struct vnode    *vp)
 {
-       VN_LOCK(vp);
-       if (vp->v_flag & (VINACT | VRECLM)) {
-               vp->v_flag |= VWAIT;
-               sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
-               return 1;
-       }
-       VN_UNLOCK(vp, 0);
-       return 0;
+       if (atomic_dec_and_test(&vp->v_iocount))
+               wake_up(vptosync(vp));
 }
 
 struct vnode *
@@ -154,6 +94,8 @@ vn_initialize(
        /* Initialize the first behavior and the behavior chain head. */
        vn_bhv_head_init(VN_BHV_HEAD(vp), "vnode");
 
+       atomic_set(&vp->v_iocount, 0);
+
 #ifdef XFS_VNODE_TRACE
        vp->v_trace = ktrace_alloc(VNODE_TRACE_SIZE, KM_SLEEP);
 #endif /* XFS_VNODE_TRACE */
@@ -162,30 +104,6 @@ vn_initialize(
        return vp;
 }
 
-/*
- * Get a reference on a vnode.
- */
-vnode_t *
-vn_get(
-       struct vnode    *vp,
-       vmap_t          *vmap)
-{
-       struct inode    *inode;
-
-       XFS_STATS_INC(vn_get);
-       inode = LINVFS_GET_IP(vp);
-       if (inode->i_state & I_FREEING)
-               return NULL;
-
-       inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
-       if (!inode)     /* Inode not present */
-               return NULL;
-
-       vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
-
-       return vp;
-}
-
 /*
  * Revalidate the Linux inode from the vattr.
  * Note: i_size _not_ updated; we must hold the inode
@@ -198,7 +116,7 @@ vn_revalidate_core(
 {
        struct inode    *inode = LINVFS_GET_IP(vp);
 
-       inode->i_mode       = VTTOIF(vap->va_type) | vap->va_mode;
+       inode->i_mode       = vap->va_mode;
        inode->i_nlink      = vap->va_nlink;
        inode->i_uid        = vap->va_uid;
        inode->i_gid        = vap->va_gid;
@@ -246,71 +164,6 @@ vn_revalidate(
        return -error;
 }
 
-/*
- * purge a vnode from the cache
- * At this point the vnode is guaranteed to have no references (vn_count == 0)
- * The caller has to make sure that there are no ways someone could
- * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock).
- */
-void
-vn_purge(
-       struct vnode    *vp,
-       vmap_t          *vmap)
-{
-       vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address);
-
-again:
-       /*
-        * Check whether vp has already been reclaimed since our caller
-        * sampled its version while holding a filesystem cache lock that
-        * its VOP_RECLAIM function acquires.
-        */
-       VN_LOCK(vp);
-       if (vp->v_number != vmap->v_number) {
-               VN_UNLOCK(vp, 0);
-               return;
-       }
-
-       /*
-        * If vp is being reclaimed or inactivated, wait until it is inert,
-        * then proceed.  Can't assume that vnode is actually reclaimed
-        * just because the reclaimed flag is asserted -- a vn_alloc
-        * reclaim can fail.
-        */
-       if (vp->v_flag & (VINACT | VRECLM)) {
-               ASSERT(vn_count(vp) == 0);
-               vp->v_flag |= VWAIT;
-               sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0);
-               goto again;
-       }
-
-       /*
-        * Another process could have raced in and gotten this vnode...
-        */
-       if (vn_count(vp) > 0) {
-               VN_UNLOCK(vp, 0);
-               return;
-       }
-
-       XFS_STATS_DEC(vn_active);
-       vp->v_flag |= VRECLM;
-       VN_UNLOCK(vp, 0);
-
-       /*
-        * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells
-        * vp's filesystem to flush and invalidate all cached resources.
-        * When vn_reclaim returns, vp should have no private data,
-        * either in a system cache or attached to v_data.
-        */
-       if (vn_reclaim(vp) != 0)
-               panic("vn_purge: cannot reclaim");
-
-       /*
-        * Wakeup anyone waiting for vp to be reclaimed.
-        */
-       vn_wakeup(vp);
-}
-
 /*
  * Add a reference to a referenced vnode.
  */
@@ -330,80 +183,6 @@ vn_hold(
        return vp;
 }
 
-/*
- *  Call VOP_INACTIVE on last reference.
- */
-void
-vn_rele(
-       struct vnode    *vp)
-{
-       int             vcnt;
-       int             cache;
-
-       XFS_STATS_INC(vn_rele);
-
-       VN_LOCK(vp);
-
-       vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address);
-       vcnt = vn_count(vp);
-
-       /*
-        * Since we always get called from put_inode we know
-        * that i_count won't be decremented after we
-        * return.
-        */
-       if (!vcnt) {
-               /*
-                * As soon as we turn this on, noone can find us in vn_get
-                * until we turn off VINACT or VRECLM
-                */
-               vp->v_flag |= VINACT;
-               VN_UNLOCK(vp, 0);
-
-               /*
-                * Do not make the VOP_INACTIVE call if there
-                * are no behaviors attached to the vnode to call.
-                */
-               if (vp->v_fbhv)
-                       VOP_INACTIVE(vp, NULL, cache);
-
-               VN_LOCK(vp);
-               if (vp->v_flag & VWAIT)
-                       sv_broadcast(vptosync(vp));
-
-               vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED);
-       }
-
-       VN_UNLOCK(vp, 0);
-
-       vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address);
-}
-
-/*
- * Finish the removal of a vnode.
- */
-void
-vn_remove(
-       struct vnode    *vp)
-{
-       vmap_t          vmap;
-
-       /* Make sure we don't do this to the same vnode twice */
-       if (!(vp->v_fbhv))
-               return;
-
-       XFS_STATS_INC(vn_remove);
-       vn_trace_exit(vp, "vn_remove", (inst_t *)__return_address);
-
-       /*
-        * After the following purge the vnode
-        * will no longer exist.
-        */
-       VMAP(vp, vmap);
-       vn_purge(vp, &vmap);
-}
-
-
 #ifdef XFS_VNODE_TRACE
 
 #define KTRACE_ENTER(vp, vk, s, line, ra)                      \
index a6e57c6..35f306c 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -65,10 +65,6 @@ struct vattr;
 struct xfs_iomap;
 struct attrlist_cursor_kern;
 
-/*
- * Vnode types.  VNON means no type.
- */
-enum vtype     { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VFIFO, VBAD, VSOCK };
 
 typedef xfs_ino_t vnumber_t;
 typedef struct dentry vname_t;
@@ -77,15 +73,14 @@ typedef bhv_head_t vn_bhv_head_t;
 /*
  * MP locking protocols:
  *     v_flag, v_vfsp                          VN_LOCK/VN_UNLOCK
- *     v_type                                  read-only or fs-dependent
  */
 typedef struct vnode {
        __u32           v_flag;                 /* vnode flags (see below) */
-       enum vtype      v_type;                 /* vnode type */
        struct vfs      *v_vfsp;                /* ptr to containing VFS */
        vnumber_t       v_number;               /* in-core vnode number */
        vn_bhv_head_t   v_bh;                   /* behavior head */
        spinlock_t      v_lock;                 /* VN_LOCK/VN_UNLOCK */
+       atomic_t        v_iocount;              /* outstanding I/O count */
 #ifdef XFS_VNODE_TRACE
        struct ktrace   *v_trace;               /* trace header structure    */
 #endif
@@ -93,6 +88,12 @@ typedef struct vnode {
        /* inode MUST be last */
 } vnode_t;
 
+#define VN_ISLNK(vp)   S_ISLNK((vp)->v_inode.i_mode)
+#define VN_ISREG(vp)   S_ISREG((vp)->v_inode.i_mode)
+#define VN_ISDIR(vp)   S_ISDIR((vp)->v_inode.i_mode)
+#define VN_ISCHR(vp)   S_ISCHR((vp)->v_inode.i_mode)
+#define VN_ISBLK(vp)   S_ISBLK((vp)->v_inode.i_mode)
+
 #define v_fbhv                 v_bh.bh_first          /* first behavior */
 #define v_fops                 v_bh.bh_first->bd_ops  /* first behavior ops */
 
@@ -132,23 +133,9 @@ typedef enum {
 #define LINVFS_GET_VP(inode)   ((vnode_t *)list_entry(inode, vnode_t, v_inode))
 #define LINVFS_GET_IP(vp)      (&(vp)->v_inode)
 
-/*
- * Convert between vnode types and inode formats (since POSIX.1
- * defines mode word of stat structure in terms of inode formats).
- */
-extern enum vtype      iftovt_tab[];
-extern u_short         vttoif_tab[];
-#define IFTOVT(mode)   (iftovt_tab[((mode) & S_IFMT) >> 12])
-#define VTTOIF(indx)   (vttoif_tab[(int)(indx)])
-#define MAKEIMODE(indx, mode)  (int)(VTTOIF(indx) | (mode))
-
-
 /*
  * Vnode flags.
  */
-#define VINACT                0x1      /* vnode is being inactivated   */
-#define VRECLM                0x2      /* vnode is being reclaimed     */
-#define VWAIT                 0x4      /* waiting for VINACT/VRECLM to end */
 #define VMODIFIED             0x8      /* XFS inode state possibly differs */
                                        /* to the Linux inode state.    */
 
@@ -408,7 +395,6 @@ typedef struct vnodeops {
  */
 typedef struct vattr {
        int             va_mask;        /* bit-mask of attributes present */
-       enum vtype      va_type;        /* vnode type (for create) */
        mode_t          va_mode;        /* file access mode and type */
        xfs_nlink_t     va_nlink;       /* number of references to file */
        uid_t           va_uid;         /* owner user id */
@@ -498,26 +484,11 @@ typedef struct vattr {
  * Check whether mandatory file locking is enabled.
  */
 #define MANDLOCK(vp, mode)     \
-       ((vp)->v_type == VREG && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
+       (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
 
 extern void    vn_init(void);
-extern int     vn_wait(struct vnode *);
 extern vnode_t *vn_initialize(struct inode *);
 
-/*
- * Acquiring and invalidating vnodes:
- *
- *     if (vn_get(vp, version, 0))
- *             ...;
- *     vn_purge(vp, version);
- *
- * vn_get and vn_purge must be called with vmap_t arguments, sampled
- * while a lock that the vnode's VOP_RECLAIM function acquires is
- * held, to ensure that the vnode sampled with the lock held isn't
- * recycled (VOP_RECLAIMed) or deallocated between the release of the lock
- * and the subsequent vn_get or vn_purge.
- */
-
 /*
  * vnode_map structures _must_ match vn_epoch and vnode structure sizes.
  */
@@ -531,11 +502,11 @@ typedef struct vnode_map {
                         (vmap).v_number = (vp)->v_number,      \
                         (vmap).v_ino    = (vp)->v_inode.i_ino; }
 
-extern void    vn_purge(struct vnode *, vmap_t *);
-extern vnode_t *vn_get(struct vnode *, vmap_t *);
 extern int     vn_revalidate(struct vnode *);
 extern void    vn_revalidate_core(struct vnode *, vattr_t *);
-extern void    vn_remove(struct vnode *);
+
+extern void    vn_iowait(struct vnode *vp);
+extern void    vn_iowake(struct vnode *vp);
 
 static inline int vn_count(struct vnode *vp)
 {
@@ -546,7 +517,6 @@ static inline int vn_count(struct vnode *vp)
  * Vnode reference counting functions (and macros for compatibility).
  */
 extern vnode_t *vn_hold(struct vnode *);
-extern void    vn_rele(struct vnode *);
 
 #if defined(XFS_VNODE_TRACE)
 #define VN_HOLD(vp)            \
@@ -560,6 +530,12 @@ extern void        vn_rele(struct vnode *);
 #define VN_RELE(vp)            (iput(LINVFS_GET_IP(vp)))
 #endif
 
+static inline struct vnode *vn_grab(struct vnode *vp)
+{
+       struct inode *inode = igrab(LINVFS_GET_IP(vp));
+       return inode ? LINVFS_GET_VP(inode) : NULL;
+}
+
 /*
  * Vname handling macros.
  */
diff --git a/fs/xfs/quota/Makefile b/fs/xfs/quota/Makefile
new file mode 100644 (file)
index 0000000..7a4f725
--- /dev/null
@@ -0,0 +1 @@
+include $(TOPDIR)/fs/xfs/quota/Makefile-linux-$(VERSION).$(PATCHLEVEL)
diff --git a/fs/xfs/quota/Makefile-linux-2.6 b/fs/xfs/quota/Makefile-linux-2.6
new file mode 100644 (file)
index 0000000..8b7b676
--- /dev/null
@@ -0,0 +1,53 @@
+#
+# Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file.  Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+# Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+# Mountain View, CA  94043, or:
+#
+# http://www.sgi.com
+#
+# For further information regarding this notice, see:
+#
+# http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+#
+
+EXTRA_CFLAGS += -I $(TOPDIR)/fs/xfs -I $(TOPDIR)/fs/xfs/linux-2.6
+
+ifeq ($(CONFIG_XFS_DEBUG),y)
+       EXTRA_CFLAGS += -g -DDEBUG
+       #EXTRA_CFLAGS += -DQUOTADEBUG
+endif
+ifeq ($(CONFIG_XFS_TRACE),y)
+       EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
+       EXTRA_CFLAGS += -DXFS_VNODE_TRACE
+endif
+
+obj-$(CONFIG_XFS_QUOTA)                += xfs_quota.o
+
+xfs_quota-y                    += xfs_dquot.o \
+                                  xfs_dquot_item.o \
+                                  xfs_trans_dquot.o \
+                                  xfs_qm_syscalls.o \
+                                  xfs_qm_bhv.o \
+                                  xfs_qm.o
+
+xfs_quota-$(CONFIG_PROC_FS)    += xfs_qm_stats.o
index 46ce1e3..e2e8d35 100644 (file)
@@ -421,7 +421,7 @@ xfs_qm_init_dquot_blk(
  */
 STATIC int
 xfs_qm_dqalloc(
-       xfs_trans_t     *tp,
+       xfs_trans_t     **tpp,
        xfs_mount_t     *mp,
        xfs_dquot_t     *dqp,
        xfs_inode_t     *quotip,
@@ -433,6 +433,7 @@ xfs_qm_dqalloc(
        xfs_bmbt_irec_t map;
        int             nmaps, error, committed;
        xfs_buf_t       *bp;
+       xfs_trans_t     *tp = *tpp;
 
        ASSERT(tp != NULL);
        xfs_dqtrace_entry(dqp, "DQALLOC");
@@ -492,10 +493,32 @@ xfs_qm_dqalloc(
        xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT),
                              dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
 
-       if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) {
+       /*
+        * xfs_bmap_finish() may commit the current transaction and
+        * start a second transaction if the freelist is not empty.
+        *
+        * Since we still want to modify this buffer, we need to
+        * ensure that the buffer is not released on commit of
+        * the first transaction and ensure the buffer is added to the
+        * second transaction.
+        *
+        * If there is only one transaction then don't stop the buffer
+        * from being released when it commits later on.
+        */
+
+       xfs_trans_bhold(tp, bp);
+
+       if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
                goto error1;
        }
 
+       if (committed) {
+               tp = *tpp;
+               xfs_trans_bjoin(tp, bp);
+       } else {
+               xfs_trans_bhold_release(tp, bp);
+       }
+
        *O_bpp = bp;
        return 0;
 
@@ -514,7 +537,7 @@ xfs_qm_dqalloc(
  */
 STATIC int
 xfs_qm_dqtobp(
-       xfs_trans_t             *tp,
+       xfs_trans_t             **tpp,
        xfs_dquot_t             *dqp,
        xfs_disk_dquot_t        **O_ddpp,
        xfs_buf_t               **O_bpp,
@@ -528,6 +551,7 @@ xfs_qm_dqtobp(
        xfs_disk_dquot_t *ddq;
        xfs_dqid_t      id;
        boolean_t       newdquot;
+       xfs_trans_t     *tp = (tpp ? *tpp : NULL);
 
        mp = dqp->q_mount;
        id = INT_GET(dqp->q_core.d_id, ARCH_CONVERT);
@@ -579,9 +603,10 @@ xfs_qm_dqtobp(
                                return (ENOENT);
 
                        ASSERT(tp);
-                       if ((error = xfs_qm_dqalloc(tp, mp, dqp, quotip,
+                       if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
                                                dqp->q_fileoffset, &bp)))
                                return (error);
+                       tp = *tpp;
                        newdquot = B_TRUE;
                } else {
                        /*
@@ -645,7 +670,7 @@ xfs_qm_dqtobp(
 /* ARGSUSED */
 STATIC int
 xfs_qm_dqread(
-       xfs_trans_t     *tp,
+       xfs_trans_t     **tpp,
        xfs_dqid_t      id,
        xfs_dquot_t     *dqp,   /* dquot to get filled in */
        uint            flags)
@@ -653,15 +678,19 @@ xfs_qm_dqread(
        xfs_disk_dquot_t *ddqp;
        xfs_buf_t        *bp;
        int              error;
+       xfs_trans_t      *tp;
+
+       ASSERT(tpp);
 
        /*
         * get a pointer to the on-disk dquot and the buffer containing it
         * dqp already knows its own type (GROUP/USER).
         */
        xfs_dqtrace_entry(dqp, "DQREAD");
-       if ((error = xfs_qm_dqtobp(tp, dqp, &ddqp, &bp, flags))) {
+       if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
                return (error);
        }
+       tp = *tpp;
 
        /* copy everything from disk dquot to the incore dquot */
        memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
@@ -740,7 +769,7 @@ xfs_qm_idtodq(
         * Read it from disk; xfs_dqread() takes care of
         * all the necessary initialization of dquot's fields (locks, etc)
         */
-       if ((error = xfs_qm_dqread(tp, id, dqp, flags))) {
+       if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
                /*
                 * This can happen if quotas got turned off (ESRCH),
                 * or if the dquot didn't exist on disk and we ask to
index 3917510..8ebc871 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -113,20 +113,6 @@ typedef struct xfs_dquot {
 
 #define XFS_DQHOLD(dqp)                ((dqp)->q_nrefs++)
 
-/*
- * Quota Accounting/Enforcement flags
- */
-#define XFS_ALL_QUOTA_ACCT     \
-               (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
-#define XFS_ALL_QUOTA_ENFD     (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
-#define XFS_ALL_QUOTA_CHKD     (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
-
-#define XFS_IS_QUOTA_RUNNING(mp)       ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
-#define XFS_IS_QUOTA_ENFORCED(mp)      ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
-#define XFS_IS_UQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_UQUOTA_ACCT)
-#define XFS_IS_PQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_PQUOTA_ACCT)
-#define XFS_IS_GQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_GQUOTA_ACCT)
-
 #ifdef DEBUG
 static inline int
 XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp)
index f5271b7..e74eaa7 100644 (file)
@@ -509,6 +509,7 @@ xfs_qm_qoff_logitem_format(xfs_qoff_logitem_t       *qf,
 
        log_vector->i_addr = (xfs_caddr_t)&(qf->qql_format);
        log_vector->i_len = sizeof(xfs_qoff_logitem_t);
+       XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_QUOTAOFF);
        qf->qql_format.qf_size = 1;
 }
 
index f665ca8..efde16e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -365,16 +365,6 @@ xfs_qm_mount_quotas(
        int             error = 0;
        uint            sbf;
 
-       /*
-        * If a file system had quotas running earlier, but decided to
-        * mount without -o uquota/pquota/gquota options, revoke the
-        * quotachecked license, and bail out.
-        */
-       if (! XFS_IS_QUOTA_ON(mp) &&
-           (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT)) {
-               mp->m_qflags = 0;
-               goto write_changes;
-       }
 
        /*
         * If quotas on realtime volumes is not supported, we disable
@@ -388,11 +378,8 @@ xfs_qm_mount_quotas(
                goto write_changes;
        }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-       cmn_err(CE_NOTE, "Attempting to turn on disk quotas.");
-#endif
-
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
+
        /*
         * Allocate the quotainfo structure inside the mount struct, and
         * create quotainode(s), and change/rev superblock if necessary.
@@ -410,19 +397,14 @@ xfs_qm_mount_quotas(
         */
        if (XFS_QM_NEED_QUOTACHECK(mp) &&
                !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
-#ifdef DEBUG
-               cmn_err(CE_NOTE, "Doing a quotacheck. Please wait.");
-#endif
                if ((error = xfs_qm_quotacheck(mp))) {
                        /* Quotacheck has failed and quotas have
                         * been disabled.
                         */
                        return XFS_ERROR(error);
                }
-#ifdef DEBUG
-               cmn_err(CE_NOTE, "Done quotacheck.");
-#endif
        }
+
  write_changes:
        /*
         * We actually don't have to acquire the SB_LOCK at all.
@@ -2010,7 +1992,7 @@ xfs_qm_quotacheck(
                ASSERT(mp->m_quotainfo != NULL);
                ASSERT(xfs_Gqm != NULL);
                xfs_qm_destroy_quotainfo(mp);
-               xfs_mount_reset_sbqflags(mp);
+               (void)xfs_mount_reset_sbqflags(mp);
        } else {
                cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
        }
index b03eecf..0b00b3c 100644 (file)
@@ -184,8 +184,6 @@ typedef struct xfs_dquot_acct {
 #define XFS_QM_HOLD(xqm)       ((xqm)->qm_nrefs++)
 #define XFS_QM_RELE(xqm)       ((xqm)->qm_nrefs--)
 
-extern void            xfs_mount_reset_sbqflags(xfs_mount_t *);
-
 extern void            xfs_qm_destroy_quotainfo(xfs_mount_t *);
 extern int             xfs_qm_mount_quotas(xfs_mount_t *, int);
 extern void            xfs_qm_mount_quotainit(xfs_mount_t *, uint);
index dc3c37a..8890a18 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -229,48 +229,6 @@ xfs_qm_syncall(
        return error;
 }
 
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-void
-xfs_mount_reset_sbqflags(
-       xfs_mount_t             *mp)
-{
-       xfs_trans_t             *tp;
-       unsigned long           s;
-
-       mp->m_qflags = 0;
-       /*
-        * It is OK to look at sb_qflags here in mount path,
-        * without SB_LOCK.
-        */
-       if (mp->m_sb.sb_qflags == 0)
-               return;
-       s = XFS_SB_LOCK(mp);
-       mp->m_sb.sb_qflags = 0;
-       XFS_SB_UNLOCK(mp, s);
-
-       /*
-        * if the fs is readonly, let the incore superblock run
-        * with quotas off but don't flush the update out to disk
-        */
-       if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
-               return;
-#ifdef QUOTADEBUG
-       xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
-       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
-       if (xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
-                                     XFS_DEFAULT_LOG_COUNT)) {
-               xfs_trans_cancel(tp, 0);
-               xfs_fs_cmn_err(CE_ALERT, mp,
-                       "xfs_mount_reset_sbqflags: Superblock update failed!");
-               return;
-       }
-       xfs_mod_sb(tp, XFS_SB_QFLAGS);
-       xfs_trans_commit(tp, 0, NULL);
-}
-
 STATIC int
 xfs_qm_newmount(
        xfs_mount_t     *mp,
index 68e9896..15e02e8 100644 (file)
@@ -1053,7 +1053,6 @@ xfs_qm_dqrele_all_inodes(
        struct xfs_mount *mp,
        uint             flags)
 {
-       vmap_t          vmap;
        xfs_inode_t     *ip, *topino;
        uint            ireclaims;
        vnode_t         *vp;
@@ -1061,8 +1060,8 @@ xfs_qm_dqrele_all_inodes(
 
        ASSERT(mp->m_quotainfo);
 
-again:
        XFS_MOUNT_ILOCK(mp);
+again:
        ip = mp->m_inodes;
        if (ip == NULL) {
                XFS_MOUNT_IUNLOCK(mp);
@@ -1090,18 +1089,14 @@ again:
                }
                vnode_refd = B_FALSE;
                if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
-                       /*
-                        * Sample vp mapping while holding the mplock, lest
-                        * we come across a non-existent vnode.
-                        */
-                       VMAP(vp, vmap);
                        ireclaims = mp->m_ireclaims;
                        topino = mp->m_inodes;
-                       XFS_MOUNT_IUNLOCK(mp);
+                       vp = vn_grab(vp);
+                       if (!vp)
+                               goto again;
 
+                       XFS_MOUNT_IUNLOCK(mp);
                        /* XXX restart limit ? */
-                       if ( ! (vp = vn_get(vp, &vmap)))
-                               goto again;
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
                        vnode_refd = B_TRUE;
                } else {
@@ -1137,7 +1132,6 @@ again:
                 */
                if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
                        /* XXX use a sentinel */
-                       XFS_MOUNT_IUNLOCK(mp);
                        goto again;
                }
                ip = ip->i_mnext;
index 4ed7b69..4e1a5ec 100644 (file)
@@ -31,6 +31,7 @@
  */
 
 #include "debug.h"
+#include "spin.h"
 
 #include <asm/page.h>
 #include <linux/sched.h>
index 8d01dce..92fd1d6 100644 (file)
@@ -85,7 +85,7 @@ xfs_acl_vhasacl_default(
 {
        int             error;
 
-       if (vp->v_type != VDIR)
+       if (!VN_ISDIR(vp))
                return 0;
        xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
        return (error == 0);
@@ -389,7 +389,7 @@ xfs_acl_allow_set(
 
        if (vp->v_inode.i_flags & (S_IMMUTABLE|S_APPEND))
                return EPERM;
-       if (kind == _ACL_TYPE_DEFAULT && vp->v_type != VDIR)
+       if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
                return ENOTDIR;
        if (vp->v_vfsp->vfs_flag & VFS_RDONLY)
                return EROFS;
@@ -750,7 +750,7 @@ xfs_acl_inherit(
         * If the new file is a directory, its default ACL is a copy of
         * the containing directory's default ACL.
         */
-       if (vp->v_type == VDIR)
+       if (VN_ISDIR(vp))
                xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
        if (!error && !basicperms)
                xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
index 6f5d283..3e76def 100644 (file)
@@ -4754,10 +4754,20 @@ xfs_bmapi(
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(alen), rsvd);
-                               if (!error)
+                               if (!error) {
                                        error = xfs_mod_incore_sb(mp,
                                                        XFS_SBS_FDBLOCKS,
                                                        -(indlen), rsvd);
+                                       if (error && rt) {
+                                               xfs_mod_incore_sb(ip->i_mount,
+                                                       XFS_SBS_FREXTENTS,
+                                                       extsz, rsvd);
+                                       } else if (error) {
+                                               xfs_mod_incore_sb(ip->i_mount,
+                                                       XFS_SBS_FDBLOCKS,
+                                                       alen, rsvd);
+                                       }
+                               }
 
                                if (error) {
                                        if (XFS_IS_QUOTA_ON(ip->i_mount))
index 30b8285..a264657 100644 (file)
@@ -274,6 +274,7 @@ xfs_buf_item_format(
                       ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
        vecp->i_addr = (xfs_caddr_t)&bip->bli_format;
        vecp->i_len = base_size;
+       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BFORMAT);
        vecp++;
        nvecs = 1;
 
@@ -320,12 +321,14 @@ xfs_buf_item_format(
                        buffer_offset = first_bit * XFS_BLI_CHUNK;
                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
                        vecp->i_len = nbits * XFS_BLI_CHUNK;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
                        nvecs++;
                        break;
                } else if (next_bit != last_bit + 1) {
                        buffer_offset = first_bit * XFS_BLI_CHUNK;
                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
                        vecp->i_len = nbits * XFS_BLI_CHUNK;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
                        nvecs++;
                        vecp++;
                        first_bit = next_bit;
@@ -337,6 +340,7 @@ xfs_buf_item_format(
                        buffer_offset = first_bit * XFS_BLI_CHUNK;
                        vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
                        vecp->i_len = nbits * XFS_BLI_CHUNK;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_BCHUNK);
 /* You would think we need to bump the nvecs here too, but we do not
  * this number is used by recovery, and it gets confused by the boundary
  * split here
index 55c17ad..19e8728 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
index db7cbd1..cc7d149 100644 (file)
@@ -107,6 +107,7 @@ xfs_efi_item_format(xfs_efi_log_item_t      *efip,
 
        log_vector->i_addr = (xfs_caddr_t)&(efip->efi_format);
        log_vector->i_len = size;
+       XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFI_FORMAT);
        ASSERT(size >= sizeof(xfs_efi_log_format_t));
 }
 
@@ -426,6 +427,7 @@ xfs_efd_item_format(xfs_efd_log_item_t      *efdp,
 
        log_vector->i_addr = (xfs_caddr_t)&(efdp->efd_format);
        log_vector->i_len = size;
+       XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_EFD_FORMAT);
        ASSERT(size >= sizeof(xfs_efd_log_format_t));
 }
 
index d3da000..0d9ae8f 100644 (file)
@@ -30,6 +30,8 @@
  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
  */
 
+#include <linux/delay.h>
+
 #include "xfs.h"
 
 #include "xfs_macros.h"
@@ -505,17 +507,15 @@ xfs_iget(
        vnode_t         *vp = NULL;
        int             error;
 
-retry:
        XFS_STATS_INC(xs_ig_attempts);
 
+retry:
        if ((inode = iget_locked(XFS_MTOVFS(mp)->vfs_super, ino))) {
                bhv_desc_t      *bdp;
                xfs_inode_t     *ip;
-               int             newnode;
 
                vp = LINVFS_GET_VP(inode);
                if (inode->i_state & I_NEW) {
-inode_allocate:
                        vn_initialize(inode);
                        error = xfs_iget_core(vp, mp, tp, ino, flags,
                                        lock_flags, ipp, bno);
@@ -526,32 +526,25 @@ inode_allocate:
                                iput(inode);
                        }
                } else {
-                       /* These are true if the inode is in inactive or
-                        * reclaim. The linux inode is about to go away,
-                        * wait for that path to finish, and try again.
+                       /*
+                        * If the inode is not fully constructed due to
+                        * filehandle mismatches wait for the inode to go
+                        * away and try again.
+                        *
+                        * iget_locked will call __wait_on_freeing_inode
+                        * to wait for the inode to go away.
                         */
-                       if (vp->v_flag & (VINACT | VRECLM)) {
-                               vn_wait(vp);
+                       if (is_bad_inode(inode) ||
+                           ((bdp = vn_bhv_lookup(VN_BHV_HEAD(vp),
+                                                 &xfs_vnodeops)) == NULL)) {
                                iput(inode);
+                               delay(1);
                                goto retry;
                        }
 
-                       if (is_bad_inode(inode)) {
-                               iput(inode);
-                               return EIO;
-                       }
-
-                       bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops);
-                       if (bdp == NULL) {
-                               XFS_STATS_INC(xs_ig_dup);
-                               goto inode_allocate;
-                       }
                        ip = XFS_BHVTOI(bdp);
                        if (lock_flags != 0)
                                xfs_ilock(ip, lock_flags);
-                       newnode = (ip->i_d.di_mode == 0);
-                       if (newnode)
-                               xfs_iocore_inode_reinit(ip);
                        XFS_STATS_INC(xs_ig_found);
                        *ipp = ip;
                        error = 0;
index 34bdf59..db43308 100644 (file)
@@ -1128,7 +1128,6 @@ xfs_ialloc(
        ASSERT(ip != NULL);
 
        vp = XFS_ITOV(ip);
-       vp->v_type = IFTOVT(mode);
        ip->i_d.di_mode = (__uint16_t)mode;
        ip->i_d.di_onlink = 0;
        ip->i_d.di_nlink = nlink;
@@ -1250,7 +1249,7 @@ xfs_ialloc(
         */
        xfs_trans_log_inode(tp, ip, flags);
 
-       /* now that we have a v_type we can set Linux inode ops (& unlock) */
+       /* now that we have an i_mode we can set Linux inode ops (& unlock) */
        VFS_INIT_VNODE(XFS_MTOVFS(tp->t_mountp), vp, XFS_ITOBHV(ip), 1);
 
        *ipp = ip;
index 0eed30f..276ec70 100644 (file)
@@ -248,6 +248,7 @@ xfs_inode_item_format(
 
        vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
        vecp->i_len  = sizeof(xfs_inode_log_format_t);
+       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
        vecp++;
        nvecs        = 1;
 
@@ -292,6 +293,7 @@ xfs_inode_item_format(
 
        vecp->i_addr = (xfs_caddr_t)&ip->i_d;
        vecp->i_len  = sizeof(xfs_dinode_core_t);
+       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
        vecp++;
        nvecs++;
        iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -349,6 +351,7 @@ xfs_inode_item_format(
                                vecp->i_addr =
                                        (char *)(ip->i_df.if_u1.if_extents);
                                vecp->i_len = ip->i_df.if_bytes;
+                               XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
                        } else
 #endif
                        {
@@ -367,6 +370,7 @@ xfs_inode_item_format(
                                vecp->i_addr = (xfs_caddr_t)ext_buffer;
                                vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
                                                XFS_DATA_FORK);
+                               XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
                        }
                        ASSERT(vecp->i_len <= ip->i_df.if_bytes);
                        iip->ili_format.ilf_dsize = vecp->i_len;
@@ -384,6 +388,7 @@ xfs_inode_item_format(
                        ASSERT(ip->i_df.if_broot != NULL);
                        vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
                        vecp->i_len = ip->i_df.if_broot_bytes;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -409,6 +414,7 @@ xfs_inode_item_format(
                        ASSERT((ip->i_df.if_real_bytes == 0) ||
                               (ip->i_df.if_real_bytes == data_bytes));
                        vecp->i_len = (int)data_bytes;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -486,6 +492,7 @@ xfs_inode_item_format(
                        vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
                                        XFS_ATTR_FORK);
 #endif
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
                        iip->ili_format.ilf_asize = vecp->i_len;
                        vecp++;
                        nvecs++;
@@ -500,6 +507,7 @@ xfs_inode_item_format(
                        ASSERT(ip->i_afp->if_broot != NULL);
                        vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
                        vecp->i_len = ip->i_afp->if_broot_bytes;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -523,6 +531,7 @@ xfs_inode_item_format(
                        ASSERT((ip->i_afp->if_real_bytes == 0) ||
                               (ip->i_afp->if_real_bytes == data_bytes));
                        vecp->i_len = (int)data_bytes;
+                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = (unsigned)data_bytes;
index 2edd676..d0f5be6 100644 (file)
@@ -226,13 +226,12 @@ xfs_iomap(
                xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, io, offset, count);
                lockmode = XFS_LCK_MAP_SHARED(mp, io);
                bmapi_flags = XFS_BMAPI_ENTIRE;
-               if (flags & BMAPI_IGNSTATE)
-                       bmapi_flags |= XFS_BMAPI_IGSTATE;
                break;
        case BMAPI_WRITE:
                xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, io, offset, count);
                lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
-               bmapi_flags = 0;
+               if (flags & BMAPI_IGNSTATE)
+                       bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
                XFS_ILOCK(mp, io, lockmode);
                break;
        case BMAPI_ALLOCATE:
@@ -391,9 +390,9 @@ xfs_iomap_write_direct(
        xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS], *imapp;
        xfs_bmap_free_t free_list;
        int             aeof;
-       xfs_filblks_t   datablocks, qblocks, resblks;
+       xfs_filblks_t   qblocks, resblks;
        int             committed;
-       int             numrtextents;
+       int             resrtextents;
 
        /*
         * Make sure that the dquots are there. This doesn't hold
@@ -434,14 +433,14 @@ xfs_iomap_write_direct(
 
                if (!(extsz = ip->i_d.di_extsize))
                        extsz = mp->m_sb.sb_rextsize;
-               numrtextents = qblocks = (count_fsb + extsz - 1);
-               do_div(numrtextents, mp->m_sb.sb_rextsize);
+               resrtextents = qblocks = (count_fsb + extsz - 1);
+               do_div(resrtextents, mp->m_sb.sb_rextsize);
+               resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
                quota_flag = XFS_QMOPT_RES_RTBLKS;
-               datablocks = 0;
        } else {
-               datablocks = qblocks = count_fsb;
+               resrtextents = 0;
+               resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb);
                quota_flag = XFS_QMOPT_RES_REGBLKS;
-               numrtextents = 0;
        }
 
        /*
@@ -449,9 +448,8 @@ xfs_iomap_write_direct(
         */
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-       resblks = XFS_DIOSTRAT_SPACE_RES(mp, datablocks);
        error = xfs_trans_reserve(tp, resblks,
-                       XFS_WRITE_LOG_RES(mp), numrtextents,
+                       XFS_WRITE_LOG_RES(mp), resrtextents,
                        XFS_TRANS_PERM_LOG_RES,
                        XFS_WRITE_LOG_COUNT);
 
index 1cd2ac1..54a6f11 100644 (file)
@@ -159,11 +159,15 @@ xfs_buftarg_t *xlog_target;
 void
 xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
 {
-       if (! log->l_grant_trace) {
-               log->l_grant_trace = ktrace_alloc(1024, KM_NOSLEEP);
-               if (! log->l_grant_trace)
+       unsigned long cnts;
+
+       if (!log->l_grant_trace) {
+               log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
+               if (!log->l_grant_trace)
                        return;
        }
+       /* ticket counts are 1 byte each */
+       cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
 
        ktrace_enter(log->l_grant_trace,
                     (void *)tic,
@@ -178,10 +182,10 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
                     (void *)((unsigned long)CYCLE_LSN(log->l_tail_lsn)),
                     (void *)((unsigned long)BLOCK_LSN(log->l_tail_lsn)),
                     (void *)string,
-                    (void *)((unsigned long)13),
-                    (void *)((unsigned long)14),
-                    (void *)((unsigned long)15),
-                    (void *)((unsigned long)16));
+                    (void *)((unsigned long)tic->t_trans_type),
+                    (void *)cnts,
+                    (void *)((unsigned long)tic->t_curr_res),
+                    (void *)((unsigned long)tic->t_unit_res));
 }
 
 void
@@ -274,9 +278,11 @@ xfs_log_done(xfs_mount_t   *mp,
                 * Release ticket if not permanent reservation or a specifc
                 * request has been made to release a permanent reservation.
                 */
+               xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
                xlog_ungrant_log_space(log, ticket);
                xlog_state_put_ticket(log, ticket);
        } else {
+               xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
                xlog_regrant_reserve_log_space(log, ticket);
        }
 
@@ -399,7 +405,8 @@ xfs_log_reserve(xfs_mount_t  *mp,
                int              cnt,
                xfs_log_ticket_t *ticket,
                __uint8_t        client,
-               uint             flags)
+               uint             flags,
+               uint             t_type)
 {
        xlog_t          *log = mp->m_log;
        xlog_ticket_t   *internal_ticket;
@@ -421,13 +428,19 @@ xfs_log_reserve(xfs_mount_t        *mp,
        if (*ticket != NULL) {
                ASSERT(flags & XFS_LOG_PERM_RESERV);
                internal_ticket = (xlog_ticket_t *)*ticket;
+               xlog_trace_loggrant(log, internal_ticket, "xfs_log_reserve: existing ticket (permanent trans)");
                xlog_grant_push_ail(mp, internal_ticket->t_unit_res);
                retval = xlog_regrant_write_log_space(log, internal_ticket);
        } else {
                /* may sleep if need to allocate more tickets */
                internal_ticket = xlog_ticket_get(log, unit_bytes, cnt,
                                                  client, flags);
+               internal_ticket->t_trans_type = t_type;
                *ticket = internal_ticket;
+               xlog_trace_loggrant(log, internal_ticket, 
+                       (internal_ticket->t_flags & XLOG_TIC_PERM_RESERV) ?
+                       "xfs_log_reserve: create new ticket (permanent trans)" :
+                       "xfs_log_reserve: create new ticket");
                xlog_grant_push_ail(mp,
                                    (internal_ticket->t_unit_res *
                                     internal_ticket->t_cnt));
@@ -601,8 +614,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
        if (! (XLOG_FORCED_SHUTDOWN(log))) {
                reg[0].i_addr = (void*)&magic;
                reg[0].i_len  = sizeof(magic);
+               XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_UNMOUNT);
 
-               error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0);
+               error = xfs_log_reserve(mp, 600, 1, &tic, XFS_LOG, 0, 0);
                if (!error) {
                        /* remove inited flag */
                        ((xlog_ticket_t *)tic)->t_flags = 0;
@@ -1272,6 +1286,7 @@ xlog_commit_record(xfs_mount_t  *mp,
 
        reg[0].i_addr = NULL;
        reg[0].i_len = 0;
+       XLOG_VEC_SET_TYPE(&reg[0], XLOG_REG_TYPE_COMMIT);
 
        ASSERT_ALWAYS(iclog);
        if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
@@ -1604,6 +1619,117 @@ xlog_state_finish_copy(xlog_t           *log,
 
 
 
+/*
+ * print out info relating to regions written which consume
+ * the reservation
+ */
+#if defined(XFS_LOG_RES_DEBUG)
+STATIC void
+xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
+{
+       uint i;
+       uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
+
+       /* match with XLOG_REG_TYPE_* in xfs_log.h */
+       static char *res_type_str[XLOG_REG_TYPE_MAX] = {
+           "bformat",
+           "bchunk",
+           "efi_format",
+           "efd_format",
+           "iformat",
+           "icore",
+           "iext",
+           "ibroot",
+           "ilocal",
+           "iattr_ext",
+           "iattr_broot",
+           "iattr_local",
+           "qformat",
+           "dquot",
+           "quotaoff",
+           "LR header",
+           "unmount",
+           "commit",
+           "trans header"
+       };
+       static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
+           "SETATTR_NOT_SIZE",
+           "SETATTR_SIZE",
+           "INACTIVE",
+           "CREATE",
+           "CREATE_TRUNC",
+           "TRUNCATE_FILE",
+           "REMOVE",
+           "LINK",
+           "RENAME",
+           "MKDIR",
+           "RMDIR",
+           "SYMLINK",
+           "SET_DMATTRS",
+           "GROWFS",
+           "STRAT_WRITE",
+           "DIOSTRAT",
+           "WRITE_SYNC",
+           "WRITEID",
+           "ADDAFORK",
+           "ATTRINVAL",
+           "ATRUNCATE",
+           "ATTR_SET",
+           "ATTR_RM",
+           "ATTR_FLAG",
+           "CLEAR_AGI_BUCKET",
+           "QM_SBCHANGE",
+           "DUMMY1",
+           "DUMMY2",
+           "QM_QUOTAOFF",
+           "QM_DQALLOC",
+           "QM_SETQLIM",
+           "QM_DQCLUSTER",
+           "QM_QINOCREATE",
+           "QM_QUOTAOFF_END",
+           "SB_UNIT",
+           "FSYNC_TS",
+           "GROWFSRT_ALLOC",
+           "GROWFSRT_ZERO",
+           "GROWFSRT_FREE",
+           "SWAPEXT"
+       };
+
+       xfs_fs_cmn_err(CE_WARN, mp,
+                       "xfs_log_write: reservation summary:\n"
+                       "  trans type  = %s (%u)\n"
+                       "  unit res    = %d bytes\n"
+                       "  current res = %d bytes\n"
+                       "  total reg   = %u bytes (o/flow = %u bytes)\n"
+                       "  ophdrs      = %u (ophdr space = %u bytes)\n"
+                       "  ophdr + reg = %u bytes\n"
+                       "  num regions = %u\n",
+                       ((ticket->t_trans_type <= 0 ||
+                         ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
+                         "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+                       ticket->t_trans_type,
+                       ticket->t_unit_res,
+                       ticket->t_curr_res,
+                       ticket->t_res_arr_sum, ticket->t_res_o_flow,
+                       ticket->t_res_num_ophdrs, ophdr_spc,
+                       ticket->t_res_arr_sum + 
+                         ticket->t_res_o_flow + ophdr_spc,
+                       ticket->t_res_num);
+
+       for (i = 0; i < ticket->t_res_num; i++) {
+               uint r_type = ticket->t_res_arr[i].r_type; 
+               cmn_err(CE_WARN,
+                           "region[%u]: %s - %u bytes\n",
+                           i, 
+                           ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
+                           "bad-rtype" : res_type_str[r_type-1]),
+                           ticket->t_res_arr[i].r_len);
+       }
+}
+#else
+#define xlog_print_tic_res(mp, ticket)
+#endif
+
 /*
  * Write some region out to in-core log
  *
@@ -1677,16 +1803,21 @@ xlog_write(xfs_mount_t *        mp,
      * xlog_op_header_t and may need to be double word aligned.
      */
     len = 0;
-    if (ticket->t_flags & XLOG_TIC_INITED)     /* acct for start rec of xact */
+    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */
        len += sizeof(xlog_op_header_t);
+       XLOG_TIC_ADD_OPHDR(ticket);
+    }
 
     for (index = 0; index < nentries; index++) {
        len += sizeof(xlog_op_header_t);            /* each region gets >= 1 */
+       XLOG_TIC_ADD_OPHDR(ticket);
        len += reg[index].i_len;
+       XLOG_TIC_ADD_REGION(ticket, reg[index].i_len, reg[index].i_type);
     }
     contwr = *start_lsn = 0;
 
     if (ticket->t_curr_res < len) {
+       xlog_print_tic_res(mp, ticket);
 #ifdef DEBUG
        xlog_panic(
                "xfs_log_write: reservation ran out. Need to up reservation");
@@ -1790,6 +1921,7 @@ xlog_write(xfs_mount_t *  mp,
                len += sizeof(xlog_op_header_t); /* from splitting of region */
                /* account for new log op header */
                ticket->t_curr_res -= sizeof(xlog_op_header_t);
+               XLOG_TIC_ADD_OPHDR(ticket);
            }
            xlog_verify_dest_ptr(log, ptr);
 
@@ -2282,6 +2414,9 @@ restart:
         */
        if (log_offset == 0) {
                ticket->t_curr_res -= log->l_iclog_hsize;
+               XLOG_TIC_ADD_REGION(ticket,
+                                   log->l_iclog_hsize,
+                                   XLOG_REG_TYPE_LRHEADER);
                INT_SET(head->h_cycle, ARCH_CONVERT, log->l_curr_cycle);
                ASSIGN_LSN(head->h_lsn, log);
                ASSERT(log->l_curr_block >= 0);
@@ -2468,6 +2603,7 @@ xlog_regrant_write_log_space(xlog_t          *log,
 #endif
 
        tic->t_curr_res = tic->t_unit_res;
+       XLOG_TIC_RESET_RES(tic);
 
        if (tic->t_cnt > 0)
                return (0);
@@ -2608,6 +2744,7 @@ xlog_regrant_reserve_log_space(xlog_t          *log,
        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'w');
        XLOG_GRANT_SUB_SPACE(log, ticket->t_curr_res, 'r');
        ticket->t_curr_res = ticket->t_unit_res;
+       XLOG_TIC_RESET_RES(ticket);
        xlog_trace_loggrant(log, ticket,
                            "xlog_regrant_reserve_log_space: sub current res");
        xlog_verify_grant_head(log, 1);
@@ -2624,6 +2761,7 @@ xlog_regrant_reserve_log_space(xlog_t          *log,
        xlog_verify_grant_head(log, 0);
        GRANT_UNLOCK(log, s);
        ticket->t_curr_res = ticket->t_unit_res;
+       XLOG_TIC_RESET_RES(ticket);
 }      /* xlog_regrant_reserve_log_space */
 
 
@@ -3179,29 +3317,57 @@ xlog_ticket_get(xlog_t          *log,
         * and their unit amount is the total amount of space required.
         *
         * The following lines of code account for non-transaction data
-        * which occupy space in the on-disk log. 
+        * which occupy space in the on-disk log.
+        *
+        * Normal form of a transaction is:
+        * <oph><trans-hdr><start-oph><reg1-oph><reg1><reg2-oph>...<commit-oph>
+        * and then there are LR hdrs, split-recs and roundoff at end of syncs.
+        *
+        * We need to account for all the leadup data and trailer data
+        * around the transaction data.
+        * And then we need to account for the worst case in terms of using
+        * more space.
+        * The worst case will happen if:
+        * - the placement of the transaction happens to be such that the
+        *   roundoff is at its maximum
+        * - the transaction data is synced before the commit record is synced
+        *   i.e. <transaction-data><roundoff> | <commit-rec><roundoff>
+        *   Therefore the commit record is in its own Log Record.
+        *   This can happen as the commit record is called with its
+        *   own region to xlog_write().
+        *   This then means that in the worst case, roundoff can happen for
+        *   the commit-rec as well.
+        *   The commit-rec is smaller than padding in this scenario and so it is
+        *   not added separately.
         */
 
+       /* for trans header */
+       unit_bytes += sizeof(xlog_op_header_t);
+       unit_bytes += sizeof(xfs_trans_header_t);
+
        /* for start-rec */
-       unit_bytes += sizeof(xlog_op_header_t); 
+       unit_bytes += sizeof(xlog_op_header_t);
+
+       /* for LR headers */
+       num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
+       unit_bytes += log->l_iclog_hsize * num_headers;
+
+       /* for commit-rec LR header - note: padding will subsume the ophdr */
+       unit_bytes += log->l_iclog_hsize;
+
+       /* for split-recs - ophdrs added when data split over LRs */
+       unit_bytes += sizeof(xlog_op_header_t) * num_headers;
 
-       /* for padding */
+       /* for roundoff padding for transaction data and one for commit record */
        if (XFS_SB_VERSION_HASLOGV2(&log->l_mp->m_sb) &&
-               log->l_mp->m_sb.sb_logsunit > 1) {
+           log->l_mp->m_sb.sb_logsunit > 1) {
                /* log su roundoff */
-               unit_bytes += log->l_mp->m_sb.sb_logsunit;  
+               unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
        } else {
                /* BB roundoff */
-               unit_bytes += BBSIZE;
+               unit_bytes += 2*BBSIZE;
         }
 
-       /* for commit-rec */
-       unit_bytes += sizeof(xlog_op_header_t);
-       /* for LR headers */
-       num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
-       unit_bytes += log->l_iclog_hsize * num_headers;
-
        tic->t_unit_res         = unit_bytes;
        tic->t_curr_res         = unit_bytes;
        tic->t_cnt              = cnt;
@@ -3209,10 +3375,13 @@ xlog_ticket_get(xlog_t          *log,
        tic->t_tid              = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
        tic->t_clientid         = client;
        tic->t_flags            = XLOG_TIC_INITED;
+       tic->t_trans_type       = 0;
        if (xflags & XFS_LOG_PERM_RESERV)
                tic->t_flags |= XLOG_TIC_PERM_RESERV;
        sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
 
+       XLOG_TIC_RESET_RES(tic);
+
        return tic;
 }      /* xlog_ticket_get */
 
index 0db122d..1896111 100644 (file)
@@ -114,9 +114,44 @@ xfs_lsn_t  _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 #define XFS_VOLUME             0x2
 #define XFS_LOG                        0xaa
 
+
+/*
+ * Region types for iovec's i_type.
+ *
+ * Each value tags a log iovec with the kind of region it carries. The
+ * tags are defined only when XFS_LOG_RES_DEBUG is defined, and
+ * XLOG_REG_TYPE_MAX equals the highest value defined here
+ * (XLOG_REG_TYPE_TRANSHDR).
+ *
+ * XLOG_VEC_SET_TYPE(), defined after the tags, stores a tag in a
+ * vector's i_type in debug builds and expands to nothing otherwise, so
+ * callers can use it unconditionally.
+ */
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_REG_TYPE_BFORMAT          1
+#define XLOG_REG_TYPE_BCHUNK           2
+#define XLOG_REG_TYPE_EFI_FORMAT       3
+#define XLOG_REG_TYPE_EFD_FORMAT       4
+#define XLOG_REG_TYPE_IFORMAT          5
+#define XLOG_REG_TYPE_ICORE            6
+#define XLOG_REG_TYPE_IEXT             7
+#define XLOG_REG_TYPE_IBROOT           8
+#define XLOG_REG_TYPE_ILOCAL           9
+#define XLOG_REG_TYPE_IATTR_EXT                10
+#define XLOG_REG_TYPE_IATTR_BROOT      11
+#define XLOG_REG_TYPE_IATTR_LOCAL      12
+#define XLOG_REG_TYPE_QFORMAT          13
+#define XLOG_REG_TYPE_DQUOT            14
+#define XLOG_REG_TYPE_QUOTAOFF         15
+#define XLOG_REG_TYPE_LRHEADER         16
+#define XLOG_REG_TYPE_UNMOUNT          17
+#define XLOG_REG_TYPE_COMMIT           18
+#define XLOG_REG_TYPE_TRANSHDR         19
+#define XLOG_REG_TYPE_MAX              19
+#endif /* XFS_LOG_RES_DEBUG */
+
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_VEC_SET_TYPE(vecp, t) ((vecp)->i_type = (t))
+#else /* !XFS_LOG_RES_DEBUG: no i_type field, so this is a no-op */
+#define XLOG_VEC_SET_TYPE(vecp, t)
+#endif /* XFS_LOG_RES_DEBUG */
+
+
 typedef struct xfs_log_iovec {
        xfs_caddr_t             i_addr;         /* beginning address of region */
        int             i_len;          /* length in bytes of region */
+#if defined(XFS_LOG_RES_DEBUG)
+       uint            i_type;         /* type of region (XLOG_REG_TYPE_*) */
+#endif /* XFS_LOG_RES_DEBUG */
 } xfs_log_iovec_t;
 
 typedef void* xfs_log_ticket_t;
@@ -159,7 +194,8 @@ int   xfs_log_reserve(struct xfs_mount *mp,
                          int              count,
                          xfs_log_ticket_t *ticket,
                          __uint8_t        clientid,
-                         uint             flags);
+                         uint             flags,
+                         uint             t_type);
 int      xfs_log_write(struct xfs_mount *mp,
                        xfs_log_iovec_t  region[],
                        int              nentries,
index 1a1d452..eb7fdc6 100644 (file)
@@ -335,18 +335,66 @@ typedef __uint32_t xlog_tid_t;
 
 #define XLOG_COVER_OPS         5
 
+
+/*
+ * Ticket reservation region accounting.
+ *
+ * With XFS_LOG_RES_DEBUG defined, a ticket records the length and type of
+ * up to XLOG_TIC_LEN_MAX regions in t_res_arr:
+ *   XLOG_TIC_RESET_RES  - zero t_res_num, t_res_arr_sum and t_res_num_ophdrs
+ *   XLOG_TIC_ADD_OPHDR  - bump the op header count, t_res_num_ophdrs
+ *   XLOG_TIC_ADD_REGION - append a (len, type) entry and add len to
+ *                         t_res_arr_sum; when the array is already full,
+ *                         first fold t_res_arr_sum into t_res_o_flow and
+ *                         restart at index 0
+ * Without XFS_LOG_RES_DEBUG all three macros expand to nothing.
+ *
+ * NOTE(review): XLOG_TIC_RESET_RES does not clear t_res_o_flow - confirm
+ * the ticket is zeroed elsewhere or that carrying the overflow across a
+ * reset is intended.
+ */
+#if defined(XFS_LOG_RES_DEBUG)
+#define XLOG_TIC_LEN_MAX       15
+#define XLOG_TIC_RESET_RES(t) ((t)->t_res_num = \
+                               (t)->t_res_arr_sum = (t)->t_res_num_ophdrs = 0)
+#define XLOG_TIC_ADD_OPHDR(t) ((t)->t_res_num_ophdrs++)
+#define XLOG_TIC_ADD_REGION(t, len, type)                              \
+       do {                                                            \
+               if ((t)->t_res_num == XLOG_TIC_LEN_MAX) {               \
+                       /* add to overflow and start again */           \
+                       (t)->t_res_o_flow += (t)->t_res_arr_sum;        \
+                       (t)->t_res_num = 0;                             \
+                       (t)->t_res_arr_sum = 0;                         \
+               }                                                       \
+               (t)->t_res_arr[(t)->t_res_num].r_len = (len);           \
+               (t)->t_res_arr[(t)->t_res_num].r_type = (type);         \
+               (t)->t_res_arr_sum += (len);                            \
+               (t)->t_res_num++;                                       \
+       } while (0)
+
+/*
+ * Reservation region
+ * As would be stored in xfs_log_iovec but without the i_addr which
+ * we don't care about.
+ */
+typedef struct xlog_res {
+       uint    r_len;  /* region length, set from the len macro argument */
+       uint    r_type; /* region type, set from the type macro argument */
+} xlog_res_t;
+#else /* !XFS_LOG_RES_DEBUG: accounting compiled out */
+#define XLOG_TIC_RESET_RES(t)
+#define XLOG_TIC_ADD_OPHDR(t)
+#define XLOG_TIC_ADD_REGION(t, len, type)
+#endif /* XFS_LOG_RES_DEBUG */
+
+
 typedef struct xlog_ticket {
-       sv_t               t_sema;       /* sleep on this semaphore      :20 */
-       struct xlog_ticket *t_next;      /*                              : 4 */
-       struct xlog_ticket *t_prev;      /*                              : 4 */
-       xlog_tid_t         t_tid;        /* transaction identifier       : 4 */
-       int                t_curr_res;   /* current reservation in bytes : 4 */
-       int                t_unit_res;   /* unit reservation in bytes    : 4 */
-       __uint8_t          t_ocnt;       /* original count               : 1 */
-       __uint8_t          t_cnt;        /* current count                : 1 */
-       __uint8_t          t_clientid;   /* who does this belong to;     : 1 */
-       __uint8_t          t_flags;      /* properties of reservation    : 1 */
+       sv_t               t_sema;       /* sleep on this semaphore      : 20 */
+       struct xlog_ticket *t_next;      /*                              :4|8 */
+       struct xlog_ticket *t_prev;      /*                              :4|8 */
+       xlog_tid_t         t_tid;        /* transaction identifier       : 4  */
+       int                t_curr_res;   /* current reservation in bytes : 4  */
+       int                t_unit_res;   /* unit reservation in bytes    : 4  */
+       char               t_ocnt;       /* original count               : 1  */
+       char               t_cnt;        /* current count                : 1  */
+       char               t_clientid;   /* who does this belong to;     : 1  */
+       char               t_flags;      /* properties of reservation    : 1  */
+       uint               t_trans_type; /* transaction type             : 4  */
+       /*
+        * NOTE(review): t_ocnt, t_cnt, t_clientid and t_flags changed from
+        * __uint8_t to plain char, whose signedness is implementation-defined.
+        * Confirm no value above 127 is stored here and then compared against
+        * the field directly (e.g. XFS_LOG is 0xaa, if that is ever used as
+        * a client id).
+        */
+#if defined (XFS_LOG_RES_DEBUG)
+        /* reservation array fields, updated by the XLOG_TIC_* macros */
+       uint               t_res_num;                    /* num in array : 4 */
+       xlog_res_t         t_res_arr[XLOG_TIC_LEN_MAX];  /* array of res : X */ 
+       uint               t_res_num_ophdrs;             /* num op hdrs  : 4 */
+       uint               t_res_arr_sum;                /* array sum    : 4 */
+       uint               t_res_o_flow;                 /* sum overflow : 4 */
+#endif /* XFS_LOG_RES_DEBUG */
 } xlog_ticket_t;
+
 #endif
 
 
index 0aac28d..14faaba 100644 (file)
@@ -1387,7 +1387,7 @@ xlog_recover_add_to_cont_trans(
        old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
        old_len = item->ri_buf[item->ri_cnt-1].i_len;
 
-       ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0);
+       ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
index 4f40c92..a6cd632 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -42,7 +42,8 @@
 #include "xfs_dir2.h"
 #include "xfs_dmapi.h"
 #include "xfs_mount.h"
-
+#include "xfs_quota.h"
+#include "xfs_error.h"
 
 STATIC struct xfs_dquot *
 xfs_dqvopchown_default(
@@ -54,8 +55,79 @@ xfs_dqvopchown_default(
        return NULL;
 }
 
+/*
+ * Clear the quotaflags in memory and in the superblock.
+ *
+ * mp->m_qflags is zeroed unconditionally. If the incore superblock still
+ * has quota flags set, they are cleared under the SB lock and, unless the
+ * filesystem is mounted read-only, the change is pushed out through an
+ * XFS_TRANS_QM_SBCHANGE transaction.
+ *
+ * Returns 0 when nothing had to be written back (sb_qflags already zero,
+ * or read-only mount); otherwise the error from xfs_trans_reserve() or
+ * the result of xfs_trans_commit().
+ */
+int
+xfs_mount_reset_sbqflags(xfs_mount_t *mp)
+{
+       int                     error;
+       xfs_trans_t             *tp;
+       unsigned long           s;      /* XFS_SB_LOCK cookie for XFS_SB_UNLOCK */
+
+       mp->m_qflags = 0;
+       /*
+        * It is OK to look at sb_qflags here in mount path,
+        * without SB_LOCK.
+        */
+       if (mp->m_sb.sb_qflags == 0)
+               return 0;
+       s = XFS_SB_LOCK(mp);
+       mp->m_sb.sb_qflags = 0;
+       XFS_SB_UNLOCK(mp, s);
+
+       /*
+        * if the fs is readonly, let the incore superblock run
+        * with quotas off but don't flush the update out to disk
+        */
+       if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
+               return 0;
+#ifdef QUOTADEBUG
+       xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+       tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+       if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+                                     XFS_DEFAULT_LOG_COUNT))) {
+               /* reservation failed: drop the transaction and report it */
+               xfs_trans_cancel(tp, 0);
+               xfs_fs_cmn_err(CE_ALERT, mp,
+                       "xfs_mount_reset_sbqflags: Superblock update failed!");
+               return error;
+       }
+       xfs_mod_sb(tp, XFS_SB_QFLAGS);
+       error = xfs_trans_commit(tp, 0, NULL);
+       return error;
+}
+
+STATIC int
+xfs_noquota_init(
+       xfs_mount_t     *mp,
+       uint            *needquotamount,        /* out: always set to B_FALSE */
+       uint            *quotaflags)            /* out: always set to 0 */
+{
+       int             error = 0;
+
+       *quotaflags = 0;
+       *needquotamount = B_FALSE;
+
+       ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+       /*
+        * If a file system had quotas running earlier, but decided to
+        * mount without -o uquota/pquota/gquota options, revoke the
+        * quotachecked license.
+        *
+        * That is: when the superblock still carries any quota accounting
+        * bit, log a notice and clear the flags via
+        * xfs_mount_reset_sbqflags(). Its result becomes our return value;
+        * otherwise 0 is returned.
+        */
+       if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+               cmn_err(CE_NOTE,
+                        "XFS resetting qflags for filesystem %s",
+                        mp->m_fsname);
+
+               error = xfs_mount_reset_sbqflags(mp);
+       }
+       return error;
+}
+
 xfs_qmops_t    xfs_qmcore_stub = {
-       .xfs_qminit             = (xfs_qminit_t) fs_noerr,
+       .xfs_qminit             = (xfs_qminit_t) xfs_noquota_init,
        .xfs_qmdone             = (xfs_qmdone_t) fs_noerr,
        .xfs_qmmount            = (xfs_qmmount_t) fs_noerr,
        .xfs_qmunmount          = (xfs_qmunmount_t) fs_noerr,
index 7134576..32cb797 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -159,6 +159,20 @@ typedef struct xfs_qoff_logformat {
 #define XFS_OQUOTA_CHKD        0x0020  /* quotacheck run on other (grp/prj) quotas */
 #define XFS_GQUOTA_ACCT        0x0040  /* group quota accounting ON */
 
+/*
+ * Quota Accounting/Enforcement flags
+ *
+ * The XFS_ALL_QUOTA_ACCT, _ENFD and _CHKD masks OR together the
+ * individual accounting, enforcement and quotacheck bits. The XFS_IS_
+ * tests mask the mount's m_qflags with one of those bits or masks and
+ * yield the matching bits (non-zero when set), not a normalised 0/1
+ * value.
+ */
+#define XFS_ALL_QUOTA_ACCT     \
+               (XFS_UQUOTA_ACCT | XFS_GQUOTA_ACCT | XFS_PQUOTA_ACCT)
+#define XFS_ALL_QUOTA_ENFD     (XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD)
+#define XFS_ALL_QUOTA_CHKD     (XFS_UQUOTA_CHKD | XFS_OQUOTA_CHKD)
+
+#define XFS_IS_QUOTA_RUNNING(mp)       ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT)
+#define XFS_IS_QUOTA_ENFORCED(mp)      ((mp)->m_qflags & XFS_ALL_QUOTA_ENFD)
+#define XFS_IS_UQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_UQUOTA_ACCT)
+#define XFS_IS_PQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_PQUOTA_ACCT)
+#define XFS_IS_GQUOTA_RUNNING(mp)      ((mp)->m_qflags & XFS_GQUOTA_ACCT)
+
 /*
  * Incore only flags for quotaoff - these bits get cleared when quota(s)
  * are in the process of getting turned off. These flags are in m_qflags but
@@ -362,6 +376,7 @@ typedef struct xfs_dqtrxops {
                                f | XFS_QMOPT_RES_REGBLKS)
 
 extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
+extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
 
 extern struct bhv_vfsops xfs_qmops;
 
index 06dfca5..92efe27 100644 (file)
@@ -276,7 +276,7 @@ xfs_trans_reserve(
 
                error = xfs_log_reserve(tp->t_mountp, logspace, logcount,
                                        &tp->t_ticket,
-                                       XFS_TRANSACTION, log_flags);
+                                       XFS_TRANSACTION, log_flags, tp->t_type);
                if (error) {
                        goto undo_blocks;
                }
@@ -1032,6 +1032,7 @@ xfs_trans_fill_vecs(
        tp->t_header.th_num_items = nitems;
        log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
        log_vector->i_len = sizeof(xfs_trans_header_t);
+       XLOG_VEC_SET_TYPE(log_vector, XLOG_REG_TYPE_TRANSHDR);
 }
 
 
index ec541d6..a263aec 100644 (file)
@@ -112,6 +112,7 @@ typedef struct xfs_trans_header {
 #define        XFS_TRANS_GROWFSRT_ZERO         38
 #define        XFS_TRANS_GROWFSRT_FREE         39
 #define        XFS_TRANS_SWAPEXT               40
+#define        XFS_TRANS_TYPE_MAX              40
 /* new transaction types need to be reflected in xfs_logprint(8) */
 
 
@@ -998,6 +999,7 @@ struct xfs_buf      *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
 void           xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
+void           xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
 void           xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
index 7bc5eab..2a71b4f 100644 (file)
@@ -379,8 +379,8 @@ xfs_trans_delete_ail(
                else {
                        xfs_cmn_err(XFS_PTAG_AILDELETE, CE_ALERT, mp,
                                "xfs_trans_delete_ail: attempting to delete a log item that is not in the AIL");
-                       xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
                        AIL_UNLOCK(mp, s);
+                       xfs_force_shutdown(mp, XFS_CORRUPT_INCORE);
                }
        }
 }
index 144da7a..e733293 100644 (file)
@@ -713,6 +713,29 @@ xfs_trans_bhold(xfs_trans_t        *tp,
        xfs_buf_item_trace("BHOLD", bip);
 }
 
+/*
+ * Cancel the previous buffer hold request made on this buffer
+ * for this transaction.
+ *
+ * Clears XFS_BLI_HOLD in the buffer's log item and emits a
+ * "BHOLD RELEASE" trace entry. The asserts require that the buffer is
+ * busy, has tp recorded as its FSPRIVATE2 value, and has a log item
+ * that is neither stale nor cancelled, still has a positive reference
+ * count, and actually has the hold flag set.
+ */
+void
+xfs_trans_bhold_release(xfs_trans_t    *tp,
+                       xfs_buf_t       *bp)
+{
+       xfs_buf_log_item_t      *bip;   /* log item hung off the buffer */
+
+       ASSERT(XFS_BUF_ISBUSY(bp));
+       ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
+       ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+
+       bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+       ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
+       ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
+       ASSERT(atomic_read(&bip->bli_refcount) > 0);
+       ASSERT(bip->bli_flags & XFS_BLI_HOLD);
+       bip->bli_flags &= ~XFS_BLI_HOLD;
+       xfs_buf_item_trace("BHOLD RELEASE", bip);
+}
+
 /*
  * This is called to mark bytes first through last inclusive of the given
  * buffer as needing to be logged when the transaction is committed.
index 42bcc02..f1a904e 100644 (file)
@@ -795,7 +795,6 @@ xfs_statvfs(
        xfs_mount_t     *mp;
        xfs_sb_t        *sbp;
        unsigned long   s;
-       u64 id;
 
        mp = XFS_BHVTOM(bdp);
        sbp = &(mp->m_sb);
@@ -823,9 +822,7 @@ xfs_statvfs(
        statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
        XFS_SB_UNLOCK(mp, s);
 
-       id = huge_encode_dev(mp->m_dev);
-       statp->f_fsid.val[0] = (u32)id;
-       statp->f_fsid.val[1] = (u32)(id >> 32);
+       xfs_statvfs_fsid(statp, mp);
        statp->f_namelen = MAXNAMELEN - 1;
 
        return 0;
@@ -906,7 +903,6 @@ xfs_sync_inodes(
        xfs_inode_t     *ip_next;
        xfs_buf_t       *bp;
        vnode_t         *vp = NULL;
-       vmap_t          vmap;
        int             error;
        int             last_error;
        uint64_t        fflag;
@@ -1101,48 +1097,21 @@ xfs_sync_inodes(
                 * lock in xfs_ireclaim() after the inode is pulled from
                 * the mount list will sleep until we release it here.
                 * This keeps the vnode from being freed while we reference
-                * it.  It is also cheaper and simpler than actually doing
-                * a vn_get() for every inode we touch here.
+                * it.
                 */
                if (xfs_ilock_nowait(ip, lock_flags) == 0) {
-
                        if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
                                ip = ip->i_mnext;
                                continue;
                        }
 
-                       /*
-                        * We need to unlock the inode list lock in order
-                        * to lock the inode. Insert a marker record into
-                        * the inode list to remember our position, dropping
-                        * the lock is now done inside the IPOINTER_INSERT
-                        * macro.
-                        *
-                        * We also use the inode list lock to protect us
-                        * in taking a snapshot of the vnode version number
-                        * for use in calling vn_get().
-                        */
-                       VMAP(vp, vmap);
-                       IPOINTER_INSERT(ip, mp);
-
-                       vp = vn_get(vp, &vmap);
+                       vp = vn_grab(vp);
                        if (vp == NULL) {
-                               /*
-                                * The vnode was reclaimed once we let go
-                                * of the inode list lock.  Skip to the
-                                * next list entry. Remove the marker.
-                                */
-
-                               XFS_MOUNT_ILOCK(mp);
-
-                               mount_locked = B_TRUE;
-                               vnode_refed  = B_FALSE;
-
-                               IPOINTER_REMOVE(ip, mp);
-
+                               ip = ip->i_mnext;
                                continue;
                        }
 
+                       IPOINTER_INSERT(ip, mp);
                        xfs_ilock(ip, lock_flags);
 
                        ASSERT(vp == XFS_ITOV(ip));
@@ -1533,7 +1502,10 @@ xfs_syncsub(
         * eventually kicked out of the cache.
         */
        if (flags & SYNC_REFCACHE) {
-               xfs_refcache_purge_some(mp);
+               if (flags & SYNC_WAIT)
+                       xfs_refcache_purge_mp(mp);
+               else
+                       xfs_refcache_purge_some(mp);
        }
 
        /*
@@ -1649,6 +1621,10 @@ xfs_vget(
 #define MNTOPT_SWIDTH  "swidth"        /* data volume stripe width */
 #define MNTOPT_NOUUID  "nouuid"        /* ignore filesystem UUID */
 #define MNTOPT_MTPT    "mtpt"          /* filesystem mount point */
+#define MNTOPT_GRPID   "grpid"         /* group-ID from parent directory */
+#define MNTOPT_NOGRPID "nogrpid"       /* group-ID from current process */
+#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
+#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
 #define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
 #define MNTOPT_IHASHSIZE    "ihashsize"    /* size of inode hash table */
 #define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
@@ -1769,6 +1745,12 @@ xfs_parseargs(
                        }
                        args->flags |= XFSMNT_IHASHSIZE;
                        args->ihashsize = simple_strtoul(value, &eov, 10);
+               } else if (!strcmp(this_char, MNTOPT_GRPID) ||
+                          !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+                       vfsp->vfs_flag |= VFS_GRPID;
+               } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
+                          !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+                       vfsp->vfs_flag &= ~VFS_GRPID;
                } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
                        args->flags |= XFSMNT_WSYNC;
                } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
@@ -1890,6 +1872,7 @@ xfs_showargs(
        };
        struct proc_xfs_info    *xfs_infop;
        struct xfs_mount        *mp = XFS_BHVTOM(bhv);
+       struct vfs              *vfsp = XFS_MTOVFS(mp);
 
        for (xfs_infop = xfs_info; xfs_infop->flag; xfs_infop++) {
                if (mp->m_flags & xfs_infop->flag)
@@ -1926,7 +1909,10 @@ xfs_showargs(
 
        if (!(mp->m_flags & XFS_MOUNT_32BITINOOPT))
                seq_printf(m, "," MNTOPT_64BITINODE);
-       
+
+       if (vfsp->vfs_flag & VFS_GRPID)
+               seq_printf(m, "," MNTOPT_GRPID);
+
        return 0;
 }
 
index 1377c86..58bfe62 100644 (file)
@@ -104,7 +104,7 @@ xfs_open(
         * If it's a directory with any blocks, read-ahead block 0
         * as we're almost certain to have the next operation be a read there.
         */
-       if (vp->v_type == VDIR && ip->i_d.di_nextents > 0) {
+       if (VN_ISDIR(vp) && ip->i_d.di_nextents > 0) {
                mode = xfs_ilock_map_shared(ip);
                if (ip->i_d.di_nextents > 0)
                        (void)xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
@@ -163,18 +163,21 @@ xfs_getattr(
        /*
         * Copy from in-core inode.
         */
-       vap->va_type = vp->v_type;
-       vap->va_mode = ip->i_d.di_mode & MODEMASK;
+       vap->va_mode = ip->i_d.di_mode;
        vap->va_uid = ip->i_d.di_uid;
        vap->va_gid = ip->i_d.di_gid;
        vap->va_projid = ip->i_d.di_projid;
 
        /*
         * Check vnode type block/char vs. everything else.
-        * Do it with bitmask because that's faster than looking
-        * for multiple values individually.
         */
-       if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
+       switch (ip->i_d.di_mode & S_IFMT) {
+       case S_IFBLK:
+       case S_IFCHR:
+               vap->va_rdev = ip->i_df.if_u2.if_rdev;
+               vap->va_blocksize = BLKDEV_IOSIZE;
+               break;
+       default:
                vap->va_rdev = 0;
 
                if (!(ip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
@@ -224,9 +227,7 @@ xfs_getattr(
                                (ip->i_d.di_extsize << mp->m_sb.sb_blocklog) :
                                (mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog);
                }
-       } else {
-               vap->va_rdev = ip->i_df.if_u2.if_rdev;
-               vap->va_blocksize = BLKDEV_IOSIZE;
+               break;
        }
 
        vap->va_atime.tv_sec = ip->i_d.di_atime.t_sec;
@@ -468,7 +469,7 @@ xfs_setattr(
                                m |= S_ISGID;
 #if 0
                        /* Linux allows this, Irix doesn't. */
-                       if ((vap->va_mode & S_ISVTX) && vp->v_type != VDIR)
+                       if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
                                m |= S_ISVTX;
 #endif
                        if (m && !capable(CAP_FSETID))
@@ -546,10 +547,10 @@ xfs_setattr(
                        goto error_return;
                }
 
-               if (vp->v_type == VDIR) {
+               if (VN_ISDIR(vp)) {
                        code = XFS_ERROR(EISDIR);
                        goto error_return;
-               } else if (vp->v_type != VREG) {
+               } else if (!VN_ISREG(vp)) {
                        code = XFS_ERROR(EINVAL);
                        goto error_return;
                }
@@ -1567,7 +1568,7 @@ xfs_release(
        vp = BHV_TO_VNODE(bdp);
        ip = XFS_BHVTOI(bdp);
 
-       if ((vp->v_type != VREG) || (ip->i_d.di_mode == 0)) {
+       if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0)) {
                return 0;
        }
 
@@ -1895,7 +1896,7 @@ xfs_create(
        dp = XFS_BHVTOI(dir_bdp);
        mp = dp->i_mount;
 
-       dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
+       dm_di_mode = vap->va_mode;
        namelen = VNAMELEN(dentry);
 
        if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
@@ -1973,8 +1974,7 @@ xfs_create(
            (error = XFS_DIR_CANENTER(mp, tp, dp, name, namelen)))
                goto error_return;
        rdev = (vap->va_mask & XFS_AT_RDEV) ? vap->va_rdev : 0;
-       error = xfs_dir_ialloc(&tp, dp,
-                       MAKEIMODE(vap->va_type,vap->va_mode), 1,
+       error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 1,
                        rdev, credp, prid, resblks > 0,
                        &ip, &committed);
        if (error) {
@@ -2620,7 +2620,7 @@ xfs_link(
        vn_trace_entry(src_vp, __FUNCTION__, (inst_t *)__return_address);
 
        target_namelen = VNAMELEN(dentry);
-       if (src_vp->v_type == VDIR)
+       if (VN_ISDIR(src_vp))
                return XFS_ERROR(EPERM);
 
        src_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(src_vp), &xfs_vnodeops);
@@ -2805,7 +2805,7 @@ xfs_mkdir(
 
        tp = NULL;
        dp_joined_to_trans = B_FALSE;
-       dm_di_mode = vap->va_mode|VTTOIF(vap->va_type);
+       dm_di_mode = vap->va_mode;
 
        if (DM_EVENT_ENABLED(dir_vp->v_vfsp, dp, DM_EVENT_CREATE)) {
                error = XFS_SEND_NAMESP(mp, DM_EVENT_CREATE,
@@ -2879,8 +2879,7 @@ xfs_mkdir(
        /*
         * create the directory inode.
         */
-       error = xfs_dir_ialloc(&tp, dp,
-                       MAKEIMODE(vap->va_type,vap->va_mode), 2,
+       error = xfs_dir_ialloc(&tp, dp, vap->va_mode, 2,
                        0, credp, prid, resblks > 0,
                &cdp, NULL);
        if (error) {
@@ -3650,7 +3649,7 @@ xfs_rwlock(
        vnode_t         *vp;
 
        vp = BHV_TO_VNODE(bdp);
-       if (vp->v_type == VDIR)
+       if (VN_ISDIR(vp))
                return 1;
        ip = XFS_BHVTOI(bdp);
        if (locktype == VRWLOCK_WRITE) {
@@ -3681,7 +3680,7 @@ xfs_rwunlock(
        vnode_t         *vp;
 
        vp = BHV_TO_VNODE(bdp);
-       if (vp->v_type == VDIR)
+       if (VN_ISDIR(vp))
                return;
        ip = XFS_BHVTOI(bdp);
        if (locktype == VRWLOCK_WRITE) {
@@ -3847,51 +3846,10 @@ xfs_reclaim(
                return 0;
        }
 
-       if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
-               if (ip->i_d.di_size > 0) {
-                       /*
-                        * Flush and invalidate any data left around that is
-                        * a part of this file.
-                        *
-                        * Get the inode's i/o lock so that buffers are pushed
-                        * out while holding the proper lock.  We can't hold
-                        * the inode lock here since flushing out buffers may
-                        * cause us to try to get the lock in xfs_strategy().
-                        *
-                        * We don't have to call remapf() here, because there
-                        * cannot be any mapped file references to this vnode
-                        * since it is being reclaimed.
-                        */
-                       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-                       /*
-                        * If we hit an IO error, we need to make sure that the
-                        * buffer and page caches of file data for
-                        * the file are tossed away. We don't want to use
-                        * VOP_FLUSHINVAL_PAGES here because we don't want dirty
-                        * pages to stay attached to the vnode, but be
-                        * marked P_BAD. pdflush/vnode_pagebad
-                        * hates that.
-                        */
-                       if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                               VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_NONE);
-                       } else {
-                               VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-                       }
+       vn_iowait(vp);
 
-                       ASSERT(VN_CACHED(vp) == 0);
-                       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
-                              ip->i_delayed_blks == 0);
-                       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-               } else if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-                       /*
-                        * di_size field may not be quite accurate if we're
-                        * shutting down.
-                        */
-                       VOP_TOSS_PAGES(vp, 0, -1, FI_NONE);
-                       ASSERT(VN_CACHED(vp) == 0);
-               }
-       }
+       ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+       ASSERT(VN_CACHED(vp) == 0);
 
        /* If we have nothing to flush with this inode then complete the
         * teardown now, otherwise break the link between the xfs inode
@@ -4567,7 +4525,7 @@ xfs_change_file_space(
        /*
         * must be a regular file and have write permission
         */
-       if (vp->v_type != VREG)
+       if (!VN_ISREG(vp))
                return XFS_ERROR(EINVAL);
 
        xfs_ilock(ip, XFS_ILOCK_SHARED);