Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Jun 2010 14:33:05 +0000 (07:33 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 5 Jun 2010 14:33:05 +0000 (07:33 -0700)
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: improve xfs_isilocked
  xfs: skip writeback from reclaim context
  xfs: remove done roadmap item from xfs-delayed-logging-design.txt
  xfs: fix race in inode cluster freeing failing to stale inodes
  xfs: fix access to upper inodes without inode64
  xfs: fix might_sleep() warning when initialising per-ag tree
  fs/xfs/quota: Add missing mutex_unlock
  xfs: remove duplicated #include
  xfs: convert more trace events to DEFINE_EVENT
  xfs: xfs_trace.c: remove duplicated #include
  xfs: Check new inode size is OK before preallocating
  xfs: clean up xlog_align
  xfs: cleanup log reservation calculations
  xfs: be more explicit if RT mount fails due to config
  xfs: replace E2BIG with EFBIG where appropriate

18 files changed:
Documentation/filesystems/xfs-delayed-logging-design.txt
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_quotaops.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/linux-2.6/xfs_trace.c
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/quota/xfs_qm.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_rtalloc.c
fs/xfs/xfs_rtalloc.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_vnodeops.c

index d8119e9..96d0df2 100644 (file)
@@ -794,11 +794,6 @@ designed.
 
 Roadmap:
 
-2.6.35 Inclusion in mainline as an experimental mount option
-       => approximately 2-3 months to merge window
-       => needs to be in xfs-dev tree in 4-6 weeks
-       => code is nearing readiness for review
-
 2.6.37 Remove experimental tag from mount option
        => should be roughly 6 months after initial merge
        => enough time to:
index 089eaca..a0fa3bf 100644 (file)
@@ -1332,6 +1332,21 @@ xfs_vm_writepage(
 
        trace_xfs_writepage(inode, page, 0);
 
+       /*
+        * Refuse to write the page out if we are called from reclaim context.
+        *
+        * This is primarily to avoid stack overflows when called from deep
+        * used stacks in random callers for direct reclaim, but disabling
+        * reclaim for kswapd is a nice side-effect as kswapd causes rather
+        * suboptimal I/O patterns, too.
+        *
+        * This should really be done by the core VM, but until that happens
+        * filesystems like XFS, btrfs and ext4 have to take care of this
+        * by themselves.
+        */
+       if (current->flags & PF_MEMALLOC)
+               goto out_fail;
+
        /*
         * We need a transaction if:
         *  1. There are delalloc buffers on the page
index 9c8019c..44f0b2d 100644 (file)
@@ -585,11 +585,20 @@ xfs_vn_fallocate(
        bf.l_len = len;
 
        xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       /* check the new inode size is valid before allocating */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+           offset + len > i_size_read(inode)) {
+               new_size = offset + len;
+               error = inode_newsize_ok(inode, new_size);
+               if (error)
+                       goto out_unlock;
+       }
+
        error = -xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
                                       0, XFS_ATTR_NOLOCK);
-       if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
-           offset + len > i_size_read(inode))
-               new_size = offset + len;
+       if (error)
+               goto out_unlock;
 
        /* Change file size if needed */
        if (new_size) {
@@ -600,6 +609,7 @@ xfs_vn_fallocate(
                error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
        }
 
+out_unlock:
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 out_error:
        return error;
index 9ac8aea..067cafb 100644 (file)
@@ -23,7 +23,6 @@
 #include "xfs_ag.h"
 #include "xfs_mount.h"
 #include "xfs_quota.h"
-#include "xfs_log.h"
 #include "xfs_trans.h"
 #include "xfs_bmap_btree.h"
 #include "xfs_inode.h"
index 3884e20..ef7f021 100644 (file)
@@ -164,10 +164,6 @@ xfs_inode_ag_iterator(
                struct xfs_perag        *pag;
 
                pag = xfs_perag_get(mp, ag);
-               if (!pag->pag_ici_init) {
-                       xfs_perag_put(pag);
-                       continue;
-               }
                error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
                                                exclusive, &nr);
                xfs_perag_put(pag);
@@ -867,12 +863,7 @@ xfs_reclaim_inode_shrink(
        down_read(&xfs_mount_list_lock);
        list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
                for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
-
                        pag = xfs_perag_get(mp, ag);
-                       if (!pag->pag_ici_init) {
-                               xfs_perag_put(pag);
-                               continue;
-                       }
                        reclaimable += pag->pag_ici_reclaimable;
                        xfs_perag_put(pag);
                }
index 207fa77..d12be84 100644 (file)
@@ -50,7 +50,6 @@
 #include "quota/xfs_dquot_item.h"
 #include "quota/xfs_dquot.h"
 #include "xfs_log_recover.h"
-#include "xfs_buf_item.h"
 #include "xfs_inode_item.h"
 
 /*
index ff6bc79..73d5aa1 100644 (file)
@@ -82,33 +82,6 @@ DECLARE_EVENT_CLASS(xfs_attr_list_class,
        )
 )
 
-#define DEFINE_PERAG_REF_EVENT(name) \
-TRACE_EVENT(name, \
-       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
-                unsigned long caller_ip), \
-       TP_ARGS(mp, agno, refcount, caller_ip), \
-       TP_STRUCT__entry( \
-               __field(dev_t, dev) \
-               __field(xfs_agnumber_t, agno) \
-               __field(int, refcount) \
-               __field(unsigned long, caller_ip) \
-       ), \
-       TP_fast_assign( \
-               __entry->dev = mp->m_super->s_dev; \
-               __entry->agno = agno; \
-               __entry->refcount = refcount; \
-               __entry->caller_ip = caller_ip; \
-       ), \
-       TP_printk("dev %d:%d agno %u refcount %d caller %pf", \
-                 MAJOR(__entry->dev), MINOR(__entry->dev), \
-                 __entry->agno, \
-                 __entry->refcount, \
-                 (char *)__entry->caller_ip) \
-);
-
-DEFINE_PERAG_REF_EVENT(xfs_perag_get)
-DEFINE_PERAG_REF_EVENT(xfs_perag_put)
-
 #define DEFINE_ATTR_LIST_EVENT(name) \
 DEFINE_EVENT(xfs_attr_list_class, name, \
        TP_PROTO(struct xfs_attr_list_context *ctx), \
@@ -122,6 +95,37 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
 
+DECLARE_EVENT_CLASS(xfs_perag_class,
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
+                unsigned long caller_ip),
+       TP_ARGS(mp, agno, refcount, caller_ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(int, refcount)
+               __field(unsigned long, caller_ip)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->agno = agno;
+               __entry->refcount = refcount;
+               __entry->caller_ip = caller_ip;
+       ),
+       TP_printk("dev %d:%d agno %u refcount %d caller %pf",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->refcount,
+                 (char *)__entry->caller_ip)
+);
+
+#define DEFINE_PERAG_REF_EVENT(name)   \
+DEFINE_EVENT(xfs_perag_class, name,    \
+       TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,       \
+                unsigned long caller_ip),                                      \
+       TP_ARGS(mp, agno, refcount, caller_ip))
+DEFINE_PERAG_REF_EVENT(xfs_perag_get);
+DEFINE_PERAG_REF_EVENT(xfs_perag_put);
+
 TRACE_EVENT(xfs_attr_list_node_descend,
        TP_PROTO(struct xfs_attr_list_context *ctx,
                 struct xfs_da_node_entry *btree),
@@ -775,165 +779,181 @@ DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
 DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
 
-#define DEFINE_RW_EVENT(name) \
-TRACE_EVENT(name, \
-       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
-       TP_ARGS(ip, count, offset, flags), \
-       TP_STRUCT__entry( \
-               __field(dev_t, dev) \
-               __field(xfs_ino_t, ino) \
-               __field(xfs_fsize_t, size) \
-               __field(xfs_fsize_t, new_size) \
-               __field(loff_t, offset) \
-               __field(size_t, count) \
-               __field(int, flags) \
-       ), \
-       TP_fast_assign( \
-               __entry->dev = VFS_I(ip)->i_sb->s_dev; \
-               __entry->ino = ip->i_ino; \
-               __entry->size = ip->i_d.di_size; \
-               __entry->new_size = ip->i_new_size; \
-               __entry->offset = offset; \
-               __entry->count = count; \
-               __entry->flags = flags; \
-       ), \
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
-                 "offset 0x%llx count 0x%zx ioflags %s", \
-                 MAJOR(__entry->dev), MINOR(__entry->dev), \
-                 __entry->ino, \
-                 __entry->size, \
-                 __entry->new_size, \
-                 __entry->offset, \
-                 __entry->count, \
-                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) \
+DECLARE_EVENT_CLASS(xfs_file_class,
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
+       TP_ARGS(ip, count, offset, flags),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(xfs_fsize_t, size)
+               __field(xfs_fsize_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, flags)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->flags = flags;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count 0x%zx ioflags %s",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
 )
+
+#define DEFINE_RW_EVENT(name)          \
+DEFINE_EVENT(xfs_file_class, name,     \
+       TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
+       TP_ARGS(ip, count, offset, flags))
 DEFINE_RW_EVENT(xfs_file_read);
 DEFINE_RW_EVENT(xfs_file_buffered_write);
 DEFINE_RW_EVENT(xfs_file_direct_write);
 DEFINE_RW_EVENT(xfs_file_splice_read);
 DEFINE_RW_EVENT(xfs_file_splice_write);
 
-
-#define DEFINE_PAGE_EVENT(name) \
-TRACE_EVENT(name, \
-       TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
-       TP_ARGS(inode, page, off), \
-       TP_STRUCT__entry( \
-               __field(dev_t, dev) \
-               __field(xfs_ino_t, ino) \
-               __field(pgoff_t, pgoff) \
-               __field(loff_t, size) \
-               __field(unsigned long, offset) \
-               __field(int, delalloc) \
-               __field(int, unmapped) \
-               __field(int, unwritten) \
-       ), \
-       TP_fast_assign( \
-               int delalloc = -1, unmapped = -1, unwritten = -1; \
-       \
-               if (page_has_buffers(page)) \
-                       xfs_count_page_state(page, &delalloc, \
-                                            &unmapped, &unwritten); \
-               __entry->dev = inode->i_sb->s_dev; \
-               __entry->ino = XFS_I(inode)->i_ino; \
-               __entry->pgoff = page_offset(page); \
-               __entry->size = i_size_read(inode); \
-               __entry->offset = off; \
-               __entry->delalloc = delalloc; \
-               __entry->unmapped = unmapped; \
-               __entry->unwritten = unwritten; \
-       ), \
-       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " \
-                 "delalloc %d unmapped %d unwritten %d", \
-                 MAJOR(__entry->dev), MINOR(__entry->dev), \
-                 __entry->ino, \
-                 __entry->pgoff, \
-                 __entry->size, \
-                 __entry->offset, \
-                 __entry->delalloc, \
-                 __entry->unmapped, \
-                 __entry->unwritten) \
+DECLARE_EVENT_CLASS(xfs_page_class,
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
+       TP_ARGS(inode, page, off),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(pgoff_t, pgoff)
+               __field(loff_t, size)
+               __field(unsigned long, offset)
+               __field(int, delalloc)
+               __field(int, unmapped)
+               __field(int, unwritten)
+       ),
+       TP_fast_assign(
+               int delalloc = -1, unmapped = -1, unwritten = -1;
+
+               if (page_has_buffers(page))
+                       xfs_count_page_state(page, &delalloc,
+                                            &unmapped, &unwritten);
+               __entry->dev = inode->i_sb->s_dev;
+               __entry->ino = XFS_I(inode)->i_ino;
+               __entry->pgoff = page_offset(page);
+               __entry->size = i_size_read(inode);
+               __entry->offset = off;
+               __entry->delalloc = delalloc;
+               __entry->unmapped = unmapped;
+               __entry->unwritten = unwritten;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
+                 "delalloc %d unmapped %d unwritten %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->pgoff,
+                 __entry->size,
+                 __entry->offset,
+                 __entry->delalloc,
+                 __entry->unmapped,
+                 __entry->unwritten)
 )
+
+#define DEFINE_PAGE_EVENT(name)                \
+DEFINE_EVENT(xfs_page_class, name,     \
+       TP_PROTO(struct inode *inode, struct page *page, unsigned long off),    \
+       TP_ARGS(inode, page, off))
 DEFINE_PAGE_EVENT(xfs_writepage);
 DEFINE_PAGE_EVENT(xfs_releasepage);
 DEFINE_PAGE_EVENT(xfs_invalidatepage);
 
-#define DEFINE_IOMAP_EVENT(name) \
-TRACE_EVENT(name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
-                int flags, struct xfs_bmbt_irec *irec), \
-       TP_ARGS(ip, offset, count, flags, irec), \
-       TP_STRUCT__entry( \
-               __field(dev_t, dev) \
-               __field(xfs_ino_t, ino) \
-               __field(loff_t, size) \
-               __field(loff_t, new_size) \
-               __field(loff_t, offset) \
-               __field(size_t, count) \
-               __field(int, flags) \
-               __field(xfs_fileoff_t, startoff) \
-               __field(xfs_fsblock_t, startblock) \
-               __field(xfs_filblks_t, blockcount) \
-       ), \
-       TP_fast_assign( \
-               __entry->dev = VFS_I(ip)->i_sb->s_dev; \
-               __entry->ino = ip->i_ino; \
-               __entry->size = ip->i_d.di_size; \
-               __entry->new_size = ip->i_new_size; \
-               __entry->offset = offset; \
-               __entry->count = count; \
-               __entry->flags = flags; \
-               __entry->startoff = irec ? irec->br_startoff : 0; \
-               __entry->startblock = irec ? irec->br_startblock : 0; \
-               __entry->blockcount = irec ? irec->br_blockcount : 0; \
-       ), \
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
-                 "offset 0x%llx count %zd flags %s " \
-                 "startoff 0x%llx startblock %lld blockcount 0x%llx", \
-                 MAJOR(__entry->dev), MINOR(__entry->dev), \
-                 __entry->ino, \
-                 __entry->size, \
-                 __entry->new_size, \
-                 __entry->offset, \
-                 __entry->count, \
-                 __print_flags(__entry->flags, "|", BMAPI_FLAGS), \
-                 __entry->startoff, \
-                 (__int64_t)__entry->startblock, \
-                 __entry->blockcount) \
+DECLARE_EVENT_CLASS(xfs_iomap_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
+                int flags, struct xfs_bmbt_irec *irec),
+       TP_ARGS(ip, offset, count, flags, irec),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, size)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+               __field(int, flags)
+               __field(xfs_fileoff_t, startoff)
+               __field(xfs_fsblock_t, startblock)
+               __field(xfs_filblks_t, blockcount)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+               __entry->flags = flags;
+               __entry->startoff = irec ? irec->br_startoff : 0;
+               __entry->startblock = irec ? irec->br_startblock : 0;
+               __entry->blockcount = irec ? irec->br_blockcount : 0;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd flags %s "
+                 "startoff 0x%llx startblock %lld blockcount 0x%llx",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count,
+                 __print_flags(__entry->flags, "|", BMAPI_FLAGS),
+                 __entry->startoff,
+                 (__int64_t)__entry->startblock,
+                 __entry->blockcount)
 )
+
+#define DEFINE_IOMAP_EVENT(name)       \
+DEFINE_EVENT(xfs_iomap_class, name,    \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
+                int flags, struct xfs_bmbt_irec *irec),                \
+       TP_ARGS(ip, offset, count, flags, irec))
 DEFINE_IOMAP_EVENT(xfs_iomap_enter);
 DEFINE_IOMAP_EVENT(xfs_iomap_found);
 DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
 
-#define DEFINE_SIMPLE_IO_EVENT(name) \
-TRACE_EVENT(name, \
-       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
-       TP_ARGS(ip, offset, count), \
-       TP_STRUCT__entry( \
-               __field(dev_t, dev) \
-               __field(xfs_ino_t, ino) \
-               __field(loff_t, size) \
-               __field(loff_t, new_size) \
-               __field(loff_t, offset) \
-               __field(size_t, count) \
-       ), \
-       TP_fast_assign( \
-               __entry->dev = VFS_I(ip)->i_sb->s_dev; \
-               __entry->ino = ip->i_ino; \
-               __entry->size = ip->i_d.di_size; \
-               __entry->new_size = ip->i_new_size; \
-               __entry->offset = offset; \
-               __entry->count = count; \
-       ), \
-       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
-                 "offset 0x%llx count %zd", \
-                 MAJOR(__entry->dev), MINOR(__entry->dev), \
-                 __entry->ino, \
-                 __entry->size, \
-                 __entry->new_size, \
-                 __entry->offset, \
-                 __entry->count) \
+DECLARE_EVENT_CLASS(xfs_simple_io_class,
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
+       TP_ARGS(ip, offset, count),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(loff_t, size)
+               __field(loff_t, new_size)
+               __field(loff_t, offset)
+               __field(size_t, count)
+       ),
+       TP_fast_assign(
+               __entry->dev = VFS_I(ip)->i_sb->s_dev;
+               __entry->ino = ip->i_ino;
+               __entry->size = ip->i_d.di_size;
+               __entry->new_size = ip->i_new_size;
+               __entry->offset = offset;
+               __entry->count = count;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
+                 "offset 0x%llx count %zd",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->new_size,
+                 __entry->offset,
+                 __entry->count)
 );
+
+#define DEFINE_SIMPLE_IO_EVENT(name)   \
+DEFINE_EVENT(xfs_simple_io_class, name,        \
+       TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),        \
+       TP_ARGS(ip, offset, count))
 DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
 DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
 
index 38e7641..2d8b7bc 100644 (file)
@@ -249,8 +249,10 @@ xfs_qm_hold_quotafs_ref(
 
        if (!xfs_Gqm) {
                xfs_Gqm = xfs_Gqm_init();
-               if (!xfs_Gqm)
+               if (!xfs_Gqm) {
+                       mutex_unlock(&xfs_Gqm_lock);
                        return ENOMEM;
+               }
        }
 
        /*
index 401f364..4917d4e 100644 (file)
@@ -227,7 +227,6 @@ typedef struct xfs_perag {
 
        atomic_t        pagf_fstrms;    /* # of filestreams active in this AG */
 
-       int             pag_ici_init;   /* incore inode cache initialised */
        rwlock_t        pag_ici_lock;   /* incore inode lock */
        struct radix_tree_root pag_ici_root;    /* incore inode cache root */
        int             pag_ici_reclaimable;    /* reclaimable inodes */
index 6845db9..75df75f 100644 (file)
@@ -382,9 +382,6 @@ xfs_iget(
 
        /* get the perag structure and ensure that it's inode capable */
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
-       if (!pag->pagi_inodeok)
-               return EINVAL;
-       ASSERT(pag->pag_ici_init);
        agino = XFS_INO_TO_AGINO(mp, ino);
 
 again:
@@ -744,30 +741,24 @@ xfs_ilock_demote(
 }
 
 #ifdef DEBUG
-/*
- * Debug-only routine, without additional rw_semaphore APIs, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- *
- * Note: this means !xfs_isilocked would give false positives, so don't do that.
- */
 int
 xfs_isilocked(
        xfs_inode_t             *ip,
        uint                    lock_flags)
 {
-       if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
-                       XFS_ILOCK_EXCL) {
-               if (!ip->i_lock.mr_writer)
-                       return 0;
+       if (lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) {
+               if (!(lock_flags & XFS_ILOCK_SHARED))
+                       return !!ip->i_lock.mr_writer;
+               return rwsem_is_locked(&ip->i_lock.mr_lock);
        }
 
-       if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
-                       XFS_IOLOCK_EXCL) {
-               if (!ip->i_iolock.mr_writer)
-                       return 0;
+       if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
+               if (!(lock_flags & XFS_IOLOCK_SHARED))
+                       return !!ip->i_iolock.mr_writer;
+               return rwsem_is_locked(&ip->i_iolock.mr_lock);
        }
 
-       return 1;
+       ASSERT(0);
+       return 0;
 }
 #endif
index 8cd6e8d..d53c39d 100644 (file)
@@ -1940,10 +1940,10 @@ xfs_ifree_cluster(
        int                     blks_per_cluster;
        int                     nbufs;
        int                     ninodes;
-       int                     i, j, found, pre_flushed;
+       int                     i, j;
        xfs_daddr_t             blkno;
        xfs_buf_t               *bp;
-       xfs_inode_t             *ip, **ip_found;
+       xfs_inode_t             *ip;
        xfs_inode_log_item_t    *iip;
        xfs_log_item_t          *lip;
        struct xfs_perag        *pag;
@@ -1960,114 +1960,97 @@ xfs_ifree_cluster(
                nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
        }
 
-       ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
-
        for (j = 0; j < nbufs; j++, inum += ninodes) {
+               int     found = 0;
+
                blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
                                         XFS_INO_TO_AGBNO(mp, inum));
 
+               /*
+                * We obtain and lock the backing buffer first in the process
+                * here, as we have to ensure that any dirty inode that we
+                * can't get the flush lock on is attached to the buffer.
+                * If we scan the in-memory inodes first, then buffer IO can
+                * complete before we get a lock on it, and hence we may fail
+                * to mark all the active inodes on the buffer stale.
+                */
+               bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+                                       mp->m_bsize * blks_per_cluster,
+                                       XBF_LOCK);
+
+               /*
+                * Walk the inodes already attached to the buffer and mark them
+                * stale. These will all have the flush locks held, so an
+                * in-memory inode walk can't lock them.
+                */
+               lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+               while (lip) {
+                       if (lip->li_type == XFS_LI_INODE) {
+                               iip = (xfs_inode_log_item_t *)lip;
+                               ASSERT(iip->ili_logged == 1);
+                               lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
+                               xfs_trans_ail_copy_lsn(mp->m_ail,
+                                                       &iip->ili_flush_lsn,
+                                                       &iip->ili_item.li_lsn);
+                               xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
+                               found++;
+                       }
+                       lip = lip->li_bio_list;
+               }
 
                /*
-                * Look for each inode in memory and attempt to lock it,
-                * we can be racing with flush and tail pushing here.
-                * any inode we get the locks on, add to an array of
-                * inode items to process later.
+                * For each inode in memory attempt to add it to the inode
+                * buffer and set it up for being staled on buffer IO
+                * completion.  This is safe as we've locked out tail pushing
+                * and flushing by locking the buffer.
                 *
-                * The get the buffer lock, we could beat a flush
-                * or tail pushing thread to the lock here, in which
-                * case they will go looking for the inode buffer
-                * and fail, we need some other form of interlock
-                * here.
+                * We have already marked every inode that was part of a
+                * transaction stale above, which means there is no point in
+                * even trying to lock them.
                 */
-               found = 0;
                for (i = 0; i < ninodes; i++) {
                        read_lock(&pag->pag_ici_lock);
                        ip = radix_tree_lookup(&pag->pag_ici_root,
                                        XFS_INO_TO_AGINO(mp, (inum + i)));
 
-                       /* Inode not in memory or we found it already,
-                        * nothing to do
-                        */
+                       /* Inode not in memory or stale, nothing to do */
                        if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
                                read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
 
-                       if (xfs_inode_clean(ip)) {
-                               read_unlock(&pag->pag_ici_lock);
-                               continue;
-                       }
-
-                       /* If we can get the locks then add it to the
-                        * list, otherwise by the time we get the bp lock
-                        * below it will already be attached to the
-                        * inode buffer.
-                        */
-
-                       /* This inode will already be locked - by us, lets
-                        * keep it that way.
-                        */
-
-                       if (ip == free_ip) {
-                               if (xfs_iflock_nowait(ip)) {
-                                       xfs_iflags_set(ip, XFS_ISTALE);
-                                       if (xfs_inode_clean(ip)) {
-                                               xfs_ifunlock(ip);
-                                       } else {
-                                               ip_found[found++] = ip;
-                                       }
-                               }
+                       /* don't try to lock/unlock the current inode */
+                       if (ip != free_ip &&
+                           !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
                                read_unlock(&pag->pag_ici_lock);
                                continue;
                        }
+                       read_unlock(&pag->pag_ici_lock);
 
-                       if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-                               if (xfs_iflock_nowait(ip)) {
-                                       xfs_iflags_set(ip, XFS_ISTALE);
-
-                                       if (xfs_inode_clean(ip)) {
-                                               xfs_ifunlock(ip);
-                                               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                                       } else {
-                                               ip_found[found++] = ip;
-                                       }
-                               } else {
+                       if (!xfs_iflock_nowait(ip)) {
+                               if (ip != free_ip)
                                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                               }
+                               continue;
                        }
-                       read_unlock(&pag->pag_ici_lock);
-               }
 
-               bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 
-                                       mp->m_bsize * blks_per_cluster,
-                                       XBF_LOCK);
-
-               pre_flushed = 0;
-               lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
-               while (lip) {
-                       if (lip->li_type == XFS_LI_INODE) {
-                               iip = (xfs_inode_log_item_t *)lip;
-                               ASSERT(iip->ili_logged == 1);
-                               lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
-                               xfs_trans_ail_copy_lsn(mp->m_ail,
-                                                       &iip->ili_flush_lsn,
-                                                       &iip->ili_item.li_lsn);
-                               xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
-                               pre_flushed++;
+                       xfs_iflags_set(ip, XFS_ISTALE);
+                       if (xfs_inode_clean(ip)) {
+                               ASSERT(ip != free_ip);
+                               xfs_ifunlock(ip);
+                               xfs_iunlock(ip, XFS_ILOCK_EXCL);
+                               continue;
                        }
-                       lip = lip->li_bio_list;
-               }
 
-               for (i = 0; i < found; i++) {
-                       ip = ip_found[i];
                        iip = ip->i_itemp;
-
                        if (!iip) {
+                               /* inode with unlogged changes only */
+                               ASSERT(ip != free_ip);
                                ip->i_update_core = 0;
                                xfs_ifunlock(ip);
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                continue;
                        }
+                       found++;
 
                        iip->ili_last_fields = iip->ili_format.ilf_fields;
                        iip->ili_format.ilf_fields = 0;
@@ -2078,17 +2061,16 @@ xfs_ifree_cluster(
                        xfs_buf_attach_iodone(bp,
                                (void(*)(xfs_buf_t*,xfs_log_item_t*))
                                xfs_istale_done, (xfs_log_item_t *)iip);
-                       if (ip != free_ip) {
+
+                       if (ip != free_ip)
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       }
                }
 
-               if (found || pre_flushed)
+               if (found)
                        xfs_trans_stale_inode_buf(tp, bp);
                xfs_trans_binval(tp, bp);
        }
 
-       kmem_free(ip_found);
        xfs_perag_put(pag);
 }
 
@@ -2649,8 +2631,6 @@ xfs_iflush_cluster(
        int                     i;
 
        pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-       ASSERT(pag->pagi_inodeok);
-       ASSERT(pag->pag_ici_init);
 
        inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog;
        ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
index 14a69ae..ed0684c 100644 (file)
@@ -132,15 +132,10 @@ xlog_align(
        int             nbblks,
        xfs_buf_t       *bp)
 {
-       xfs_daddr_t     offset;
-       xfs_caddr_t     ptr;
+       xfs_daddr_t     offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
 
-       offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
-       ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
-
-       ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
-
-       return ptr;
+       ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
+       return XFS_BUF_PTR(bp) + BBTOB(offset);
 }
 
 
index d7bf38c..d59f4e8 100644 (file)
@@ -268,10 +268,10 @@ xfs_sb_validate_fsb_count(
 
 #if XFS_BIG_BLKNOS     /* Limited by ULONG_MAX of page cache index */
        if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
-               return E2BIG;
+               return EFBIG;
 #else                  /* Limited by UINT_MAX of sectors */
        if (nblocks << (sbp->sb_blocklog - BBSHIFT) > UINT_MAX)
-               return E2BIG;
+               return EFBIG;
 #endif
        return 0;
 }
@@ -393,7 +393,7 @@ xfs_mount_validate_sb(
            xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
                xfs_fs_mount_cmn_err(flags,
                        "file system too large to be mounted on this system.");
-               return XFS_ERROR(E2BIG);
+               return XFS_ERROR(EFBIG);
        }
 
        if (unlikely(sbp->sb_inprogress)) {
@@ -413,17 +413,6 @@ xfs_mount_validate_sb(
        return 0;
 }
 
-STATIC void
-xfs_initialize_perag_icache(
-       xfs_perag_t     *pag)
-{
-       if (!pag->pag_ici_init) {
-               rwlock_init(&pag->pag_ici_lock);
-               INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
-               pag->pag_ici_init = 1;
-       }
-}
-
 int
 xfs_initialize_perag(
        xfs_mount_t     *mp,
@@ -436,13 +425,8 @@ xfs_initialize_perag(
        xfs_agino_t     agino;
        xfs_ino_t       ino;
        xfs_sb_t        *sbp = &mp->m_sb;
-       xfs_ino_t       max_inum = XFS_MAXINUMBER_32;
        int             error = -ENOMEM;
 
-       /* Check to see if the filesystem can overflow 32 bit inodes */
-       agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
-       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
        /*
         * Walk the current per-ag tree so we don't try to initialise AGs
         * that already exist (growfs case). Allocate and insert all the
@@ -456,11 +440,18 @@ xfs_initialize_perag(
                }
                if (!first_initialised)
                        first_initialised = index;
+
                pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
                if (!pag)
                        goto out_unwind;
+               pag->pag_agno = index;
+               pag->pag_mount = mp;
+               rwlock_init(&pag->pag_ici_lock);
+               INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+
                if (radix_tree_preload(GFP_NOFS))
                        goto out_unwind;
+
                spin_lock(&mp->m_perag_lock);
                if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
                        BUG();
@@ -469,25 +460,26 @@ xfs_initialize_perag(
                        error = -EEXIST;
                        goto out_unwind;
                }
-               pag->pag_agno = index;
-               pag->pag_mount = mp;
                spin_unlock(&mp->m_perag_lock);
                radix_tree_preload_end();
        }
 
-       /* Clear the mount flag if no inode can overflow 32 bits
-        * on this filesystem, or if specifically requested..
+       /*
+        * If we mount with the inode64 option, or no inode overflows
+        * the legacy 32-bit address space, clear the inode32 option.
         */
-       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > max_inum) {
+       agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+       ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+       if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
                mp->m_flags |= XFS_MOUNT_32BITINODES;
-       } else {
+       else
                mp->m_flags &= ~XFS_MOUNT_32BITINODES;
-       }
 
-       /* If we can overflow then setup the ag headers accordingly */
        if (mp->m_flags & XFS_MOUNT_32BITINODES) {
-               /* Calculate how much should be reserved for inodes to
-                * meet the max inode percentage.
+               /*
+                * Calculate how much should be reserved for inodes to meet
+                * the max inode percentage.
                 */
                if (mp->m_maxicount) {
                        __uint64_t      icount;
@@ -500,30 +492,28 @@ xfs_initialize_perag(
                } else {
                        max_metadata = agcount;
                }
+
                for (index = 0; index < agcount; index++) {
                        ino = XFS_AGINO_TO_INO(mp, index, agino);
-                       if (ino > max_inum) {
+                       if (ino > XFS_MAXINUMBER_32) {
                                index++;
                                break;
                        }
 
-                       /* This ag is preferred for inodes */
                        pag = xfs_perag_get(mp, index);
                        pag->pagi_inodeok = 1;
                        if (index < max_metadata)
                                pag->pagf_metadata = 1;
-                       xfs_initialize_perag_icache(pag);
                        xfs_perag_put(pag);
                }
        } else {
-               /* Setup default behavior for smaller filesystems */
                for (index = 0; index < agcount; index++) {
                        pag = xfs_perag_get(mp, index);
                        pag->pagi_inodeok = 1;
-                       xfs_initialize_perag_icache(pag);
                        xfs_perag_put(pag);
                }
        }
+
        if (maxagi)
                *maxagi = index;
        return 0;
@@ -1009,7 +999,7 @@ xfs_check_sizes(xfs_mount_t *mp)
        d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
        if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
                cmn_err(CE_WARN, "XFS: size check 1 failed");
-               return XFS_ERROR(E2BIG);
+               return XFS_ERROR(EFBIG);
        }
        error = xfs_read_buf(mp, mp->m_ddev_targp,
                             d - XFS_FSS_TO_BB(mp, 1),
@@ -1019,7 +1009,7 @@ xfs_check_sizes(xfs_mount_t *mp)
        } else {
                cmn_err(CE_WARN, "XFS: size check 2 failed");
                if (error == ENOSPC)
-                       error = XFS_ERROR(E2BIG);
+                       error = XFS_ERROR(EFBIG);
                return error;
        }
 
@@ -1027,7 +1017,7 @@ xfs_check_sizes(xfs_mount_t *mp)
                d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
                if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
                        cmn_err(CE_WARN, "XFS: size check 3 failed");
-                       return XFS_ERROR(E2BIG);
+                       return XFS_ERROR(EFBIG);
                }
                error = xfs_read_buf(mp, mp->m_logdev_targp,
                                     d - XFS_FSB_TO_BB(mp, 1),
@@ -1037,7 +1027,7 @@ xfs_check_sizes(xfs_mount_t *mp)
                } else {
                        cmn_err(CE_WARN, "XFS: size check 3 failed");
                        if (error == ENOSPC)
-                               error = XFS_ERROR(E2BIG);
+                               error = XFS_ERROR(EFBIG);
                        return error;
                }
        }
@@ -1254,7 +1244,7 @@ xfs_mountfs(
         * Allocate and initialize the per-ag data.
         */
        spin_lock_init(&mp->m_perag_lock);
-       INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
+       INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
        error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
        if (error) {
                cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
index 6be05f7..1644551 100644 (file)
@@ -2247,7 +2247,7 @@ xfs_rtmount_init(
                cmn_err(CE_WARN, "XFS: realtime mount -- %llu != %llu",
                        (unsigned long long) XFS_BB_TO_FSB(mp, d),
                        (unsigned long long) mp->m_sb.sb_rblocks);
-               return XFS_ERROR(E2BIG);
+               return XFS_ERROR(EFBIG);
        }
        error = xfs_read_buf(mp, mp->m_rtdev_targp,
                                d - XFS_FSB_TO_BB(mp, 1),
@@ -2256,7 +2256,7 @@ xfs_rtmount_init(
                cmn_err(CE_WARN,
        "XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
                if (error == ENOSPC)
-                       return XFS_ERROR(E2BIG);
+                       return XFS_ERROR(EFBIG);
                return error;
        }
        xfs_buf_relse(bp);
index b2d67ad..ff614c2 100644 (file)
@@ -147,7 +147,16 @@ xfs_growfs_rt(
 # define xfs_rtfree_extent(t,b,l)                       (ENOSYS)
 # define xfs_rtpick_extent(m,t,l,rb)                    (ENOSYS)
 # define xfs_growfs_rt(mp,in)                           (ENOSYS)
-# define xfs_rtmount_init(m)    (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+static inline int              /* error */
+xfs_rtmount_init(
+       xfs_mount_t     *mp)    /* file system mount structure */
+{
+       if (mp->m_sb.sb_rblocks == 0)
+               return 0;
+
+       cmn_err(CE_WARN, "XFS: Not built with CONFIG_XFS_RT");
+       return ENOSYS;
+}
 # define xfs_rtmount_inodes(m)  (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
 # define xfs_rtunmount_inodes(m)
 #endif /* CONFIG_XFS_RT */
index ce558ef..28547df 100644 (file)
 
 kmem_zone_t    *xfs_trans_zone;
 
+
 /*
- * Reservation functions here avoid a huge stack in xfs_trans_init
- * due to register overflow from temporaries in the calculations.
+ * Various log reservation values.
+ *
+ * These are based on the size of the file system block because that is what
+ * most transactions manipulate.  Each adds in an additional 128 bytes per
+ * item logged to try to account for the overhead of the transaction mechanism.
+ *
+ * Note:  Most of the reservations underestimate the number of allocation
+ * groups into which they could free extents in the xfs_bmap_finish() call.
+ * This is because the number in the worst case is quite high and quite
+ * unusual.  In order to fix this we need to change xfs_bmap_finish() to free
+ * extents in only a single AG at a time.  This will require changes to the
+ * EFI code as well, however, so that the EFI for the extents not freed is
+ * logged again in each transaction.  See SGI PV #261917.
+ *
+ * Reservation functions here avoid a huge stack in xfs_trans_init due to
+ * register overflow from temporaries in the calculations.
+ */
+
+
+/*
+ * In a write transaction we can allocate a maximum of 2
+ * extents.  This gives:
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join:
+ *    the agfs of the ags containing the blocks: 2 * sector size
+ *    the agfls of the ags containing the blocks: 2 * sector size
+ *    the super block free block counter: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
  */
 STATIC uint
-xfs_calc_write_reservation(xfs_mount_t *mp)
+xfs_calc_write_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_WRITE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 2))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * In truncating a file we free up to two extents at once.  We can modify:
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: (max depth + 1) * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_itruncate_reservation(xfs_mount_t *mp)
+xfs_calc_itruncate_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ITRUNCATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) +
+                    128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+                   (4 * mp->m_sb.sb_sectsize +
+                    4 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 4) +
+                    128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4)) +
+                    128 * 5 +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * In renaming a file we can modify:
+ *    the four inodes involved: 4 * inode size
+ *    the two directory btrees: 2 * (max depth + v2) * dir block size
+ *    the two directory bmap btrees: 2 * max depth * block size
+ * And the bmap_finish transaction can free dir and bmap blocks (two sets
+ *     of bmap blocks) giving:
+ *    the agf for the ags in which the blocks live: 3 * sector size
+ *    the agfl for the ags in which the blocks live: 3 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_rename_reservation(xfs_mount_t *mp)
+xfs_calc_rename_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_RENAME_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((4 * mp->m_sb.sb_inodesize +
+                    2 * XFS_DIROP_LOG_RES(mp) +
+                    128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp))),
+                   (3 * mp->m_sb.sb_sectsize +
+                    3 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 3) +
+                    128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))));
 }
 
+/*
+ * For creating a link to an inode:
+ *    the parent directory inode: inode size
+ *    the linked inode: inode size
+ *    the directory btree could split: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free some bmap blocks giving:
+ *    the agf for the ag in which the blocks live: sector size
+ *    the agfl for the ag in which the blocks live: sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_link_reservation(xfs_mount_t *mp)
+xfs_calc_link_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_LINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+                   (mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * For removing a directory entry we can modify:
+ *    the parent directory inode: inode size
+ *    the removed inode: inode size
+ *    the directory btree could join: (max depth + v2) * dir block size
+ *    the directory bmap btree could join or split: (max depth + v2) * blocksize
+ * And the bmap_finish transaction can free the dir and bmap blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_remove_reservation(xfs_mount_t *mp)
+xfs_calc_remove_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_REMOVE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (2 + XFS_DIROP_LOG_COUNT(mp))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * For symlink we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: 1 block
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the blocks for the symlink: 1 kB
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_symlink_reservation(xfs_mount_t *mp)
+xfs_calc_symlink_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_SYMLINK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, 1) +
+                    XFS_DIROP_LOG_RES(mp) +
+                    1024 +
+                    128 * (4 + XFS_DIROP_LOG_COUNT(mp))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+                    XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * For create we can modify:
+ *    the parent directory inode: inode size
+ *    the new inode: inode size
+ *    the inode btree entry: block size
+ *    the superblock for the nlink flag: sector size
+ *    the directory btree: (max depth + v2) * dir block size
+ *    the directory inode's bmap btree: (max depth + v2) * block size
+ * Or in the first xact we allocate some inodes giving:
+ *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
+ *    the superblock for the nlink flag: sector size
+ *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_create_reservation(xfs_mount_t *mp)
+xfs_calc_create_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_CREATE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_inodesize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, 1) +
+                    XFS_DIROP_LOG_RES(mp) +
+                    128 * (3 + XFS_DIROP_LOG_COUNT(mp))),
+                   (3 * mp->m_sb.sb_sectsize +
+                    XFS_FSB_TO_B(mp, XFS_IALLOC_BLOCKS(mp)) +
+                    XFS_FSB_TO_B(mp, mp->m_in_maxlevels) +
+                    XFS_ALLOCFREE_LOG_RES(mp, 1) +
+                    128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                           XFS_ALLOCFREE_LOG_COUNT(mp, 1))));
 }
 
+/*
+ * Making a new directory is the same as creating a new file.
+ */
 STATIC uint
-xfs_calc_mkdir_reservation(xfs_mount_t *mp)
+xfs_calc_mkdir_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_MKDIR_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return xfs_calc_create_reservation(mp);
 }
 
+/*
+ * In freeing an inode we can modify:
+ *    the inode being freed: inode size
+ *    the super block free inode counter: sector size
+ *    the agi hash list and counters: sector size
+ *    the inode btree entry: block size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ *    the inode btree: max depth * blocksize
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_ifree_reservation(xfs_mount_t *mp)
+xfs_calc_ifree_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_IFREE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, 1) +
+               MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
+                   XFS_INODE_CLUSTER_SIZE(mp)) +
+               128 * 5 +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (2 + XFS_IALLOC_BLOCKS(mp) + mp->m_in_maxlevels +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * When only changing the inode we log the inode and possibly the superblock.
+ * We also add a bit of slop for the transaction stuff.
+ */
 STATIC uint
-xfs_calc_ichange_reservation(xfs_mount_t *mp)
+xfs_calc_ichange_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ICHANGE_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               512;
+
 }
 
+/*
+ * Growing the data section of the filesystem.
+ *     superblock
+ *     agi and agf
+ *     allocation btrees
+ */
 STATIC uint
-xfs_calc_growdata_reservation(xfs_mount_t *mp)
+xfs_calc_growdata_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWDATA_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize * 3 +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the first set of transactions (ALLOC) we allocate space to the
+ * bitmap or summary files.
+ *     superblock: sector size
+ *     agf of the ag from which the extent is allocated: sector size
+ *     bmap btree for bitmap/summary inode: max depth * blocksize
+ *     bitmap/summary inode: inode size
+ *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
+ */
 STATIC uint
-xfs_calc_growrtalloc_reservation(xfs_mount_t *mp)
+xfs_calc_growrtalloc_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTALLOC_LOG_RES(mp);
+       return 2 * mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) +
+               mp->m_sb.sb_inodesize +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the second set of transactions (ZERO) we zero the new metadata blocks.
+ *     one bitmap/summary block: blocksize
+ */
 STATIC uint
-xfs_calc_growrtzero_reservation(xfs_mount_t *mp)
+xfs_calc_growrtzero_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTZERO_LOG_RES(mp);
+       return mp->m_sb.sb_blocksize + 128;
 }
 
+/*
+ * Growing the rt section of the filesystem.
+ * In the third set of transactions (FREE) we update metadata without
+ * allocating any new blocks.
+ *     superblock: sector size
+ *     bitmap inode: inode size
+ *     summary inode: inode size
+ *     one bitmap block: blocksize
+ *     summary blocks: new summary size
+ */
 STATIC uint
-xfs_calc_growrtfree_reservation(xfs_mount_t *mp)
+xfs_calc_growrtfree_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_GROWRTFREE_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize +
+               2 * mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_blocksize +
+               mp->m_rsumsize +
+               128 * 5;
 }
 
+/*
+ * Logging the inode modification timestamp on a synchronous write.
+ *     inode
+ */
 STATIC uint
-xfs_calc_swrite_reservation(xfs_mount_t *mp)
+xfs_calc_swrite_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_SWRITE_LOG_RES(mp);
+       return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Logging the inode mode bits when writing a setuid/setgid file
+ *     inode
+ */
 STATIC uint
 xfs_calc_writeid_reservation(xfs_mount_t *mp)
 {
-       return XFS_CALC_WRITEID_LOG_RES(mp);
+       return mp->m_sb.sb_inodesize + 128;
 }
 
+/*
+ * Converting the inode from non-attributed to attributed.
+ *     the inode being converted: inode size
+ *     agf block and superblock (for block allocation)
+ *     the new block (directory sized)
+ *     bmap blocks for the new directory block
+ *     allocation btrees
+ */
 STATIC uint
-xfs_calc_addafork_reservation(xfs_mount_t *mp)
+xfs_calc_addafork_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ADDAFORK_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize * 2 +
+               mp->m_dirblksize +
+               XFS_FSB_TO_B(mp, XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) +
+               XFS_ALLOCFREE_LOG_RES(mp, 1) +
+               128 * (4 + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1 +
+                      XFS_ALLOCFREE_LOG_COUNT(mp, 1));
 }
 
+/*
+ * Removing the attribute fork of a file
+ *    the inode being truncated: inode size
+ *    the inode's bmap btree: max depth * block size
+ * And the bmap_finish transaction can free the blocks and bmap blocks:
+ *    the agf for each of the ags: 4 * sector size
+ *    the agfl for each of the ags: 4 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    worst case split in allocation btrees per extent assuming 4 extents:
+ *             4 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrinval_reservation(xfs_mount_t *mp)
+xfs_calc_attrinval_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRINVAL_LOG_RES(mp);
+       return MAX((mp->m_sb.sb_inodesize +
+                   XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+                   128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))),
+                  (4 * mp->m_sb.sb_sectsize +
+                   4 * mp->m_sb.sb_sectsize +
+                   mp->m_sb.sb_sectsize +
+                   XFS_ALLOCFREE_LOG_RES(mp, 4) +
+                   128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))));
 }
 
+/*
+ * Setting an attribute.
+ *     the inode getting the attribute
+ *     the superblock for allocations
+ *     the agfs extents are allocated from
+ *     the attribute btree * max depth
+ *     the inode allocation btree
+ * Since attribute transaction space is dependent on the size of the attribute,
+ * the calculation is done partially at mount time and partially at runtime.
+ */
 STATIC uint
-xfs_calc_attrset_reservation(xfs_mount_t *mp)
+xfs_calc_attrset_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRSET_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               mp->m_sb.sb_inodesize +
+               mp->m_sb.sb_sectsize +
+               XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+               128 * (2 + XFS_DA_NODE_MAXDEPTH);
 }
 
+/*
+ * Removing an attribute.
+ *    the inode: inode size
+ *    the attribute btree could join: max depth * block size
+ *    the inode bmap btree could join or split: max depth * block size
+ * And the bmap_finish transaction can free the attr blocks giving:
+ *    the agf for the ag in which the blocks live: 2 * sector size
+ *    the agfl for the ag in which the blocks live: 2 * sector size
+ *    the superblock for the free block count: sector size
+ *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ */
 STATIC uint
-xfs_calc_attrrm_reservation(xfs_mount_t *mp)
+xfs_calc_attrrm_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_ATTRRM_LOG_RES(mp) + XFS_DQUOT_LOGRES(mp);
+       return XFS_DQUOT_LOGRES(mp) +
+               MAX((mp->m_sb.sb_inodesize +
+                    XFS_FSB_TO_B(mp, XFS_DA_NODE_MAXDEPTH) +
+                    XFS_FSB_TO_B(mp, XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
+                    128 * (1 + XFS_DA_NODE_MAXDEPTH +
+                           XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK))),
+                   (2 * mp->m_sb.sb_sectsize +
+                    2 * mp->m_sb.sb_sectsize +
+                    mp->m_sb.sb_sectsize +
+                    XFS_ALLOCFREE_LOG_RES(mp, 2) +
+                    128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))));
 }
 
+/*
+ * Clearing a bad agino number in an agi hash bucket.
+ */
 STATIC uint
-xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
+xfs_calc_clear_agi_bucket_reservation(
+       struct xfs_mount        *mp)
 {
-       return XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp);
+       return mp->m_sb.sb_sectsize + 128;
 }
 
 /*
@@ -184,11 +539,10 @@ xfs_calc_clear_agi_bucket_reservation(xfs_mount_t *mp)
  */
 void
 xfs_trans_init(
-       xfs_mount_t     *mp)
+       struct xfs_mount        *mp)
 {
-       xfs_trans_reservations_t        *resp;
+       struct xfs_trans_reservations *resp = &mp->m_reservations;
 
-       resp = &(mp->m_reservations);
        resp->tr_write = xfs_calc_write_reservation(mp);
        resp->tr_itruncate = xfs_calc_itruncate_reservation(mp);
        resp->tr_rename = xfs_calc_rename_reservation(mp);
index 8c69e78..e639e8e 100644 (file)
@@ -299,24 +299,6 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
 #define        XFS_TRANS_SB_REXTSLOG           0x00002000
 
 
-/*
- * Various log reservation values.
- * These are based on the size of the file system block
- * because that is what most transactions manipulate.
- * Each adds in an additional 128 bytes per item logged to
- * try to account for the overhead of the transaction mechanism.
- *
- * Note:
- * Most of the reservations underestimate the number of allocation
- * groups into which they could free extents in the xfs_bmap_finish()
- * call.  This is because the number in the worst case is quite high
- * and quite unusual.  In order to fix this we need to change
- * xfs_bmap_finish() to free extents in only a single AG at a time.
- * This will require changes to the EFI code as well, however, so that
- * the EFI for the extents not freed is logged again in each transaction.
- * See bug 261917.
- */
-
 /*
  * Per-extent log reservation for the allocation btree changes
  * involved in freeing or allocating an extent.
@@ -341,429 +323,36 @@ xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp)
        (XFS_DAENTER_BLOCKS(mp, XFS_DATA_FORK) + \
         XFS_DAENTER_BMAPS(mp, XFS_DATA_FORK) + 1)
 
-/*
- * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
- *    the inode getting the new extents: inode size
- *    the inode's bmap btree: max depth * block size
- *    the agfs of the ags from which the extents are allocated: 2 * sector
- *    the superblock free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
- *    the agfs of the ags containing the blocks: 2 * sector size
- *    the agfls of the ags containing the blocks: 2 * sector size
- *    the super block free block counter: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define XFS_CALC_WRITE_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (4 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + XFS_ALLOCFREE_LOG_COUNT(mp, 2)))),\
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
 
 #define        XFS_WRITE_LOG_RES(mp)   ((mp)->m_reservations.tr_write)
-
-/*
- * In truncating a file we free up to two extents at once.  We can modify:
- *    the inode being truncated: inode size
- *    the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_ITRUNCATE_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1) + \
-         (128 * (2 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-        ((4 * (mp)->m_sb.sb_sectsize) + \
-         (4 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))) + \
-         (128 * 5) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-          (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-           XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_ITRUNCATE_LOG_RES(mp)   ((mp)->m_reservations.tr_itruncate)
-
-/*
- * In renaming a files we can modify:
- *    the four inodes involved: 4 * inode size
- *    the two directory btrees: 2 * (max depth + v2) * dir block size
- *    the two directory bmap btrees: 2 * max depth * block size
- * And the bmap_finish transaction can free dir and bmap blocks (two sets
- *     of bmap blocks) giving:
- *    the agf for the ags in which the blocks live: 3 * sector size
- *    the agfl for the ags in which the blocks live: 3 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_RENAME_LOG_RES(mp) \
-       (MAX( \
-        ((4 * (mp)->m_sb.sb_inodesize) + \
-         (2 * XFS_DIROP_LOG_RES(mp)) + \
-         (128 * (4 + 2 * XFS_DIROP_LOG_COUNT(mp)))), \
-        ((3 * (mp)->m_sb.sb_sectsize) + \
-         (3 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 3) + \
-         (128 * (7 + XFS_ALLOCFREE_LOG_COUNT(mp, 3))))))
-
 #define        XFS_RENAME_LOG_RES(mp)  ((mp)->m_reservations.tr_rename)
-
-/*
- * For creating a link to an inode:
- *    the parent directory inode: inode size
- *    the linked inode: inode size
- *    the directory btree could split: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free some bmap blocks giving:
- *    the agf for the ag in which the blocks live: sector size
- *    the agfl for the ag in which the blocks live: sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_LINK_LOG_RES(mp) \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-        ((mp)->m_sb.sb_sectsize + \
-         (mp)->m_sb.sb_sectsize + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_LINK_LOG_RES(mp)    ((mp)->m_reservations.tr_link)
-
-/*
- * For removing a directory entry we can modify:
- *    the parent directory inode: inode size
- *    the removed inode: inode size
- *    the directory btree could join: (max depth + v2) * dir block size
- *    the directory bmap btree could join or split: (max depth + v2) * blocksize
- * And the bmap_finish transaction can free the dir and bmap blocks giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_REMOVE_LOG_RES(mp)     \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (2 + XFS_DIROP_LOG_COUNT(mp)))), \
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define        XFS_REMOVE_LOG_RES(mp)  ((mp)->m_reservations.tr_remove)
-
-/*
- * For symlink we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: 1 block
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode's bmap btree: (max depth + v2) * block size
- *    the blocks for the symlink: 1 kB
- * Or in the first xact we allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_SYMLINK_LOG_RES(mp)            \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B(mp, 1) + \
-         XFS_DIROP_LOG_RES(mp) + \
-         1024 + \
-         (128 * (4 + XFS_DIROP_LOG_COUNT(mp)))), \
-        (2 * (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-         XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_SYMLINK_LOG_RES(mp) ((mp)->m_reservations.tr_symlink)
-
-/*
- * For create we can modify:
- *    the parent directory inode: inode size
- *    the new inode: inode size
- *    the inode btree entry: block size
- *    the superblock for the nlink flag: sector size
- *    the directory btree: (max depth + v2) * dir block size
- *    the directory inode's bmap btree: (max depth + v2) * block size
- * Or in the first xact we allocate some inodes giving:
- *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
- *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_CREATE_LOG_RES(mp)             \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_inodesize + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B(mp, 1) + \
-         XFS_DIROP_LOG_RES(mp) + \
-         (128 * (3 + XFS_DIROP_LOG_COUNT(mp)))), \
-        (3 * (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_IALLOC_BLOCKS((mp))) + \
-         XFS_FSB_TO_B((mp), (mp)->m_in_maxlevels) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))))
-
 #define        XFS_CREATE_LOG_RES(mp)  ((mp)->m_reservations.tr_create)
-
-/*
- * Making a new directory is the same as creating a new file.
- */
-#define        XFS_CALC_MKDIR_LOG_RES(mp)      XFS_CALC_CREATE_LOG_RES(mp)
-
 #define        XFS_MKDIR_LOG_RES(mp)   ((mp)->m_reservations.tr_mkdir)
-
-/*
- * In freeing an inode we can modify:
- *    the inode being freed: inode size
- *    the super block free inode counter: sector size
- *    the agi hash list and counters: sector size
- *    the inode btree entry: block size
- *    the on disk inode before ours in the agi hash list: inode cluster size
- *    the inode btree: max depth * blocksize
- *    the allocation btrees: 2 trees * (max depth - 1) * block size
- */
-#define        XFS_CALC_IFREE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize + \
-        (mp)->m_sb.sb_sectsize + \
-        XFS_FSB_TO_B((mp), 1) + \
-        MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
-        (128 * 5) + \
-         XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-         (128 * (2 + XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
-
 #define        XFS_IFREE_LOG_RES(mp)   ((mp)->m_reservations.tr_ifree)
-
-/*
- * When only changing the inode we log the inode and possibly the superblock
- * We also add a bit of slop for the transaction stuff.
- */
-#define        XFS_CALC_ICHANGE_LOG_RES(mp)    ((mp)->m_sb.sb_inodesize + \
-                                        (mp)->m_sb.sb_sectsize + 512)
-
 #define        XFS_ICHANGE_LOG_RES(mp) ((mp)->m_reservations.tr_ichange)
-
-/*
- * Growing the data section of the filesystem.
- *     superblock
- *     agi and agf
- *     allocation btrees
- */
-#define        XFS_CALC_GROWDATA_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize * 3 + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * (3 + XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_GROWDATA_LOG_RES(mp)    ((mp)->m_reservations.tr_growdata)
-
-/*
- * Growing the rt section of the filesystem.
- * In the first set of transactions (ALLOC) we allocate space to the
- * bitmap or summary files.
- *     superblock: sector size
- *     agf of the ag from which the extent is allocated: sector size
- *     bmap btree for bitmap/summary inode: max depth * blocksize
- *     bitmap/summary inode: inode size
- *     allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize
- */
-#define        XFS_CALC_GROWRTALLOC_LOG_RES(mp) \
-       (2 * (mp)->m_sb.sb_sectsize + \
-        XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)) + \
-        (mp)->m_sb.sb_inodesize + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * \
-         (3 + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + \
-          XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_GROWRTALLOC_LOG_RES(mp)     ((mp)->m_reservations.tr_growrtalloc)
-
-/*
- * Growing the rt section of the filesystem.
- * In the second set of transactions (ZERO) we zero the new metadata blocks.
- *     one bitmap/summary block: blocksize
- */
-#define        XFS_CALC_GROWRTZERO_LOG_RES(mp) \
-       ((mp)->m_sb.sb_blocksize + 128)
-
 #define        XFS_GROWRTZERO_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtzero)
-
-/*
- * Growing the rt section of the filesystem.
- * In the third set of transactions (FREE) we update metadata without
- * allocating any new blocks.
- *     superblock: sector size
- *     bitmap inode: inode size
- *     summary inode: inode size
- *     one bitmap block: blocksize
- *     summary blocks: new summary size
- */
-#define        XFS_CALC_GROWRTFREE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize + \
-        2 * (mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_blocksize + \
-        (mp)->m_rsumsize + \
-        (128 * 5))
-
 #define        XFS_GROWRTFREE_LOG_RES(mp)      ((mp)->m_reservations.tr_growrtfree)
-
-/*
- * Logging the inode modification timestamp on a synchronous write.
- *     inode
- */
-#define        XFS_CALC_SWRITE_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + 128)
-
 #define        XFS_SWRITE_LOG_RES(mp)  ((mp)->m_reservations.tr_swrite)
-
 /*
  * Logging the inode timestamps on an fsync -- same as SWRITE
  * as long as SWRITE logs the entire inode core
  */
 #define XFS_FSYNC_TS_LOG_RES(mp)        ((mp)->m_reservations.tr_swrite)
-
-/*
- * Logging the inode mode bits when writing a setuid/setgid file
- *     inode
- */
-#define        XFS_CALC_WRITEID_LOG_RES(mp) \
-       ((mp)->m_sb.sb_inodesize + 128)
-
 #define        XFS_WRITEID_LOG_RES(mp) ((mp)->m_reservations.tr_swrite)
-
-/*
- * Converting the inode from non-attributed to attributed.
- *     the inode being converted: inode size
- *     agf block and superblock (for block allocation)
- *     the new block (directory sized)
- *     bmap blocks for the new directory block
- *     allocation btrees
- */
-#define        XFS_CALC_ADDAFORK_LOG_RES(mp)   \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize * 2 + \
-        (mp)->m_dirblksize + \
-        XFS_FSB_TO_B(mp, (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1)) + \
-        XFS_ALLOCFREE_LOG_RES(mp, 1) + \
-        (128 * (4 + (XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1) + \
-                XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
-
 #define        XFS_ADDAFORK_LOG_RES(mp)        ((mp)->m_reservations.tr_addafork)
-
-/*
- * Removing the attribute fork of a file
- *    the inode being truncated: inode size
- *    the inode's bmap btree: max depth * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
- *    the agf for each of the ags: 4 * sector size
- *    the agfl for each of the ags: 4 * sector size
- *    the super block to reflect the freed blocks: sector size
- *    worst case split in allocation btrees per extent assuming 4 extents:
- *             4 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_ATTRINVAL_LOG_RES(mp)  \
-       (MAX( \
-        ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-         (128 * (1 + XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))), \
-        ((4 * (mp)->m_sb.sb_sectsize) + \
-         (4 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 4) + \
-         (128 * (9 + XFS_ALLOCFREE_LOG_COUNT(mp, 4))))))
-
 #define        XFS_ATTRINVAL_LOG_RES(mp)       ((mp)->m_reservations.tr_attrinval)
-
-/*
- * Setting an attribute.
- *     the inode getting the attribute
- *     the superblock for allocations
- *     the agfs extents are allocated from
- *     the attribute btree * max depth
- *     the inode allocation btree
- * Since attribute transaction space is dependent on the size of the attribute,
- * the calculation is done partially at mount time and partially at runtime.
- */
-#define        XFS_CALC_ATTRSET_LOG_RES(mp)    \
-       ((mp)->m_sb.sb_inodesize + \
-        (mp)->m_sb.sb_sectsize + \
-         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-         (128 * (2 + XFS_DA_NODE_MAXDEPTH)))
-
 #define        XFS_ATTRSET_LOG_RES(mp, ext)    \
        ((mp)->m_reservations.tr_attrset + \
         (ext * (mp)->m_sb.sb_sectsize) + \
         (ext * XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK))) + \
         (128 * (ext + (ext * XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)))))
-
-/*
- * Removing an attribute.
- *    the inode: inode size
- *    the attribute btree could join: max depth * block size
- *    the inode bmap btree could join or split: max depth * block size
- * And the bmap_finish transaction can free the attr blocks freed giving:
- *    the agf for the ag in which the blocks live: 2 * sector size
- *    the agfl for the ag in which the blocks live: 2 * sector size
- *    the superblock for the free block count: sector size
- *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- */
-#define        XFS_CALC_ATTRRM_LOG_RES(mp)     \
-       (MAX( \
-         ((mp)->m_sb.sb_inodesize + \
-         XFS_FSB_TO_B((mp), XFS_DA_NODE_MAXDEPTH) + \
-         XFS_FSB_TO_B((mp), XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + \
-         (128 * (1 + XFS_DA_NODE_MAXDEPTH + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK)))), \
-        ((2 * (mp)->m_sb.sb_sectsize) + \
-         (2 * (mp)->m_sb.sb_sectsize) + \
-         (mp)->m_sb.sb_sectsize + \
-         XFS_ALLOCFREE_LOG_RES(mp, 2) + \
-         (128 * (5 + XFS_ALLOCFREE_LOG_COUNT(mp, 2))))))
-
 #define        XFS_ATTRRM_LOG_RES(mp)  ((mp)->m_reservations.tr_attrrm)
-
-/*
- * Clearing a bad agino number in an agi hash bucket.
- */
-#define        XFS_CALC_CLEAR_AGI_BUCKET_LOG_RES(mp) \
-       ((mp)->m_sb.sb_sectsize + 128)
-
 #define        XFS_CLEAR_AGI_BUCKET_LOG_RES(mp)  ((mp)->m_reservations.tr_clearagi)
 
 
index 9d376be..a06bd62 100644 (file)
@@ -267,7 +267,7 @@ xfs_setattr(
                if (code) {
                        ASSERT(tp == NULL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
-                       ASSERT(lock_flags == XFS_IOLOCK_EXCL);
+                       ASSERT(lock_flags == XFS_IOLOCK_EXCL || !need_iolock);
                        goto error_return;
                }
                tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);