Merge branch 'master' into for-linus
author Alex Elder <aelder@sgi.com>
Wed, 19 May 2010 14:59:23 +0000 (09:59 -0500)
committer Alex Elder <aelder@sgi.com>
Wed, 19 May 2010 14:59:23 +0000 (09:59 -0500)
39 files changed:
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_buf.c
fs/xfs/linux-2.6/xfs_buf.h
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/linux-2.6/xfs_ioctl32.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/linux-2.6/xfs_trace.c
fs/xfs/linux-2.6/xfs_trace.h
fs/xfs/quota/xfs_dquot.c
fs/xfs/quota/xfs_dquot.h
fs/xfs/quota/xfs_dquot_item.c
fs/xfs/quota/xfs_qm.c
fs/xfs/quota/xfs_qm.h
fs/xfs/quota/xfs_qm_stats.c
fs/xfs/quota/xfs_qm_syscalls.c
fs/xfs/quota/xfs_quota_priv.h
fs/xfs/quota/xfs_trans_dquot.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_buf_item.h
fs/xfs/xfs_error.c
fs/xfs/xfs_error.h
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_iomap.c
fs/xfs/xfs_iomap.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_quota.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_buf.c

index 0f8b996..089eaca 100644 (file)
 #include <linux/pagevec.h>
 #include <linux/writeback.h>
 
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+       IO_READ,        /* mapping for a read */
+       IO_DELAY,       /* mapping covers delalloc region */
+       IO_UNWRITTEN,   /* mapping covers allocated but uninitialized data */
+       IO_NEW          /* just allocated */
+};
 
 /*
  * Prime number of hash buckets since address is used as the key.
@@ -103,8 +112,9 @@ xfs_count_page_state(
 
 STATIC struct block_device *
 xfs_find_bdev_for_inode(
-       struct xfs_inode        *ip)
+       struct inode            *inode)
 {
+       struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
 
        if (XFS_IS_REALTIME_INODE(ip))
@@ -183,7 +193,7 @@ xfs_setfilesize(
        xfs_fsize_t             isize;
 
        ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
-       ASSERT(ioend->io_type != IOMAP_READ);
+       ASSERT(ioend->io_type != IO_READ);
 
        if (unlikely(ioend->io_error))
                return 0;
@@ -214,7 +224,7 @@ xfs_finish_ioend(
        if (atomic_dec_and_test(&ioend->io_remaining)) {
                struct workqueue_struct *wq;
 
-               wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+               wq = (ioend->io_type == IO_UNWRITTEN) ?
                        xfsconvertd_workqueue : xfsdatad_workqueue;
                queue_work(wq, &ioend->io_work);
                if (wait)
@@ -237,7 +247,7 @@ xfs_end_io(
         * For unwritten extents we need to issue transactions to convert a
         * range to normal written extens after the data I/O has finished.
         */
-       if (ioend->io_type == IOMAP_UNWRITTEN &&
+       if (ioend->io_type == IO_UNWRITTEN &&
            likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
 
                error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -250,7 +260,7 @@ xfs_end_io(
         * We might have to update the on-disk file size after extending
         * writes.
         */
-       if (ioend->io_type != IOMAP_READ) {
+       if (ioend->io_type != IO_READ) {
                error = xfs_setfilesize(ioend);
                ASSERT(!error || error == EAGAIN);
        }
@@ -309,21 +319,25 @@ xfs_map_blocks(
        struct inode            *inode,
        loff_t                  offset,
        ssize_t                 count,
-       xfs_iomap_t             *mapp,
+       struct xfs_bmbt_irec    *imap,
        int                     flags)
 {
        int                     nmaps = 1;
+       int                     new = 0;
 
-       return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
+       return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
 }
 
 STATIC int
-xfs_iomap_valid(
-       xfs_iomap_t             *iomapp,
-       loff_t                  offset)
+xfs_imap_valid(
+       struct inode            *inode,
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
 {
-       return offset >= iomapp->iomap_offset &&
-               offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+       offset >>= inode->i_blkbits;
+
+       return offset >= imap->br_startoff &&
+               offset < imap->br_startoff + imap->br_blockcount;
 }
 
 /*
@@ -554,19 +568,23 @@ xfs_add_to_ioend(
 
 STATIC void
 xfs_map_buffer(
+       struct inode            *inode,
        struct buffer_head      *bh,
-       xfs_iomap_t             *mp,
-       xfs_off_t               offset,
-       uint                    block_bits)
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
 {
        sector_t                bn;
+       struct xfs_mount        *m = XFS_I(inode)->i_mount;
+       xfs_off_t               iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+       xfs_daddr_t             iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
 
-       ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-       bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
-             ((offset - mp->iomap_offset) >> block_bits);
+       bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+             ((offset - iomap_offset) >> inode->i_blkbits);
 
-       ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
+       ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
 
        bh->b_blocknr = bn;
        set_buffer_mapped(bh);
@@ -574,17 +592,17 @@ xfs_map_buffer(
 
 STATIC void
 xfs_map_at_offset(
+       struct inode            *inode,
        struct buffer_head      *bh,
-       loff_t                  offset,
-       int                     block_bits,
-       xfs_iomap_t             *iomapp)
+       struct xfs_bmbt_irec    *imap,
+       xfs_off_t               offset)
 {
-       ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
-       ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
        lock_buffer(bh);
-       xfs_map_buffer(bh, iomapp, offset, block_bits);
-       bh->b_bdev = iomapp->iomap_target->bt_bdev;
+       xfs_map_buffer(inode, bh, imap, offset);
+       bh->b_bdev = xfs_find_bdev_for_inode(inode);
        set_buffer_mapped(bh);
        clear_buffer_delay(bh);
        clear_buffer_unwritten(bh);
@@ -713,11 +731,11 @@ xfs_is_delayed_page(
                bh = head = page_buffers(page);
                do {
                        if (buffer_unwritten(bh))
-                               acceptable = (type == IOMAP_UNWRITTEN);
+                               acceptable = (type == IO_UNWRITTEN);
                        else if (buffer_delay(bh))
-                               acceptable = (type == IOMAP_DELAY);
+                               acceptable = (type == IO_DELAY);
                        else if (buffer_dirty(bh) && buffer_mapped(bh))
-                               acceptable = (type == IOMAP_NEW);
+                               acceptable = (type == IO_NEW);
                        else
                                break;
                } while ((bh = bh->b_this_page) != head);
@@ -740,7 +758,7 @@ xfs_convert_page(
        struct inode            *inode,
        struct page             *page,
        loff_t                  tindex,
-       xfs_iomap_t             *mp,
+       struct xfs_bmbt_irec    *imap,
        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
        int                     startio,
@@ -750,7 +768,6 @@ xfs_convert_page(
        xfs_off_t               end_offset;
        unsigned long           p_offset;
        unsigned int            type;
-       int                     bbits = inode->i_blkbits;
        int                     len, page_dirty;
        int                     count = 0, done = 0, uptodate = 1;
        xfs_off_t               offset = page_offset(page);
@@ -802,19 +819,19 @@ xfs_convert_page(
 
                if (buffer_unwritten(bh) || buffer_delay(bh)) {
                        if (buffer_unwritten(bh))
-                               type = IOMAP_UNWRITTEN;
+                               type = IO_UNWRITTEN;
                        else
-                               type = IOMAP_DELAY;
+                               type = IO_DELAY;
 
-                       if (!xfs_iomap_valid(mp, offset)) {
+                       if (!xfs_imap_valid(inode, imap, offset)) {
                                done = 1;
                                continue;
                        }
 
-                       ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
-                       ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+                       ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+                       ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
 
-                       xfs_map_at_offset(bh, offset, bbits, mp);
+                       xfs_map_at_offset(inode, bh, imap, offset);
                        if (startio) {
                                xfs_add_to_ioend(inode, bh, offset,
                                                type, ioendp, done);
@@ -826,7 +843,7 @@ xfs_convert_page(
                        page_dirty--;
                        count++;
                } else {
-                       type = IOMAP_NEW;
+                       type = IO_NEW;
                        if (buffer_mapped(bh) && all_bh && startio) {
                                lock_buffer(bh);
                                xfs_add_to_ioend(inode, bh, offset,
@@ -866,7 +883,7 @@ STATIC void
 xfs_cluster_write(
        struct inode            *inode,
        pgoff_t                 tindex,
-       xfs_iomap_t             *iomapp,
+       struct xfs_bmbt_irec    *imap,
        xfs_ioend_t             **ioendp,
        struct writeback_control *wbc,
        int                     startio,
@@ -885,7 +902,7 @@ xfs_cluster_write(
 
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        done = xfs_convert_page(inode, pvec.pages[i], tindex++,
-                                       iomapp, ioendp, wbc, startio, all_bh);
+                                       imap, ioendp, wbc, startio, all_bh);
                        if (done)
                                break;
                }
@@ -930,7 +947,7 @@ xfs_aops_discard_page(
        loff_t                  offset = page_offset(page);
        ssize_t                 len = 1 << inode->i_blkbits;
 
-       if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+       if (!xfs_is_delayed_page(page, IO_DELAY))
                goto out_invalidate;
 
        if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1042,15 +1059,15 @@ xfs_page_state_convert(
        int             unmapped) /* also implies page uptodate */
 {
        struct buffer_head      *bh, *head;
-       xfs_iomap_t             iomap;
+       struct xfs_bmbt_irec    imap;
        xfs_ioend_t             *ioend = NULL, *iohead = NULL;
        loff_t                  offset;
        unsigned long           p_offset = 0;
        unsigned int            type;
        __uint64_t              end_offset;
-       pgoff_t                 end_index, last_index, tlast;
+       pgoff_t                 end_index, last_index;
        ssize_t                 size, len;
-       int                     flags, err, iomap_valid = 0, uptodate = 1;
+       int                     flags, err, imap_valid = 0, uptodate = 1;
        int                     page_dirty, count = 0;
        int                     trylock = 0;
        int                     all_bh = unmapped;
@@ -1097,7 +1114,7 @@ xfs_page_state_convert(
        bh = head = page_buffers(page);
        offset = page_offset(page);
        flags = BMAPI_READ;
-       type = IOMAP_NEW;
+       type = IO_NEW;
 
        /* TODO: cleanup count and page_dirty */
 
@@ -1111,12 +1128,12 @@ xfs_page_state_convert(
                         * the iomap is actually still valid, but the ioend
                         * isn't.  shouldn't happen too often.
                         */
-                       iomap_valid = 0;
+                       imap_valid = 0;
                        continue;
                }
 
-               if (iomap_valid)
-                       iomap_valid = xfs_iomap_valid(&iomap, offset);
+               if (imap_valid)
+                       imap_valid = xfs_imap_valid(inode, &imap, offset);
 
                /*
                 * First case, map an unwritten extent and prepare for
@@ -1137,20 +1154,20 @@ xfs_page_state_convert(
                         * Make sure we don't use a read-only iomap
                         */
                        if (flags == BMAPI_READ)
-                               iomap_valid = 0;
+                               imap_valid = 0;
 
                        if (buffer_unwritten(bh)) {
-                               type = IOMAP_UNWRITTEN;
+                               type = IO_UNWRITTEN;
                                flags = BMAPI_WRITE | BMAPI_IGNSTATE;
                        } else if (buffer_delay(bh)) {
-                               type = IOMAP_DELAY;
+                               type = IO_DELAY;
                                flags = BMAPI_ALLOCATE | trylock;
                        } else {
-                               type = IOMAP_NEW;
+                               type = IO_NEW;
                                flags = BMAPI_WRITE | BMAPI_MMAP;
                        }
 
-                       if (!iomap_valid) {
+                       if (!imap_valid) {
                                /*
                                 * if we didn't have a valid mapping then we
                                 * need to ensure that we put the new mapping
@@ -1160,7 +1177,7 @@ xfs_page_state_convert(
                                 * for unwritten extent conversion.
                                 */
                                new_ioend = 1;
-                               if (type == IOMAP_NEW) {
+                               if (type == IO_NEW) {
                                        size = xfs_probe_cluster(inode,
                                                        page, bh, head, 0);
                                } else {
@@ -1168,14 +1185,14 @@ xfs_page_state_convert(
                                }
 
                                err = xfs_map_blocks(inode, offset, size,
-                                               &iomap, flags);
+                                               &imap, flags);
                                if (err)
                                        goto error;
-                               iomap_valid = xfs_iomap_valid(&iomap, offset);
+                               imap_valid = xfs_imap_valid(inode, &imap,
+                                                           offset);
                        }
-                       if (iomap_valid) {
-                               xfs_map_at_offset(bh, offset,
-                                               inode->i_blkbits, &iomap);
+                       if (imap_valid) {
+                               xfs_map_at_offset(inode, bh, &imap, offset);
                                if (startio) {
                                        xfs_add_to_ioend(inode, bh, offset,
                                                        type, &ioend,
@@ -1194,40 +1211,41 @@ xfs_page_state_convert(
                         * That means it must already have extents allocated
                         * underneath it. Map the extent by reading it.
                         */
-                       if (!iomap_valid || flags != BMAPI_READ) {
+                       if (!imap_valid || flags != BMAPI_READ) {
                                flags = BMAPI_READ;
                                size = xfs_probe_cluster(inode, page, bh,
                                                                head, 1);
                                err = xfs_map_blocks(inode, offset, size,
-                                               &iomap, flags);
+                                               &imap, flags);
                                if (err)
                                        goto error;
-                               iomap_valid = xfs_iomap_valid(&iomap, offset);
+                               imap_valid = xfs_imap_valid(inode, &imap,
+                                                           offset);
                        }
 
                        /*
-                        * We set the type to IOMAP_NEW in case we are doing a
+                        * We set the type to IO_NEW in case we are doing a
                         * small write at EOF that is extending the file but
                         * without needing an allocation. We need to update the
                         * file size on I/O completion in this case so it is
                         * the same case as having just allocated a new extent
                         * that we are writing into for the first time.
                         */
-                       type = IOMAP_NEW;
+                       type = IO_NEW;
                        if (trylock_buffer(bh)) {
                                ASSERT(buffer_mapped(bh));
-                               if (iomap_valid)
+                               if (imap_valid)
                                        all_bh = 1;
                                xfs_add_to_ioend(inode, bh, offset, type,
-                                               &ioend, !iomap_valid);
+                                               &ioend, !imap_valid);
                                page_dirty--;
                                count++;
                        } else {
-                               iomap_valid = 0;
+                               imap_valid = 0;
                        }
                } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
                           (unmapped || startio)) {
-                       iomap_valid = 0;
+                       imap_valid = 0;
                }
 
                if (!iohead)
@@ -1241,12 +1259,23 @@ xfs_page_state_convert(
        if (startio)
                xfs_start_page_writeback(page, 1, count);
 
-       if (ioend && iomap_valid) {
-               offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
-                                       PAGE_CACHE_SHIFT;
-               tlast = min_t(pgoff_t, offset, last_index);
-               xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
-                                       wbc, startio, all_bh, tlast);
+       if (ioend && imap_valid) {
+               xfs_off_t               end_index;
+
+               end_index = imap.br_startoff + imap.br_blockcount;
+
+               /* to bytes */
+               end_index <<= inode->i_blkbits;
+
+               /* to pages */
+               end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+               /* check against file size */
+               if (end_index > last_index)
+                       end_index = last_index;
+
+               xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+                                       wbc, startio, all_bh, end_index);
        }
 
        if (iohead)
@@ -1448,10 +1477,11 @@ __xfs_get_blocks(
        int                     direct,
        bmapi_flags_t           flags)
 {
-       xfs_iomap_t             iomap;
+       struct xfs_bmbt_irec    imap;
        xfs_off_t               offset;
        ssize_t                 size;
-       int                     niomap = 1;
+       int                     nimap = 1;
+       int                     new = 0;
        int                     error;
 
        offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1462,22 +1492,21 @@ __xfs_get_blocks(
                return 0;
 
        error = xfs_iomap(XFS_I(inode), offset, size,
-                            create ? flags : BMAPI_READ, &iomap, &niomap);
+                            create ? flags : BMAPI_READ, &imap, &nimap, &new);
        if (error)
                return -error;
-       if (niomap == 0)
+       if (nimap == 0)
                return 0;
 
-       if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+       if (imap.br_startblock != HOLESTARTBLOCK &&
+           imap.br_startblock != DELAYSTARTBLOCK) {
                /*
                 * For unwritten extents do not report a disk address on
                 * the read case (treat as if we're reading into a hole).
                 */
-               if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
-                       xfs_map_buffer(bh_result, &iomap, offset,
-                                      inode->i_blkbits);
-               }
-               if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+               if (create || !ISUNWRITTEN(&imap))
+                       xfs_map_buffer(inode, bh_result, &imap, offset);
+               if (create && ISUNWRITTEN(&imap)) {
                        if (direct)
                                bh_result->b_private = inode;
                        set_buffer_unwritten(bh_result);
@@ -1488,7 +1517,7 @@ __xfs_get_blocks(
         * If this is a realtime file, data may be on a different device.
         * to that pointed to from the buffer_head b_bdev currently.
         */
-       bh_result->b_bdev = iomap.iomap_target->bt_bdev;
+       bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
 
        /*
         * If we previously allocated a block out beyond eof and we are now
@@ -1502,10 +1531,10 @@ __xfs_get_blocks(
        if (create &&
            ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
             (offset >= i_size_read(inode)) ||
-            (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
+            (new || ISUNWRITTEN(&imap))))
                set_buffer_new(bh_result);
 
-       if (iomap.iomap_flags & IOMAP_DELAY) {
+       if (imap.br_startblock == DELAYSTARTBLOCK) {
                BUG_ON(direct);
                if (create) {
                        set_buffer_uptodate(bh_result);
@@ -1514,11 +1543,23 @@ __xfs_get_blocks(
                }
        }
 
+       /*
+        * If this is O_DIRECT or the mpage code calling tell them how large
+        * the mapping is, so that we can avoid repeated get_blocks calls.
+        */
        if (direct || size > (1 << inode->i_blkbits)) {
-               ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
-               offset = min_t(xfs_off_t,
-                               iomap.iomap_bsize - iomap.iomap_delta, size);
-               bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
+               xfs_off_t               mapping_size;
+
+               mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+               mapping_size <<= inode->i_blkbits;
+
+               ASSERT(mapping_size > 0);
+               if (mapping_size > size)
+                       mapping_size = size;
+               if (mapping_size > LONG_MAX)
+                       mapping_size = LONG_MAX;
+
+               bh_result->b_size = mapping_size;
        }
 
        return 0;
@@ -1576,7 +1617,7 @@ xfs_end_io_direct(
         */
        ioend->io_offset = offset;
        ioend->io_size = size;
-       if (ioend->io_type == IOMAP_READ) {
+       if (ioend->io_type == IO_READ) {
                xfs_finish_ioend(ioend, 0);
        } else if (private && size > 0) {
                xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1587,7 +1628,7 @@ xfs_end_io_direct(
                 * didn't map an unwritten extent so switch it's completion
                 * handler.
                 */
-               ioend->io_type = IOMAP_NEW;
+               ioend->io_type = IO_NEW;
                xfs_finish_ioend(ioend, 0);
        }
 
@@ -1612,10 +1653,10 @@ xfs_vm_direct_IO(
        struct block_device *bdev;
        ssize_t         ret;
 
-       bdev = xfs_find_bdev_for_inode(XFS_I(inode));
+       bdev = xfs_find_bdev_for_inode(inode);
 
        iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
-                                       IOMAP_UNWRITTEN : IOMAP_READ);
+                                       IO_UNWRITTEN : IO_READ);
 
        ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
                                            offset, nr_segs,
index 44c2b0e..f01de3c 100644 (file)
@@ -1007,25 +1007,20 @@ xfs_bwrite(
        struct xfs_mount        *mp,
        struct xfs_buf          *bp)
 {
-       int                     iowait = (bp->b_flags & XBF_ASYNC) == 0;
-       int                     error = 0;
+       int                     error;
 
        bp->b_strat = xfs_bdstrat_cb;
        bp->b_mount = mp;
        bp->b_flags |= XBF_WRITE;
-       if (!iowait)
-               bp->b_flags |= _XBF_RUN_QUEUES;
+       bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
 
        xfs_buf_delwri_dequeue(bp);
        xfs_buf_iostrategy(bp);
 
-       if (iowait) {
-               error = xfs_buf_iowait(bp);
-               if (error)
-                       xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
-               xfs_buf_relse(bp);
-       }
-
+       error = xfs_buf_iowait(bp);
+       if (error)
+               xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+       xfs_buf_relse(bp);
        return error;
 }
 
@@ -1614,7 +1609,8 @@ xfs_mapping_buftarg(
 
 STATIC int
 xfs_alloc_delwrite_queue(
-       xfs_buftarg_t           *btp)
+       xfs_buftarg_t           *btp,
+       const char              *fsname)
 {
        int     error = 0;
 
@@ -1622,7 +1618,7 @@ xfs_alloc_delwrite_queue(
        INIT_LIST_HEAD(&btp->bt_delwrite_queue);
        spin_lock_init(&btp->bt_delwrite_lock);
        btp->bt_flags = 0;
-       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+       btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
        if (IS_ERR(btp->bt_task)) {
                error = PTR_ERR(btp->bt_task);
                goto out_error;
@@ -1635,7 +1631,8 @@ out_error:
 xfs_buftarg_t *
 xfs_alloc_buftarg(
        struct block_device     *bdev,
-       int                     external)
+       int                     external,
+       const char              *fsname)
 {
        xfs_buftarg_t           *btp;
 
@@ -1647,7 +1644,7 @@ xfs_alloc_buftarg(
                goto error;
        if (xfs_mapping_buftarg(btp, bdev))
                goto error;
-       if (xfs_alloc_delwrite_queue(btp))
+       if (xfs_alloc_delwrite_queue(btp, fsname))
                goto error;
        xfs_alloc_bufhash(btp, external);
        return btp;
index 386e736..5fbecef 100644 (file)
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
 /*
  *     Handling of buftargs.
  */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
 extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
 extern void xfs_wait_buftarg(xfs_buftarg_t *);
 extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
index 42dd3bc..d8fb1b5 100644 (file)
@@ -115,6 +115,8 @@ xfs_file_fsync(
 
        xfs_iflags_clear(ip, XFS_ITRUNCATED);
 
+       xfs_ioend_wait(ip);
+
        /*
         * We always need to make sure that the required inode state is safe on
         * disk.  The inode might be clean but we still might need to force the
index 7b26cc2..699b60c 100644 (file)
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
        if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
                return -XFS_ERROR(EFAULT);
 
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+               return -E2BIG;
+
        dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
index 593c05b..9287135 100644 (file)
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
                           sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
                return -XFS_ERROR(EFAULT);
 
+       /* overflow check */
+       if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+               return -E2BIG;
+
        dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
index e65a793..9c8019c 100644 (file)
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
                bm.bmv_length = BTOBB(length);
 
        /* We add one because in getbmap world count includes the header */
-       bm.bmv_count = fieinfo->fi_extents_max + 1;
+       bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+                                       fieinfo->fi_extents_max + 1;
+       bm.bmv_count = min_t(__s32, bm.bmv_count,
+                            (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
        bm.bmv_iflags = BMV_IF_PREALLOC;
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
                bm.bmv_iflags |= BMV_IF_ATTRFORK;
index 29f1edc..e900251 100644 (file)
@@ -789,18 +789,18 @@ xfs_open_devices(
         * Setup xfs_mount buffer target pointers
         */
        error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
        if (!mp->m_ddev_targp)
                goto out_close_rtdev;
 
        if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
                if (!mp->m_rtdev_targp)
                        goto out_free_ddev_targ;
        }
 
        if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
                if (!mp->m_logdev_targp)
                        goto out_free_rtdev_targ;
        } else {
@@ -902,7 +902,8 @@ xfsaild_start(
        struct xfs_ail  *ailp)
 {
        ailp->xa_target = 0;
-       ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+       ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+                                   ailp->xa_mount->m_fsname);
        if (IS_ERR(ailp->xa_task))
                return -PTR_ERR(ailp->xa_task);
        return 0;
@@ -1092,6 +1093,7 @@ xfs_fs_write_inode(
                 * the code will only flush the inode if it isn't already
                 * being flushed.
                 */
+               xfs_ioend_wait(ip);
                xfs_ilock(ip, XFS_ILOCK_SHARED);
                if (ip->i_update_core) {
                        error = xfs_log_inode(ip);
index a427c63..3884e20 100644 (file)
@@ -356,68 +356,23 @@ xfs_commit_dummy_trans(
 
 STATIC int
 xfs_sync_fsdata(
-       struct xfs_mount        *mp,
-       int                     flags)
+       struct xfs_mount        *mp)
 {
        struct xfs_buf          *bp;
-       struct xfs_buf_log_item *bip;
-       int                     error = 0;
 
        /*
-        * If this is xfssyncd() then only sync the superblock if we can
-        * lock it without sleeping and it is not pinned.
+        * If the buffer is pinned then push on the log so we won't get stuck
+        * waiting in the write for someone, maybe ourselves, to flush the log.
+        *
+        * Even though we just pushed the log above, we did not have the
+        * superblock buffer locked at that point so it can become pinned in
+        * between there and here.
         */
-       if (flags & SYNC_TRYLOCK) {
-               ASSERT(!(flags & SYNC_WAIT));
-
-               bp = xfs_getsb(mp, XBF_TRYLOCK);
-               if (!bp)
-                       goto out;
-
-               bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
-               if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
-                       goto out_brelse;
-       } else {
-               bp = xfs_getsb(mp, 0);
-
-               /*
-                * If the buffer is pinned then push on the log so we won't
-                * get stuck waiting in the write for someone, maybe
-                * ourselves, to flush the log.
-                *
-                * Even though we just pushed the log above, we did not have
-                * the superblock buffer locked at that point so it can
-                * become pinned in between there and here.
-                */
-               if (XFS_BUF_ISPINNED(bp))
-                       xfs_log_force(mp, 0);
-       }
-
-
-       if (flags & SYNC_WAIT)
-               XFS_BUF_UNASYNC(bp);
-       else
-               XFS_BUF_ASYNC(bp);
-
-       error = xfs_bwrite(mp, bp);
-       if (error)
-               return error;
-
-       /*
-        * If this is a data integrity sync make sure all pending buffers
-        * are flushed out for the log coverage check below.
-        */
-       if (flags & SYNC_WAIT)
-               xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
-       if (xfs_log_need_covered(mp))
-               error = xfs_commit_dummy_trans(mp, flags);
-       return error;
+       bp = xfs_getsb(mp, 0);
+       if (XFS_BUF_ISPINNED(bp))
+               xfs_log_force(mp, 0);
 
- out_brelse:
-       xfs_buf_relse(bp);
- out:
-       return error;
+       return xfs_bwrite(mp, bp);
 }
 
 /*
@@ -441,7 +396,7 @@ int
 xfs_quiesce_data(
        struct xfs_mount        *mp)
 {
-       int error;
+       int                     error, error2 = 0;
 
        /* push non-blocking */
        xfs_sync_data(mp, 0);
@@ -452,13 +407,20 @@ xfs_quiesce_data(
        xfs_qm_sync(mp, SYNC_WAIT);
 
        /* write superblock and hoover up shutdown errors */
-       error = xfs_sync_fsdata(mp, SYNC_WAIT);
+       error = xfs_sync_fsdata(mp);
+
+       /* make sure all delwri buffers are written out */
+       xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+       /* mark the log as covered if needed */
+       if (xfs_log_need_covered(mp))
+               error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
 
        /* flush data-only devices */
        if (mp->m_rtdev_targp)
                XFS_bflush(mp->m_rtdev_targp);
 
-       return error;
+       return error ? error : error2;
 }
 
 STATIC void
@@ -581,9 +543,9 @@ xfs_flush_inodes(
 }
 
 /*
- * Every sync period we need to unpin all items, reclaim inodes, sync
- * quota and write out the superblock. We might need to cover the log
- * to indicate it is idle.
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas.  We might need to cover the log to indicate that the
+ * filesystem is idle.
  */
 STATIC void
 xfs_sync_worker(
@@ -597,7 +559,8 @@ xfs_sync_worker(
                xfs_reclaim_inodes(mp, 0);
                /* dgc: errors ignored here */
                error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-               error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
+               if (xfs_log_need_covered(mp))
+                       error = xfs_commit_dummy_trans(mp, 0);
        }
        mp->m_sync_seq++;
        wake_up(&mp->m_wait_single_sync_task);
@@ -660,7 +623,7 @@ xfs_syncd_init(
        mp->m_sync_work.w_syncer = xfs_sync_worker;
        mp->m_sync_work.w_mount = mp;
        mp->m_sync_work.w_completion = NULL;
-       mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+       mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
        if (IS_ERR(mp->m_sync_task))
                return -PTR_ERR(mp->m_sync_task);
        return 0;
index 5a10760..207fa77 100644 (file)
@@ -41,7 +41,6 @@
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_attr.h"
-#include "xfs_attr_sf.h"
 #include "xfs_attr_leaf.h"
 #include "xfs_log_priv.h"
 #include "xfs_buf_item.h"
@@ -50,6 +49,9 @@
 #include "xfs_aops.h"
 #include "quota/xfs_dquot_item.h"
 #include "quota/xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
 
 /*
  * We include this last to have the helpers above available for the trace
index fcaa62f..8a319cf 100644 (file)
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
 struct xfs_dquot;
 struct xlog_ticket;
 struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
 
 DECLARE_EVENT_CLASS(xfs_attr_list_class,
        TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -562,18 +566,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
                __field(dev_t, dev)
                __field(xfs_ino_t, ino)
                __field(int, count)
+               __field(int, pincount)
                __field(unsigned long, caller_ip)
        ),
        TP_fast_assign(
                __entry->dev = VFS_I(ip)->i_sb->s_dev;
                __entry->ino = ip->i_ino;
                __entry->count = atomic_read(&VFS_I(ip)->i_count);
+               __entry->pincount = atomic_read(&ip->i_pincount);
                __entry->caller_ip = caller_ip;
        ),
-       TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
+       TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->ino,
                  __entry->count,
+                 __entry->pincount,
                  (char *)__entry->caller_ip)
 )
 
@@ -583,6 +590,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
        TP_ARGS(ip, caller_ip))
 DEFINE_INODE_EVENT(xfs_ihold);
 DEFINE_INODE_EVENT(xfs_irele);
+DEFINE_INODE_EVENT(xfs_inode_pin);
+DEFINE_INODE_EVENT(xfs_inode_unpin);
+DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+
 /* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
 DEFINE_INODE_EVENT(xfs_inode);
 #define xfs_itrace_entry(ip)    \
@@ -642,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
        TP_PROTO(struct xfs_dquot *dqp), \
        TP_ARGS(dqp))
 DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
@@ -658,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
 DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
 DEFINE_DQUOT_EVENT(xfs_dqget_hit);
 DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -1495,6 +1503,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
 DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
 
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+       TP_PROTO(struct log *log, struct xlog_recover *trans,
+               struct xlog_recover_item *item, int pass),
+       TP_ARGS(log, trans, item, pass),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned long, item)
+               __field(xlog_tid_t, tid)
+               __field(int, type)
+               __field(int, pass)
+               __field(int, count)
+               __field(int, total)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->item = (unsigned long)item;
+               __entry->tid = trans->r_log_tid;
+               __entry->type = ITEM_TYPE(item);
+               __entry->pass = pass;
+               __entry->count = item->ri_cnt;
+               __entry->total = item->ri_total;
+       ),
+       TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+                 "item region count/total %d/%d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->tid,
+                 __entry->pass,
+                 (void *)__entry->item,
+                 __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+                 __entry->count,
+                 __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+       TP_PROTO(struct log *log, struct xlog_recover *trans, \
+               struct xlog_recover_item *item, int pass), \
+       TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+       TP_ARGS(log, buf_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(__int64_t, blkno)
+               __field(unsigned short, len)
+               __field(unsigned short, flags)
+               __field(unsigned short, size)
+               __field(unsigned int, map_size)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->blkno = buf_f->blf_blkno;
+               __entry->len = buf_f->blf_len;
+               __entry->flags = buf_f->blf_flags;
+               __entry->size = buf_f->blf_size;
+               __entry->map_size = buf_f->blf_map_size;
+       ),
+       TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+                       "map_size %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->flags,
+                 __entry->size,
+                 __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+       TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+       TP_ARGS(log, in_f),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+               __field(unsigned short, size)
+               __field(int, fields)
+               __field(unsigned short, asize)
+               __field(unsigned short, dsize)
+               __field(__int64_t, blkno)
+               __field(int, len)
+               __field(int, boffset)
+       ),
+       TP_fast_assign(
+               __entry->dev = log->l_mp->m_super->s_dev;
+               __entry->ino = in_f->ilf_ino;
+               __entry->size = in_f->ilf_size;
+               __entry->fields = in_f->ilf_fields;
+               __entry->asize = in_f->ilf_asize;
+               __entry->dsize = in_f->ilf_dsize;
+               __entry->blkno = in_f->ilf_blkno;
+               __entry->len = in_f->ilf_len;
+               __entry->boffset = in_f->ilf_boffset;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+                       "dsize %d, blkno 0x%llx, len %d, boffset %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->ino,
+                 __entry->size,
+                 __entry->fields,
+                 __entry->asize,
+                 __entry->dsize,
+                 __entry->blkno,
+                 __entry->len,
+                 __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+       TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+       TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
index 5f79dd7..b89ec5d 100644 (file)
@@ -101,7 +101,7 @@ xfs_qm_dqinit(
         * No need to re-initialize these if this is a reclaimed dquot.
         */
        if (brandnewdquot) {
-               dqp->dq_flnext = dqp->dq_flprev = dqp;
+               INIT_LIST_HEAD(&dqp->q_freelist);
                mutex_init(&dqp->q_qlock);
                init_waitqueue_head(&dqp->q_pinwait);
 
@@ -119,20 +119,20 @@ xfs_qm_dqinit(
                 * Only the q_core portion was zeroed in dqreclaim_one().
                 * So, we need to reset others.
                 */
-                dqp->q_nrefs = 0;
-                dqp->q_blkno = 0;
-                dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
-                dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
-                dqp->q_bufoffset = 0;
-                dqp->q_fileoffset = 0;
-                dqp->q_transp = NULL;
-                dqp->q_gdquot = NULL;
-                dqp->q_res_bcount = 0;
-                dqp->q_res_icount = 0;
-                dqp->q_res_rtbcount = 0;
-                atomic_set(&dqp->q_pincount, 0);
-                dqp->q_hash = NULL;
-                ASSERT(dqp->dq_flnext == dqp->dq_flprev);
+               dqp->q_nrefs = 0;
+               dqp->q_blkno = 0;
+               INIT_LIST_HEAD(&dqp->q_mplist);
+               INIT_LIST_HEAD(&dqp->q_hashlist);
+               dqp->q_bufoffset = 0;
+               dqp->q_fileoffset = 0;
+               dqp->q_transp = NULL;
+               dqp->q_gdquot = NULL;
+               dqp->q_res_bcount = 0;
+               dqp->q_res_icount = 0;
+               dqp->q_res_rtbcount = 0;
+               atomic_set(&dqp->q_pincount, 0);
+               dqp->q_hash = NULL;
+               ASSERT(list_empty(&dqp->q_freelist));
 
                trace_xfs_dqreuse(dqp);
        }
@@ -158,7 +158,7 @@ void
 xfs_qm_dqdestroy(
        xfs_dquot_t     *dqp)
 {
-       ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
+       ASSERT(list_empty(&dqp->q_freelist));
 
        mutex_destroy(&dqp->q_qlock);
        sv_destroy(&dqp->q_pinwait);
@@ -252,7 +252,7 @@ xfs_qm_adjust_dqtimers(
                     (be64_to_cpu(d->d_bcount) >=
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
-                                       XFS_QI_BTIMELIMIT(mp));
+                                       mp->m_quotainfo->qi_btimelimit);
                } else {
                        d->d_bwarns = 0;
                }
@@ -275,7 +275,7 @@ xfs_qm_adjust_dqtimers(
                     (be64_to_cpu(d->d_icount) >=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
-                                       XFS_QI_ITIMELIMIT(mp));
+                                       mp->m_quotainfo->qi_itimelimit);
                } else {
                        d->d_iwarns = 0;
                }
@@ -298,7 +298,7 @@ xfs_qm_adjust_dqtimers(
                     (be64_to_cpu(d->d_rtbcount) >=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
-                                       XFS_QI_RTBTIMELIMIT(mp));
+                                       mp->m_quotainfo->qi_rtbtimelimit);
                } else {
                        d->d_rtbwarns = 0;
                }
@@ -325,6 +325,7 @@ xfs_qm_init_dquot_blk(
        uint            type,
        xfs_buf_t       *bp)
 {
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_dqblk_t     *d;
        int             curid, i;
 
@@ -337,16 +338,16 @@ xfs_qm_init_dquot_blk(
        /*
         * ID of the first dquot in the block - id's are zero based.
         */
-       curid = id - (id % XFS_QM_DQPERBLK(mp));
+       curid = id - (id % q->qi_dqperchunk);
        ASSERT(curid >= 0);
-       memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
-       for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
+       memset(d, 0, BBTOB(q->qi_dqchunklen));
+       for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
                xfs_qm_dqinit_core(curid, type, d);
        xfs_trans_dquot_buf(tp, bp,
                            (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
                            ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
                             XFS_BLI_GDQUOT_BUF)));
-       xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
+       xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
 }
 
 
@@ -419,7 +420,7 @@ xfs_qm_dqalloc(
        /* now we can just get the buffer (there's nothing to read yet) */
        bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
                               dqp->q_blkno,
-                              XFS_QI_DQCHUNKLEN(mp),
+                              mp->m_quotainfo->qi_dqchunklen,
                               0);
        if (!bp || (error = XFS_BUF_GETERROR(bp)))
                goto error1;
@@ -500,7 +501,8 @@ xfs_qm_dqtobp(
         */
        if (dqp->q_blkno == (xfs_daddr_t) 0) {
                /* We use the id as an index */
-               dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp);
+               dqp->q_fileoffset = (xfs_fileoff_t)id /
+                                       mp->m_quotainfo->qi_dqperchunk;
                nmaps = 1;
                quotip = XFS_DQ_TO_QIP(dqp);
                xfs_ilock(quotip, XFS_ILOCK_SHARED);
@@ -529,7 +531,7 @@ xfs_qm_dqtobp(
                /*
                 * offset of dquot in the (fixed sized) dquot chunk.
                 */
-               dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
+               dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
                        sizeof(xfs_dqblk_t);
                if (map.br_startblock == HOLESTARTBLOCK) {
                        /*
@@ -559,15 +561,13 @@ xfs_qm_dqtobp(
         * Read in the buffer, unless we've just done the allocation
         * (in which case we already have the buf).
         */
-       if (! newdquot) {
+       if (!newdquot) {
                trace_xfs_dqtobp_read(dqp);
 
-               if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-                                              dqp->q_blkno,
-                                              XFS_QI_DQCHUNKLEN(mp),
-                                              0, &bp))) {
-                       return (error);
-               }
+               error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+                                          dqp->q_blkno,
+                                          mp->m_quotainfo->qi_dqchunklen,
+                                          0, &bp);
                if (error || !bp)
                        return XFS_ERROR(error);
        }
@@ -689,14 +689,14 @@ xfs_qm_idtodq(
        tp = NULL;
        if (flags & XFS_QMOPT_DQALLOC) {
                tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
-               if ((error = xfs_trans_reserve(tp,
-                                      XFS_QM_DQALLOC_SPACE_RES(mp),
-                                      XFS_WRITE_LOG_RES(mp) +
-                                             BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
-                                             128,
-                                      0,
-                                      XFS_TRANS_PERM_LOG_RES,
-                                      XFS_WRITE_LOG_COUNT))) {
+               error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
+                               XFS_WRITE_LOG_RES(mp) +
+                               BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
+                               128,
+                               0,
+                               XFS_TRANS_PERM_LOG_RES,
+                               XFS_WRITE_LOG_COUNT);
+               if (error) {
                        cancelflags = 0;
                        goto error0;
                }
@@ -751,7 +751,6 @@ xfs_qm_dqlookup(
 {
        xfs_dquot_t             *dqp;
        uint                    flist_locked;
-       xfs_dquot_t             *d;
 
        ASSERT(mutex_is_locked(&qh->qh_lock));
 
@@ -760,7 +759,7 @@ xfs_qm_dqlookup(
        /*
         * Traverse the hashchain looking for a match
         */
-       for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
+       list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
                /*
                 * We already have the hashlock. We don't need the
                 * dqlock to look at the id field of the dquot, since the
@@ -772,12 +771,12 @@ xfs_qm_dqlookup(
                        /*
                         * All in core dquots must be on the dqlist of mp
                         */
-                       ASSERT(dqp->MPL_PREVP != NULL);
+                       ASSERT(!list_empty(&dqp->q_mplist));
 
                        xfs_dqlock(dqp);
                        if (dqp->q_nrefs == 0) {
-                               ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
-                               if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+                               ASSERT(!list_empty(&dqp->q_freelist));
+                               if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
                                        trace_xfs_dqlookup_want(dqp);
 
                                        /*
@@ -787,7 +786,7 @@ xfs_qm_dqlookup(
                                         */
                                        dqp->dq_flags |= XFS_DQ_WANT;
                                        xfs_dqunlock(dqp);
-                                       xfs_qm_freelist_lock(xfs_Gqm);
+                                       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
                                        xfs_dqlock(dqp);
                                        dqp->dq_flags &= ~(XFS_DQ_WANT);
                                }
@@ -802,46 +801,28 @@ xfs_qm_dqlookup(
 
                        if (flist_locked) {
                                if (dqp->q_nrefs != 0) {
-                                       xfs_qm_freelist_unlock(xfs_Gqm);
+                                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
                                        flist_locked = B_FALSE;
                                } else {
-                                       /*
-                                        * take it off the freelist
-                                        */
+                                       /* take it off the freelist */
                                        trace_xfs_dqlookup_freelist(dqp);
-                                       XQM_FREELIST_REMOVE(dqp);
-                                       /* xfs_qm_freelist_print(&(xfs_Gqm->
-                                                       qm_dqfreelist),
-                                                       "after removal"); */
+                                       list_del_init(&dqp->q_freelist);
+                                       xfs_Gqm->qm_dqfrlist_cnt--;
                                }
                        }
 
-                       /*
-                        * grab a reference
-                        */
                        XFS_DQHOLD(dqp);
 
                        if (flist_locked)
-                               xfs_qm_freelist_unlock(xfs_Gqm);
+                               mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
                        /*
                         * move the dquot to the front of the hashchain
                         */
                        ASSERT(mutex_is_locked(&qh->qh_lock));
-                       if (dqp->HL_PREVP != &qh->qh_next) {
-                               trace_xfs_dqlookup_move(dqp);
-                               if ((d = dqp->HL_NEXT))
-                                       d->HL_PREVP = dqp->HL_PREVP;
-                               *(dqp->HL_PREVP) = d;
-                               d = qh->qh_next;
-                               d->HL_PREVP = &dqp->HL_NEXT;
-                               dqp->HL_NEXT = d;
-                               dqp->HL_PREVP = &qh->qh_next;
-                               qh->qh_next = dqp;
-                       }
+                       list_move(&dqp->q_hashlist, &qh->qh_list);
                        trace_xfs_dqlookup_done(dqp);
                        *O_dqpp = dqp;
-                       ASSERT(mutex_is_locked(&qh->qh_lock));
-                       return (0);
+                       return 0;
                }
        }
 
@@ -975,16 +956,17 @@ xfs_qm_dqget(
         */
        if (ip) {
                xfs_ilock(ip, XFS_ILOCK_EXCL);
-               if (! XFS_IS_DQTYPE_ON(mp, type)) {
-                       /* inode stays locked on return */
-                       xfs_qm_dqdestroy(dqp);
-                       return XFS_ERROR(ESRCH);
-               }
+
                /*
                 * A dquot could be attached to this inode by now, since
                 * we had dropped the ilock.
                 */
                if (type == XFS_DQ_USER) {
+                       if (!XFS_IS_UQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
                        if (ip->i_udquot) {
                                xfs_qm_dqdestroy(dqp);
                                dqp = ip->i_udquot;
@@ -992,6 +974,11 @@ xfs_qm_dqget(
                                goto dqret;
                        }
                } else {
+                       if (!XFS_IS_OQUOTA_ON(mp)) {
+                               /* inode stays locked on return */
+                               xfs_qm_dqdestroy(dqp);
+                               return XFS_ERROR(ESRCH);
+                       }
                        if (ip->i_gdquot) {
                                xfs_qm_dqdestroy(dqp);
                                dqp = ip->i_gdquot;
@@ -1033,13 +1020,14 @@ xfs_qm_dqget(
         */
        ASSERT(mutex_is_locked(&h->qh_lock));
        dqp->q_hash = h;
-       XQM_HASHLIST_INSERT(h, dqp);
+       list_add(&dqp->q_hashlist, &h->qh_list);
+       h->qh_version++;
 
        /*
         * Attach this dquot to this filesystem's list of all dquots,
         * kept inside the mount structure in m_quotainfo field
         */
-       xfs_qm_mplist_lock(mp);
+       mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
 
        /*
         * We return a locked dquot to the caller, with a reference taken
@@ -1047,9 +1035,9 @@ xfs_qm_dqget(
        xfs_dqlock(dqp);
        dqp->q_nrefs = 1;
 
-       XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
-
-       xfs_qm_mplist_unlock(mp);
+       list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
+       mp->m_quotainfo->qi_dquots++;
+       mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
        mutex_unlock(&h->qh_lock);
  dqret:
        ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -1086,10 +1074,10 @@ xfs_qm_dqput(
         * drop the dqlock and acquire the freelist and dqlock
         * in the right order; but try to get it out-of-order first
         */
-       if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
+       if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
                trace_xfs_dqput_wait(dqp);
                xfs_dqunlock(dqp);
-               xfs_qm_freelist_lock(xfs_Gqm);
+               mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
                xfs_dqlock(dqp);
        }
 
@@ -1100,10 +1088,8 @@ xfs_qm_dqput(
                if (--dqp->q_nrefs == 0) {
                        trace_xfs_dqput_free(dqp);
 
-                       /*
-                        * insert at end of the freelist.
-                        */
-                       XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
+                       list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
+                       xfs_Gqm->qm_dqfrlist_cnt++;
 
                        /*
                         * If we just added a udquot to the freelist, then
@@ -1118,10 +1104,6 @@ xfs_qm_dqput(
                                xfs_dqlock(gdqp);
                                dqp->q_gdquot = NULL;
                        }
-
-                       /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
-                          "@@@@@++ Free list (after append) @@@@@+");
-                          */
                }
                xfs_dqunlock(dqp);
 
@@ -1133,7 +1115,7 @@ xfs_qm_dqput(
                        break;
                dqp = gdqp;
        }
-       xfs_qm_freelist_unlock(xfs_Gqm);
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 }
 
 /*
@@ -1386,10 +1368,10 @@ int
 xfs_qm_dqpurge(
        xfs_dquot_t     *dqp)
 {
-       xfs_dqhash_t    *thishash;
+       xfs_dqhash_t    *qh = dqp->q_hash;
        xfs_mount_t     *mp = dqp->q_mount;
 
-       ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+       ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
        ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
 
        xfs_dqlock(dqp);
@@ -1407,7 +1389,7 @@ xfs_qm_dqpurge(
                return (1);
        }
 
-       ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+       ASSERT(!list_empty(&dqp->q_freelist));
 
        /*
         * If we're turning off quotas, we have to make sure that, for
@@ -1452,14 +1434,16 @@ xfs_qm_dqpurge(
        ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
               !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
 
-       thishash = dqp->q_hash;
-       XQM_HASHLIST_REMOVE(thishash, dqp);
-       XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
+       list_del_init(&dqp->q_hashlist);
+       qh->qh_version++;
+       list_del_init(&dqp->q_mplist);
+       mp->m_quotainfo->qi_dqreclaims++;
+       mp->m_quotainfo->qi_dquots--;
        /*
         * XXX Move this to the front of the freelist, if we can get the
         * freelist lock.
         */
-       ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
+       ASSERT(!list_empty(&dqp->q_freelist));
 
        dqp->q_mount = NULL;
        dqp->q_hash = NULL;
@@ -1467,7 +1451,7 @@ xfs_qm_dqpurge(
        memset(&dqp->q_core, 0, sizeof(dqp->q_core));
        xfs_dqfunlock(dqp);
        xfs_dqunlock(dqp);
-       mutex_unlock(&thishash->qh_lock);
+       mutex_unlock(&qh->qh_lock);
        return (0);
 }
 
@@ -1517,6 +1501,7 @@ void
 xfs_qm_dqflock_pushbuf_wait(
        xfs_dquot_t     *dqp)
 {
+       xfs_mount_t     *mp = dqp->q_mount;
        xfs_buf_t       *bp;
 
        /*
@@ -1525,14 +1510,14 @@ xfs_qm_dqflock_pushbuf_wait(
         * out immediately.  We'll be able to acquire
         * the flush lock when the I/O completes.
         */
-       bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
-                   XFS_QI_DQCHUNKLEN(dqp->q_mount), XBF_TRYLOCK);
+       bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
+                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
        if (!bp)
                goto out_lock;
 
        if (XFS_BUF_ISDELAYWRITE(bp)) {
                if (XFS_BUF_ISPINNED(bp))
-                       xfs_log_force(dqp->q_mount, 0);
+                       xfs_log_force(mp, 0);
                xfs_buf_delwri_promote(bp);
                wake_up_process(bp->b_target->bt_task);
        }
index a0f7da5..5da3a23 100644 (file)
  * The hash chain headers (hash buckets)
  */
 typedef struct xfs_dqhash {
-       struct xfs_dquot *qh_next;
+       struct list_head  qh_list;
        struct mutex      qh_lock;
        uint              qh_version;   /* ever increasing version */
        uint              qh_nelems;    /* number of dquots on the list */
 } xfs_dqhash_t;
 
-typedef struct xfs_dqlink {
-       struct xfs_dquot  *ql_next;     /* forward link */
-       struct xfs_dquot **ql_prevp;    /* pointer to prev ql_next */
-} xfs_dqlink_t;
-
 struct xfs_mount;
 struct xfs_trans;
 
-/*
- * This is the marker which is designed to occupy the first few
- * bytes of the xfs_dquot_t structure. Even inside this, the freelist pointers
- * must come first.
- * This serves as the marker ("sentinel") when we have to restart list
- * iterations because of locking considerations.
- */
-typedef struct xfs_dqmarker {
-       struct xfs_dquot*dqm_flnext;    /* link to freelist: must be first */
-       struct xfs_dquot*dqm_flprev;
-       xfs_dqlink_t     dqm_mplist;    /* link to mount's list of dquots */
-       xfs_dqlink_t     dqm_hashlist;  /* link to the hash chain */
-       uint             dqm_flags;     /* various flags (XFS_DQ_*) */
-} xfs_dqmarker_t;
-
 /*
  * The incore dquot structure
  */
 typedef struct xfs_dquot {
-       xfs_dqmarker_t   q_lists;       /* list ptrs, q_flags (marker) */
+       uint             dq_flags;      /* various flags (XFS_DQ_*) */
+       struct list_head q_freelist;    /* global free list of dquots */
+       struct list_head q_mplist;      /* mount's list of dquots */
+       struct list_head q_hashlist;    /* global hash list of dquots */
        xfs_dqhash_t    *q_hash;        /* the hashchain header */
        struct xfs_mount*q_mount;       /* filesystem this relates to */
        struct xfs_trans*q_transp;      /* trans this belongs to currently */
@@ -87,13 +70,6 @@ typedef struct xfs_dquot {
        wait_queue_head_t q_pinwait;    /* dquot pinning wait queue */
 } xfs_dquot_t;
 
-
-#define dq_flnext      q_lists.dqm_flnext
-#define dq_flprev      q_lists.dqm_flprev
-#define dq_mplist      q_lists.dqm_mplist
-#define dq_hashlist    q_lists.dqm_hashlist
-#define dq_flags       q_lists.dqm_flags
-
 /*
  * Lock hierarchy for q_qlock:
  *     XFS_QLOCK_NORMAL is the implicit default,
@@ -127,7 +103,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
 }
 
 #define XFS_DQ_IS_LOCKED(dqp)  (mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_ON_FREELIST(dqp)  ((dqp)->dq_flnext != (dqp))
 #define XFS_DQ_IS_DIRTY(dqp)   ((dqp)->dq_flags & XFS_DQ_DIRTY)
 #define XFS_QM_ISUDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_USER)
 #define XFS_QM_ISPDQ(dqp)      ((dqp)->dq_flags & XFS_DQ_PROJ)
index 4e4ee9a..8d89a24 100644 (file)
@@ -107,8 +107,7 @@ xfs_qm_dquot_logitem_pin(
 /* ARGSUSED */
 STATIC void
 xfs_qm_dquot_logitem_unpin(
-       xfs_dq_logitem_t *logitem,
-       int               stale)
+       xfs_dq_logitem_t *logitem)
 {
        xfs_dquot_t *dqp = logitem->qli_dquot;
 
@@ -123,7 +122,7 @@ xfs_qm_dquot_logitem_unpin_remove(
        xfs_dq_logitem_t *logitem,
        xfs_trans_t      *tp)
 {
-       xfs_qm_dquot_logitem_unpin(logitem, 0);
+       xfs_qm_dquot_logitem_unpin(logitem);
 }
 
 /*
@@ -228,7 +227,7 @@ xfs_qm_dquot_logitem_pushbuf(
        }
        mp = dqp->q_mount;
        bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
-                   XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK);
+                       mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
        xfs_dqunlock(dqp);
        if (!bp)
                return;
@@ -329,8 +328,7 @@ static struct xfs_item_ops xfs_dquot_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_qm_dquot_logitem_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))
-                                       xfs_qm_dquot_logitem_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
                                        xfs_qm_dquot_logitem_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))
@@ -357,9 +355,8 @@ xfs_qm_dquot_logitem_init(
        xfs_dq_logitem_t  *lp;
        lp = &dqp->q_logitem;
 
-       lp->qli_item.li_type = XFS_LI_DQUOT;
-       lp->qli_item.li_ops = &xfs_dquot_item_ops;
-       lp->qli_item.li_mountp = dqp->q_mount;
+       xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
+                                       &xfs_dquot_item_ops);
        lp->qli_dquot = dqp;
        lp->qli_format.qlf_type = XFS_LI_DQUOT;
        lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
@@ -426,7 +423,7 @@ xfs_qm_qoff_logitem_pin(xfs_qoff_logitem_t *qf)
  */
 /*ARGSUSED*/
 STATIC void
-xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf, int stale)
+xfs_qm_qoff_logitem_unpin(xfs_qoff_logitem_t *qf)
 {
        return;
 }
@@ -537,8 +534,7 @@ static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_qm_qoff_logitem_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t* ,int))
-                                       xfs_qm_qoff_logitem_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
                                        xfs_qm_qoff_logitem_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -559,8 +555,7 @@ static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_qm_qoff_logitem_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))
-                                       xfs_qm_qoff_logitem_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*,xfs_trans_t*))
                                        xfs_qm_qoff_logitem_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_trylock,
@@ -586,11 +581,8 @@ xfs_qm_qoff_logitem_init(
 
        qf = (xfs_qoff_logitem_t*) kmem_zalloc(sizeof(xfs_qoff_logitem_t), KM_SLEEP);
 
-       qf->qql_item.li_type = XFS_LI_QUOTAOFF;
-       if (start)
-               qf->qql_item.li_ops = &xfs_qm_qoffend_logitem_ops;
-       else
-               qf->qql_item.li_ops = &xfs_qm_qoff_logitem_ops;
+       xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
+                       &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
        qf->qql_item.li_mountp = mp;
        qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
        qf->qql_format.qf_flags = flags;
index 417e61e..38e7641 100644 (file)
@@ -67,9 +67,6 @@ static cred_t xfs_zerocr;
 STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
 STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
 
-STATIC void    xfs_qm_freelist_init(xfs_frlist_t *);
-STATIC void    xfs_qm_freelist_destroy(xfs_frlist_t *);
-
 STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
 STATIC int     xfs_qm_shake(int, gfp_t);
@@ -84,21 +81,25 @@ extern struct mutex qcheck_lock;
 #endif
 
 #ifdef QUOTADEBUG
-#define XQM_LIST_PRINT(l, NXT, title) \
-{ \
-       xfs_dquot_t     *dqp; int i = 0; \
-       cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
-       for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
-               cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
-                                 "bcnt = %d, icnt = %d, refs = %d", \
-                       ++i, (int) be32_to_cpu(dqp->q_core.d_id), \
-                       DQFLAGTO_TYPESTR(dqp),       \
-                       (int) be64_to_cpu(dqp->q_core.d_bcount), \
-                       (int) be64_to_cpu(dqp->q_core.d_icount), \
-                       (int) dqp->q_nrefs);  } \
+static void
+xfs_qm_dquot_list_print(
+       struct xfs_mount *mp)   /* mount whose dquot list is dumped */
+{
+       xfs_dquot_t     *dqp;
+       int             i = 0;  /* ordinal printed in front of each dquot */
+
+       list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
+               cmn_err(CE_DEBUG, "   %d. \"%d (%s)\"   "
+                                 "bcnt = %lld, icnt = %lld, refs = %d",
+                       i++, be32_to_cpu(dqp->q_core.d_id),
+                       DQFLAGTO_TYPESTR(dqp),
+                       (long long)be64_to_cpu(dqp->q_core.d_bcount),
+                       (long long)be64_to_cpu(dqp->q_core.d_icount),
+                       dqp->q_nrefs);
+       }
 }
 #else
-#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
+static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
 #endif
 
 /*
@@ -144,7 +145,9 @@ xfs_Gqm_init(void)
        /*
         * Freelist of all dquots of all file systems
         */
-       xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
+       INIT_LIST_HEAD(&xqm->qm_dqfrlist);
+       xqm->qm_dqfrlist_cnt = 0;
+       mutex_init(&xqm->qm_dqfrlist_lock);
 
        /*
         * dquot zone. we register our own low-memory callback.
@@ -189,6 +192,7 @@ STATIC void
 xfs_qm_destroy(
        struct xfs_qm   *xqm)
 {
+       struct xfs_dquot *dqp, *n;
        int             hsize, i;
 
        ASSERT(xqm != NULL);
@@ -204,7 +208,21 @@ xfs_qm_destroy(
        xqm->qm_usr_dqhtable = NULL;
        xqm->qm_grp_dqhtable = NULL;
        xqm->qm_dqhashmask = 0;
-       xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
+
+       /* frlist cleanup */
+       mutex_lock(&xqm->qm_dqfrlist_lock);
+       list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
+               xfs_dqlock(dqp);
+#ifdef QUOTADEBUG
+               cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
+#endif
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               xfs_dqunlock(dqp);
+               xfs_qm_dqdestroy(dqp);
+       }
+       mutex_unlock(&xqm->qm_dqfrlist_lock);
+       mutex_destroy(&xqm->qm_dqfrlist_lock);
 #ifdef DEBUG
        mutex_destroy(&qcheck_lock);
 #endif
@@ -256,7 +274,7 @@ STATIC void
 xfs_qm_rele_quotafs_ref(
        struct xfs_mount *mp)
 {
-       xfs_dquot_t     *dqp, *nextdqp;
+       xfs_dquot_t     *dqp, *n;
 
        ASSERT(xfs_Gqm);
        ASSERT(xfs_Gqm->qm_nrefs > 0);
@@ -264,26 +282,24 @@ xfs_qm_rele_quotafs_ref(
        /*
         * Go thru the freelist and destroy all inactive dquots.
         */
-       xfs_qm_freelist_lock(xfs_Gqm);
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
 
-       for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
-            dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
+       list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
                xfs_dqlock(dqp);
-               nextdqp = dqp->dq_flnext;
                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
                        ASSERT(dqp->q_mount == NULL);
                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(dqp->HL_PREVP == NULL);
-                       ASSERT(dqp->MPL_PREVP == NULL);
-                       XQM_FREELIST_REMOVE(dqp);
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
                        xfs_dqunlock(dqp);
                        xfs_qm_dqdestroy(dqp);
                } else {
                        xfs_dqunlock(dqp);
                }
-               dqp = nextdqp;
        }
-       xfs_qm_freelist_unlock(xfs_Gqm);
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
 
        /*
         * Destroy the entire XQM. If somebody mounts with quotaon, this'll
@@ -305,7 +321,7 @@ xfs_qm_unmount(
        struct xfs_mount        *mp)
 {
        if (mp->m_quotainfo) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
                xfs_qm_destroy_quotainfo(mp);
        }
 }
@@ -449,20 +465,21 @@ xfs_qm_unmount_quotas(
  */
 STATIC int
 xfs_qm_dqflush_all(
-       xfs_mount_t     *mp,
-       int             sync_mode)
+       struct xfs_mount        *mp,
+       int                     sync_mode)
 {
-       int             recl;
-       xfs_dquot_t     *dqp;
-       int             niters;
-       int             error;
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl;
+       struct xfs_dquot        *dqp;
+       int                     niters;
+       int                     error;
 
-       if (mp->m_quotainfo == NULL)
+       if (!q)
                return 0;
        niters = 0;
 again:
-       xfs_qm_mplist_lock(mp);
-       FOREACH_DQUOT_IN_MP(dqp, mp) {
+       mutex_lock(&q->qi_dqlist_lock);
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
                if (! XFS_DQ_IS_DIRTY(dqp)) {
                        xfs_dqunlock(dqp);
@@ -470,7 +487,7 @@ again:
                }
 
                /* XXX a sentinel would be better */
-               recl = XFS_QI_MPLRECLAIMS(mp);
+               recl = q->qi_dqreclaims;
                if (!xfs_dqflock_nowait(dqp)) {
                        /*
                         * If we can't grab the flush lock then check
@@ -485,21 +502,21 @@ again:
                 * Let go of the mplist lock. We don't want to hold it
                 * across a disk write.
                 */
-               xfs_qm_mplist_unlock(mp);
+               mutex_unlock(&q->qi_dqlist_lock);
                error = xfs_qm_dqflush(dqp, sync_mode);
                xfs_dqunlock(dqp);
                if (error)
                        return error;
 
-               xfs_qm_mplist_lock(mp);
-               if (recl != XFS_QI_MPLRECLAIMS(mp)) {
-                       xfs_qm_mplist_unlock(mp);
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
+                       mutex_unlock(&q->qi_dqlist_lock);
                        /* XXX restart limit */
                        goto again;
                }
        }
 
-       xfs_qm_mplist_unlock(mp);
+       mutex_unlock(&q->qi_dqlist_lock);
        /* return ! busy */
        return 0;
 }
@@ -509,15 +526,15 @@ again:
  */
 STATIC void
 xfs_qm_detach_gdquots(
-       xfs_mount_t     *mp)
+       struct xfs_mount        *mp)
 {
-       xfs_dquot_t     *dqp, *gdqp;
-       int             nrecl;
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *gdqp;
+       int                     nrecl;
 
  again:
-       ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
-       dqp = XFS_QI_MPLNEXT(mp);
-       while (dqp) {
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                xfs_dqlock(dqp);
                if ((gdqp = dqp->q_gdquot)) {
                        xfs_dqlock(gdqp);
@@ -530,15 +547,14 @@ xfs_qm_detach_gdquots(
                         * Can't hold the mplist lock across a dqput.
                         * XXXmust convert to marker based iterations here.
                         */
-                       nrecl = XFS_QI_MPLRECLAIMS(mp);
-                       xfs_qm_mplist_unlock(mp);
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
                        xfs_qm_dqput(gdqp);
 
-                       xfs_qm_mplist_lock(mp);
-                       if (nrecl != XFS_QI_MPLRECLAIMS(mp))
+                       mutex_lock(&q->qi_dqlist_lock);
+                       if (nrecl != q->qi_dqreclaims)
                                goto again;
                }
-               dqp = dqp->MPL_NEXT;
        }
 }
 
@@ -550,23 +566,23 @@ xfs_qm_detach_gdquots(
  */
 STATIC int
 xfs_qm_dqpurge_int(
-       xfs_mount_t     *mp,
-       uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
+       struct xfs_mount        *mp,
+       uint                    flags)
 {
-       xfs_dquot_t     *dqp;
-       uint            dqtype;
-       int             nrecl;
-       xfs_dquot_t     *nextdqp;
-       int             nmisses;
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_dquot        *dqp, *n;
+       uint                    dqtype;
+       int                     nrecl;
+       int                     nmisses;
 
-       if (mp->m_quotainfo == NULL)
+       if (!q)
                return 0;
 
        dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
        dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
        dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
 
-       xfs_qm_mplist_lock(mp);
+       mutex_lock(&q->qi_dqlist_lock);
 
        /*
         * In the first pass through all incore dquots of this filesystem,
@@ -578,28 +594,25 @@ xfs_qm_dqpurge_int(
 
       again:
        nmisses = 0;
-       ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
        /*
         * Try to get rid of all of the unwanted dquots. The idea is to
         * get them off mplist and hashlist, but leave them on freelist.
         */
-       dqp = XFS_QI_MPLNEXT(mp);
-       while (dqp) {
+       list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
                /*
                 * It's OK to look at the type without taking dqlock here.
                 * We're holding the mplist lock here, and that's needed for
                 * a dqreclaim.
                 */
-               if ((dqp->dq_flags & dqtype) == 0) {
-                       dqp = dqp->MPL_NEXT;
+               if ((dqp->dq_flags & dqtype) == 0)
                        continue;
-               }
 
                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       nrecl = XFS_QI_MPLRECLAIMS(mp);
-                       xfs_qm_mplist_unlock(mp);
+                       nrecl = q->qi_dqreclaims;
+                       mutex_unlock(&q->qi_dqlist_lock);
                        mutex_lock(&dqp->q_hash->qh_lock);
-                       xfs_qm_mplist_lock(mp);
+                       mutex_lock(&q->qi_dqlist_lock);
 
                        /*
                         * XXXTheoretically, we can get into a very long
@@ -607,7 +620,7 @@ xfs_qm_dqpurge_int(
                         * No one can be adding dquots to the mplist at
                         * this point, but somebody might be taking things off.
                         */
-                       if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
+                       if (nrecl != q->qi_dqreclaims) {
                                mutex_unlock(&dqp->q_hash->qh_lock);
                                goto again;
                        }
@@ -617,11 +630,9 @@ xfs_qm_dqpurge_int(
                 * Take the dquot off the mplist and hashlist. It may remain on
                 * freelist in INACTIVE state.
                 */
-               nextdqp = dqp->MPL_NEXT;
                nmisses += xfs_qm_dqpurge(dqp);
-               dqp = nextdqp;
        }
-       xfs_qm_mplist_unlock(mp);
+       mutex_unlock(&q->qi_dqlist_lock);
        return nmisses;
 }
 
@@ -921,12 +932,13 @@ xfs_qm_dqdetach(
 
 int
 xfs_qm_sync(
-       xfs_mount_t     *mp,
-       int             flags)
+       struct xfs_mount        *mp,
+       int                     flags)
 {
-       int             recl, restarts;
-       xfs_dquot_t     *dqp;
-       int             error;
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       int                     recl, restarts;
+       struct xfs_dquot        *dqp;
+       int                     error;
 
        if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
                return 0;
@@ -934,18 +946,19 @@ xfs_qm_sync(
        restarts = 0;
 
   again:
-       xfs_qm_mplist_lock(mp);
+       mutex_lock(&q->qi_dqlist_lock);
        /*
         * dqpurge_all() also takes the mplist lock and iterate thru all dquots
         * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
         * when we have the mplist lock, we know that dquots will be consistent
         * as long as we have it locked.
         */
-       if (! XFS_IS_QUOTA_ON(mp)) {
-               xfs_qm_mplist_unlock(mp);
+       if (!XFS_IS_QUOTA_ON(mp)) {
+               mutex_unlock(&q->qi_dqlist_lock);
                return 0;
        }
-       FOREACH_DQUOT_IN_MP(dqp, mp) {
+       ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
+       list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
                /*
                 * If this is vfs_sync calling, then skip the dquots that
                 * don't 'seem' to be dirty. ie. don't acquire dqlock.
@@ -969,7 +982,7 @@ xfs_qm_sync(
                }
 
                /* XXX a sentinel would be better */
-               recl = XFS_QI_MPLRECLAIMS(mp);
+               recl = q->qi_dqreclaims;
                if (!xfs_dqflock_nowait(dqp)) {
                        if (flags & SYNC_TRYLOCK) {
                                xfs_dqunlock(dqp);
@@ -989,7 +1002,7 @@ xfs_qm_sync(
                 * Let go of the mplist lock. We don't want to hold it
                 * across a disk write
                 */
-               xfs_qm_mplist_unlock(mp);
+               mutex_unlock(&q->qi_dqlist_lock);
                error = xfs_qm_dqflush(dqp, flags);
                xfs_dqunlock(dqp);
                if (error && XFS_FORCED_SHUTDOWN(mp))
@@ -997,17 +1010,17 @@ xfs_qm_sync(
                else if (error)
                        return error;
 
-               xfs_qm_mplist_lock(mp);
-               if (recl != XFS_QI_MPLRECLAIMS(mp)) {
+               mutex_lock(&q->qi_dqlist_lock);
+               if (recl != q->qi_dqreclaims) {
                        if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
                                break;
 
-                       xfs_qm_mplist_unlock(mp);
+                       mutex_unlock(&q->qi_dqlist_lock);
                        goto again;
                }
        }
 
-       xfs_qm_mplist_unlock(mp);
+       mutex_unlock(&q->qi_dqlist_lock);
        return 0;
 }
 
@@ -1052,8 +1065,9 @@ xfs_qm_init_quotainfo(
                return error;
        }
 
-       xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
-       lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class);
+       INIT_LIST_HEAD(&qinf->qi_dqlist);
+       mutex_init(&qinf->qi_dqlist_lock);
+       lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
 
        qinf->qi_dqreclaims = 0;
 
@@ -1150,7 +1164,8 @@ xfs_qm_destroy_quotainfo(
         */
        xfs_qm_rele_quotafs_ref(mp);
 
-       xfs_qm_list_destroy(&qi->qi_dqlist);
+       ASSERT(list_empty(&qi->qi_dqlist));
+       mutex_destroy(&qi->qi_dqlist_lock);
 
        if (qi->qi_uquotaip) {
                IRELE(qi->qi_uquotaip);
@@ -1177,7 +1192,7 @@ xfs_qm_list_init(
        int             n)
 {
        mutex_init(&list->qh_lock);
-       list->qh_next = NULL;
+       INIT_LIST_HEAD(&list->qh_list);
        list->qh_version = 0;
        list->qh_nelems = 0;
 }
@@ -1316,9 +1331,6 @@ xfs_qm_qino_alloc(
         */
        spin_lock(&mp->m_sb_lock);
        if (flags & XFS_QMOPT_SBVERSION) {
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-               unsigned oldv = mp->m_sb.sb_versionnum;
-#endif
                ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
                ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
                                   XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
@@ -1331,11 +1343,6 @@ xfs_qm_qino_alloc(
 
                /* qflags will get updated _after_ quotacheck */
                mp->m_sb.sb_qflags = 0;
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-               cmn_err(CE_NOTE,
-                       "Old superblock version %x, converting to %x.",
-                       oldv, mp->m_sb.sb_versionnum);
-#endif
        }
        if (flags & XFS_QMOPT_UQUOTA)
                mp->m_sb.sb_uquotino = (*ip)->i_ino;
@@ -1371,10 +1378,10 @@ xfs_qm_reset_dqcounts(
 #ifdef DEBUG
        j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
        do_div(j, sizeof(xfs_dqblk_t));
-       ASSERT(XFS_QM_DQPERBLK(mp) == j);
+       ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
 #endif
        ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
-       for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
+       for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
                /*
                 * Do a sanity check, and if needed, repair the dqblk. Don't
                 * output any warnings because it's perfectly possible to
@@ -1429,7 +1436,7 @@ xfs_qm_dqiter_bufs(
        while (blkcnt--) {
                error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
                              XFS_FSB_TO_DADDR(mp, bno),
-                             (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
+                             mp->m_quotainfo->qi_dqchunklen, 0, &bp);
                if (error)
                        break;
 
@@ -1439,7 +1446,7 @@ xfs_qm_dqiter_bufs(
                 * goto the next block.
                 */
                bno++;
-               firstid += XFS_QM_DQPERBLK(mp);
+               firstid += mp->m_quotainfo->qi_dqperchunk;
        }
        return error;
 }
@@ -1505,7 +1512,7 @@ xfs_qm_dqiterate(
                                continue;
 
                        firstid = (xfs_dqid_t) map[i].br_startoff *
-                               XFS_QM_DQPERBLK(mp);
+                               mp->m_quotainfo->qi_dqperchunk;
                        /*
                         * Do a read-ahead on the next extent.
                         */
@@ -1516,7 +1523,7 @@ xfs_qm_dqiterate(
                                while (rablkcnt--) {
                                        xfs_baread(mp->m_ddev_targp,
                                               XFS_FSB_TO_DADDR(mp, rablkno),
-                                              (int)XFS_QI_DQCHUNKLEN(mp));
+                                              mp->m_quotainfo->qi_dqchunklen);
                                        rablkno++;
                                }
                        }
@@ -1576,8 +1583,10 @@ xfs_qm_quotacheck_dqadjust(
 
        /*
         * Set default limits, adjust timers (since we changed usages)
+        *
+        * There are no timers for the default values set in the root dquot.
         */
-       if (! XFS_IS_SUSER_DQUOT(dqp)) {
+       if (dqp->q_core.d_id) {
                xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
                xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
        }
@@ -1747,14 +1756,14 @@ xfs_qm_quotacheck(
        lastino = 0;
        flags = 0;
 
-       ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
+       ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
        ASSERT(XFS_IS_QUOTA_RUNNING(mp));
 
        /*
         * There should be no cached dquots. The (simplistic) quotacheck
         * algorithm doesn't like that.
         */
-       ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
+       ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
 
        cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
 
@@ -1763,15 +1772,19 @@ xfs_qm_quotacheck(
         * their counters to zero. We need a clean slate.
         * We don't log our changes till later.
         */
-       if ((uip = XFS_QI_UQIP(mp))) {
-               if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
+       uip = mp->m_quotainfo->qi_uquotaip;
+       if (uip) {
+               error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
+               if (error)
                        goto error_return;
                flags |= XFS_UQUOTA_CHKD;
        }
 
-       if ((gip = XFS_QI_GQIP(mp))) {
-               if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
-                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
+       gip = mp->m_quotainfo->qi_gquotaip;
+       if (gip) {
+               error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
+                                       XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
+               if (error)
                        goto error_return;
                flags |= XFS_OQUOTA_CHKD;
        }
@@ -1804,7 +1817,7 @@ xfs_qm_quotacheck(
         * at this point (because we intentionally didn't in dqget_noattach).
         */
        if (error) {
-               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
+               xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
                goto error_return;
        }
 
@@ -1825,7 +1838,7 @@ xfs_qm_quotacheck(
        mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
        mp->m_qflags |= flags;
 
-       XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
+       xfs_qm_dquot_list_print(mp);
 
  error_return:
        if (error) {
@@ -1920,59 +1933,53 @@ xfs_qm_init_quotainos(
                }
        }
 
-       XFS_QI_UQIP(mp) = uip;
-       XFS_QI_GQIP(mp) = gip;
+       mp->m_quotainfo->qi_uquotaip = uip;
+       mp->m_quotainfo->qi_gquotaip = gip;
 
        return 0;
 }
 
 
+
 /*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- * XXXsup merge this with qm_reclaim_one().
+ * Just pop the least recently used dquot off the freelist and
+ * recycle it. The returned dquot is locked.
  */
-STATIC int
-xfs_qm_shake_freelist(
-       int howmany)
+STATIC xfs_dquot_t *
+xfs_qm_dqreclaim_one(void)
 {
-       int             nreclaimed;
-       xfs_dqhash_t    *hash;
-       xfs_dquot_t     *dqp, *nextdqp;
+       xfs_dquot_t     *dqpout;
+       xfs_dquot_t     *dqp;
        int             restarts;
-       int             nflushes;
-
-       if (howmany <= 0)
-               return 0;
 
-       nreclaimed = 0;
        restarts = 0;
-       nflushes = 0;
+       dqpout = NULL;
 
-#ifdef QUOTADEBUG
-       cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
-#endif
-       /* lock order is : hashchainlock, freelistlock, mplistlock */
- tryagain:
-       xfs_qm_freelist_lock(xfs_Gqm);
+       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
+startagain:
+       mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
 
-       for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
-            ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
-             nreclaimed < howmany); ) {
+       list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
+               struct xfs_mount *mp = dqp->q_mount;
                xfs_dqlock(dqp);
 
                /*
                 * We are racing with dqlookup here. Naturally we don't
-                * want to reclaim a dquot that lookup wants.
+                * want to reclaim a dquot that lookup wants. We release the
+                * freelist lock and start over, so that lookup will grab
+                * both the dquot and the freelistlock.
                 */
                if (dqp->dq_flags & XFS_DQ_WANT) {
+                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
+
+                       trace_xfs_dqreclaim_want(dqp);
+
                        xfs_dqunlock(dqp);
-                       xfs_qm_freelist_unlock(xfs_Gqm);
+                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                               return nreclaimed;
+                               return NULL;
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                       goto tryagain;
+                       goto startagain;
                }
 
                /*
@@ -1981,23 +1988,27 @@ xfs_qm_shake_freelist(
                 * life easier.
                 */
                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(dqp->q_mount == NULL);
+                       ASSERT(mp == NULL);
                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(dqp->HL_PREVP == NULL);
-                       ASSERT(dqp->MPL_PREVP == NULL);
+                       ASSERT(list_empty(&dqp->q_hashlist));
+                       ASSERT(list_empty(&dqp->q_mplist));
+                       list_del_init(&dqp->q_freelist);
+                       xfs_Gqm->qm_dqfrlist_cnt--;
+                       xfs_dqunlock(dqp);
+                       dqpout = dqp;
                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                       nextdqp = dqp->dq_flnext;
-                       goto off_freelist;
+                       break;
                }
 
-               ASSERT(dqp->MPL_PREVP);
+               ASSERT(dqp->q_hash);
+               ASSERT(!list_empty(&dqp->q_mplist));
+
                /*
                 * Try to grab the flush lock. If this dquot is in the process of
                 * getting flushed to disk, we don't want to reclaim it.
                 */
                if (!xfs_dqflock_nowait(dqp)) {
                        xfs_dqunlock(dqp);
-                       dqp = dqp->dq_flnext;
                        continue;
                }
 
@@ -2010,21 +2021,21 @@ xfs_qm_shake_freelist(
                if (XFS_DQ_IS_DIRTY(dqp)) {
                        int     error;
 
-                       trace_xfs_dqshake_dirty(dqp);
+                       trace_xfs_dqreclaim_dirty(dqp);
 
                        /*
                         * We flush it delayed write, so don't bother
-                        * releasing the mplock.
+                        * releasing the freelist lock.
                         */
                        error = xfs_qm_dqflush(dqp, 0);
                        if (error) {
-                               xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
-                       "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
+                               xfs_fs_cmn_err(CE_WARN, mp,
+                       "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
                        }
                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-                       dqp = dqp->dq_flnext;
                        continue;
                }
+
                /*
                 * We're trying to get the hashlock out of order. This races
                 * with dqlookup; so, we giveup and goto the next dquot if
@@ -2033,56 +2044,74 @@ xfs_qm_shake_freelist(
                 * waiting for the freelist lock.
                 */
                if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
-                       xfs_dqfunlock(dqp);
-                       xfs_dqunlock(dqp);
-                       dqp = dqp->dq_flnext;
-                       continue;
+                       restarts++;
+                       goto dqfunlock;
                }
+
                /*
                 * This races with dquot allocation code as well as dqflush_all
                 * and reclaim code. So, if we failed to grab the mplist lock,
                 * giveup everything and start over.
                 */
-               hash = dqp->q_hash;
-               ASSERT(hash);
-               if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
-                       /* XXX put a sentinel so that we can come back here */
+               if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
+                       restarts++;
+                       mutex_unlock(&dqp->q_hash->qh_lock);
                        xfs_dqfunlock(dqp);
                        xfs_dqunlock(dqp);
-                       mutex_unlock(&hash->qh_lock);
-                       xfs_qm_freelist_unlock(xfs_Gqm);
-                       if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                               return nreclaimed;
-                       goto tryagain;
+                       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+                       if (restarts++ >= XFS_QM_RECLAIM_MAX_RESTARTS)
+                               return NULL;
+                       goto startagain;
                }
 
-               trace_xfs_dqshake_unlink(dqp);
-
-#ifdef QUOTADEBUG
-               cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
-                       dqp, be32_to_cpu(dqp->q_core.d_id));
-#endif
                ASSERT(dqp->q_nrefs == 0);
-               nextdqp = dqp->dq_flnext;
-               XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
-               XQM_HASHLIST_REMOVE(hash, dqp);
+               list_del_init(&dqp->q_mplist);
+               mp->m_quotainfo->qi_dquots--;
+               mp->m_quotainfo->qi_dqreclaims++;
+               list_del_init(&dqp->q_hashlist);
+               dqp->q_hash->qh_version++;
+               list_del_init(&dqp->q_freelist);
+               xfs_Gqm->qm_dqfrlist_cnt--;
+               dqpout = dqp;
+               mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
+               mutex_unlock(&dqp->q_hash->qh_lock);
+dqfunlock:
                xfs_dqfunlock(dqp);
-               xfs_qm_mplist_unlock(dqp->q_mount);
-               mutex_unlock(&hash->qh_lock);
-
- off_freelist:
-               XQM_FREELIST_REMOVE(dqp);
                xfs_dqunlock(dqp);
-               nreclaimed++;
-               XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
+               if (dqpout)
+                       break;
+               if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
+                       return NULL;
+       }
+       mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+       return dqpout;
+}
+
+/*
+ * Reclaim and destroy up to 'howmany' dquots from the freelist, one at a
+ * time via xfs_qm_dqreclaim_one(), stopping early once none can be had.
+ * Returns the number destroyed. Races with dqlookup(); lookup is favored.
+ */
+STATIC int
+xfs_qm_shake_freelist(
+       int     howmany)
+{
+       int             nreclaimed = 0;
+       xfs_dquot_t     *dqp;
+
+       if (howmany <= 0)
+               return 0;
+
+       while (nreclaimed < howmany) {
+               dqp = xfs_qm_dqreclaim_one();
+               if (!dqp)
+                       return nreclaimed;      /* nothing could be reclaimed */
                xfs_qm_dqdestroy(dqp);
-               dqp = nextdqp;
+               nreclaimed++;
        }
-       xfs_qm_freelist_unlock(xfs_Gqm);
        return nreclaimed;
 }
 
-
 /*
  * The kmem_shake interface is invoked when memory is running low.
  */
@@ -2097,7 +2126,7 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
        if (!xfs_Gqm)
                return 0;
 
-       nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
+       nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
        /* incore dquots in all f/s's */
        ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
 
@@ -2113,131 +2142,6 @@ xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
 }
 
 
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
-       xfs_dquot_t     *dqpout;
-       xfs_dquot_t     *dqp;
-       int             restarts;
-       int             nflushes;
-
-       restarts = 0;
-       dqpout = NULL;
-       nflushes = 0;
-
-       /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
- startagain:
-       xfs_qm_freelist_lock(xfs_Gqm);
-
-       FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
-               xfs_dqlock(dqp);
-
-               /*
-                * We are racing with dqlookup here. Naturally we don't
-                * want to reclaim a dquot that lookup wants. We release the
-                * freelist lock and start over, so that lookup will grab
-                * both the dquot and the freelistlock.
-                */
-               if (dqp->dq_flags & XFS_DQ_WANT) {
-                       ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
-                       trace_xfs_dqreclaim_want(dqp);
-
-                       xfs_dqunlock(dqp);
-                       xfs_qm_freelist_unlock(xfs_Gqm);
-                       if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                               return NULL;
-                       XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                       goto startagain;
-               }
-
-               /*
-                * If the dquot is inactive, we are assured that it is
-                * not on the mplist or the hashlist, and that makes our
-                * life easier.
-                */
-               if (dqp->dq_flags & XFS_DQ_INACTIVE) {
-                       ASSERT(dqp->q_mount == NULL);
-                       ASSERT(! XFS_DQ_IS_DIRTY(dqp));
-                       ASSERT(dqp->HL_PREVP == NULL);
-                       ASSERT(dqp->MPL_PREVP == NULL);
-                       XQM_FREELIST_REMOVE(dqp);
-                       xfs_dqunlock(dqp);
-                       dqpout = dqp;
-                       XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
-                       break;
-               }
-
-               ASSERT(dqp->q_hash);
-               ASSERT(dqp->MPL_PREVP);
-
-               /*
-                * Try to grab the flush lock. If this dquot is in the process of
-                * getting flushed to disk, we don't want to reclaim it.
-                */
-               if (!xfs_dqflock_nowait(dqp)) {
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               /*
-                * We have the flush lock so we know that this is not in the
-                * process of being flushed. So, if this is dirty, flush it
-                * DELWRI so that we don't get a freelist infested with
-                * dirty dquots.
-                */
-               if (XFS_DQ_IS_DIRTY(dqp)) {
-                       int     error;
-
-                       trace_xfs_dqreclaim_dirty(dqp);
-
-                       /*
-                        * We flush it delayed write, so don't bother
-                        * releasing the freelist lock.
-                        */
-                       error = xfs_qm_dqflush(dqp, 0);
-                       if (error) {
-                               xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
-                       "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
-                       }
-                       xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
-                       continue;
-               }
-
-               if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
-                       xfs_dqfunlock(dqp);
-                       xfs_dqunlock(dqp);
-                       continue;
-               }
-
-               if (!mutex_trylock(&dqp->q_hash->qh_lock))
-                       goto mplistunlock;
-
-               trace_xfs_dqreclaim_unlink(dqp);
-
-               ASSERT(dqp->q_nrefs == 0);
-               XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
-               XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
-               XQM_FREELIST_REMOVE(dqp);
-               dqpout = dqp;
-               mutex_unlock(&dqp->q_hash->qh_lock);
- mplistunlock:
-               xfs_qm_mplist_unlock(dqp->q_mount);
-               xfs_dqfunlock(dqp);
-               xfs_dqunlock(dqp);
-               if (dqpout)
-                       break;
-       }
-
-       xfs_qm_freelist_unlock(xfs_Gqm);
-       return dqpout;
-}
-
-
 /*------------------------------------------------------------------*/
 
 /*
@@ -2662,66 +2566,3 @@ xfs_qm_vop_create_dqattach(
        }
 }
 
-/* ------------- list stuff -----------------*/
-STATIC void
-xfs_qm_freelist_init(xfs_frlist_t *ql)
-{
-       ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
-       mutex_init(&ql->qh_lock);
-       ql->qh_version = 0;
-       ql->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_freelist_destroy(xfs_frlist_t *ql)
-{
-       xfs_dquot_t     *dqp, *nextdqp;
-
-       mutex_lock(&ql->qh_lock);
-       for (dqp = ql->qh_next;
-            dqp != (xfs_dquot_t *)ql; ) {
-               xfs_dqlock(dqp);
-               nextdqp = dqp->dq_flnext;
-#ifdef QUOTADEBUG
-               cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
-#endif
-               XQM_FREELIST_REMOVE(dqp);
-               xfs_dqunlock(dqp);
-               xfs_qm_dqdestroy(dqp);
-               dqp = nextdqp;
-       }
-       mutex_unlock(&ql->qh_lock);
-       mutex_destroy(&ql->qh_lock);
-
-       ASSERT(ql->qh_nelems == 0);
-}
-
-STATIC void
-xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
-       dq->dq_flnext = ql->qh_next;
-       dq->dq_flprev = (xfs_dquot_t *)ql;
-       ql->qh_next = dq;
-       dq->dq_flnext->dq_flprev = dq;
-       xfs_Gqm->qm_dqfreelist.qh_nelems++;
-       xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-void
-xfs_qm_freelist_unlink(xfs_dquot_t *dq)
-{
-       xfs_dquot_t *next = dq->dq_flnext;
-       xfs_dquot_t *prev = dq->dq_flprev;
-
-       next->dq_flprev = prev;
-       prev->dq_flnext = next;
-       dq->dq_flnext = dq->dq_flprev = dq;
-       xfs_Gqm->qm_dqfreelist.qh_nelems--;
-       xfs_Gqm->qm_dqfreelist.qh_version++;
-}
-
-void
-xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
-{
-       xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
-}
index 495564b..c9446f1 100644 (file)
@@ -72,17 +72,6 @@ extern kmem_zone_t   *qm_dqtrxzone;
 #define XFS_QM_MAX_DQCLUSTER_LOGSZ     3
 
 typedef xfs_dqhash_t   xfs_dqlist_t;
-/*
- * The freelist head. The first two fields match the first two in the
- * xfs_dquot_t structure (in xfs_dqmarker_t)
- */
-typedef struct xfs_frlist {
-       struct xfs_dquot *qh_next;
-       struct xfs_dquot *qh_prev;
-       struct mutex     qh_lock;
-       uint             qh_version;
-       uint             qh_nelems;
-} xfs_frlist_t;
 
 /*
  * Quota Manager (global) structure. Lives only in core.
@@ -91,7 +80,9 @@ typedef struct xfs_qm {
        xfs_dqlist_t    *qm_usr_dqhtable;/* udquot hash table */
        xfs_dqlist_t    *qm_grp_dqhtable;/* gdquot hash table */
        uint             qm_dqhashmask;  /* # buckets in dq hashtab - 1 */
-       xfs_frlist_t     qm_dqfreelist;  /* freelist of dquots */
+       struct list_head qm_dqfrlist;    /* freelist of dquots */
+       struct mutex     qm_dqfrlist_lock;
+       int              qm_dqfrlist_cnt;
        atomic_t         qm_totaldquots; /* total incore dquots */
        uint             qm_nrefs;       /* file systems with quota on */
        int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
@@ -106,7 +97,9 @@ typedef struct xfs_qm {
 typedef struct xfs_quotainfo {
        xfs_inode_t     *qi_uquotaip;    /* user quota inode */
        xfs_inode_t     *qi_gquotaip;    /* group quota inode */
-       xfs_dqlist_t     qi_dqlist;      /* all dquots in filesys */
+       struct list_head qi_dqlist;      /* all dquots in filesys */
+       struct mutex     qi_dqlist_lock;
+       int              qi_dquots;
        int              qi_dqreclaims;  /* a change here indicates
                                            a removal in the dqlist */
        time_t           qi_btimelimit;  /* limit for blks timer */
@@ -175,10 +168,6 @@ extern int         xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
 extern int             xfs_qm_scall_quotaon(xfs_mount_t *, uint);
 extern int             xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
 
-/* list stuff */
-extern void            xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
-extern void            xfs_qm_freelist_unlink(xfs_dquot_t *);
-
 #ifdef DEBUG
 extern int             xfs_qm_internalqcheck(xfs_mount_t *);
 #else
index 83e7ea3..3d1fc79 100644 (file)
@@ -55,7 +55,7 @@ static int xqm_proc_show(struct seq_file *m, void *v)
                        ndquot,
                        xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
                        xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
-                       xfs_Gqm? xfs_Gqm->qm_dqfreelist.qh_nelems : 0);
+                       xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
        return 0;
 }
 
index 50bee07..26fa431 100644 (file)
@@ -79,6 +79,7 @@ xfs_qm_scall_quotaoff(
        xfs_mount_t             *mp,
        uint                    flags)
 {
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
        uint                    dqtype;
        int                     error;
        uint                    inactivate_flags;
@@ -102,11 +103,8 @@ xfs_qm_scall_quotaoff(
         * critical thing.
         * If quotaoff, then we must be dealing with the root filesystem.
         */
-       ASSERT(mp->m_quotainfo);
-       if (mp->m_quotainfo)
-               mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
-
-       ASSERT(mp->m_quotainfo);
+       ASSERT(q);
+       mutex_lock(&q->qi_quotaofflock);
 
        /*
         * If we're just turning off quota enforcement, change mp and go.
@@ -117,7 +115,7 @@ xfs_qm_scall_quotaoff(
                spin_lock(&mp->m_sb_lock);
                mp->m_sb.sb_qflags = mp->m_qflags;
                spin_unlock(&mp->m_sb_lock);
-               mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+               mutex_unlock(&q->qi_quotaofflock);
 
                /* XXX what to do if error ? Revert back to old vals incore ? */
                error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
@@ -150,10 +148,8 @@ xfs_qm_scall_quotaoff(
         * Nothing to do?  Don't complain. This happens when we're just
         * turning off quota enforcement.
         */
-       if ((mp->m_qflags & flags) == 0) {
-               mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
-               return (0);
-       }
+       if ((mp->m_qflags & flags) == 0)
+               goto out_unlock;
 
        /*
         * Write the LI_QUOTAOFF log record, and do SB changes atomically,
@@ -162,7 +158,7 @@ xfs_qm_scall_quotaoff(
         */
        error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
        if (error)
-               goto out_error;
+               goto out_unlock;
 
        /*
         * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
@@ -204,7 +200,7 @@ xfs_qm_scall_quotaoff(
         * So, if we couldn't purge all the dquots from the filesystem,
         * we can't get rid of the incore data structures.
         */
-       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype|XFS_QMOPT_QUOTAOFF)))
+       while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
                delay(10 * nculprits);
 
        /*
@@ -222,7 +218,7 @@ xfs_qm_scall_quotaoff(
        if (error) {
                /* We're screwed now. Shutdown is the only option. */
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
-               goto out_error;
+               goto out_unlock;
        }
 
        /*
@@ -230,27 +226,26 @@ xfs_qm_scall_quotaoff(
         */
        if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
            ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
-               mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+               mutex_unlock(&q->qi_quotaofflock);
                xfs_qm_destroy_quotainfo(mp);
                return (0);
        }
 
        /*
-        * Release our quotainode references, and vn_purge them,
-        * if we don't need them anymore.
+        * Release our quotainode references if we don't need them anymore.
         */
-       if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
-               IRELE(XFS_QI_UQIP(mp));
-               XFS_QI_UQIP(mp) = NULL;
+       if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
+               IRELE(q->qi_uquotaip);
+               q->qi_uquotaip = NULL;
        }
-       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
-               IRELE(XFS_QI_GQIP(mp));
-               XFS_QI_GQIP(mp) = NULL;
+       if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
+               IRELE(q->qi_gquotaip);
+               q->qi_gquotaip = NULL;
        }
-out_error:
-       mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 
-       return (error);
+out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
+       return error;
 }
 
 int
@@ -379,9 +374,9 @@ xfs_qm_scall_quotaon(
        /*
         * Switch on quota enforcement in core.
         */
-       mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
+       mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
        mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
-       mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
+       mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
 
        return (0);
 }
@@ -392,11 +387,12 @@ xfs_qm_scall_quotaon(
  */
 int
 xfs_qm_scall_getqstat(
-       xfs_mount_t     *mp,
-       fs_quota_stat_t *out)
+       struct xfs_mount        *mp,
+       struct fs_quota_stat    *out)
 {
-       xfs_inode_t     *uip, *gip;
-       boolean_t       tempuqip, tempgqip;
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
+       struct xfs_inode        *uip, *gip;
+       boolean_t               tempuqip, tempgqip;
 
        uip = gip = NULL;
        tempuqip = tempgqip = B_FALSE;
@@ -415,9 +411,9 @@ xfs_qm_scall_getqstat(
        out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
        out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
 
-       if (mp->m_quotainfo) {
-               uip = mp->m_quotainfo->qi_uquotaip;
-               gip = mp->m_quotainfo->qi_gquotaip;
+       if (q) {
+               uip = q->qi_uquotaip;
+               gip = q->qi_gquotaip;
        }
        if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
                if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
@@ -441,15 +437,15 @@ xfs_qm_scall_getqstat(
                if (tempgqip)
                        IRELE(gip);
        }
-       if (mp->m_quotainfo) {
-               out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
-               out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
-               out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
-               out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
-               out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
-               out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
+       if (q) {
+               out->qs_incoredqs = q->qi_dquots;
+               out->qs_btimelimit = q->qi_btimelimit;
+               out->qs_itimelimit = q->qi_itimelimit;
+               out->qs_rtbtimelimit = q->qi_rtbtimelimit;
+               out->qs_bwarnlimit = q->qi_bwarnlimit;
+               out->qs_iwarnlimit = q->qi_iwarnlimit;
        }
-       return (0);
+       return 0;
 }
 
 /*
@@ -462,6 +458,7 @@ xfs_qm_scall_setqlim(
        uint                    type,
        fs_disk_quota_t         *newlim)
 {
+       struct xfs_quotainfo    *q = mp->m_quotainfo;
        xfs_disk_dquot_t        *ddq;
        xfs_dquot_t             *dqp;
        xfs_trans_t             *tp;
@@ -485,7 +482,7 @@ xfs_qm_scall_setqlim(
         * a quotaoff from happening). (XXXThis doesn't currently happen
         * because we take the vfslock before calling xfs_qm_sysent).
         */
-       mutex_lock(&(XFS_QI_QOFFLOCK(mp)));
+       mutex_lock(&q->qi_quotaofflock);
 
        /*
         * Get the dquot (locked), and join it to the transaction.
@@ -493,9 +490,8 @@ xfs_qm_scall_setqlim(
         */
        if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
                xfs_trans_cancel(tp, XFS_TRANS_ABORT);
-               mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
                ASSERT(error != ENOENT);
-               return (error);
+               goto out_unlock;
        }
        xfs_trans_dqjoin(tp, dqp);
        ddq = &dqp->q_core;
@@ -513,8 +509,8 @@ xfs_qm_scall_setqlim(
                ddq->d_blk_hardlimit = cpu_to_be64(hard);
                ddq->d_blk_softlimit = cpu_to_be64(soft);
                if (id == 0) {
-                       mp->m_quotainfo->qi_bhardlimit = hard;
-                       mp->m_quotainfo->qi_bsoftlimit = soft;
+                       q->qi_bhardlimit = hard;
+                       q->qi_bsoftlimit = soft;
                }
        } else {
                qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft);
@@ -529,8 +525,8 @@ xfs_qm_scall_setqlim(
                ddq->d_rtb_hardlimit = cpu_to_be64(hard);
                ddq->d_rtb_softlimit = cpu_to_be64(soft);
                if (id == 0) {
-                       mp->m_quotainfo->qi_rtbhardlimit = hard;
-                       mp->m_quotainfo->qi_rtbsoftlimit = soft;
+                       q->qi_rtbhardlimit = hard;
+                       q->qi_rtbsoftlimit = soft;
                }
        } else {
                qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
@@ -546,8 +542,8 @@ xfs_qm_scall_setqlim(
                ddq->d_ino_hardlimit = cpu_to_be64(hard);
                ddq->d_ino_softlimit = cpu_to_be64(soft);
                if (id == 0) {
-                       mp->m_quotainfo->qi_ihardlimit = hard;
-                       mp->m_quotainfo->qi_isoftlimit = soft;
+                       q->qi_ihardlimit = hard;
+                       q->qi_isoftlimit = soft;
                }
        } else {
                qdprintk("ihard %Ld < isoft %Ld\n", hard, soft);
@@ -572,23 +568,23 @@ xfs_qm_scall_setqlim(
                 * for warnings.
                 */
                if (newlim->d_fieldmask & FS_DQ_BTIMER) {
-                       mp->m_quotainfo->qi_btimelimit = newlim->d_btimer;
+                       q->qi_btimelimit = newlim->d_btimer;
                        ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
                }
                if (newlim->d_fieldmask & FS_DQ_ITIMER) {
-                       mp->m_quotainfo->qi_itimelimit = newlim->d_itimer;
+                       q->qi_itimelimit = newlim->d_itimer;
                        ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
                }
                if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
-                       mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer;
+                       q->qi_rtbtimelimit = newlim->d_rtbtimer;
                        ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
                }
                if (newlim->d_fieldmask & FS_DQ_BWARNS)
-                       mp->m_quotainfo->qi_bwarnlimit = newlim->d_bwarns;
+                       q->qi_bwarnlimit = newlim->d_bwarns;
                if (newlim->d_fieldmask & FS_DQ_IWARNS)
-                       mp->m_quotainfo->qi_iwarnlimit = newlim->d_iwarns;
+                       q->qi_iwarnlimit = newlim->d_iwarns;
                if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
-                       mp->m_quotainfo->qi_rtbwarnlimit = newlim->d_rtbwarns;
+                       q->qi_rtbwarnlimit = newlim->d_rtbwarns;
        } else {
                /*
                 * If the user is now over quota, start the timelimit.
@@ -605,8 +601,9 @@ xfs_qm_scall_setqlim(
        error = xfs_trans_commit(tp, 0);
        xfs_qm_dqprint(dqp);
        xfs_qm_dqrele(dqp);
-       mutex_unlock(&(XFS_QI_QOFFLOCK(mp)));
 
+ out_unlock:
+       mutex_unlock(&q->qi_quotaofflock);
        return error;
 }
 
@@ -853,7 +850,8 @@ xfs_dqrele_inode(
        int                     error;
 
        /* skip quota inodes */
-       if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+       if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
+           ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
                ASSERT(ip->i_udquot == NULL);
                ASSERT(ip->i_gdquot == NULL);
                read_unlock(&pag->pag_ici_lock);
@@ -931,7 +929,8 @@ struct mutex  qcheck_lock;
 }
 
 typedef struct dqtest {
-       xfs_dqmarker_t  q_lists;
+       uint             dq_flags;      /* various flags (XFS_DQ_*) */
+       struct list_head q_hashlist;
        xfs_dqhash_t    *q_hash;        /* the hashchain header */
        xfs_mount_t     *q_mount;       /* filesystem this relates to */
        xfs_dqid_t      d_id;           /* user id or group id */
@@ -942,14 +941,9 @@ typedef struct dqtest {
 STATIC void
 xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
 {
-       xfs_dquot_t *d;
-       if (((d) = (h)->qh_next))
-               (d)->HL_PREVP = &((dqp)->HL_NEXT);
-       (dqp)->HL_NEXT = d;
-       (dqp)->HL_PREVP = &((h)->qh_next);
-       (h)->qh_next = (xfs_dquot_t *)dqp;
-       (h)->qh_version++;
-       (h)->qh_nelems++;
+       list_add(&dqp->q_hashlist, &h->qh_list);        /* new entry goes at the head of the chain */
+       h->qh_version++;
+       h->qh_nelems++;
 }
 STATIC void
 xfs_qm_dqtest_print(
@@ -1061,9 +1055,7 @@ xfs_qm_internalqcheck_dqget(
        xfs_dqhash_t    *h;
 
        h = DQTEST_HASH(mp, id, type);
-       for (d = (xfs_dqtest_t *) h->qh_next; d != NULL;
-            d = (xfs_dqtest_t *) d->HL_NEXT) {
-               /* DQTEST_LIST_PRINT(h, HL_NEXT, "@@@@@ dqtestlist @@@@@"); */
+       list_for_each_entry(d, &h->qh_list, q_hashlist) {
                if (d->d_id == id && mp == d->q_mount) {
                        *O_dq = d;
                        return (0);
@@ -1074,6 +1066,7 @@ xfs_qm_internalqcheck_dqget(
        d->d_id = id;
        d->q_mount = mp;
        d->q_hash = h;
+       INIT_LIST_HEAD(&d->q_hashlist);
        xfs_qm_hashinsert(h, d);
        *O_dq = d;
        return (0);
@@ -1180,8 +1173,6 @@ xfs_qm_internalqcheck(
        xfs_ino_t       lastino;
        int             done, count;
        int             i;
-       xfs_dqtest_t    *d, *e;
-       xfs_dqhash_t    *h1;
        int             error;
 
        lastino = 0;
@@ -1221,19 +1212,18 @@ xfs_qm_internalqcheck(
        }
        cmn_err(CE_DEBUG, "Checking results against system dquots");
        for (i = 0; i < qmtest_hashmask; i++) {
-               h1 = &qmtest_udqtab[i];
-               for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+               xfs_dqtest_t    *d, *n;
+               xfs_dqhash_t    *h;
+
+               h = &qmtest_udqtab[i];
+               list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
                        xfs_dqtest_cmp(d);
-                       e = (xfs_dqtest_t *) d->HL_NEXT;
                        kmem_free(d);
-                       d = e;
                }
-               h1 = &qmtest_gdqtab[i];
-               for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
+               h = &qmtest_gdqtab[i];
+               list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
                        xfs_dqtest_cmp(d);
-                       e = (xfs_dqtest_t *) d->HL_NEXT;
                        kmem_free(d);
-                       d = e;
                }
        }
 
index 8286b28..94a3d92 100644 (file)
  */
 #define XFS_DQITER_MAP_SIZE    10
 
-/* Number of dquots that fit in to a dquot block */
-#define XFS_QM_DQPERBLK(mp)    ((mp)->m_quotainfo->qi_dqperchunk)
-
-#define XFS_DQ_IS_ADDEDTO_TRX(t, d)    ((d)->q_transp == (t))
-
-#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
-#define XFS_QI_UQIP(mp)                ((mp)->m_quotainfo->qi_uquotaip)
-#define XFS_QI_GQIP(mp)                ((mp)->m_quotainfo->qi_gquotaip)
-#define XFS_QI_DQCHUNKLEN(mp)  ((mp)->m_quotainfo->qi_dqchunklen)
-#define XFS_QI_BTIMELIMIT(mp)  ((mp)->m_quotainfo->qi_btimelimit)
-#define XFS_QI_RTBTIMELIMIT(mp) ((mp)->m_quotainfo->qi_rtbtimelimit)
-#define XFS_QI_ITIMELIMIT(mp)  ((mp)->m_quotainfo->qi_itimelimit)
-#define XFS_QI_BWARNLIMIT(mp)  ((mp)->m_quotainfo->qi_bwarnlimit)
-#define XFS_QI_RTBWARNLIMIT(mp)        ((mp)->m_quotainfo->qi_rtbwarnlimit)
-#define XFS_QI_IWARNLIMIT(mp)  ((mp)->m_quotainfo->qi_iwarnlimit)
-#define XFS_QI_QOFFLOCK(mp)    ((mp)->m_quotainfo->qi_quotaofflock)
-
-#define XFS_QI_MPL_LIST(mp)    ((mp)->m_quotainfo->qi_dqlist)
-#define XFS_QI_MPLNEXT(mp)     ((mp)->m_quotainfo->qi_dqlist.qh_next)
-#define XFS_QI_MPLNDQUOTS(mp)  ((mp)->m_quotainfo->qi_dqlist.qh_nelems)
-
-#define xfs_qm_mplist_lock(mp) \
-       mutex_lock(&(XFS_QI_MPL_LIST(mp).qh_lock))
-#define xfs_qm_mplist_nowait(mp) \
-       mutex_trylock(&(XFS_QI_MPL_LIST(mp).qh_lock))
-#define xfs_qm_mplist_unlock(mp) \
-       mutex_unlock(&(XFS_QI_MPL_LIST(mp).qh_lock))
-#define XFS_QM_IS_MPLIST_LOCKED(mp) \
-       mutex_is_locked(&(XFS_QI_MPL_LIST(mp).qh_lock))
-
-#define xfs_qm_freelist_lock(qm) \
-       mutex_lock(&((qm)->qm_dqfreelist.qh_lock))
-#define xfs_qm_freelist_lock_nowait(qm) \
-       mutex_trylock(&((qm)->qm_dqfreelist.qh_lock))
-#define xfs_qm_freelist_unlock(qm) \
-       mutex_unlock(&((qm)->qm_dqfreelist.qh_lock))
-
 /*
  * Hash into a bucket in the dquot hash table, based on <mp, id>.
  */
@@ -72,9 +35,6 @@
                                      XFS_DQ_HASHVAL(mp, id)) : \
                                     (xfs_Gqm->qm_grp_dqhtable + \
                                      XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQTYPE_ON(mp, type)   (type == XFS_DQ_USER ? \
-                                       XFS_IS_UQUOTA_ON(mp) : \
-                                       XFS_IS_OQUOTA_ON(mp))
 #define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
        !dqp->q_core.d_blk_hardlimit && \
        !dqp->q_core.d_blk_softlimit && \
        !dqp->q_core.d_rtbcount && \
        !dqp->q_core.d_icount)
 
-#define HL_PREVP       dq_hashlist.ql_prevp
-#define HL_NEXT                dq_hashlist.ql_next
-#define MPL_PREVP      dq_mplist.ql_prevp
-#define MPL_NEXT       dq_mplist.ql_next
-
-
-#define _LIST_REMOVE(h, dqp, PVP, NXT)                         \
-       {                                                       \
-                xfs_dquot_t *d;                                \
-                if (((d) = (dqp)->NXT))                                \
-                        (d)->PVP = (dqp)->PVP;                 \
-                *((dqp)->PVP) = d;                             \
-                (dqp)->NXT = NULL;                             \
-                (dqp)->PVP = NULL;                             \
-                (h)->qh_version++;                             \
-                (h)->qh_nelems--;                              \
-       }
-
-#define _LIST_INSERT(h, dqp, PVP, NXT)                         \
-       {                                                       \
-                xfs_dquot_t *d;                                \
-                if (((d) = (h)->qh_next))                      \
-                        (d)->PVP = &((dqp)->NXT);              \
-                (dqp)->NXT = d;                                \
-                (dqp)->PVP = &((h)->qh_next);                  \
-                (h)->qh_next = dqp;                            \
-                (h)->qh_version++;                             \
-                (h)->qh_nelems++;                              \
-        }
-
-#define FOREACH_DQUOT_IN_MP(dqp, mp) \
-       for ((dqp) = XFS_QI_MPLNEXT(mp); (dqp) != NULL; (dqp) = (dqp)->MPL_NEXT)
-
-#define FOREACH_DQUOT_IN_FREELIST(dqp, qlist)  \
-for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
-     (dqp) = (dqp)->dq_flnext)
-
-#define XQM_HASHLIST_INSERT(h, dqp)    \
-        _LIST_INSERT(h, dqp, HL_PREVP, HL_NEXT)
-
-#define XQM_FREELIST_INSERT(h, dqp)    \
-        xfs_qm_freelist_append(h, dqp)
-
-#define XQM_MPLIST_INSERT(h, dqp)      \
-        _LIST_INSERT(h, dqp, MPL_PREVP, MPL_NEXT)
-
-#define XQM_HASHLIST_REMOVE(h, dqp)    \
-        _LIST_REMOVE(h, dqp, HL_PREVP, HL_NEXT)
-#define XQM_FREELIST_REMOVE(dqp)       \
-        xfs_qm_freelist_unlink(dqp)
-#define XQM_MPLIST_REMOVE(h, dqp)      \
-       { _LIST_REMOVE(h, dqp, MPL_PREVP, MPL_NEXT); \
-         XFS_QI_MPLRECLAIMS((dqp)->q_mount)++; }
-
-#define XFS_DQ_IS_LOGITEM_INITD(dqp)   ((dqp)->q_logitem.qli_dquot == (dqp))
-
-#define XFS_QM_DQP_TO_DQACCT(tp, dqp)  (XFS_QM_ISUDQ(dqp) ? \
-                                        (tp)->t_dqinfo->dqa_usrdquots : \
-                                        (tp)->t_dqinfo->dqa_grpdquots)
-#define XFS_IS_SUSER_DQUOT(dqp)                \
-       (!((dqp)->q_core.d_id))
-
 #define DQFLAGTO_TYPESTR(d)    (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
                                 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
                                 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
index c3ab75c..061d827 100644 (file)
@@ -59,12 +59,11 @@ xfs_trans_dqjoin(
        xfs_trans_t     *tp,
        xfs_dquot_t     *dqp)
 {
-       xfs_dq_logitem_t    *lp;
+       xfs_dq_logitem_t    *lp = &dqp->q_logitem;
 
-       ASSERT(! XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+       ASSERT(dqp->q_transp != tp);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
-       ASSERT(XFS_DQ_IS_LOGITEM_INITD(dqp));
-       lp = &dqp->q_logitem;
+       ASSERT(lp->qli_dquot == dqp);
 
        /*
         * Get a log_item_desc to point at the new item.
@@ -96,7 +95,7 @@ xfs_trans_log_dquot(
 {
        xfs_log_item_desc_t     *lidp;
 
-       ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+       ASSERT(dqp->q_transp == tp);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
 
        lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(&dqp->q_logitem));
@@ -198,16 +197,16 @@ xfs_trans_get_dqtrx(
        int             i;
        xfs_dqtrx_t     *qa;
 
-       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
-               qa = XFS_QM_DQP_TO_DQACCT(tp, dqp);
+       qa = XFS_QM_ISUDQ(dqp) ?
+               tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
 
+       for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
                if (qa[i].qt_dquot == NULL ||
-                   qa[i].qt_dquot == dqp) {
-                       return (&qa[i]);
-               }
+                   qa[i].qt_dquot == dqp)
+                       return &qa[i];
        }
 
-       return (NULL);
+       return NULL;
 }
 
 /*
@@ -381,7 +380,7 @@ xfs_trans_apply_dquot_deltas(
                                break;
 
                        ASSERT(XFS_DQ_IS_LOCKED(dqp));
-                       ASSERT(XFS_DQ_IS_ADDEDTO_TRX(tp, dqp));
+                       ASSERT(dqp->q_transp == tp);
 
                        /*
                         * adjust the actual number of blocks used
@@ -639,7 +638,7 @@ xfs_trans_dqresv(
                        softlimit = q->qi_bsoftlimit;
                timer = be32_to_cpu(dqp->q_core.d_btimer);
                warns = be16_to_cpu(dqp->q_core.d_bwarns);
-               warnlimit = XFS_QI_BWARNLIMIT(dqp->q_mount);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
                resbcountp = &dqp->q_res_bcount;
        } else {
                ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
@@ -651,7 +650,7 @@ xfs_trans_dqresv(
                        softlimit = q->qi_rtbsoftlimit;
                timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
                warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
-               warnlimit = XFS_QI_RTBWARNLIMIT(dqp->q_mount);
+               warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
                resbcountp = &dqp->q_res_rtbcount;
        }
 
@@ -691,7 +690,7 @@ xfs_trans_dqresv(
                        count = be64_to_cpu(dqp->q_core.d_icount);
                        timer = be32_to_cpu(dqp->q_core.d_itimer);
                        warns = be16_to_cpu(dqp->q_core.d_iwarns);
-                       warnlimit = XFS_QI_IWARNLIMIT(dqp->q_mount);
+                       warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
                        hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
                        if (!hardlimit)
                                hardlimit = q->qi_ihardlimit;
index 5c11e4d..99587de 100644 (file)
@@ -3829,7 +3829,7 @@ xfs_bmap_add_attrfork(
        }
        if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
                goto error2;
-       error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES);
+       error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
        ASSERT(ip->i_df.if_ext_max ==
               XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
        return error;
index f3c49e6..240340a 100644 (file)
@@ -372,12 +372,12 @@ xfs_buf_item_pin(
  */
 STATIC void
 xfs_buf_item_unpin(
-       xfs_buf_log_item_t      *bip,
-       int                     stale)
+       xfs_buf_log_item_t      *bip)
 {
        struct xfs_ail  *ailp;
        xfs_buf_t       *bp;
        int             freed;
+       int             stale = bip->bli_flags & XFS_BLI_STALE;
 
        bp = bip->bli_buf;
        ASSERT(bp != NULL);
@@ -428,40 +428,34 @@ xfs_buf_item_unpin_remove(
        xfs_buf_log_item_t      *bip,
        xfs_trans_t             *tp)
 {
-       xfs_buf_t               *bp;
-       xfs_log_item_desc_t     *lidp;
-       int                     stale = 0;
-
-       bp = bip->bli_buf;
-       /*
-        * will xfs_buf_item_unpin() call xfs_buf_item_relse()?
-        */
+       /* will xfs_buf_item_unpin() call xfs_buf_item_relse()? */
        if ((atomic_read(&bip->bli_refcount) == 1) &&
            (bip->bli_flags & XFS_BLI_STALE)) {
+               /*
+                * yes -- We can safely do some work here and then call
+                * buf_item_unpin to do the rest because we are
+                * holding the buffer locked so no one else will be
+                * able to bump up the refcount. We have to remove the
+                * log item from the transaction as we are about to release
+                * our reference to the buffer. If we don't, the unlock that
+                * occurs later in the xfs_trans_uncommit() will try to
+                * reference the buffer which we no longer have a hold on.
+                */
+               struct xfs_log_item_desc *lidp;
+
                ASSERT(XFS_BUF_VALUSEMA(bip->bli_buf) <= 0);
                trace_xfs_buf_item_unpin_stale(bip);
 
-               /*
-                * yes -- clear the xaction descriptor in-use flag
-                * and free the chunk if required.  We can safely
-                * do some work here and then call buf_item_unpin
-                * to do the rest because if the if is true, then
-                * we are holding the buffer locked so no one else
-                * will be able to bump up the refcount.
-                */
-               lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) bip);
-               stale = lidp->lid_flags & XFS_LID_BUF_STALE;
+               lidp = xfs_trans_find_item(tp, (xfs_log_item_t *)bip);
                xfs_trans_free_item(tp, lidp);
+
                /*
-                * Since the transaction no longer refers to the buffer,
-                * the buffer should no longer refer to the transaction.
+                * Since the transaction no longer refers to the buffer, the
+                * buffer should no longer refer to the transaction.
                 */
-               XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+               XFS_BUF_SET_FSPRIVATE2(bip->bli_buf, NULL);
        }
-
-       xfs_buf_item_unpin(bip, stale);
-
-       return;
+       xfs_buf_item_unpin(bip);
 }
 
 /*
@@ -675,7 +669,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_buf_item_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_buf_item_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))xfs_buf_item_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_buf_item_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
                                        xfs_buf_item_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_buf_item_trylock,
@@ -733,10 +727,7 @@ xfs_buf_item_init(
 
        bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
                                                    KM_SLEEP);
-       bip->bli_item.li_type = XFS_LI_BUF;
-       bip->bli_item.li_ops = &xfs_buf_item_ops;
-       bip->bli_item.li_mountp = mp;
-       bip->bli_item.li_ailp = mp->m_ail;
+       xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
        bip->bli_buf = bp;
        xfs_buf_hold(bp);
        bip->bli_format.blf_type = XFS_LI_BUF;
index 217f34a..df44545 100644 (file)
@@ -26,7 +26,7 @@ extern kmem_zone_t    *xfs_buf_item_zone;
  * have been logged.
  * For 6.2 and beyond, this is XFS_LI_BUF.  We use this to log everything.
  */
-typedef struct xfs_buf_log_format_t {
+typedef struct xfs_buf_log_format {
        unsigned short  blf_type;       /* buf log item type indicator */
        unsigned short  blf_size;       /* size of this item */
        ushort          blf_flags;      /* misc state */
index 92d5cd5..ef96175 100644 (file)
@@ -186,18 +186,18 @@ xfs_cmn_err(int panic_tag, int level, xfs_mount_t *mp, char *fmt, ...)
 
 void
 xfs_error_report(
-       char            *tag,
-       int             level,
-       xfs_mount_t     *mp,
-       char            *fname,
-       int             linenum,
-       inst_t          *ra)
+       const char              *tag,
+       int                     level,
+       struct xfs_mount        *mp,
+       const char              *filename,
+       int                     linenum,
+       inst_t                  *ra)
 {
        if (level <= xfs_error_level) {
                xfs_cmn_err(XFS_PTAG_ERROR_REPORT,
                            CE_ALERT, mp,
                "XFS internal error %s at line %d of file %s.  Caller 0x%p\n",
-                           tag, linenum, fname, ra);
+                           tag, linenum, filename, ra);
 
                xfs_stack_trace();
        }
@@ -205,15 +205,15 @@ xfs_error_report(
 
 void
 xfs_corruption_error(
-       char            *tag,
-       int             level,
-       xfs_mount_t     *mp,
-       void            *p,
-       char            *fname,
-       int             linenum,
-       inst_t          *ra)
+       const char              *tag,
+       int                     level,
+       struct xfs_mount        *mp,
+       void                    *p,
+       const char              *filename,
+       int                     linenum,
+       inst_t                  *ra)
 {
        if (level <= xfs_error_level)
                xfs_hex_dump(p, 16);
-       xfs_error_report(tag, level, mp, fname, linenum, ra);
+       xfs_error_report(tag, level, mp, filename, linenum, ra);
 }
index 0c93051..c2c1a07 100644 (file)
@@ -29,10 +29,11 @@ extern int  xfs_error_trap(int);
 
 struct xfs_mount;
 
-extern void xfs_error_report(char *tag, int level, struct xfs_mount *mp,
-                               char *fname, int linenum, inst_t *ra);
-extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
-                               void *p, char *fname, int linenum, inst_t *ra);
+extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
+                       const char *filename, int linenum, inst_t *ra);
+extern void xfs_corruption_error(const char *tag, int level,
+                       struct xfs_mount *mp, void *p, const char *filename,
+                       int linenum, inst_t *ra);
 
 #define        XFS_ERROR_REPORT(e, lvl, mp)    \
        xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
index 6f35ed1..409fe81 100644 (file)
@@ -106,7 +106,7 @@ xfs_efi_item_pin(xfs_efi_log_item_t *efip)
  */
 /*ARGSUSED*/
 STATIC void
-xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
+xfs_efi_item_unpin(xfs_efi_log_item_t *efip)
 {
        struct xfs_ail          *ailp = efip->efi_item.li_ailp;
 
@@ -224,7 +224,7 @@ static struct xfs_item_ops xfs_efi_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_efi_item_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_efi_item_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))xfs_efi_item_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_efi_item_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t *))
                                        xfs_efi_item_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_efi_item_trylock,
@@ -259,10 +259,7 @@ xfs_efi_init(xfs_mount_t   *mp,
                                                             KM_SLEEP);
        }
 
-       efip->efi_item.li_type = XFS_LI_EFI;
-       efip->efi_item.li_ops = &xfs_efi_item_ops;
-       efip->efi_item.li_mountp = mp;
-       efip->efi_item.li_ailp = mp->m_ail;
+       xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
        efip->efi_format.efi_nextents = nextents;
        efip->efi_format.efi_id = (__psint_t)(void*)efip;
 
@@ -428,7 +425,7 @@ xfs_efd_item_pin(xfs_efd_log_item_t *efdp)
  */
 /*ARGSUSED*/
 STATIC void
-xfs_efd_item_unpin(xfs_efd_log_item_t *efdp, int stale)
+xfs_efd_item_unpin(xfs_efd_log_item_t *efdp)
 {
        return;
 }
@@ -518,7 +515,7 @@ static struct xfs_item_ops xfs_efd_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_efd_item_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_efd_item_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))xfs_efd_item_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_efd_item_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
                                        xfs_efd_item_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_efd_item_trylock,
@@ -554,10 +551,7 @@ xfs_efd_init(xfs_mount_t   *mp,
                                                             KM_SLEEP);
        }
 
-       efdp->efd_item.li_type = XFS_LI_EFD;
-       efdp->efd_item.li_ops = &xfs_efd_item_ops;
-       efdp->efd_item.li_mountp = mp;
-       efdp->efd_item.li_ailp = mp->m_ail;
+       xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops);
        efdp->efd_efip = efip;
        efdp->efd_format.efd_nextents = nextents;
        efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
index 0ffd564..8cd6e8d 100644 (file)
@@ -2449,6 +2449,8 @@ xfs_iunpin_nowait(
 {
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
 
+       trace_xfs_inode_unpin_nowait(ip, _RET_IP_);
+
        /* Give the log a push to start the unpinning I/O */
        xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
 
index 7bfea85..cf8249a 100644 (file)
@@ -543,6 +543,7 @@ xfs_inode_item_pin(
 {
        ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
 
+       trace_xfs_inode_pin(iip->ili_inode, _RET_IP_);
        atomic_inc(&iip->ili_inode->i_pincount);
 }
 
@@ -556,11 +557,11 @@ xfs_inode_item_pin(
 /* ARGSUSED */
 STATIC void
 xfs_inode_item_unpin(
-       xfs_inode_log_item_t    *iip,
-       int                     stale)
+       xfs_inode_log_item_t    *iip)
 {
        struct xfs_inode        *ip = iip->ili_inode;
 
+       trace_xfs_inode_unpin(ip, _RET_IP_);
        ASSERT(atomic_read(&ip->i_pincount) > 0);
        if (atomic_dec_and_test(&ip->i_pincount))
                wake_up(&ip->i_ipin_wait);
@@ -572,7 +573,7 @@ xfs_inode_item_unpin_remove(
        xfs_inode_log_item_t    *iip,
        xfs_trans_t             *tp)
 {
-       xfs_inode_item_unpin(iip, 0);
+       xfs_inode_item_unpin(iip);
 }
 
 /*
@@ -838,7 +839,7 @@ static struct xfs_item_ops xfs_inode_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_inode_item_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
                                        xfs_inode_item_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
@@ -865,17 +866,9 @@ xfs_inode_item_init(
        ASSERT(ip->i_itemp == NULL);
        iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
 
-       iip->ili_item.li_type = XFS_LI_INODE;
-       iip->ili_item.li_ops = &xfs_inode_item_ops;
-       iip->ili_item.li_mountp = mp;
-       iip->ili_item.li_ailp = mp->m_ail;
        iip->ili_inode = ip;
-
-       /*
-          We have zeroed memory. No need ...
-          iip->ili_extents_buf = NULL;
-        */
-
+       xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
+                                               &xfs_inode_item_ops);
        iip->ili_format.ilf_type = XFS_LI_INODE;
        iip->ili_format.ilf_ino = ip->i_ino;
        iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;
index 0b65039..ef14943 100644 (file)
 #define XFS_STRAT_WRITE_IMAPS  2
 #define XFS_WRITE_IMAPS                XFS_BMAP_MAX_NMAP
 
-STATIC int
-xfs_imap_to_bmap(
-       xfs_inode_t     *ip,
-       xfs_off_t       offset,
-       xfs_bmbt_irec_t *imap,
-       xfs_iomap_t     *iomapp,
-       int             imaps,                  /* Number of imap entries */
-       int             iomaps,                 /* Number of iomap entries */
-       int             flags)
-{
-       xfs_mount_t     *mp = ip->i_mount;
-       int             pbm;
-       xfs_fsblock_t   start_block;
-
-
-       for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
-               iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-               iomapp->iomap_delta = offset - iomapp->iomap_offset;
-               iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
-               iomapp->iomap_flags = flags;
-
-               if (XFS_IS_REALTIME_INODE(ip)) {
-                       iomapp->iomap_flags |= IOMAP_REALTIME;
-                       iomapp->iomap_target = mp->m_rtdev_targp;
-               } else {
-                       iomapp->iomap_target = mp->m_ddev_targp;
-               }
-               start_block = imap->br_startblock;
-               if (start_block == HOLESTARTBLOCK) {
-                       iomapp->iomap_bn = IOMAP_DADDR_NULL;
-                       iomapp->iomap_flags |= IOMAP_HOLE;
-               } else if (start_block == DELAYSTARTBLOCK) {
-                       iomapp->iomap_bn = IOMAP_DADDR_NULL;
-                       iomapp->iomap_flags |= IOMAP_DELAY;
-               } else {
-                       iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
-                       if (ISUNWRITTEN(imap))
-                               iomapp->iomap_flags |= IOMAP_UNWRITTEN;
-               }
-
-               offset += iomapp->iomap_bsize - iomapp->iomap_delta;
-       }
-       return pbm;     /* Return the number filled */
-}
+STATIC int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
+                                 int, struct xfs_bmbt_irec *, int *);
+STATIC int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
+                                struct xfs_bmbt_irec *, int *);
+STATIC int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
+                               struct xfs_bmbt_irec *, int *);
 
 int
 xfs_iomap(
-       xfs_inode_t     *ip,
-       xfs_off_t       offset,
-       ssize_t         count,
-       int             flags,
-       xfs_iomap_t     *iomapp,
-       int             *niomaps)
+       struct xfs_inode        *ip,
+       xfs_off_t               offset,
+       ssize_t                 count,
+       int                     flags,
+       struct xfs_bmbt_irec    *imap,
+       int                     *nimaps,
+       int                     *new)
 {
-       xfs_mount_t     *mp = ip->i_mount;
-       xfs_fileoff_t   offset_fsb, end_fsb;
-       int             error = 0;
-       int             lockmode = 0;
-       xfs_bmbt_irec_t imap;
-       int             nimaps = 1;
-       int             bmapi_flags = 0;
-       int             iomap_flags = 0;
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           offset_fsb, end_fsb;
+       int                     error = 0;
+       int                     lockmode = 0;
+       int                     bmapi_flags = 0;
 
        ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
 
+       *new = 0;
+
        if (XFS_FORCED_SHUTDOWN(mp))
                return XFS_ERROR(EIO);
 
@@ -160,8 +122,8 @@ xfs_iomap(
 
        error = xfs_bmapi(NULL, ip, offset_fsb,
                        (xfs_filblks_t)(end_fsb - offset_fsb),
-                       bmapi_flags,  NULL, 0, &imap,
-                       &nimaps, NULL, NULL);
+                       bmapi_flags,  NULL, 0, imap,
+                       nimaps, NULL, NULL);
 
        if (error)
                goto out;
@@ -169,46 +131,41 @@ xfs_iomap(
        switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
        case BMAPI_WRITE:
                /* If we found an extent, return it */
-               if (nimaps &&
-                   (imap.br_startblock != HOLESTARTBLOCK) &&
-                   (imap.br_startblock != DELAYSTARTBLOCK)) {
-                       trace_xfs_iomap_found(ip, offset, count, flags, &imap);
+               if (*nimaps &&
+                   (imap->br_startblock != HOLESTARTBLOCK) &&
+                   (imap->br_startblock != DELAYSTARTBLOCK)) {
+                       trace_xfs_iomap_found(ip, offset, count, flags, imap);
                        break;
                }
 
                if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
                        error = xfs_iomap_write_direct(ip, offset, count, flags,
-                                                      &imap, &nimaps, nimaps);
+                                                      imap, nimaps);
                } else {
                        error = xfs_iomap_write_delay(ip, offset, count, flags,
-                                                     &imap, &nimaps);
+                                                     imap, nimaps);
                }
                if (!error) {
-                       trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
+                       trace_xfs_iomap_alloc(ip, offset, count, flags, imap);
                }
-               iomap_flags = IOMAP_NEW;
+               *new = 1;
                break;
        case BMAPI_ALLOCATE:
                /* If we found an extent, return it */
                xfs_iunlock(ip, lockmode);
                lockmode = 0;
 
-               if (nimaps && !isnullstartblock(imap.br_startblock)) {
-                       trace_xfs_iomap_found(ip, offset, count, flags, &imap);
+               if (*nimaps && !isnullstartblock(imap->br_startblock)) {
+                       trace_xfs_iomap_found(ip, offset, count, flags, imap);
                        break;
                }
 
                error = xfs_iomap_write_allocate(ip, offset, count,
-                                                &imap, &nimaps);
+                                                imap, nimaps);
                break;
        }
 
-       if (nimaps) {
-               *niomaps = xfs_imap_to_bmap(ip, offset, &imap,
-                                           iomapp, nimaps, *niomaps, iomap_flags);
-       } else if (niomaps) {
-               *niomaps = 0;
-       }
+       ASSERT(*nimaps <= 1);
 
 out:
        if (lockmode)
@@ -216,7 +173,6 @@ out:
        return XFS_ERROR(error);
 }
 
-
 STATIC int
 xfs_iomap_eof_align_last_fsb(
        xfs_mount_t     *mp,
@@ -285,15 +241,14 @@ xfs_cmn_err_fsblock_zero(
        return EFSCORRUPTED;
 }
 
-int
+STATIC int
 xfs_iomap_write_direct(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
        size_t          count,
        int             flags,
        xfs_bmbt_irec_t *ret_imap,
-       int             *nmaps,
-       int             found)
+       int             *nmaps)
 {
        xfs_mount_t     *mp = ip->i_mount;
        xfs_fileoff_t   offset_fsb;
@@ -330,7 +285,7 @@ xfs_iomap_write_direct(
                if (error)
                        goto error_out;
        } else {
-               if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
+               if (*nmaps && (ret_imap->br_startblock == HOLESTARTBLOCK))
                        last_fsb = MIN(last_fsb, (xfs_fileoff_t)
                                        ret_imap->br_blockcount +
                                        ret_imap->br_startoff);
@@ -485,7 +440,7 @@ xfs_iomap_eof_want_preallocate(
        return 0;
 }
 
-int
+STATIC int
 xfs_iomap_write_delay(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
@@ -588,7 +543,7 @@ retry:
  * We no longer bother to look at the incoming map - all we have to
  * guarantee is that whatever we allocate fills the required range.
  */
-int
+STATIC int
 xfs_iomap_write_allocate(
        xfs_inode_t     *ip,
        xfs_off_t       offset,
index 174f299..81ac4af 100644 (file)
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
-#define IOMAP_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-
-typedef enum {                         /* iomap_flags values */
-       IOMAP_READ =            0,      /* mapping for a read */
-       IOMAP_HOLE =            0x02,   /* mapping covers a hole  */
-       IOMAP_DELAY =           0x04,   /* mapping covers delalloc region  */
-       IOMAP_REALTIME =        0x10,   /* mapping on the realtime device  */
-       IOMAP_UNWRITTEN =       0x20,   /* mapping covers allocated */
-                                       /* but uninitialized file data  */
-       IOMAP_NEW =             0x40    /* just allocate */
-} iomap_flags_t;
-
 typedef enum {
        /* base extent manipulation calls */
        BMAPI_READ = (1 << 0),          /* read extents */
@@ -52,43 +39,11 @@ typedef enum {
        { BMAPI_MMAP,           "MMAP" }, \
        { BMAPI_TRYLOCK,        "TRYLOCK" }
 
-/*
- * xfs_iomap_t:  File system I/O map
- *
- * The iomap_bn field is expressed in 512-byte blocks, and is where the
- * mapping starts on disk.
- *
- * The iomap_offset, iomap_bsize and iomap_delta fields are in bytes.
- * iomap_offset is the offset of the mapping in the file itself.
- * iomap_bsize is the size of the mapping,  iomap_delta is the
- * desired data's offset into the mapping, given the offset supplied
- * to the file I/O map routine.
- *
- * When a request is made to read beyond the logical end of the object,
- * iomap_size may be set to 0, but iomap_offset and iomap_length should be set
- * to the actual amount of underlying storage that has been allocated, if any.
- */
-
-typedef struct xfs_iomap {
-       xfs_daddr_t             iomap_bn;       /* first 512B blk of mapping */
-       xfs_buftarg_t           *iomap_target;
-       xfs_off_t               iomap_offset;   /* offset of mapping, bytes */
-       xfs_off_t               iomap_bsize;    /* size of mapping, bytes */
-       xfs_off_t               iomap_delta;    /* offset into mapping, bytes */
-       iomap_flags_t           iomap_flags;
-} xfs_iomap_t;
-
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
 extern int xfs_iomap(struct xfs_inode *, xfs_off_t, ssize_t, int,
-                    struct xfs_iomap *, int *);
-extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
-                                 int, struct xfs_bmbt_irec *, int *, int);
-extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, int,
-                                struct xfs_bmbt_irec *, int *);
-extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t,
-                               struct xfs_bmbt_irec *, int *);
+                    struct xfs_bmbt_irec *, int *, int *);
 extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);
 
 #endif /* __XFS_IOMAP_H__*/
index 2be0191..3038dd5 100644 (file)
 
 kmem_zone_t    *xfs_log_ticket_zone;
 
-#define xlog_write_adv_cnt(ptr, len, off, bytes) \
-       { (ptr) += (bytes); \
-         (len) -= (bytes); \
-         (off) += (bytes);}
-
 /* Local miscellaneous function prototypes */
-STATIC int      xlog_commit_record(xfs_mount_t *mp, xlog_ticket_t *ticket,
+STATIC int      xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
                                    xlog_in_core_t **, xfs_lsn_t *);
 STATIC xlog_t *  xlog_alloc_log(xfs_mount_t    *mp,
                                xfs_buftarg_t   *log_target,
@@ -59,11 +54,9 @@ STATIC xlog_t *  xlog_alloc_log(xfs_mount_t  *mp,
 STATIC int      xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int      xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
 STATIC void     xlog_dealloc_log(xlog_t *log);
-STATIC int      xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
-                           int nentries, struct xlog_ticket *tic,
-                           xfs_lsn_t *start_lsn,
-                           xlog_in_core_t **commit_iclog,
-                           uint flags);
+STATIC int      xlog_write(struct log *log, struct xfs_log_vec *log_vector,
+                           struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+                           xlog_in_core_t **commit_iclog, uint flags);
 
 /* local state machine functions */
 STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
@@ -102,7 +95,7 @@ STATIC xlog_ticket_t *xlog_ticket_alloc(xlog_t *log,
                                         uint   flags);
 
 #if defined(DEBUG)
-STATIC void    xlog_verify_dest_ptr(xlog_t *log, __psint_t ptr);
+STATIC void    xlog_verify_dest_ptr(xlog_t *log, char *ptr);
 STATIC void    xlog_verify_grant_head(xlog_t *log, int equals);
 STATIC void    xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
                                  int count, boolean_t syncing);
@@ -258,7 +251,7 @@ xfs_log_done(
             * If we get an error, just continue and give back the log ticket.
             */
            (((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
-            (xlog_commit_record(mp, ticket, iclog, &lsn)))) {
+            (xlog_commit_record(log, ticket, iclog, &lsn)))) {
                lsn = (xfs_lsn_t) -1;
                if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
                        flags |= XFS_LOG_REL_PERM_RESERV;
@@ -516,18 +509,10 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 #ifdef DEBUG
        xlog_in_core_t   *first_iclog;
 #endif
-       xfs_log_iovec_t  reg[1];
        xlog_ticket_t   *tic = NULL;
        xfs_lsn_t        lsn;
        int              error;
 
-       /* the data section must be 32 bit size aligned */
-       struct {
-           __uint16_t magic;
-           __uint16_t pad1;
-           __uint32_t pad2; /* may as well make it 64 bits */
-       } magic = { XLOG_UNMOUNT_TYPE, 0, 0 };
-
        /*
         * Don't write out unmount record on read-only mounts.
         * Or, if we are doing a forced umount (typically because of IO errors).
@@ -549,16 +534,30 @@ xfs_log_unmount_write(xfs_mount_t *mp)
        } while (iclog != first_iclog);
 #endif
        if (! (XLOG_FORCED_SHUTDOWN(log))) {
-               reg[0].i_addr = (void*)&magic;
-               reg[0].i_len  = sizeof(magic);
-               reg[0].i_type = XLOG_REG_TYPE_UNMOUNT;
-
                error = xfs_log_reserve(mp, 600, 1, &tic,
                                        XFS_LOG, 0, XLOG_UNMOUNT_REC_TYPE);
                if (!error) {
+                       /* the data section must be 32 bit size aligned */
+                       struct {
+                           __uint16_t magic;
+                           __uint16_t pad1;
+                           __uint32_t pad2; /* may as well make it 64 bits */
+                       } magic = {
+                               .magic = XLOG_UNMOUNT_TYPE,
+                       };
+                       struct xfs_log_iovec reg = {
+                               .i_addr = (void *)&magic,
+                               .i_len = sizeof(magic),
+                               .i_type = XLOG_REG_TYPE_UNMOUNT,
+                       };
+                       struct xfs_log_vec vec = {
+                               .lv_niovecs = 1,
+                               .lv_iovecp = &reg,
+                       };
+
                        /* remove inited flag */
-                       ((xlog_ticket_t *)tic)->t_flags = 0;
-                       error = xlog_write(mp, reg, 1, tic, &lsn,
+                       tic->t_flags = 0;
+                       error = xlog_write(log, &vec, tic, &lsn,
                                           NULL, XLOG_UNMOUNT_TRANS);
                        /*
                         * At this point, we're umounting anyway,
@@ -648,10 +647,26 @@ xfs_log_unmount(xfs_mount_t *mp)
        xlog_dealloc_log(mp->m_log);
 }
 
+void
+xfs_log_item_init(
+       struct xfs_mount        *mp,
+       struct xfs_log_item     *item,
+       int                     type,
+       struct xfs_item_ops     *ops)
+{
+       item->li_mountp = mp;
+       item->li_ailp = mp->m_ail;
+       item->li_type = type;
+       item->li_ops = ops;
+}
+
 /*
  * Write region vectors to log.  The write happens using the space reservation
  * of the ticket (tic).  It is not a requirement that all writes for a given
- * transaction occur with one call to xfs_log_write().
+ * transaction occur with one call to xfs_log_write(). However, it is important
+ * to note that the transaction reservation code makes an assumption about the
+ * number of log headers a transaction requires that may be violated if you
+ * don't pass all the transaction vectors in one call....
  */
 int
 xfs_log_write(
@@ -663,11 +678,15 @@ xfs_log_write(
 {
        struct log              *log = mp->m_log;
        int                     error;
+       struct xfs_log_vec      vec = {
+               .lv_niovecs = nentries,
+               .lv_iovecp = reg,
+       };
 
        if (XLOG_FORCED_SHUTDOWN(log))
                return XFS_ERROR(EIO);
 
-       error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
+       error = xlog_write(log, &vec, tic, start_lsn, NULL, 0);
        if (error)
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
        return error;
@@ -1020,6 +1039,7 @@ xlog_alloc_log(xfs_mount_t        *mp,
        int                     i;
        int                     iclogsize;
        int                     error = ENOMEM;
+       uint                    log2_size = 0;
 
        log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
        if (!log) {
@@ -1045,29 +1065,30 @@ xlog_alloc_log(xfs_mount_t      *mp,
 
        error = EFSCORRUPTED;
        if (xfs_sb_version_hassector(&mp->m_sb)) {
-               log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT;
-               if (log->l_sectbb_log < 0 ||
-                   log->l_sectbb_log > mp->m_sectbb_log) {
-                       xlog_warn("XFS: Log sector size (0x%x) out of range.",
-                                               log->l_sectbb_log);
+               log2_size = mp->m_sb.sb_logsectlog;
+               if (log2_size < BBSHIFT) {
+                       xlog_warn("XFS: Log sector size too small "
+                               "(0x%x < 0x%x)", log2_size, BBSHIFT);
                        goto out_free_log;
                }
 
-               /* for larger sector sizes, must have v2 or external log */
-               if (log->l_sectbb_log != 0 &&
-                   (log->l_logBBstart != 0 &&
-                    !xfs_sb_version_haslogv2(&mp->m_sb))) {
-                       xlog_warn("XFS: log sector size (0x%x) invalid "
-                                 "for configuration.", log->l_sectbb_log);
+               log2_size -= BBSHIFT;
+               if (log2_size > mp->m_sectbb_log) {
+                       xlog_warn("XFS: Log sector size too large "
+                               "(0x%x > 0x%x)", log2_size, mp->m_sectbb_log);
                        goto out_free_log;
                }
-               if (mp->m_sb.sb_logsectlog < BBSHIFT) {
-                       xlog_warn("XFS: Log sector log (0x%x) too small.",
-                                               mp->m_sb.sb_logsectlog);
+
+               /* for larger sector sizes, must have v2 or external log */
+               if (log2_size && log->l_logBBstart > 0 &&
+                           !xfs_sb_version_haslogv2(&mp->m_sb)) {
+
+                       xlog_warn("XFS: log sector size (0x%x) invalid "
+                                 "for configuration.", log2_size);
                        goto out_free_log;
                }
        }
-       log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1;
+       log->l_sectBBsize = 1 << log2_size;
 
        xlog_get_iclog_buffer_size(mp, log);
 
@@ -1174,26 +1195,31 @@ out:
  * ticket.  Return the lsn of the commit record.
  */
 STATIC int
-xlog_commit_record(xfs_mount_t  *mp,
-                  xlog_ticket_t *ticket,
-                  xlog_in_core_t **iclog,
-                  xfs_lsn_t    *commitlsnp)
+xlog_commit_record(
+       struct log              *log,
+       struct xlog_ticket      *ticket,
+       struct xlog_in_core     **iclog,
+       xfs_lsn_t               *commitlsnp)
 {
-       int             error;
-       xfs_log_iovec_t reg[1];
-
-       reg[0].i_addr = NULL;
-       reg[0].i_len = 0;
-       reg[0].i_type = XLOG_REG_TYPE_COMMIT;
+       struct xfs_mount *mp = log->l_mp;
+       int     error;
+       struct xfs_log_iovec reg = {
+               .i_addr = NULL,
+               .i_len = 0,
+               .i_type = XLOG_REG_TYPE_COMMIT,
+       };
+       struct xfs_log_vec vec = {
+               .lv_niovecs = 1,
+               .lv_iovecp = &reg,
+       };
 
        ASSERT_ALWAYS(iclog);
-       if ((error = xlog_write(mp, reg, 1, ticket, commitlsnp,
-                              iclog, XLOG_COMMIT_TRANS))) {
+       error = xlog_write(log, &vec, ticket, commitlsnp, iclog,
+                                       XLOG_COMMIT_TRANS);
+       if (error)
                xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-       }
        return error;
-}      /* xlog_commit_record */
-
+}
 
 /*
  * Push on the buffer cache code if we ever use more than 75% of the on-disk
@@ -1613,6 +1639,192 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
        }
 }
 
+/*
+ * Calculate the potential space needed by the log vector.  Each region gets
+ * its own xlog_op_header_t and may need to be double word aligned.
+ */
+static int
+xlog_write_calc_vec_length(
+       struct xlog_ticket      *ticket,
+       struct xfs_log_vec      *log_vector)
+{
+       struct xfs_log_vec      *lv;
+       int                     headers = 0;
+       int                     len = 0;
+       int                     i;
+
+       /* acct for start rec of xact */
+       if (ticket->t_flags & XLOG_TIC_INITED)
+               headers++;
+
+       for (lv = log_vector; lv; lv = lv->lv_next) {
+               headers += lv->lv_niovecs;
+
+               for (i = 0; i < lv->lv_niovecs; i++) {
+                       struct xfs_log_iovec    *vecp = &lv->lv_iovecp[i];
+
+                       len += vecp->i_len;
+                       xlog_tic_add_region(ticket, vecp->i_len, vecp->i_type);
+               }
+       }
+
+       ticket->t_res_num_ophdrs += headers;
+       len += headers * sizeof(struct xlog_op_header);
+
+       return len;
+}
+
+/*
+ * If first write for transaction, insert start record.  We can't be trying to
+ * commit if we are inited.  We can't have any "partial_copy" if we are inited.
+ */
+static int
+xlog_write_start_rec(
+       struct xlog_op_header   *ophdr,
+       struct xlog_ticket      *ticket)
+{
+       if (!(ticket->t_flags & XLOG_TIC_INITED))
+               return 0;
+
+       ophdr->oh_tid   = cpu_to_be32(ticket->t_tid);
+       ophdr->oh_clientid = ticket->t_clientid;
+       ophdr->oh_len = 0;
+       ophdr->oh_flags = XLOG_START_TRANS;
+       ophdr->oh_res2 = 0;
+
+       ticket->t_flags &= ~XLOG_TIC_INITED;
+
+       return sizeof(struct xlog_op_header);
+}
+
+static xlog_op_header_t *
+xlog_write_setup_ophdr(
+       struct log              *log,
+       struct xlog_op_header   *ophdr,
+       struct xlog_ticket      *ticket,
+       uint                    flags)
+{
+       ophdr->oh_tid = cpu_to_be32(ticket->t_tid);
+       ophdr->oh_clientid = ticket->t_clientid;
+       ophdr->oh_res2 = 0;
+
+       /* are we copying a commit or unmount record? */
+       ophdr->oh_flags = flags;
+
+       /*
+        * We've seen logs corrupted with bad transaction client ids.  This
+        * makes sure that XFS doesn't generate them.  Turn this into an EIO
+        * and shut down the filesystem.
+        */
+       switch (ophdr->oh_clientid)  {
+       case XFS_TRANSACTION:
+       case XFS_VOLUME:
+       case XFS_LOG:
+               break;
+       default:
+               xfs_fs_cmn_err(CE_WARN, log->l_mp,
+                       "Bad XFS transaction clientid 0x%x in ticket 0x%p",
+                       ophdr->oh_clientid, ticket);
+               return NULL;
+       }
+
+       return ophdr;
+}
+
+/*
+ * Set up the parameters of the region copy into the log. This has
+ * to handle region write split across multiple log buffers - this
+ * state is kept external to this function so that this code can
+ * be written in an obvious, self-documenting manner.
+ */
+static int
+xlog_write_setup_copy(
+       struct xlog_ticket      *ticket,
+       struct xlog_op_header   *ophdr,
+       int                     space_available,
+       int                     space_required,
+       int                     *copy_off,
+       int                     *copy_len,
+       int                     *last_was_partial_copy,
+       int                     *bytes_consumed)
+{
+       int                     still_to_copy;
+
+       still_to_copy = space_required - *bytes_consumed;
+       *copy_off = *bytes_consumed;
+
+       if (still_to_copy <= space_available) {
+               /* write of region completes here */
+               *copy_len = still_to_copy;
+               ophdr->oh_len = cpu_to_be32(*copy_len);
+               if (*last_was_partial_copy)
+                       ophdr->oh_flags |= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
+               *last_was_partial_copy = 0;
+               *bytes_consumed = 0;
+               return 0;
+       }
+
+       /* partial write of region, needs extra log op header reservation */
+       *copy_len = space_available;
+       ophdr->oh_len = cpu_to_be32(*copy_len);
+       ophdr->oh_flags |= XLOG_CONTINUE_TRANS;
+       if (*last_was_partial_copy)
+               ophdr->oh_flags |= XLOG_WAS_CONT_TRANS;
+       *bytes_consumed += *copy_len;
+       (*last_was_partial_copy)++;
+
+       /* account for new log op header */
+       ticket->t_curr_res -= sizeof(struct xlog_op_header);
+       ticket->t_res_num_ophdrs++;
+
+       return sizeof(struct xlog_op_header);
+}
+
+static int
+xlog_write_copy_finish(
+       struct log              *log,
+       struct xlog_in_core     *iclog,
+       uint                    flags,
+       int                     *record_cnt,
+       int                     *data_cnt,
+       int                     *partial_copy,
+       int                     *partial_copy_len,
+       int                     log_offset,
+       struct xlog_in_core     **commit_iclog)
+{
+       if (*partial_copy) {
+               /*
+                * This iclog has already been marked WANT_SYNC by
+                * xlog_state_get_iclog_space.
+                */
+               xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+               *record_cnt = 0;
+               *data_cnt = 0;
+               return xlog_state_release_iclog(log, iclog);
+       }
+
+       *partial_copy = 0;
+       *partial_copy_len = 0;
+
+       if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
+               /* no more space in this iclog - push it. */
+               xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
+               *record_cnt = 0;
+               *data_cnt = 0;
+
+               spin_lock(&log->l_icloglock);
+               xlog_state_want_sync(log, iclog);
+               spin_unlock(&log->l_icloglock);
+
+               if (!commit_iclog)
+                       return xlog_state_release_iclog(log, iclog);
+               ASSERT(flags & XLOG_COMMIT_TRANS);
+               *commit_iclog = iclog;
+       }
+
+       return 0;
+}
+
 /*
  * Write some region out to in-core log
  *
@@ -1655,209 +1867,157 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
  */
 STATIC int
 xlog_write(
-       struct xfs_mount        *mp,
-       struct xfs_log_iovec    reg[],
-       int                     nentries,
+       struct log              *log,
+       struct xfs_log_vec      *log_vector,
        struct xlog_ticket      *ticket,
        xfs_lsn_t               *start_lsn,
        struct xlog_in_core     **commit_iclog,
        uint                    flags)
 {
-    xlog_t          *log = mp->m_log;
-    xlog_in_core_t   *iclog = NULL;  /* ptr to current in-core log */
-    xlog_op_header_t *logop_head;    /* ptr to log operation header */
-    __psint_t       ptr;            /* copy address into data region */
-    int                     len;            /* # xlog_write() bytes 2 still copy */
-    int                     index;          /* region index currently copying */
-    int                     log_offset;     /* offset (from 0) into data region */
-    int                     start_rec_copy; /* # bytes to copy for start record */
-    int                     partial_copy;   /* did we split a region? */
-    int                     partial_copy_len;/* # bytes copied if split region */
-    int                     need_copy;      /* # bytes need to memcpy this region */
-    int                     copy_len;       /* # bytes actually memcpy'ing */
-    int                     copy_off;       /* # bytes from entry start */
-    int                     contwr;         /* continued write of in-core log? */
-    int                     error;
-    int                     record_cnt = 0, data_cnt = 0;
-
-    partial_copy_len = partial_copy = 0;
-
-    /* Calculate potential maximum space.  Each region gets its own
-     * xlog_op_header_t and may need to be double word aligned.
-     */
-    len = 0;
-    if (ticket->t_flags & XLOG_TIC_INITED) {    /* acct for start rec of xact */
-       len += sizeof(xlog_op_header_t);
-       ticket->t_res_num_ophdrs++;
-    }
+       struct xlog_in_core     *iclog = NULL;
+       struct xfs_log_iovec    *vecp;
+       struct xfs_log_vec      *lv;
+       int                     len;
+       int                     index;
+       int                     partial_copy = 0;
+       int                     partial_copy_len = 0;
+       int                     contwr = 0;
+       int                     record_cnt = 0;
+       int                     data_cnt = 0;
+       int                     error;
 
-    for (index = 0; index < nentries; index++) {
-       len += sizeof(xlog_op_header_t);            /* each region gets >= 1 */
-       ticket->t_res_num_ophdrs++;
-       len += reg[index].i_len;
-       xlog_tic_add_region(ticket, reg[index].i_len, reg[index].i_type);
-    }
-    contwr = *start_lsn = 0;
+       *start_lsn = 0;
 
-    if (ticket->t_curr_res < len) {
-       xlog_print_tic_res(mp, ticket);
+       len = xlog_write_calc_vec_length(ticket, log_vector);
+       if (ticket->t_curr_res < len) {
+               xlog_print_tic_res(log->l_mp, ticket);
 #ifdef DEBUG
-       xlog_panic(
-               "xfs_log_write: reservation ran out. Need to up reservation");
+               xlog_panic(
+       "xfs_log_write: reservation ran out. Need to up reservation");
 #else
-       /* Customer configurable panic */
-       xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, mp,
-               "xfs_log_write: reservation ran out. Need to up reservation");
-       /* If we did not panic, shutdown the filesystem */
-       xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               /* Customer configurable panic */
+               xfs_cmn_err(XFS_PTAG_LOGRES, CE_ALERT, log->l_mp,
+       "xfs_log_write: reservation ran out. Need to up reservation");
+
+               /* If we did not panic, shutdown the filesystem */
+               xfs_force_shutdown(log->l_mp, SHUTDOWN_CORRUPT_INCORE);
 #endif
-    } else
+       }
+
        ticket->t_curr_res -= len;
 
-    for (index = 0; index < nentries; ) {
-       if ((error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
-                                              &contwr, &log_offset)))
-               return error;
+       index = 0;
+       lv = log_vector;
+       vecp = lv->lv_iovecp;
+       while (lv && index < lv->lv_niovecs) {
+               void            *ptr;
+               int             log_offset;
 
-       ASSERT(log_offset <= iclog->ic_size - 1);
-       ptr = (__psint_t) ((char *)iclog->ic_datap+log_offset);
+               error = xlog_state_get_iclog_space(log, len, &iclog, ticket,
+                                                  &contwr, &log_offset);
+               if (error)
+                       return error;
 
-       /* start_lsn is the first lsn written to. That's all we need. */
-       if (! *start_lsn)
-           *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+               ASSERT(log_offset <= iclog->ic_size - 1);
+               ptr = iclog->ic_datap + log_offset;
 
-       /* This loop writes out as many regions as can fit in the amount
-        * of space which was allocated by xlog_state_get_iclog_space().
-        */
-       while (index < nentries) {
-           ASSERT(reg[index].i_len % sizeof(__int32_t) == 0);
-           ASSERT((__psint_t)ptr % sizeof(__int32_t) == 0);
-           start_rec_copy = 0;
-
-           /* If first write for transaction, insert start record.
-            * We can't be trying to commit if we are inited.  We can't
-            * have any "partial_copy" if we are inited.
-            */
-           if (ticket->t_flags & XLOG_TIC_INITED) {
-               logop_head              = (xlog_op_header_t *)ptr;
-               logop_head->oh_tid      = cpu_to_be32(ticket->t_tid);
-               logop_head->oh_clientid = ticket->t_clientid;
-               logop_head->oh_len      = 0;
-               logop_head->oh_flags    = XLOG_START_TRANS;
-               logop_head->oh_res2     = 0;
-               ticket->t_flags         &= ~XLOG_TIC_INITED;    /* clear bit */
-               record_cnt++;
-
-               start_rec_copy = sizeof(xlog_op_header_t);
-               xlog_write_adv_cnt(ptr, len, log_offset, start_rec_copy);
-           }
+               /* start_lsn is the first lsn written to. That's all we need. */
+               if (!*start_lsn)
+                       *start_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
 
-           /* Copy log operation header directly into data section */
-           logop_head                  = (xlog_op_header_t *)ptr;
-           logop_head->oh_tid          = cpu_to_be32(ticket->t_tid);
-           logop_head->oh_clientid     = ticket->t_clientid;
-           logop_head->oh_res2         = 0;
+               /*
+                * This loop writes out as many regions as can fit in the amount
+                * of space which was allocated by xlog_state_get_iclog_space().
+                */
+               while (lv && index < lv->lv_niovecs) {
+                       struct xfs_log_iovec    *reg = &vecp[index];
+                       struct xlog_op_header   *ophdr;
+                       int                     start_rec_copy;
+                       int                     copy_len;
+                       int                     copy_off;
+
+                       ASSERT(reg->i_len % sizeof(__int32_t) == 0);
+                       ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
+
+                       start_rec_copy = xlog_write_start_rec(ptr, ticket);
+                       if (start_rec_copy) {
+                               record_cnt++;
+                               xlog_write_adv_cnt(&ptr, &len, &log_offset,
+                                                  start_rec_copy);
+                       }
 
-           /* header copied directly */
-           xlog_write_adv_cnt(ptr, len, log_offset, sizeof(xlog_op_header_t));
+                       ophdr = xlog_write_setup_ophdr(log, ptr, ticket, flags);
+                       if (!ophdr)
+                               return XFS_ERROR(EIO);
 
-           /* are we copying a commit or unmount record? */
-           logop_head->oh_flags = flags;
+                       xlog_write_adv_cnt(&ptr, &len, &log_offset,
+                                          sizeof(struct xlog_op_header));
+
+                       len += xlog_write_setup_copy(ticket, ophdr,
+                                                    iclog->ic_size-log_offset,
+                                                    reg->i_len,
+                                                    &copy_off, &copy_len,
+                                                    &partial_copy,
+                                                    &partial_copy_len);
+                       xlog_verify_dest_ptr(log, ptr);
+
+                       /* copy region */
+                       ASSERT(copy_len >= 0);
+                       memcpy(ptr, reg->i_addr + copy_off, copy_len);
+                       xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len);
+
+                       copy_len += start_rec_copy + sizeof(xlog_op_header_t);
+                       record_cnt++;
+                       data_cnt += contwr ? copy_len : 0;
+
+                       error = xlog_write_copy_finish(log, iclog, flags,
+                                                      &record_cnt, &data_cnt,
+                                                      &partial_copy,
+                                                      &partial_copy_len,
+                                                      log_offset,
+                                                      commit_iclog);
+                       if (error)
+                               return error;
 
-           /*
-            * We've seen logs corrupted with bad transaction client
-            * ids.  This makes sure that XFS doesn't generate them on.
-            * Turn this into an EIO and shut down the filesystem.
-            */
-           switch (logop_head->oh_clientid)  {
-           case XFS_TRANSACTION:
-           case XFS_VOLUME:
-           case XFS_LOG:
-               break;
-           default:
-               xfs_fs_cmn_err(CE_WARN, mp,
-                   "Bad XFS transaction clientid 0x%x in ticket 0x%p",
-                   logop_head->oh_clientid, ticket);
-               return XFS_ERROR(EIO);
-           }
+                       /*
+                        * if we had a partial copy, we need to get more iclog
+                        * space but we don't want to increment the region
+                        * index because there is still more in this region to
+                        * write.
+                        *
+                        * If we completed writing this region, and we flushed
+                        * the iclog (indicated by resetting of the record
+                        * count), then we also need to get more log space. If
+                        * this was the last record, though, we are done and
+                        * can just return.
+                        */
+                       if (partial_copy)
+                               break;
 
-           /* Partial write last time? => (partial_copy != 0)
-            * need_copy is the amount we'd like to copy if everything could
-            * fit in the current memcpy.
-            */
-           need_copy = reg[index].i_len - partial_copy_len;
-
-           copy_off = partial_copy_len;
-           if (need_copy <= iclog->ic_size - log_offset) { /*complete write */
-               copy_len = need_copy;
-               logop_head->oh_len = cpu_to_be32(copy_len);
-               if (partial_copy)
-                   logop_head->oh_flags|= (XLOG_END_TRANS|XLOG_WAS_CONT_TRANS);
-               partial_copy_len = partial_copy = 0;
-           } else {                                        /* partial write */
-               copy_len = iclog->ic_size - log_offset;
-               logop_head->oh_len = cpu_to_be32(copy_len);
-               logop_head->oh_flags |= XLOG_CONTINUE_TRANS;
-               if (partial_copy)
-                       logop_head->oh_flags |= XLOG_WAS_CONT_TRANS;
-               partial_copy_len += copy_len;
-               partial_copy++;
-               len += sizeof(xlog_op_header_t); /* from splitting of region */
-               /* account for new log op header */
-               ticket->t_curr_res -= sizeof(xlog_op_header_t);
-               ticket->t_res_num_ophdrs++;
-           }
-           xlog_verify_dest_ptr(log, ptr);
-
-           /* copy region */
-           ASSERT(copy_len >= 0);
-           memcpy((xfs_caddr_t)ptr, reg[index].i_addr + copy_off, copy_len);
-           xlog_write_adv_cnt(ptr, len, log_offset, copy_len);
-
-           /* make copy_len total bytes copied, including headers */
-           copy_len += start_rec_copy + sizeof(xlog_op_header_t);
-           record_cnt++;
-           data_cnt += contwr ? copy_len : 0;
-           if (partial_copy) {                 /* copied partial region */
-                   /* already marked WANT_SYNC by xlog_state_get_iclog_space */
-                   xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-                   record_cnt = data_cnt = 0;
-                   if ((error = xlog_state_release_iclog(log, iclog)))
-                           return error;
-                   break;                      /* don't increment index */
-           } else {                            /* copied entire region */
-               index++;
-               partial_copy_len = partial_copy = 0;
-
-               if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
-                   xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-                   record_cnt = data_cnt = 0;
-                   spin_lock(&log->l_icloglock);
-                   xlog_state_want_sync(log, iclog);
-                   spin_unlock(&log->l_icloglock);
-                   if (commit_iclog) {
-                       ASSERT(flags & XLOG_COMMIT_TRANS);
-                       *commit_iclog = iclog;
-                   } else if ((error = xlog_state_release_iclog(log, iclog)))
-                          return error;
-                   if (index == nentries)
-                           return 0;           /* we are done */
-                   else
-                           break;
+                       if (++index == lv->lv_niovecs) {
+                               lv = lv->lv_next;
+                               index = 0;
+                               if (lv)
+                                       vecp = lv->lv_iovecp;
+                       }
+                       if (record_cnt == 0) {
+                               if (!lv)
+                                       return 0;
+                               break;
+                       }
                }
-           } /* if (partial_copy) */
-       } /* while (index < nentries) */
-    } /* for (index = 0; index < nentries; ) */
-    ASSERT(len == 0);
+       }
+
+       ASSERT(len == 0);
+
+       xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
+       if (!commit_iclog)
+               return xlog_state_release_iclog(log, iclog);
 
-    xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
-    if (commit_iclog) {
        ASSERT(flags & XLOG_COMMIT_TRANS);
        *commit_iclog = iclog;
        return 0;
-    }
-    return xlog_state_release_iclog(log, iclog);
-}      /* xlog_write */
+}
 
 
 /*****************************************************************************
@@ -3157,14 +3317,16 @@ xfs_log_ticket_get(
  * Allocate and initialise a new log ticket.
  */
 STATIC xlog_ticket_t *
-xlog_ticket_alloc(xlog_t               *log,
-               int             unit_bytes,
-               int             cnt,
-               char            client,
-               uint            xflags)
+xlog_ticket_alloc(
+       struct log      *log,
+       int             unit_bytes,
+       int             cnt,
+       char            client,
+       uint            xflags)
 {
-       xlog_ticket_t   *tic;
+       struct xlog_ticket *tic;
        uint            num_headers;
+       int             iclog_space;
 
        tic = kmem_zone_zalloc(xfs_log_ticket_zone, KM_SLEEP|KM_MAYFAIL);
        if (!tic)
@@ -3208,16 +3370,40 @@ xlog_ticket_alloc(xlog_t                *log,
        /* for start-rec */
        unit_bytes += sizeof(xlog_op_header_t);
 
-       /* for LR headers */
-       num_headers = ((unit_bytes + log->l_iclog_size-1) >> log->l_iclog_size_log);
+       /*
+        * for LR headers - the space for data in an iclog is the size minus
+        * the space used for the headers. If we use the iclog size, then we
+        * undercalculate the number of headers required.
+        *
+        * Furthermore - the addition of op headers for split-recs might
+        * increase the space required enough to require more log and op
+        * headers, so take that into account too.
+        *
+        * IMPORTANT: This reservation makes the assumption that if this
+        * transaction is the first in an iclog and hence has the LR headers
+        * accounted to it, then the remaining space in the iclog is
+        * exclusively for this transaction.  i.e. if the transaction is larger
+        * than the iclog, it will be the only thing in that iclog.
+        * Fundamentally, this means we must pass the entire log vector to
+        * xlog_write to guarantee this.
+        */
+       iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+       num_headers = howmany(unit_bytes, iclog_space);
+
+       /* for split-recs - ophdrs added when data split over LRs */
+       unit_bytes += sizeof(xlog_op_header_t) * num_headers;
+
+       /* add extra header reservations if we overrun */
+       while (!num_headers ||
+              howmany(unit_bytes, iclog_space) > num_headers) {
+               unit_bytes += sizeof(xlog_op_header_t);
+               num_headers++;
+       }
        unit_bytes += log->l_iclog_hsize * num_headers;
 
        /* for commit-rec LR header - note: padding will subsume the ophdr */
        unit_bytes += log->l_iclog_hsize;
 
-       /* for split-recs - ophdrs added when data split over LRs */
-       unit_bytes += sizeof(xlog_op_header_t) * num_headers;
-
        /* for roundoff padding for transaction data and one for commit record */
        if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
            log->l_mp->m_sb.sb_logsunit > 1) {
@@ -3233,13 +3419,13 @@ xlog_ticket_alloc(xlog_t                *log,
        tic->t_curr_res         = unit_bytes;
        tic->t_cnt              = cnt;
        tic->t_ocnt             = cnt;
-       tic->t_tid              = (xlog_tid_t)((__psint_t)tic & 0xffffffff);
+       tic->t_tid              = random32();
        tic->t_clientid         = client;
        tic->t_flags            = XLOG_TIC_INITED;
        tic->t_trans_type       = 0;
        if (xflags & XFS_LOG_PERM_RESERV)
                tic->t_flags |= XLOG_TIC_PERM_RESERV;
-       sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
+       sv_init(&tic->t_wait, SV_DEFAULT, "logtick");
 
        xlog_tic_reset_res(tic);
 
@@ -3260,20 +3446,22 @@ xlog_ticket_alloc(xlog_t                *log,
  * part of the log in case we trash the log structure.
  */
 void
-xlog_verify_dest_ptr(xlog_t     *log,
-                    __psint_t  ptr)
+xlog_verify_dest_ptr(
+       struct log      *log,
+       char            *ptr)
 {
        int i;
        int good_ptr = 0;
 
-       for (i=0; i < log->l_iclog_bufs; i++) {
-               if (ptr >= (__psint_t)log->l_iclog_bak[i] &&
-                   ptr <= (__psint_t)log->l_iclog_bak[i]+log->l_iclog_size)
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               if (ptr >= log->l_iclog_bak[i] &&
+                   ptr <= log->l_iclog_bak[i] + log->l_iclog_size)
                        good_ptr++;
        }
-       if (! good_ptr)
+
+       if (!good_ptr)
                xlog_panic("xlog_verify_dest_ptr: invalid ptr");
-}      /* xlog_verify_dest_ptr */
+}
 
 STATIC void
 xlog_verify_grant_head(xlog_t *log, int equals)
index 97a24c7..229d1f3 100644 (file)
@@ -110,6 +110,12 @@ typedef struct xfs_log_iovec {
        uint            i_type;         /* type of region */
 } xfs_log_iovec_t;
 
+struct xfs_log_vec {
+       struct xfs_log_vec      *lv_next;       /* next lv in build list */
+       int                     lv_niovecs;     /* number of iovecs in lv */
+       struct xfs_log_iovec    *lv_iovecp;     /* iovec array */
+};
+
 /*
  * Structure used to pass callback function and the function's argument
  * to the log manager.
@@ -126,6 +132,13 @@ typedef struct xfs_log_callback {
 struct xfs_mount;
 struct xlog_in_core;
 struct xlog_ticket;
+struct xfs_log_item;
+struct xfs_item_ops;
+
+void   xfs_log_item_init(struct xfs_mount      *mp,
+                       struct xfs_log_item     *item,
+                       int                     type,
+                       struct xfs_item_ops     *ops);
 
 xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
                       struct xlog_ticket *ticket,
index fd02a18..9cf6951 100644 (file)
@@ -396,9 +396,7 @@ typedef struct log {
        struct xfs_buf_cancel   **l_buf_cancel_table;
        int                     l_iclog_hsize;  /* size of iclog header */
        int                     l_iclog_heads;  /* # of iclog header sectors */
-       uint                    l_sectbb_log;   /* log2 of sector size in BBs */
-       uint                    l_sectbb_mask;  /* sector size (in BBs)
-                                                * alignment mask */
+       uint                    l_sectBBsize;   /* sector size in BBs (2^n) */
        int                     l_iclog_size;   /* size of log in bytes */
        int                     l_iclog_size_log; /* log power size of log */
        int                     l_iclog_bufs;   /* number of iclog buffers */
@@ -449,6 +447,14 @@ extern void         xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
 
 extern kmem_zone_t     *xfs_log_ticket_zone;
 
+static inline void
+xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
+{
+       *ptr += bytes;
+       *len -= bytes;
+       *off += bytes;
+}
+
 /*
  * Unmount record type is used as a pseudo transaction type for the ticket.
  * It's value must be outside the range of XFS_TRANS_* values.
index 22e6efd..0de08e3 100644 (file)
@@ -56,33 +56,61 @@ STATIC void xlog_recover_check_summary(xlog_t *);
 #define        xlog_recover_check_summary(log)
 #endif
 
-
 /*
  * Sector aligned buffer routines for buffer create/read/write/access
  */
 
-#define XLOG_SECTOR_ROUNDUP_BBCOUNT(log, bbs)  \
-       ( ((log)->l_sectbb_mask && (bbs & (log)->l_sectbb_mask)) ? \
-       ((bbs + (log)->l_sectbb_mask + 1) & ~(log)->l_sectbb_mask) : (bbs) )
-#define XLOG_SECTOR_ROUNDDOWN_BLKNO(log, bno)  ((bno) & ~(log)->l_sectbb_mask)
+/*
+ * Verify the given count of basic blocks is a valid number of blocks
+ * to specify for an operation involving the given XFS log buffer.
+ * Returns nonzero if the count is valid, 0 otherwise.
+ */
 
+static inline int
+xlog_buf_bbcount_valid(
+       xlog_t          *log,
+       int             bbcount)
+{
+       return bbcount > 0 && bbcount <= log->l_logBBsize;
+}
+
+/*
+ * Allocate a buffer to hold log data.  The buffer needs to be able
+ * to map to a range of nbblks basic blocks at any valid (basic
+ * block) offset within the log.
+ */
 STATIC xfs_buf_t *
 xlog_get_bp(
        xlog_t          *log,
        int             nbblks)
 {
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_get_bp(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return NULL;
        }
 
-       if (log->l_sectbb_log) {
-               if (nbblks > 1)
-                       nbblks += XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       /*
+        * We do log I/O in units of log sectors (a power-of-2
+        * multiple of the basic block size), so we round up the
+        * requested size to accommodate the basic blocks required
+        * for complete log sectors.
+        *
+        * In addition, the buffer may be used for a non-sector-
+        * aligned block offset, in which case an I/O of the
+        * requested size could extend beyond the end of the
+        * buffer.  If the requested size is only 1 basic block it
+        * will never straddle a sector boundary, so this won't be
+        * an issue.  Nor will this be a problem if the log I/O is
+        * done in basic blocks (sector size 1).  But otherwise we
+        * extend the buffer by one extra log sector to ensure
+        * there's space to accommodate this possibility.
+        */
+       if (nbblks > 1 && log->l_sectBBsize > 1)
+               nbblks += log->l_sectBBsize;
+       nbblks = round_up(nbblks, log->l_sectBBsize);
+
        return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
 }
 
@@ -93,6 +121,10 @@ xlog_put_bp(
        xfs_buf_free(bp);
 }
 
+/*
+ * Return the address of the start of the given block number's data
+ * in a log buffer.  The buffer covers a log sector-aligned region.
+ */
 STATIC xfs_caddr_t
 xlog_align(
        xlog_t          *log,
@@ -100,14 +132,14 @@ xlog_align(
        int             nbblks,
        xfs_buf_t       *bp)
 {
+       xfs_daddr_t     offset;
        xfs_caddr_t     ptr;
 
-       if (!log->l_sectbb_log)
-               return XFS_BUF_PTR(bp);
+       offset = blk_no & ((xfs_daddr_t) log->l_sectBBsize - 1);
+       ptr = XFS_BUF_PTR(bp) + BBTOB(offset);
+
+       ASSERT(ptr + BBTOB(nbblks) <= XFS_BUF_PTR(bp) + XFS_BUF_SIZE(bp));
 
-       ptr = XFS_BUF_PTR(bp) + BBTOB((int)blk_no & log->l_sectbb_mask);
-       ASSERT(XFS_BUF_SIZE(bp) >=
-               BBTOB(nbblks + (blk_no & log->l_sectbb_mask)));
        return ptr;
 }
 
@@ -124,21 +156,18 @@ xlog_bread_noalign(
 {
        int             error;
 
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_bread(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return EFSCORRUPTED;
        }
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
 
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
-       ASSERT(bp);
 
        XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
        XFS_BUF_READ(bp);
@@ -186,17 +215,15 @@ xlog_bwrite(
 {
        int             error;
 
-       if (nbblks <= 0 || nbblks > log->l_logBBsize) {
-               xlog_warn("XFS: Invalid block length (0x%x) given for buffer", nbblks);
-               XFS_ERROR_REPORT("xlog_bwrite(1)",
-                                XFS_ERRLEVEL_HIGH, log->l_mp);
+       if (!xlog_buf_bbcount_valid(log, nbblks)) {
+               xlog_warn("XFS: Invalid block length (0x%x) given for buffer",
+                       nbblks);
+               XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
                return EFSCORRUPTED;
        }
 
-       if (log->l_sectbb_log) {
-               blk_no = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, blk_no);
-               nbblks = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, nbblks);
-       }
+       blk_no = round_down(blk_no, log->l_sectBBsize);
+       nbblks = round_up(nbblks, log->l_sectBBsize);
 
        ASSERT(nbblks > 0);
        ASSERT(BBTOB(nbblks) <= XFS_BUF_SIZE(bp));
@@ -327,39 +354,38 @@ xlog_find_cycle_start(
 {
        xfs_caddr_t     offset;
        xfs_daddr_t     mid_blk;
+       xfs_daddr_t     end_blk;
        uint            mid_cycle;
        int             error;
 
-       mid_blk = BLK_AVG(first_blk, *last_blk);
-       while (mid_blk != first_blk && mid_blk != *last_blk) {
+       end_blk = *last_blk;
+       mid_blk = BLK_AVG(first_blk, end_blk);
+       while (mid_blk != first_blk && mid_blk != end_blk) {
                error = xlog_bread(log, mid_blk, 1, bp, &offset);
                if (error)
                        return error;
                mid_cycle = xlog_get_cycle(offset);
-               if (mid_cycle == cycle) {
-                       *last_blk = mid_blk;
-                       /* last_half_cycle == mid_cycle */
-               } else {
-                       first_blk = mid_blk;
-                       /* first_half_cycle == mid_cycle */
-               }
-               mid_blk = BLK_AVG(first_blk, *last_blk);
+               if (mid_cycle == cycle)
+                       end_blk = mid_blk;   /* last_half_cycle == mid_cycle */
+               else
+                       first_blk = mid_blk; /* first_half_cycle == mid_cycle */
+               mid_blk = BLK_AVG(first_blk, end_blk);
        }
-       ASSERT((mid_blk == first_blk && mid_blk+1 == *last_blk) ||
-              (mid_blk == *last_blk && mid_blk-1 == first_blk));
+       ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) ||
+              (mid_blk == end_blk && mid_blk-1 == first_blk));
+
+       *last_blk = end_blk;
 
        return 0;
 }
 
 /*
- * Check that the range of blocks does not contain the cycle number
- * given.  The scan needs to occur from front to back and the ptr into the
- * region must be updated since a later routine will need to perform another
- * test.  If the region is completely good, we end up returning the same
- * last block number.
- *
- * Set blkno to -1 if we encounter no errors.  This is an invalid block number
- * since we don't ever expect logs to get this large.
+ * Check that a range of blocks does not contain stop_on_cycle_no.
+ * Fill in *new_blk with the block offset where such a block is
+ * found, or with -1 (an invalid block number) if there is no such
+ * block in the range.  The scan needs to occur from front to back
+ * and the pointer into the region must be updated since a later
+ * routine will need to perform another test.
  */
 STATIC int
 xlog_find_verify_cycle(
@@ -376,12 +402,16 @@ xlog_find_verify_cycle(
        xfs_caddr_t     buf = NULL;
        int             error = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks we'll be examining.  If that fails,
+        * try a smaller size.  We need to be able to read at least
+        * a log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(nbblks);
-
        while (!(bp = xlog_get_bp(log, bufblks))) {
-               /* can't get enough memory to do everything in one big buffer */
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < log->l_sectBBsize)
                        return ENOMEM;
        }
 
@@ -629,7 +659,7 @@ xlog_find_head(
                 * In this case we want to find the first block with cycle
                 * number matching last_half_cycle.  We expect the log to be
                 * some variation on
-                *        x + 1 ... | x ...
+                *        x + 1 ... | x ... | x
                 * The first block with cycle number x (last_half_cycle) will
                 * be where the new head belongs.  First we do a binary search
                 * for the first occurrence of last_half_cycle.  The binary
@@ -639,11 +669,13 @@ xlog_find_head(
                 * the log, then we look for occurrences of last_half_cycle - 1
                 * at the end of the log.  The cases we're looking for look
                 * like
-                *        x + 1 ... | x | x + 1 | x ...
-                *                               ^ binary search stopped here
+                *                               v binary search stopped here
+                *        x + 1 ... | x | x + 1 | x ... | x
+                *                   ^ but we want to locate this spot
                 * or
-                *        x + 1 ... | x ... | x - 1 | x
                 *        <---------> less than scan distance
+                *        x + 1 ... | x ... | x - 1 | x
+                *                           ^ we want to locate this spot
                 */
                stop_on_cycle = last_half_cycle;
                if ((error = xlog_find_cycle_start(log, bp, first_blk,
@@ -699,16 +731,16 @@ xlog_find_head(
                 * certainly not the head of the log.  By searching for
                 * last_half_cycle-1 we accomplish that.
                 */
-               start_blk = log_bbnum - num_scan_bblks + head_blk;
                ASSERT(head_blk <= INT_MAX &&
-                       (xfs_daddr_t) num_scan_bblks - head_blk >= 0);
+                       (xfs_daddr_t) num_scan_bblks >= head_blk);
+               start_blk = log_bbnum - (num_scan_bblks - head_blk);
                if ((error = xlog_find_verify_cycle(log, start_blk,
                                        num_scan_bblks - (int)head_blk,
                                        (stop_on_cycle - 1), &new_blk)))
                        goto bp_err;
                if (new_blk != -1) {
                        head_blk = new_blk;
-                       goto bad_blk;
+                       goto validate_head;
                }
 
                /*
@@ -726,7 +758,7 @@ xlog_find_head(
                        head_blk = new_blk;
        }
 
- bad_blk:
+validate_head:
        /*
         * Now we need to make sure head_blk is not pointing to a block in
         * the middle of a log record.
@@ -748,7 +780,7 @@ xlog_find_head(
                if ((error = xlog_find_verify_log_record(log, start_blk,
                                                        &head_blk, 0)) == -1) {
                        /* We hit the beginning of the log during our search */
-                       start_blk = log_bbnum - num_scan_bblks + head_blk;
+                       start_blk = log_bbnum - (num_scan_bblks - head_blk);
                        new_blk = log_bbnum;
                        ASSERT(start_blk <= INT_MAX &&
                                (xfs_daddr_t) log_bbnum-start_blk >= 0);
@@ -833,12 +865,12 @@ xlog_find_tail(
        if (*head_blk == 0) {                           /* special case */
                error = xlog_bread(log, 0, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                if (xlog_get_cycle(offset) == 0) {
                        *tail_blk = 0;
                        /* leave all other log inited values alone */
-                       goto exit;
+                       goto done;
                }
        }
 
@@ -849,7 +881,7 @@ xlog_find_tail(
        for (i = (int)(*head_blk) - 1; i >= 0; i--) {
                error = xlog_bread(log, i, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
                        found = 1;
@@ -866,7 +898,7 @@ xlog_find_tail(
                for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
                        error = xlog_bread(log, i, 1, bp, &offset);
                        if (error)
-                               goto bread_err;
+                               goto done;
 
                        if (XLOG_HEADER_MAGIC_NUM ==
                            be32_to_cpu(*(__be32 *)offset)) {
@@ -941,7 +973,7 @@ xlog_find_tail(
                umount_data_blk = (i + hblks) % log->l_logBBsize;
                error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
                if (error)
-                       goto bread_err;
+                       goto done;
 
                op_head = (xlog_op_header_t *)offset;
                if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
@@ -987,12 +1019,10 @@ xlog_find_tail(
         * But... if the -device- itself is readonly, just skip this.
         * We can't recover this device anyway, so it won't matter.
         */
-       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) {
+       if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp))
                error = xlog_clear_stale_blocks(log, tail_lsn);
-       }
 
-bread_err:
-exit:
+done:
        xlog_put_bp(bp);
 
        if (error)
@@ -1152,16 +1182,22 @@ xlog_write_log_records(
        xfs_caddr_t     offset;
        xfs_buf_t       *bp;
        int             balign, ealign;
-       int             sectbb = XLOG_SECTOR_ROUNDUP_BBCOUNT(log, 1);
+       int             sectbb = log->l_sectBBsize;
        int             end_block = start_block + blocks;
        int             bufblks;
        int             error = 0;
        int             i, j = 0;
 
+       /*
+        * Greedily allocate a buffer big enough to handle the full
+        * range of basic blocks to be written.  If that fails, try
+        * a smaller size.  We need to be able to write at least a
+        * log sector, or we're out of luck.
+        */
        bufblks = 1 << ffs(blocks);
        while (!(bp = xlog_get_bp(log, bufblks))) {
                bufblks >>= 1;
-               if (bufblks <= log->l_sectbb_log)
+               if (bufblks < sectbb)
                        return ENOMEM;
        }
 
@@ -1169,7 +1205,7 @@ xlog_write_log_records(
         * the buffer in the starting sector not covered by the first
         * write below.
         */
-       balign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, start_block);
+       balign = round_down(start_block, sectbb);
        if (balign != start_block) {
                error = xlog_bread_noalign(log, start_block, 1, bp);
                if (error)
@@ -1188,7 +1224,7 @@ xlog_write_log_records(
                 * the buffer in the final sector not covered by the write.
                 * If this is the same sector as the above read, skip it.
                 */
-               ealign = XLOG_SECTOR_ROUNDDOWN_BLKNO(log, end_block);
+               ealign = round_down(end_block, sectbb);
                if (j == 0 && (start_block + endcount > ealign)) {
                        offset = XFS_BUF_PTR(bp);
                        balign = BBTOB(ealign - start_block);
@@ -1408,6 +1444,7 @@ xlog_recover_add_item(
 
 STATIC int
 xlog_recover_add_to_cont_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1434,6 +1471,7 @@ xlog_recover_add_to_cont_trans(
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
+       trace_xfs_log_recover_item_add_cont(log, trans, item, 0);
        return 0;
 }
 
@@ -1452,6 +1490,7 @@ xlog_recover_add_to_cont_trans(
  */
 STATIC int
 xlog_recover_add_to_trans(
+       struct log              *log,
        xlog_recover_t          *trans,
        xfs_caddr_t             dp,
        int                     len)
@@ -1510,6 +1549,7 @@ xlog_recover_add_to_trans(
        item->ri_buf[item->ri_cnt].i_addr = ptr;
        item->ri_buf[item->ri_cnt].i_len  = len;
        item->ri_cnt++;
+       trace_xfs_log_recover_item_add(log, trans, item, 0);
        return 0;
 }
 
@@ -1521,7 +1561,9 @@ xlog_recover_add_to_trans(
  */
 STATIC int
 xlog_recover_reorder_trans(
-       xlog_recover_t          *trans)
+       struct log              *log,
+       xlog_recover_t          *trans,
+       int                     pass)
 {
        xlog_recover_item_t     *item, *n;
        LIST_HEAD(sort_list);
@@ -1535,6 +1577,8 @@ xlog_recover_reorder_trans(
                switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
                        if (!(buf_f->blf_flags & XFS_BLI_CANCEL)) {
+                               trace_xfs_log_recover_item_reorder_head(log,
+                                                       trans, item, pass);
                                list_move(&item->ri_list, &trans->r_itemq);
                                break;
                        }
@@ -1543,6 +1587,8 @@ xlog_recover_reorder_trans(
                case XFS_LI_QUOTAOFF:
                case XFS_LI_EFD:
                case XFS_LI_EFI:
+                       trace_xfs_log_recover_item_reorder_tail(log,
+                                                       trans, item, pass);
                        list_move_tail(&item->ri_list, &trans->r_itemq);
                        break;
                default:
@@ -1592,8 +1638,10 @@ xlog_recover_do_buffer_pass1(
        /*
         * If this isn't a cancel buffer item, then just return.
         */
-       if (!(flags & XFS_BLI_CANCEL))
+       if (!(flags & XFS_BLI_CANCEL)) {
+               trace_xfs_log_recover_buf_not_cancel(log, buf_f);
                return;
+       }
 
        /*
         * Insert an xfs_buf_cancel record into the hash table of
@@ -1627,6 +1675,7 @@ xlog_recover_do_buffer_pass1(
        while (nextp != NULL) {
                if (nextp->bc_blkno == blkno && nextp->bc_len == len) {
                        nextp->bc_refcount++;
+                       trace_xfs_log_recover_buf_cancel_ref_inc(log, buf_f);
                        return;
                }
                prevp = nextp;
@@ -1640,6 +1689,7 @@ xlog_recover_do_buffer_pass1(
        bcp->bc_refcount = 1;
        bcp->bc_next = NULL;
        prevp->bc_next = bcp;
+       trace_xfs_log_recover_buf_cancel_add(log, buf_f);
 }
 
 /*
@@ -1779,6 +1829,8 @@ xlog_recover_do_inode_buffer(
        unsigned int            *data_map = NULL;
        unsigned int            map_size = 0;
 
+       trace_xfs_log_recover_buf_inode_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
@@ -1874,6 +1926,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+       struct xfs_mount        *mp,
        xlog_recover_item_t     *item,
        xfs_buf_t               *bp,
        xfs_buf_log_format_t    *buf_f)
@@ -1885,6 +1938,8 @@ xlog_recover_do_reg_buffer(
        unsigned int            map_size = 0;
        int                     error;
 
+       trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
+
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                data_map = buf_f->blf_data_map;
@@ -2083,6 +2138,8 @@ xlog_recover_do_dquot_buffer(
 {
        uint                    type;
 
+       trace_xfs_log_recover_buf_dquot_buf(log, buf_f);
+
        /*
         * Filesystems are required to send in quota flags at mount time.
         */
@@ -2103,7 +2160,7 @@ xlog_recover_do_dquot_buffer(
        if (log->l_quotaoffs_flag & type)
                return;
 
-       xlog_recover_do_reg_buffer(item, bp, buf_f);
+       xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2164,9 +2221,11 @@ xlog_recover_do_buffer_trans(
                 */
                cancel = xlog_recover_do_buffer_pass2(log, buf_f);
                if (cancel) {
+                       trace_xfs_log_recover_buf_cancel(log, buf_f);
                        return 0;
                }
        }
+       trace_xfs_log_recover_buf_recover(log, buf_f);
        switch (buf_f->blf_type) {
        case XFS_LI_BUF:
                blkno = buf_f->blf_blkno;
@@ -2204,7 +2263,7 @@ xlog_recover_do_buffer_trans(
                  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
                xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
        } else {
-               xlog_recover_do_reg_buffer(item, bp, buf_f);
+               xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
        }
        if (error)
                return XFS_ERROR(error);
@@ -2284,8 +2343,10 @@ xlog_recover_do_inode_trans(
        if (xlog_check_buffer_cancelled(log, in_f->ilf_blkno,
                                        in_f->ilf_len, 0)) {
                error = 0;
+               trace_xfs_log_recover_inode_cancel(log, in_f);
                goto error;
        }
+       trace_xfs_log_recover_inode_recover(log, in_f);
 
        bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
                          XBF_LOCK);
@@ -2337,6 +2398,7 @@ xlog_recover_do_inode_trans(
                        /* do nothing */
                } else {
                        xfs_buf_relse(bp);
+                       trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
                        goto error;
                }
@@ -2758,11 +2820,12 @@ xlog_recover_do_trans(
        int                     error = 0;
        xlog_recover_item_t     *item;
 
-       error = xlog_recover_reorder_trans(trans);
+       error = xlog_recover_reorder_trans(log, trans, pass);
        if (error)
                return error;
 
        list_for_each_entry(item, &trans->r_itemq, ri_list) {
+               trace_xfs_log_recover_item_recover(log, trans, item, pass);
                switch (ITEM_TYPE(item)) {
                case XFS_LI_BUF:
                        error = xlog_recover_do_buffer_trans(log, item, pass);
@@ -2919,8 +2982,9 @@ xlog_recover_process_data(
                                error = xlog_recover_unmount_trans(trans);
                                break;
                        case XLOG_WAS_CONT_TRANS:
-                               error = xlog_recover_add_to_cont_trans(trans,
-                                               dp, be32_to_cpu(ohead->oh_len));
+                               error = xlog_recover_add_to_cont_trans(log,
+                                               trans, dp,
+                                               be32_to_cpu(ohead->oh_len));
                                break;
                        case XLOG_START_TRANS:
                                xlog_warn(
@@ -2930,7 +2994,7 @@ xlog_recover_process_data(
                                break;
                        case 0:
                        case XLOG_CONTINUE_TRANS:
-                               error = xlog_recover_add_to_trans(trans,
+                               error = xlog_recover_add_to_trans(log, trans,
                                                dp, be32_to_cpu(ohead->oh_len));
                                break;
                        default:
@@ -3331,42 +3395,6 @@ xlog_pack_data(
        }
 }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-STATIC void
-xlog_unpack_data_checksum(
-       xlog_rec_header_t       *rhead,
-       xfs_caddr_t             dp,
-       xlog_t                  *log)
-{
-       __be32                  *up = (__be32 *)dp;
-       uint                    chksum = 0;
-       int                     i;
-
-       /* divide length by 4 to get # words */
-       for (i=0; i < be32_to_cpu(rhead->h_len) >> 2; i++) {
-               chksum ^= be32_to_cpu(*up);
-               up++;
-       }
-       if (chksum != be32_to_cpu(rhead->h_chksum)) {
-           if (rhead->h_chksum ||
-               ((log->l_flags & XLOG_CHKSUM_MISMATCH) == 0)) {
-                   cmn_err(CE_DEBUG,
-                       "XFS: LogR chksum mismatch: was (0x%x) is (0x%x)\n",
-                           be32_to_cpu(rhead->h_chksum), chksum);
-                   cmn_err(CE_DEBUG,
-"XFS: Disregard message if filesystem was created with non-DEBUG kernel");
-                   if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-                           cmn_err(CE_DEBUG,
-                               "XFS: LogR this is a LogV2 filesystem\n");
-                   }
-                   log->l_flags |= XLOG_CHKSUM_MISMATCH;
-           }
-       }
-}
-#else
-#define xlog_unpack_data_checksum(rhead, dp, log)
-#endif
-
 STATIC void
 xlog_unpack_data(
        xlog_rec_header_t       *rhead,
@@ -3390,8 +3418,6 @@ xlog_unpack_data(
                        dp += BBSIZE;
                }
        }
-
-       xlog_unpack_data_checksum(rhead, dp, log);
 }
 
 STATIC int
@@ -3490,7 +3516,7 @@ xlog_do_recovery_pass(
                        hblks = 1;
                }
        } else {
-               ASSERT(log->l_sectbb_log == 0);
+               ASSERT(log->l_sectBBsize == 1);
                hblks = 1;
                hbp = xlog_get_bp(log, 1);
                h_size = XLOG_BIG_RECORD_BSIZE;
@@ -3946,10 +3972,6 @@ xlog_recover_check_summary(
        xfs_agf_t       *agfp;
        xfs_buf_t       *agfbp;
        xfs_buf_t       *agibp;
-       xfs_buf_t       *sbbp;
-#ifdef XFS_LOUD_RECOVERY
-       xfs_sb_t        *sbp;
-#endif
        xfs_agnumber_t  agno;
        __uint64_t      freeblks;
        __uint64_t      itotal;
@@ -3984,30 +4006,5 @@ xlog_recover_check_summary(
                        xfs_buf_relse(agibp);
                }
        }
-
-       sbbp = xfs_getsb(mp, 0);
-#ifdef XFS_LOUD_RECOVERY
-       sbp = &mp->m_sb;
-       xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(sbbp));
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_icount %Lu itotal %Lu",
-               sbp->sb_icount, itotal);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_ifree %Lu itotal %Lu",
-               sbp->sb_ifree, ifree);
-       cmn_err(CE_NOTE,
-               "xlog_recover_check_summary: sb_fdblocks %Lu freeblks %Lu",
-               sbp->sb_fdblocks, freeblks);
-#if 0
-       /*
-        * This is turned off until I account for the allocation
-        * btree blocks which live in free space.
-        */
-       ASSERT(sbp->sb_icount == itotal);
-       ASSERT(sbp->sb_ifree == ifree);
-       ASSERT(sbp->sb_fdblocks == freeblks);
-#endif
-#endif
-       xfs_buf_relse(sbbp);
 }
 #endif /* DEBUG */
index e79b56b..d7bf38c 100644 (file)
@@ -1405,13 +1405,6 @@ xfs_mountfs(
                xfs_qm_mount_quotas(mp);
        }
 
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
-       if (XFS_IS_QUOTA_ON(mp))
-               xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-       else
-               xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
-#endif
-
        /*
         * Now we are mounted, reserve a small amount of unused space for
         * privileged transactions. This is needed so that transaction
index fdcab3f..e0e64b1 100644 (file)
@@ -201,9 +201,6 @@ typedef struct xfs_qoff_logformat {
 #define XFS_QMOPT_FORCE_RES    0x0000010 /* ignore quota limits */
 #define XFS_QMOPT_DQSUSER      0x0000020 /* don't cache super users dquot */
 #define XFS_QMOPT_SBVERSION    0x0000040 /* change superblock version num */
-#define XFS_QMOPT_QUOTAOFF     0x0000080 /* quotas are being turned off */
-#define XFS_QMOPT_UMOUNTING    0x0000100 /* filesys is being unmounted */
-#define XFS_QMOPT_DOLOG                0x0000200 /* log buf changes (in quotacheck) */
 #define XFS_QMOPT_DOWARN        0x0000400 /* increase warning cnt if needed */
 #define XFS_QMOPT_DQREPAIR     0x0001000 /* repair dquot if damaged */
 #define XFS_QMOPT_GQUOTA       0x0002000 /* group dquot requested */
index f73e358..be578ec 100644 (file)
 #include "xfs_trans_space.h"
 #include "xfs_inode_item.h"
 
-
-STATIC void    xfs_trans_apply_sb_deltas(xfs_trans_t *);
-STATIC uint    xfs_trans_count_vecs(xfs_trans_t *);
-STATIC void    xfs_trans_fill_vecs(xfs_trans_t *, xfs_log_iovec_t *);
-STATIC void    xfs_trans_uncommit(xfs_trans_t *, uint);
-STATIC void    xfs_trans_committed(xfs_trans_t *, int);
-STATIC void    xfs_trans_chunk_committed(xfs_log_item_chunk_t *, xfs_lsn_t, int);
-STATIC void    xfs_trans_free(xfs_trans_t *);
-
 kmem_zone_t    *xfs_trans_zone;
 
-
 /*
  * Reservation functions here avoid a huge stack in xfs_trans_init
  * due to register overflow from temporaries in the calculations.
  */
-
 STATIC uint
 xfs_calc_write_reservation(xfs_mount_t *mp)
 {
@@ -260,6 +249,19 @@ _xfs_trans_alloc(
        return tp;
 }
 
+/*
+ * Free the transaction structure.  If there is more clean up
+ * to do when the structure is freed, add it here.
+ */
+STATIC void
+xfs_trans_free(
+       xfs_trans_t     *tp)
+{
+       atomic_dec(&tp->t_mountp->m_active_trans);
+       xfs_trans_free_dqinfo(tp);
+       kmem_zone_free(xfs_trans_zone, tp);
+}
+
 /*
  * This is called to create a new transaction which will share the
  * permanent log reservation of the given transaction.  The remaining
@@ -764,94 +766,278 @@ xfs_trans_unreserve_and_mod_sb(
        }
 }
 
+/*
+ * Total up the number of log iovecs needed to commit this
+ * transaction.  The transaction itself needs one for the
+ * transaction header.  Ask each dirty item in turn how many
+ * it needs to get the total.
+ */
+static uint
+xfs_trans_count_vecs(
+       struct xfs_trans        *tp)
+{
+       int                     nvecs;
+       xfs_log_item_desc_t     *lidp;
+
+       nvecs = 1;
+       lidp = xfs_trans_first_item(tp);
+       ASSERT(lidp != NULL);
+
+       /*
+        * Non-debug kernels get past the ASSERT above, so bail out here if
+        * no log item was found: return zero and let the commit code cope.
+        */
+       if (lidp == NULL)
+               return 0;
+
+       while (lidp != NULL) {
+               /*
+                * Skip items which aren't dirty in this transaction.
+                */
+               if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
+                       lidp = xfs_trans_next_item(tp, lidp);
+                       continue;
+               }
+               lidp->lid_size = IOP_SIZE(lidp->lid_item);
+               nvecs += lidp->lid_size;
+               lidp = xfs_trans_next_item(tp, lidp);
+       }
+
+       return nvecs;
+}
 
 /*
- * xfs_trans_commit
+ * Fill in the vector with pointers to data to be logged
+ * by this transaction.  The transaction header takes
+ * the first vector, and then each dirty item takes the
+ * number of vectors it indicated it needed in xfs_trans_count_vecs().
  *
- * Commit the given transaction to the log a/synchronously.
+ * As each item fills in the entries it needs, also pin the item
+ * so that it cannot be flushed out until the log write completes.
+ */
+static void
+xfs_trans_fill_vecs(
+       struct xfs_trans        *tp,
+       struct xfs_log_iovec    *log_vector)
+{
+       xfs_log_item_desc_t     *lidp;
+       struct xfs_log_iovec    *vecp;
+       uint                    nitems;
+
+       /*
+        * Skip over the entry for the transaction header, we'll
+        * fill that in at the end.
+        */
+       vecp = log_vector + 1;
+
+       nitems = 0;
+       lidp = xfs_trans_first_item(tp);
+       ASSERT(lidp);
+       while (lidp) {
+               /* Skip items which aren't dirty in this transaction. */
+               if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
+                       lidp = xfs_trans_next_item(tp, lidp);
+                       continue;
+               }
+
+               /*
+                * The item may be marked dirty but not log anything.  This can
+                * be used to get called when a transaction is committed.
+                */
+               if (lidp->lid_size)
+                       nitems++;
+               IOP_FORMAT(lidp->lid_item, vecp);
+               vecp += lidp->lid_size;
+               IOP_PIN(lidp->lid_item);
+               lidp = xfs_trans_next_item(tp, lidp);
+       }
+
+       /*
+        * Now that we've counted the number of items in this transaction, fill
+        * in the transaction header. Note that the transaction header does not
+        * have a log item.
+        */
+       tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
+       tp->t_header.th_type = tp->t_type;
+       tp->t_header.th_num_items = nitems;
+       log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
+       log_vector->i_len = sizeof(xfs_trans_header_t);
+       log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
+}
+
+/*
+ * The committed item processing consists of calling the committed routine of
+ * each logged item, updating the item's position in the AIL if necessary, and
+ * unpinning each item.  If the committed routine returns -1, then do nothing
+ * further with the item because it may have been freed.
  *
- * XFS disk error handling mechanism is not based on a typical
- * transaction abort mechanism. Logically after the filesystem
- * gets marked 'SHUTDOWN', we can't let any new transactions
- * be durable - ie. committed to disk - because some metadata might
- * be inconsistent. In such cases, this returns an error, and the
- * caller may assume that all locked objects joined to the transaction
- * have already been unlocked as if the commit had succeeded.
- * Do not reference the transaction structure after this call.
+ * Since items are unlocked when they are copied to the incore log, it is
+ * possible for two transactions to be completing and manipulating the same
+ * item simultaneously.  The AIL lock will protect the lsn field of each item.
+ * The value of this field can never go backwards.
+ *
+ * We unpin the items after repositioning them in the AIL, because otherwise
+ * they could be immediately flushed and we'd have to race with the flusher
+ * trying to pull the item from the AIL as we add it.
  */
- /*ARGSUSED*/
-int
-_xfs_trans_commit(
-       xfs_trans_t     *tp,
-       uint            flags,
-       int             *log_flushed)
+static void
+xfs_trans_item_committed(
+       struct xfs_log_item     *lip,
+       xfs_lsn_t               commit_lsn,
+       int                     aborted)
 {
-       xfs_log_iovec_t         *log_vector;
-       int                     nvec;
-       xfs_mount_t             *mp;
-       xfs_lsn_t               commit_lsn;
-       /* REFERENCED */
-       int                     error;
-       int                     log_flags;
-       int                     sync;
-#define        XFS_TRANS_LOGVEC_COUNT  16
-       xfs_log_iovec_t         log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
-       struct xlog_in_core     *commit_iclog;
-       int                     shutdown;
+       xfs_lsn_t               item_lsn;
+       struct xfs_ail          *ailp;
 
-       commit_lsn = -1;
+       if (aborted)
+               lip->li_flags |= XFS_LI_ABORTED;
+       item_lsn = IOP_COMMITTED(lip, commit_lsn);
+
+       /* If IOP_COMMITTED() returns -1, the item may have been freed. */
+       if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
+               return;
 
        /*
-        * Determine whether this commit is releasing a permanent
-        * log reservation or not.
+        * If the returned lsn is greater than what it contained before, update
+        * the location of the item in the AIL.  If it is not, then do nothing.
+        * Items can never move backwards in the AIL.
+        *
+        * While the new lsn should usually be greater, it is possible that a
+        * later transaction completing simultaneously with an earlier one
+        * using the same item could complete first with a higher lsn.  This
+        * would cause the earlier transaction to fail the test below.
         */
-       if (flags & XFS_TRANS_RELEASE_LOG_RES) {
-               ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
-               log_flags = XFS_LOG_REL_PERM_RESERV;
+       ailp = lip->li_ailp;
+       spin_lock(&ailp->xa_lock);
+       if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
+               /*
+                * This will set the item's lsn to item_lsn and update the
+                * position of the item in the AIL.
+                *
+                * xfs_trans_ail_update() drops the AIL lock.
+                */
+               xfs_trans_ail_update(ailp, lip, item_lsn);
        } else {
-               log_flags = 0;
+               spin_unlock(&ailp->xa_lock);
        }
-       mp = tp->t_mountp;
 
        /*
-        * If there is nothing to be logged by the transaction,
-        * then unlock all of the items associated with the
-        * transaction and free the transaction structure.
-        * Also make sure to return any reserved blocks to
-        * the free pool.
+        * Now that we've repositioned the item in the AIL, unpin it so
+        * it can be flushed.  Only the log item itself is handed to
+        * IOP_UNPIN(); no separate buffer stale state is passed down
+        * by this call.
         */
-shut_us_down:
-       shutdown = XFS_FORCED_SHUTDOWN(mp) ? EIO : 0;
-       if (!(tp->t_flags & XFS_TRANS_DIRTY) || shutdown) {
-               xfs_trans_unreserve_and_mod_sb(tp);
+       IOP_UNPIN(lip);
+}
+
+/* Clear all the per-AG busy list items listed in this transaction */
+static void
+xfs_trans_clear_busy_extents(
+       struct xfs_trans        *tp)
+{
+       xfs_log_busy_chunk_t    *lbcp;
+       xfs_log_busy_slot_t     *lbsp;
+       int                     i;
+
+       for (lbcp = &tp->t_busy; lbcp != NULL; lbcp = lbcp->lbc_next) {
+               i = 0;
+               for (lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
+                       if (XFS_LBC_ISFREE(lbcp, i))
+                               continue;
+                       xfs_alloc_clear_busy(tp, lbsp->lbc_ag, lbsp->lbc_idx);
+               }
+       }
+       xfs_trans_free_busy(tp);
+}
+
+/*
+ * This is typically called by the LM when a transaction has been fully
+ * committed to disk.  It needs to unpin the items which have
+ * been logged by the transaction and update their positions
+ * in the AIL if necessary.
+ *
+ * This is also called when the transaction could not be written out
+ * because of an I/O error; abortflag then has XFS_LI_ABORTED set.
+ */
+STATIC void
+xfs_trans_committed(
+       struct xfs_trans        *tp,
+       int                     abortflag)
+{
+       xfs_log_item_desc_t     *lidp;
+       xfs_log_item_chunk_t    *licp;
+       xfs_log_item_chunk_t    *next_licp;
+
+       /* Call the transaction's completion callback if there is one. */
+       if (tp->t_callback != NULL)
+               tp->t_callback(tp, tp->t_callarg);
+
+       for (lidp = xfs_trans_first_item(tp);
+            lidp != NULL;
+            lidp = xfs_trans_next_item(tp, lidp)) {
+               xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
+       }
+
+       /* free the item chunks, ignoring the embedded chunk */
+       for (licp = tp->t_items.lic_next; licp != NULL; licp = next_licp) {
+               next_licp = licp->lic_next;
+               kmem_free(licp);
+       }
+
+       xfs_trans_clear_busy_extents(tp);
+       xfs_trans_free(tp);
+}
+
+/*
+ * Called from the trans_commit code when we notice that
+ * the filesystem is in the middle of a forced shutdown.
+ */
+STATIC void
+xfs_trans_uncommit(
+       struct xfs_trans        *tp,
+       uint                    flags)
+{
+       xfs_log_item_desc_t     *lidp;
+
+       for (lidp = xfs_trans_first_item(tp);
+            lidp != NULL;
+            lidp = xfs_trans_next_item(tp, lidp)) {
                /*
-                * It is indeed possible for the transaction to be
-                * not dirty but the dqinfo portion to be. All that
-                * means is that we have some (non-persistent) quota
-                * reservations that need to be unreserved.
+                * Unpin only the items that are dirty in this transaction.
                 */
-               xfs_trans_unreserve_and_mod_dquots(tp);
-               if (tp->t_ticket) {
-                       commit_lsn = xfs_log_done(mp, tp->t_ticket,
-                                                       NULL, log_flags);
-                       if (commit_lsn == -1 && !shutdown)
-                               shutdown = XFS_ERROR(EIO);
-               }
-               current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-               xfs_trans_free_items(tp, shutdown? XFS_TRANS_ABORT : 0);
-               xfs_trans_free_busy(tp);
-               xfs_trans_free(tp);
-               XFS_STATS_INC(xs_trans_empty);
-               return (shutdown);
+               if (lidp->lid_flags & XFS_LID_DIRTY)
+                       IOP_UNPIN_REMOVE(lidp->lid_item, tp);
        }
-       ASSERT(tp->t_ticket != NULL);
 
-       /*
-        * If we need to update the superblock, then do it now.
-        */
-       if (tp->t_flags & XFS_TRANS_SB_DIRTY)
-               xfs_trans_apply_sb_deltas(tp);
-       xfs_trans_apply_dquot_deltas(tp);
+       xfs_trans_unreserve_and_mod_sb(tp);
+       xfs_trans_unreserve_and_mod_dquots(tp);
+
+       xfs_trans_free_items(tp, flags);
+       xfs_trans_free_busy(tp);
+       xfs_trans_free(tp);
+}
+
+/*
+ * Format the transaction direct to the iclog. This isolates the physical
+ * transaction commit operation from the logical operation and hence allows
+ * other methods to be introduced without affecting the existing commit path.
+ */
+static int
+xfs_trans_commit_iclog(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       xfs_lsn_t               *commit_lsn,
+       int                     flags)
+{
+       int                     shutdown;
+       int                     error;
+       int                     log_flags = 0;
+       struct xlog_in_core     *commit_iclog;
+#define XFS_TRANS_LOGVEC_COUNT  16
+       struct xfs_log_iovec    log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
+       struct xfs_log_iovec    *log_vector;
+       uint                    nvec;
+
 
        /*
         * Ask each log item how many log_vector entries it will
@@ -861,8 +1047,7 @@ shut_us_down:
         */
        nvec = xfs_trans_count_vecs(tp);
        if (nvec == 0) {
-               xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
-               goto shut_us_down;
+               return ENOMEM;  /* triggers a shutdown! */
        } else if (nvec <= XFS_TRANS_LOGVEC_COUNT) {
                log_vector = log_vector_fast;
        } else {
@@ -877,6 +1062,9 @@ shut_us_down:
         */
        xfs_trans_fill_vecs(tp, log_vector);
 
+       if (flags & XFS_TRANS_RELEASE_LOG_RES)
+               log_flags = XFS_LOG_REL_PERM_RESERV;
+
        error = xfs_log_write(mp, log_vector, nvec, tp->t_ticket, &(tp->t_lsn));
 
        /*
@@ -884,18 +1072,17 @@ shut_us_down:
         * at any time after this call.  However, all the items associated
         * with the transaction are still locked and pinned in memory.
         */
-       commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
+       *commit_lsn = xfs_log_done(mp, tp->t_ticket, &commit_iclog, log_flags);
 
-       tp->t_commit_lsn = commit_lsn;
-       if (nvec > XFS_TRANS_LOGVEC_COUNT) {
+       tp->t_commit_lsn = *commit_lsn;
+       if (nvec > XFS_TRANS_LOGVEC_COUNT)
                kmem_free(log_vector);
-       }
 
        /*
         * If we got a log write error. Unpin the logitems that we
         * had pinned, clean up, free trans structure, and return error.
         */
-       if (error || commit_lsn == -1) {
+       if (error || *commit_lsn == -1) {
                current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
                xfs_trans_uncommit(tp, flags|XFS_TRANS_ABORT);
                return XFS_ERROR(EIO);
@@ -909,8 +1096,6 @@ shut_us_down:
         */
        xfs_trans_unreserve_and_mod_sb(tp);
 
-       sync = tp->t_flags & XFS_TRANS_SYNC;
-
        /*
         * Tell the LM to call the transaction completion routine
         * when the log write with LSN commit_lsn completes (e.g.
@@ -953,7 +1138,7 @@ shut_us_down:
         * the commit lsn of this transaction for dependency tracking
         * purposes.
         */
-       xfs_trans_unlock_items(tp, commit_lsn);
+       xfs_trans_unlock_items(tp, *commit_lsn);
 
        /*
         * If we detected a log error earlier, finish committing
@@ -973,156 +1158,114 @@ shut_us_down:
         * and the items are released we can finally allow the iclog to
         * go to disk.
         */
-       error = xfs_log_release_iclog(mp, commit_iclog);
-
-       /*
-        * If the transaction needs to be synchronous, then force the
-        * log out now and wait for it.
-        */
-       if (sync) {
-               if (!error) {
-                       error = _xfs_log_force_lsn(mp, commit_lsn,
-                                     XFS_LOG_SYNC, log_flushed);
-               }
-               XFS_STATS_INC(xs_trans_sync);
-       } else {
-               XFS_STATS_INC(xs_trans_async);
-       }
-
-       return (error);
+       return xfs_log_release_iclog(mp, commit_iclog);
 }
 
 
 /*
- * Total up the number of log iovecs needed to commit this
- * transaction.  The transaction itself needs one for the
- * transaction header.  Ask each dirty item in turn how many
- * it needs to get the total.
+ * xfs_trans_commit
+ *
+ * Commit the given transaction to the log a/synchronously.
+ *
+ * XFS disk error handling mechanism is not based on a typical
+ * transaction abort mechanism. Logically after the filesystem
+ * gets marked 'SHUTDOWN', we can't let any new transactions
+ * be durable - ie. committed to disk - because some metadata might
+ * be inconsistent. In such cases, this returns an error, and the
+ * caller may assume that all locked objects joined to the transaction
+ * have already been unlocked as if the commit had succeeded.
+ * Do not reference the transaction structure after this call.
  */
-STATIC uint
-xfs_trans_count_vecs(
-       xfs_trans_t     *tp)
+int
+_xfs_trans_commit(
+       struct xfs_trans        *tp,
+       uint                    flags,
+       int                     *log_flushed)
 {
-       int                     nvecs;
-       xfs_log_item_desc_t     *lidp;
+       struct xfs_mount        *mp = tp->t_mountp;
+       xfs_lsn_t               commit_lsn = -1;
+       int                     error = 0;
+       int                     log_flags = 0;
+       int                     sync = tp->t_flags & XFS_TRANS_SYNC;
 
-       nvecs = 1;
-       lidp = xfs_trans_first_item(tp);
-       ASSERT(lidp != NULL);
-
-       /* In the non-debug case we need to start bailing out if we
-        * didn't find a log_item here, return zero and let trans_commit
-        * deal with it.
+       /*
+        * Determine whether this commit is releasing a permanent
+        * log reservation or not.
         */
-       if (lidp == NULL)
-               return 0;
-
-       while (lidp != NULL) {
-               /*
-                * Skip items which aren't dirty in this transaction.
-                */
-               if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
-                       lidp = xfs_trans_next_item(tp, lidp);
-                       continue;
-               }
-               lidp->lid_size = IOP_SIZE(lidp->lid_item);
-               nvecs += lidp->lid_size;
-               lidp = xfs_trans_next_item(tp, lidp);
+       if (flags & XFS_TRANS_RELEASE_LOG_RES) {
+               ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+               log_flags = XFS_LOG_REL_PERM_RESERV;
        }
 
-       return nvecs;
-}
-
-/*
- * Called from the trans_commit code when we notice that
- * the filesystem is in the middle of a forced shutdown.
- */
-STATIC void
-xfs_trans_uncommit(
-       xfs_trans_t     *tp,
-       uint            flags)
-{
-       xfs_log_item_desc_t     *lidp;
+       /*
+        * If there is nothing to be logged by the transaction,
+        * then unlock all of the items associated with the
+        * transaction and free the transaction structure.
+        * Also make sure to return any reserved blocks to
+        * the free pool.
+        */
+       if (!(tp->t_flags & XFS_TRANS_DIRTY))
+               goto out_unreserve;
 
-       for (lidp = xfs_trans_first_item(tp);
-            lidp != NULL;
-            lidp = xfs_trans_next_item(tp, lidp)) {
-               /*
-                * Unpin all but those that aren't dirty.
-                */
-               if (lidp->lid_flags & XFS_LID_DIRTY)
-                       IOP_UNPIN_REMOVE(lidp->lid_item, tp);
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               error = XFS_ERROR(EIO);
+               goto out_unreserve;
        }
 
-       xfs_trans_unreserve_and_mod_sb(tp);
-       xfs_trans_unreserve_and_mod_dquots(tp);
+       ASSERT(tp->t_ticket != NULL);
 
-       xfs_trans_free_items(tp, flags);
-       xfs_trans_free_busy(tp);
-       xfs_trans_free(tp);
-}
+       /*
+        * If we need to update the superblock, then do it now.
+        */
+       if (tp->t_flags & XFS_TRANS_SB_DIRTY)
+               xfs_trans_apply_sb_deltas(tp);
+       xfs_trans_apply_dquot_deltas(tp);
 
-/*
- * Fill in the vector with pointers to data to be logged
- * by this transaction.  The transaction header takes
- * the first vector, and then each dirty item takes the
- * number of vectors it indicated it needed in xfs_trans_count_vecs().
- *
- * As each item fills in the entries it needs, also pin the item
- * so that it cannot be flushed out until the log write completes.
- */
-STATIC void
-xfs_trans_fill_vecs(
-       xfs_trans_t             *tp,
-       xfs_log_iovec_t         *log_vector)
-{
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_iovec_t         *vecp;
-       uint                    nitems;
+       error = xfs_trans_commit_iclog(mp, tp, &commit_lsn, flags);
+       if (error == ENOMEM) {
+               xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
+               error = XFS_ERROR(EIO);
+               goto out_unreserve;
+       }
 
        /*
-        * Skip over the entry for the transaction header, we'll
-        * fill that in at the end.
+        * If the transaction needs to be synchronous, then force the
+        * log out now and wait for it.
         */
-       vecp = log_vector + 1;          /* pointer arithmetic */
-
-       nitems = 0;
-       lidp = xfs_trans_first_item(tp);
-       ASSERT(lidp != NULL);
-       while (lidp != NULL) {
-               /*
-                * Skip items which aren't dirty in this transaction.
-                */
-               if (!(lidp->lid_flags & XFS_LID_DIRTY)) {
-                       lidp = xfs_trans_next_item(tp, lidp);
-                       continue;
-               }
-               /*
-                * The item may be marked dirty but not log anything.
-                * This can be used to get called when a transaction
-                * is committed.
-                */
-               if (lidp->lid_size) {
-                       nitems++;
+       if (sync) {
+               if (!error) {
+                       error = _xfs_log_force_lsn(mp, commit_lsn,
+                                     XFS_LOG_SYNC, log_flushed);
                }
-               IOP_FORMAT(lidp->lid_item, vecp);
-               vecp += lidp->lid_size;         /* pointer arithmetic */
-               IOP_PIN(lidp->lid_item);
-               lidp = xfs_trans_next_item(tp, lidp);
+               XFS_STATS_INC(xs_trans_sync);
+       } else {
+               XFS_STATS_INC(xs_trans_async);
        }
 
+       return error;
+
+out_unreserve:
+       xfs_trans_unreserve_and_mod_sb(tp);
+
        /*
-        * Now that we've counted the number of items in this
-        * transaction, fill in the transaction header.
+        * It is indeed possible for the transaction to be not dirty but
+        * the dqinfo portion to be.  All that means is that we have some
+        * (non-persistent) quota reservations that need to be unreserved.
         */
-       tp->t_header.th_magic = XFS_TRANS_HEADER_MAGIC;
-       tp->t_header.th_type = tp->t_type;
-       tp->t_header.th_num_items = nitems;
-       log_vector->i_addr = (xfs_caddr_t)&tp->t_header;
-       log_vector->i_len = sizeof(xfs_trans_header_t);
-       log_vector->i_type = XLOG_REG_TYPE_TRANSHDR;
-}
+       xfs_trans_unreserve_and_mod_dquots(tp);
+       if (tp->t_ticket) {
+               commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+               if (commit_lsn == -1 && !error)
+                       error = XFS_ERROR(EIO);
+       }
+       current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
+       xfs_trans_free_items(tp, error ? XFS_TRANS_ABORT : 0);
+       xfs_trans_free_busy(tp);
+       xfs_trans_free(tp);
 
+       XFS_STATS_INC(xs_trans_empty);
+       return error;
+}
 
 /*
  * Unlock all of the transaction's items and free the transaction.
@@ -1200,20 +1343,6 @@ xfs_trans_cancel(
        xfs_trans_free(tp);
 }
 
-
-/*
- * Free the transaction structure.  If there is more clean up
- * to do when the structure is freed, add it here.
- */
-STATIC void
-xfs_trans_free(
-       xfs_trans_t     *tp)
-{
-       atomic_dec(&tp->t_mountp->m_active_trans);
-       xfs_trans_free_dqinfo(tp);
-       kmem_zone_free(xfs_trans_zone, tp);
-}
-
 /*
  * Roll from one trans in the sequence of PERMANENT transactions to
  * the next: permanent transactions are only flushed out when
@@ -1283,174 +1412,3 @@ xfs_trans_roll(
        xfs_trans_ihold(trans, dp);
        return 0;
 }
-
-/*
- * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item().
- *
- * This is typically called by the LM when a transaction has been fully
- * committed to disk.  It needs to unpin the items which have
- * been logged by the transaction and update their positions
- * in the AIL if necessary.
- * This also gets called when the transactions didn't get written out
- * because of an I/O error. Abortflag & XFS_LI_ABORTED is set then.
- *
- * Call xfs_trans_chunk_committed() to process the items in
- * each chunk.
- *
- * Order of work: run tp->t_callback if one is set, process the chunk
- * embedded in the transaction, process and free each chained chunk,
- * clear the per-AG busy entries recorded in tp->t_busy, then free the
- * busy list and the transaction itself.
- *
- * tp:        transaction being completed; it is freed before returning
- * abortflag: forwarded unchanged to xfs_trans_chunk_committed() as its
- *            "aborted" argument
- */
-STATIC void
-xfs_trans_committed(
-       xfs_trans_t     *tp,
-       int             abortflag)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_chunk_t    *next_licp;
-       xfs_log_busy_chunk_t    *lbcp;
-       xfs_log_busy_slot_t     *lbsp;
-       int                     i;
-
-       /*
-        * Call the transaction's completion callback if there
-        * is one.
-        */
-       if (tp->t_callback != NULL) {
-               tp->t_callback(tp, tp->t_callarg);
-       }
-
-       /*
-        * Special case the chunk embedded in the transaction.
-        */
-       licp = &(tp->t_items);
-       if (!(xfs_lic_are_all_free(licp))) {
-               xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
-       }
-
-       /*
-        * Process the items in each chunk in turn.
-        */
-       licp = licp->lic_next;
-       while (licp != NULL) {
-               ASSERT(!xfs_lic_are_all_free(licp));
-               xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
-               next_licp = licp->lic_next;
-               kmem_free(licp);
-               licp = next_licp;
-       }
-
-       /*
-        * Clear all the per-AG busy list items listed in this transaction
-        */
-       lbcp = &tp->t_busy;
-       while (lbcp != NULL) {
-               for (i = 0, lbsp = lbcp->lbc_busy; i < lbcp->lbc_unused; i++, lbsp++) {
-                       if (!XFS_LBC_ISFREE(lbcp, i)) {
-                               xfs_alloc_clear_busy(tp, lbsp->lbc_ag,
-                                                    lbsp->lbc_idx);
-                       }
-               }
-               lbcp = lbcp->lbc_next;
-       }
-       xfs_trans_free_busy(tp);
-
-       /*
-        * That's it for the transaction structure.  Free it.
-        */
-       xfs_trans_free(tp);
-}
-
-/*
- * This is called to perform the commit processing for each
- * item described by the given chunk.
- *
- * The commit processing consists of unlocking items which were
- * held locked with the SYNC_UNLOCK attribute, calling the committed
- * routine of each logged item, updating the item's position in the AIL
- * if necessary, and unpinning each item.  If the committed routine
- * returns -1, then do nothing further with the item because it
- * may have been freed.
- *
- * Since items are unlocked when they are copied to the incore
- * log, it is possible for two transactions to be completing
- * and manipulating the same item simultaneously.  The AIL lock
- * will protect the lsn field of each item.  The value of this
- * field can never go backwards.
- *
- * We unpin the items after repositioning them in the AIL, because
- * otherwise they could be immediately flushed and we'd have to race
- * with the flusher trying to pull the item from the AIL as we add it.
- *
- * licp:    chunk whose occupied descriptors are processed; free slots
- *          are skipped
- * lsn:     lsn handed to each item's IOP_COMMITTED() routine
- * aborted: if non-zero, XFS_LI_ABORTED is set in each item's li_flags
- *          before IOP_COMMITTED() is called
- */
-STATIC void
-xfs_trans_chunk_committed(
-       xfs_log_item_chunk_t    *licp,
-       xfs_lsn_t               lsn,
-       int                     aborted)
-{
-       xfs_log_item_desc_t     *lidp;
-       xfs_log_item_t          *lip;
-       xfs_lsn_t               item_lsn;
-       int                     i;
-
-       lidp = licp->lic_descs;
-       for (i = 0; i < licp->lic_unused; i++, lidp++) {
-               struct xfs_ail          *ailp;
-
-               if (xfs_lic_isfree(licp, i)) {
-                       continue;
-               }
-
-               lip = lidp->lid_item;
-               if (aborted)
-                       lip->li_flags |= XFS_LI_ABORTED;
-
-               /*
-                * Send in the ABORTED flag to the COMMITTED routine
-                * so that it knows whether the transaction was aborted
-                * or not.
-                */
-               item_lsn = IOP_COMMITTED(lip, lsn);
-
-               /*
-                * If the committed routine returns -1, make
-                * no more references to the item.
-                */
-               if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) {
-                       continue;
-               }
-
-               /*
-                * If the returned lsn is greater than what it
-                * contained before, update the location of the
-                * item in the AIL.  If it is not, then do nothing.
-                * Items can never move backwards in the AIL.
-                *
-                * While the new lsn should usually be greater, it
-                * is possible that a later transaction completing
-                * simultaneously with an earlier one using the
-                * same item could complete first with a higher lsn.
-                * This would cause the earlier transaction to fail
-                * the test below.
-                */
-               ailp = lip->li_ailp;
-               spin_lock(&ailp->xa_lock);
-               if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
-                       /*
-                        * This will set the item's lsn to item_lsn
-                        * and update the position of the item in
-                        * the AIL.
-                        *
-                        * xfs_trans_ail_update() drops the AIL lock.
-                        */
-                       xfs_trans_ail_update(ailp, lip, item_lsn);
-               } else {
-                       spin_unlock(&ailp->xa_lock);
-               }
-
-               /*
-                * Now that we've repositioned the item in the AIL,
-                * unpin it so it can be flushed. Pass information
-                * about buffer stale state down from the log item
-                * flags, if anyone else stales the buffer we do not
-                * want to pay any attention to it.
-                */
-               IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE);
-       }
-}
index 79c8bab..c62beee 100644 (file)
@@ -49,6 +49,15 @@ typedef struct xfs_trans_header {
 #define        XFS_LI_DQUOT            0x123d
 #define        XFS_LI_QUOTAOFF         0x123e
 
+#define XFS_LI_TYPE_DESC /* { type value, "name" } pairs, no trailing comma */ \
+       { XFS_LI_EFI,           "XFS_LI_EFI" }, \
+       { XFS_LI_EFD,           "XFS_LI_EFD" }, \
+       { XFS_LI_IUNLINK,       "XFS_LI_IUNLINK" }, \
+       { XFS_LI_INODE,         "XFS_LI_INODE" }, \
+       { XFS_LI_BUF,           "XFS_LI_BUF" }, \
+       { XFS_LI_DQUOT,         "XFS_LI_DQUOT" }, \
+       { XFS_LI_QUOTAOFF,      "XFS_LI_QUOTAOFF" }
+
 /*
  * Transaction types.  Used to distinguish types of buffers.
  */
@@ -159,7 +168,6 @@ typedef struct xfs_log_item_desc {
 
 #define XFS_LID_DIRTY          0x1
 #define XFS_LID_PINNED         0x2
-#define XFS_LID_BUF_STALE      0x8
 
 /*
  * This structure is used to maintain a chunk list of log_item_desc
@@ -833,7 +841,7 @@ typedef struct xfs_item_ops {
        uint (*iop_size)(xfs_log_item_t *);
        void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
        void (*iop_pin)(xfs_log_item_t *);
-       void (*iop_unpin)(xfs_log_item_t *, int);
+       void (*iop_unpin)(xfs_log_item_t *);
        void (*iop_unpin_remove)(xfs_log_item_t *, struct xfs_trans *);
        uint (*iop_trylock)(xfs_log_item_t *);
        void (*iop_unlock)(xfs_log_item_t *);
@@ -846,7 +854,7 @@ typedef struct xfs_item_ops {
 #define IOP_SIZE(ip)           (*(ip)->li_ops->iop_size)(ip)
 #define IOP_FORMAT(ip,vp)      (*(ip)->li_ops->iop_format)(ip, vp)
 #define IOP_PIN(ip)            (*(ip)->li_ops->iop_pin)(ip)
-#define IOP_UNPIN(ip, flags)   (*(ip)->li_ops->iop_unpin)(ip, flags)
+#define IOP_UNPIN(ip)          (*(ip)->li_ops->iop_unpin)(ip)
 #define IOP_UNPIN_REMOVE(ip,tp) (*(ip)->li_ops->iop_unpin_remove)(ip, tp)
 #define IOP_TRYLOCK(ip)                (*(ip)->li_ops->iop_trylock)(ip)
 #define IOP_UNLOCK(ip)         (*(ip)->li_ops->iop_unlock)(ip)
index fb58636..9cd8090 100644 (file)
 #include "xfs_rw.h"
 #include "xfs_trace.h"
 
+/*
+ * Check to see if a buffer matching the given parameters is already
+ * a part of the given transaction.
+ *
+ * Walks every log item chunk of the transaction, starting with the
+ * embedded chunk tp->t_items and following lic_next, and inspects each
+ * occupied slot whose item has type XFS_LI_BUF.  A chunk that is
+ * entirely free is asserted to be the embedded chunk with no successor,
+ * and ends the search with NULL.
+ *
+ * tp:     transaction whose log items are searched
+ * target: compared against XFS_BUF_TARGET() of each candidate buffer
+ * blkno:  compared against XFS_BUF_ADDR() of each candidate buffer
+ * len:    passed through BBTOB() and then compared against
+ *         XFS_BUF_COUNT() of each candidate buffer
+ *
+ * Returns the first buffer for which all three comparisons hold, or
+ * NULL if the transaction holds no such buffer.
+ */
+STATIC struct xfs_buf *
+xfs_trans_buf_item_match(
+       struct xfs_trans        *tp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             blkno,
+       int                     len)
+{
+       xfs_log_item_chunk_t    *licp;
+       xfs_log_item_desc_t     *lidp;
+       xfs_buf_log_item_t      *blip;
+       int                     i;
 
-STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
-               xfs_daddr_t, int);
-STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
-               xfs_daddr_t, int);
+       len = BBTOB(len);
+       for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
+               if (xfs_lic_are_all_free(licp)) {
+                       ASSERT(licp == &tp->t_items);
+                       ASSERT(licp->lic_next == NULL);
+                       return NULL;
+               }
+
+               for (i = 0; i < licp->lic_unused; i++) {
+                       /*
+                        * Skip unoccupied slots.
+                        */
+                       if (xfs_lic_isfree(licp, i))
+                               continue;
+
+                       lidp = xfs_lic_slot(licp, i);
+                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
+                       if (blip->bli_item.li_type != XFS_LI_BUF)
+                               continue;
+
+                       if (XFS_BUF_TARGET(blip->bli_buf) == target &&
+                           XFS_BUF_ADDR(blip->bli_buf) == blkno &&
+                           XFS_BUF_COUNT(blip->bli_buf) == len)
+                               return blip->bli_buf;
+               }
+       }
+
+       return NULL;
+}
 
 /*
  * Add the locked buffer to the transaction.
@@ -112,14 +152,6 @@ xfs_trans_bjoin(
  * within the transaction, just increment its lock recursion count
  * and return a pointer to it.
  *
- * Use the fast path function xfs_trans_buf_item_match() or the buffer
- * cache routine incore_match() to find the buffer
- * if it is already owned by this transaction.
- *
- * If we don't already own the buffer, use get_buf() to get it.
- * If it doesn't yet have an associated xfs_buf_log_item structure,
- * then allocate one and add the item to this transaction.
- *
  * If the transaction pointer is NULL, make this just a normal
  * get_buf() call.
  */
@@ -149,11 +181,7 @@ xfs_trans_get_buf(xfs_trans_t      *tp,
         * have it locked.  In this case we just increment the lock
         * recursion count and return the buffer to the caller.
         */
-       if (tp->t_items.lic_next == NULL) {
-               bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
-       } else {
-               bp  = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len);
-       }
+       bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
        if (bp != NULL) {
                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
                if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
@@ -259,14 +287,6 @@ int        xfs_error_mod = 33;
  * within the transaction and already read in, just increment its
  * lock recursion count and return a pointer to it.
  *
- * Use the fast path function xfs_trans_buf_item_match() or the buffer
- * cache routine incore_match() to find the buffer
- * if it is already owned by this transaction.
- *
- * If we don't already own the buffer, use read_buf() to get it.
- * If it doesn't yet have an associated xfs_buf_log_item structure,
- * then allocate one and add the item to this transaction.
- *
  * If the transaction pointer is NULL, make this just a normal
  * read_buf() call.
  */
@@ -328,11 +348,7 @@ xfs_trans_read_buf(
         * If the buffer is not yet read in, then we read it in, increment
         * the lock recursion count, and return it to the caller.
         */
-       if (tp->t_items.lic_next == NULL) {
-               bp = xfs_trans_buf_item_match(tp, target, blkno, len);
-       } else {
-               bp = xfs_trans_buf_item_match_all(tp, target, blkno, len);
-       }
+       bp = xfs_trans_buf_item_match(tp, target, blkno, len);
        if (bp != NULL) {
                ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
                ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
@@ -696,7 +712,6 @@ xfs_trans_log_buf(xfs_trans_t       *tp,
 
        tp->t_flags |= XFS_TRANS_DIRTY;
        lidp->lid_flags |= XFS_LID_DIRTY;
-       lidp->lid_flags &= ~XFS_LID_BUF_STALE;
        bip->bli_flags |= XFS_BLI_LOGGED;
        xfs_buf_item_log(bip, first, last);
 }
@@ -782,7 +797,7 @@ xfs_trans_binval(
        bip->bli_format.blf_flags |= XFS_BLI_CANCEL;
        memset((char *)(bip->bli_format.blf_data_map), 0,
              (bip->bli_format.blf_map_size * sizeof(uint)));
-       lidp->lid_flags |= XFS_LID_DIRTY|XFS_LID_BUF_STALE;
+       lidp->lid_flags |= XFS_LID_DIRTY;
        tp->t_flags |= XFS_TRANS_DIRTY;
 }
 
@@ -902,111 +917,3 @@ xfs_trans_dquot_buf(
 
        bip->bli_format.blf_flags |= type;
 }
-
-/*
- * Check to see if a buffer matching the given parameters is already
- * a part of the given transaction.  Only check the first, embedded
- * chunk, since we don't want to spend all day scanning large transactions.
- *
- * len is passed through BBTOB() before being compared against
- * XFS_BUF_COUNT().  Returns the matching buffer, or NULL if no
- * XFS_LI_BUF item in the embedded chunk matches target, blkno and the
- * converted length.
- */
-STATIC xfs_buf_t *
-xfs_trans_buf_item_match(
-       xfs_trans_t     *tp,
-       xfs_buftarg_t   *target,
-       xfs_daddr_t     blkno,
-       int             len)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_desc_t     *lidp;
-       xfs_buf_log_item_t      *blip;
-       xfs_buf_t               *bp;
-       int                     i;
-
-       bp = NULL;
-       len = BBTOB(len);
-       licp = &tp->t_items;
-       if (!xfs_lic_are_all_free(licp)) {
-               for (i = 0; i < licp->lic_unused; i++) {
-                       /*
-                        * Skip unoccupied slots.
-                        */
-                       if (xfs_lic_isfree(licp, i)) {
-                               continue;
-                       }
-
-                       lidp = xfs_lic_slot(licp, i);
-                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
-                       if (blip->bli_item.li_type != XFS_LI_BUF) {
-                               continue;
-                       }
-
-                       bp = blip->bli_buf;
-                       if ((XFS_BUF_TARGET(bp) == target) &&
-                           (XFS_BUF_ADDR(bp) == blkno) &&
-                           (XFS_BUF_COUNT(bp) == len)) {
-                               /*
-                                * We found it.  Break out and
-                                * return the pointer to the buffer.
-                                */
-                               break;
-                       } else {
-                               bp = NULL;
-                       }
-               }
-       }
-       return bp;
-}
-
-/*
- * Check to see if a buffer matching the given parameters is already
- * a part of the given transaction.  Check all the chunks, we
- * want to be thorough.
- *
- * len is passed through BBTOB() before being compared against
- * XFS_BUF_COUNT().  Returns the matching buffer, or NULL if none is
- * found.  An entirely free chunk is asserted to be the embedded one
- * with no successor, and ends the search with NULL.
- */
-STATIC xfs_buf_t *
-xfs_trans_buf_item_match_all(
-       xfs_trans_t     *tp,
-       xfs_buftarg_t   *target,
-       xfs_daddr_t     blkno,
-       int             len)
-{
-       xfs_log_item_chunk_t    *licp;
-       xfs_log_item_desc_t     *lidp;
-       xfs_buf_log_item_t      *blip;
-       xfs_buf_t               *bp;
-       int                     i;
-
-       bp = NULL;
-       len = BBTOB(len);
-       for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) {
-               if (xfs_lic_are_all_free(licp)) {
-                       ASSERT(licp == &tp->t_items);
-                       ASSERT(licp->lic_next == NULL);
-                       return NULL;
-               }
-               for (i = 0; i < licp->lic_unused; i++) {
-                       /*
-                        * Skip unoccupied slots.
-                        */
-                       if (xfs_lic_isfree(licp, i)) {
-                               continue;
-                       }
-
-                       lidp = xfs_lic_slot(licp, i);
-                       blip = (xfs_buf_log_item_t *)lidp->lid_item;
-                       if (blip->bli_item.li_type != XFS_LI_BUF) {
-                               continue;
-                       }
-
-                       bp = blip->bli_buf;
-                       if ((XFS_BUF_TARGET(bp) == target) &&
-                           (XFS_BUF_ADDR(bp) == blkno) &&
-                           (XFS_BUF_COUNT(bp) == len)) {
-                               /*
-                                * We found it.  Break out and
-                                * return the pointer to the buffer.
-                                */
-                               return bp;
-                       }
-               }
-       }
-       return NULL;
-}