Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c

index f3ccaec..ba53128 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -188,8 +188,8 @@ _xfs_buf_initialize(
         atomic_set(&bp->b_hold, 1);
         init_completion(&bp->b_iowait);
         INIT_LIST_HEAD(&bp->b_list);
-       INIT_LIST_HEAD(&bp->b_hash_list);
-       init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+       RB_CLEAR_NODE(&bp->b_rbnode);
+       sema_init(&bp->b_sema, 0); /* held, no waiters */
         XB_SET_OWNER(bp);
         bp->b_target = target;
         bp->b_file_offset = range_base;
@@ -262,8 +262,6 @@ xfs_buf_free(
  {
         trace_xfs_buf_free(bp, _RET_IP_);
  
-       ASSERT(list_empty(&bp->b_hash_list));
-
         if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
                 uint            i;
  
@@ -422,8 +420,10 @@ _xfs_buf_find(
  {
         xfs_off_t               range_base;
         size_t                  range_length;
-       xfs_bufhash_t           *hash;
-       xfs_buf_t               *bp, *n;
+       struct xfs_perag        *pag;
+       struct rb_node          **rbp;
+       struct rb_node          *parent;
+       xfs_buf_t               *bp;
  
         range_base = (ioff << BBSHIFT);
         range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
         ASSERT(!(range_length < (1 << btp->bt_sshift)));
         ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
  
-       hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
-       spin_lock(&hash->bh_lock);
-
-       list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-               ASSERT(btp == bp->b_target);
-               if (bp->b_file_offset == range_base &&
-                   bp->b_buffer_length == range_length) {
+       /* get tree root */
+       pag = xfs_perag_get(btp->bt_mount,
+                               xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+       /* walk tree */
+       spin_lock(&pag->pag_buf_lock);
+       rbp = &pag->pag_buf_tree.rb_node;
+       parent = NULL;
+       bp = NULL;
+       while (*rbp) {
+               parent = *rbp;
+               bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+               if (range_base < bp->b_file_offset)
+                       rbp = &(*rbp)->rb_left;
+               else if (range_base > bp->b_file_offset)
+                       rbp = &(*rbp)->rb_right;
+               else {
+                       /*
+                        * found a block offset match. If the range doesn't
+                        * match, the only way this is allowed is if the buffer
+                        * in the cache is stale and the transaction that made
+                        * it stale has not yet committed. i.e. we are
+                        * reallocating a busy extent. Skip this buffer and
+                        * continue searching to the right for an exact match.
+                        */
+                       if (bp->b_buffer_length != range_length) {
+                               ASSERT(bp->b_flags & XBF_STALE);
+                               rbp = &(*rbp)->rb_right;
+                               continue;
+                       }
                         atomic_inc(&bp->b_hold);
                         goto found;
                 }
@@ -449,17 +472,21 @@ _xfs_buf_find(
         if (new_bp) {
                 _xfs_buf_initialize(new_bp, btp, range_base,
                                 range_length, flags);
-               new_bp->b_hash = hash;
-               list_add(&new_bp->b_hash_list, &hash->bh_list);
+               rb_link_node(&new_bp->b_rbnode, parent, rbp);
+               rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+               /* the buffer keeps the perag reference until it is freed */
+               new_bp->b_pag = pag;
+               spin_unlock(&pag->pag_buf_lock);
         } else {
                 XFS_STATS_INC(xb_miss_locked);
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
         }
-
-       spin_unlock(&hash->bh_lock);
         return new_bp;
  
  found:
-       spin_unlock(&hash->bh_lock);
+       spin_unlock(&pag->pag_buf_lock);
+       xfs_perag_put(pag);
  
         /* Attempt to get the semaphore without sleeping,
          * if this does not work then we need to drop the
@@ -625,8 +652,7 @@ void
  xfs_buf_readahead(
         xfs_buftarg_t           *target,
         xfs_off_t               ioff,
-       size_t                  isize,
-       xfs_buf_flags_t         flags)
+       size_t                  isize)
  {
         struct backing_dev_info *bdi;
  
@@ -634,8 +660,42 @@ xfs_buf_readahead(
         if (bdi_read_congested(bdi))
                 return;
  
-       flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
-       xfs_buf_read(target, ioff, isize, flags);
+       xfs_buf_read(target, ioff, isize,
+                    XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked buffer
+ * containing the disk contents, or NULL if allocation or the read fails.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+       struct xfs_mount        *mp,
+       struct xfs_buftarg      *target,
+       xfs_daddr_t             daddr,
+       size_t                  length,
+       int                     flags)
+{
+       xfs_buf_t               *bp;
+       int                     error;
+
+       bp = xfs_buf_get_uncached(target, length, flags);
+       if (!bp)
+               return NULL;
+
+       /* set up the buffer for a read IO */
+       xfs_buf_lock(bp);
+       XFS_BUF_SET_ADDR(bp, daddr);
+       XFS_BUF_READ(bp);
+       XFS_BUF_BUSY(bp);
+
+       xfsbdstrat(mp, bp);
+       error = xfs_buf_iowait(bp);
+       if (error || bp->b_error) {
+               xfs_buf_relse(bp);
+               return NULL;
+       }
+       return bp;
  }
  
  xfs_buf_t *
@@ -707,9 +767,10 @@ xfs_buf_associate_memory(
  }
  
  xfs_buf_t *
-xfs_buf_get_noaddr(
+xfs_buf_get_uncached(
+       struct xfs_buftarg      *target,
         size_t                  len,
-       xfs_buftarg_t           *target)
+       int                     flags)
  {
         unsigned long           page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
         int                     error, i;
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr(
                 goto fail_free_buf;
  
         for (i = 0; i < page_count; i++) {
-               bp->b_pages[i] = alloc_page(GFP_KERNEL);
+               bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
                 if (!bp->b_pages[i])
                         goto fail_free_mem;
         }
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr(
  
         xfs_buf_unlock(bp);
  
-       trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+       trace_xfs_buf_get_uncached(bp, _RET_IP_);
         return bp;
  
   fail_free_mem:
@@ -774,29 +835,30 @@ void
  xfs_buf_rele(
         xfs_buf_t               *bp)
  {
-       xfs_bufhash_t           *hash = bp->b_hash;
+       struct xfs_perag        *pag = bp->b_pag;
  
         trace_xfs_buf_rele(bp, _RET_IP_);
  
-       if (unlikely(!hash)) {
+       if (!pag) {
                 ASSERT(!bp->b_relse);
+               ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
                 if (atomic_dec_and_test(&bp->b_hold))
                         xfs_buf_free(bp);
                 return;
         }
  
+       ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
         ASSERT(atomic_read(&bp->b_hold) > 0);
-       if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+       if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
                 if (bp->b_relse) {
                         atomic_inc(&bp->b_hold);
-                       spin_unlock(&hash->bh_lock);
-                       (*(bp->b_relse)) (bp);
-               } else if (bp->b_flags & XBF_FS_MANAGED) {
-                       spin_unlock(&hash->bh_lock);
+                       spin_unlock(&pag->pag_buf_lock);
+                       bp->b_relse(bp);
                 } else {
                         ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
-                       list_del_init(&bp->b_hash_list);
-                       spin_unlock(&hash->bh_lock);
+                       rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+                       spin_unlock(&pag->pag_buf_lock);
+                       xfs_perag_put(pag);
                         xfs_buf_free(bp);
                 }
         }
@@ -859,7 +921,7 @@ xfs_buf_lock(
         trace_xfs_buf_lock(bp, _RET_IP_);
  
         if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
-               xfs_log_force(bp->b_mount, 0);
+               xfs_log_force(bp->b_target->bt_mount, 0);
         if (atomic_read(&bp->b_io_remaining))
                 blk_run_address_space(bp->b_target->bt_mapping);
         down(&bp->b_sema);
@@ -970,7 +1032,6 @@ xfs_bwrite(
  {
         int                     error;
  
-       bp->b_mount = mp;
         bp->b_flags |= XBF_WRITE;
         bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
  
@@ -991,8 +1052,6 @@ xfs_bdwrite(
  {
         trace_xfs_buf_bdwrite(bp, _RET_IP_);
  
-       bp->b_mount = mp;
-
         bp->b_flags &= ~XBF_READ;
         bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
  
@@ -1001,7 +1060,7 @@ xfs_bdwrite(
  
  /*
   * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
   * so that the proper iodone callbacks get called.
   */
  STATIC int
@@ -1018,21 +1077,21 @@ xfs_bioerror(
         XFS_BUF_ERROR(bp, EIO);
  
         /*
-        * We're calling biodone, so delete XBF_DONE flag.
+        * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
          */
         XFS_BUF_UNREAD(bp);
         XFS_BUF_UNDELAYWRITE(bp);
         XFS_BUF_UNDONE(bp);
         XFS_BUF_STALE(bp);
  
-       xfs_biodone(bp);
+       xfs_buf_ioend(bp, 0);
  
         return EIO;
  }
  
  /*
   * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
   * This is meant for userdata errors; metadata bufs come with
   * iodone functions attached, so that we can track down errors.
   */
@@ -1081,7 +1140,7 @@ int
  xfs_bdstrat_cb(
         struct xfs_buf  *bp)
  {
-       if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+       if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
                 trace_xfs_bdstrat_shut(bp, _RET_IP_);
                 /*
                  * Metadata write that didn't get logged but
@@ -1387,62 +1446,24 @@ xfs_buf_iomove(
   */
  void
  xfs_wait_buftarg(
-       xfs_buftarg_t   *btp)
-{
-       xfs_buf_t       *bp, *n;
-       xfs_bufhash_t   *hash;
-       uint            i;
-
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               hash = &btp->bt_hash[i];
-again:
-               spin_lock(&hash->bh_lock);
-               list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
-                       ASSERT(btp == bp->b_target);
-                       if (!(bp->b_flags & XBF_FS_MANAGED)) {
-                               spin_unlock(&hash->bh_lock);
-                               /*
-                                * Catch superblock reference count leaks
-                                * immediately
-                                */
-                               BUG_ON(bp->b_bn == 0);
-                               delay(100);
-                               goto again;
-                       }
-               }
-               spin_unlock(&hash->bh_lock);
-       }
-}
-
-/*
- *     Allocate buffer hash table for a given target.
- *     For devices containing metadata (i.e. not the log/realtime devices)
- *     we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
-       xfs_buftarg_t           *btp,
-       int                     external)
+       struct xfs_buftarg      *btp)
  {
-       unsigned int            i;
+       struct xfs_perag        *pag;
+       uint                    i;
  
-       btp->bt_hashshift = external ? 3 : 12;  /* 8 or 4096 buckets */
-       btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
-                                        sizeof(xfs_bufhash_t));
-       for (i = 0; i < (1 << btp->bt_hashshift); i++) {
-               spin_lock_init(&btp->bt_hash[i].bh_lock);
-               INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+       for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+               pag = xfs_perag_get(btp->bt_mount, i);
+               spin_lock(&pag->pag_buf_lock);
+               while (rb_first(&pag->pag_buf_tree)) {
+                       spin_unlock(&pag->pag_buf_lock);
+                       delay(100);
+                       spin_lock(&pag->pag_buf_lock);
+               }
+               spin_unlock(&pag->pag_buf_lock);
+               xfs_perag_put(pag);
         }
  }
  
-STATIC void
-xfs_free_bufhash(
-       xfs_buftarg_t           *btp)
-{
-       kmem_free_large(btp->bt_hash);
-       btp->bt_hash = NULL;
-}
-
  /*
   *     buftarg list for delwrite queue processing
   */
@@ -1475,7 +1496,6 @@ xfs_free_buftarg(
         xfs_flush_buftarg(btp, 1);
         if (mp->m_flags & XFS_MOUNT_BARRIER)
                 xfs_blkdev_issue_flush(btp);
-       xfs_free_bufhash(btp);
         iput(btp->bt_mapping->host);
  
         /* Unregister the buftarg first so that we don't get a
@@ -1597,6 +1617,7 @@ out_error:
  
  xfs_buftarg_t *
  xfs_alloc_buftarg(
+       struct xfs_mount        *mp,
         struct block_device     *bdev,
         int                     external,
         const char              *fsname)
@@ -1605,6 +1626,7 @@ xfs_alloc_buftarg(
  
         btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
  
+       btp->bt_mount = mp;
         btp->bt_dev =  bdev->bd_dev;
         btp->bt_bdev = bdev;
         if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1613,7 +1635,6 @@ xfs_alloc_buftarg(
                 goto error;
         if (xfs_alloc_delwrite_queue(btp, fsname))
                 goto error;
-       xfs_alloc_bufhash(btp, external);
         return btp;
  
  error:
@@ -1904,7 +1925,7 @@ xfs_flush_buftarg(
                         bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
  
                         list_del_init(&bp->b_list);
-                       xfs_iowait(bp);
+                       xfs_buf_iowait(bp);
                         xfs_buf_relse(bp);
                 }
         }
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h

index 9d021c7..383a3f3 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -51,7 +51,6 @@ typedef enum {
  #define XBF_DONE       (1 << 5) /* all pages in the buffer uptodate */
  #define XBF_DELWRI     (1 << 6) /* buffer has dirty pages */
  #define XBF_STALE      (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */
  #define XBF_ORDERED    (1 << 11)/* use ordered writes */
  #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
  #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
@@ -96,7 +95,6 @@ typedef unsigned int xfs_buf_flags_t;
         { XBF_DONE,             "DONE" }, \
         { XBF_DELWRI,           "DELWRI" }, \
         { XBF_STALE,            "STALE" }, \
-       { XBF_FS_MANAGED,       "FS_MANAGED" }, \
         { XBF_ORDERED,          "ORDERED" }, \
         { XBF_READ_AHEAD,       "READ_AHEAD" }, \
         { XBF_LOCK,             "LOCK" },       /* should never be set */\
@@ -123,14 +121,11 @@ typedef struct xfs_buftarg {
         dev_t                   bt_dev;
         struct block_device     *bt_bdev;
         struct address_space    *bt_mapping;
+       struct xfs_mount        *bt_mount;
         unsigned int            bt_bsize;
         unsigned int            bt_sshift;
         size_t                  bt_smask;
  
-       /* per device buffer hash table */
-       uint                    bt_hashshift;
-       xfs_bufhash_t           *bt_hash;
-
         /* per device delwri queue */
         struct task_struct      *bt_task;
         struct list_head        bt_list;
@@ -158,34 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
  #define XB_PAGES       2
  
  typedef struct xfs_buf {
+       /*
+        * first cacheline holds all the fields needed for an uncontended cache
+        * hit to be fully processed. The semaphore straddles the cacheline
+        * boundary, but the counter and lock sit on the first cacheline,
+        * which is the only bit that is touched if we hit the semaphore
+        * fast-path on locking.
+        */
+       struct rb_node          b_rbnode;       /* rbtree node */
+       xfs_off_t               b_file_offset;  /* offset in file */
+       size_t                  b_buffer_length;/* size of buffer in bytes */
+       atomic_t                b_hold;         /* reference count */
+       xfs_buf_flags_t         b_flags;        /* status flags */
         struct semaphore        b_sema;         /* semaphore for lockables */
-       unsigned long           b_queuetime;    /* time buffer was queued */
-       atomic_t                b_pin_count;    /* pin count */
+
         wait_queue_head_t       b_waiters;      /* unpin waiters */
         struct list_head        b_list;
-       xfs_buf_flags_t         b_flags;        /* status flags */
-       struct list_head        b_hash_list;    /* hash table list */
-       xfs_bufhash_t           *b_hash;        /* hash table list start */
+       struct xfs_perag        *b_pag;         /* contains rbtree root */
         xfs_buftarg_t           *b_target;      /* buffer target (device) */
-       atomic_t                b_hold;         /* reference count */
         xfs_daddr_t             b_bn;           /* block number for I/O */
-       xfs_off_t               b_file_offset;  /* offset in file */
-       size_t                  b_buffer_length;/* size of buffer in bytes */
         size_t                  b_count_desired;/* desired transfer size */
         void                    *b_addr;        /* virtual address of buffer */
         struct work_struct      b_iodone_work;
-       atomic_t                b_io_remaining; /* #outstanding I/O requests */
         xfs_buf_iodone_t        b_iodone;       /* I/O completion function */
         xfs_buf_relse_t         b_relse;        /* releasing function */
         struct completion       b_iowait;       /* queue for I/O waiters */
         void                    *b_fspriv;
         void                    *b_fspriv2;
-       struct xfs_mount        *b_mount;
-       unsigned short          b_error;        /* error code on I/O */
-       unsigned int            b_page_count;   /* size of page array */
-       unsigned int            b_offset;       /* page offset in first page */
         struct page             **b_pages;      /* array of page pointers */
         struct page             *b_page_array[XB_PAGES]; /* inline pages */
+       unsigned long           b_queuetime;    /* time buffer was queued */
+       atomic_t                b_pin_count;    /* pin count */
+       atomic_t                b_io_remaining; /* #outstanding I/O requests */
+       unsigned int            b_page_count;   /* size of page array */
+       unsigned int            b_offset;       /* page offset in first page */
+       unsigned short          b_error;        /* error code on I/O */
  #ifdef XFS_BUF_LOCK_TRACKING
         int                     b_last_holder;
  #endif
@@ -204,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
                                 xfs_buf_flags_t);
  
  extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
  extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
  extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
-                               xfs_buf_flags_t);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+                               struct xfs_buftarg *target,
+                               xfs_daddr_t daddr, size_t length, int flags);
  
  /* Releasing Buffers */
  extern void xfs_buf_free(xfs_buf_t *);
@@ -233,6 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
  extern int xfs_buf_iowait(xfs_buf_t *);
  extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
                                 xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
  
  static inline int xfs_buf_geterror(xfs_buf_t *bp)
  {
@@ -267,8 +273,6 @@ extern void xfs_buf_terminate(void);
                                         XFS_BUF_DONE(bp);       \
                                 } while (0)
  
-#define XFS_BUF_UNMANAGE(bp)   ((bp)->b_flags &= ~XBF_FS_MANAGED)
-
  #define XFS_BUF_DELAYWRITE(bp)         ((bp)->b_flags |= XBF_DELWRI)
  #define XFS_BUF_UNDELAYWRITE(bp)       xfs_buf_delwri_dequeue(bp)
  #define XFS_BUF_ISDELAYWRITE(bp)       ((bp)->b_flags & XBF_DELWRI)
@@ -347,25 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
         xfs_buf_rele(bp);
  }
  
-#define xfs_biodone(bp)                xfs_buf_ioend(bp, 0)
-
-#define xfs_biomove(bp, off, len, data, rw) \
-           xfs_buf_iomove((bp), (off), (len), (data), \
-               ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
-
-#define xfs_biozero(bp, off, len) \
-           xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-#define xfs_iowait(bp) xfs_buf_iowait(bp)
-
-#define xfs_baread(target, rablkno, ralen)  \
-       xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-
-
  /*
   *     Handling of buftargs.
   */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+                       struct block_device *, int, const char *);
  extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
  extern void xfs_wait_buftarg(xfs_buftarg_t *);
  extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h

deleted file mode 100644 (file)

index 55bddf3..0000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_CRED_H__
-#define __XFS_CRED_H__
-
-#include <linux/capability.h>
-
-/*
- * Credentials
- */
-typedef const struct cred cred_t;
-
-#endif  /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c

index 1f279b0..ed88ed1 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -32,10 +32,9 @@ xfs_tosspages(
         xfs_off_t       last,
         int             fiopt)
  {
-       struct address_space *mapping = VFS_I(ip)->i_mapping;
-
-       if (mapping->nrpages)
-               truncate_inode_pages(mapping, first);
+       /* can't toss partial tail pages, so mask them out */
+       last &= ~(PAGE_SIZE - 1);
+       truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
  }
  
  int
@@ -50,12 +49,11 @@ xfs_flushinval_pages(
  
         trace_xfs_pagecache_inval(ip, first, last);
  
-       if (mapping->nrpages) {
-               xfs_iflags_clear(ip, XFS_ITRUNCATED);
-               ret = filemap_write_and_wait(mapping);
-               if (!ret)
-                       truncate_inode_pages(mapping, first);
-       }
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = filemap_write_and_wait_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
+       if (!ret)
+               truncate_inode_pages_range(mapping, first, last);
         return -ret;
  }
  
@@ -71,10 +69,9 @@ xfs_flush_pages(
         int             ret = 0;
         int             ret2;
  
-       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-               xfs_iflags_clear(ip, XFS_ITRUNCATED);
-               ret = -filemap_fdatawrite(mapping);
-       }
+       xfs_iflags_clear(ip, XFS_ITRUNCATED);
+       ret = -filemap_fdatawrite_range(mapping, first,
+                               last == -1 ? LLONG_MAX : last);
         if (flags & XBF_ASYNC)
                 return ret;
         ret2 = xfs_wait_on_pages(ip, first, last);
@@ -91,7 +88,9 @@ xfs_wait_on_pages(
  {
         struct address_space *mapping = VFS_I(ip)->i_mapping;
  
-       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
-               return -filemap_fdatawait(mapping);
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               return -filemap_fdatawait_range(mapping, first,
+                                       last == -1 ? ip->i_size - 1 : last);
+       }
         return 0;
  }
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c

index 2ae8b1c..76e81cf 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
   */
  #include "xfs.h"
-#include "xfs_cred.h"
  #include "xfs_sysctl.h"
  
  /*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h

deleted file mode 100644 (file)

index 69f71ca..0000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_GLOBALS_H__
-#define __XFS_GLOBALS_H__
-
-extern uint64_t        xfs_panic_mask;         /* set to cause more panics */
-
-#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c

index 3b9e626..2ea238f 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -790,7 +790,7 @@ xfs_ioc_fsgetxattr(
         xfs_ilock(ip, XFS_ILOCK_SHARED);
         fa.fsx_xflags = xfs_ip2xflags(ip);
         fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
-       fa.fsx_projid = ip->i_d.di_projid;
+       fa.fsx_projid = xfs_get_projid(ip);
  
         if (attr) {
                 if (ip->i_afp) {
@@ -909,10 +909,10 @@ xfs_ioctl_setattr(
                 return XFS_ERROR(EIO);
  
         /*
-        * Disallow 32bit project ids because on-disk structure
-        * is 16bit only.
+        * Disallow 32bit project ids when projid32bit feature is not enabled.
          */
-       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+       if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+                       !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
                 return XFS_ERROR(EINVAL);
  
         /*
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
         if (mask & FSX_PROJID) {
                 if (XFS_IS_QUOTA_RUNNING(mp) &&
                     XFS_IS_PQUOTA_ON(mp) &&
-                   ip->i_d.di_projid != fa->fsx_projid) {
+                   xfs_get_projid(ip) != fa->fsx_projid) {
                         ASSERT(tp);
                         code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
                                                 capable(CAP_FOWNER) ?
@@ -1063,12 +1063,12 @@ xfs_ioctl_setattr(
                  * Change the ownerships and register quota modifications
                  * in the transaction.
                  */
-               if (ip->i_d.di_projid != fa->fsx_projid) {
+               if (xfs_get_projid(ip) != fa->fsx_projid) {
                         if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
                                 olddquot = xfs_qm_vop_chown(tp, ip,
                                                         &ip->i_gdquot, gdqp);
                         }
-                       ip->i_d.di_projid = fa->fsx_projid;
+                       xfs_set_projid(ip, fa->fsx_projid);
  
                         /*
                          * We may have to rev the inode as well as
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
                 xfs_diflags_to_linux(ip);
         }
  
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
  
         XFS_STATS_INC(xs_ig_attrchg);
  
@@ -1301,7 +1301,8 @@ xfs_file_ioctl(
         case XFS_IOC_ALLOCSP64:
         case XFS_IOC_FREESP64:
         case XFS_IOC_RESVSP64:
-       case XFS_IOC_UNRESVSP64: {
+       case XFS_IOC_UNRESVSP64:
+       case XFS_IOC_ZERO_RANGE: {
                 xfs_flock64_t           bf;
  
                 if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c

index 6c83f7f..b3486df 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
             get_user(bstat->bs_extsize, &bstat32->bs_extsize)   ||
             get_user(bstat->bs_extents, &bstat32->bs_extents)   ||
             get_user(bstat->bs_gen,     &bstat32->bs_gen)       ||
-           get_user(bstat->bs_projid,  &bstat32->bs_projid)    ||
+           get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+           get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
             get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
             get_user(bstat->bs_dmstate, &bstat32->bs_dmstate)   ||
             get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
             put_user(buffer->bs_extents,  &p32->bs_extents)     ||
             put_user(buffer->bs_gen,      &p32->bs_gen)         ||
             put_user(buffer->bs_projid,   &p32->bs_projid)      ||
+           put_user(buffer->bs_projid_hi,      &p32->bs_projid_hi)     ||
             put_user(buffer->bs_dmevmask, &p32->bs_dmevmask)    ||
             put_user(buffer->bs_dmstate,  &p32->bs_dmstate)     ||
             put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl(
         case XFS_IOC_FSGEOMETRY_V1:
         case XFS_IOC_FSGROWFSDATA:
         case XFS_IOC_FSGROWFSRT:
+       case XFS_IOC_ZERO_RANGE:
                 return xfs_file_ioctl(filp, cmd, p);
  #else
         case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h

index 1024c4f..08b6057 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
         __s32           bs_extsize;     /* extent size                  */
         __s32           bs_extents;     /* number of extents            */
         __u32           bs_gen;         /* generation count             */
-       __u16           bs_projid;      /* project id                   */
-       unsigned char   bs_pad[14];     /* pad space, unused            */
+       __u16           bs_projid_lo;   /* lower part of project id     */
+#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
+       __u16           bs_projid_hi;   /* high part of project id      */
+       unsigned char   bs_pad[12];     /* pad space, unused            */
         __u32           bs_dmevmask;    /* DMIG event mask              */
         __u16           bs_dmstate;     /* DMIG state info              */
         __u16           bs_aextents;    /* attribute number of extents  */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c

index b1fc2a6..ec858e0 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -94,41 +94,6 @@ xfs_mark_inode_dirty(
                 mark_inode_dirty(inode);
  }
  
-/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- */
-void
-xfs_ichgtime(
-       xfs_inode_t     *ip,
-       int             flags)
-{
-       struct inode    *inode = VFS_I(ip);
-       timespec_t      tv;
-       int             sync_it = 0;
-
-       tv = current_fs_time(inode->i_sb);
-
-       if ((flags & XFS_ICHGTIME_MOD) &&
-           !timespec_equal(&inode->i_mtime, &tv)) {
-               inode->i_mtime = tv;
-               sync_it = 1;
-       }
-       if ((flags & XFS_ICHGTIME_CHG) &&
-           !timespec_equal(&inode->i_ctime, &tv)) {
-               inode->i_ctime = tv;
-               sync_it = 1;
-       }
-
-       /*
-        * Update complete - now make sure everyone knows that the inode
-        * is dirty.
-        */
-       if (sync_it)
-               xfs_mark_inode_dirty_sync(ip);
-}
-
  /*
   * Hook in SELinux.  This is not quite correct yet, what we really need
   * here (as we do for default ACLs) is a mechanism by which creation of
@@ -224,7 +189,7 @@ xfs_vn_mknod(
         }
  
         xfs_dentry_to_name(&name, dentry);
-       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
+       error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
         if (unlikely(error))
                 goto out_free_acl;
  
@@ -397,7 +362,7 @@ xfs_vn_symlink(
                 (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
         xfs_dentry_to_name(&name, dentry);
  
-       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
+       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
         if (unlikely(error))
                 goto out;
  
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h

index 2fa0bd9..214ddd7 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
  #include <linux/random.h>
  #include <linux/ctype.h>
  #include <linux/writeback.h>
+#include <linux/capability.h>
  
  #include <asm/page.h>
  #include <asm/div64.h>
@@ -79,14 +80,12 @@
  #include <asm/byteorder.h>
  #include <asm/unaligned.h>
  
-#include <xfs_cred.h>
  #include <xfs_vnode.h>
  #include <xfs_stats.h>
  #include <xfs_sysctl.h>
  #include <xfs_iops.h>
  #include <xfs_aops.h>
  #include <xfs_super.h>
-#include <xfs_globals.h>
  #include <xfs_buf.h>
  
  /*
@@ -144,7 +143,7 @@
  #define SYNCHRONIZE()  barrier()
  #define __return_address __builtin_return_address(0)
  
-#define dfltprid       0
+#define XFS_PROJID_DEFAULT     0
  #define MAXPATHLEN     1024
  
  #define MIN(a,b)       (min(a,b))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index 08fd310..ab31ce5 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -44,7 +44,6 @@
  #include "xfs_buf_item.h"
  #include "xfs_utils.h"
  #include "xfs_vnodeops.h"
-#include "xfs_version.h"
  #include "xfs_log_priv.h"
  #include "xfs_trans_priv.h"
  #include "xfs_filestream.h"
@@ -645,7 +644,7 @@ xfs_barrier_test(
         XFS_BUF_ORDERED(sbp);
  
         xfsbdstrat(mp, sbp);
-       error = xfs_iowait(sbp);
+       error = xfs_buf_iowait(sbp);
  
         /*
          * Clear all the flags we set and possible error state in the
@@ -757,18 +756,20 @@ xfs_open_devices(
          * Setup xfs_mount buffer target pointers
          */
         error = ENOMEM;
-       mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+       mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
         if (!mp->m_ddev_targp)
                 goto out_close_rtdev;
  
         if (rtdev) {
-               mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+               mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+                                                       mp->m_fsname);
                 if (!mp->m_rtdev_targp)
                         goto out_free_ddev_targ;
         }
  
         if (logdev && logdev != ddev) {
-               mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+               mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+                                                       mp->m_fsname);
                 if (!mp->m_logdev_targp)
                         goto out_free_rtdev_targ;
         } else {
@@ -971,12 +972,7 @@ xfs_fs_inode_init_once(
  
  /*
   * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode. Care must be taken
- * here - the transaction code calls mark_inode_dirty_sync() to mark the
- * VFS inode dirty in a transaction and clears the i_update_core field;
- * it must clear the field after calling mark_inode_dirty_sync() to
- * correctly indicate that the dirty state has been propagated into the
- * inode log item.
+ * we catch unlogged VFS level updates to the inode.
   *
   * We need the barrier() to maintain correct ordering between unlogged
   * updates and the transaction commit code that clears the i_update_core
@@ -1520,8 +1516,9 @@ xfs_fs_fill_super(
         if (error)
                 goto out_free_fsname;
  
-       if (xfs_icsb_init_counters(mp))
-               mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+       error = xfs_icsb_init_counters(mp);
+       if (error)
+               goto out_close_devices;
  
         error = xfs_readsb(mp, flags);
         if (error)
@@ -1582,6 +1579,7 @@ xfs_fs_fill_super(
         xfs_freesb(mp);
   out_destroy_counters:
         xfs_icsb_destroy_counters(mp);
+ out_close_devices:
         xfs_close_devices(mp);
   out_free_fsname:
         xfs_free_fsname(mp);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h

index 1ef4a4d..50a3266 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void);
  # define XFS_DBG_STRING                "no debug"
  #endif
  
+#define XFS_VERSION_STRING     "SGI XFS"
  #define XFS_BUILD_OPTIONS      XFS_ACL_STRING \
                                 XFS_SECURITY_STRING \
                                 XFS_REALTIME_STRING \
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c

index 81976ff..37d3325 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -39,42 +39,39 @@
  #include <linux/kthread.h>
  #include <linux/freezer.h>
  
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH       32
  
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
-       struct xfs_mount        *mp,
-       struct xfs_perag        *pag,
-       uint32_t                *first_index,
-       int                     tag)
+STATIC int
+xfs_inode_ag_walk_grab(
+       struct xfs_inode        *ip)
  {
-       int                     nr_found;
-       struct xfs_inode        *ip;
+       struct inode            *inode = VFS_I(ip);
  
-       /*
-        * use a gang lookup to find the next inode in the tree
-        * as the tree is sparse and a gang lookup walks to find
-        * the number of objects requested.
-        */
-       if (tag == XFS_ICI_NO_TAG) {
-               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-                               (void **)&ip, *first_index, 1);
-       } else {
-               nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-                               (void **)&ip, *first_index, 1, tag);
+       /* nothing to sync during shutdown */
+       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+               return EFSCORRUPTED;
+
+       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+       if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+               return ENOENT;
+
+       /* If we can't grab the inode, it must be on its way to reclaim. */
+       if (!igrab(inode))
+               return ENOENT;
+
+       if (is_bad_inode(inode)) {
+               IRELE(ip);
+               return ENOENT;
         }
-       if (!nr_found)
-               return NULL;
  
-       /*
-        * Update the index for the next lookup. Catch overflows
-        * into the next AG range which can occur if we have inodes
-        * in the last block of the AG and we are currently
-        * pointing to the last inode.
-        */
-       *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-       if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-               return NULL;
-       return ip;
+       /* inode is valid */
+       return 0;
  }
  
  STATIC int
@@ -83,49 +80,75 @@ xfs_inode_ag_walk(
         struct xfs_perag        *pag,
         int                     (*execute)(struct xfs_inode *ip,
                                            struct xfs_perag *pag, int flags),
-       int                     flags,
-       int                     tag,
-       int                     exclusive,
-       int                     *nr_to_scan)
+       int                     flags)
  {
         uint32_t                first_index;
         int                     last_error = 0;
         int                     skipped;
+       int                     done;
+       int                     nr_found;
  
  restart:
+       done = 0;
         skipped = 0;
         first_index = 0;
+       nr_found = 0;
         do {
+               struct xfs_inode *batch[XFS_LOOKUP_BATCH];
                 int             error = 0;
-               xfs_inode_t     *ip;
+               int             i;
  
-               if (exclusive)
-                       write_lock(&pag->pag_ici_lock);
-               else
-                       read_lock(&pag->pag_ici_lock);
-               ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
-               if (!ip) {
-                       if (exclusive)
-                               write_unlock(&pag->pag_ici_lock);
-                       else
-                               read_unlock(&pag->pag_ici_lock);
+               read_lock(&pag->pag_ici_lock);
+               nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH);
+               if (!nr_found) {
+                       read_unlock(&pag->pag_ici_lock);
                         break;
                 }
  
-               /* execute releases pag->pag_ici_lock */
-               error = execute(ip, pag, flags);
-               if (error == EAGAIN) {
-                       skipped++;
-                       continue;
+               /*
+                * Grab the inodes before we drop the lock. if we found
+                * nothing, nr == 0 and the loop will be skipped.
+                */
+               for (i = 0; i < nr_found; i++) {
+                       struct xfs_inode *ip = batch[i];
+
+                       if (done || xfs_inode_ag_walk_grab(ip))
+                               batch[i] = NULL;
+
+                       /*
+                        * Update the index for the next lookup. Catch overflows
+                        * into the next AG range which can occur if we have inodes
+                        * in the last block of the AG and we are currently
+                        * pointing to the last inode.
+                        */
+                       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                       if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                               done = 1;
+               }
+
+               /* unlock now we've grabbed the inodes. */
+               read_unlock(&pag->pag_ici_lock);
+
+               for (i = 0; i < nr_found; i++) {
+                       if (!batch[i])
+                               continue;
+                       error = execute(batch[i], pag, flags);
+                       IRELE(batch[i]);
+                       if (error == EAGAIN) {
+                               skipped++;
+                               continue;
+                       }
+                       if (error && last_error != EFSCORRUPTED)
+                               last_error = error;
                 }
-               if (error)
-                       last_error = error;
  
                 /* bail out if the filesystem is corrupted.  */
                 if (error == EFSCORRUPTED)
                         break;
  
-       } while ((*nr_to_scan)--);
+       } while (nr_found && !done);
  
         if (skipped) {
                 delay(1);
@@ -134,110 +157,32 @@ restart:
         return last_error;
  }
  
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
-       struct xfs_mount        *mp,
-       xfs_agnumber_t          *first,
-       int                     tag)
-{
-       struct xfs_perag        *pag = NULL;
-
-       if (tag == XFS_ICI_RECLAIM_TAG) {
-               int found;
-               int ref;
-
-               spin_lock(&mp->m_perag_lock);
-               found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
-                               (void **)&pag, *first, 1, tag);
-               if (found <= 0) {
-                       spin_unlock(&mp->m_perag_lock);
-                       return NULL;
-               }
-               *first = pag->pag_agno + 1;
-               /* open coded pag reference increment */
-               ref = atomic_inc_return(&pag->pag_ref);
-               spin_unlock(&mp->m_perag_lock);
-               trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
-       } else {
-               pag = xfs_perag_get(mp, *first);
-               (*first)++;
-       }
-       return pag;
-}
-
  int
  xfs_inode_ag_iterator(
         struct xfs_mount        *mp,
         int                     (*execute)(struct xfs_inode *ip,
                                            struct xfs_perag *pag, int flags),
-       int                     flags,
-       int                     tag,
-       int                     exclusive,
-       int                     *nr_to_scan)
+       int                     flags)
  {
         struct xfs_perag        *pag;
         int                     error = 0;
         int                     last_error = 0;
         xfs_agnumber_t          ag;
-       int                     nr;
  
-       nr = nr_to_scan ? *nr_to_scan : INT_MAX;
         ag = 0;
-       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
-               error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-                                               exclusive, &nr);
+       while ((pag = xfs_perag_get(mp, ag))) {
+               ag = pag->pag_agno + 1;
+               error = xfs_inode_ag_walk(mp, pag, execute, flags);
                 xfs_perag_put(pag);
                 if (error) {
                         last_error = error;
                         if (error == EFSCORRUPTED)
                                 break;
                 }
-               if (nr <= 0)
-                       break;
         }
-       if (nr_to_scan)
-               *nr_to_scan = nr;
         return XFS_ERROR(last_error);
  }
  
-/* must be called with pag_ici_lock held and releases it */
-int
-xfs_sync_inode_valid(
-       struct xfs_inode        *ip,
-       struct xfs_perag        *pag)
-{
-       struct inode            *inode = VFS_I(ip);
-       int                     error = EFSCORRUPTED;
-
-       /* nothing to sync during shutdown */
-       if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-               goto out_unlock;
-
-       /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-       error = ENOENT;
-       if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-               goto out_unlock;
-
-       /* If we can't grab the inode, it must on it's way to reclaim. */
-       if (!igrab(inode))
-               goto out_unlock;
-
-       if (is_bad_inode(inode)) {
-               IRELE(ip);
-               goto out_unlock;
-       }
-
-       /* inode is valid */
-       error = 0;
-out_unlock:
-       read_unlock(&pag->pag_ici_lock);
-       return error;
-}
-
  STATIC int
  xfs_sync_inode_data(
         struct xfs_inode        *ip,
@@ -248,10 +193,6 @@ xfs_sync_inode_data(
         struct address_space *mapping = inode->i_mapping;
         int                     error = 0;
  
-       error = xfs_sync_inode_valid(ip, pag);
-       if (error)
-               return error;
-
         if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
                 goto out_wait;
  
@@ -268,7 +209,6 @@ xfs_sync_inode_data(
   out_wait:
         if (flags & SYNC_WAIT)
                 xfs_ioend_wait(ip);
-       IRELE(ip);
         return error;
  }
  
@@ -280,10 +220,6 @@ xfs_sync_inode_attr(
  {
         int                     error = 0;
  
-       error = xfs_sync_inode_valid(ip, pag);
-       if (error)
-               return error;
-
         xfs_ilock(ip, XFS_ILOCK_SHARED);
         if (xfs_inode_clean(ip))
                 goto out_unlock;
@@ -302,7 +238,6 @@ xfs_sync_inode_attr(
  
   out_unlock:
         xfs_iunlock(ip, XFS_ILOCK_SHARED);
-       IRELE(ip);
         return error;
  }
  
@@ -318,8 +253,7 @@ xfs_sync_data(
  
         ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
  
-       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-                                     XFS_ICI_NO_TAG, 0, NULL);
+       error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
         if (error)
                 return XFS_ERROR(error);
  
@@ -337,8 +271,7 @@ xfs_sync_attr(
  {
         ASSERT((flags & ~SYNC_WAIT) == 0);
  
-       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-                                    XFS_ICI_NO_TAG, 0, NULL);
+       return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
  }
  
  STATIC int
@@ -697,6 +630,43 @@ __xfs_inode_clear_reclaim_tag(
         __xfs_inode_clear_reclaim(pag, ip);
  }
  
+/*
+ * Claim the inode for reclaim by setting XFS_IRECLAIM under i_flags_lock.
+ * Return 0 if we grabbed it, non-zero if it was skipped or already claimed.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+
+       /*
+        * Do some unlocked checks first to avoid unnecessary lock traffic.
+        * The first is a flush lock check, the second is an already-in-reclaim
+        * check. Only do these checks if we are not going to block on locks.
+        */
+       if ((flags & SYNC_TRYLOCK) &&
+           (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+               return 1;
+       }
+
+       /*
+        * The radix tree lock here protects a thread in xfs_iget from racing
+        * with us starting reclaim on the inode.  Once we have the
+        * XFS_IRECLAIM flag set it will not touch us.
+        */
+       spin_lock(&ip->i_flags_lock);
+       ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+       if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+               /* ignore as it is already under reclaim */
+               spin_unlock(&ip->i_flags_lock);
+               return 1;
+       }
+       __xfs_iflags_set(ip, XFS_IRECLAIM);
+       spin_unlock(&ip->i_flags_lock);
+       return 0;
+}
+
  /*
   * Inodes in different states need to be treated differently, and the return
   * value of xfs_iflush is not sufficient to get this right. The following table
@@ -755,23 +725,6 @@ xfs_reclaim_inode(
  {
         int     error = 0;
  
-       /*
-        * The radix tree lock here protects a thread in xfs_iget from racing
-        * with us starting reclaim on the inode.  Once we have the
-        * XFS_IRECLAIM flag set it will not touch us.
-        */
-       spin_lock(&ip->i_flags_lock);
-       ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-       if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
-               /* ignore as it is already under reclaim */
-               spin_unlock(&ip->i_flags_lock);
-               write_unlock(&pag->pag_ici_lock);
-               return 0;
-       }
-       __xfs_iflags_set(ip, XFS_IRECLAIM);
-       spin_unlock(&ip->i_flags_lock);
-       write_unlock(&pag->pag_ici_lock);
-
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if (!xfs_iflock_nowait(ip)) {
                 if (!(sync_mode & SYNC_WAIT))
@@ -868,13 +821,126 @@ reclaim:
  
  }
  
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk will leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+       struct xfs_mount        *mp,
+       int                     flags,
+       int                     *nr_to_scan)
+{
+       struct xfs_perag        *pag;
+       int                     error = 0;
+       int                     last_error = 0;
+       xfs_agnumber_t          ag;
+       int                     trylock = flags & SYNC_TRYLOCK;
+       int                     skipped;
+
+restart:
+       ag = 0;
+       skipped = 0;
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               unsigned long   first_index = 0;
+               int             done = 0;
+               int             nr_found = 0;
+
+               ag = pag->pag_agno + 1;
+               if (trylock) {
+                       if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+                               /* contended: drop our perag reference */
+                               skipped++;
+                               xfs_perag_put(pag);
+                               continue;
+                       }
+                       first_index = pag->pag_ici_reclaim_cursor;
+               } else
+                       mutex_lock(&pag->pag_ici_reclaim_lock);
+
+               do {
+                       struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+                       int     i;
+
+                       write_lock(&pag->pag_ici_lock);
+                       nr_found = radix_tree_gang_lookup_tag(
+                                       &pag->pag_ici_root,
+                                       (void **)batch, first_index,
+                                       XFS_LOOKUP_BATCH,
+                                       XFS_ICI_RECLAIM_TAG);
+                       if (!nr_found) {
+                               write_unlock(&pag->pag_ici_lock);
+                               break;
+                       }
+
+                       /*
+                        * Grab the inodes before we drop the lock. Those we
+                        * cannot grab are NULLed out and skipped below.
+                        */
+                       for (i = 0; i < nr_found; i++) {
+                               struct xfs_inode *ip = batch[i];
+
+                               if (done || xfs_reclaim_inode_grab(ip, flags))
+                                       batch[i] = NULL;
+
+                               /*
+                                * Update the index for the next lookup. Catch
+                                * overflows into the next AG range which can
+                                * occur if we have inodes in the last block of
+                                * the AG and we are currently pointing to the
+                                * last inode.
+                                */
+                               first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+                               if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+                                       done = 1;
+                       }
+
+                       /* unlock now we've grabbed the inodes. */
+                       write_unlock(&pag->pag_ici_lock);
+
+                       for (i = 0; i < nr_found; i++) {
+                               if (!batch[i])
+                                       continue;
+                               error = xfs_reclaim_inode(batch[i], pag, flags);
+                               if (error && last_error != EFSCORRUPTED)
+                                       last_error = error;
+                       }
+
+                       *nr_to_scan -= XFS_LOOKUP_BATCH;
+               } while (nr_found && !done && *nr_to_scan > 0);
+
+               if (trylock && !done)
+                       pag->pag_ici_reclaim_cursor = first_index;
+               else
+                       pag->pag_ici_reclaim_cursor = 0;
+               mutex_unlock(&pag->pag_ici_reclaim_lock);
+               xfs_perag_put(pag);
+       }
+
+       /*
+        * if we skipped any AG, and we still have scan count remaining, do
+        * another pass this time using blocking reclaim semantics (i.e.
+        * waiting on the reclaim locks and ignoring the reclaim cursors). This
+        * ensures that when we get more reclaimers than AGs we block rather
+        * than spin trying to execute reclaim.
+        */
+       if (trylock && skipped && *nr_to_scan > 0) {
+               trylock = 0;
+               goto restart;
+       }
+       return XFS_ERROR(last_error);
+}
+
  int
  xfs_reclaim_inodes(
         xfs_mount_t     *mp,
         int             mode)
  {
-       return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-                                       XFS_ICI_RECLAIM_TAG, 1, NULL);
+       int             nr_to_scan = INT_MAX;
+
+       return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
  }
  
  /*
@@ -896,17 +962,16 @@ xfs_reclaim_inode_shrink(
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
  
-               xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
-                                       XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
-               /* if we don't exhaust the scan, don't bother coming back */
+               xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+               /* terminate if we don't exhaust the scan */
                 if (nr_to_scan > 0)
                         return -1;
         }
  
         reclaimable = 0;
         ag = 0;
-       while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
-                                       XFS_ICI_RECLAIM_TAG))) {
+       while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+               ag = pag->pag_agno + 1;
                 reclaimable += pag->pag_ici_reclaimable;
                 xfs_perag_put(pag);
         }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h

index fe78726..32ba662 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -47,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
  void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
                                 struct xfs_inode *ip);
  
-int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_sync_inode_grab(struct xfs_inode *ip);
  int xfs_inode_ag_iterator(struct xfs_mount *mp,
         int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-       int flags, int tag, int write_lock, int *nr_to_scan);
+       int flags);
  
  void xfs_inode_shrinker_register(struct xfs_mount *mp);
  void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h

index 8fe311a..acef2e9 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
                  unsigned long caller_ip),                                      \
         TP_ARGS(mp, agno, refcount, caller_ip))
  DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
  DEFINE_PERAG_REF_EVENT(xfs_perag_put);
  DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -330,7 +330,7 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
  DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
  DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
  DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
  DEFINE_BUF_EVENT(xfs_bdstrat_shut);
  DEFINE_BUF_EVENT(xfs_buf_item_relse);
  DEFINE_BUF_EVENT(xfs_buf_item_iodone);
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h

deleted file mode 100644 (file)

index f8d279d..0000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_VERSION_H__
-#define __XFS_VERSION_H__
-
-/*
- * Dummy file that can contain a timestamp to put into the
- * XFS init string, to help users keep track of what they're
- * running
- */
-
-#define XFS_VERSION_STRING "SGI XFS"
-
-#endif /* __XFS_VERSION_H__ */
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c

index e1a2f68..faf8e1a 100644 (file)
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -463,87 +463,68 @@ xfs_qm_dqtobp(
         uint                    flags)
  {
         xfs_bmbt_irec_t map;
-       int             nmaps, error;
+       int             nmaps = 1, error;
         xfs_buf_t       *bp;
-       xfs_inode_t     *quotip;
-       xfs_mount_t     *mp;
+       xfs_inode_t     *quotip = XFS_DQ_TO_QIP(dqp);
+       xfs_mount_t     *mp = dqp->q_mount;
         xfs_disk_dquot_t *ddq;
-       xfs_dqid_t      id;
-       boolean_t       newdquot;
+       xfs_dqid_t      id = be32_to_cpu(dqp->q_core.d_id);
         xfs_trans_t     *tp = (tpp ? *tpp : NULL);
  
-       mp = dqp->q_mount;
-       id = be32_to_cpu(dqp->q_core.d_id);
-       nmaps = 1;
-       newdquot = B_FALSE;
+       dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
  
-       /*
-        * If we don't know where the dquot lives, find out.
-        */
-       if (dqp->q_blkno == (xfs_daddr_t) 0) {
-               /* We use the id as an index */
-               dqp->q_fileoffset = (xfs_fileoff_t)id /
-                                       mp->m_quotainfo->qi_dqperchunk;
-               nmaps = 1;
-               quotip = XFS_DQ_TO_QIP(dqp);
-               xfs_ilock(quotip, XFS_ILOCK_SHARED);
+       xfs_ilock(quotip, XFS_ILOCK_SHARED);
+       if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
                 /*
-                * Return if this type of quotas is turned off while we didn't
-                * have an inode lock
+                * Return if this type of quotas is turned off while we
+                * didn't have the quota inode lock.
                  */
-               if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
-                       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
-                       return (ESRCH);
-               }
+               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+               return ESRCH;
+       }
+
+       /*
+        * Find the block map; no allocations yet
+        */
+       error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
+                         XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
+                         NULL, 0, &map, &nmaps, NULL);
+
+       xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+       if (error)
+               return error;
+
+       ASSERT(nmaps == 1);
+       ASSERT(map.br_blockcount == 1);
+
+       /*
+        * Offset of dquot in the (fixed sized) dquot chunk.
+        */
+       dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
+               sizeof(xfs_dqblk_t);
+
+       ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+       if (map.br_startblock == HOLESTARTBLOCK) {
                 /*
-                * Find the block map; no allocations yet
+                * We don't allocate unless we're asked to
                  */
-               error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
-                                 XFS_DQUOT_CLUSTER_SIZE_FSB,
-                                 XFS_BMAPI_METADATA,
-                                 NULL, 0, &map, &nmaps, NULL);
+               if (!(flags & XFS_QMOPT_DQALLOC))
+                       return ENOENT;
  
-               xfs_iunlock(quotip, XFS_ILOCK_SHARED);
+               ASSERT(tp);
+               error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
+                                       dqp->q_fileoffset, &bp);
                 if (error)
-                       return (error);
-               ASSERT(nmaps == 1);
-               ASSERT(map.br_blockcount == 1);
+                       return error;
+               tp = *tpp;
+       } else {
+               trace_xfs_dqtobp_read(dqp);
  
                 /*
-                * offset of dquot in the (fixed sized) dquot chunk.
+                * store the blkno etc so that we don't have to do the
+                * mapping all the time
                  */
-               dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
-                       sizeof(xfs_dqblk_t);
-               if (map.br_startblock == HOLESTARTBLOCK) {
-                       /*
-                        * We don't allocate unless we're asked to
-                        */
-                       if (!(flags & XFS_QMOPT_DQALLOC))
-                               return (ENOENT);
-
-                       ASSERT(tp);
-                       if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
-                                               dqp->q_fileoffset, &bp)))
-                               return (error);
-                       tp = *tpp;
-                       newdquot = B_TRUE;
-               } else {
-                       /*
-                        * store the blkno etc so that we don't have to do the
-                        * mapping all the time
-                        */
-                       dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-               }
-       }
-       ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
-       ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
-
-       /*
-        * Read in the buffer, unless we've just done the allocation
-        * (in which case we already have the buf).
-        */
-       if (!newdquot) {
-               trace_xfs_dqtobp_read(dqp);
+               dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
  
                 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
                                            dqp->q_blkno,
@@ -552,13 +533,14 @@ xfs_qm_dqtobp(
                 if (error || !bp)
                         return XFS_ERROR(error);
         }
+
         ASSERT(XFS_BUF_ISBUSY(bp));
         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
  
         /*
          * calculate the location of the dquot inside the buffer.
          */
-       ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+       ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
  
         /*
          * A simple sanity check in case we got a corrupted dquot...
@@ -1176,18 +1158,18 @@ xfs_qm_dqflush(
         xfs_dquot_t             *dqp,
         uint                    flags)
  {
-       xfs_mount_t             *mp;
-       xfs_buf_t               *bp;
-       xfs_disk_dquot_t        *ddqp;
+       struct xfs_mount        *mp = dqp->q_mount;
+       struct xfs_buf          *bp;
+       struct xfs_disk_dquot   *ddqp;
         int                     error;
  
         ASSERT(XFS_DQ_IS_LOCKED(dqp));
         ASSERT(!completion_done(&dqp->q_flush));
+
         trace_xfs_dqflush(dqp);
  
         /*
-        * If not dirty, or it's pinned and we are not supposed to
-        * block, nada.
+        * If not dirty, or it's pinned and we are not supposed to block, nada.
          */
         if (!XFS_DQ_IS_DIRTY(dqp) ||
             (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
@@ -1201,40 +1183,46 @@ xfs_qm_dqflush(
          * down forcibly. If that's the case we must not write this dquot
          * to disk, because the log record didn't make it to disk!
          */
-       if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
-               dqp->dq_flags &= ~(XFS_DQ_DIRTY);
+       if (XFS_FORCED_SHUTDOWN(mp)) {
+               dqp->dq_flags &= ~XFS_DQ_DIRTY;
                 xfs_dqfunlock(dqp);
                 return XFS_ERROR(EIO);
         }
  
         /*
          * Get the buffer containing the on-disk dquot
-        * We don't need a transaction envelope because we know that the
-        * the ondisk-dquot has already been allocated for.
          */
-       if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
+       error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
+                                  mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+       if (error) {
                 ASSERT(error != ENOENT);
-               /*
-                * Quotas could have gotten turned off (ESRCH)
-                */
                 xfs_dqfunlock(dqp);
-               return (error);
+               return error;
         }
  
-       if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
-                          0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
-               xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
+       /*
+        * Calculate the location of the dquot inside the buffer.
+        */
+       ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
+
+       /*
+        * A simple sanity check in case we got a corrupted dquot...
+        */
+       if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
+                          XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
+               xfs_buf_relse(bp);
+               xfs_dqfunlock(dqp);
+               xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
                 return XFS_ERROR(EIO);
         }
  
         /* This is the only portion of data that needs to persist */
-       memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
+       memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
  
         /*
          * Clear the dirty field and remember the flush lsn for later use.
          */
-       dqp->dq_flags &= ~(XFS_DQ_DIRTY);
-       mp = dqp->q_mount;
+       dqp->dq_flags &= ~XFS_DQ_DIRTY;
  
         xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
                                         &dqp->q_logitem.qli_item.li_lsn);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c

index 9a92407..f8e854b 100644 (file)
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -55,8 +55,6 @@ uint          ndquot;
  kmem_zone_t    *qm_dqzone;
  kmem_zone_t    *qm_dqtrxzone;
  
-static cred_t  xfs_zerocr;
-
  STATIC void    xfs_qm_list_init(xfs_dqlist_t *, char *, int);
  STATIC void    xfs_qm_list_destroy(xfs_dqlist_t *);
  
@@ -837,7 +835,7 @@ xfs_qm_dqattach_locked(
                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
                                                 flags & XFS_QMOPT_DQALLOC,
                                                 ip->i_udquot, &ip->i_gdquot) :
-                       xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
+                       xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
                                                 flags & XFS_QMOPT_DQALLOC,
                                                 ip->i_udquot, &ip->i_gdquot);
                 /*
@@ -1199,87 +1197,6 @@ xfs_qm_list_destroy(
         mutex_destroy(&(list->qh_lock));
  }
  
-
-/*
- * Stripped down version of dqattach. This doesn't attach, or even look at the
- * dquots attached to the inode. The rationale is that there won't be any
- * attached at the time this is called from quotacheck.
- */
-STATIC int
-xfs_qm_dqget_noattach(
-       xfs_inode_t     *ip,
-       xfs_dquot_t     **O_udqpp,
-       xfs_dquot_t     **O_gdqpp)
-{
-       int             error;
-       xfs_mount_t     *mp;
-       xfs_dquot_t     *udqp, *gdqp;
-
-       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       mp = ip->i_mount;
-       udqp = NULL;
-       gdqp = NULL;
-
-       if (XFS_IS_UQUOTA_ON(mp)) {
-               ASSERT(ip->i_udquot == NULL);
-               /*
-                * We want the dquot allocated if it doesn't exist.
-                */
-               if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
-                                        XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
-                                        &udqp))) {
-                       /*
-                        * Shouldn't be able to turn off quotas here.
-                        */
-                       ASSERT(error != ESRCH);
-                       ASSERT(error != ENOENT);
-                       return error;
-               }
-               ASSERT(udqp);
-       }
-
-       if (XFS_IS_OQUOTA_ON(mp)) {
-               ASSERT(ip->i_gdquot == NULL);
-               if (udqp)
-                       xfs_dqunlock(udqp);
-               error = XFS_IS_GQUOTA_ON(mp) ?
-                               xfs_qm_dqget(mp, ip,
-                                            ip->i_d.di_gid, XFS_DQ_GROUP,
-                                            XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-                                            &gdqp) :
-                               xfs_qm_dqget(mp, ip,
-                                            ip->i_d.di_projid, XFS_DQ_PROJ,
-                                            XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
-                                            &gdqp);
-               if (error) {
-                       if (udqp)
-                               xfs_qm_dqrele(udqp);
-                       ASSERT(error != ESRCH);
-                       ASSERT(error != ENOENT);
-                       return error;
-               }
-               ASSERT(gdqp);
-
-               /* Reacquire the locks in the right order */
-               if (udqp) {
-                       if (! xfs_qm_dqlock_nowait(udqp)) {
-                               xfs_dqunlock(gdqp);
-                               xfs_dqlock(udqp);
-                               xfs_dqlock(gdqp);
-                       }
-               }
-       }
-
-       *O_udqpp = udqp;
-       *O_gdqpp = gdqp;
-
-#ifdef QUOTADEBUG
-       if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
-       if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
-#endif
-       return 0;
-}
-
  /*
   * Create an inode and return with a reference already taken, but unlocked
   * This is how we create quota inodes
@@ -1305,8 +1222,8 @@ xfs_qm_qino_alloc(
                 return error;
         }
  
-       if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
-                                  &xfs_zerocr, 0, 1, ip, &committed))) {
+       error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
+       if (error) {
                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
                                  XFS_TRANS_ABORT);
                 return error;
@@ -1516,7 +1433,7 @@ xfs_qm_dqiterate(
                                 rablkcnt =  map[i+1].br_blockcount;
                                 rablkno = map[i+1].br_startblock;
                                 while (rablkcnt--) {
-                                       xfs_baread(mp->m_ddev_targp,
+                                       xfs_buf_readahead(mp->m_ddev_targp,
                                                XFS_FSB_TO_DADDR(mp, rablkno),
                                                mp->m_quotainfo->qi_dqchunklen);
                                         rablkno++;
@@ -1546,18 +1463,34 @@ xfs_qm_dqiterate(
  
  /*
   * Called by dqusage_adjust in doing a quotacheck.
- * Given the inode, and a dquot (either USR or GRP, doesn't matter),
- * this updates its incore copy as well as the buffer copy. This is
- * so that once the quotacheck is done, we can just log all the buffers,
- * as opposed to logging numerous updates to individual dquots.
+ *
+ * Given the inode, and a dquot id this updates both the incore dquot as well
+ * as the buffer copy. This is so that once the quotacheck is done, we can
+ * just log all the buffers, as opposed to logging numerous updates to
+ * individual dquots.
   */
-STATIC void
+STATIC int
  xfs_qm_quotacheck_dqadjust(
-       xfs_dquot_t             *dqp,
+       struct xfs_inode        *ip,
+       xfs_dqid_t              id,
+       uint                    type,
         xfs_qcnt_t              nblks,
         xfs_qcnt_t              rtblks)
  {
-       ASSERT(XFS_DQ_IS_LOCKED(dqp));
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_dquot        *dqp;
+       int                     error;
+
+       error = xfs_qm_dqget(mp, ip, id, type,
+                            XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
+       if (error) {
+               /*
+                * Shouldn't be able to turn off quotas here.
+                */
+               ASSERT(error != ESRCH);
+               ASSERT(error != ENOENT);
+               return error;
+       }
  
         trace_xfs_dqadjust(dqp);
  
@@ -1582,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust(
          * There are no timers for the default values set in the root dquot.
          */
         if (dqp->q_core.d_id) {
-               xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
-               xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
+               xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
+               xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
         }
  
         dqp->dq_flags |= XFS_DQ_DIRTY;
+       xfs_qm_dqput(dqp);
+       return 0;
  }
  
  STATIC int
@@ -1629,8 +1564,7 @@ xfs_qm_dqusage_adjust(
         int             *res)           /* result code value */
  {
         xfs_inode_t     *ip;
-       xfs_dquot_t     *udqp, *gdqp;
-       xfs_qcnt_t      nblks, rtblks;
+       xfs_qcnt_t      nblks, rtblks = 0;
         int             error;
  
         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -1650,51 +1584,24 @@ xfs_qm_dqusage_adjust(
          * the case in all other instances. It's OK that we do this because
          * quotacheck is done only at mount time.
          */
-       if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) {
+       error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
+       if (error) {
                 *res = BULKSTAT_RV_NOTHING;
                 return error;
         }
  
-       /*
-        * Obtain the locked dquots. In case of an error (eg. allocation
-        * fails for ENOSPC), we return the negative of the error number
-        * to bulkstat, so that it can get propagated to quotacheck() and
-        * making us disable quotas for the file system.
-        */
-       if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
-               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-               IRELE(ip);
-               *res = BULKSTAT_RV_GIVEUP;
-               return error;
-       }
+       ASSERT(ip->i_delayed_blks == 0);
  
-       rtblks = 0;
-       if (! XFS_IS_REALTIME_INODE(ip)) {
-               nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
-       } else {
+       if (XFS_IS_REALTIME_INODE(ip)) {
                 /*
                  * Walk thru the extent list and count the realtime blocks.
                  */
-               if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
-                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                       IRELE(ip);
-                       if (udqp)
-                               xfs_qm_dqput(udqp);
-                       if (gdqp)
-                               xfs_qm_dqput(gdqp);
-                       *res = BULKSTAT_RV_GIVEUP;
-                       return error;
-               }
-               nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
+               error = xfs_qm_get_rtblks(ip, &rtblks);
+               if (error)
+                       goto error0;
         }
-       ASSERT(ip->i_delayed_blks == 0);
  
-       /*
-        * We can't release the inode while holding its dquot locks.
-        * The inode can go into inactive and might try to acquire the dquotlocks.
-        * So, just unlock here and do a vn_rele at the end.
-        */
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
  
         /*
          * Add the (disk blocks and inode) resources occupied by this
@@ -1709,26 +1616,36 @@ xfs_qm_dqusage_adjust(
          * and quotaoffs don't race. (Quotachecks happen at mount time only).
          */
         if (XFS_IS_UQUOTA_ON(mp)) {
-               ASSERT(udqp);
-               xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
-               xfs_qm_dqput(udqp);
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
+                                                  XFS_DQ_USER, nblks, rtblks);
+               if (error)
+                       goto error0;
         }
-       if (XFS_IS_OQUOTA_ON(mp)) {
-               ASSERT(gdqp);
-               xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
-               xfs_qm_dqput(gdqp);
+
+       if (XFS_IS_GQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
+                                                  XFS_DQ_GROUP, nblks, rtblks);
+               if (error)
+                       goto error0;
         }
-       /*
-        * Now release the inode. This will send it to 'inactive', and
-        * possibly even free blocks.
-        */
-       IRELE(ip);
  
-       /*
-        * Goto next inode.
-        */
+       if (XFS_IS_PQUOTA_ON(mp)) {
+               error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
+                                                  XFS_DQ_PROJ, nblks, rtblks);
+               if (error)
+                       goto error0;
+       }
+
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
         *res = BULKSTAT_RV_DIDONE;
         return 0;
+
+error0:
+       xfs_iunlock(ip, XFS_ILOCK_EXCL);
+       IRELE(ip);
+       *res = BULKSTAT_RV_GIVEUP;
+       return error;
  }
  
  /*
@@ -2224,7 +2141,7 @@ xfs_qm_write_sb_changes(
  
  
  /*
- * Given an inode, a uid and gid (from cred_t) make sure that we have
+ * Given an inode, a uid, gid and prid make sure that we have
   * allocated relevant dquot(s) on disk, and that we won't exceed inode
   * quotas by creating this file.
   * This also attaches dquot(s) to the given inode after locking it,
@@ -2332,7 +2249,7 @@ xfs_qm_vop_dqalloc(
                         xfs_dqunlock(gq);
                 }
         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
-               if (ip->i_d.di_projid != prid) {
+               if (xfs_get_projid(ip) != prid) {
                         xfs_iunlock(ip, lockflags);
                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
                                                  XFS_DQ_PROJ,
@@ -2454,7 +2371,7 @@ xfs_qm_vop_chown_reserve(
         }
         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
                 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
-                    ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
+                    xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
                         prjflags = XFS_QMOPT_ENOSPC;
  
                 if (prjflags ||
@@ -2558,7 +2475,7 @@ xfs_qm_vop_create_dqattach(
                 ip->i_gdquot = gdqp;
                 ASSERT(XFS_IS_OQUOTA_ON(mp));
                 ASSERT((XFS_IS_GQUOTA_ON(mp) ?
-                       ip->i_d.di_gid : ip->i_d.di_projid) ==
+                       ip->i_d.di_gid : xfs_get_projid(ip)) ==
                                 be32_to_cpu(gdqp->q_core.d_id));
                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
         }
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c

index bea02d7..45b5cb1 100644 (file)
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -81,7 +81,7 @@ xfs_qm_statvfs(
         xfs_mount_t             *mp = ip->i_mount;
         xfs_dquot_t             *dqp;
  
-       if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
+       if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
                 xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
                 xfs_qm_dqput(dqp);
         }
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c

index 45e5849..bdebc18 100644 (file)
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile(
                 goto out_unlock;
         }
  
-       xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
  
  out_unlock:
@@ -875,21 +875,14 @@ xfs_dqrele_inode(
         struct xfs_perag        *pag,
         int                     flags)
  {
-       int                     error;
-
         /* skip quota inodes */
         if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
             ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
                 ASSERT(ip->i_udquot == NULL);
                 ASSERT(ip->i_gdquot == NULL);
-               read_unlock(&pag->pag_ici_lock);
                 return 0;
         }
  
-       error = xfs_sync_inode_valid(ip, pag);
-       if (error)
-               return error;
-
         xfs_ilock(ip, XFS_ILOCK_EXCL);
         if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
                 xfs_qm_dqrele(ip->i_udquot);
@@ -900,8 +893,6 @@ xfs_dqrele_inode(
                 ip->i_gdquot = NULL;
         }
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       IRELE(ip);
         return 0;
  }
  
@@ -918,8 +909,7 @@ xfs_qm_dqrele_all_inodes(
         uint             flags)
  {
         ASSERT(mp->m_quotainfo);
-       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
-                               XFS_ICI_NO_TAG, 0, NULL);
+       xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
  }
  
  /*------------------------------------------------------------------------*/
@@ -1175,7 +1165,7 @@ xfs_qm_internalqcheck_adjust(
         }
         xfs_qm_internalqcheck_get_dquots(mp,
                                         (xfs_dqid_t) ip->i_d.di_uid,
-                                       (xfs_dqid_t) ip->i_d.di_projid,
+                                       (xfs_dqid_t) xfs_get_projid(ip),
                                         (xfs_dqid_t) ip->i_d.di_gid,
                                         &ud, &gd);
         if (XFS_IS_UQUOTA_ON(mp)) {
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h

index 4917d4e..63c7a1a 100644 (file)
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -230,6 +230,15 @@ typedef struct xfs_perag {
         rwlock_t        pag_ici_lock;   /* incore inode lock */
         struct radix_tree_root pag_ici_root;    /* incore inode cache root */
         int             pag_ici_reclaimable;    /* reclaimable inodes */
+       struct mutex    pag_ici_reclaim_lock;   /* serialisation point */
+       unsigned long   pag_ici_reclaim_cursor; /* reclaim restart point */
+
+       /* buffer cache index */
+       spinlock_t      pag_buf_lock;   /* lock for pag_buf_tree */
+       struct rb_root  pag_buf_tree;   /* ordered tree of active buffers */
+
+       /* for rcu-safe freeing */
+       struct rcu_head rcu_head;
  #endif
         int             pagb_count;     /* pagb slots in use */
  } xfs_perag_t;
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c

index af168fa..112abc4 100644 (file)
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -675,7 +675,7 @@ xfs_alloc_ag_vextent_near(
         xfs_agblock_t   gtbnoa;         /* aligned ... */
         xfs_extlen_t    gtdiff;         /* difference to right side entry */
         xfs_extlen_t    gtlen;          /* length of right side entry */
-       xfs_extlen_t    gtlena;         /* aligned ... */
+       xfs_extlen_t    gtlena = 0;     /* aligned ... */
         xfs_agblock_t   gtnew;          /* useful start bno of right side */
         int             error;          /* error code */
         int             i;              /* result code, temporary */
@@ -684,7 +684,7 @@ xfs_alloc_ag_vextent_near(
         xfs_agblock_t   ltbnoa;         /* aligned ... */
         xfs_extlen_t    ltdiff;         /* difference to left side entry */
         xfs_extlen_t    ltlen;          /* length of left side entry */
-       xfs_extlen_t    ltlena;         /* aligned ... */
+       xfs_extlen_t    ltlena = 0;     /* aligned ... */
         xfs_agblock_t   ltnew;          /* useful start bno of left side */
         xfs_extlen_t    rlen;           /* length of returned extent */
  #if defined(DEBUG) && defined(__KERNEL__)
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c

index 97f7328..3916925 100644 (file)
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -280,38 +280,6 @@ xfs_allocbt_key_diff(
         return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock;
  }
  
-STATIC int
-xfs_allocbt_kill_root(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     level,
-       union xfs_btree_ptr     *newroot)
-{
-       int                     error;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_STATS_INC(cur, killroot);
-
-       /*
-        * Update the root pointer, decreasing the level by 1 and then
-        * free the old root.
-        */
-       xfs_allocbt_set_root(cur, newroot, -1);
-       error = xfs_allocbt_free_block(cur, bp);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-               return error;
-       }
-
-       XFS_BTREE_STATS_INC(cur, free);
-
-       xfs_btree_setbuf(cur, level, NULL);
-       cur->bc_nlevels--;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-}
-
  #ifdef DEBUG
  STATIC int
  xfs_allocbt_keys_inorder(
@@ -423,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
  
         .dup_cursor             = xfs_allocbt_dup_cursor,
         .set_root               = xfs_allocbt_set_root,
-       .kill_root              = xfs_allocbt_kill_root,
         .alloc_block            = xfs_allocbt_alloc_block,
         .free_block             = xfs_allocbt_free_block,
         .update_lastrec         = xfs_allocbt_update_lastrec,
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c

index c256824..c863753 100644 (file)
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -355,16 +355,15 @@ xfs_attr_set_int(
                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
                                 xfs_trans_set_sync(args.trans);
                         }
+
+                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
+                               xfs_trans_ichgtime(args.trans, dp,
+                                                       XFS_ICHGTIME_CHG);
+                       }
                         err2 = xfs_trans_commit(args.trans,
                                                  XFS_TRANS_RELEASE_LOG_RES);
                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
  
-                       /*
-                        * Hit the inode change time.
-                        */
-                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-                       }
                         return(error == 0 ? err2 : error);
                 }
  
@@ -420,6 +419,9 @@ xfs_attr_set_int(
                 xfs_trans_set_sync(args.trans);
         }
  
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
         /*
          * Commit the last in the sequence of transactions.
          */
@@ -427,13 +429,6 @@ xfs_attr_set_int(
         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
         xfs_iunlock(dp, XFS_ILOCK_EXCL);
  
-       /*
-        * Hit the inode change time.
-        */
-       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-       }
-
         return(error);
  
  out:
@@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
                 xfs_trans_set_sync(args.trans);
         }
  
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
         /*
          * Commit the last in the sequence of transactions.
          */
@@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
         xfs_iunlock(dp, XFS_ILOCK_EXCL);
  
-       /*
-        * Hit the inode change time.
-        */
-       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-       }
-
         return(error);
  
  out:
@@ -1995,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
  
                         tmp = (valuelen < XFS_BUF_SIZE(bp))
                                 ? valuelen : XFS_BUF_SIZE(bp);
-                       xfs_biomove(bp, 0, tmp, dst, XBF_READ);
+                       xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ);
                         xfs_buf_relse(bp);
                         dst += tmp;
                         valuelen -= tmp;
@@ -2125,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
  
                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
                                                         XFS_BUF_SIZE(bp);
-               xfs_biomove(bp, 0, tmp, src, XBF_WRITE);
+               xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
                 if (tmp < XFS_BUF_SIZE(bp))
-                       xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
+                       xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
                         return (error);
                 }
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c

index f90dadd..8abd12e 100644 (file)
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -614,7 +614,7 @@ xfs_bmap_add_extent(
                         nblks += cur->bc_private.b.allocated;
                 ASSERT(nblks <= da_old);
                 if (nblks < da_old)
-                       xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+                       xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
                                 (int64_t)(da_old - nblks), rsvd);
         }
         /*
@@ -1079,7 +1079,8 @@ xfs_bmap_add_extent_delay_real(
                 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
                         (cur ? cur->bc_private.b.allocated : 0));
                 if (diff > 0 &&
-                   xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) {
+                   xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
+                                            -((int64_t)diff), rsvd)) {
                         /*
                          * Ick gross gag me with a spoon.
                          */
@@ -1089,16 +1090,18 @@ xfs_bmap_add_extent_delay_real(
                                         temp--;
                                         diff--;
                                         if (!diff ||
-                                           !xfs_mod_incore_sb(ip->i_mount,
-                                                   XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
+                                           !xfs_icsb_modify_counters(ip->i_mount,
+                                                   XFS_SBS_FDBLOCKS,
+                                                   -((int64_t)diff), rsvd))
                                                 break;
                                 }
                                 if (temp2) {
                                         temp2--;
                                         diff--;
                                         if (!diff ||
-                                           !xfs_mod_incore_sb(ip->i_mount,
-                                                   XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
+                                           !xfs_icsb_modify_counters(ip->i_mount,
+                                                   XFS_SBS_FDBLOCKS,
+                                                   -((int64_t)diff), rsvd))
                                                 break;
                                 }
                         }
@@ -1766,7 +1769,7 @@ xfs_bmap_add_extent_hole_delay(
         }
         if (oldlen != newlen) {
                 ASSERT(oldlen > newlen);
-               xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
+               xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
                         (int64_t)(oldlen - newlen), rsvd);
                 /*
                  * Nothing to do for disk quota accounting here.
@@ -3111,9 +3114,10 @@ xfs_bmap_del_extent(
          * Nothing to do for disk quota accounting here.
          */
         ASSERT(da_old >= da_new);
-       if (da_old > da_new)
-               xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
-                       rsvd);
+       if (da_old > da_new) {
+               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                       (int64_t)(da_old - da_new), rsvd);
+       }
  done:
         *logflagsp = flags;
         return error;
@@ -4526,13 +4530,13 @@ xfs_bmapi(
                                                         -((int64_t)extsz), (flags &
                                                         XFS_BMAPI_RSVBLOCKS));
                                 } else {
-                                       error = xfs_mod_incore_sb(mp,
+                                       error = xfs_icsb_modify_counters(mp,
                                                         XFS_SBS_FDBLOCKS,
                                                         -((int64_t)alen), (flags &
                                                         XFS_BMAPI_RSVBLOCKS));
                                 }
                                 if (!error) {
-                                       error = xfs_mod_incore_sb(mp,
+                                       error = xfs_icsb_modify_counters(mp,
                                                         XFS_SBS_FDBLOCKS,
                                                         -((int64_t)indlen), (flags &
                                                         XFS_BMAPI_RSVBLOCKS));
@@ -4542,7 +4546,7 @@ xfs_bmapi(
                                                         (int64_t)extsz, (flags &
                                                         XFS_BMAPI_RSVBLOCKS));
                                         else if (error)
-                                               xfs_mod_incore_sb(mp,
+                                               xfs_icsb_modify_counters(mp,
                                                         XFS_SBS_FDBLOCKS,
                                                         (int64_t)alen, (flags &
                                                         XFS_BMAPI_RSVBLOCKS));
@@ -4744,8 +4748,12 @@ xfs_bmapi(
                  * Check if writing previously allocated but
                  * unwritten extents.
                  */
-               if (wr && mval->br_state == XFS_EXT_UNWRITTEN &&
-                   ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) {
+               if (wr &&
+                   ((mval->br_state == XFS_EXT_UNWRITTEN &&
+                     ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
+                    (mval->br_state == XFS_EXT_NORM &&
+                     ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
+                               (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
                         /*
                          * Modify (by adding) the state flag, if writing.
                          */
@@ -4757,7 +4765,9 @@ xfs_bmapi(
                                         *firstblock;
                                 cur->bc_private.b.flist = flist;
                         }
-                       mval->br_state = XFS_EXT_NORM;
+                       mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
+                                               ? XFS_EXT_NORM
+                                               : XFS_EXT_UNWRITTEN;
                         error = xfs_bmap_add_extent(ip, lastx, &cur, mval,
                                 firstblock, flist, &tmp_logflags,
                                 whichfork, (flags & XFS_BMAPI_RSVBLOCKS));
@@ -5200,7 +5210,7 @@ xfs_bunmapi(
                                         ip, -((long)del.br_blockcount), 0,
                                         XFS_QMOPT_RES_RTBLKS);
                         } else {
-                               xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
+                               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
                                                 (int64_t)del.br_blockcount, rsvd);
                                 (void)xfs_trans_reserve_quota_nblks(NULL,
                                         ip, -((long)del.br_blockcount), 0,
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h

index b13569a..71ec9b6 100644 (file)
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -74,9 +74,12 @@ typedef      struct xfs_bmap_free
  #define        XFS_BMAPI_IGSTATE       0x080   /* Ignore state - */
                                         /* combine contig. space */
  #define        XFS_BMAPI_CONTIG        0x100   /* must allocate only one extent */
-#define XFS_BMAPI_CONVERT      0x200   /* unwritten extent conversion - */
-                                       /* need write cache flushing and no */
-                                       /* additional allocation alignments */
+/*
+ * unwritten extent conversion - this needs write cache flushing and no additional
+ * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
+ * from written to unwritten, otherwise it converts from unwritten to written.
+ */
+#define XFS_BMAPI_CONVERT      0x200
  
  #define XFS_BMAPI_FLAGS \
         { XFS_BMAPI_WRITE,      "WRITE" }, \
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c

index 829af92..04f9cca 100644 (file)
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -217,7 +217,7 @@ xfs_btree_del_cursor(
          */
         for (i = 0; i < cur->bc_nlevels; i++) {
                 if (cur->bc_bufs[i])
-                       xfs_btree_setbuf(cur, i, NULL);
+                       xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
                 else if (!error)
                         break;
         }
@@ -656,7 +656,7 @@ xfs_btree_reada_bufl(
  
         ASSERT(fsbno != NULLFSBLOCK);
         d = XFS_FSB_TO_DADDR(mp, fsbno);
-       xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
+       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
  }
  
  /*
@@ -676,7 +676,7 @@ xfs_btree_reada_bufs(
         ASSERT(agno != NULLAGNUMBER);
         ASSERT(agbno != NULLAGBLOCK);
         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-       xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
+       xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
  }
  
  STATIC int
@@ -763,22 +763,19 @@ xfs_btree_readahead(
   * Set the buffer for level "lev" in the cursor to bp, releasing
   * any previous buffer.
   */
-void
+STATIC void
  xfs_btree_setbuf(
         xfs_btree_cur_t         *cur,   /* btree cursor */
         int                     lev,    /* level in btree */
         xfs_buf_t               *bp)    /* new buffer to set */
  {
         struct xfs_btree_block  *b;     /* btree block */
-       xfs_buf_t               *obp;   /* old buffer pointer */
  
-       obp = cur->bc_bufs[lev];
-       if (obp)
-               xfs_trans_brelse(cur->bc_tp, obp);
+       if (cur->bc_bufs[lev])
+               xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
         cur->bc_bufs[lev] = bp;
         cur->bc_ra[lev] = 0;
-       if (!bp)
-               return;
+
         b = XFS_BUF_TO_BLOCK(bp);
         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
@@ -3011,6 +3008,43 @@ out0:
         return 0;
  }
  
+/*
+ * Kill the current root node, and replace it with its only child node.
+ */
+STATIC int
+xfs_btree_kill_root(
+       struct xfs_btree_cur    *cur,
+       struct xfs_buf          *bp,
+       int                     level,
+       union xfs_btree_ptr     *newroot)
+{
+       int                     error;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
+       XFS_BTREE_STATS_INC(cur, killroot);
+
+       /*
+        * Update the root pointer, decreasing the level by 1 and then
+        * free the old root.
+        */
+       cur->bc_ops->set_root(cur, newroot, -1);
+
+       error = cur->bc_ops->free_block(cur, bp);
+       if (error) {
+               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+               return error;
+       }
+
+       XFS_BTREE_STATS_INC(cur, free);
+
+       cur->bc_bufs[level] = NULL;
+       cur->bc_ra[level] = 0;
+       cur->bc_nlevels--;
+
+       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
+       return 0;
+}
+
  STATIC int
  xfs_btree_dec_cursor(
         struct xfs_btree_cur    *cur,
@@ -3195,7 +3229,7 @@ xfs_btree_delrec(
                          * Make it the new root of the btree.
                          */
                         pp = xfs_btree_ptr_addr(cur, 1, block);
-                       error = cur->bc_ops->kill_root(cur, bp, level, pp);
+                       error = xfs_btree_kill_root(cur, bp, level, pp);
                         if (error)
                                 goto error0;
                 } else if (level > 0) {
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h

index 7fa0706..82fafc6 100644 (file)
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -152,9 +152,7 @@ struct xfs_btree_ops {
  
         /* update btree root pointer */
         void    (*set_root)(struct xfs_btree_cur *cur,
-                               union xfs_btree_ptr *nptr, int level_change);
-       int     (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp,
-                               int level, union xfs_btree_ptr *newroot);
+                           union xfs_btree_ptr *nptr, int level_change);
  
         /* block allocation / freeing */
         int     (*alloc_block)(struct xfs_btree_cur *cur,
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs(
         xfs_agblock_t           agbno,  /* allocation group block number */
         xfs_extlen_t            count); /* count of filesystem blocks */
  
-/*
- * Set the buffer for level "lev" in the cursor to bp, releasing
- * any previous buffer.
- */
-void
-xfs_btree_setbuf(
-       xfs_btree_cur_t         *cur,   /* btree cursor */
-       int                     lev,    /* level in btree */
-       struct xfs_buf          *bp);   /* new buffer to set */
-
  
  /*
   * Common btree core entry points.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index 1b09d7a..2686d0d 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -692,8 +692,7 @@ xfs_buf_item_init(
          * the first.  If we do already have one, there is
          * nothing to do here so return.
          */
-       if (bp->b_mount != mp)
-               bp->b_mount = mp;
+       ASSERT(bp->b_target->bt_mount == mp);
         if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
                 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
                 if (lip->li_type == XFS_LI_BUF) {
@@ -974,7 +973,7 @@ xfs_buf_iodone_callbacks(
                         xfs_buf_do_callbacks(bp, lip);
                         XFS_BUF_SET_FSPRIVATE(bp, NULL);
                         XFS_BUF_CLR_IODONE_FUNC(bp);
-                       xfs_biodone(bp);
+                       xfs_buf_ioend(bp, 0);
                         return;
                 }
  
@@ -1033,7 +1032,7 @@ xfs_buf_iodone_callbacks(
         xfs_buf_do_callbacks(bp, lip);
         XFS_BUF_SET_FSPRIVATE(bp, NULL);
         XFS_BUF_CLR_IODONE_FUNC(bp);
-       xfs_biodone(bp);
+       xfs_buf_ioend(bp, 0);
  }
  
  /*
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c

index 30fa0e2..1c00bed 100644 (file)
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2042,7 +2042,7 @@ xfs_da_do_buf(
                                 mappedbno, nmapped, 0, &bp);
                         break;
                 case 3:
-                       xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
+                       xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
                         error = 0;
                         bp = NULL;
                         break;
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h

index e5b153b..dffba9b 100644 (file)
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -49,8 +49,9 @@ typedef struct xfs_dinode {
         __be32          di_uid;         /* owner's user id */
         __be32          di_gid;         /* owner's group id */
         __be32          di_nlink;       /* number of links to file */
-       __be16          di_projid;      /* owner's project id */
-       __u8            di_pad[8];      /* unused, zeroed space */
+       __be16          di_projid_lo;   /* lower part of owner's project id */
+       __be16          di_projid_hi;   /* higher part of owner's project id */
+       __u8            di_pad[6];      /* unused, zeroed space */
         __be16          di_flushiter;   /* incremented on flush */
         xfs_timestamp_t di_atime;       /* time last accessed */
         xfs_timestamp_t di_mtime;       /* time last modified */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c

index 504be86..ae89122 100644 (file)
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -961,7 +961,7 @@ xfs_dir2_leaf_getdents(
                                 if (i > ra_current &&
                                     map[ra_index].br_blockcount >=
                                     mp->m_dirblkfsbs) {
-                                       xfs_baread(mp->m_ddev_targp,
+                                       xfs_buf_readahead(mp->m_ddev_targp,
                                                 XFS_FSB_TO_DADDR(mp,
                                                    map[ra_index].br_startblock +
                                                    ra_offset),
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h

index 87c2e9d..8f6fc1a 100644 (file)
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -293,9 +293,11 @@ typedef struct xfs_bstat {
         __s32           bs_extsize;     /* extent size                  */
         __s32           bs_extents;     /* number of extents            */
         __u32           bs_gen;         /* generation count             */
-       __u16           bs_projid;      /* project id                   */
+       __u16           bs_projid_lo;   /* lower part of project id     */
+#define        bs_projid       bs_projid_lo    /* (previously just bs_projid)  */
         __u16           bs_forkoff;     /* inode fork offset in bytes   */
-       unsigned char   bs_pad[12];     /* pad space, unused            */
+       __u16           bs_projid_hi;   /* higher part of project id    */
+       unsigned char   bs_pad[10];     /* pad space, unused            */
         __u32           bs_dmevmask;    /* DMIG event mask              */
         __u16           bs_dmstate;     /* DMIG state info              */
         __u16           bs_aextents;    /* attribute number of extents  */
@@ -448,6 +450,7 @@ typedef struct xfs_handle {
  /*     XFS_IOC_SETBIOSIZE ---- deprecated 46      */
  /*     XFS_IOC_GETBIOSIZE ---- deprecated 47      */
  #define XFS_IOC_GETBMAPX       _IOWR('X', 56, struct getbmap)
+#define XFS_IOC_ZERO_RANGE     _IOW ('X', 57, struct xfs_flock64)
  
  /*
   * ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c

index 43b1d56..a7c116e 100644 (file)
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -144,12 +144,11 @@ xfs_growfs_data_private(
         if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
                 return error;
         dpct = pct - mp->m_sb.sb_imax_pct;
-       error = xfs_read_buf(mp, mp->m_ddev_targp,
-                       XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
-                       XFS_FSS_TO_BB(mp, 1), 0, &bp);
-       if (error)
-               return error;
-       ASSERT(bp);
+       bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
+                               XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
+                               BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
+       if (!bp)
+               return EIO;
         xfs_buf_relse(bp);
  
         new = nb;       /* use new as a temporary here */
@@ -597,7 +596,8 @@ out:
                  * the extra reserve blocks from the reserve.....
                  */
                 int error;
-               error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0);
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                fdblks_delta, 0);
                 if (error == ENOSPC)
                         goto retry;
         }
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c

index 5371d2d..0626a32 100644 (file)
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -212,7 +212,7 @@ xfs_ialloc_inode_init(
                  *      to log a whole cluster of inodes instead of all the
                  *      individual transactions causing a lot of log traffic.
                  */
-               xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
+               xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog);
                 for (i = 0; i < ninodes; i++) {
                         int     ioffset = i << mp->m_sb.sb_inodelog;
                         uint    isize = sizeof(struct xfs_dinode);
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c

index d352862..16921f5 100644 (file)
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -183,38 +183,6 @@ xfs_inobt_key_diff(
                           cur->bc_rec.i.ir_startino;
  }
  
-STATIC int
-xfs_inobt_kill_root(
-       struct xfs_btree_cur    *cur,
-       struct xfs_buf          *bp,
-       int                     level,
-       union xfs_btree_ptr     *newroot)
-{
-       int                     error;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
-       XFS_BTREE_STATS_INC(cur, killroot);
-
-       /*
-        * Update the root pointer, decreasing the level by 1 and then
-        * free the old root.
-        */
-       xfs_inobt_set_root(cur, newroot, -1);
-       error = xfs_inobt_free_block(cur, bp);
-       if (error) {
-               XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
-               return error;
-       }
-
-       XFS_BTREE_STATS_INC(cur, free);
-
-       cur->bc_bufs[level] = NULL;
-       cur->bc_nlevels--;
-
-       XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
-       return 0;
-}
-
  #ifdef DEBUG
  STATIC int
  xfs_inobt_keys_inorder(
@@ -309,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
  
         .dup_cursor             = xfs_inobt_dup_cursor,
         .set_root               = xfs_inobt_set_root,
-       .kill_root              = xfs_inobt_kill_root,
         .alloc_block            = xfs_inobt_alloc_block,
         .free_block             = xfs_inobt_free_block,
         .get_minrecs            = xfs_inobt_get_minrecs,
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c

index b1ecc6f..0cdd269 100644 (file)
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -365,8 +365,8 @@ xfs_iget(
         xfs_perag_t     *pag;
         xfs_agino_t     agino;
  
-       /* the radix tree exists only in inode capable AGs */
-       if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi)
+       /* reject inode numbers outside existing AGs */
+       if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
                 return EINVAL;
  
         /* get the perag structure and ensure that it's inode capable */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index 34798f3..108c7a0 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -660,7 +660,8 @@ xfs_dinode_from_disk(
         to->di_uid = be32_to_cpu(from->di_uid);
         to->di_gid = be32_to_cpu(from->di_gid);
         to->di_nlink = be32_to_cpu(from->di_nlink);
-       to->di_projid = be16_to_cpu(from->di_projid);
+       to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+       to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
         to->di_flushiter = be16_to_cpu(from->di_flushiter);
         to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
@@ -695,7 +696,8 @@ xfs_dinode_to_disk(
         to->di_uid = cpu_to_be32(from->di_uid);
         to->di_gid = cpu_to_be32(from->di_gid);
         to->di_nlink = cpu_to_be32(from->di_nlink);
-       to->di_projid = cpu_to_be16(from->di_projid);
+       to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+       to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
         memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
         to->di_flushiter = cpu_to_be16(from->di_flushiter);
         to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
@@ -874,7 +876,7 @@ xfs_iread(
         if (ip->i_d.di_version == 1) {
                 ip->i_d.di_nlink = ip->i_d.di_onlink;
                 ip->i_d.di_onlink = 0;
-               ip->i_d.di_projid = 0;
+               xfs_set_projid(ip, 0);
         }
  
         ip->i_delayed_blks = 0;
@@ -982,8 +984,7 @@ xfs_ialloc(
         mode_t          mode,
         xfs_nlink_t     nlink,
         xfs_dev_t       rdev,
-       cred_t          *cr,
-       xfs_prid_t      prid,
+       prid_t          prid,
         int             okalloc,
         xfs_buf_t       **ialloc_context,
         boolean_t       *call_again,
@@ -1027,7 +1028,7 @@ xfs_ialloc(
         ASSERT(ip->i_d.di_nlink == nlink);
         ip->i_d.di_uid = current_fsuid();
         ip->i_d.di_gid = current_fsgid();
-       ip->i_d.di_projid = prid;
+       xfs_set_projid(ip, prid);
         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
  
         /*
@@ -2725,7 +2726,7 @@ cluster_corrupt_out:
                         XFS_BUF_UNDONE(bp);
                         XFS_BUF_STALE(bp);
                         XFS_BUF_ERROR(bp,EIO);
-                       xfs_biodone(bp);
+                       xfs_buf_ioend(bp, 0);
                 } else {
                         XFS_BUF_STALE(bp);
                         xfs_buf_relse(bp);
@@ -3008,7 +3009,7 @@ xfs_iflush_int(
                         memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
                         memset(&(dip->di_pad[0]), 0,
                               sizeof(dip->di_pad));
-                       ASSERT(ip->i_d.di_projid == 0);
+                       ASSERT(xfs_get_projid(ip) == 0);
                 }
         }
  
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index 0898c54..fac5229 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -134,8 +134,9 @@ typedef struct xfs_icdinode {
         __uint32_t      di_uid;         /* owner's user id */
         __uint32_t      di_gid;         /* owner's group id */
         __uint32_t      di_nlink;       /* number of links to file */
-       __uint16_t      di_projid;      /* owner's project id */
-       __uint8_t       di_pad[8];      /* unused, zeroed space */
+       __uint16_t      di_projid_lo;   /* lower part of owner's project id */
+       __uint16_t      di_projid_hi;   /* higher part of owner's project id */
+       __uint8_t       di_pad[6];      /* unused, zeroed space */
         __uint16_t      di_flushiter;   /* incremented on flush */
         xfs_ictimestamp_t di_atime;     /* time last accessed */
         xfs_ictimestamp_t di_mtime;     /* time last modified */
@@ -212,7 +213,6 @@ typedef struct xfs_icdinode {
  #ifdef __KERNEL__
  
  struct bhv_desc;
-struct cred;
  struct xfs_buf;
  struct xfs_bmap_free;
  struct xfs_bmbt_irec;
@@ -334,6 +334,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
         return ret;
  }
  
+/*
+ * Project quota id helpers (previously projid was 16bit only
+ * and using two 16bit values to hold new 32bit projid was choosen
+ * to retain compatibility with "old" filesystems).
+ */
+static inline prid_t
+xfs_get_projid(struct xfs_inode *ip)
+{
+       return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
+}
+
+static inline void
+xfs_set_projid(struct xfs_inode *ip,
+               prid_t projid)
+{
+       ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16);
+       ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
+}
+
  /*
   * Manage the i_flush queue embedded in the inode.  This completion
   * queue synchronizes processes attempting to flush the in-core
@@ -456,8 +475,8 @@ void                xfs_inode_free(struct xfs_inode *ip);
   * xfs_inode.c prototypes.
   */
  int            xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t,
-                          xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t,
-                          int, struct xfs_buf **, boolean_t *, xfs_inode_t **);
+                          xfs_nlink_t, xfs_dev_t, prid_t, int,
+                          struct xfs_buf **, boolean_t *, xfs_inode_t **);
  
  uint           xfs_ip2xflags(struct xfs_inode *);
  uint           xfs_dic2xflags(struct xfs_dinode *);
@@ -471,7 +490,6 @@ int         xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
  void           xfs_iext_realloc(xfs_inode_t *, int, int);
  void           xfs_iunpin_wait(xfs_inode_t *);
  int            xfs_iflush(xfs_inode_t *, uint);
-void           xfs_ichgtime(xfs_inode_t *, int);
  void           xfs_lock_inodes(xfs_inode_t **, int, uint);
  void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
  
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index fe00777..c7ac020 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -222,15 +222,6 @@ xfs_inode_item_format(
         vecp++;
         nvecs        = 1;
  
-       /*
-        * Make sure the linux inode is dirty. We do this before
-        * clearing i_update_core as the VFS will call back into
-        * XFS here and set i_update_core, so we need to dirty the
-        * inode first so that the ordering of i_update_core and
-        * unlogged modifications still works as described below.
-        */
-       xfs_mark_inode_dirty_sync(ip);
-
         /*
          * Clear i_update_core if the timestamps (or any other
          * non-transactional modification) need flushing/logging
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c

index 7e3626e..dc1882a 100644 (file)
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -92,7 +92,8 @@ xfs_bulkstat_one_int(
          * further change.
          */
         buf->bs_nlink = dic->di_nlink;
-       buf->bs_projid = dic->di_projid;
+       buf->bs_projid_lo = dic->di_projid_lo;
+       buf->bs_projid_hi = dic->di_projid_hi;
         buf->bs_ino = ino;
         buf->bs_mode = dic->di_mode;
         buf->bs_uid = dic->di_uid;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c

index ba8e36e..cee4ab9 100644 (file)
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1118,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t        *mp,
                 iclog->ic_prev = prev_iclog;
                 prev_iclog = iclog;
  
-               bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp);
+               bp = xfs_buf_get_uncached(mp->m_logdev_targp,
+                                               log->l_iclog_size, 0);
                 if (!bp)
                         goto out_free_iclog;
                 if (!XFS_BUF_CPSEMA(bp))
@@ -1296,7 +1297,7 @@ xlog_bdstrat(
         if (iclog->ic_state & XLOG_STATE_IOERROR) {
                 XFS_BUF_ERROR(bp, EIO);
                 XFS_BUF_STALE(bp);
-               xfs_biodone(bp);
+               xfs_buf_ioend(bp, 0);
                 /*
                  * It would seem logical to return EIO here, but we rely on
                  * the log state machine to propagate I/O errors instead of
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 7e206fc..23d6ceb 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -145,102 +145,6 @@ xlog_cil_init_post_recovery(
                                                                 log->l_curr_block);
  }
  
-/*
- * Insert the log item into the CIL and calculate the difference in space
- * consumed by the item. Add the space to the checkpoint ticket and calculate
- * if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
- * the current transaction ticket so that the accounting works out correctly.
- *
- * If this is the first time the item is being placed into the CIL in this
- * context, pin it so it can't be written to disk until the CIL is flushed to
- * the iclog and the iclog written to disk.
- */
-static void
-xlog_cil_insert(
-       struct log              *log,
-       struct xlog_ticket      *ticket,
-       struct xfs_log_item     *item,
-       struct xfs_log_vec      *lv)
-{
-       struct xfs_cil          *cil = log->l_cilp;
-       struct xfs_log_vec      *old = lv->lv_item->li_lv;
-       struct xfs_cil_ctx      *ctx = cil->xc_ctx;
-       int                     len;
-       int                     diff_iovecs;
-       int                     iclog_space;
-
-       if (old) {
-               /* existing lv on log item, space used is a delta */
-               ASSERT(!list_empty(&item->li_cil));
-               ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
-               len = lv->lv_buf_len - old->lv_buf_len;
-               diff_iovecs = lv->lv_niovecs - old->lv_niovecs;
-               kmem_free(old->lv_buf);
-               kmem_free(old);
-       } else {
-               /* new lv, must pin the log item */
-               ASSERT(!lv->lv_item->li_lv);
-               ASSERT(list_empty(&item->li_cil));
-
-               len = lv->lv_buf_len;
-               diff_iovecs = lv->lv_niovecs;
-               IOP_PIN(lv->lv_item);
-
-       }
-       len += diff_iovecs * sizeof(xlog_op_header_t);
-
-       /* attach new log vector to log item */
-       lv->lv_item->li_lv = lv;
-
-       spin_lock(&cil->xc_cil_lock);
-       list_move_tail(&item->li_cil, &cil->xc_cil);
-       ctx->nvecs += diff_iovecs;
-
-       /*
-        * If this is the first time the item is being committed to the CIL,
-        * store the sequence number on the log item so we can tell
-        * in future commits whether this is the first checkpoint the item is
-        * being committed into.
-        */
-       if (!item->li_seq)
-               item->li_seq = ctx->sequence;
-
-       /*
-        * Now transfer enough transaction reservation to the context ticket
-        * for the checkpoint. The context ticket is special - the unit
-        * reservation has to grow as well as the current reservation as we
-        * steal from tickets so we can correctly determine the space used
-        * during the transaction commit.
-        */
-       if (ctx->ticket->t_curr_res == 0) {
-               /* first commit in checkpoint, steal the header reservation */
-               ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
-               ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
-               ticket->t_curr_res -= ctx->ticket->t_unit_res;
-       }
-
-       /* do we need space for more log record headers? */
-       iclog_space = log->l_iclog_size - log->l_iclog_hsize;
-       if (len > 0 && (ctx->space_used / iclog_space !=
-                               (ctx->space_used + len) / iclog_space)) {
-               int hdrs;
-
-               hdrs = (len + iclog_space - 1) / iclog_space;
-               /* need to take into account split region headers, too */
-               hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
-               ctx->ticket->t_unit_res += hdrs;
-               ctx->ticket->t_curr_res += hdrs;
-               ticket->t_curr_res -= hdrs;
-               ASSERT(ticket->t_curr_res >= len);
-       }
-       ticket->t_curr_res -= len;
-       ctx->space_used += len;
-
-       spin_unlock(&cil->xc_cil_lock);
-}
-
  /*
   * Format log item into a flat buffers
   *
@@ -286,7 +190,7 @@ xlog_cil_format_items(
                         len += lv->lv_iovecp[index].i_len;
  
                 lv->lv_buf_len = len;
-               lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
+               lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
                 ptr = lv->lv_buf;
  
                 for (index = 0; index < lv->lv_niovecs; index++) {
@@ -300,21 +204,136 @@ xlog_cil_format_items(
         }
  }
  
+/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+       struct log              *log,
+       struct xfs_log_vec      *lv,
+       int                     *len,
+       int                     *diff_iovecs)
+{
+       struct xfs_log_vec      *old = lv->lv_item->li_lv;
+
+       if (old) {
+               /* existing lv on log item, space used is a delta */
+               ASSERT(!list_empty(&lv->lv_item->li_cil));
+               ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
+
+               *len += lv->lv_buf_len - old->lv_buf_len;
+               *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
+               kmem_free(old->lv_buf);
+               kmem_free(old);
+       } else {
+               /* new lv, must pin the log item */
+               ASSERT(!lv->lv_item->li_lv);
+               ASSERT(list_empty(&lv->lv_item->li_cil));
+
+               *len += lv->lv_buf_len;
+               *diff_iovecs += lv->lv_niovecs;
+               IOP_PIN(lv->lv_item);
+
+       }
+
+       /* attach new log vector to log item */
+       lv->lv_item->li_lv = lv;
+
+       /*
+        * If this is the first time the item is being committed to the
+        * CIL, store the sequence number on the log item so we can
+        * tell in future commits whether this is the first checkpoint
+        * the item is being committed into.
+        */
+       if (!lv->lv_item->li_seq)
+               lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
+
+/*
+ * Insert the log items into the CIL and calculate the difference in space
+ * consumed by the item. Add the space to the checkpoint ticket and calculate
+ * if the change requires additional log metadata. If it does, take that space
+ * as well. Remove the amount of space we added to the checkpoint ticket from
+ * the current transaction ticket so that the accounting works out correctly.
+ */
  static void
  xlog_cil_insert_items(
         struct log              *log,
         struct xfs_log_vec      *log_vector,
-       struct xlog_ticket      *ticket,
-       xfs_lsn_t               *start_lsn)
+       struct xlog_ticket      *ticket)
  {
-       struct xfs_log_vec *lv;
-
-       if (start_lsn)
-               *start_lsn = log->l_cilp->xc_ctx->sequence;
+       struct xfs_cil          *cil = log->l_cilp;
+       struct xfs_cil_ctx      *ctx = cil->xc_ctx;
+       struct xfs_log_vec      *lv;
+       int                     len = 0;
+       int                     diff_iovecs = 0;
+       int                     iclog_space;
  
         ASSERT(log_vector);
+
+       /*
+        * Do all the accounting aggregation and switching of log vectors
+        * around in a separate loop to the insertion of items into the CIL.
+        * Then we can do a separate loop to update the CIL within a single
+        * lock/unlock pair. This reduces the number of round trips on the CIL
+        * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
+        * hold time for the transaction commit.
+        *
+        * If this is the first time the item is being placed into the CIL in
+        * this context, pin it so it can't be written to disk until the CIL is
+        * flushed to the iclog and the iclog written to disk.
+        *
+        * We can do this safely because the context can't checkpoint until we
+        * are done so it doesn't matter exactly how we update the CIL.
+        */
+       for (lv = log_vector; lv; lv = lv->lv_next)
+               xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
+
+       /* account for space used by new iovec headers  */
+       len += diff_iovecs * sizeof(xlog_op_header_t);
+
+       spin_lock(&cil->xc_cil_lock);
+
+       /* move the items to the tail of the CIL */
         for (lv = log_vector; lv; lv = lv->lv_next)
-               xlog_cil_insert(log, ticket, lv->lv_item, lv);
+               list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+
+       ctx->nvecs += diff_iovecs;
+
+       /*
+        * Now transfer enough transaction reservation to the context ticket
+        * for the checkpoint. The context ticket is special - the unit
+        * reservation has to grow as well as the current reservation as we
+        * steal from tickets so we can correctly determine the space used
+        * during the transaction commit.
+        */
+       if (ctx->ticket->t_curr_res == 0) {
+               /* first commit in checkpoint, steal the header reservation */
+               ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
+               ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
+               ticket->t_curr_res -= ctx->ticket->t_unit_res;
+       }
+
+       /* do we need space for more log record headers? */
+       iclog_space = log->l_iclog_size - log->l_iclog_hsize;
+       if (len > 0 && (ctx->space_used / iclog_space !=
+                               (ctx->space_used + len) / iclog_space)) {
+               int hdrs;
+
+               hdrs = (len + iclog_space - 1) / iclog_space;
+               /* need to take into account split region headers, too */
+               hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
+               ctx->ticket->t_unit_res += hdrs;
+               ctx->ticket->t_curr_res += hdrs;
+               ticket->t_curr_res -= hdrs;
+               ASSERT(ticket->t_curr_res >= len);
+       }
+       ticket->t_curr_res -= len;
+       ctx->space_used += len;
+
+       spin_unlock(&cil->xc_cil_lock);
  }
  
  static void
@@ -638,7 +657,10 @@ xfs_log_commit_cil(
  
         /* lock out background commit */
         down_read(&log->l_cilp->xc_ctx_lock);
-       xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+       if (commit_lsn)
+               *commit_lsn = log->l_cilp->xc_ctx->sequence;
+
+       xlog_cil_insert_items(log, log_vector, tp->t_ticket);
  
         /* check we didn't blow the reservation */
         if (tp->t_ticket->t_curr_res < 0)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 6f3f5fa..966d3f9 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -107,7 +107,8 @@ xlog_get_bp(
                 nbblks += log->l_sectBBsize;
         nbblks = round_up(nbblks, log->l_sectBBsize);
  
-       return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp);
+       return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
+                                       BBTOB(nbblks), 0);
  }
  
  STATIC void
@@ -167,7 +168,7 @@ xlog_bread_noalign(
         XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
  
         xfsbdstrat(log->l_mp, bp);
-       error = xfs_iowait(bp);
+       error = xfs_buf_iowait(bp);
         if (error)
                 xfs_ioerror_alert("xlog_bread", log->l_mp,
                                   bp, XFS_BUF_ADDR(bp));
@@ -321,12 +322,13 @@ xlog_recover_iodone(
                  * this during recovery. One strike!
                  */
                 xfs_ioerror_alert("xlog_recover_iodone",
-                                 bp->b_mount, bp, XFS_BUF_ADDR(bp));
-               xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
+                                       bp->b_target->bt_mount, bp,
+                                       XFS_BUF_ADDR(bp));
+               xfs_force_shutdown(bp->b_target->bt_mount,
+                                       SHUTDOWN_META_IO_ERROR);
         }
-       bp->b_mount = NULL;
         XFS_BUF_CLR_IODONE_FUNC(bp);
-       xfs_biodone(bp);
+       xfs_buf_ioend(bp, 0);
  }
  
  /*
@@ -2275,8 +2277,7 @@ xlog_recover_do_buffer_trans(
                 XFS_BUF_STALE(bp);
                 error = xfs_bwrite(mp, bp);
         } else {
-               ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
-               bp->b_mount = mp;
+               ASSERT(bp->b_target->bt_mount == mp);
                 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
                 xfs_bdwrite(mp, bp);
         }
@@ -2540,8 +2541,7 @@ xlog_recover_do_inode_trans(
         }
  
  write_inode_buffer:
-       ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
-       bp->b_mount = mp;
+       ASSERT(bp->b_target->bt_mount == mp);
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
         xfs_bdwrite(mp, bp);
  error:
@@ -2678,8 +2678,7 @@ xlog_recover_do_dquot_trans(
         memcpy(ddq, recddq, item->ri_buf[1].i_len);
  
         ASSERT(dq_f->qlf_size == 2);
-       ASSERT(bp->b_mount == NULL || bp->b_mount == mp);
-       bp->b_mount = mp;
+       ASSERT(bp->b_target->bt_mount == mp);
         XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
         xfs_bdwrite(mp, bp);
  
@@ -3817,7 +3816,7 @@ xlog_do_recover(
         XFS_BUF_READ(bp);
         XFS_BUF_UNASYNC(bp);
         xfsbdstrat(log->l_mp, bp);
-       error = xfs_iowait(bp);
+       error = xfs_buf_iowait(bp);
         if (error) {
                 xfs_ioerror_alert("xlog_do_recover",
                                   log->l_mp, bp, XFS_BUF_ADDR(bp));
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c

index aeb9d72..b1498ab 100644 (file)
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -52,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
                                                 int);
  STATIC void    xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
                                                 int);
-STATIC int     xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
-                                               int64_t, int);
  STATIC void    xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-
  #else
  
  #define xfs_icsb_balance_counter(mp, a, b)             do { } while (0)
  #define xfs_icsb_balance_counter_locked(mp, a, b)      do { } while (0)
-#define xfs_icsb_modify_counters(mp, a, b, c)          do { } while (0)
-
  #endif
  
  static const struct {
@@ -199,6 +194,8 @@ xfs_uuid_unmount(
  
  /*
   * Reference counting access wrappers to the perag structures.
+ * Because we never free per-ag structures, the only thing we
+ * have to protect against changes is the tree structure itself.
   */
  struct xfs_perag *
  xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
@@ -206,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
         struct xfs_perag        *pag;
         int                     ref = 0;
  
-       spin_lock(&mp->m_perag_lock);
+       rcu_read_lock();
         pag = radix_tree_lookup(&mp->m_perag_tree, agno);
         if (pag) {
                 ASSERT(atomic_read(&pag->pag_ref) >= 0);
-               /* catch leaks in the positive direction during testing */
-               ASSERT(atomic_read(&pag->pag_ref) < 1000);
                 ref = atomic_inc_return(&pag->pag_ref);
         }
-       spin_unlock(&mp->m_perag_lock);
+       rcu_read_unlock();
         trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
         return pag;
  }
  
+/*
+ * search from @first to find the next perag with the given tag set.
+ */
+struct xfs_perag *
+xfs_perag_get_tag(
+       struct xfs_mount        *mp,
+       xfs_agnumber_t          first,
+       int                     tag)
+{
+       struct xfs_perag        *pag;
+       int                     found;
+       int                     ref;
+
+       rcu_read_lock();
+       found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
+                                       (void **)&pag, first, 1, tag);
+       if (found <= 0) {
+               rcu_read_unlock();
+               return NULL;
+       }
+       ref = atomic_inc_return(&pag->pag_ref);
+       rcu_read_unlock();
+       trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
+       return pag;
+}
+
  void
  xfs_perag_put(struct xfs_perag *pag)
  {
@@ -229,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag)
         trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
  }
  
+STATIC void
+__xfs_free_perag(
+       struct rcu_head *head)
+{
+       struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
+
+       ASSERT(atomic_read(&pag->pag_ref) == 0);
+       kmem_free(pag);
+}
+
  /*
- * Free up the resources associated with a mount structure.  Assume that
- * the structure was initially zeroed, so we can tell which fields got
- * initialized.
+ * Free up the per-ag resources associated with the mount structure.
   */
  STATIC void
  xfs_free_perag(
@@ -244,10 +273,9 @@ xfs_free_perag(
         for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
                 spin_lock(&mp->m_perag_lock);
                 pag = radix_tree_delete(&mp->m_perag_tree, agno);
-               ASSERT(pag);
-               ASSERT(atomic_read(&pag->pag_ref) == 0);
                 spin_unlock(&mp->m_perag_lock);
-               kmem_free(pag);
+               ASSERT(pag);
+               call_rcu(&pag->rcu_head, __xfs_free_perag);
         }
  }
  
@@ -444,7 +472,10 @@ xfs_initialize_perag(
                 pag->pag_agno = index;
                 pag->pag_mount = mp;
                 rwlock_init(&pag->pag_ici_lock);
+               mutex_init(&pag->pag_ici_reclaim_lock);
                 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
+               spin_lock_init(&pag->pag_buf_lock);
+               pag->pag_buf_tree = RB_ROOT;
  
                 if (radix_tree_preload(GFP_NOFS))
                         goto out_unwind;
@@ -639,7 +670,6 @@ int
  xfs_readsb(xfs_mount_t *mp, int flags)
  {
         unsigned int    sector_size;
-       unsigned int    extra_flags;
         xfs_buf_t       *bp;
         int             error;
  
@@ -652,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags)
          * access to the superblock.
          */
         sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
-       extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
  
-       bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
-                         extra_flags);
-       if (!bp || XFS_BUF_ISERROR(bp)) {
-               xfs_fs_mount_cmn_err(flags, "SB read failed");
-               error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
-               goto fail;
+reread:
+       bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
+                                       XFS_SB_DADDR, sector_size, 0);
+       if (!bp) {
+               xfs_fs_mount_cmn_err(flags, "SB buffer read failed");
+               return EIO;
         }
-       ASSERT(XFS_BUF_ISBUSY(bp));
-       ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
  
         /*
          * Initialize the mount structure from the superblock.
          * But first do some basic consistency checking.
          */
         xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp));
-
         error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags);
         if (error) {
                 xfs_fs_mount_cmn_err(flags, "SB validate failed");
-               goto fail;
+               goto release_buf;
         }
  
         /*
@@ -684,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
                         "device supports only %u byte sectors (not %u)",
                         sector_size, mp->m_sb.sb_sectsize);
                 error = ENOSYS;
-               goto fail;
+               goto release_buf;
         }
  
         /*
@@ -692,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags)
          * re-read the superblock so the buffer is correctly sized.
          */
         if (sector_size < mp->m_sb.sb_sectsize) {
-               XFS_BUF_UNMANAGE(bp);
                 xfs_buf_relse(bp);
                 sector_size = mp->m_sb.sb_sectsize;
-               bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR,
-                                 BTOBB(sector_size), extra_flags);
-               if (!bp || XFS_BUF_ISERROR(bp)) {
-                       xfs_fs_mount_cmn_err(flags, "SB re-read failed");
-                       error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM;
-                       goto fail;
-               }
-               ASSERT(XFS_BUF_ISBUSY(bp));
-               ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+               goto reread;
         }
  
         /* Initialize per-cpu counters */
         xfs_icsb_reinit_counters(mp);
  
         mp->m_sb_bp = bp;
-       xfs_buf_relse(bp);
-       ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
+       xfs_buf_unlock(bp);
         return 0;
  
- fail:
-       if (bp) {
-               XFS_BUF_UNMANAGE(bp);
-               xfs_buf_relse(bp);
-       }
+release_buf:
+       xfs_buf_relse(bp);
         return error;
  }
  
@@ -991,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp)
  {
         xfs_buf_t       *bp;
         xfs_daddr_t     d;
-       int             error;
  
         d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
         if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) {
-               cmn_err(CE_WARN, "XFS: size check 1 failed");
+               cmn_err(CE_WARN, "XFS: filesystem size mismatch detected");
                 return XFS_ERROR(EFBIG);
         }
-       error = xfs_read_buf(mp, mp->m_ddev_targp,
-                            d - XFS_FSS_TO_BB(mp, 1),
-                            XFS_FSS_TO_BB(mp, 1), 0, &bp);
-       if (!error) {
-               xfs_buf_relse(bp);
-       } else {
-               cmn_err(CE_WARN, "XFS: size check 2 failed");
-               if (error == ENOSPC)
-                       error = XFS_ERROR(EFBIG);
-               return error;
+       bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp,
+                                       d - XFS_FSS_TO_BB(mp, 1),
+                                       BBTOB(XFS_FSS_TO_BB(mp, 1)), 0);
+       if (!bp) {
+               cmn_err(CE_WARN, "XFS: last sector read failed");
+               return EIO;
         }
+       xfs_buf_relse(bp);
  
         if (mp->m_logdev_targp != mp->m_ddev_targp) {
                 d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
                 if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) {
-                       cmn_err(CE_WARN, "XFS: size check 3 failed");
+                       cmn_err(CE_WARN, "XFS: log size mismatch detected");
                         return XFS_ERROR(EFBIG);
                 }
-               error = xfs_read_buf(mp, mp->m_logdev_targp,
-                                    d - XFS_FSB_TO_BB(mp, 1),
-                                    XFS_FSB_TO_BB(mp, 1), 0, &bp);
-               if (!error) {
-                       xfs_buf_relse(bp);
-               } else {
-                       cmn_err(CE_WARN, "XFS: size check 3 failed");
-                       if (error == ENOSPC)
-                               error = XFS_ERROR(EFBIG);
-                       return error;
+               bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp,
+                                       d - XFS_FSB_TO_BB(mp, 1),
+                                       XFS_FSB_TO_B(mp, 1), 0);
+               if (!bp) {
+                       cmn_err(CE_WARN, "XFS: log device read failed");
+                       return EIO;
                 }
+               xfs_buf_relse(bp);
         }
         return 0;
  }
@@ -1601,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
                 XFS_BUF_UNASYNC(sbp);
                 ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
                 xfsbdstrat(mp, sbp);
-               error = xfs_iowait(sbp);
+               error = xfs_buf_iowait(sbp);
                 if (error)
                         xfs_ioerror_alert("xfs_unmountfs_writesb",
                                           mp, sbp, XFS_BUF_ADDR(sbp));
@@ -1832,135 +1838,72 @@ xfs_mod_incore_sb_unlocked(
   */
  int
  xfs_mod_incore_sb(
-       xfs_mount_t     *mp,
-       xfs_sb_field_t  field,
-       int64_t         delta,
-       int             rsvd)
+       struct xfs_mount        *mp,
+       xfs_sb_field_t          field,
+       int64_t                 delta,
+       int                     rsvd)
  {
-       int     status;
+       int                     status;
  
-       /* check for per-cpu counters */
-       switch (field) {
  #ifdef HAVE_PERCPU_SB
-       case XFS_SBS_ICOUNT:
-       case XFS_SBS_IFREE:
-       case XFS_SBS_FDBLOCKS:
-               if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                       status = xfs_icsb_modify_counters(mp, field,
-                                                       delta, rsvd);
-                       break;
-               }
-               /* FALLTHROUGH */
+       ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS);
  #endif
-       default:
-               spin_lock(&mp->m_sb_lock);
-               status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-               spin_unlock(&mp->m_sb_lock);
-               break;
-       }
+       spin_lock(&mp->m_sb_lock);
+       status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+       spin_unlock(&mp->m_sb_lock);
  
         return status;
  }
  
  /*
- * xfs_mod_incore_sb_batch() is used to change more than one field
- * in the in-core superblock structure at a time.  This modification
- * is protected by a lock internal to this module.  The fields and
- * changes to those fields are specified in the array of xfs_mod_sb
- * structures passed in.
+ * Change more than one field in the in-core superblock structure at a time.
   *
- * Either all of the specified deltas will be applied or none of
- * them will.  If any modified field dips below 0, then all modifications
- * will be backed out and EINVAL will be returned.
+ * The fields and changes to those fields are specified in the array of
+ * xfs_mod_sb structures passed in.  Either all of the specified deltas
+ * will be applied or none of them will.  If any modified field dips below 0,
+ * then all modifications will be backed out and EINVAL will be returned.
+ *
+ * Note that this function may not be used for the superblock values that
+ * are tracked with the in-memory per-cpu counters - a direct call to
+ * xfs_icsb_modify_counters is required for these.
   */
  int
-xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
+xfs_mod_incore_sb_batch(
+       struct xfs_mount        *mp,
+       xfs_mod_sb_t            *msb,
+       uint                    nmsb,
+       int                     rsvd)
  {
-       int             status=0;
-       xfs_mod_sb_t    *msbp;
+       xfs_mod_sb_t            *msbp = &msb[0];
+       int                     error = 0;
  
         /*
-        * Loop through the array of mod structures and apply each
-        * individually.  If any fail, then back out all those
-        * which have already been applied.  Do all of this within
-        * the scope of the m_sb_lock so that all of the changes will
-        * be atomic.
+        * Loop through the array of mod structures and apply each individually.
+        * If any fail, then back out all those which have already been applied.
+        * Do all of this within the scope of the m_sb_lock so that all of the
+        * changes will be atomic.
          */
         spin_lock(&mp->m_sb_lock);
-       msbp = &msb[0];
         for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) {
-               /*
-                * Apply the delta at index n.  If it fails, break
-                * from the loop so we'll fall into the undo loop
-                * below.
-                */
-               switch (msbp->msb_field) {
-#ifdef HAVE_PERCPU_SB
-               case XFS_SBS_ICOUNT:
-               case XFS_SBS_IFREE:
-               case XFS_SBS_FDBLOCKS:
-                       if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                               spin_unlock(&mp->m_sb_lock);
-                               status = xfs_icsb_modify_counters(mp,
-                                                       msbp->msb_field,
-                                                       msbp->msb_delta, rsvd);
-                               spin_lock(&mp->m_sb_lock);
-                               break;
-                       }
-                       /* FALLTHROUGH */
-#endif
-               default:
-                       status = xfs_mod_incore_sb_unlocked(mp,
-                                               msbp->msb_field,
-                                               msbp->msb_delta, rsvd);
-                       break;
-               }
+               ASSERT(msbp->msb_field < XFS_SBS_ICOUNT ||
+                      msbp->msb_field > XFS_SBS_FDBLOCKS);
  
-               if (status != 0) {
-                       break;
-               }
+               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
+                                                  msbp->msb_delta, rsvd);
+               if (error)
+                       goto unwind;
         }
+       spin_unlock(&mp->m_sb_lock);
+       return 0;
  
-       /*
-        * If we didn't complete the loop above, then back out
-        * any changes made to the superblock.  If you add code
-        * between the loop above and here, make sure that you
-        * preserve the value of status. Loop back until
-        * we step below the beginning of the array.  Make sure
-        * we don't touch anything back there.
-        */
-       if (status != 0) {
-               msbp--;
-               while (msbp >= msb) {
-                       switch (msbp->msb_field) {
-#ifdef HAVE_PERCPU_SB
-                       case XFS_SBS_ICOUNT:
-                       case XFS_SBS_IFREE:
-                       case XFS_SBS_FDBLOCKS:
-                               if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
-                                       spin_unlock(&mp->m_sb_lock);
-                                       status = xfs_icsb_modify_counters(mp,
-                                                       msbp->msb_field,
-                                                       -(msbp->msb_delta),
-                                                       rsvd);
-                                       spin_lock(&mp->m_sb_lock);
-                                       break;
-                               }
-                               /* FALLTHROUGH */
-#endif
-                       default:
-                               status = xfs_mod_incore_sb_unlocked(mp,
-                                                       msbp->msb_field,
-                                                       -(msbp->msb_delta),
-                                                       rsvd);
-                               break;
-                       }
-                       ASSERT(status == 0);
-                       msbp--;
-               }
+unwind:
+       while (--msbp >= msb) {
+               error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
+                                                  -msbp->msb_delta, rsvd);
+               ASSERT(error == 0);
         }
         spin_unlock(&mp->m_sb_lock);
-       return status;
+       return error;
  }
  
  /*
@@ -1998,18 +1941,13 @@ xfs_getsb(
   */
  void
  xfs_freesb(
-       xfs_mount_t     *mp)
+       struct xfs_mount        *mp)
  {
-       xfs_buf_t       *bp;
+       struct xfs_buf          *bp = mp->m_sb_bp;
  
-       /*
-        * Use xfs_getsb() so that the buffer will be locked
-        * when we call xfs_buf_relse().
-        */
-       bp = xfs_getsb(mp, 0);
-       XFS_BUF_UNMANAGE(bp);
-       xfs_buf_relse(bp);
+       xfs_buf_lock(bp);
         mp->m_sb_bp = NULL;
+       xfs_buf_relse(bp);
  }
  
  /*
@@ -2496,7 +2434,7 @@ xfs_icsb_balance_counter(
         spin_unlock(&mp->m_sb_lock);
  }
  
-STATIC int
+int
  xfs_icsb_modify_counters(
         xfs_mount_t     *mp,
         xfs_sb_field_t  field,
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index 622da21..5861b49 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations {
  
  #include "xfs_sync.h"
  
-struct cred;
  struct log;
  struct xfs_mount_args;
  struct xfs_inode;
@@ -91,6 +90,8 @@ extern void   xfs_icsb_reinit_counters(struct xfs_mount *);
  extern void    xfs_icsb_destroy_counters(struct xfs_mount *);
  extern void    xfs_icsb_sync_counters(struct xfs_mount *, int);
  extern void    xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
+extern int     xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
+                                               int64_t, int);
  
  #else
  #define xfs_icsb_init_counters(mp)             (0)
@@ -98,6 +99,8 @@ extern void   xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
  #define xfs_icsb_reinit_counters(mp)           do { } while (0)
  #define xfs_icsb_sync_counters(mp, flags)      do { } while (0)
  #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
+#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
+       xfs_mod_incore_sb(mp, field, delta, rsvd)
  #endif
  
  typedef struct xfs_mount {
@@ -232,8 +235,6 @@ typedef struct xfs_mount {
  #define XFS_MOUNT_DIRSYNC      (1ULL << 21)    /* synchronous directory ops */
  #define XFS_MOUNT_COMPAT_IOSIZE        (1ULL << 22)    /* don't report large preferred
                                                  * I/O size in stat() */
-#define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23)    /* don't use per-cpu superblock
-                                                  counters */
  #define XFS_MOUNT_FILESTREAMS  (1ULL << 24)    /* enable the filestreams
                                                    allocator */
  #define XFS_MOUNT_NOATTR2      (1ULL << 25)    /* disable use of attr2 format */
@@ -327,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
   * perag get/put wrappers for ref counting
   */
  struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
+struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
+                                       int tag);
  void   xfs_perag_put(struct xfs_perag *pag);
  
  /*
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h

deleted file mode 100644 (file)

index 2dec79e..0000000
--- a/fs/xfs/xfs_refcache.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-#ifndef __XFS_REFCACHE_H__
-#define __XFS_REFCACHE_H__
-
-#ifdef HAVE_REFCACHE
-/*
- * Maximum size (in inodes) for the NFS reference cache
- */
-#define XFS_REFCACHE_SIZE_MAX  512
-
-struct xfs_inode;
-struct xfs_mount;
-
-extern void xfs_refcache_insert(struct xfs_inode *);
-extern void xfs_refcache_purge_ip(struct xfs_inode *);
-extern void xfs_refcache_purge_mp(struct xfs_mount *);
-extern void xfs_refcache_purge_some(struct xfs_mount *);
-extern void xfs_refcache_resize(int);
-extern void xfs_refcache_destroy(void);
-
-extern void xfs_refcache_iunlock(struct xfs_inode *, uint);
-
-#else
-
-#define xfs_refcache_insert(ip)                do { } while (0)
-#define xfs_refcache_purge_ip(ip)      do { } while (0)
-#define xfs_refcache_purge_mp(mp)      do { } while (0)
-#define xfs_refcache_purge_some(mp)    do { } while (0)
-#define xfs_refcache_resize(size)      do { } while (0)
-#define xfs_refcache_destroy()         do { } while (0)
-
-#define xfs_refcache_iunlock(ip, flags)        xfs_iunlock(ip, flags)
-
-#endif
-
-#endif /* __XFS_REFCACHE_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c

index 8fca957..d2af0a8 100644 (file)
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -183,7 +183,7 @@ xfs_rename(
          * tree quota mechanism would be circumvented.
          */
         if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-                    (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
+                    (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
                 error = XFS_ERROR(EXDEV);
                 goto error_return;
         }
@@ -211,7 +211,9 @@ xfs_rename(
                         goto error_return;
                 if (error)
                         goto abort_return;
-               xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+               xfs_trans_ichgtime(tp, target_dp,
+                                       XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
  
                 if (new_parent && src_is_directory) {
                         error = xfs_bumplink(tp, target_dp);
@@ -249,7 +251,9 @@ xfs_rename(
                                         &first_block, &free_list, spaceres);
                 if (error)
                         goto abort_return;
-               xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+               xfs_trans_ichgtime(tp, target_dp,
+                                       XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
  
                 /*
                  * Decrement the link count on the target since the target
@@ -292,7 +296,7 @@ xfs_rename(
          * inode isn't really being changed, but old unix file systems did
          * it and some incremental backup programs won't work without it.
          */
-       xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
  
         /*
          * Adjust the link count on src_dp.  This is necessary when
@@ -315,7 +319,7 @@ xfs_rename(
         if (error)
                 goto abort_return;
  
-       xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
         if (new_parent)
                 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c

index 891260f..12a1913 100644 (file)
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -39,6 +39,7 @@
  #include "xfs_trans_space.h"
  #include "xfs_utils.h"
  #include "xfs_trace.h"
+#include "xfs_buf.h"
  
  
  /*
@@ -1883,13 +1884,13 @@ xfs_growfs_rt(
         /*
          * Read in the last block of the device, make sure it exists.
          */
-       error = xfs_read_buf(mp, mp->m_rtdev_targp,
-                       XFS_FSB_TO_BB(mp, nrblocks - 1),
-                       XFS_FSB_TO_BB(mp, 1), 0, &bp);
-       if (error)
-               return error;
-       ASSERT(bp);
+       bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
+                               XFS_FSB_TO_BB(mp, nrblocks - 1),
+                               XFS_FSB_TO_B(mp, 1), 0);
+       if (!bp)
+               return EIO;
         xfs_buf_relse(bp);
+
         /*
          * Calculate new parameters.  These are the final values to be reached.
          */
@@ -2215,7 +2216,6 @@ xfs_rtmount_init(
  {
         xfs_buf_t       *bp;    /* buffer for last block of subvolume */
         xfs_daddr_t     d;      /* address of last block of subvolume */
-       int             error;  /* error return value */
         xfs_sb_t        *sbp;   /* filesystem superblock copy in mount */
  
         sbp = &mp->m_sb;
@@ -2242,15 +2242,12 @@ xfs_rtmount_init(
                         (unsigned long long) mp->m_sb.sb_rblocks);
                 return XFS_ERROR(EFBIG);
         }
-       error = xfs_read_buf(mp, mp->m_rtdev_targp,
-                               d - XFS_FSB_TO_BB(mp, 1),
-                               XFS_FSB_TO_BB(mp, 1), 0, &bp);
-       if (error) {
-               cmn_err(CE_WARN,
-       "XFS: realtime mount -- xfs_read_buf failed, returned %d", error);
-               if (error == ENOSPC)
-                       return XFS_ERROR(EFBIG);
-               return error;
+       bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp,
+                                       d - XFS_FSB_TO_BB(mp, 1),
+                                       XFS_FSB_TO_B(mp, 1), 0);
+       if (!bp) {
+               cmn_err(CE_WARN, "XFS: realtime device size check failed");
+               return EIO;
         }
         xfs_buf_relse(bp);
         return 0;
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h

index 1b017c6..1eb2ba5 100644 (file)
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -80,10 +80,12 @@ struct xfs_mount;
  #define XFS_SB_VERSION2_RESERVED4BIT   0x00000004
  #define XFS_SB_VERSION2_ATTR2BIT       0x00000008      /* Inline attr rework */
  #define XFS_SB_VERSION2_PARENTBIT      0x00000010      /* parent pointers */
+#define XFS_SB_VERSION2_PROJID32BIT    0x00000080      /* 32 bit project id */
  
  #define        XFS_SB_VERSION2_OKREALFBITS     \
         (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \
-        XFS_SB_VERSION2_ATTR2BIT)
+        XFS_SB_VERSION2_ATTR2BIT       | \
+        XFS_SB_VERSION2_PROJID32BIT)
  #define        XFS_SB_VERSION2_OKSASHFBITS     \
         (0)
  #define XFS_SB_VERSION2_OKREALBITS     \
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
                 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
  }
  
+static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
+{
+       return xfs_sb_version_hasmorebits(sbp) &&
+               (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT);
+}
+
  /*
   * end of superblock version macros
   */
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c

index 1c47eda..f6d956b 100644 (file)
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -696,7 +696,7 @@ xfs_trans_reserve(
          * fail if the count would go below zero.
          */
         if (blocks > 0) {
-               error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
+               error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
                                           -((int64_t)blocks), rsvd);
                 if (error != 0) {
                         current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -767,7 +767,7 @@ undo_log:
  
  undo_blocks:
         if (blocks > 0) {
-               (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
+               xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS,
                                          (int64_t)blocks, rsvd);
                 tp->t_blk_res = 0;
         }
@@ -1009,7 +1009,7 @@ void
  xfs_trans_unreserve_and_mod_sb(
         xfs_trans_t     *tp)
  {
-       xfs_mod_sb_t    msb[14];        /* If you add cases, add entries */
+       xfs_mod_sb_t    msb[9]; /* If you add cases, add entries */
         xfs_mod_sb_t    *msbp;
         xfs_mount_t     *mp = tp->t_mountp;
         /* REFERENCED */
@@ -1017,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb(
         int             rsvd;
         int64_t         blkdelta = 0;
         int64_t         rtxdelta = 0;
+       int64_t         idelta = 0;
+       int64_t         ifreedelta = 0;
  
         msbp = msb;
         rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
  
-       /* calculate free blocks delta */
+       /* calculate deltas */
         if (tp->t_blk_res > 0)
                 blkdelta = tp->t_blk_res;
-
         if ((tp->t_fdblocks_delta != 0) &&
             (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
              (tp->t_flags & XFS_TRANS_SB_DIRTY)))
                 blkdelta += tp->t_fdblocks_delta;
  
-       if (blkdelta != 0) {
-               msbp->msb_field = XFS_SBS_FDBLOCKS;
-               msbp->msb_delta = blkdelta;
-               msbp++;
-       }
-
-       /* calculate free realtime extents delta */
         if (tp->t_rtx_res > 0)
                 rtxdelta = tp->t_rtx_res;
-
         if ((tp->t_frextents_delta != 0) &&
             (tp->t_flags & XFS_TRANS_SB_DIRTY))
                 rtxdelta += tp->t_frextents_delta;
  
+       if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
+            (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
+               idelta = tp->t_icount_delta;
+               ifreedelta = tp->t_ifree_delta;
+       }
+
+       /* apply the per-cpu counters */
+       if (blkdelta) {
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+                                                blkdelta, rsvd);
+               if (error)
+                       goto out;
+       }
+
+       if (idelta) {
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT,
+                                                idelta, rsvd);
+               if (error)
+                       goto out_undo_fdblocks;
+       }
+
+       if (ifreedelta) {
+               error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE,
+                                                ifreedelta, rsvd);
+               if (error)
+                       goto out_undo_icount;
+       }
+
+       /* apply remaining deltas */
         if (rtxdelta != 0) {
                 msbp->msb_field = XFS_SBS_FREXTENTS;
                 msbp->msb_delta = rtxdelta;
                 msbp++;
         }
  
-       /* apply remaining deltas */
-
-       if (xfs_sb_version_haslazysbcount(&mp->m_sb) ||
-            (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
-               if (tp->t_icount_delta != 0) {
-                       msbp->msb_field = XFS_SBS_ICOUNT;
-                       msbp->msb_delta = tp->t_icount_delta;
-                       msbp++;
-               }
-               if (tp->t_ifree_delta != 0) {
-                       msbp->msb_field = XFS_SBS_IFREE;
-                       msbp->msb_delta = tp->t_ifree_delta;
-                       msbp++;
-               }
-       }
-
         if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
                 if (tp->t_dblocks_delta != 0) {
                         msbp->msb_field = XFS_SBS_DBLOCKS;
@@ -1115,8 +1121,24 @@ xfs_trans_unreserve_and_mod_sb(
         if (msbp > msb) {
                 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
                         (uint)(msbp - msb), rsvd);
-               ASSERT(error == 0);
+               if (error)
+                       goto out_undo_ifreecount;
         }
+
+       return;
+
+out_undo_ifreecount:
+       if (ifreedelta)
+               xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd);
+out_undo_icount:
+       if (idelta)
+               xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd);
+out_undo_fdblocks:
+       if (blkdelta)
+               xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd);
+out:
+       ASSERT(error == 0);     /* compare, don't assign: keep the real error */
+       return;
  }
  
  /*
@@ -1389,15 +1411,12 @@ xfs_trans_item_committed(
   */
  STATIC void
  xfs_trans_committed(
-       struct xfs_trans        *tp,
+       void                    *arg,
         int                     abortflag)
  {
+       struct xfs_trans        *tp = arg;
         struct xfs_log_item_desc *lidp, *next;
  
-       /* Call the transaction's completion callback if there is one. */
-       if (tp->t_callback != NULL)
-               tp->t_callback(tp, tp->t_callarg);
-
         list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) {
                 xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag);
                 xfs_trans_free_item_desc(lidp);
@@ -1525,7 +1544,7 @@ xfs_trans_commit_iclog(
          * running in simulation mode (the log is explicitly turned
          * off).
          */
-       tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed;
+       tp->t_logcb.cb_func = xfs_trans_committed;
         tp->t_logcb.cb_arg = tp;
  
         /*
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h

index c13c0f9..246286b 100644 (file)
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -399,8 +399,6 @@ typedef struct xfs_trans {
                                                  * transaction. */
         struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
         struct xfs_dquot_acct   *t_dqinfo;      /* acctg info for dquots */
-       xfs_trans_callback_t    t_callback;     /* transaction callback */
-       void                    *t_callarg;     /* callback arg */
         unsigned int            t_flags;        /* misc flags */
         int64_t                 t_icount_delta; /* superblock icount change */
         int64_t                 t_ifree_delta;  /* superblock ifree change */
@@ -473,6 +471,7 @@ void                xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
  void           xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
  int            xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
                                xfs_ino_t , uint, uint, struct xfs_inode **);
+void           xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
  void           xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
  void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
  void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 90af025..c47918c 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -336,7 +336,7 @@ xfs_trans_read_buf(
                         ASSERT(!XFS_BUF_ISASYNC(bp));
                         XFS_BUF_READ(bp);
                         xfsbdstrat(tp->t_mountp, bp);
-                       error = xfs_iowait(bp);
+                       error = xfs_buf_iowait(bp);
                         if (error) {
                                 xfs_ioerror_alert("xfs_trans_read_buf", mp,
                                                   bp, blkno);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c

index cdc53a1..ccb3453 100644 (file)
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -117,6 +117,36 @@ xfs_trans_ijoin_ref(
         ip->i_itemp->ili_lock_flags = lock_flags;
  }
  
+/*
+ * Transactional inode timestamp update. Requires the inode to be locked and
+ * joined to the transaction supplied. Relies on the transaction subsystem to
+ * track dirty state and update/writeback the inode accordingly.
+ */
+void
+xfs_trans_ichgtime(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       timespec_t              tv;
+
+       ASSERT(tp);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(ip->i_transp == tp);
+
+       tv = current_fs_time(inode->i_sb);
+
+       if ((flags & XFS_ICHGTIME_MOD) &&
+           !timespec_equal(&inode->i_mtime, &tv)) {
+               inode->i_mtime = tv;
+       }
+       if ((flags & XFS_ICHGTIME_CHG) &&
+           !timespec_equal(&inode->i_ctime, &tv)) {
+               inode->i_ctime = tv;
+       }
+}
+
  /*
   * This is called to mark the fields indicated in fieldmask as needing
   * to be logged when the transaction is committed.  The inode must
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h

index 3207752..26d1867 100644 (file)
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef       __int32_t       xfs_tid_t;      /* transaction identifier */
  typedef        __uint32_t      xfs_dablk_t;    /* dir/attr block number (in file) */
  typedef        __uint32_t      xfs_dahash_t;   /* dir/attr hash value */
  
-typedef __uint16_t     xfs_prid_t;     /* prid_t truncated to 16bits in XFS */
-
  typedef __uint32_t     xlog_tid_t;     /* transaction ID type */
  
  /*
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c

index b7d5769..8b32d1a 100644 (file)
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -56,7 +56,6 @@ xfs_dir_ialloc(
         mode_t          mode,
         xfs_nlink_t     nlink,
         xfs_dev_t       rdev,
-       cred_t          *credp,
         prid_t          prid,           /* project id */
         int             okalloc,        /* ok to allocate new space */
         xfs_inode_t     **ipp,          /* pointer to inode; it will be
@@ -93,7 +92,7 @@ xfs_dir_ialloc(
          * transaction commit so that no other process can steal
          * the inode(s) that we've just allocated.
          */
-       code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc,
+       code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
                           &ialloc_context, &call_again, &ip);
  
         /*
@@ -197,7 +196,7 @@ xfs_dir_ialloc(
                  * other allocations in this allocation group,
                  * this call should always succeed.
                  */
-               code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid,
+               code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
                                   okalloc, &ialloc_context, &call_again, &ip);
  
                 /*
@@ -235,7 +234,7 @@ xfs_droplink(
  {
         int     error;
  
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  
         ASSERT (ip->i_d.di_nlink > 0);
         ip->i_d.di_nlink--;
@@ -299,7 +298,7 @@ xfs_bumplink(
  {
         if (ip->i_d.di_nlink >= XFS_MAXLINK)
                 return XFS_ERROR(EMLINK);
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
  
         ASSERT(ip->i_d.di_nlink > 0);
         ip->i_d.di_nlink++;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h

index f55b967..456fca3 100644 (file)
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -19,8 +19,7 @@
  #define __XFS_UTILS_H__
  
  extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
-                               xfs_dev_t, cred_t *, prid_t, int,
-                               xfs_inode_t **, int *);
+                               xfs_dev_t, prid_t, int, xfs_inode_t **, int *);
  extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *);
  extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *);
  extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c

index 4c7c7bf..8e4a63c 100644 (file)
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -114,7 +114,7 @@ xfs_setattr(
                  */
                 ASSERT(udqp == NULL);
                 ASSERT(gdqp == NULL);
-               code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
+               code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
                                          qflags, &udqp, &gdqp);
                 if (code)
                         return code;
@@ -184,8 +184,11 @@ xfs_setattr(
                     ip->i_size == 0 && ip->i_d.di_nextents == 0) {
                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
                         lock_flags &= ~XFS_ILOCK_EXCL;
-                       if (mask & ATTR_CTIME)
-                               xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                       if (mask & ATTR_CTIME) {
+                               inode->i_mtime = inode->i_ctime =
+                                               current_fs_time(inode->i_sb);
+                               xfs_mark_inode_dirty_sync(ip);
+                       }
                         code = 0;
                         goto error_return;
                 }
@@ -1253,8 +1256,7 @@ xfs_create(
         struct xfs_name         *name,
         mode_t                  mode,
         xfs_dev_t               rdev,
-       xfs_inode_t             **ipp,
-       cred_t                  *credp)
+       xfs_inode_t             **ipp)
  {
         int                     is_dir = S_ISDIR(mode);
         struct xfs_mount        *mp = dp->i_mount;
@@ -1266,7 +1268,7 @@ xfs_create(
         boolean_t               unlock_dp_on_error = B_FALSE;
         uint                    cancel_flags;
         int                     committed;
-       xfs_prid_t              prid;
+       prid_t                  prid;
         struct xfs_dquot        *udqp = NULL;
         struct xfs_dquot        *gdqp = NULL;
         uint                    resblks;
@@ -1279,9 +1281,9 @@ xfs_create(
                 return XFS_ERROR(EIO);
  
         if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = dp->i_d.di_projid;
+               prid = xfs_get_projid(dp);
         else
-               prid = dfltprid;
+               prid = XFS_PROJID_DEFAULT;
  
         /*
          * Make sure that we have allocated dquot(s) on disk.
@@ -1360,7 +1362,7 @@ xfs_create(
          * entry pointing to them, but a directory also the "." entry
          * pointing to itself.
          */
-       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp,
+       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
                                prid, resblks > 0, &ip, &committed);
         if (error) {
                 if (error == ENOSPC)
@@ -1391,7 +1393,7 @@ xfs_create(
                 ASSERT(error != ENOSPC);
                 goto out_trans_abort;
         }
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
  
         if (is_dir) {
@@ -1742,7 +1744,7 @@ xfs_remove(
                 ASSERT(error != ENOENT);
                 goto out_bmap_cancel;
         }
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
  
         if (is_dir) {
                 /*
@@ -1880,7 +1882,7 @@ xfs_link(
          * the tree quota mechanism could be circumvented.
          */
         if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
-                    (tdp->i_d.di_projid != sip->i_d.di_projid))) {
+                    (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
                 error = XFS_ERROR(EXDEV);
                 goto error_return;
         }
@@ -1895,7 +1897,7 @@ xfs_link(
                                         &first_block, &free_list, resblks);
         if (error)
                 goto abort_return;
-       xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
  
         error = xfs_bumplink(tp, sip);
@@ -1933,8 +1935,7 @@ xfs_symlink(
         struct xfs_name         *link_name,
         const char              *target_path,
         mode_t                  mode,
-       xfs_inode_t             **ipp,
-       cred_t                  *credp)
+       xfs_inode_t             **ipp)
  {
         xfs_mount_t             *mp = dp->i_mount;
         xfs_trans_t             *tp;
@@ -1955,7 +1956,7 @@ xfs_symlink(
         int                     byte_cnt;
         int                     n;
         xfs_buf_t               *bp;
-       xfs_prid_t              prid;
+       prid_t                  prid;
         struct xfs_dquot        *udqp, *gdqp;
         uint                    resblks;
  
@@ -1978,9 +1979,9 @@ xfs_symlink(
  
         udqp = gdqp = NULL;
         if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
-               prid = dp->i_d.di_projid;
+               prid = xfs_get_projid(dp);
         else
-               prid = (xfs_prid_t)dfltprid;
+               prid = XFS_PROJID_DEFAULT;
  
         /*
          * Make sure that we have allocated dquot(s) on disk.
@@ -2046,8 +2047,8 @@ xfs_symlink(
         /*
          * Allocate an inode for the symlink.
          */
-       error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT),
-                              1, 0, credp, prid, resblks > 0, &ip, NULL);
+       error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
+                              prid, resblks > 0, &ip, NULL);
         if (error) {
                 if (error == ENOSPC)
                         goto error_return;
@@ -2129,7 +2130,7 @@ xfs_symlink(
                                         &first_block, &free_list, resblks);
         if (error)
                 goto error1;
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
  
         /*
@@ -2272,7 +2273,7 @@ xfs_alloc_file_space(
         count = len;
         imapp = &imaps[0];
         nimaps = 1;
-       bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0);
+       bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
         startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
         allocatesize_fsb = XFS_B_TO_FSB(mp, count);
  
@@ -2431,9 +2432,9 @@ xfs_zero_remaining_bytes(
         if (endoff > ip->i_size)
                 endoff = ip->i_size;
  
-       bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize,
-                               XFS_IS_REALTIME_INODE(ip) ?
-                               mp->m_rtdev_targp : mp->m_ddev_targp);
+       bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
+                                       mp->m_rtdev_targp : mp->m_ddev_targp,
+                               mp->m_sb.sb_blocksize, XBF_DONT_BLOCK);
         if (!bp)
                 return XFS_ERROR(ENOMEM);
  
@@ -2459,7 +2460,7 @@ xfs_zero_remaining_bytes(
                 XFS_BUF_READ(bp);
                 XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
                 xfsbdstrat(mp, bp);
-               error = xfs_iowait(bp);
+               error = xfs_buf_iowait(bp);
                 if (error) {
                         xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
                                           mp, bp, XFS_BUF_ADDR(bp));
@@ -2472,7 +2473,7 @@ xfs_zero_remaining_bytes(
                 XFS_BUF_UNREAD(bp);
                 XFS_BUF_WRITE(bp);
                 xfsbdstrat(mp, bp);
-               error = xfs_iowait(bp);
+               error = xfs_buf_iowait(bp);
                 if (error) {
                         xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
                                           mp, bp, XFS_BUF_ADDR(bp));
@@ -2711,6 +2712,7 @@ xfs_change_file_space(
         xfs_off_t       llen;
         xfs_trans_t     *tp;
         struct iattr    iattr;
+       int             prealloc_type;
  
         if (!S_ISREG(ip->i_d.di_mode))
                 return XFS_ERROR(EINVAL);
@@ -2753,12 +2755,17 @@ xfs_change_file_space(
          * size to be changed.
          */
         setprealloc = clrprealloc = 0;
+       prealloc_type = XFS_BMAPI_PREALLOC;
  
         switch (cmd) {
+       case XFS_IOC_ZERO_RANGE:
+               prealloc_type |= XFS_BMAPI_CONVERT;
+               xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0);
+               /* FALLTHRU */
         case XFS_IOC_RESVSP:
         case XFS_IOC_RESVSP64:
                 error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
-                                                               1, attr_flags);
+                                               prealloc_type, attr_flags);
                 if (error)
                         return error;
                 setprealloc = 1;
@@ -2827,7 +2834,7 @@ xfs_change_file_space(
                 if (ip->i_d.di_mode & S_IXGRP)
                         ip->i_d.di_mode &= ~S_ISGID;
  
-               xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+               xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
         }
         if (setprealloc)
                 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h

index d8dfa8d..f670292 100644 (file)
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,7 +2,6 @@
  #define _XFS_VNODEOPS_H 1
  
  struct attrlist_cursor_kern;
-struct cred;
  struct file;
  struct iattr;
  struct inode;
@@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip);
  int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                 struct xfs_inode **ipp, struct xfs_name *ci_name);
  int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
-               xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp);
+               xfs_dev_t rdev, struct xfs_inode **ipp);
  int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                 struct xfs_inode *ip);
  int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
@@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
  int xfs_readdir(struct xfs_inode       *dp, void *dirent, size_t bufsize,
                        xfs_off_t *offset, filldir_t filldir);
  int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-               const char *target_path, mode_t mode, struct xfs_inode **ipp,
-               cred_t *credp);
+               const char *target_path, mode_t mode, struct xfs_inode **ipp);
  int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state);
  int xfs_change_file_space(struct xfs_inode *ip, int cmd,
                 xfs_flock64_t *bf, xfs_off_t offset, int attr_flags);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 23 Oct 2010 00:32:27 +0000 (17:32 -0700)
fs/xfs/linux-2.6/xfs_buf.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_buf.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_cred.h	[deleted file]	patch \| blob \| history
fs/xfs/linux-2.6/xfs_fs_subr.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_globals.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_globals.h	[deleted file]	patch \| blob \| history
fs/xfs/linux-2.6/xfs_ioctl.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_ioctl32.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_ioctl32.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_iops.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_linux.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_super.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_super.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_sync.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_sync.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_trace.h		patch \| blob \| history
fs/xfs/linux-2.6/xfs_version.h	[deleted file]	patch \| blob \| history
fs/xfs/quota/xfs_dquot.c		patch \| blob \| history
fs/xfs/quota/xfs_qm.c		patch \| blob \| history
fs/xfs/quota/xfs_qm_bhv.c		patch \| blob \| history
fs/xfs/quota/xfs_qm_syscalls.c		patch \| blob \| history
fs/xfs/xfs_ag.h		patch \| blob \| history
fs/xfs/xfs_alloc.c		patch \| blob \| history
fs/xfs/xfs_alloc_btree.c		patch \| blob \| history
fs/xfs/xfs_attr.c		patch \| blob \| history
fs/xfs/xfs_bmap.c		patch \| blob \| history
fs/xfs/xfs_bmap.h		patch \| blob \| history
fs/xfs/xfs_btree.c		patch \| blob \| history
fs/xfs/xfs_btree.h		patch \| blob \| history
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_da_btree.c		patch \| blob \| history
fs/xfs/xfs_dinode.h		patch \| blob \| history
fs/xfs/xfs_dir2_leaf.c		patch \| blob \| history
fs/xfs/xfs_fs.h		patch \| blob \| history
fs/xfs/xfs_fsops.c		patch \| blob \| history
fs/xfs/xfs_ialloc.c		patch \| blob \| history
fs/xfs/xfs_ialloc_btree.c		patch \| blob \| history
fs/xfs/xfs_iget.c		patch \| blob \| history
fs/xfs/xfs_inode.c		patch \| blob \| history
fs/xfs/xfs_inode.h		patch \| blob \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| history
fs/xfs/xfs_itable.c		patch \| blob \| history
fs/xfs/xfs_log.c		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_mount.c		patch \| blob \| history
fs/xfs/xfs_mount.h		patch \| blob \| history
fs/xfs/xfs_refcache.h	[deleted file]	patch \| blob \| history
fs/xfs/xfs_rename.c		patch \| blob \| history
fs/xfs/xfs_rtalloc.c		patch \| blob \| history
fs/xfs/xfs_sb.h		patch \| blob \| history
fs/xfs/xfs_trans.c		patch \| blob \| history
fs/xfs/xfs_trans.h		patch \| blob \| history
fs/xfs/xfs_trans_buf.c		patch \| blob \| history
fs/xfs/xfs_trans_inode.c		patch \| blob \| history
fs/xfs/xfs_types.h		patch \| blob \| history
fs/xfs/xfs_utils.c		patch \| blob \| history
fs/xfs/xfs_utils.h		patch \| blob \| history
fs/xfs/xfs_vnodeops.c		patch \| blob \| history
fs/xfs/xfs_vnodeops.h		patch \| blob \| history