Merge branch 'stable/bug.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git...

[pandora-kernel.git] / fs / xfs / xfs_inode.c
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c

index a37480a..3cc21dd 100644 (file)
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
  #include "xfs_buf_item.h"
  #include "xfs_inode_item.h"
  #include "xfs_btree.h"
-#include "xfs_btree_trace.h"
  #include "xfs_alloc.h"
  #include "xfs_ialloc.h"
  #include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
  kmem_zone_t *xfs_inode_zone;
  
  /*
- * Used in xfs_itruncate().  This is the maximum number of extents
+ * Used in xfs_itruncate_extents().  This is the maximum number of extents
   * freed from a file in a single transaction.
   */
  #define        XFS_ITRUNC_MAX_EXTENTS  2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
  
                 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
                                         (i << mp->m_sb.sb_inodelog));
-               di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+               di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
                             XFS_DINODE_GOOD_VERSION(dip->di_version);
                 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
                                                 XFS_ERRTAG_ITOBP_INOTOBP,
@@ -802,7 +801,7 @@ xfs_iread(
          * If we got something that isn't an inode it means someone
          * (nfs or dmi) has a stale handle.
          */
-       if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
+       if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
  #ifdef DEBUG
                 xfs_alert(mp,
                         "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -920,7 +919,6 @@ xfs_iread_extents(
         /*
          * We know that the size is valid (it's checked in iformat_btree)
          */
-       ifp->if_lastex = NULLEXTNUM;
         ifp->if_bytes = ifp->if_real_bytes = 0;
         ifp->if_flags |= XFS_IFEXTENTS;
         xfs_iext_add(ifp, 0, nextents);
@@ -1180,15 +1178,15 @@ xfs_ialloc(
   * at least do it for regular files.
   */
  #ifdef DEBUG
-void
+STATIC void
  xfs_isize_check(
-       xfs_mount_t     *mp,
-       xfs_inode_t     *ip,
-       xfs_fsize_t     isize)
+       struct xfs_inode        *ip,
+       xfs_fsize_t             isize)
  {
-       xfs_fileoff_t   map_first;
-       int             nimaps;
-       xfs_bmbt_irec_t imaps[2];
+       struct xfs_mount        *mp = ip->i_mount;
+       xfs_fileoff_t           map_first;
+       int                     nimaps;
+       xfs_bmbt_irec_t         imaps[2];
  
         if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
                 return;
@@ -1215,168 +1213,14 @@ xfs_isize_check(
         ASSERT(nimaps == 1);
         ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
  }
+#else  /* DEBUG */
+#define xfs_isize_check(ip, isize)
  #endif /* DEBUG */
  
  /*
- * Calculate the last possible buffered byte in a file.  This must
- * include data that was buffered beyond the EOF by the write code.
- * This also needs to deal with overflowing the xfs_fsize_t type
- * which can happen for sizes near the limit.
- *
- * We also need to take into account any blocks beyond the EOF.  It
- * may be the case that they were buffered by a write which failed.
- * In that case the pages will still be in memory, but the inode size
- * will never have been updated.
- */
-STATIC xfs_fsize_t
-xfs_file_last_byte(
-       xfs_inode_t     *ip)
-{
-       xfs_mount_t     *mp;
-       xfs_fsize_t     last_byte;
-       xfs_fileoff_t   last_block;
-       xfs_fileoff_t   size_last_block;
-       int             error;
-
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-
-       mp = ip->i_mount;
-       /*
-        * Only check for blocks beyond the EOF if the extents have
-        * been read in.  This eliminates the need for the inode lock,
-        * and it also saves us from looking when it really isn't
-        * necessary.
-        */
-       if (ip->i_df.if_flags & XFS_IFEXTENTS) {
-               xfs_ilock(ip, XFS_ILOCK_SHARED);
-               error = xfs_bmap_last_offset(NULL, ip, &last_block,
-                       XFS_DATA_FORK);
-               xfs_iunlock(ip, XFS_ILOCK_SHARED);
-               if (error) {
-                       last_block = 0;
-               }
-       } else {
-               last_block = 0;
-       }
-       size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
-       last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
-
-       last_byte = XFS_FSB_TO_B(mp, last_block);
-       if (last_byte < 0) {
-               return XFS_MAXIOFFSET(mp);
-       }
-       last_byte += (1 << mp->m_writeio_log);
-       if (last_byte < 0) {
-               return XFS_MAXIOFFSET(mp);
-       }
-       return last_byte;
-}
-
-/*
- * Start the truncation of the file to new_size.  The new size
- * must be smaller than the current size.  This routine will
- * clear the buffer and page caches of file data in the removed
- * range, and xfs_itruncate_finish() will remove the underlying
- * disk blocks.
- *
- * The inode must have its I/O lock locked EXCLUSIVELY, and it
- * must NOT have the inode lock held at all.  This is because we're
- * calling into the buffer/page cache code and we can't hold the
- * inode lock when we do so.
- *
- * We need to wait for any direct I/Os in flight to complete before we
- * proceed with the truncate. This is needed to prevent the extents
- * being read or written by the direct I/Os from being removed while the
- * I/O is in flight as there is no other method of synchronising
- * direct I/O with the truncate operation.  Also, because we hold
- * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
- * started until the truncate completes and drops the lock. Essentially,
- * the xfs_ioend_wait() call forms an I/O barrier that provides strict
- * ordering between direct I/Os and the truncate operation.
- *
- * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
- * or XFS_ITRUNC_MAYBE.  The XFS_ITRUNC_MAYBE value should be used
- * in the case that the caller is locking things out of order and
- * may not be able to call xfs_itruncate_finish() with the inode lock
- * held without dropping the I/O lock.  If the caller must drop the
- * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
- * must be called again with all the same restrictions as the initial
- * call.
- */
-int
-xfs_itruncate_start(
-       xfs_inode_t     *ip,
-       uint            flags,
-       xfs_fsize_t     new_size)
-{
-       xfs_fsize_t     last_byte;
-       xfs_off_t       toss_start;
-       xfs_mount_t     *mp;
-       int             error = 0;
-
-       ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
-       ASSERT((new_size == 0) || (new_size <= ip->i_size));
-       ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
-              (flags == XFS_ITRUNC_MAYBE));
-
-       mp = ip->i_mount;
-
-       /* wait for the completion of any pending DIOs */
-       if (new_size == 0 || new_size < ip->i_size)
-               xfs_ioend_wait(ip);
-
-       /*
-        * Call toss_pages or flushinval_pages to get rid of pages
-        * overlapping the region being removed.  We have to use
-        * the less efficient flushinval_pages in the case that the
-        * caller may not be able to finish the truncate without
-        * dropping the inode's I/O lock.  Make sure
-        * to catch any pages brought in by buffers overlapping
-        * the EOF by searching out beyond the isize by our
-        * block size. We round new_size up to a block boundary
-        * so that we don't toss things on the same block as
-        * new_size but before it.
-        *
-        * Before calling toss_page or flushinval_pages, make sure to
-        * call remapf() over the same region if the file is mapped.
-        * This frees up mapped file references to the pages in the
-        * given range and for the flushinval_pages case it ensures
-        * that we get the latest mapped changes flushed out.
-        */
-       toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-       toss_start = XFS_FSB_TO_B(mp, toss_start);
-       if (toss_start < 0) {
-               /*
-                * The place to start tossing is beyond our maximum
-                * file size, so there is no way that the data extended
-                * out there.
-                */
-               return 0;
-       }
-       last_byte = xfs_file_last_byte(ip);
-       trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte);
-       if (last_byte > toss_start) {
-               if (flags & XFS_ITRUNC_DEFINITE) {
-                       xfs_tosspages(ip, toss_start,
-                                       -1, FI_REMAPF_LOCKED);
-               } else {
-                       error = xfs_flushinval_pages(ip, toss_start,
-                                       -1, FI_REMAPF_LOCKED);
-               }
-       }
-
-#ifdef DEBUG
-       if (new_size == 0) {
-               ASSERT(VN_CACHED(VFS_I(ip)) == 0);
-       }
-#endif
-       return error;
-}
-
-/*
- * Shrink the file to the given new_size.  The new size must be smaller than
- * the current size.  This will free up the underlying blocks in the removed
- * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
+ * Free up the underlying blocks past new_size.  The new size must be no larger
+ * than the current size.  This routine can be used both for the attribute and
+ * data fork, and does not modify the inode size, which is left to the caller.
   *
   * The transaction passed to this routine must have made a permanent log
   * reservation of at least XFS_ITRUNCATE_LOG_RES.  This routine may commit the
@@ -1388,31 +1232,6 @@ xfs_itruncate_start(
   * will be "held" within the returned transaction.  This routine does NOT
   * require any disk space to be reserved for it within the transaction.
   *
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * indicates the fork which is to be truncated.  For the attribute fork we only
- * support truncation to size 0.
- *
- * We use the sync parameter to indicate whether or not the first transaction
- * we perform might have to be synchronous.  For the attr fork, it needs to be
- * so if the unlink of the inode is not yet known to be permanent in the log.
- * This keeps us from freeing and reusing the blocks of the attribute fork
- * before the unlink of the inode becomes permanent.
- *
- * For the data fork, we normally have to run synchronously if we're being
- * called out of the inactive path or we're being called out of the create path
- * where we're truncating an existing file.  Either way, the truncate needs to
- * be sync so blocks don't reappear in the file with altered data in case of a
- * crash.  wsync filesystems can run the first case async because anything that
- * shrinks the inode has to run sync so by the time we're called here from
- * inactive, the inode size is permanently set to 0.
- *
- * Calls from the truncate path always need to be sync unless we're in a wsync
- * filesystem and the file has already been unlinked.
- *
- * The caller is responsible for correctly setting the sync parameter.  It gets
- * too hard for us to guess here which path we're being called out of just
- * based on inode state.
- *
   * If we get an error, we must return with the inode locked and linked into the
   * current transaction. This keeps things simple for the higher level code,
   * because it always knows that the inode is locked and held in the transaction
@@ -1420,124 +1239,30 @@ xfs_itruncate_start(
   * dirty on error so that transactions can be easily aborted if possible.
   */
  int
-xfs_itruncate_finish(
-       xfs_trans_t     **tp,
-       xfs_inode_t     *ip,
-       xfs_fsize_t     new_size,
-       int             fork,
-       int             sync)
+xfs_itruncate_extents(
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_fsize_t             new_size)
  {
-       xfs_fsblock_t   first_block;
-       xfs_fileoff_t   first_unmap_block;
-       xfs_fileoff_t   last_block;
-       xfs_filblks_t   unmap_len=0;
-       xfs_mount_t     *mp;
-       xfs_trans_t     *ntp;
-       int             done;
-       int             committed;
-       xfs_bmap_free_t free_list;
-       int             error;
+       struct xfs_mount        *mp = ip->i_mount;
+       struct xfs_trans        *tp = *tpp;
+       struct xfs_trans        *ntp;
+       xfs_bmap_free_t         free_list;
+       xfs_fsblock_t           first_block;
+       xfs_fileoff_t           first_unmap_block;
+       xfs_fileoff_t           last_block;
+       xfs_filblks_t           unmap_len;
+       int                     committed;
+       int                     error = 0;
+       int                     done = 0;
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
-       ASSERT((new_size == 0) || (new_size <= ip->i_size));
-       ASSERT(*tp != NULL);
-       ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
-       ASSERT(ip->i_transp == *tp);
+       ASSERT(new_size <= ip->i_size);
+       ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
         ASSERT(ip->i_itemp != NULL);
         ASSERT(ip->i_itemp->ili_lock_flags == 0);
-
-
-       ntp = *tp;
-       mp = (ntp)->t_mountp;
-       ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
-
-       /*
-        * We only support truncating the entire attribute fork.
-        */
-       if (fork == XFS_ATTR_FORK) {
-               new_size = 0LL;
-       }
-       first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-       trace_xfs_itruncate_finish_start(ip, new_size);
-
-       /*
-        * The first thing we do is set the size to new_size permanently
-        * on disk.  This way we don't have to worry about anyone ever
-        * being able to look at the data being freed even in the face
-        * of a crash.  What we're getting around here is the case where
-        * we free a block, it is allocated to another file, it is written
-        * to, and then we crash.  If the new data gets written to the
-        * file but the log buffers containing the free and reallocation
-        * don't, then we'd end up with garbage in the blocks being freed.
-        * As long as we make the new_size permanent before actually
-        * freeing any blocks it doesn't matter if they get writtten to.
-        *
-        * The callers must signal into us whether or not the size
-        * setting here must be synchronous.  There are a few cases
-        * where it doesn't have to be synchronous.  Those cases
-        * occur if the file is unlinked and we know the unlink is
-        * permanent or if the blocks being truncated are guaranteed
-        * to be beyond the inode eof (regardless of the link count)
-        * and the eof value is permanent.  Both of these cases occur
-        * only on wsync-mounted filesystems.  In those cases, we're
-        * guaranteed that no user will ever see the data in the blocks
-        * that are being truncated so the truncate can run async.
-        * In the free beyond eof case, the file may wind up with
-        * more blocks allocated to it than it needs if we crash
-        * and that won't get fixed until the next time the file
-        * is re-opened and closed but that's ok as that shouldn't
-        * be too many blocks.
-        *
-        * However, we can't just make all wsync xactions run async
-        * because there's one call out of the create path that needs
-        * to run sync where it's truncating an existing file to size
-        * 0 whose size is > 0.
-        *
-        * It's probably possible to come up with a test in this
-        * routine that would correctly distinguish all the above
-        * cases from the values of the function parameters and the
-        * inode state but for sanity's sake, I've decided to let the
-        * layers above just tell us.  It's simpler to correctly figure
-        * out in the layer above exactly under what conditions we
-        * can run async and I think it's easier for others read and
-        * follow the logic in case something has to be changed.
-        * cscope is your friend -- rcc.
-        *
-        * The attribute fork is much simpler.
-        *
-        * For the attribute fork we allow the caller to tell us whether
-        * the unlink of the inode that led to this call is yet permanent
-        * in the on disk log.  If it is not and we will be freeing extents
-        * in this inode then we make the first transaction synchronous
-        * to make sure that the unlink is permanent by the time we free
-        * the blocks.
-        */
-       if (fork == XFS_DATA_FORK) {
-               if (ip->i_d.di_nextents > 0) {
-                       /*
-                        * If we are not changing the file size then do
-                        * not update the on-disk file size - we may be
-                        * called from xfs_inactive_free_eofblocks().  If we
-                        * update the on-disk file size and then the system
-                        * crashes before the contents of the file are
-                        * flushed to disk then the files may be full of
-                        * holes (ie NULL files bug).
-                        */
-                       if (ip->i_size != new_size) {
-                               ip->i_d.di_size = new_size;
-                               ip->i_size = new_size;
-                               xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
-                       }
-               }
-       } else if (sync) {
-               ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
-               if (ip->i_d.di_anextents > 0)
-                       xfs_trans_set_sync(ntp);
-       }
-       ASSERT(fork == XFS_DATA_FORK ||
-               (fork == XFS_ATTR_FORK &&
-                       ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
-                        (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
+       ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
  
         /*
          * Since it is possible for space to become allocated beyond
@@ -1548,128 +1273,142 @@ xfs_itruncate_finish(
          * beyond the maximum file size (ie it is the same as last_block),
          * then there is nothing to do.
          */
+       first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
         last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
-       ASSERT(first_unmap_block <= last_block);
-       done = 0;
-       if (last_block == first_unmap_block) {
-               done = 1;
-       } else {
-               unmap_len = last_block - first_unmap_block + 1;
-       }
+       if (first_unmap_block == last_block)
+               return 0;
+
+       ASSERT(first_unmap_block < last_block);
+       unmap_len = last_block - first_unmap_block + 1;
         while (!done) {
-               /*
-                * Free up up to XFS_ITRUNC_MAX_EXTENTS.  xfs_bunmapi()
-                * will tell us whether it freed the entire range or
-                * not.  If this is a synchronous mount (wsync),
-                * then we can tell bunmapi to keep all the
-                * transactions asynchronous since the unlink
-                * transaction that made this inode inactive has
-                * already hit the disk.  There's no danger of
-                * the freed blocks being reused, there being a
-                * crash, and the reused blocks suddenly reappearing
-                * in this file with garbage in them once recovery
-                * runs.
-                */
                 xfs_bmap_init(&free_list, &first_block);
-               error = xfs_bunmapi(ntp, ip,
+               error = xfs_bunmapi(tp, ip,
                                     first_unmap_block, unmap_len,
-                                   xfs_bmapi_aflag(fork),
+                                   xfs_bmapi_aflag(whichfork),
                                     XFS_ITRUNC_MAX_EXTENTS,
                                     &first_block, &free_list,
                                     &done);
-               if (error) {
-                       /*
-                        * If the bunmapi call encounters an error,
-                        * return to the caller where the transaction
-                        * can be properly aborted.  We just need to
-                        * make sure we're not holding any resources
-                        * that we were not when we came in.
-                        */
-                       xfs_bmap_cancel(&free_list);
-                       return error;
-               }
+               if (error)
+                       goto out_bmap_cancel;
  
                 /*
                  * Duplicate the transaction that has the permanent
                  * reservation and commit the old transaction.
                  */
-               error = xfs_bmap_finish(tp, &free_list, &committed);
-               ntp = *tp;
+               error = xfs_bmap_finish(&tp, &free_list, &committed);
                 if (committed)
-                       xfs_trans_ijoin(ntp, ip);
-
-               if (error) {
-                       /*
-                        * If the bmap finish call encounters an error, return
-                        * to the caller where the transaction can be properly
-                        * aborted.  We just need to make sure we're not
-                        * holding any resources that we were not when we came
-                        * in.
-                        *
-                        * Aborting from this point might lose some blocks in
-                        * the file system, but oh well.
-                        */
-                       xfs_bmap_cancel(&free_list);
-                       return error;
-               }
+                       xfs_trans_ijoin(tp, ip);
+               if (error)
+                       goto out_bmap_cancel;
  
                 if (committed) {
                         /*
                          * Mark the inode dirty so it will be logged and
                          * moved forward in the log as part of every commit.
                          */
-                       xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+                       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
                 }
  
-               ntp = xfs_trans_dup(ntp);
-               error = xfs_trans_commit(*tp, 0);
-               *tp = ntp;
+               ntp = xfs_trans_dup(tp);
+               error = xfs_trans_commit(tp, 0);
+               tp = ntp;
  
-               xfs_trans_ijoin(ntp, ip);
+               xfs_trans_ijoin(tp, ip);
  
                 if (error)
-                       return error;
+                       goto out;
+
                 /*
-                * transaction commit worked ok so we can drop the extra ticket
+                * Transaction commit worked ok so we can drop the extra ticket
                  * reference that we gained in xfs_trans_dup()
                  */
-               xfs_log_ticket_put(ntp->t_ticket);
-               error = xfs_trans_reserve(ntp, 0,
+               xfs_log_ticket_put(tp->t_ticket);
+               error = xfs_trans_reserve(tp, 0,
                                         XFS_ITRUNCATE_LOG_RES(mp), 0,
                                         XFS_TRANS_PERM_LOG_RES,
                                         XFS_ITRUNCATE_LOG_COUNT);
                 if (error)
-                       return error;
+                       goto out;
         }
+
+out:
+       *tpp = tp;
+       return error;
+out_bmap_cancel:
+       /*
+        * If bunmapi or bmap_finish fails, return to the caller where the
+        * transaction can be properly aborted.  We just need to make sure
+        * we're not holding any resources that we were not when we came in.
+        */
+       xfs_bmap_cancel(&free_list);
+       goto out;
+}
+
+int
+xfs_itruncate_data(
+       struct xfs_trans        **tpp,
+       struct xfs_inode        *ip,
+       xfs_fsize_t             new_size)
+{
+       int                     error;
+
+       trace_xfs_itruncate_data_start(ip, new_size);
+
         /*
-        * Only update the size in the case of the data fork, but
-        * always re-log the inode so that our permanent transaction
-        * can keep on rolling it forward in the log.
+        * The first thing we do is set the size to new_size permanently on
+        * disk.  This way we don't have to worry about anyone ever being able
+        * to look at the data being freed even in the face of a crash.
+        * What we're getting around here is the case where we free a block, it
+        * is allocated to another file, it is written to, and then we crash.
+        * If the new data gets written to the file but the log buffers
+        * containing the free and reallocation don't, then we'd end up with
+        * garbage in the blocks being freed.  As long as we make the new_size
+        * permanent before actually freeing any blocks it doesn't matter if
+        * they get written to.
          */
-       if (fork == XFS_DATA_FORK) {
-               xfs_isize_check(mp, ip, new_size);
+       if (ip->i_d.di_nextents > 0) {
                 /*
-                * If we are not changing the file size then do
-                * not update the on-disk file size - we may be
-                * called from xfs_inactive_free_eofblocks().  If we
-                * update the on-disk file size and then the system
-                * crashes before the contents of the file are
-                * flushed to disk then the files may be full of
-                * holes (ie NULL files bug).
+                * If we are not changing the file size then do not update
+                * the on-disk file size - we may be called from
+                * xfs_inactive_free_eofblocks().  If we update the on-disk
+                * file size and then the system crashes before the contents
+                * of the file are flushed to disk then the files may be
+                * full of holes (ie NULL files bug).
                  */
                 if (ip->i_size != new_size) {
                         ip->i_d.di_size = new_size;
                         ip->i_size = new_size;
+                       xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
                 }
         }
-       xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
-       ASSERT((new_size != 0) ||
-              (fork == XFS_ATTR_FORK) ||
-              (ip->i_delayed_blks == 0));
-       ASSERT((new_size != 0) ||
-              (fork == XFS_ATTR_FORK) ||
-              (ip->i_d.di_nextents == 0));
-       trace_xfs_itruncate_finish_end(ip, new_size);
+
+       error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
+       if (error)
+               return error;
+
+       /*
+        * If we are not changing the file size then do not update the on-disk
+        * file size - we may be called from xfs_inactive_free_eofblocks().
+        * If we update the on-disk file size and then the system crashes
+        * before the contents of the file are flushed to disk then the files
+        * may be full of holes (ie NULL files bug).
+        */
+       xfs_isize_check(ip, new_size);
+       if (ip->i_size != new_size) {
+               ip->i_d.di_size = new_size;
+               ip->i_size = new_size;
+       }
+
+       ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
+       ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
+
+       /*
+        * Always re-log the inode so that our permanent transaction can keep
+        * on rolling it forward in the log.
+        */
+       xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+       trace_xfs_itruncate_data_end(ip, new_size);
         return 0;
  }
  
@@ -1695,7 +1434,6 @@ xfs_iunlink(
  
         ASSERT(ip->i_d.di_nlink == 0);
         ASSERT(ip->i_d.di_mode != 0);
-       ASSERT(ip->i_transp == tp);
  
         mp = tp->t_mountp;
  
@@ -1718,7 +1456,7 @@ xfs_iunlink(
         ASSERT(agi->agi_unlinked[bucket_index]);
         ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
  
-       if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
+       if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
                 /*
                  * There is already another inode in the bucket we need
                  * to add ourselves to.  Add us at the front of the list.
@@ -1729,8 +1467,7 @@ xfs_iunlink(
                 if (error)
                         return error;
  
-               ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
-               /* both on-disk, don't endian flip twice */
+               ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
                 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
                 offset = ip->i_imap.im_boffset +
                         offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1795,7 +1532,7 @@ xfs_iunlink_remove(
         agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
         ASSERT(agino != 0);
         bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
-       ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
+       ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
         ASSERT(agi->agi_unlinked[bucket_index]);
  
         if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1960,7 +1697,7 @@ xfs_ifree_cluster(
                  * stale first, we will not attempt to lock them in the loop
                  * below as the XFS_ISTALE flag will be set.
                  */
-               lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+               lip = bp->b_fspriv;
                 while (lip) {
                         if (lip->li_type == XFS_LI_INODE) {
                                 iip = (xfs_inode_log_item_t *)lip;
@@ -2087,7 +1824,6 @@ xfs_ifree(
         xfs_buf_t               *ibp;
  
         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-       ASSERT(ip->i_transp == tp);
         ASSERT(ip->i_d.di_nlink == 0);
         ASSERT(ip->i_d.di_nextents == 0);
         ASSERT(ip->i_d.di_anextents == 0);
@@ -2558,12 +2294,9 @@ xfs_iflush_fork(
         case XFS_DINODE_FMT_EXTENTS:
                 ASSERT((ifp->if_flags & XFS_IFEXTENTS) ||
                        !(iip->ili_format.ilf_fields & extflag[whichfork]));
-               ASSERT((xfs_iext_get_ext(ifp, 0) != NULL) ||
-                       (ifp->if_bytes == 0));
-               ASSERT((xfs_iext_get_ext(ifp, 0) == NULL) ||
-                       (ifp->if_bytes > 0));
                 if ((iip->ili_format.ilf_fields & extflag[whichfork]) &&
                     (ifp->if_bytes > 0)) {
+                       ASSERT(xfs_iext_get_ext(ifp, 0));
                         ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) > 0);
                         (void)xfs_iextents_copy(ip, (xfs_bmbt_rec_t *)cp,
                                 whichfork);
@@ -2737,7 +2470,7 @@ cluster_corrupt_out:
                  * mark the buffer as an error and call them.  Otherwise
                  * mark it as stale and brelse.
                  */
-               if (XFS_BUF_IODONE_FUNC(bp)) {
+               if (bp->b_iodone) {
                         XFS_BUF_UNDONE(bp);
                         XFS_BUF_STALE(bp);
                         XFS_BUF_ERROR(bp,EIO);
@@ -2924,7 +2657,7 @@ xfs_iflush_int(
          */
         xfs_synchronize_times(ip);
  
-       if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
+       if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
                                mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
                 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
                         "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -3077,8 +2810,8 @@ xfs_iflush_int(
                  */
                 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
  
-               ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-               ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
+               ASSERT(bp->b_fspriv != NULL);
+               ASSERT(bp->b_iodone != NULL);
         } else {
                 /*
                  * We're flushing an inode which is not in the AIL and has
@@ -3112,6 +2845,8 @@ xfs_iext_get_ext(
         xfs_extnum_t    idx)            /* index of target extent */
  {
         ASSERT(idx >= 0);
+       ASSERT(idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+
         if ((ifp->if_flags & XFS_IFEXTIREC) && (idx == 0)) {
                 return ifp->if_u1.if_ext_irec->er_extbuf;
         } else if (ifp->if_flags & XFS_IFEXTIREC) {
@@ -3191,7 +2926,6 @@ xfs_iext_add(
                 }
                 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
                 ifp->if_real_bytes = 0;
-               ifp->if_lastex = nextents + ext_diff;
         }
         /*
          * Otherwise use a linear (direct) extent list.
@@ -3886,8 +3620,10 @@ xfs_iext_idx_to_irec(
         xfs_extnum_t    page_idx = *idxp; /* extent index in target list */
  
         ASSERT(ifp->if_flags & XFS_IFEXTIREC);
-       ASSERT(page_idx >= 0 && page_idx <=
-               ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
+       ASSERT(page_idx >= 0);
+       ASSERT(page_idx <= ifp->if_bytes / sizeof(xfs_bmbt_rec_t));
+       ASSERT(page_idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t) || realloc);
+
         nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
         erp_idx = 0;
         low = 0;