Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
[pandora-kernel.git] / fs / xfs / xfs_inode_item.c
index f38855d..cf8249a 100644 (file)
@@ -228,7 +228,7 @@ xfs_inode_item_format(
 
        vecp->i_addr = (xfs_caddr_t)&iip->ili_format;
        vecp->i_len  = sizeof(xfs_inode_log_format_t);
-       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IFORMAT);
+       vecp->i_type = XLOG_REG_TYPE_IFORMAT;
        vecp++;
        nvecs        = 1;
 
@@ -279,7 +279,7 @@ xfs_inode_item_format(
 
        vecp->i_addr = (xfs_caddr_t)&ip->i_d;
        vecp->i_len  = sizeof(struct xfs_icdinode);
-       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ICORE);
+       vecp->i_type = XLOG_REG_TYPE_ICORE;
        vecp++;
        nvecs++;
        iip->ili_format.ilf_fields |= XFS_ILOG_CORE;
@@ -336,7 +336,7 @@ xfs_inode_item_format(
                                vecp->i_addr =
                                        (char *)(ip->i_df.if_u1.if_extents);
                                vecp->i_len = ip->i_df.if_bytes;
-                               XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
+                               vecp->i_type = XLOG_REG_TYPE_IEXT;
                        } else
 #endif
                        {
@@ -355,7 +355,7 @@ xfs_inode_item_format(
                                vecp->i_addr = (xfs_caddr_t)ext_buffer;
                                vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
                                                XFS_DATA_FORK);
-                               XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IEXT);
+                               vecp->i_type = XLOG_REG_TYPE_IEXT;
                        }
                        ASSERT(vecp->i_len <= ip->i_df.if_bytes);
                        iip->ili_format.ilf_dsize = vecp->i_len;
@@ -373,7 +373,7 @@ xfs_inode_item_format(
                        ASSERT(ip->i_df.if_broot != NULL);
                        vecp->i_addr = (xfs_caddr_t)ip->i_df.if_broot;
                        vecp->i_len = ip->i_df.if_broot_bytes;
-                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IBROOT);
+                       vecp->i_type = XLOG_REG_TYPE_IBROOT;
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes;
@@ -399,7 +399,7 @@ xfs_inode_item_format(
                        ASSERT((ip->i_df.if_real_bytes == 0) ||
                               (ip->i_df.if_real_bytes == data_bytes));
                        vecp->i_len = (int)data_bytes;
-                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_ILOCAL);
+                       vecp->i_type = XLOG_REG_TYPE_ILOCAL;
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_dsize = (unsigned)data_bytes;
@@ -477,7 +477,7 @@ xfs_inode_item_format(
                        vecp->i_len = xfs_iextents_copy(ip, ext_buffer,
                                        XFS_ATTR_FORK);
 #endif
-                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_EXT);
+                       vecp->i_type = XLOG_REG_TYPE_IATTR_EXT;
                        iip->ili_format.ilf_asize = vecp->i_len;
                        vecp++;
                        nvecs++;
@@ -492,7 +492,7 @@ xfs_inode_item_format(
                        ASSERT(ip->i_afp->if_broot != NULL);
                        vecp->i_addr = (xfs_caddr_t)ip->i_afp->if_broot;
                        vecp->i_len = ip->i_afp->if_broot_bytes;
-                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_BROOT);
+                       vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT;
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes;
@@ -516,7 +516,7 @@ xfs_inode_item_format(
                        ASSERT((ip->i_afp->if_real_bytes == 0) ||
                               (ip->i_afp->if_real_bytes == data_bytes));
                        vecp->i_len = (int)data_bytes;
-                       XLOG_VEC_SET_TYPE(vecp, XLOG_REG_TYPE_IATTR_LOCAL);
+                       vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL;
                        vecp++;
                        nvecs++;
                        iip->ili_format.ilf_asize = (unsigned)data_bytes;
@@ -535,31 +535,36 @@ xfs_inode_item_format(
 
 /*
  * This is called to pin the inode associated with the inode log
- * item in memory so it cannot be written out.  Do this by calling
- * xfs_ipin() to bump the pin count in the inode while holding the
- * inode pin lock.
+ * item in memory so it cannot be written out.
  */
 STATIC void
 xfs_inode_item_pin(
        xfs_inode_log_item_t    *iip)
 {
        ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
-       xfs_ipin(iip->ili_inode);
+
+       trace_xfs_inode_pin(iip->ili_inode, _RET_IP_);
+       atomic_inc(&iip->ili_inode->i_pincount);
 }
 
 
 /*
  * This is called to unpin the inode associated with the inode log
  * item which was previously pinned with a call to xfs_inode_item_pin().
- * Just call xfs_iunpin() on the inode to do this.
+ *
+ * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
  */
 /* ARGSUSED */
 STATIC void
 xfs_inode_item_unpin(
-       xfs_inode_log_item_t    *iip,
-       int                     stale)
+       xfs_inode_log_item_t    *iip)
 {
-       xfs_iunpin(iip->ili_inode);
+       struct xfs_inode        *ip = iip->ili_inode;
+
+       trace_xfs_inode_unpin(ip, _RET_IP_);
+       ASSERT(atomic_read(&ip->i_pincount) > 0);
+       if (atomic_dec_and_test(&ip->i_pincount))
+               wake_up(&ip->i_ipin_wait);
 }
 
 /* ARGSUSED */
@@ -568,7 +573,7 @@ xfs_inode_item_unpin_remove(
        xfs_inode_log_item_t    *iip,
        xfs_trans_t             *tp)
 {
-       xfs_iunpin(iip->ili_inode);
+       xfs_inode_item_unpin(iip);
 }
 
 /*
@@ -602,33 +607,20 @@ xfs_inode_item_trylock(
 
        if (!xfs_iflock_nowait(ip)) {
                /*
-                * If someone else isn't already trying to push the inode
-                * buffer, we get to do it.
+                * inode has already been flushed to the backing buffer,
+                * leave it locked in shared mode, pushbuf routine will
+                * unlock it.
                 */
-               if (iip->ili_pushbuf_flag == 0) {
-                       iip->ili_pushbuf_flag = 1;
-#ifdef DEBUG
-                       iip->ili_push_owner = current_pid();
-#endif
-                       /*
-                        * Inode is left locked in shared mode.
-                        * Pushbuf routine gets to unlock it.
-                        */
-                       return XFS_ITEM_PUSHBUF;
-               } else {
-                       /*
-                        * We hold the AIL lock, so we must specify the
-                        * NONOTIFY flag so that we won't double trip.
-                        */
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
-                       return XFS_ITEM_FLUSHING;
-               }
-               /* NOTREACHED */
+               return XFS_ITEM_PUSHBUF;
        }
 
        /* Stale items should force out the iclog */
        if (ip->i_flags & XFS_ISTALE) {
                xfs_ifunlock(ip);
+               /*
+                * we hold the AIL lock - notify the unlock routine of this
+                * so it doesn't try to get the lock again.
+                */
                xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
                return XFS_ITEM_PINNED;
        }
@@ -746,11 +738,8 @@ xfs_inode_item_committed(
  * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
  * failed to get the inode flush lock but did get the inode locked SHARED.
  * Here we're trying to see if the inode buffer is incore, and if so whether it's
- * marked delayed write. If that's the case, we'll initiate a bawrite on that
- * buffer to expedite the process.
- *
- * We aren't holding the AIL lock (or the flush lock) when this gets called,
- * so it is inherently race-y.
+ * marked delayed write. If that's the case, we'll promote it and that will
+ * allow the caller to write the buffer by triggering the xfsbufd to run.
  */
 STATIC void
 xfs_inode_item_pushbuf(
@@ -759,82 +748,30 @@ xfs_inode_item_pushbuf(
        xfs_inode_t     *ip;
        xfs_mount_t     *mp;
        xfs_buf_t       *bp;
-       uint            dopush;
 
        ip = iip->ili_inode;
-
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
 
-       /*
-        * The ili_pushbuf_flag keeps others from
-        * trying to duplicate our effort.
-        */
-       ASSERT(iip->ili_pushbuf_flag != 0);
-       ASSERT(iip->ili_push_owner == current_pid());
-
        /*
         * If a flush is not in progress anymore, chances are that the
         * inode was taken off the AIL. So, just get out.
         */
        if (completion_done(&ip->i_flush) ||
            ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
-               iip->ili_pushbuf_flag = 0;
                xfs_iunlock(ip, XFS_ILOCK_SHARED);
                return;
        }
 
        mp = ip->i_mount;
        bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
-                   iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);
-
-       if (bp != NULL) {
-               if (XFS_BUF_ISDELAYWRITE(bp)) {
-                       /*
-                        * We were racing with iflush because we don't hold
-                        * the AIL lock or the flush lock. However, at this point,
-                        * we have the buffer, and we know that it's dirty.
-                        * So, it's possible that iflush raced with us, and
-                        * this item is already taken off the AIL.
-                        * If not, we can flush it async.
-                        */
-                       dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
-                                 !completion_done(&ip->i_flush));
-                       iip->ili_pushbuf_flag = 0;
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-                       trace_xfs_inode_item_push(bp, _RET_IP_);
+                   iip->ili_format.ilf_len, XBF_TRYLOCK);
 
-                       if (XFS_BUF_ISPINNED(bp)) {
-                               xfs_log_force(mp, (xfs_lsn_t)0,
-                                             XFS_LOG_FORCE);
-                       }
-                       if (dopush) {
-                               int     error;
-                               error = xfs_bawrite(mp, bp);
-                               if (error)
-                                       xfs_fs_cmn_err(CE_WARN, mp,
-               "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
-                                                       error, iip, bp);
-                       } else {
-                               xfs_buf_relse(bp);
-                       }
-               } else {
-                       iip->ili_pushbuf_flag = 0;
-                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
-                       xfs_buf_relse(bp);
-               }
-               return;
-       }
-       /*
-        * We have to be careful about resetting pushbuf flag too early (above).
-        * Even though in theory we can do it as soon as we have the buflock,
-        * we don't want others to be doing work needlessly. They'll come to
-        * this function thinking that pushing the buffer is their
-        * responsibility only to find that the buffer is still locked by
-        * another doing the same thing
-        */
-       iip->ili_pushbuf_flag = 0;
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       if (!bp)
+               return;
+       if (XFS_BUF_ISDELAYWRITE(bp))
+               xfs_buf_delwri_promote(bp);
+       xfs_buf_relse(bp);
        return;
 }
 
@@ -867,10 +804,14 @@ xfs_inode_item_push(
               iip->ili_format.ilf_fields != 0);
 
        /*
-        * Write out the inode.  The completion routine ('iflush_done') will
-        * pull it from the AIL, mark it clean, unlock the flush lock.
+        * Push the inode to its backing buffer. This will not remove the
+        * inode from the AIL - a further push will be required to trigger a
+        * buffer push. However, this allows all the dirty inodes to be pushed
+        * to the buffer before it is pushed to disk. The buffer IO completion
+        * will pull the inode from the AIL, mark it clean and unlock the flush
+        * lock.
         */
-       (void) xfs_iflush(ip, XFS_IFLUSH_ASYNC);
+       (void) xfs_iflush(ip, 0);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
 
        return;
@@ -898,7 +839,7 @@ static struct xfs_item_ops xfs_inode_item_ops = {
        .iop_format     = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
                                        xfs_inode_item_format,
        .iop_pin        = (void(*)(xfs_log_item_t*))xfs_inode_item_pin,
-       .iop_unpin      = (void(*)(xfs_log_item_t*, int))xfs_inode_item_unpin,
+       .iop_unpin      = (void(*)(xfs_log_item_t*))xfs_inode_item_unpin,
        .iop_unpin_remove = (void(*)(xfs_log_item_t*, xfs_trans_t*))
                                        xfs_inode_item_unpin_remove,
        .iop_trylock    = (uint(*)(xfs_log_item_t*))xfs_inode_item_trylock,
@@ -925,18 +866,9 @@ xfs_inode_item_init(
        ASSERT(ip->i_itemp == NULL);
        iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
 
-       iip->ili_item.li_type = XFS_LI_INODE;
-       iip->ili_item.li_ops = &xfs_inode_item_ops;
-       iip->ili_item.li_mountp = mp;
-       iip->ili_item.li_ailp = mp->m_ail;
        iip->ili_inode = ip;
-
-       /*
-          We have zeroed memory. No need ...
-          iip->ili_extents_buf = NULL;
-          iip->ili_pushbuf_flag = 0;
-        */
-
+       xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
+                                               &xfs_inode_item_ops);
        iip->ili_format.ilf_type = XFS_LI_INODE;
        iip->ili_format.ilf_ino = ip->i_ino;
        iip->ili_format.ilf_blkno = ip->i_imap.im_blkno;