Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

author Linus Torvalds <torvalds@woody.linux-foundation.org>

Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)

committer Linus Torvalds <torvalds@woody.linux-foundation.org>

Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c

index d9c40fe..5f152f6 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
                 ip->i_d.di_size = isize;
                 ip->i_update_core = 1;
                 ip->i_update_size = 1;
+               mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
         }
  
         xfs_iunlock(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index 4528f9a..491d1f4 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -415,8 +415,10 @@ xfs_fs_write_inode(
  
         if (vp) {
                 vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-               if (sync)
+               if (sync) {
+                       filemap_fdatawait(inode->i_mapping);
                         flags |= FLUSH_SYNC;
+               }
                 error = bhv_vop_iflush(vp, flags);
                 if (error == EAGAIN)
                         error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h

index d7e1361..fa25b7d 100644 (file)
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
  #define        XFS_BLI_UDQUOT_BUF      0x4
  #define XFS_BLI_PDQUOT_BUF     0x8
  #define        XFS_BLI_GDQUOT_BUF      0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define        XFS_BLI_INODE_NEW_BUF   0x20
  
  #define        XFS_BLI_CHUNK           128
  #define        XFS_BLI_SHIFT           7
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c

index ce22786..16f8e17 100644 (file)
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -467,8 +467,7 @@ void
  xfs_filestream_flush(
         xfs_mount_t     *mp)
  {
-       /* point in time flush, so keep the reaper running */
-       xfs_mru_cache_flush(mp->m_filestream, 1);
+       xfs_mru_cache_flush(mp->m_filestream);
  }
  
  /*
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c

index 8ae6e8e..dacb197 100644 (file)
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
  /*ARGSUSED*/
  STATIC void
  xlog_recover_do_reg_buffer(
+       xfs_mount_t             *mp,
         xlog_recover_item_t     *item,
         xfs_buf_t               *bp,
         xfs_buf_log_format_t    *buf_f)
@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
         unsigned int            *data_map = NULL;
         unsigned int            map_size = 0;
         int                     error;
+       int                     stale_buf = 1;
+
+       /*
+        * Scan through the on-disk inode buffer and attempt to
+        * determine if it has been written to since it was logged.
+        *
+        * - If any of the magic numbers are incorrect then the buffer is stale
+        * - If any of the modes are non-zero then the buffer is not stale
+        * - If all of the modes are zero and at least one of the generation
+        *   counts is non-zero then the buffer is stale
+        *
+        * If the end result is a stale buffer then the log buffer is replayed
+        * otherwise it is skipped.
+        *
+        * This heuristic is not perfect.  It can be improved by scanning the
+        * entire inode chunk for evidence that any of the inode clusters have
+        * been updated.  To fix this problem completely we will need a major
+        * architectural change to the logging system.
+        */
+       if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+               xfs_dinode_t    *dip;
+               int             inodes_per_buf;
+               int             mode_count = 0;
+               int             gen_count = 0;
+
+               stale_buf = 0;
+               inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+               for (i = 0; i < inodes_per_buf; i++) {
+                       dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+                               i * mp->m_sb.sb_inodesize);
+                       if (be16_to_cpu(dip->di_core.di_magic) !=
+                                       XFS_DINODE_MAGIC) {
+                               stale_buf = 1;
+                               break;
+                       }
+                       if (be16_to_cpu(dip->di_core.di_mode))
+                               mode_count++;
+                       if (be16_to_cpu(dip->di_core.di_gen))
+                               gen_count++;
+               }
+
+               if (!mode_count && gen_count)
+                       stale_buf = 1;
+       }
  
         switch (buf_f->blf_type) {
         case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
                                                -1, 0, XFS_QMOPT_DOWARN,
                                                "dquot_buf_recover");
                 }
-               if (!error)
+               if (!error && stale_buf)
                         memcpy(xfs_buf_offset(bp,
                                 (uint)bit << XFS_BLI_SHIFT),    /* dest */
                                 item->ri_buf[i].i_addr,         /* source */
@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
         if (log->l_quotaoffs_flag & type)
                 return;
  
-       xlog_recover_do_reg_buffer(item, bp, buf_f);
+       xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
  }
  
  /*
@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
                   (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
                 xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
         } else {
-               xlog_recover_do_reg_buffer(item, bp, buf_f);
+               xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
         }
         if (error)
                 return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c

index 7deb9e3..e0b358c 100644 (file)
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
          */
         if (!_xfs_mru_cache_migrate(mru, now)) {
                 mru->time_zero = now;
-               if (!mru->next_reap)
-                       mru->next_reap = mru->grp_count * mru->grp_time;
+               if (!mru->queued) {
+                       mru->queued = 1;
+                       queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+                                          mru->grp_count * mru->grp_time);
+               }
         } else {
                 grp = (now - mru->time_zero) / mru->grp_time;
                 grp = (mru->lru_grp + grp) % mru->grp_count;
@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
         struct work_struct      *work)
  {
         xfs_mru_cache_t         *mru = container_of(work, xfs_mru_cache_t, work.work);
-       unsigned long           now;
+       unsigned long           now, next;
  
         ASSERT(mru && mru->lists);
         if (!mru || !mru->lists)
                 return;
  
         mutex_spinlock(&mru->lock);
-       now = jiffies;
-       if (mru->reap_all ||
-           (mru->next_reap && time_after(now, mru->next_reap))) {
-               if (mru->reap_all)
-                       now += mru->grp_count * mru->grp_time * 2;
-               mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-               _xfs_mru_cache_clear_reap_list(mru);
+       next = _xfs_mru_cache_migrate(mru, jiffies);
+       _xfs_mru_cache_clear_reap_list(mru);
+
+       mru->queued = next;
+       if ((mru->queued > 0)) {
+               now = jiffies;
+               if (next <= now)
+                       next = 0;
+               else
+                       next -= now;
+               queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
         }
  
-       /*
-        * the process that triggered the reap_all is responsible
-        * for restating the periodic reap if it is required.
-        */
-       if (!mru->reap_all)
-               queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-       mru->reap_all = 0;
         mutex_spinunlock(&mru->lock, 0);
  }
  
@@ -352,7 +352,7 @@ xfs_mru_cache_create(
  
         /* An extra list is needed to avoid reaping up to a grp_time early. */
         mru->grp_count = grp_count + 1;
-       mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+       mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
  
         if (!mru->lists) {
                 err = ENOMEM;
@@ -374,11 +374,6 @@ xfs_mru_cache_create(
         mru->grp_time  = grp_time;
         mru->free_func = free_func;
  
-       /* start up the reaper event */
-       mru->next_reap = 0;
-       mru->reap_all = 0;
-       queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
         *mrup = mru;
  
  exit:
@@ -394,35 +389,25 @@ exit:
   * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
   * free functions as they're deleted.  When this function returns, the caller is
   * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
   */
  void
  xfs_mru_cache_flush(
-       xfs_mru_cache_t         *mru,
-       int                     restart)
+       xfs_mru_cache_t         *mru)
  {
         if (!mru || !mru->lists)
                 return;
  
-       cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
         mutex_spinlock(&mru->lock);
-       mru->reap_all = 1;
-       mutex_spinunlock(&mru->lock, 0);
+       if (mru->queued) {
+               mutex_spinunlock(&mru->lock, 0);
+               cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+               mutex_spinlock(&mru->lock);
+       }
  
-       queue_work(xfs_mru_reap_wq, &mru->work.work);
-       flush_workqueue(xfs_mru_reap_wq);
+       _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+       _xfs_mru_cache_clear_reap_list(mru);
  
-       mutex_spinlock(&mru->lock);
-       WARN_ON_ONCE(mru->reap_all != 0);
-       mru->reap_all = 0;
-       if (restart)
-               queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
         mutex_spinunlock(&mru->lock, 0);
  }
  
@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
         if (!mru || !mru->lists)
                 return;
  
-       /* we don't want the reaper to restart here */
-       xfs_mru_cache_flush(mru, 0);
+       xfs_mru_cache_flush(mru);
  
         kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
         kmem_free(mru, sizeof(*mru));
diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h

index 624fd10..dd58ea1 100644 (file)
--- a/fs/xfs/xfs_mru_cache.h
+++ b/fs/xfs/xfs_mru_cache.h
@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
         unsigned int            grp_time;  /* Time period spanned by grps.  */
         unsigned int            lru_grp;   /* Group containing time zero.   */
         unsigned long           time_zero; /* Time first element was added. */
-       unsigned long           next_reap; /* Time that the reaper should
-                                             next do something. */
-       unsigned int            reap_all;  /* if set, reap all lists */
         xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
         struct delayed_work     work;      /* Workqueue data for reaping.   */
+       unsigned int            queued;    /* work has been queued */
  } xfs_mru_cache_t;
  
  int xfs_mru_cache_init(void);
@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
  int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
                              unsigned int grp_count,
                              xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
  void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
  int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
                                 void *value);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c

index 60b6b89..95fff68 100644 (file)
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
  
         bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+       bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
  }
  
  
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c

index 1a5ad8c..6034592 100644 (file)
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -1082,6 +1082,9 @@ xfs_fsync(
         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
                 return XFS_ERROR(EIO);
  
+       if (flag & FSYNC_DATA)
+               filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
         /*
          * We always need to make sure that the required inode state
          * is safe on disk.  The vnode might be clean but because
@@ -3769,12 +3772,16 @@ xfs_inode_flush(
                         sync_lsn = log->l_last_sync_lsn;
                         GRANT_UNLOCK(log, s);
  
-                       if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
-                               return 0;
+                       if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+                               if (flags & FLUSH_SYNC)
+                                       log_flags |= XFS_LOG_SYNC;
+                               error = xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+                               if (error)
+                                       return error;
+                       }
  
-                       if (flags & FLUSH_SYNC)
-                               log_flags |= XFS_LOG_SYNC;
-                       return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+                       if (ip->i_update_core == 0)
+                               return 0;
                 }
         }
  
@@ -3788,9 +3795,6 @@ xfs_inode_flush(
         if (flags & FLUSH_INODE) {
                 int     flush_flags;
  
-               if (xfs_ipincount(ip))
-                       return EAGAIN;
-
                 if (flags & FLUSH_SYNC) {
                         xfs_ilock(ip, XFS_ILOCK_SHARED);
                         xfs_iflock(ip);
author	Linus Torvalds <torvalds@woody.linux-foundation.org>
	Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>
	Wed, 19 Sep 2007 18:40:13 +0000 (11:40 -0700)
fs/xfs/linux-2.6/xfs_aops.c		patch \| blob \| history
fs/xfs/linux-2.6/xfs_super.c		patch \| blob \| history
fs/xfs/xfs_buf_item.h		patch \| blob \| history
fs/xfs/xfs_filestream.c		patch \| blob \| history
fs/xfs/xfs_log_recover.c		patch \| blob \| history
fs/xfs/xfs_mru_cache.c		patch \| blob \| history
fs/xfs/xfs_mru_cache.h		patch \| blob \| history
fs/xfs/xfs_trans_buf.c		patch \| blob \| history
fs/xfs/xfs_vnodeops.c		patch \| blob \| history