xfs: introduce background inode reclaim work

author Dave Chinner <dchinner@redhat.com>

Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)

committer Dave Chinner <david@fromorbit.com>

Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)
author Dave Chinner <dchinner@redhat.com>
Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)
committer Dave Chinner <david@fromorbit.com>
Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c

index af32759..debe282 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -461,7 +461,6 @@ xfs_sync_worker(
                         error = xfs_fs_log_dummy(mp);
                 else
                         xfs_log_force(mp, 0);
-               xfs_reclaim_inodes(mp, 0);
                 error = xfs_qm_sync(mp, SYNC_TRYLOCK);
         }
  
@@ -469,6 +468,52 @@ xfs_sync_worker(
         xfs_syncd_queue_sync(mp);
  }
  
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs syncd work default of 30s. Perhaps this should have its own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_syncd_queue_reclaim(
+       struct xfs_mount        *mp)
+{
+
+       /*
+        * We can have inodes enter reclaim after we've shut down the syncd
+        * workqueue during unmount, so don't allow reclaim work to be queued
+        * during unmount.
+        */
+       if (!(mp->m_super->s_flags & MS_ACTIVE))
+               return;
+
+       rcu_read_lock();
+       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+               queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
+                       msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+       }
+       rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+STATIC void
+xfs_reclaim_worker(
+       struct work_struct *work)
+{
+       struct xfs_mount *mp = container_of(to_delayed_work(work),
+                                       struct xfs_mount, m_reclaim_work);
+
+       xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+       xfs_syncd_queue_reclaim(mp);
+}
+
  /*
   * Flush delayed allocate data, attempting to free up reserved space
   * from existing allocations.  At this point a new allocation attempt
@@ -508,7 +553,10 @@ xfs_syncd_init(
  {
         INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
         INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
+       INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+
         xfs_syncd_queue_sync(mp);
+       xfs_syncd_queue_reclaim(mp);
  
         return 0;
  }
@@ -518,6 +566,7 @@ xfs_syncd_stop(
         struct xfs_mount        *mp)
  {
         cancel_delayed_work_sync(&mp->m_sync_work);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
         cancel_work_sync(&mp->m_flush_work);
  }
  
@@ -537,6 +586,10 @@ __xfs_inode_set_reclaim_tag(
                                 XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
                                 XFS_ICI_RECLAIM_TAG);
                 spin_unlock(&ip->i_mount->m_perag_lock);
+
+               /* schedule periodic background inode reclaim */
+               xfs_syncd_queue_reclaim(ip->i_mount);
+
                 trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
                                                         -1, _RET_IP_);
         }
@@ -953,7 +1006,13 @@ xfs_reclaim_inodes(
  }
  
  /*
- * Shrinker infrastructure.
+ * Inode cache shrinker.
+ *
+ * When called we make sure that there is a background (fast) inode reclaim in
+ * progress, while we will throttle the speed of reclaim via doing synchronous
+ * reclaim of inodes. That means if we come across dirty inodes, we wait for
+ * them to be cleaned, which we hope will not be very long due to the
+ * background walker having already kicked the IO off on those dirty inodes.
   */
  static int
  xfs_reclaim_inode_shrink(
@@ -968,10 +1027,14 @@ xfs_reclaim_inode_shrink(
  
         mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
         if (nr_to_scan) {
+               /* kick background reclaimer */
+               xfs_syncd_queue_reclaim(mp);
+
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
  
-               xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+               xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
+                                       &nr_to_scan);
                 /* terminate if we don't exhaust the scan */
                 if (nr_to_scan > 0)
                         return -1;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h

index a0ad90e..19af0ab 100644 (file)
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -204,6 +204,7 @@ typedef struct xfs_mount {
  #endif
         struct xfs_mru_cache    *m_filestream;  /* per-mount filestream data */
         struct delayed_work     m_sync_work;    /* background sync work */
+       struct delayed_work     m_reclaim_work; /* background inode reclaim */
         struct work_struct      m_flush_work;   /* background inode flush */
         __int64_t               m_update_flags; /* sb flags we need to update
                                                    on the next remount,rw */
author	Dave Chinner <dchinner@redhat.com>
	Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)
committer	Dave Chinner <david@fromorbit.com>
	Fri, 8 Apr 2011 02:45:07 +0000 (12:45 +1000)
fs/xfs/linux-2.6/xfs_sync.c		patch \| blob \| history
fs/xfs/xfs_mount.h		patch \| blob \| history