Merge branch 'semaphore' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc
fs/dcache.c
index 6068c25..3818d6a 100644
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
 static unsigned int d_hash_mask __read_mostly;
 static unsigned int d_hash_shift __read_mostly;
 static struct hlist_head *dentry_hashtable __read_mostly;
-static LIST_HEAD(dentry_unused);
 
 /* Statistics gathering. */
 struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
                call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
-static void dentry_lru_remove(struct dentry *dentry)
-{
-       if (!list_empty(&dentry->d_lru)) {
-               list_del_init(&dentry->d_lru);
-               dentry_stat.nr_unused--;
-       }
-}
-
 /*
  * Release the dentry's inode, using the filesystem
  * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
        }
 }
 
+/*
+ * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+       list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+       dentry->d_sb->s_nr_dentry_unused++;
+       dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_add_tail(struct dentry *dentry)
+{
+       list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+       dentry->d_sb->s_nr_dentry_unused++;
+       dentry_stat.nr_unused++;
+}
+
+static void dentry_lru_del(struct dentry *dentry)
+{
+       if (!list_empty(&dentry->d_lru)) {
+               list_del(&dentry->d_lru);
+               dentry->d_sb->s_nr_dentry_unused--;
+               dentry_stat.nr_unused--;
+       }
+}
+
+static void dentry_lru_del_init(struct dentry *dentry)
+{
+       if (likely(!list_empty(&dentry->d_lru))) {
+               list_del_init(&dentry->d_lru);
+               dentry->d_sb->s_nr_dentry_unused--;
+               dentry_stat.nr_unused--;
+       }
+}
+
 /**
  * d_kill - kill dentry and return parent
  * @dentry: dentry to kill
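
The four helpers above keep the per-superblock counter and the global
dentry_stat.nr_unused in step, and the comment pins down the locking rule:
dcache_lock must be held. They dereference two per-superblock fields that do
not appear in this file's diff; a minimal sketch of what the rest of the patch
presumably adds to struct super_block (field names taken from the helpers
above) is:

	/*
	 * Sketch only: the fields dentry_lru_add() and friends rely on.
	 * The hunks that add them to include/linux/fs.h and initialize
	 * them in fs/super.c are outside this file's diff.
	 */
	struct super_block {
		/* ... existing fields ... */
		struct list_head	s_dentry_lru;		/* unused dentry LRU */
		int			s_nr_dentry_unused;	/* # of dentries on LRU */
		/* ... */
	};
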
@@ -212,8 +238,7 @@ repeat:
                goto kill_it;
        if (list_empty(&dentry->d_lru)) {
                dentry->d_flags |= DCACHE_REFERENCED;
-               list_add(&dentry->d_lru, &dentry_unused);
-               dentry_stat.nr_unused++;
+               dentry_lru_add(dentry);
        }
        spin_unlock(&dentry->d_lock);
        spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
 unhash_it:
        __d_drop(dentry);
 kill_it:
-       dentry_lru_remove(dentry);
+       /* If the dentry was on the d_lru list, delete it from there. */
+       dentry_lru_del(dentry);
        dentry = d_kill(dentry);
        if (dentry)
                goto repeat;
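
Together with __shrink_dcache_sb() below, the DCACHE_REFERENCED handling forms
a second-chance policy: dput() sets the flag as the dentry enters the LRU, and
a pruning pass that honours the flag clears it and spares the dentry once;
only a dentry left untouched since then is actually freed. A standalone sketch
of just the policy (illustrative, not kernel code; the flag value is made up):

	#include <stdbool.h>
	#include <stdio.h>

	#define DCACHE_REFERENCED 0x1		/* illustrative value */

	struct fake_dentry {
		unsigned int flags;
	};

	/*
	 * One scan step that honours the referenced bit, as
	 * __shrink_dcache_sb() does when flags == DCACHE_REFERENCED.
	 */
	static bool spare_once(struct fake_dentry *d)
	{
		if (d->flags & DCACHE_REFERENCED) {
			d->flags &= ~DCACHE_REFERENCED;	/* chance used up */
			return true;			/* keep on the LRU */
		}
		return false;				/* prune candidate */
	}

	int main(void)
	{
		struct fake_dentry d = { DCACHE_REFERENCED };

		printf("pass 1: %s\n", spare_once(&d) ? "spared" : "pruned");
		printf("pass 2: %s\n", spare_once(&d) ? "spared" : "pruned");
		return 0;
	}
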
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
        atomic_inc(&dentry->d_count);
-       dentry_lru_remove(dentry);
+       dentry_lru_del_init(dentry);
        return dentry;
 }
 
@@ -406,133 +432,167 @@ static void prune_one_dentry(struct dentry * dentry)
 
                if (dentry->d_op && dentry->d_op->d_delete)
                        dentry->d_op->d_delete(dentry);
-               dentry_lru_remove(dentry);
+               dentry_lru_del_init(dentry);
                __d_drop(dentry);
                dentry = d_kill(dentry);
                spin_lock(&dcache_lock);
        }
 }
 
-/**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try and free
- * @sb: if given, ignore dentries for other superblocks
- *         which are being unmounted.
- *
- * Shrink the dcache. This is done when we need
- * more memory, or simply when we need to unmount
- * something (at which point we need to unuse
- * all dentries).
- *
- * This function may fail to free any resources if
- * all the dentries are in use.
+/*
+ * Shrink the dentry LRU on a given superblock.
+ * @sb   : superblock to shrink dentry LRU.
+ * @count: If count is NULL, we prune all dentries on the superblock;
+ * otherwise at most *count dentries are pruned and *count is updated.
+ * @flags: If flags is non-zero, we do special processing based on which
+ * flags are set, so we don't need multiple similar copies of this loop.
  */
-static void prune_dcache(int count, struct super_block *sb)
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
 {
-       spin_lock(&dcache_lock);
-       for (; count ; count--) {
-               struct dentry *dentry;
-               struct list_head *tmp;
-               struct rw_semaphore *s_umount;
-
-               cond_resched_lock(&dcache_lock);
+       LIST_HEAD(referenced);
+       LIST_HEAD(tmp);
+       struct dentry *dentry;
+       int cnt = 0;
 
-               tmp = dentry_unused.prev;
-               if (sb) {
-                       /* Try to find a dentry for this sb, but don't try
-                        * too hard, if they aren't near the tail they will
-                        * be moved down again soon
+       BUG_ON(!sb);
+       BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
+       spin_lock(&dcache_lock);
+       if (count != NULL)
+               /* called from prune_dcache() and shrink_dcache_parent() */
+               cnt = *count;
+restart:
+       if (count == NULL)
+               list_splice_init(&sb->s_dentry_lru, &tmp);
+       else {
+               while (!list_empty(&sb->s_dentry_lru)) {
+                       dentry = list_entry(sb->s_dentry_lru.prev,
+                                       struct dentry, d_lru);
+                       BUG_ON(dentry->d_sb != sb);
+
+                       spin_lock(&dentry->d_lock);
+                       /*
+                        * If we are honouring the DCACHE_REFERENCED flag and
+                        * the dentry has this flag set, don't free it. Clear
+                        * the flag and put it back on the LRU.
                         */
-                       int skip = count;
-                       while (skip && tmp != &dentry_unused &&
-                           list_entry(tmp, struct dentry, d_lru)->d_sb != sb) {
-                               skip--;
-                               tmp = tmp->prev;
+                       if ((flags & DCACHE_REFERENCED)
+                               && (dentry->d_flags & DCACHE_REFERENCED)) {
+                               dentry->d_flags &= ~DCACHE_REFERENCED;
+                               list_move_tail(&dentry->d_lru, &referenced);
+                               spin_unlock(&dentry->d_lock);
+                       } else {
+                               list_move_tail(&dentry->d_lru, &tmp);
+                               spin_unlock(&dentry->d_lock);
+                               cnt--;
+                               if (!cnt)
+                                       break;
                        }
                }
-               if (tmp == &dentry_unused)
-                       break;
-               list_del_init(tmp);
-               prefetch(dentry_unused.prev);
-               dentry_stat.nr_unused--;
-               dentry = list_entry(tmp, struct dentry, d_lru);
-
-               spin_lock(&dentry->d_lock);
+       }
+       while (!list_empty(&tmp)) {
+               dentry = list_entry(tmp.prev, struct dentry, d_lru);
+               dentry_lru_del_init(dentry);
+               spin_lock(&dentry->d_lock);
                /*
                 * We found an inuse dentry which was not removed from
-                * dentry_unused because of laziness during lookup.  Do not free
-                * it - just keep it off the dentry_unused list.
+                * the LRU because of laziness during lookup.  Do not free
+                * it - just keep it off the LRU list.
                 */
-               if (atomic_read(&dentry->d_count)) {
-                       spin_unlock(&dentry->d_lock);
+               if (atomic_read(&dentry->d_count)) {
+                       spin_unlock(&dentry->d_lock);
                        continue;
                }
-               /* If the dentry was recently referenced, don't free it. */
-               if (dentry->d_flags & DCACHE_REFERENCED) {
-                       dentry->d_flags &= ~DCACHE_REFERENCED;
-                       list_add(&dentry->d_lru, &dentry_unused);
-                       dentry_stat.nr_unused++;
-                       spin_unlock(&dentry->d_lock);
+               prune_one_dentry(dentry);
+               /* dentry->d_lock was dropped in prune_one_dentry() */
+               cond_resched_lock(&dcache_lock);
+       }
+       if (count == NULL && !list_empty(&sb->s_dentry_lru))
+               goto restart;
+       if (count != NULL)
+               *count = cnt;
+       if (!list_empty(&referenced))
+               list_splice(&referenced, &sb->s_dentry_lru);
+       spin_unlock(&dcache_lock);
+}
+
+/**
+ * prune_dcache - shrink the dcache
+ * @count: number of entries to try to free
+ *
+ * Shrink the dcache. This is done when we need more memory, or simply when we
+ * need to unmount something (at which point we need to unuse all dentries).
+ *
+ * This function may fail to free any resources if all the dentries are in use.
+ */
+static void prune_dcache(int count)
+{
+       struct super_block *sb;
+       int w_count;
+       int unused = dentry_stat.nr_unused;
+       int prune_ratio;
+       int pruned;
+
+       if (unused == 0 || count == 0)
+               return;
+       spin_lock(&dcache_lock);
+restart:
+       if (count >= unused)
+               prune_ratio = 1;
+       else
+               prune_ratio = unused / count;
+       spin_lock(&sb_lock);
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               if (sb->s_nr_dentry_unused == 0)
                        continue;
-               }
-               /*
-                * If the dentry is not DCACHED_REFERENCED, it is time
-                * to remove it from the dcache, provided the super block is
-                * NULL (which means we are trying to reclaim memory)
-                * or this dentry belongs to the same super block that
-                * we want to shrink.
-                */
-               /*
-                * If this dentry is for "my" filesystem, then I can prune it
-                * without taking the s_umount lock (I already hold it).
+               sb->s_count++;
+               /* Now, we reclaim unused dentries with fairness.
+                * We reclaim the same percentage from each superblock.
+                * We calculate the number of dentries to scan on this sb
+                * as follows, but the implementation is arranged to avoid
+                * overflows:
+                * number of dentries to scan on this sb =
+                * count * (number of dentries on this sb /
+                * number of dentries in the machine)
                 */
-               if (sb && dentry->d_sb == sb) {
-                       prune_one_dentry(dentry);
-                       continue;
-               }
+               spin_unlock(&sb_lock);
+               if (prune_ratio != 1)
+                       w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
+               else
+                       w_count = sb->s_nr_dentry_unused;
+               pruned = w_count;
                /*
-                * ...otherwise we need to be sure this filesystem isn't being
-                * unmounted, otherwise we could race with
-                * generic_shutdown_super(), and end up holding a reference to
-                * an inode while the filesystem is unmounted.
-                * So we try to get s_umount, and make sure s_root isn't NULL.
-                * (Take a local copy of s_umount to avoid a use-after-free of
-                * `dentry').
+                * We need to be sure this filesystem isn't being unmounted,
+                * otherwise we could race with generic_shutdown_super(), and
+                * end up holding a reference to an inode while the filesystem
+                * is unmounted.  So we try to get s_umount, and make sure
+                * s_root isn't NULL.
                 */
-               s_umount = &dentry->d_sb->s_umount;
-               if (down_read_trylock(s_umount)) {
-                       if (dentry->d_sb->s_root != NULL) {
-                               prune_one_dentry(dentry);
-                               up_read(s_umount);
-                               continue;
+               if (down_read_trylock(&sb->s_umount)) {
+                       if ((sb->s_root != NULL) &&
+                           (!list_empty(&sb->s_dentry_lru))) {
+                               spin_unlock(&dcache_lock);
+                               __shrink_dcache_sb(sb, &w_count,
+                                               DCACHE_REFERENCED);
+                               pruned -= w_count;
+                               spin_lock(&dcache_lock);
                        }
-                       up_read(s_umount);
+                       up_read(&sb->s_umount);
                }
-               spin_unlock(&dentry->d_lock);
+               spin_lock(&sb_lock);
+               count -= pruned;
                /*
-                * Insert dentry at the head of the list as inserting at the
-                * tail leads to a cycle.
+                * restart only when sb is no longer on the list and
+                * we have more work to do.
                 */
-               list_add(&dentry->d_lru, &dentry_unused);
-               dentry_stat.nr_unused++;
+               if (__put_super_and_need_restart(sb) && count > 0) {
+                       spin_unlock(&sb_lock);
+                       goto restart;
+               }
        }
+       spin_unlock(&sb_lock);
        spin_unlock(&dcache_lock);
 }
 
-/*
- * Shrink the dcache for the specified super block.
- * This allows us to unmount a device without disturbing
- * the dcache for the other devices.
- *
- * This implementation makes just two traversals of the
- * unused list.  On the first pass we move the selected
- * dentries to the most recent end, and on the second
- * pass we free them.  The second pass must restart after
- * each dput(), but since the target dentries are all at
- * the end, it's really just a single traversal.
- */
-
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
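
The fairness arithmetic in prune_dcache() above is easier to see with concrete
numbers. A standalone sketch with made-up values (100 dentries requested out of
1000 unused machine-wide, 300 of them on one superblock):

	#include <stdio.h>

	int main(void)
	{
		int count = 100;		/* entries we were asked to free */
		int unused = 1000;		/* dentry_stat.nr_unused */
		int s_nr_dentry_unused = 300;	/* unused dentries on this sb */
		int prune_ratio, w_count;

		prune_ratio = count >= unused ? 1 : unused / count;	/* 10 */

		/* same arithmetic as prune_dcache() above */
		if (prune_ratio != 1)
			w_count = s_nr_dentry_unused / prune_ratio + 1;	/* 31 */
		else
			w_count = s_nr_dentry_unused;

		/*
		 * This approximates count * (300 / 1000) = 30 while never
		 * forming the product count * s_nr_dentry_unused, which
		 * could overflow an int on a large machine.
		 */
		printf("scan %d of this sb's %d unused dentries\n",
		       w_count, s_nr_dentry_unused);
		return 0;
	}
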
@@ -541,44 +601,9 @@ static void prune_dcache(int count, struct super_block *sb)
  * is used to free the dcache before unmounting a file
  * system
  */
-
 void shrink_dcache_sb(struct super_block * sb)
 {
-       struct list_head *tmp, *next;
-       struct dentry *dentry;
-
-       /*
-        * Pass one ... move the dentries for the specified
-        * superblock to the most recent end of the unused list.
-        */
-       spin_lock(&dcache_lock);
-       list_for_each_prev_safe(tmp, next, &dentry_unused) {
-               dentry = list_entry(tmp, struct dentry, d_lru);
-               if (dentry->d_sb != sb)
-                       continue;
-               list_move_tail(tmp, &dentry_unused);
-       }
-
-       /*
-        * Pass two ... free the dentries for this superblock.
-        */
-repeat:
-       list_for_each_prev_safe(tmp, next, &dentry_unused) {
-               dentry = list_entry(tmp, struct dentry, d_lru);
-               if (dentry->d_sb != sb)
-                       continue;
-               dentry_stat.nr_unused--;
-               list_del_init(tmp);
-               spin_lock(&dentry->d_lock);
-               if (atomic_read(&dentry->d_count)) {
-                       spin_unlock(&dentry->d_lock);
-                       continue;
-               }
-               prune_one_dentry(dentry);
-               cond_resched_lock(&dcache_lock);
-               goto repeat;
-       }
-       spin_unlock(&dcache_lock);
+       __shrink_dcache_sb(sb, NULL, 0);
 }
 
 /*
@@ -595,7 +620,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
        /* detach this root from the system */
        spin_lock(&dcache_lock);
-       dentry_lru_remove(dentry);
+       dentry_lru_del_init(dentry);
        __d_drop(dentry);
        spin_unlock(&dcache_lock);
 
@@ -609,7 +634,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                        spin_lock(&dcache_lock);
                        list_for_each_entry(loop, &dentry->d_subdirs,
                                            d_u.d_child) {
-                               dentry_lru_remove(loop);
+                               dentry_lru_del_init(loop);
                                __d_drop(loop);
                                cond_resched_lock(&dcache_lock);
                        }
@@ -791,14 +816,13 @@ resume:
                struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
 
-               dentry_lru_remove(dentry);
+               dentry_lru_del_init(dentry);
                /* 
                 * move only zero ref count dentries to the end 
                 * of the unused list for prune_dcache
                 */
                if (!atomic_read(&dentry->d_count)) {
-                       list_add_tail(&dentry->d_lru, &dentry_unused);
-                       dentry_stat.nr_unused++;
+                       dentry_lru_add_tail(dentry);
                        found++;
                }
 
@@ -840,10 +864,11 @@ out:
  
 void shrink_dcache_parent(struct dentry * parent)
 {
+       struct super_block *sb = parent->d_sb;
        int found;
 
        while ((found = select_parent(parent)) != 0)
-               prune_dcache(found, parent->d_sb);
+               __shrink_dcache_sb(sb, &found, 0);
 }
 
 /*
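
Note the division of labour here: select_parent() (above) moves each
zero-refcount descendant to the tail of the per-sb LRU with
dentry_lru_add_tail(), and __shrink_dcache_sb() scans from
sb->s_dentry_lru.prev, i.e. tail-first, so the dentries just selected are the
first candidates scanned. A minimal userspace sketch of that ordering, with
stand-ins for the kernel's list primitives:

	#include <stddef.h>
	#include <stdio.h>

	/* Stand-ins for the kernel's list_head primitives. */
	struct list_head { struct list_head *next, *prev; };

	static void list_head_init(struct list_head *h)
	{
		h->next = h->prev = h;
	}

	static void list_add_tail(struct list_head *n, struct list_head *h)
	{
		n->prev = h->prev;
		n->next = h;
		h->prev->next = n;
		h->prev = n;
	}

	struct fake_dentry {
		const char *name;
		struct list_head d_lru;
	};

	int main(void)
	{
		struct list_head lru;
		struct fake_dentry a = { "long-idle" };
		struct fake_dentry b = { "just-selected" };
		struct list_head *p;

		list_head_init(&lru);
		list_add_tail(&a.d_lru, &lru);	/* aged LRU entry */
		list_add_tail(&b.d_lru, &lru);	/* select_parent() victim */

		/* scan tail-first, as __shrink_dcache_sb() does */
		for (p = lru.prev; p != &lru; p = p->prev) {
			struct fake_dentry *d = (struct fake_dentry *)
				((char *)p - offsetof(struct fake_dentry, d_lru));
			printf("%s\n", d->name); /* just-selected, long-idle */
		}
		return 0;
	}
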
@@ -863,7 +888,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
        if (nr) {
                if (!(gfp_mask & __GFP_FS))
                        return -1;
-               prune_dcache(nr, NULL);
+               prune_dcache(nr);
        }
        return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
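
As for the value shrink_dcache_memory() reports back: it is nr_unused scaled
by sysctl_vfs_cache_pressure, whose default of 100 gives a 1:1 report. A quick
sketch with made-up numbers:

	#include <stdio.h>

	int main(void)
	{
		int nr_unused = 5000;		/* dentry_stat.nr_unused */
		int vfs_cache_pressure = 100;	/* sysctl default */

		/* what shrink_dcache_memory() returns to the VM */
		printf("%d\n", (nr_unused / 100) * vfs_cache_pressure);
		/* prints 5000 */

		vfs_cache_pressure = 200;	/* higher pressure, bigger report */
		printf("%d\n", (nr_unused / 100) * vfs_cache_pressure);
		/* prints 10000 */
		return 0;
	}
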
@@ -1215,7 +1240,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
  * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
  * lookup is going on.
  *
- * dentry_unused list is not updated even if lookup finds the required dentry
+ * The dentry unused LRU is not updated even if lookup finds the required dentry
  * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
  * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
  * acquisition.