Merge tag 'v3.1-rc6' into staging/for_v3.2
[pandora-kernel.git] / fs / dcache.c
index fbdcbca..a88948b 100644 (file)
@@ -301,6 +301,27 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
        return parent;
 }
 
+/*
+ * Unhash a dentry without inserting an RCU walk barrier or checking that
+ * dentry->d_lock is locked.  The caller must take care of that, if
+ * appropriate.
+ */
+static void __d_shrink(struct dentry *dentry)
+{
+       if (!d_unhashed(dentry)) {
+               struct hlist_bl_head *b;
+               if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
+                       b = &dentry->d_sb->s_anon;
+               else
+                       b = d_hash(dentry->d_parent, dentry->d_name.hash);
+
+               hlist_bl_lock(b);
+               __hlist_bl_del(&dentry->d_hash);
+               dentry->d_hash.pprev = NULL;
+               hlist_bl_unlock(b);
+       }
+}
+
 /**
  * d_drop - drop a dentry
  * @dentry: dentry to drop
@@ -319,17 +340,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
 void __d_drop(struct dentry *dentry)
 {
        if (!d_unhashed(dentry)) {
-               struct hlist_bl_head *b;
-               if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
-                       b = &dentry->d_sb->s_anon;
-               else
-                       b = d_hash(dentry->d_parent, dentry->d_name.hash);
-
-               hlist_bl_lock(b);
-               __hlist_bl_del(&dentry->d_hash);
-               dentry->d_hash.pprev = NULL;
-               hlist_bl_unlock(b);
-
+               __d_shrink(dentry);
                dentry_rcuwalk_barrier(dentry);
        }
 }
@@ -343,6 +354,24 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+/*
+ * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
+ * @dentry: dentry to drop
+ *
+ * This is called when we do a lookup on a placeholder dentry that needed to be
+ * looked up.  The dentry should have been hashed in order for it to be found by
+ * the lookup code, but now needs to be unhashed while we do the actual lookup
+ * and clear the DCACHE_NEED_LOOKUP flag.
+ */
+void d_clear_need_lookup(struct dentry *dentry)
+{
+       spin_lock(&dentry->d_lock);
+       __d_drop(dentry);
+       dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+       spin_unlock(&dentry->d_lock);
+}
+EXPORT_SYMBOL(d_clear_need_lookup);
+
 /*
  * Finish off a dentry we've decided to kill.
  * dentry->d_lock must be held, returns with it unlocked.
@@ -432,8 +461,13 @@ repeat:
        if (d_unhashed(dentry))
                goto kill_it;
 
-       /* Otherwise leave it cached and ensure it's on the LRU */
-       dentry->d_flags |= DCACHE_REFERENCED;
+       /*
+        * If this dentry needs lookup, don't set the referenced flag so that it
+        * is more likely to be cleaned up by the dcache shrinker in case of
+        * memory pressure.
+        */
+       if (!d_need_lookup(dentry))
+               dentry->d_flags |= DCACHE_REFERENCED;
        dentry_lru_add(dentry);
 
        dentry->d_count--;
@@ -526,10 +560,6 @@ repeat:
         */
        rcu_read_lock();
        ret = dentry->d_parent;
-       if (!ret) {
-               rcu_read_unlock();
-               goto out;
-       }
        spin_lock(&ret->d_lock);
        if (unlikely(ret != dentry->d_parent)) {
                spin_unlock(&ret->d_lock);
@@ -540,7 +570,6 @@ repeat:
        BUG_ON(!ret->d_count);
        ret->d_count++;
        spin_unlock(&ret->d_lock);
-out:
        return ret;
 }
 EXPORT_SYMBOL(dget_parent);
@@ -720,13 +749,11 @@ static void shrink_dentry_list(struct list_head *list)
  *
  * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
  */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
 {
-       /* called from prune_dcache() and shrink_dcache_parent() */
        struct dentry *dentry;
        LIST_HEAD(referenced);
        LIST_HEAD(tmp);
-       int cnt = *count;
 
 relock:
        spin_lock(&dcache_lru_lock);
@@ -754,7 +781,7 @@ relock:
                } else {
                        list_move_tail(&dentry->d_lru, &tmp);
                        spin_unlock(&dentry->d_lock);
-                       if (!--cnt)
+                       if (!--count)
                                break;
                }
                cond_resched_lock(&dcache_lru_lock);
@@ -764,83 +791,23 @@ relock:
        spin_unlock(&dcache_lru_lock);
 
        shrink_dentry_list(&tmp);
-
-       *count = cnt;
 }
 
 /**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try to free
+ * prune_dcache_sb - shrink the dcache
+ * @sb: superblock
+ * @nr_to_scan: number of entries to try to free
  *
- * Shrink the dcache. This is done when we need more memory, or simply when we
- * need to unmount something (at which point we need to unuse all dentries).
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
+ * done when we need more memory and is called from the superblock shrinker
+ * function.
  *
- * This function may fail to free any resources if all the dentries are in use.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
  */
-static void prune_dcache(int count)
+void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
 {
-       struct super_block *sb, *p = NULL;
-       int w_count;
-       int unused = dentry_stat.nr_unused;
-       int prune_ratio;
-       int pruned;
-
-       if (unused == 0 || count == 0)
-               return;
-       if (count >= unused)
-               prune_ratio = 1;
-       else
-               prune_ratio = unused / count;
-       spin_lock(&sb_lock);
-       list_for_each_entry(sb, &super_blocks, s_list) {
-               if (list_empty(&sb->s_instances))
-                       continue;
-               if (sb->s_nr_dentry_unused == 0)
-                       continue;
-               sb->s_count++;
-               /* Now, we reclaim unused dentrins with fairness.
-                * We reclaim them same percentage from each superblock.
-                * We calculate number of dentries to scan on this sb
-                * as follows, but the implementation is arranged to avoid
-                * overflows:
-                * number of dentries to scan on this sb =
-                * count * (number of dentries on this sb /
-                * number of dentries in the machine)
-                */
-               spin_unlock(&sb_lock);
-               if (prune_ratio != 1)
-                       w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
-               else
-                       w_count = sb->s_nr_dentry_unused;
-               pruned = w_count;
-               /*
-                * We need to be sure this filesystem isn't being unmounted,
-                * otherwise we could race with generic_shutdown_super(), and
-                * end up holding a reference to an inode while the filesystem
-                * is unmounted.  So we try to get s_umount, and make sure
-                * s_root isn't NULL.
-                */
-               if (down_read_trylock(&sb->s_umount)) {
-                       if ((sb->s_root != NULL) &&
-                           (!list_empty(&sb->s_dentry_lru))) {
-                               __shrink_dcache_sb(sb, &w_count,
-                                               DCACHE_REFERENCED);
-                               pruned -= w_count;
-                       }
-                       up_read(&sb->s_umount);
-               }
-               spin_lock(&sb_lock);
-               if (p)
-                       __put_super(p);
-               count -= pruned;
-               p = sb;
-               /* more work left to do? */
-               if (count <= 0)
-                       break;
-       }
-       if (p)
-               __put_super(p);
-       spin_unlock(&sb_lock);
+       __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
 }
 
 /**
@@ -873,44 +840,24 @@ EXPORT_SYMBOL(shrink_dcache_sb);
 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 {
        struct dentry *parent;
-       unsigned detached = 0;
 
        BUG_ON(!IS_ROOT(dentry));
 
-       /* detach this root from the system */
-       spin_lock(&dentry->d_lock);
-       dentry_lru_del(dentry);
-       __d_drop(dentry);
-       spin_unlock(&dentry->d_lock);
-
        for (;;) {
                /* descend to the first leaf in the current subtree */
-               while (!list_empty(&dentry->d_subdirs)) {
-                       struct dentry *loop;
-
-                       /* this is a branch with children - detach all of them
-                        * from the system in one go */
-                       spin_lock(&dentry->d_lock);
-                       list_for_each_entry(loop, &dentry->d_subdirs,
-                                           d_u.d_child) {
-                               spin_lock_nested(&loop->d_lock,
-                                               DENTRY_D_LOCK_NESTED);
-                               dentry_lru_del(loop);
-                               __d_drop(loop);
-                               spin_unlock(&loop->d_lock);
-                       }
-                       spin_unlock(&dentry->d_lock);
-
-                       /* move to the first child */
+               while (!list_empty(&dentry->d_subdirs))
                        dentry = list_entry(dentry->d_subdirs.next,
                                            struct dentry, d_u.d_child);
-               }
 
                /* consume the dentries from this leaf up through its parents
                 * until we find one with children or run out altogether */
                do {
                        struct inode *inode;
 
+                       /* detach from the system */
+                       dentry_lru_del(dentry);
+                       __d_shrink(dentry);
+
                        if (dentry->d_count != 0) {
                                printk(KERN_ERR
                                       "BUG: Dentry %p{i=%lx,n=%s}"
@@ -931,14 +878,10 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                                list_del(&dentry->d_u.d_child);
                        } else {
                                parent = dentry->d_parent;
-                               spin_lock(&parent->d_lock);
                                parent->d_count--;
                                list_del(&dentry->d_u.d_child);
-                               spin_unlock(&parent->d_lock);
                        }
 
-                       detached++;
-
                        inode = dentry->d_inode;
                        if (inode) {
                                dentry->d_inode = NULL;
@@ -983,9 +926,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
 
        dentry = sb->s_root;
        sb->s_root = NULL;
-       spin_lock(&dentry->d_lock);
        dentry->d_count--;
-       spin_unlock(&dentry->d_lock);
        shrink_dcache_for_umount_subtree(dentry);
 
        while (!hlist_bl_empty(&sb->s_anon)) {
@@ -1215,45 +1156,13 @@ void shrink_dcache_parent(struct dentry * parent)
        int found;
 
        while ((found = select_parent(parent)) != 0)
-               __shrink_dcache_sb(sb, &found, 0);
+               __shrink_dcache_sb(sb, found, 0);
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
-/*
- * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
- *
- * We need to avoid reentering the filesystem if the caller is performing a
- * GFP_NOFS allocation attempt.  One example deadlock is:
- *
- * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
- * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
- * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
- *
- * In this case we return -1 to tell the caller that we baled.
- */
-static int shrink_dcache_memory(struct shrinker *shrink,
-                               struct shrink_control *sc)
-{
-       int nr = sc->nr_to_scan;
-       gfp_t gfp_mask = sc->gfp_mask;
-
-       if (nr) {
-               if (!(gfp_mask & __GFP_FS))
-                       return -1;
-               prune_dcache(nr);
-       }
-
-       return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker dcache_shrinker = {
-       .shrink = shrink_dcache_memory,
-       .seeks = DEFAULT_SEEKS,
-};
-
 /**
- * d_alloc     -       allocate a dcache entry
- * @parent: parent of entry to allocate
+ * __d_alloc   -       allocate a dcache entry
+ * @sb: filesystem it will belong to
  * @name: qstr of the name
  *
  * Allocates a dentry. It returns %NULL if there is insufficient memory
@@ -1261,7 +1170,7 @@ static struct shrinker dcache_shrinker = {
  * copied and the copy passed in may be reused after this call.
  */
  
-struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 {
        struct dentry *dentry;
        char *dname;
@@ -1291,8 +1200,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        spin_lock_init(&dentry->d_lock);
        seqcount_init(&dentry->d_seq);
        dentry->d_inode = NULL;
-       dentry->d_parent = NULL;
-       dentry->d_sb = NULL;
+       dentry->d_parent = dentry;
+       dentry->d_sb = sb;
        dentry->d_op = NULL;
        dentry->d_fsdata = NULL;
        INIT_HLIST_BL_NODE(&dentry->d_hash);
@@ -1300,36 +1209,47 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        INIT_LIST_HEAD(&dentry->d_subdirs);
        INIT_LIST_HEAD(&dentry->d_alias);
        INIT_LIST_HEAD(&dentry->d_u.d_child);
-
-       if (parent) {
-               spin_lock(&parent->d_lock);
-               /*
-                * don't need child lock because it is not subject
-                * to concurrency here
-                */
-               __dget_dlock(parent);
-               dentry->d_parent = parent;
-               dentry->d_sb = parent->d_sb;
-               d_set_d_op(dentry, dentry->d_sb->s_d_op);
-               list_add(&dentry->d_u.d_child, &parent->d_subdirs);
-               spin_unlock(&parent->d_lock);
-       }
+       d_set_d_op(dentry, dentry->d_sb->s_d_op);
 
        this_cpu_inc(nr_dentry);
 
        return dentry;
 }
+
+/**
+ * d_alloc     -       allocate a dcache entry
+ * @parent: parent of entry to allocate
+ * @name: qstr of the name
+ *
+ * Allocates a dentry. It returns %NULL if there is insufficient memory
+ * available. On a success the dentry is returned. The name passed in is
+ * copied and the copy passed in may be reused after this call.
+ */
+struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
+{
+       struct dentry *dentry = __d_alloc(parent->d_sb, name);
+       if (!dentry)
+               return NULL;
+
+       spin_lock(&parent->d_lock);
+       /*
+        * don't need child lock because it is not subject
+        * to concurrency here
+        */
+       __dget_dlock(parent);
+       dentry->d_parent = parent;
+       list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+       spin_unlock(&parent->d_lock);
+
+       return dentry;
+}
 EXPORT_SYMBOL(d_alloc);
 
 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
 {
-       struct dentry *dentry = d_alloc(NULL, name);
-       if (dentry) {
-               dentry->d_sb = sb;
-               d_set_d_op(dentry, dentry->d_sb->s_d_op);
-               dentry->d_parent = dentry;
+       struct dentry *dentry = __d_alloc(sb, name);
+       if (dentry)
                dentry->d_flags |= DCACHE_DISCONNECTED;
-       }
        return dentry;
 }
 EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1499,13 +1419,9 @@ struct dentry * d_alloc_root(struct inode * root_inode)
        if (root_inode) {
                static const struct qstr name = { .name = "/", .len = 1 };
 
-               res = d_alloc(NULL, &name);
-               if (res) {
-                       res->d_sb = root_inode->i_sb;
-                       d_set_d_op(res, res->d_sb->s_d_op);
-                       res->d_parent = res;
+               res = __d_alloc(root_inode->i_sb, &name);
+               if (res)
                        d_instantiate(res, root_inode);
-               }
        }
        return res;
 }
@@ -1566,13 +1482,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
        if (res)
                goto out_iput;
 
-       tmp = d_alloc(NULL, &anonstring);
+       tmp = __d_alloc(inode->i_sb, &anonstring);
        if (!tmp) {
                res = ERR_PTR(-ENOMEM);
                goto out_iput;
        }
-       tmp->d_parent = tmp; /* make sure dput doesn't croak */
-
 
        spin_lock(&inode->i_lock);
        res = __d_find_any_alias(inode);
@@ -1584,8 +1498,6 @@ struct dentry *d_obtain_alias(struct inode *inode)
 
        /* attach a disconnected dentry */
        spin_lock(&tmp->d_lock);
-       tmp->d_sb = inode->i_sb;
-       d_set_d_op(tmp, tmp->d_sb->s_d_op);
        tmp->d_inode = inode;
        tmp->d_flags |= DCACHE_DISCONNECTED;
        list_add(&tmp->d_alias, &inode->i_dentry);
@@ -1626,6 +1538,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 {
        struct dentry *new = NULL;
 
+       if (IS_ERR(inode))
+               return ERR_CAST(inode);
+
        if (inode && S_ISDIR(inode->i_mode)) {
                spin_lock(&inode->i_lock);
                new = __d_find_alias(inode, 1);
@@ -1708,29 +1623,22 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
        }
 
        /*
-        * Negative dentry: instantiate it unless the inode is a directory and
-        * already has a dentry.
+        * We are going to instantiate this dentry, unhash it and clear the
+        * lookup flag so we can do that.
         */
-       spin_lock(&inode->i_lock);
-       if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) {
-               __d_instantiate(found, inode);
-               spin_unlock(&inode->i_lock);
-               security_d_instantiate(found, inode);
-               return found;
-       }
+       if (unlikely(d_need_lookup(found)))
+               d_clear_need_lookup(found);
 
        /*
-        * In case a directory already has a (disconnected) entry grab a
-        * reference to it, move it in place and use it.
+        * Negative dentry: instantiate it unless the inode is a directory and
+        * already has a dentry.
         */
-       new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
-       __dget(new);
-       spin_unlock(&inode->i_lock);
-       security_d_instantiate(found, inode);
-       d_move(new, found);
-       iput(inode);
-       dput(found);
-       return new;
+       new = d_splice_alias(inode, found);
+       if (new) {
+               dput(found);
+               found = new;
+       }
+       return found;
 
 err_out:
        iput(inode);
@@ -1821,7 +1729,7 @@ seqretry:
                 */
                if (read_seqcount_retry(&dentry->d_seq, *seq))
                        goto seqretry;
-               if (parent->d_flags & DCACHE_OP_COMPARE) {
+               if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
                        if (parent->d_op->d_compare(parent, *inode,
                                                dentry, i,
                                                tlen, tname, name))
@@ -2216,8 +2124,9 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.  Caller hold
- * rename_lock.
+ * dcache entries should not be moved in this way. Caller must hold
+ * rename_lock, the i_mutex of the source and target directories,
+ * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
  */
 static void __d_move(struct dentry * dentry, struct dentry * target)
 {
@@ -2280,7 +2189,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
  * @target: new dentry
  *
  * Update the dcache to reflect the move of a file name. Negative
- * dcache entries should not be moved in this way.
+ * dcache entries should not be moved in this way. See the locking
+ * requirements for __d_move.
  */
 void d_move(struct dentry *dentry, struct dentry *target)
 {
@@ -2398,7 +2308,8 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
  * @inode: inode to bind to the dentry, to which aliases may be attached
  *
  * Introduces an dentry into the tree, substituting an extant disconnected
- * root directory alias in its place if there is one
+ * root directory alias in its place if there is one. Caller must hold the
+ * i_mutex of the parent directory.
  */
 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 {
@@ -3045,8 +2956,6 @@ static void __init dcache_init(void)
         */
        dentry_cache = KMEM_CACHE(dentry,
                SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-       
-       register_shrinker(&dcache_shrinker);
 
        /* Hash may have been set up in dcache_init_early */
        if (!hashdist)