Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 30 May 2014 16:52:55 +0000 (09:52 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 30 May 2014 16:52:55 +0000 (09:52 -0700)
Pull vfs dcache livelock fix from Al Viro:
 "Fixes for livelocks in shrink_dentry_list() introduced by fixes to
  shrink list corruption; the root cause was that trylock of parent's
  ->d_lock could be disrupted by d_walk() happening on other CPUs,
  resulting in shrink_dentry_list() making no progress *and* the same
  d_walk() being called again and again for as long as
  shrink_dentry_list() doesn't get past that mess.

  The solution is to have shrink_dentry_list() treat that trylock
  failure not as 'try to do the same thing again', but 'lock them in the
  right order'"

* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  dentry_kill() doesn't need the second argument now
  dealing with the rest of shrink_dentry_list() livelock
  shrink_dentry_list(): take parent's ->d_lock earlier
  expand dentry_kill(dentry, 0) in shrink_dentry_list()
  split dentry_kill()
  lift the "already marked killed" case into shrink_dentry_list()

fs/dcache.c

index 42ae01e..bce851d 100644 (file)
@@ -441,42 +441,12 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
-/*
- * Finish off a dentry we've decided to kill.
- * dentry->d_lock must be held, returns with it unlocked.
- * If ref is non-zero, then decrement the refcount too.
- * Returns dentry requiring refcount drop, or NULL if we're done.
- */
-static struct dentry *
-dentry_kill(struct dentry *dentry, int unlock_on_failure)
-       __releases(dentry->d_lock)
+static void __dentry_kill(struct dentry *dentry)
 {
-       struct inode *inode;
        struct dentry *parent = NULL;
        bool can_free = true;
-
-       if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
-               can_free = dentry->d_flags & DCACHE_MAY_FREE;
-               spin_unlock(&dentry->d_lock);
-               goto out;
-       }
-
-       inode = dentry->d_inode;
-       if (inode && !spin_trylock(&inode->i_lock)) {
-relock:
-               if (unlock_on_failure) {
-                       spin_unlock(&dentry->d_lock);
-                       cpu_relax();
-               }
-               return dentry; /* try again with same dentry */
-       }
        if (!IS_ROOT(dentry))
                parent = dentry->d_parent;
-       if (parent && !spin_trylock(&parent->d_lock)) {
-               if (inode)
-                       spin_unlock(&inode->i_lock);
-               goto relock;
-       }
 
        /*
         * The dentry is now unrecoverably dead to the world.
@@ -520,9 +490,72 @@ relock:
                can_free = false;
        }
        spin_unlock(&dentry->d_lock);
-out:
        if (likely(can_free))
                dentry_free(dentry);
+}
+
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If ref is non-zero, then decrement the refcount too.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static struct dentry *dentry_kill(struct dentry *dentry)
+       __releases(dentry->d_lock)
+{
+       struct inode *inode = dentry->d_inode;
+       struct dentry *parent = NULL;
+
+       if (inode && unlikely(!spin_trylock(&inode->i_lock)))
+               goto failed;
+
+       if (!IS_ROOT(dentry)) {
+               parent = dentry->d_parent;
+               if (unlikely(!spin_trylock(&parent->d_lock))) {
+                       if (inode)
+                               spin_unlock(&inode->i_lock);
+                       goto failed;
+               }
+       }
+
+       __dentry_kill(dentry);
+       return parent;
+
+failed:
+       spin_unlock(&dentry->d_lock);
+       cpu_relax();
+       return dentry; /* try again with same dentry */
+}
+
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+       struct dentry *parent = dentry->d_parent;
+       if (IS_ROOT(dentry))
+               return NULL;
+       if (likely(spin_trylock(&parent->d_lock)))
+               return parent;
+       spin_unlock(&dentry->d_lock);
+       rcu_read_lock();
+again:
+       parent = ACCESS_ONCE(dentry->d_parent);
+       spin_lock(&parent->d_lock);
+       /*
+        * We can't blindly lock dentry until we are sure
+        * that we won't violate the locking order.
+        * Any changes of dentry->d_parent must have
+        * been done with parent->d_lock held, so
+        * spin_lock() above is enough of a barrier
+        * for checking if it's still our child.
+        */
+       if (unlikely(parent != dentry->d_parent)) {
+               spin_unlock(&parent->d_lock);
+               goto again;
+       }
+       rcu_read_unlock();
+       if (parent != dentry)
+               spin_lock(&dentry->d_lock);
+       else
+               parent = NULL;
        return parent;
 }
 
@@ -579,7 +612,7 @@ repeat:
        return;
 
 kill_it:
-       dentry = dentry_kill(dentry, 1);
+       dentry = dentry_kill(dentry);
        if (dentry)
                goto repeat;
 }
@@ -797,8 +830,11 @@ static void shrink_dentry_list(struct list_head *list)
        struct dentry *dentry, *parent;
 
        while (!list_empty(list)) {
+               struct inode *inode;
                dentry = list_entry(list->prev, struct dentry, d_lru);
                spin_lock(&dentry->d_lock);
+               parent = lock_parent(dentry);
+
                /*
                 * The dispose list is isolated and dentries are not accounted
                 * to the LRU here, so we can simply remove it from the list
@@ -812,26 +848,33 @@ static void shrink_dentry_list(struct list_head *list)
                 */
                if ((int)dentry->d_lockref.count > 0) {
                        spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
                        continue;
                }
 
-               parent = dentry_kill(dentry, 0);
-               /*
-                * If dentry_kill returns NULL, we have nothing more to do.
-                */
-               if (!parent)
+
+               if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
+                       bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+                       spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
+                       if (can_free)
+                               dentry_free(dentry);
                        continue;
+               }
 
-               if (unlikely(parent == dentry)) {
-                       /*
-                        * trylocks have failed and d_lock has been held the
-                        * whole time, so it could not have been added to any
-                        * other lists. Just add it back to the shrink list.
-                        */
+               inode = dentry->d_inode;
+               if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
                        d_shrink_add(dentry, list);
                        spin_unlock(&dentry->d_lock);
+                       if (parent)
+                               spin_unlock(&parent->d_lock);
                        continue;
                }
+
+               __dentry_kill(dentry);
+
                /*
                 * We need to prune ancestors too. This is necessary to prevent
                 * quadratic behavior of shrink_dcache_parent(), but is also
@@ -839,8 +882,26 @@ static void shrink_dentry_list(struct list_head *list)
                 * fragmentation.
                 */
                dentry = parent;
-               while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
-                       dentry = dentry_kill(dentry, 1);
+               while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
+                       parent = lock_parent(dentry);
+                       if (dentry->d_lockref.count != 1) {
+                               dentry->d_lockref.count--;
+                               spin_unlock(&dentry->d_lock);
+                               if (parent)
+                                       spin_unlock(&parent->d_lock);
+                               break;
+                       }
+                       inode = dentry->d_inode;        /* can't be NULL */
+                       if (unlikely(!spin_trylock(&inode->i_lock))) {
+                               spin_unlock(&dentry->d_lock);
+                               if (parent)
+                                       spin_unlock(&parent->d_lock);
+                               cpu_relax();
+                               continue;
+                       }
+                       __dentry_kill(dentry);
+                       dentry = parent;
+               }
        }
 }