fix d_walk()/non-delayed __d_free() race
[pandora-kernel.git] / fs / dcache.c
index 10ba92d..01c68ae 100644 (file)
@@ -42,7 +42,7 @@
 /*
  * Usage:
  * dcache->d_inode->i_lock protects:
- *   - i_dentry, d_alias, d_inode of aliases
+ *   - i_dentry, d_u.d_alias, d_inode of aliases
  * dcache_hash_bucket lock protects:
  *   - the dcache hash table
  * s_anon bl list spinlock protects:
@@ -57,7 +57,7 @@
  *   - d_unhashed()
  *   - d_parent and d_subdirs
  *   - childrens' d_child and d_parent
- *   - d_alias, d_inode
+ *   - d_u.d_alias, d_inode
  *
  * Ordering:
  * dentry->d_inode->i_lock
@@ -140,7 +140,6 @@ static void __d_free(struct rcu_head *head)
 {
        struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
 
-       WARN_ON(!list_empty(&dentry->d_alias));
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
        kmem_cache_free(dentry_cache, dentry); 
@@ -151,6 +150,7 @@ static void __d_free(struct rcu_head *head)
  */
 static void d_free(struct dentry *dentry)
 {
+       WARN_ON(!list_empty(&dentry->d_u.d_alias));
        BUG_ON(dentry->d_count);
        this_cpu_dec(nr_dentry);
        if (dentry->d_op && dentry->d_op->d_release)
@@ -189,7 +189,7 @@ static void dentry_iput(struct dentry * dentry)
        struct inode *inode = dentry->d_inode;
        if (inode) {
                dentry->d_inode = NULL;
-               list_del_init(&dentry->d_alias);
+               list_del_init(&dentry->d_u.d_alias);
                spin_unlock(&dentry->d_lock);
                spin_unlock(&inode->i_lock);
                if (!inode->i_nlink)
@@ -213,7 +213,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
 {
        struct inode *inode = dentry->d_inode;
        dentry->d_inode = NULL;
-       list_del_init(&dentry->d_alias);
+       list_del_init(&dentry->d_u.d_alias);
        dentry_rcuwalk_barrier(dentry);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
@@ -242,6 +242,7 @@ static void dentry_lru_add(struct dentry *dentry)
 static void __dentry_lru_del(struct dentry *dentry)
 {
        list_del_init(&dentry->d_lru);
+       dentry->d_flags &= ~DCACHE_SHRINK_LIST;
        dentry->d_sb->s_nr_dentry_unused--;
        dentry_stat.nr_unused--;
 }
@@ -275,15 +276,15 @@ static void dentry_lru_prune(struct dentry *dentry)
        }
 }
 
-static void dentry_lru_move_tail(struct dentry *dentry)
+static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 {
        spin_lock(&dcache_lru_lock);
        if (list_empty(&dentry->d_lru)) {
-               list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+               list_add_tail(&dentry->d_lru, list);
                dentry->d_sb->s_nr_dentry_unused++;
                dentry_stat.nr_unused++;
        } else {
-               list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+               list_move_tail(&dentry->d_lru, list);
        }
        spin_unlock(&dcache_lru_lock);
 }
@@ -305,12 +306,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
        __releases(parent->d_lock)
        __releases(dentry->d_inode->i_lock)
 {
-       list_del(&dentry->d_u.d_child);
+       __list_del_entry(&dentry->d_child);
        /*
-        * Inform try_to_ascend() that we are no longer attached to the
+        * Inform ascending readers that we are no longer attached to the
         * dentry tree
         */
-       dentry->d_flags |= DCACHE_DISCONNECTED;
+       dentry->d_flags |= DCACHE_DENTRY_KILLED;
        if (parent)
                spin_unlock(&parent->d_lock);
        dentry_iput(dentry);
@@ -623,7 +624,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
 
 again:
        discon_alias = NULL;
-       list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+       list_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
                spin_lock(&alias->d_lock);
                if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
                        if (IS_ROOT(alias) &&
@@ -676,7 +677,7 @@ void d_prune_aliases(struct inode *inode)
        struct dentry *dentry;
 restart:
        spin_lock(&inode->i_lock);
-       list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+       list_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
                spin_lock(&dentry->d_lock);
                if (!dentry->d_count) {
                        __dget_dlock(dentry);
@@ -769,14 +770,18 @@ static void shrink_dentry_list(struct list_head *list)
 }
 
 /**
- * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
- * @sb:                superblock to shrink dentry LRU.
- * @count:     number of entries to prune
- * @flags:     flags to control the dentry processing
+ * prune_dcache_sb - shrink the dcache
+ * @sb: superblock
+ * @count: number of entries to try to free
+ *
+ * Attempt to shrink the superblock dcache LRU by @count entries. This is
+ * done when we need more memory an called from the superblock shrinker
+ * function.
  *
- * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
  */
-static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
+void prune_dcache_sb(struct super_block *sb, int count)
 {
        struct dentry *dentry;
        LIST_HEAD(referenced);
@@ -795,18 +800,13 @@ relock:
                        goto relock;
                }
 
-               /*
-                * If we are honouring the DCACHE_REFERENCED flag and the
-                * dentry has this flag set, don't free it.  Clear the flag
-                * and put it back on the LRU.
-                */
-               if (flags & DCACHE_REFERENCED &&
-                               dentry->d_flags & DCACHE_REFERENCED) {
+               if (dentry->d_flags & DCACHE_REFERENCED) {
                        dentry->d_flags &= ~DCACHE_REFERENCED;
                        list_move(&dentry->d_lru, &referenced);
                        spin_unlock(&dentry->d_lock);
                } else {
                        list_move_tail(&dentry->d_lru, &tmp);
+                       dentry->d_flags |= DCACHE_SHRINK_LIST;
                        spin_unlock(&dentry->d_lock);
                        if (!--count)
                                break;
@@ -820,23 +820,6 @@ relock:
        shrink_dentry_list(&tmp);
 }
 
-/**
- * prune_dcache_sb - shrink the dcache
- * @sb: superblock
- * @nr_to_scan: number of entries to try to free
- *
- * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
- * done when we need more memory an called from the superblock shrinker
- * function.
- *
- * This function may fail to free any resources if all the dentries are in
- * use.
- */
-void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
-{
-       __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
-}
-
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
@@ -874,7 +857,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                /* descend to the first leaf in the current subtree */
                while (!list_empty(&dentry->d_subdirs))
                        dentry = list_entry(dentry->d_subdirs.next,
-                                           struct dentry, d_u.d_child);
+                                           struct dentry, d_child);
 
                /* consume the dentries from this leaf up through its parents
                 * until we find one with children or run out altogether */
@@ -906,17 +889,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
                        if (IS_ROOT(dentry)) {
                                parent = NULL;
-                               list_del(&dentry->d_u.d_child);
+                               list_del(&dentry->d_child);
                        } else {
                                parent = dentry->d_parent;
                                parent->d_count--;
-                               list_del(&dentry->d_u.d_child);
+                               list_del(&dentry->d_child);
                        }
 
                        inode = dentry->d_inode;
                        if (inode) {
                                dentry->d_inode = NULL;
-                               list_del_init(&dentry->d_alias);
+                               list_del_init(&dentry->d_u.d_alias);
                                if (dentry->d_op && dentry->d_op->d_iput)
                                        dentry->d_op->d_iput(dentry, inode);
                                else
@@ -934,7 +917,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                } while (list_empty(&dentry->d_subdirs));
 
                dentry = list_entry(dentry->d_subdirs.next,
-                                   struct dentry, d_u.d_child);
+                                   struct dentry, d_child);
        }
 }
 
@@ -966,34 +949,6 @@ void shrink_dcache_for_umount(struct super_block *sb)
        }
 }
 
-/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
-{
-       struct dentry *new = old->d_parent;
-
-       rcu_read_lock();
-       spin_unlock(&old->d_lock);
-       spin_lock(&new->d_lock);
-
-       /*
-        * might go back up the wrong parent if we have had a rename
-        * or deletion
-        */
-       if (new != old->d_parent ||
-                (old->d_flags & DCACHE_DISCONNECTED) ||
-                (!locked && read_seqretry(&rename_lock, seq))) {
-               spin_unlock(&new->d_lock);
-               new = NULL;
-       }
-       rcu_read_unlock();
-       return new;
-}
-
 
 /*
  * Search for at least 1 mount point in the dentry's subdirs.
@@ -1027,7 +982,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
 
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1049,28 +1004,48 @@ resume:
        /*
         * All done at this level ... ascend and resume the search.
         */
+       rcu_read_lock();
+ascend:
        if (this_parent != parent) {
                struct dentry *child = this_parent;
-               this_parent = try_to_ascend(this_parent, locked, seq);
-               if (!this_parent)
+               this_parent = child->d_parent;
+
+               spin_unlock(&child->d_lock);
+               spin_lock(&this_parent->d_lock);
+
+               /* might go back up the wrong parent if we have had a rename */
+               if (!locked && read_seqretry(&rename_lock, seq))
                        goto rename_retry;
-               next = child->d_u.d_child.next;
+               /* go into the first sibling still alive */
+               do {
+                       next = child->d_child.next;
+                       if (next == &this_parent->d_subdirs)
+                               goto ascend;
+                       child = list_entry(next, struct dentry, d_child);
+               } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+               rcu_read_unlock();
                goto resume;
        }
-       spin_unlock(&this_parent->d_lock);
        if (!locked && read_seqretry(&rename_lock, seq))
                goto rename_retry;
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
        if (locked)
                write_sequnlock(&rename_lock);
        return 0; /* No mount points found in tree */
 positive:
        if (!locked && read_seqretry(&rename_lock, seq))
-               goto rename_retry;
+               goto rename_retry_unlocked;
        if (locked)
                write_sequnlock(&rename_lock);
        return 1;
 
 rename_retry:
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
+       if (locked)
+               goto again;
+rename_retry_unlocked:
        locked = 1;
        write_seqlock(&rename_lock);
        goto again;
@@ -1091,7 +1066,7 @@ EXPORT_SYMBOL(have_submounts);
  * drop the lock and return early due to latency
  * constraints.
  */
-static int select_parent(struct dentry * parent)
+static int select_parent(struct dentry *parent, struct list_head *dispose)
 {
        struct dentry *this_parent;
        struct list_head *next;
@@ -1108,22 +1083,26 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
 
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
-               /* 
-                * move only zero ref count dentries to the end 
-                * of the unused list for prune_dcache
+               /*
+                * move only zero ref count dentries to the dispose list.
+                *
+                * Those which are presently on the shrink list, being processed
+                * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
+                * loop in shrink_dcache_parent() might not make any progress
+                * and loop forever.
                 */
-               if (!dentry->d_count) {
-                       dentry_lru_move_tail(dentry);
-                       found++;
-               } else {
+               if (dentry->d_count) {
                        dentry_lru_del(dentry);
+               } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
+                       dentry_lru_move_list(dentry, dispose);
+                       dentry->d_flags |= DCACHE_SHRINK_LIST;
+                       found++;
                }
-
                /*
                 * We can return to the caller if we have found some (this
                 * ensures forward progress). We'll be coming back to find
@@ -1131,6 +1110,7 @@ resume:
                 */
                if (found && need_resched()) {
                        spin_unlock(&dentry->d_lock);
+                       rcu_read_lock();
                        goto out;
                }
 
@@ -1150,25 +1130,44 @@ resume:
        /*
         * All done at this level ... ascend and resume the search.
         */
+       rcu_read_lock();
+ascend:
        if (this_parent != parent) {
                struct dentry *child = this_parent;
-               this_parent = try_to_ascend(this_parent, locked, seq);
-               if (!this_parent)
+               this_parent = child->d_parent;
+
+               spin_unlock(&child->d_lock);
+               spin_lock(&this_parent->d_lock);
+
+               /* might go back up the wrong parent if we have had a rename */
+               if (!locked && read_seqretry(&rename_lock, seq))
                        goto rename_retry;
-               next = child->d_u.d_child.next;
+               /* go into the first sibling still alive */
+               do {
+                       next = child->d_child.next;
+                       if (next == &this_parent->d_subdirs)
+                               goto ascend;
+                       child = list_entry(next, struct dentry, d_child);
+               } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+               rcu_read_unlock();
                goto resume;
        }
 out:
-       spin_unlock(&this_parent->d_lock);
        if (!locked && read_seqretry(&rename_lock, seq))
                goto rename_retry;
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
        if (locked)
                write_sequnlock(&rename_lock);
        return found;
 
 rename_retry:
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
        if (found)
                return found;
+       if (locked)
+               goto again;
        locked = 1;
        write_seqlock(&rename_lock);
        goto again;
@@ -1180,14 +1179,15 @@ rename_retry:
  *
  * Prune the dcache to remove unused children of the parent dentry.
  */
 void shrink_dcache_parent(struct dentry * parent)
 {
-       struct super_block *sb = parent->d_sb;
+       LIST_HEAD(dispose);
        int found;
 
-       while ((found = select_parent(parent)) != 0)
-               __shrink_dcache_sb(sb, found, 0);
+       while ((found = select_parent(parent, &dispose)) != 0) {
+               shrink_dentry_list(&dispose);
+               cond_resched();
+       }
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
@@ -1238,8 +1238,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
        INIT_HLIST_BL_NODE(&dentry->d_hash);
        INIT_LIST_HEAD(&dentry->d_lru);
        INIT_LIST_HEAD(&dentry->d_subdirs);
-       INIT_LIST_HEAD(&dentry->d_alias);
-       INIT_LIST_HEAD(&dentry->d_u.d_child);
+       INIT_LIST_HEAD(&dentry->d_u.d_alias);
+       INIT_LIST_HEAD(&dentry->d_child);
        d_set_d_op(dentry, dentry->d_sb->s_d_op);
 
        this_cpu_inc(nr_dentry);
@@ -1261,7 +1261,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        struct dentry *dentry = __d_alloc(parent->d_sb, name);
        if (!dentry)
                return NULL;
-
+       dentry->d_flags |= DCACHE_RCUACCESS;
        spin_lock(&parent->d_lock);
        /*
         * don't need child lock because it is not subject
@@ -1269,7 +1269,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
         */
        __dget_dlock(parent);
        dentry->d_parent = parent;
-       list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+       list_add(&dentry->d_child, &parent->d_subdirs);
        spin_unlock(&parent->d_lock);
 
        return dentry;
@@ -1326,7 +1326,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        if (inode) {
                if (unlikely(IS_AUTOMOUNT(inode)))
                        dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
-               list_add(&dentry->d_alias, &inode->i_dentry);
+               list_add(&dentry->d_u.d_alias, &inode->i_dentry);
        }
        dentry->d_inode = inode;
        dentry_rcuwalk_barrier(dentry);
@@ -1351,7 +1351,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
  
 void d_instantiate(struct dentry *entry, struct inode * inode)
 {
-       BUG_ON(!list_empty(&entry->d_alias));
+       BUG_ON(!list_empty(&entry->d_u.d_alias));
        if (inode)
                spin_lock(&inode->i_lock);
        __d_instantiate(entry, inode);
@@ -1390,7 +1390,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
                return NULL;
        }
 
-       list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+       list_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
                struct qstr *qstr = &alias->d_name;
 
                /*
@@ -1416,7 +1416,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 {
        struct dentry *result;
 
-       BUG_ON(!list_empty(&entry->d_alias));
+       BUG_ON(!list_empty(&entry->d_u.d_alias));
 
        if (inode)
                spin_lock(&inode->i_lock);
@@ -1466,7 +1466,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
 
        if (list_empty(&inode->i_dentry))
                return NULL;
-       alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias);
+       alias = list_first_entry(&inode->i_dentry, struct dentry, d_u.d_alias);
        __dget(alias);
        return alias;
 }
@@ -1502,7 +1502,7 @@ static struct dentry * d_find_any_alias(struct inode *inode)
  */
 struct dentry *d_obtain_alias(struct inode *inode)
 {
-       static const struct qstr anonstring = { .name = "" };
+       static const struct qstr anonstring = { .name = "/", .len = 1 };
        struct dentry *tmp;
        struct dentry *res;
 
@@ -1533,7 +1533,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
        spin_lock(&tmp->d_lock);
        tmp->d_inode = inode;
        tmp->d_flags |= DCACHE_DISCONNECTED;
-       list_add(&tmp->d_alias, &inode->i_dentry);
+       list_add(&tmp->d_u.d_alias, &inode->i_dentry);
        hlist_bl_lock(&tmp->d_sb->s_anon);
        hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
        hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -1939,7 +1939,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
        struct dentry *child;
 
        spin_lock(&dparent->d_lock);
-       list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+       list_for_each_entry(child, &dparent->d_subdirs, d_child) {
                if (dentry == child) {
                        spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
                        __dget_dlock(dentry);
@@ -2011,7 +2011,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
 {
        BUG_ON(!d_unhashed(entry));
        hlist_bl_lock(b);
-       entry->d_flags |= DCACHE_RCUACCESS;
        hlist_bl_add_head_rcu(&entry->d_hash, b);
        hlist_bl_unlock(b);
 }
@@ -2186,8 +2185,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);
 
-       list_del(&dentry->d_u.d_child);
-       list_del(&target->d_u.d_child);
+       list_del(&dentry->d_child);
+       list_del(&target->d_child);
 
        /* Switch the names.. */
        switch_names(dentry, target);
@@ -2195,17 +2194,18 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
 
        /* ... and switch the parents */
        if (IS_ROOT(dentry)) {
+               dentry->d_flags |= DCACHE_RCUACCESS;
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
-               INIT_LIST_HEAD(&target->d_u.d_child);
+               INIT_LIST_HEAD(&target->d_child);
        } else {
                swap(dentry->d_parent, target->d_parent);
 
                /* And add them back to the (new) parent lists */
-               list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
+               list_add(&target->d_child, &target->d_parent->d_subdirs);
        }
 
-       list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+       list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
 
        write_seqcount_end(&target->d_seq);
        write_seqcount_end(&dentry->d_seq);
@@ -2311,19 +2311,20 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
        switch_names(dentry, anon);
        swap(dentry->d_name.hash, anon->d_name.hash);
 
+       dentry->d_flags |= DCACHE_RCUACCESS;
        dentry->d_parent = (aparent == anon) ? dentry : aparent;
-       list_del(&dentry->d_u.d_child);
+       list_del(&dentry->d_child);
        if (!IS_ROOT(dentry))
-               list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+               list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
        else
-               INIT_LIST_HEAD(&dentry->d_u.d_child);
+               INIT_LIST_HEAD(&dentry->d_child);
 
        anon->d_parent = (dparent == dentry) ? anon : dparent;
-       list_del(&anon->d_u.d_child);
+       list_del(&anon->d_child);
        if (!IS_ROOT(anon))
-               list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
+               list_add(&anon->d_child, &anon->d_parent->d_subdirs);
        else
-               INIT_LIST_HEAD(&anon->d_u.d_child);
+               INIT_LIST_HEAD(&anon->d_child);
 
        write_seqcount_end(&dentry->d_seq);
        write_seqcount_end(&anon->d_seq);
@@ -2371,6 +2372,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
                        if (d_ancestor(alias, dentry)) {
                                /* Check for loops */
                                actual = ERR_PTR(-ELOOP);
+                               spin_unlock(&inode->i_lock);
                        } else if (IS_ROOT(alias)) {
                                /* Is this an anonymous mountpoint that we
                                 * could splice into our tree? */
@@ -2380,7 +2382,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
                                goto found;
                        } else {
                                /* Nope, but we must(!) avoid directory
-                                * aliasing */
+                                * aliasing. This drops inode->i_lock */
                                actual = __d_unalias(inode, dentry, alias);
                        }
                        write_sequnlock(&rename_lock);
@@ -2439,28 +2441,35 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 /**
  * prepend_path - Prepend path string to a buffer
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
  * Caller holds the rename_lock.
- *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
  */
-static int prepend_path(const struct path *path, struct path *root,
+static int prepend_path(const struct path *path,
+                       const struct path *root,
                        char **buffer, int *buflen)
 {
        struct dentry *dentry = path->dentry;
        struct vfsmount *vfsmnt = path->mnt;
+       char *orig_buffer = *buffer;
+       int orig_len = *buflen;
        bool slash = false;
        int error = 0;
 
-       br_read_lock(vfsmount_lock);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry * parent;
 
                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
+                       /* Escaped? */
+                       if (dentry != vfsmnt->mnt_root) {
+                               *buffer = orig_buffer;
+                               *buflen = orig_len;
+                               slash = false;
+                               error = 3;
+                               goto global_root;
+                       }
                        /* Global root? */
                        if (vfsmnt->mnt_parent == vfsmnt) {
                                goto global_root;
@@ -2483,11 +2492,9 @@ static int prepend_path(const struct path *path, struct path *root,
                dentry = parent;
        }
 
-out:
        if (!error && !slash)
                error = prepend(buffer, buflen, "/", 1);
 
-       br_read_unlock(vfsmount_lock);
        return error;
 
 global_root:
@@ -2500,15 +2507,17 @@ global_root:
                WARN(1, "Root dentry has weird name <%.*s>\n",
                     (int) dentry->d_name.len, dentry->d_name.name);
        }
-       root->mnt = vfsmnt;
-       root->dentry = dentry;
-       goto out;
+       if (!slash)
+               error = prepend(buffer, buflen, "/", 1);
+       if (!error)
+               error = vfsmnt->mnt_ns ? 1 : 2;
+       return error;
 }
 
 /**
  * __d_path - return the path of a dentry
  * @path: the dentry/vfsmount to report
- * @root: root vfsmnt/dentry (may be modified by this function)
+ * @root: root vfsmnt/dentry
  * @buf: buffer to return value in
  * @buflen: buffer length
  *
@@ -2519,21 +2528,46 @@ global_root:
  *
  * "buflen" should be positive.
  *
- * If path is not reachable from the supplied root, then the value of
- * root is changed (without modifying refcounts).
+ * If the path is not reachable from the supplied root, return %NULL.
  */
-char *__d_path(const struct path *path, struct path *root,
+char *__d_path(const struct path *path,
+              const struct path *root,
               char *buf, int buflen)
 {
        char *res = buf + buflen;
        int error;
 
        prepend(&res, &buflen, "\0", 1);
+       br_read_lock(vfsmount_lock);
        write_seqlock(&rename_lock);
        error = prepend_path(path, root, &res, &buflen);
        write_sequnlock(&rename_lock);
+       br_read_unlock(vfsmount_lock);
 
-       if (error)
+       if (error < 0)
+               return ERR_PTR(error);
+       if (error > 0)
+               return NULL;
+       return res;
+}
+
+char *d_absolute_path(const struct path *path,
+              char *buf, int buflen)
+{
+       struct path root = {};
+       char *res = buf + buflen;
+       int error;
+
+       prepend(&res, &buflen, "\0", 1);
+       br_read_lock(vfsmount_lock);
+       write_seqlock(&rename_lock);
+       error = prepend_path(path, &root, &res, &buflen);
+       write_sequnlock(&rename_lock);
+       br_read_unlock(vfsmount_lock);
+
+       if (error > 1)
+               error = -EINVAL;
+       if (error < 0)
                return ERR_PTR(error);
        return res;
 }
@@ -2541,8 +2575,9 @@ char *__d_path(const struct path *path, struct path *root,
 /*
  * same as __d_path but appends "(deleted)" for unlinked files.
  */
-static int path_with_deleted(const struct path *path, struct path *root,
-                                char **buf, int *buflen)
+static int path_with_deleted(const struct path *path,
+                            const struct path *root,
+                            char **buf, int *buflen)
 {
        prepend(buf, buflen, "\0", 1);
        if (d_unlinked(path->dentry)) {
@@ -2579,7 +2614,6 @@ char *d_path(const struct path *path, char *buf, int buflen)
 {
        char *res = buf + buflen;
        struct path root;
-       struct path tmp;
        int error;
 
        /*
@@ -2593,12 +2627,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
        get_fs_root(current->fs, &root);
+       br_read_lock(vfsmount_lock);
        write_seqlock(&rename_lock);
-       tmp = root;
-       error = path_with_deleted(path, &tmp, &res, &buflen);
-       if (error)
-               res = ERR_PTR(error);
+       error = path_with_deleted(path, &root, &res, &buflen);
        write_sequnlock(&rename_lock);
+       br_read_unlock(vfsmount_lock);
+       if (error < 0)
+               res = ERR_PTR(error);
        path_put(&root);
        return res;
 }
@@ -2617,7 +2652,6 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 {
        char *res = buf + buflen;
        struct path root;
-       struct path tmp;
        int error;
 
        if (path->dentry->d_op && path->dentry->d_op->d_dname)
@@ -2625,9 +2659,8 @@ char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 
        get_fs_root(current->fs, &root);
        write_seqlock(&rename_lock);
-       tmp = root;
-       error = path_with_deleted(path, &tmp, &res, &buflen);
-       if (!error && !path_equal(&tmp, &root))
+       error = path_with_deleted(path, &root, &res, &buflen);
+       if (error > 0)
                error = prepend_unreachable(&res, &buflen);
        write_sequnlock(&rename_lock);
        path_put(&root);
@@ -2755,22 +2788,23 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
        get_fs_root_and_pwd(current->fs, &root, &pwd);
 
        error = -ENOENT;
+       br_read_lock(vfsmount_lock);
        write_seqlock(&rename_lock);
        if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
-               struct path tmp = root;
                char *cwd = page + PAGE_SIZE;
                int buflen = PAGE_SIZE;
 
                prepend(&cwd, &buflen, "\0", 1);
-               error = prepend_path(&pwd, &tmp, &cwd, &buflen);
+               error = prepend_path(&pwd, &root, &cwd, &buflen);
                write_sequnlock(&rename_lock);
+               br_read_unlock(vfsmount_lock);
 
-               if (error)
+               if (error < 0)
                        goto out;
 
                /* Unreachable from current root */
-               if (!path_equal(&tmp, &root)) {
+               if (error > 0) {
                        error = prepend_unreachable(&cwd, &buflen);
                        if (error)
                                goto out;
@@ -2785,6 +2819,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                }
        } else {
                write_sequnlock(&rename_lock);
+               br_read_unlock(vfsmount_lock);
        }
 
 out:
@@ -2877,7 +2912,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
 
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -2898,26 +2933,45 @@ resume:
                }
                spin_unlock(&dentry->d_lock);
        }
+       rcu_read_lock();
+ascend:
        if (this_parent != root) {
                struct dentry *child = this_parent;
                if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
                        this_parent->d_flags |= DCACHE_GENOCIDE;
                        this_parent->d_count--;
                }
-               this_parent = try_to_ascend(this_parent, locked, seq);
-               if (!this_parent)
+               this_parent = child->d_parent;
+
+               spin_unlock(&child->d_lock);
+               spin_lock(&this_parent->d_lock);
+
+               /* might go back up the wrong parent if we have had a rename */
+               if (!locked && read_seqretry(&rename_lock, seq))
                        goto rename_retry;
-               next = child->d_u.d_child.next;
+               /* go into the first sibling still alive */
+               do {
+                       next = child->d_child.next;
+                       if (next == &this_parent->d_subdirs)
+                               goto ascend;
+                       child = list_entry(next, struct dentry, d_child);
+               } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
+               rcu_read_unlock();
                goto resume;
        }
-       spin_unlock(&this_parent->d_lock);
        if (!locked && read_seqretry(&rename_lock, seq))
                goto rename_retry;
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
        if (locked)
                write_sequnlock(&rename_lock);
        return;
 
 rename_retry:
+       spin_unlock(&this_parent->d_lock);
+       rcu_read_unlock();
+       if (locked)
+               goto again;
        locked = 1;
        write_seqlock(&rename_lock);
        goto again;