Btrfs: reduce stack in cow_file_range
[pandora-kernel.git] / fs / btrfs / ctree.c
index 2603ee5..3764248 100644 (file)
@@ -38,22 +38,64 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
 
-inline void btrfs_init_path(struct btrfs_path *p)
-{
-       memset(p, 0, sizeof(*p));
-}
-
 struct btrfs_path *btrfs_alloc_path(void)
 {
        struct btrfs_path *path;
-       path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
-       if (path) {
-               btrfs_init_path(path);
+       path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
+       if (path)       /* NULL means the allocation failed */
                path->reada = 1;
-       }
        return path;
 }
 
+/*
+ * make every lock held in the path blocking.  Do this before scheduling
+ */
+noinline void btrfs_set_path_blocking(struct btrfs_path *p)
+{
+       int level;
+       for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+               if (!p->nodes[level] || !p->locks[level])
+                       continue;
+               btrfs_set_lock_blocking(p->nodes[level]);
+       }
+}
+
+/*
+ * reset all the locked nodes in the path to spinning locks.
+ *
+ * held is only looked at when CONFIG_DEBUG_LOCK_ALLOC is set, to keep
+ * lockdep happy: it is set to a blocking lock before we go around and
+ * retake all the spinlocks in the path, and cleared again afterwards.
+ * You can safely use NULL for held
+ */
+noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
+                                       struct extent_buffer *held)
+{
+       int i;
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       /* lockdep really cares that we take all of these spinlocks
+        * in the right order.  If any of the locks in the path are not
+        * currently blocking, it is going to complain.  So, make really
+        * really sure by forcing the path to blocking before we clear
+        * the path blocking.
+        */
+       if (held)
+               btrfs_set_lock_blocking(held);
+       btrfs_set_path_blocking(p);
+#endif
+       /* go from the highest level in the path down to level 0 */
+       for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
+               if (p->nodes[i] && p->locks[i])
+                       btrfs_clear_lock_blocking(p->nodes[i]);
+       }
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+       if (held)
+               btrfs_clear_lock_blocking(held);
+#endif
+}
+
 /* this also releases the path */
 void btrfs_free_path(struct btrfs_path *p)
 {
@@ -212,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
  * empty_size -- a hint that you plan on doing more cow.  This is the size in
  * bytes the allocator should try to find free next to the block it returns.
  * This is just a hint and may be ignored by the allocator.
- *
- * prealloc_dest -- if you have already reserved a destination for the cow,
- * this uses that block instead of allocating a new one.
- * btrfs_alloc_reserved_extent is used to finish the allocation.
  */
 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct extent_buffer *buf,
                             struct extent_buffer *parent, int parent_slot,
                             struct extent_buffer **cow_ret,
-                            u64 search_start, u64 empty_size,
-                            u64 prealloc_dest)
+                            u64 search_start, u64 empty_size)
 {
        u64 parent_start;
        struct extent_buffer *cow;
@@ -235,7 +272,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        if (*cow_ret == buf)
                unlock_orig = 1;
 
-       WARN_ON(!btrfs_tree_locked(buf));
+       btrfs_assert_tree_locked(buf);
 
        if (parent)
                parent_start = parent->start;
@@ -249,29 +286,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);
 
-       if (prealloc_dest) {
-               struct btrfs_key ins;
-
-               ins.objectid = prealloc_dest;
-               ins.offset = buf->len;
-               ins.type = BTRFS_EXTENT_ITEM_KEY;
-
-               ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
-                                                 root->root_key.objectid,
-                                                 trans->transid, level, &ins);
-               BUG_ON(ret);
-               cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
-                                           buf->len);
-       } else {
-               cow = btrfs_alloc_free_block(trans, root, buf->len,
-                                            parent_start,
-                                            root->root_key.objectid,
-                                            trans->transid, level,
-                                            search_start, empty_size);
-       }
+       cow = btrfs_alloc_free_block(trans, root, buf->len,
+                                    parent_start, root->root_key.objectid,
+                                    trans->transid, level,
+                                    search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
+       /* cow is set to blocking by btrfs_init_new_buffer */
+
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
@@ -369,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, struct extent_buffer *buf,
                    struct extent_buffer *parent, int parent_slot,
-                   struct extent_buffer **cow_ret, u64 prealloc_dest)
+                   struct extent_buffer **cow_ret)
 {
        u64 search_start;
        int ret;
@@ -388,20 +411,21 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
                WARN_ON(1);
        }
 
-       spin_lock(&root->fs_info->hash_lock);
        if (btrfs_header_generation(buf) == trans->transid &&
            btrfs_header_owner(buf) == root->root_key.objectid &&
            !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
                *cow_ret = buf;
-               spin_unlock(&root->fs_info->hash_lock);
-               WARN_ON(prealloc_dest);
                return 0;
        }
-       spin_unlock(&root->fs_info->hash_lock);
+
        search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
+
+       if (parent)
+               btrfs_set_lock_blocking(parent);
+       btrfs_set_lock_blocking(buf);
+
        ret = __btrfs_cow_block(trans, root, buf, parent,
-                                parent_slot, cow_ret, search_start, 0,
-                                prealloc_dest);
+                                parent_slot, cow_ret, search_start, 0);
        return ret;
 }
 
@@ -504,6 +528,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
        if (parent_nritems == 1)
                return 0;
 
+       btrfs_set_lock_blocking(parent);
+
        for (i = start_slot; i < end_slot; i++) {
                int close = 1;
 
@@ -564,10 +590,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        search_start = last_block;
 
                btrfs_tree_lock(cur);
+               btrfs_set_lock_blocking(cur);
                err = __btrfs_cow_block(trans, root, cur, parent, i,
                                        &cur, search_start,
                                        min(16 * blocksize,
-                                           (end_slot - i) * blocksize), 0);
+                                           (end_slot - i) * blocksize));
                if (err) {
                        btrfs_tree_unlock(cur);
                        free_extent_buffer(cur);
@@ -862,6 +889,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                return 0;
 
        mid = path->nodes[level];
+
        WARN_ON(!path->locks[level]);
        WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
@@ -883,9 +911,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                /* promote the child to a root */
                child = read_node_slot(root, mid, 0);
-               btrfs_tree_lock(child);
                BUG_ON(!child);
-               ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
+               btrfs_tree_lock(child);
+               btrfs_set_lock_blocking(child);
+               ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
                BUG_ON(ret);
 
                spin_lock(&root->node_lock);
@@ -893,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                spin_unlock(&root->node_lock);
 
                ret = btrfs_update_extent_ref(trans, root, child->start,
+                                             child->len,
                                              mid->start, child->start,
                                              root->root_key.objectid,
                                              trans->transid, level - 1);
@@ -900,6 +930,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
                add_root_to_dirty_list(root);
                btrfs_tree_unlock(child);
+
                path->locks[level] = 0;
                path->nodes[level] = NULL;
                clean_tree_block(trans, root, mid);
@@ -924,8 +955,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        left = read_node_slot(root, parent, pslot - 1);
        if (left) {
                btrfs_tree_lock(left);
+               btrfs_set_lock_blocking(left);
                wret = btrfs_cow_block(trans, root, left,
-                                      parent, pslot - 1, &left, 0);
+                                      parent, pslot - 1, &left);
                if (wret) {
                        ret = wret;
                        goto enospc;
@@ -934,8 +966,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        right = read_node_slot(root, parent, pslot + 1);
        if (right) {
                btrfs_tree_lock(right);
+               btrfs_set_lock_blocking(right);
                wret = btrfs_cow_block(trans, root, right,
-                                      parent, pslot + 1, &right, 0);
+                                      parent, pslot + 1, &right);
                if (wret) {
                        ret = wret;
                        goto enospc;
@@ -1109,12 +1142,14 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
                u32 left_nr;
 
                btrfs_tree_lock(left);
+               btrfs_set_lock_blocking(left);
+
                left_nr = btrfs_header_nritems(left);
                if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, left, parent,
-                                             pslot - 1, &left, 0);
+                                             pslot - 1, &left);
                        if (ret)
                                wret = 1;
                        else {
@@ -1155,14 +1190,17 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
         */
        if (right) {
                u32 right_nr;
+
                btrfs_tree_lock(right);
+               btrfs_set_lock_blocking(right);
+
                right_nr = btrfs_header_nritems(right);
                if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
                        wret = 1;
                } else {
                        ret = btrfs_cow_block(trans, root, right,
                                              parent, pslot + 1,
-                                             &right, 0);
+                                             &right);
                        if (ret)
                                wret = 1;
                        else {
@@ -1266,6 +1304,68 @@ static noinline void reada_for_search(struct btrfs_root *root,
        }
 }
 
+/*
+ * read in the siblings of path->nodes[level] before a split or balance.
+ * returns -EAGAIN if it had to drop the path, zero if all was in cache
+ */
+static noinline int reada_for_balance(struct btrfs_root *root,
+                                     struct btrfs_path *path, int level)
+{
+       int slot;
+       int nritems;
+       struct extent_buffer *parent;
+       struct extent_buffer *eb;
+       u64 gen;
+       u64 block1 = 0;
+       u64 block2 = 0;
+       int ret = 0;
+       int blocksize;
+
+       parent = path->nodes[level + 1];  /* siblings live in the parent */
+       if (!parent)
+               return 0;
+
+       nritems = btrfs_header_nritems(parent);
+       slot = path->slots[level + 1];
+       blocksize = btrfs_level_size(root, level);
+
+       if (slot > 0) {
+               block1 = btrfs_node_blockptr(parent, slot - 1);
+               gen = btrfs_node_ptr_generation(parent, slot - 1);
+               eb = btrfs_find_tree_block(root, block1, blocksize);
+               if (eb && btrfs_buffer_uptodate(eb, gen))
+                       block1 = 0;     /* already cached, skip it */
+               free_extent_buffer(eb);
+       }
+       if (slot + 1 < nritems) {       /* slot + 1 must be a valid pointer */
+               block2 = btrfs_node_blockptr(parent, slot + 1);
+               gen = btrfs_node_ptr_generation(parent, slot + 1);
+               eb = btrfs_find_tree_block(root, block2, blocksize);
+               if (eb && btrfs_buffer_uptodate(eb, gen))
+                       block2 = 0;     /* already cached, skip it */
+               free_extent_buffer(eb);
+       }
+       if (block1 || block2) {
+               ret = -EAGAIN;
+               btrfs_release_path(root, path);  /* drop the path first */
+               if (block1)
+                       readahead_tree_block(root, block1, blocksize, 0);
+               if (block2)
+                       readahead_tree_block(root, block2, blocksize, 0);
+
+               if (block1) {
+                       eb = read_tree_block(root, block1, blocksize, 0);
+                       free_extent_buffer(eb);
+               }
+               if (block2) {
+                       eb = read_tree_block(root, block2, blocksize, 0);
+                       free_extent_buffer(eb);
+               }
+       }
+       return ret;
+}
+
+
 /*
  * when we walk down the tree, it is usually safe to unlock the higher layers
  * in the tree.  The exceptions are when our path goes through slot 0, because
@@ -1316,6 +1416,32 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
        }
 }
 
+/*
+ * Drop every lock the path still holds, from level all the way up to
+ * the root.
+ *
+ * btrfs_search_slot keeps the lock on higher nodes in a few corner
+ * cases, such as COW of the block at slot zero in the node.  Those rules
+ * are ignored here, so only call this when no more updates are going to
+ * be done higher up in the tree.
+ */
+noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
+{
+       int cur;
+
+       if (path->keep_locks || path->lowest_level)
+               return;
+
+       for (cur = level; cur < BTRFS_MAX_LEVEL; cur++) {
+               struct extent_buffer *eb = path->nodes[cur];
+
+               if (!eb || !path->locks[cur])
+                       continue;
+               btrfs_tree_unlock(eb);
+               path->locks[cur] = 0;
+       }
+}
+
 /*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
@@ -1344,7 +1470,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        u8 lowest_level = 0;
        u64 blocknr;
        u64 gen;
-       struct btrfs_key prealloc_block;
 
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
@@ -1353,8 +1478,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        if (ins_len < 0)
                lowest_unlock = 2;
 
-       prealloc_block.objectid = 0;
-
 again:
        if (p->skip_locking)
                b = btrfs_root_node(root);
@@ -1376,49 +1499,16 @@ again:
                        int wret;
 
                        /* is a cow on this block not required */
-                       spin_lock(&root->fs_info->hash_lock);
                        if (btrfs_header_generation(b) == trans->transid &&
                            btrfs_header_owner(b) == root->root_key.objectid &&
                            !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
-                               spin_unlock(&root->fs_info->hash_lock);
                                goto cow_done;
                        }
-                       spin_unlock(&root->fs_info->hash_lock);
-
-                       /* ok, we have to cow, is our old prealloc the right
-                        * size?
-                        */
-                       if (prealloc_block.objectid &&
-                           prealloc_block.offset != b->len) {
-                               btrfs_free_reserved_extent(root,
-                                          prealloc_block.objectid,
-                                          prealloc_block.offset);
-                               prealloc_block.objectid = 0;
-                       }
-
-                       /*
-                        * for higher level blocks, try not to allocate blocks
-                        * with the block and the parent locks held.
-                        */
-                       if (level > 1 && !prealloc_block.objectid &&
-                           btrfs_path_lock_waiting(p, level)) {
-                               u32 size = b->len;
-                               u64 hint = b->start;
-
-                               btrfs_release_path(root, p);
-                               ret = btrfs_reserve_extent(trans, root,
-                                                          size, size, 0,
-                                                          hint, (u64)-1,
-                                                          &prealloc_block, 0);
-                               BUG_ON(ret);
-                               goto again;
-                       }
+                       btrfs_set_path_blocking(p);
 
                        wret = btrfs_cow_block(trans, root, b,
                                               p->nodes[level + 1],
-                                              p->slots[level + 1],
-                                              &b, prealloc_block.objectid);
-                       prealloc_block.objectid = 0;
+                                              p->slots[level + 1], &b);
                        if (wret) {
                                free_extent_buffer(b);
                                ret = wret;
@@ -1435,6 +1525,22 @@ cow_done:
                if (!p->skip_locking)
                        p->locks[level] = 1;
 
+               btrfs_clear_path_blocking(p, NULL);
+
+               /*
+                * we have a lock on b and as long as we aren't changing
+                * the tree, there is no way for the items in b to change.
+                * It is safe to drop the lock on our parent before we
+                * go through the expensive btree search on b.
+                *
+                * If cow is true, then we might be changing slot zero,
+                * which may require changing the parent.  So, we can't
+                * drop the lock until after we know which slot we're
+                * operating on.
+                */
+               if (!cow)
+                       btrfs_unlock_up_safe(p, level + 1);
+
                ret = check_block(root, p, level);
                if (ret) {
                        ret = -1;
@@ -1442,6 +1548,7 @@ cow_done:
                }
 
                ret = bin_search(b, key, level, &slot);
+
                if (level != 0) {
                        if (ret && slot > 0)
                                slot -= 1;
@@ -1449,7 +1556,16 @@ cow_done:
                        if ((p->search_for_split || ins_len > 0) &&
                            btrfs_header_nritems(b) >=
                            BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
-                               int sret = split_node(trans, root, p, level);
+                               int sret;
+
+                               sret = reada_for_balance(root, p, level);
+                               if (sret)
+                                       goto again;
+
+                               btrfs_set_path_blocking(p);
+                               sret = split_node(trans, root, p, level);
+                               btrfs_clear_path_blocking(p, NULL);
+
                                BUG_ON(sret > 0);
                                if (sret) {
                                        ret = sret;
@@ -1457,9 +1573,19 @@ cow_done:
                                }
                                b = p->nodes[level];
                                slot = p->slots[level];
-                       } else if (ins_len < 0) {
-                               int sret = balance_level(trans, root, p,
-                                                        level);
+                       } else if (ins_len < 0 &&
+                                  btrfs_header_nritems(b) <
+                                  BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+                               int sret;
+
+                               sret = reada_for_balance(root, p, level);
+                               if (sret)
+                                       goto again;
+
+                               btrfs_set_path_blocking(p);
+                               sret = balance_level(trans, root, p, level);
+                               btrfs_clear_path_blocking(p, NULL);
+
                                if (sret) {
                                        ret = sret;
                                        goto done;
@@ -1493,7 +1619,7 @@ cow_done:
                                 * of the btree by dropping locks before
                                 * we read.
                                 */
-                               if (level > 1) {
+                               if (level > 0) {
                                        btrfs_release_path(NULL, p);
                                        if (tmp)
                                                free_extent_buffer(tmp);
@@ -1508,6 +1634,7 @@ cow_done:
                                                free_extent_buffer(tmp);
                                        goto again;
                                } else {
+                                       btrfs_set_path_blocking(p);
                                        if (tmp)
                                                free_extent_buffer(tmp);
                                        if (should_reada)
@@ -1517,14 +1644,29 @@ cow_done:
                                        b = read_node_slot(root, b, slot);
                                }
                        }
-                       if (!p->skip_locking)
-                               btrfs_tree_lock(b);
+                       if (!p->skip_locking) {
+                               int lret;
+
+                               btrfs_clear_path_blocking(p, NULL);
+                               lret = btrfs_try_spin_lock(b);
+
+                               if (!lret) {
+                                       btrfs_set_path_blocking(p);
+                                       btrfs_tree_lock(b);
+                                       btrfs_clear_path_blocking(p, b);
+                               }
+                       }
                } else {
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
-                               int sret = split_leaf(trans, root, key,
+                               int sret;
+
+                               btrfs_set_path_blocking(p);
+                               sret = split_leaf(trans, root, key,
                                                      p, ins_len, ret == 0);
+                               btrfs_clear_path_blocking(p, NULL);
+
                                BUG_ON(sret > 0);
                                if (sret) {
                                        ret = sret;
@@ -1538,12 +1680,11 @@ cow_done:
        }
        ret = 1;
 done:
-       if (prealloc_block.objectid) {
-               btrfs_free_reserved_extent(root,
-                          prealloc_block.objectid,
-                          prealloc_block.offset);
-       }
-
+       /*
+        * we don't really know what they plan on doing with the path
+        * from here on, so for now just mark it as blocking
+        */
+       btrfs_set_path_blocking(p);
        return ret;
 }
 
@@ -1564,9 +1705,11 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
        int ret;
 
        eb = btrfs_lock_root_node(root);
-       ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
+       ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
        BUG_ON(ret);
 
+       btrfs_set_lock_blocking(eb);
+
        parent = eb;
        while (1) {
                level = btrfs_header_level(parent);
@@ -1591,6 +1734,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
                        eb = read_tree_block(root, bytenr, blocksize,
                                             generation);
                        btrfs_tree_lock(eb);
+                       btrfs_set_lock_blocking(eb);
                }
 
                /*
@@ -1615,10 +1759,11 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans,
                                eb = read_tree_block(root, bytenr, blocksize,
                                                generation);
                                btrfs_tree_lock(eb);
+                               btrfs_set_lock_blocking(eb);
                        }
 
                        ret = btrfs_cow_block(trans, root, eb, parent, slot,
-                                             &eb, 0);
+                                             &eb);
                        BUG_ON(ret);
 
                        if (root->root_key.objectid ==
@@ -1931,7 +2076,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        spin_unlock(&root->node_lock);
 
        ret = btrfs_update_extent_ref(trans, root, lower->start,
-                                     lower->start, c->start,
+                                     lower->len, lower->start, c->start,
                                      root->root_key.objectid,
                                      trans->transid, level - 1);
        BUG_ON(ret);
@@ -2121,64 +2266,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
        return ret;
 }
 
-/*
- * push some data in the path leaf to the right, trying to free up at
- * least data_size bytes.  returns zero if the push worked, nonzero otherwise
- *
- * returns 1 if the push failed because the other node didn't have enough
- * room, 0 if everything worked out and < 0 if there were major errors.
- */
-static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
-                          *root, struct btrfs_path *path, int data_size,
-                          int empty)
+static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
+                                     struct btrfs_root *root,
+                                     struct btrfs_path *path,
+                                     int data_size, int empty,
+                                     struct extent_buffer *right,
+                                     int free_space, u32 left_nritems)
 {
        struct extent_buffer *left = path->nodes[0];
-       struct extent_buffer *right;
-       struct extent_buffer *upper;
+       struct extent_buffer *upper = path->nodes[1];
        struct btrfs_disk_key disk_key;
        int slot;
        u32 i;
-       int free_space;
        int push_space = 0;
        int push_items = 0;
        struct btrfs_item *item;
-       u32 left_nritems;
        u32 nr;
        u32 right_nritems;
        u32 data_end;
        u32 this_item_size;
        int ret;
 
-       slot = path->slots[1];
-       if (!path->nodes[1])
-               return 1;
-
-       upper = path->nodes[1];
-       if (slot >= btrfs_header_nritems(upper) - 1)
-               return 1;
-
-       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
-
-       right = read_node_slot(root, upper, slot + 1);
-       btrfs_tree_lock(right);
-       free_space = btrfs_leaf_free_space(root, right);
-       if (free_space < data_size)
-               goto out_unlock;
-
-       /* cow and double check */
-       ret = btrfs_cow_block(trans, root, right, upper,
-                             slot + 1, &right, 0);
-       if (ret)
-               goto out_unlock;
-
-       free_space = btrfs_leaf_free_space(root, right);
-       if (free_space < data_size)
-               goto out_unlock;
-
-       left_nritems = btrfs_header_nritems(left);
-       if (left_nritems == 0)
-               goto out_unlock;
-
        if (empty)
                nr = 0;
        else
@@ -2187,6 +2295,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
        if (path->slots[0] >= left_nritems)
                push_space += data_size;
 
+       slot = path->slots[1];
        i = left_nritems - 1;
        while (i >= nr) {
                item = btrfs_item_nr(left, i);
@@ -2317,25 +2426,83 @@ out_unlock:
        return 1;
 }
 
+/*
+ * push some data in the path leaf to the right, trying to free up at
+ * least data_size bytes.  returns 1 if the push failed (no usable right
+ * sibling or not enough room in it), 0 if everything worked out and
+ * < 0 if there were major errors.
+ */
+static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
+                          *root, struct btrfs_path *path, int data_size,
+                          int empty)
+{
+       struct extent_buffer *left = path->nodes[0];
+       struct extent_buffer *right;
+       struct extent_buffer *upper;
+       int slot;
+       int free_space;
+       u32 left_nritems;
+       int ret;
+
+       if (!path->nodes[1])
+               return 1;
+
+       slot = path->slots[1];
+       upper = path->nodes[1];
+       if (slot >= btrfs_header_nritems(upper) - 1)
+               return 1;
+       btrfs_assert_tree_locked(path->nodes[1]);
+
+       right = read_node_slot(root, upper, slot + 1);
+       if (!right)     /* nothing was read, so there is nothing to lock */
+               return 1;
+       btrfs_tree_lock(right);
+       btrfs_set_lock_blocking(right);
+
+       free_space = btrfs_leaf_free_space(root, right);
+       if (free_space < data_size)
+               goto out_unlock;
+
+       /* cow, then recheck the free space in the cowed block */
+       ret = btrfs_cow_block(trans, root, right, upper,
+                             slot + 1, &right);
+       if (ret)
+               goto out_unlock;
+
+       free_space = btrfs_leaf_free_space(root, right);
+       if (free_space < data_size)
+               goto out_unlock;
+
+       left_nritems = btrfs_header_nritems(left);
+       if (left_nritems == 0)
+               goto out_unlock;
+
+       return __push_leaf_right(trans, root, path, data_size, empty,
+                               right, free_space, left_nritems);
+out_unlock:
+       btrfs_tree_unlock(right);
+       free_extent_buffer(right);
+       return 1;
+}
+
 /*
  * push some data in the path leaf to the left, trying to free up at
  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
  */
-static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
-                         *root, struct btrfs_path *path, int data_size,
-                         int empty)
+static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
+                                    struct btrfs_root *root,
+                                    struct btrfs_path *path, int data_size,
+                                    int empty, struct extent_buffer *left,
+                                    int free_space, int right_nritems)
 {
        struct btrfs_disk_key disk_key;
        struct extent_buffer *right = path->nodes[0];
-       struct extent_buffer *left;
        int slot;
        int i;
-       int free_space;
        int push_space = 0;
        int push_items = 0;
        struct btrfs_item *item;
        u32 old_left_nritems;
-       u32 right_nritems;
        u32 nr;
        int ret = 0;
        int wret;
@@ -2343,54 +2510,21 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
        u32 old_left_item_size;
 
        slot = path->slots[1];
-       if (slot == 0)
-               return 1;
-       if (!path->nodes[1])
-               return 1;
-
-       right_nritems = btrfs_header_nritems(right);
-       if (right_nritems == 0)
-               return 1;
 
-       WARN_ON(!btrfs_tree_locked(path->nodes[1]));
+       if (empty)
+               nr = right_nritems;
+       else
+               nr = right_nritems - 1;
 
-       left = read_node_slot(root, path->nodes[1], slot - 1);
-       btrfs_tree_lock(left);
-       free_space = btrfs_leaf_free_space(root, left);
-       if (free_space < data_size) {
-               ret = 1;
-               goto out;
-       }
-
-       /* cow and double check */
-       ret = btrfs_cow_block(trans, root, left,
-                             path->nodes[1], slot - 1, &left, 0);
-       if (ret) {
-               /* we hit -ENOSPC, but it isn't fatal here */
-               ret = 1;
-               goto out;
-       }
-
-       free_space = btrfs_leaf_free_space(root, left);
-       if (free_space < data_size) {
-               ret = 1;
-               goto out;
-       }
-
-       if (empty)
-               nr = right_nritems;
-       else
-               nr = right_nritems - 1;
-
-       for (i = 0; i < nr; i++) {
-               item = btrfs_item_nr(right, i);
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
+       for (i = 0; i < nr; i++) {
+               item = btrfs_item_nr(right, i);
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
 
                if (!empty && push_items > 0) {
                        if (path->slots[0] < i)
@@ -2542,6 +2676,154 @@ out:
        return ret;
 }
 
+/*
+ * push some data in the path leaf to its left sibling, trying to free up at
+ * least data_size bytes.  returns zero if the push worked, nonzero otherwise
+ */
+static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
+                         *root, struct btrfs_path *path, int data_size,
+                         int empty)
+{
+       struct extent_buffer *right = path->nodes[0];
+       struct extent_buffer *left;
+       int slot;
+       int free_space;
+       u32 right_nritems;
+       int ret = 0;
+
+       slot = path->slots[1];
+       if (slot == 0)
+               return 1;       /* no slot - 1 to read a left leaf from */
+       if (!path->nodes[1])
+               return 1;       /* the path has no level 1 node */
+
+       right_nritems = btrfs_header_nritems(right);
+       if (right_nritems == 0)
+               return 1;       /* the path leaf has no items to push */
+
+       btrfs_assert_tree_locked(path->nodes[1]);
+
+       left = read_node_slot(root, path->nodes[1], slot - 1);
+       btrfs_tree_lock(left);
+       btrfs_set_lock_blocking(left);
+
+       free_space = btrfs_leaf_free_space(root, left);
+       if (free_space < data_size) {
+               ret = 1;
+               goto out;
+       }
+
+       /* cow left, then check its free space again on the cowed copy */
+       ret = btrfs_cow_block(trans, root, left,
+                             path->nodes[1], slot - 1, &left);
+       if (ret) {
+               /* cow failed (e.g. -ENOSPC), not fatal: just report no push */
+               ret = 1;
+               goto out;
+       }
+
+       free_space = btrfs_leaf_free_space(root, left);
+       if (free_space < data_size) {
+               ret = 1;
+               goto out;
+       }
+
+       return __push_leaf_left(trans, root, path, data_size,
+                              empty, left, free_space, right_nritems);
+out:
+       btrfs_tree_unlock(left);
+       free_extent_buffer(left);
+       return ret;
+}
+
+/*
+ * helper for split_leaf: copy the items of l from slot mid onwards into
+ * right, trim l down to mid items, insert a pointer to right at level 1
+ * and leave path->nodes[0] on the leaf that holds the original slot.
+ * returns 0 if all went well, otherwise the error from insert_ptr.
+ */
+static noinline int copy_for_split(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root,
+                              struct btrfs_path *path,
+                              struct extent_buffer *l,
+                              struct extent_buffer *right,
+                              int slot, int mid, int nritems)
+{
+       int data_copy_size;
+       int rt_data_off;
+       int i;
+       int ret = 0;
+       int wret;
+       struct btrfs_disk_key disk_key;
+
+       nritems = nritems - mid;        /* number of items moving to right */
+       btrfs_set_header_nritems(right, nritems);
+       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
+
+       copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
+                          btrfs_item_nr_offset(mid),
+                          nritems * sizeof(struct btrfs_item));
+
+       copy_extent_buffer(right, l,
+                    btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
+                    data_copy_size, btrfs_leaf_data(l) +
+                    leaf_data_end(root, l), data_copy_size);
+       /* data now sits at the end of right, shift item offsets to match */
+       rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
+                     btrfs_item_end_nr(l, mid);
+
+       for (i = 0; i < nritems; i++) {
+               struct btrfs_item *item = btrfs_item_nr(right, i);
+               u32 ioff;
+
+               if (!right->map_token) {
+                       map_extent_buffer(right, (unsigned long)item,
+                                       sizeof(struct btrfs_item),
+                                       &right->map_token, &right->kaddr,
+                                       &right->map_start, &right->map_len,
+                                       KM_USER1);
+               }
+
+               ioff = btrfs_item_offset(right, item);
+               btrfs_set_item_offset(right, item, ioff + rt_data_off);
+       }
+
+       if (right->map_token) {
+               unmap_extent_buffer(right, right->map_token, KM_USER1);
+               right->map_token = NULL;
+       }
+
+       btrfs_set_header_nritems(l, mid);
+       ret = 0;
+       btrfs_item_key(right, &disk_key, 0);
+       wret = insert_ptr(trans, root, path, &disk_key, right->start,
+                         path->slots[1] + 1, 1);
+       if (wret)
+               ret = wret;
+
+       btrfs_mark_buffer_dirty(right);
+       btrfs_mark_buffer_dirty(l);
+       BUG_ON(path->slots[0] != slot);
+       /* use wret so the insert_ptr status kept in ret is not overwritten */
+       wret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+       BUG_ON(wret);
+
+       if (mid <= slot) {
+               btrfs_tree_unlock(path->nodes[0]);
+               free_extent_buffer(path->nodes[0]);
+               path->nodes[0] = right;
+               path->slots[0] -= mid;
+               path->slots[1] += 1;
+       } else {
+               btrfs_tree_unlock(right);
+               free_extent_buffer(right);
+       }
+
+       BUG_ON(path->slots[0] < 0);
+
+       return ret;
+}
+
 /*
  * split the path's leaf in two, making sure there is at least data_size
  * available for the resulting leaf level of the path.
@@ -2559,14 +2841,10 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        int mid;
        int slot;
        struct extent_buffer *right;
-       int data_copy_size;
-       int rt_data_off;
-       int i;
        int ret = 0;
        int wret;
        int double_split;
        int num_doubles = 0;
-       struct btrfs_disk_key disk_key;
 
        /* first try to make some room by pushing left and right */
        if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
@@ -2618,11 +2896,14 @@ again:
        write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
                            (unsigned long)btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
+
        if (mid <= slot) {
                if (nritems == 1 ||
                    leaf_space_used(l, mid, nritems - mid) + data_size >
                        BTRFS_LEAF_DATA_SIZE(root)) {
                        if (slot >= nritems) {
+                               struct btrfs_disk_key disk_key;
+
                                btrfs_cpu_key_to_disk(&disk_key, ins_key);
                                btrfs_set_header_nritems(right, 0);
                                wret = insert_ptr(trans, root, path,
@@ -2650,6 +2931,8 @@ again:
                if (leaf_space_used(l, 0, mid) + data_size >
                        BTRFS_LEAF_DATA_SIZE(root)) {
                        if (!extend && data_size && slot == 0) {
+                               struct btrfs_disk_key disk_key;
+
                                btrfs_cpu_key_to_disk(&disk_key, ins_key);
                                btrfs_set_header_nritems(right, 0);
                                wret = insert_ptr(trans, root, path,
@@ -2682,76 +2965,16 @@ again:
                        }
                }
        }
-       nritems = nritems - mid;
-       btrfs_set_header_nritems(right, nritems);
-       data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
-
-       copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
-                          btrfs_item_nr_offset(mid),
-                          nritems * sizeof(struct btrfs_item));
-
-       copy_extent_buffer(right, l,
-                    btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
-                    data_copy_size, btrfs_leaf_data(l) +
-                    leaf_data_end(root, l), data_copy_size);
-
-       rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
-                     btrfs_item_end_nr(l, mid);
-
-       for (i = 0; i < nritems; i++) {
-               struct btrfs_item *item = btrfs_item_nr(right, i);
-               u32 ioff;
-
-               if (!right->map_token) {
-                       map_extent_buffer(right, (unsigned long)item,
-                                       sizeof(struct btrfs_item),
-                                       &right->map_token, &right->kaddr,
-                                       &right->map_start, &right->map_len,
-                                       KM_USER1);
-               }
-
-               ioff = btrfs_item_offset(right, item);
-               btrfs_set_item_offset(right, item, ioff + rt_data_off);
-       }
-
-       if (right->map_token) {
-               unmap_extent_buffer(right, right->map_token, KM_USER1);
-               right->map_token = NULL;
-       }
 
-       btrfs_set_header_nritems(l, mid);
-       ret = 0;
-       btrfs_item_key(right, &disk_key, 0);
-       wret = insert_ptr(trans, root, path, &disk_key, right->start,
-                         path->slots[1] + 1, 1);
-       if (wret)
-               ret = wret;
-
-       btrfs_mark_buffer_dirty(right);
-       btrfs_mark_buffer_dirty(l);
-       BUG_ON(path->slots[0] != slot);
-
-       ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
+       ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
        BUG_ON(ret);
 
-       if (mid <= slot) {
-               btrfs_tree_unlock(path->nodes[0]);
-               free_extent_buffer(path->nodes[0]);
-               path->nodes[0] = right;
-               path->slots[0] -= mid;
-               path->slots[1] += 1;
-       } else {
-               btrfs_tree_unlock(right);
-               free_extent_buffer(right);
-       }
-
-       BUG_ON(path->slots[0] < 0);
-
        if (double_split) {
                BUG_ON(num_doubles != 0);
                num_doubles++;
                goto again;
        }
+
        return ret;
 }
 
@@ -2814,6 +3037,12 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
        path->keep_locks = 0;
        BUG_ON(ret);
 
+       /*
+        * make sure any changes to the path from split_leaf leave it
+        * in a blocking state
+        */
+       btrfs_set_path_blocking(path);
+
        leaf = path->nodes[0];
        BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
 
@@ -3227,39 +3456,27 @@ out:
 }
 
 /*
- * Given a key and some data, insert items into the tree.
- * This does all the path init required, making room in the tree if needed.
+ * this is a helper for btrfs_insert_empty_items, the main goal here is
+ * to save stack depth by doing the bulk of the work in a function
+ * that doesn't call btrfs_search_slot
  */
-int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           struct btrfs_path *path,
-                           struct btrfs_key *cpu_key, u32 *data_size,
-                           int nr)
+static noinline_for_stack int
+setup_items_for_insert(struct btrfs_trans_handle *trans,
+                     struct btrfs_root *root, struct btrfs_path *path,
+                     struct btrfs_key *cpu_key, u32 *data_size,
+                     u32 total_data, u32 total_size, int nr)
 {
-       struct extent_buffer *leaf;
        struct btrfs_item *item;
-       int ret = 0;
-       int slot;
-       int slot_orig;
        int i;
        u32 nritems;
-       u32 total_size = 0;
-       u32 total_data = 0;
        unsigned int data_end;
        struct btrfs_disk_key disk_key;
+       int ret;
+       struct extent_buffer *leaf;
+       int slot;
 
-       for (i = 0; i < nr; i++)
-               total_data += data_size[i];
-
-       total_size = total_data + (nr * sizeof(struct btrfs_item));
-       ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
-       if (ret == 0)
-               return -EEXIST;
-       if (ret < 0)
-               goto out;
-
-       slot_orig = path->slots[0];
        leaf = path->nodes[0];
+       slot = path->slots[0];
 
        nritems = btrfs_header_nritems(leaf);
        data_end = leaf_data_end(root, leaf);
@@ -3271,9 +3488,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                BUG();
        }
 
-       slot = path->slots[0];
-       BUG_ON(slot < 0);
-
        if (slot != nritems) {
                unsigned int old_data = btrfs_item_end_nr(leaf, slot);
 
@@ -3329,11 +3543,13 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                data_end -= data_size[i];
                btrfs_set_item_size(leaf, item, data_size[i]);
        }
+
        btrfs_set_header_nritems(leaf, nritems + nr);
        btrfs_mark_buffer_dirty(leaf);
 
        ret = 0;
        if (slot == 0) {
+               struct btrfs_disk_key disk_key;
                btrfs_cpu_key_to_disk(&disk_key, cpu_key);
                ret = fixup_low_keys(trans, root, path, &disk_key, 1);
        }
@@ -3342,7 +3558,45 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
                btrfs_print_leaf(root, leaf);
                BUG();
        }
+       return ret;
+}
+
+/*
+ * Given a key and per-item data sizes, insert nr empty items into the tree.
+ * This does all the path init required, making room in the tree if needed.
+ */
+int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root,
+                           struct btrfs_path *path,
+                           struct btrfs_key *cpu_key, u32 *data_size,
+                           int nr)
+{
+       struct extent_buffer *leaf;
+       int ret = 0;
+       int slot;
+       int i;
+       u32 total_size = 0;
+       u32 total_data = 0;
+
+       for (i = 0; i < nr; i++)
+               total_data += data_size[i];
+       /* room needed: the data plus one struct btrfs_item per new item */
+       total_size = total_data + (nr * sizeof(struct btrfs_item));
+       ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
+       if (ret == 0)
+               return -EEXIST; /* NOTE(review): skips the unlock at out: */
+       if (ret < 0)
+               goto out;
+
+       leaf = path->nodes[0];  /* NOTE(review): never read in this function */
+       slot = path->slots[0];
+       BUG_ON(slot < 0);
+
+       ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
+                              total_data, total_size, nr);
+
 out:
+       btrfs_unlock_up_safe(path, 1);
        return ret;
 }
 
@@ -3430,15 +3684,22 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
 {
        int ret;
        u64 root_gen = btrfs_header_generation(path->nodes[1]);
+       u64 parent_start = path->nodes[1]->start;
+       u64 parent_owner = btrfs_header_owner(path->nodes[1]);
 
        ret = del_ptr(trans, root, path, 1, path->slots[1]);
        if (ret)
                return ret;
 
+       /*
+        * btrfs_free_extent is expensive, we want to make sure we
+        * aren't holding any locks when we call it
+        */
+       btrfs_unlock_up_safe(path, 0);
+
        ret = btrfs_free_extent(trans, root, bytenr,
                                btrfs_level_size(root, 0),
-                               path->nodes[1]->start,
-                               btrfs_header_owner(path->nodes[1]),
+                               parent_start, parent_owner,
                                root_gen, 0, 1);
        return ret;
 }
@@ -3710,6 +3971,7 @@ find_next_key:
                 */
                if (slot >= nritems) {
                        path->slots[level] = slot;
+                       btrfs_set_path_blocking(path);
                        sret = btrfs_find_next_key(root, path, min_key, level,
                                                  cache_only, min_trans);
                        if (sret == 0) {
@@ -3727,16 +3989,20 @@ find_next_key:
                        unlock_up(path, level, 1);
                        goto out;
                }
+               btrfs_set_path_blocking(path);
                cur = read_node_slot(root, cur, slot);
 
                btrfs_tree_lock(cur);
+
                path->locks[level - 1] = 1;
                path->nodes[level - 1] = cur;
                unlock_up(path, level, 1);
+               btrfs_clear_path_blocking(path, NULL);
        }
 out:
        if (ret == 0)
                memcpy(min_key, &found_key, sizeof(found_key));
+       btrfs_set_path_blocking(path);
        return ret;
 }
 
@@ -3832,6 +4098,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
        if (ret < 0)
                return ret;
 
+       btrfs_set_path_blocking(path);
        nritems = btrfs_header_nritems(path->nodes[0]);
        /*
         * by releasing the path above we dropped all our locks.  A balance
@@ -3862,14 +4129,16 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
                        free_extent_buffer(next);
                }
 
+               /* the path was set to blocking above */
                if (level == 1 && (path->locks[1] || path->skip_locking) &&
                    path->reada)
                        reada_for_search(root, path, level, slot, 0);
 
                next = read_node_slot(root, c, slot);
                if (!path->skip_locking) {
-                       WARN_ON(!btrfs_tree_locked(c));
+                       btrfs_assert_tree_locked(c);
                        btrfs_tree_lock(next);
+                       btrfs_set_lock_blocking(next);
                }
                break;
        }
@@ -3886,12 +4155,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
                        path->locks[level] = 1;
                if (!level)
                        break;
+
+               btrfs_set_path_blocking(path);
                if (level == 1 && path->locks[1] && path->reada)
                        reada_for_search(root, path, level, slot, 0);
                next = read_node_slot(root, next, 0);
                if (!path->skip_locking) {
-                       WARN_ON(!btrfs_tree_locked(path->nodes[level]));
+                       btrfs_assert_tree_locked(path->nodes[level]);
                        btrfs_tree_lock(next);
+                       btrfs_set_lock_blocking(next);
                }
        }
 done:
@@ -3916,6 +4188,7 @@ int btrfs_previous_item(struct btrfs_root *root,
 
        while (1) {
                if (path->slots[0] == 0) {
+                       btrfs_set_path_blocking(path);
                        ret = btrfs_prev_leaf(root, path);
                        if (ret != 0)
                                return ret;