Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[pandora-kernel.git] / fs / btrfs / ctree.c
index fedf8b9..3fdcc05 100644 (file)
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        u32 nritems;
        int ret = 0;
        int level;
-       struct btrfs_root *new_root;
-
-       new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
-       if (!new_root)
-               return -ENOMEM;
-
-       memcpy(new_root, root, sizeof(*new_root));
-       new_root->root_key.objectid = new_root_objectid;
+       struct btrfs_disk_key disk_key;
 
        WARN_ON(root->ref_cows && trans->transid !=
                root->fs_info->running_transaction->transid);
@@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 
        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);
+       if (level == 0)
+               btrfs_item_key(buf, &disk_key, 0);
+       else
+               btrfs_node_key(buf, &disk_key, 0);
 
-       cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
-                                    new_root_objectid, trans->transid,
-                                    level, buf->start, 0);
-       if (IS_ERR(cow)) {
-               kfree(new_root);
+       cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
+                                    new_root_objectid, &disk_key, level,
+                                    buf->start, 0);
+       if (IS_ERR(cow))
                return PTR_ERR(cow);
-       }
 
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
-       btrfs_set_header_owner(cow, new_root_objectid);
-       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+       btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+                                    BTRFS_HEADER_FLAG_RELOC);
+       if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+               btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+       else
+               btrfs_set_header_owner(cow, new_root_objectid);
 
        write_extent_buffer(cow, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
-       ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
-       kfree(new_root);
+       if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+               ret = btrfs_inc_ref(trans, root, cow, 1);
+       else
+               ret = btrfs_inc_ref(trans, root, cow, 0);
 
        if (ret)
                return ret;
@@ -243,6 +245,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+/*
+ * check if the tree block can be shared by multiple trees
+ */
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+                             struct extent_buffer *buf)
+{
+       /*
+        * Tree blocks not in refernece counted trees and tree roots
+        * are never shared. If a block was allocated after the last
+        * snapshot and the block was not allocated by tree relocation,
+        * we know the block is not shared.
+        */
+       if (root->ref_cows &&
+           buf != root->node && buf != root->commit_root &&
+           (btrfs_header_generation(buf) <=
+            btrfs_root_last_snapshot(&root->root_item) ||
+            btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+               return 1;
+#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
+       if (root->ref_cows &&
+           btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+               return 1;
+#endif
+       return 0;
+}
+
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+                                      struct btrfs_root *root,
+                                      struct extent_buffer *buf,
+                                      struct extent_buffer *cow)
+{
+       u64 refs;
+       u64 owner;
+       u64 flags;
+       u64 new_flags = 0;
+       int ret;
+
+       /*
+        * Backrefs update rules:
+        *
+        * Always use full backrefs for extent pointers in tree block
+        * allocated by tree relocation.
+        *
+        * If a shared tree block is no longer referenced by its owner
+        * tree (btrfs_header_owner(buf) == root->root_key.objectid),
+        * use full backrefs for extent pointers in tree block.
+        *
+        * If a tree block is been relocating
+        * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+        * use full backrefs for extent pointers in tree block.
+        * The reason for this is some operations (such as drop tree)
+        * are only allowed for blocks use full backrefs.
+        */
+
+       if (btrfs_block_can_be_shared(root, buf)) {
+               ret = btrfs_lookup_extent_info(trans, root, buf->start,
+                                              buf->len, &refs, &flags);
+               BUG_ON(ret);
+               BUG_ON(refs == 0);
+       } else {
+               refs = 1;
+               if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+                   btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+                       flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               else
+                       flags = 0;
+       }
+
+       owner = btrfs_header_owner(buf);
+       BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
+              !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
+
+       if (refs > 1) {
+               if ((owner == root->root_key.objectid ||
+                    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+                   !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
+                       ret = btrfs_inc_ref(trans, root, buf, 1);
+                       BUG_ON(ret);
+
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID) {
+                               ret = btrfs_dec_ref(trans, root, buf, 0);
+                               BUG_ON(ret);
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                               BUG_ON(ret);
+                       }
+                       new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
+               } else {
+
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID)
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                       else
+                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                       BUG_ON(ret);
+               }
+               if (new_flags != 0) {
+                       ret = btrfs_set_disk_extent_flags(trans, root,
+                                                         buf->start,
+                                                         buf->len,
+                                                         new_flags, 0);
+                       BUG_ON(ret);
+               }
+       } else {
+               if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+                       if (root->root_key.objectid ==
+                           BTRFS_TREE_RELOC_OBJECTID)
+                               ret = btrfs_inc_ref(trans, root, cow, 1);
+                       else
+                               ret = btrfs_inc_ref(trans, root, cow, 0);
+                       BUG_ON(ret);
+                       ret = btrfs_dec_ref(trans, root, buf, 1);
+                       BUG_ON(ret);
+               }
+               clean_tree_block(trans, root, buf);
+       }
+       return 0;
+}
+
 /*
  * does the dirty work in cow of a single block.  The parent block (if
  * supplied) is updated to point to the new cow copy.  The new buffer is marked
@@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                             struct extent_buffer **cow_ret,
                             u64 search_start, u64 empty_size)
 {
-       u64 parent_start;
+       struct btrfs_disk_key disk_key;
        struct extent_buffer *cow;
-       u32 nritems;
-       int ret = 0;
        int level;
        int unlock_orig = 0;
+       u64 parent_start;
 
        if (*cow_ret == buf)
                unlock_orig = 1;
 
        btrfs_assert_tree_locked(buf);
 
-       if (parent)
-               parent_start = parent->start;
-       else
-               parent_start = 0;
-
        WARN_ON(root->ref_cows && trans->transid !=
                root->fs_info->running_transaction->transid);
        WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
        level = btrfs_header_level(buf);
-       nritems = btrfs_header_nritems(buf);
 
-       cow = btrfs_alloc_free_block(trans, root, buf->len,
-                                    parent_start, root->root_key.objectid,
-                                    trans->transid, level,
-                                    search_start, empty_size);
+       if (level == 0)
+               btrfs_item_key(buf, &disk_key, 0);
+       else
+               btrfs_node_key(buf, &disk_key, 0);
+
+       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+               if (parent)
+                       parent_start = parent->start;
+               else
+                       parent_start = 0;
+       } else
+               parent_start = 0;
+
+       cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
+                                    root->root_key.objectid, &disk_key,
+                                    level, search_start, empty_size);
        if (IS_ERR(cow))
                return PTR_ERR(cow);
 
@@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        copy_extent_buffer(cow, buf, 0, 0, cow->len);
        btrfs_set_header_bytenr(cow, cow->start);
        btrfs_set_header_generation(cow, trans->transid);
-       btrfs_set_header_owner(cow, root->root_key.objectid);
-       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+       btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
+       btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+                                    BTRFS_HEADER_FLAG_RELOC);
+       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+               btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+       else
+               btrfs_set_header_owner(cow, root->root_key.objectid);
 
        write_extent_buffer(cow, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
-       WARN_ON(btrfs_header_generation(buf) > trans->transid);
-       if (btrfs_header_generation(buf) != trans->transid) {
-               u32 nr_extents;
-               ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
-               if (ret)
-                       return ret;
-
-               ret = btrfs_cache_ref(trans, root, buf, nr_extents);
-               WARN_ON(ret);
-       } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
-               /*
-                * There are only two places that can drop reference to
-                * tree blocks owned by living reloc trees, one is here,
-                * the other place is btrfs_drop_subtree. In both places,
-                * we check reference count while tree block is locked.
-                * Furthermore, if reference count is one, it won't get
-                * increased by someone else.
-                */
-               u32 refs;
-               ret = btrfs_lookup_extent_ref(trans, root, buf->start,
-                                             buf->len, &refs);
-               BUG_ON(ret);
-               if (refs == 1) {
-                       ret = btrfs_update_ref(trans, root, buf, cow,
-                                              0, nritems);
-                       clean_tree_block(trans, root, buf);
-               } else {
-                       ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
-               }
-               BUG_ON(ret);
-       } else {
-               ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
-               if (ret)
-                       return ret;
-               clean_tree_block(trans, root, buf);
-       }
-
-       if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
-               ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
-               WARN_ON(ret);
-       }
+       update_ref_for_cow(trans, root, buf, cow);
 
        if (buf == root->node) {
                WARN_ON(parent && parent != buf);
+               if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
+                   btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
+                       parent_start = buf->start;
+               else
+                       parent_start = 0;
 
                spin_lock(&root->node_lock);
                root->node = cow;
                extent_buffer_get(cow);
                spin_unlock(&root->node_lock);
 
-               if (buf != root->commit_root) {
-                       btrfs_free_extent(trans, root, buf->start,
-                                         buf->len, buf->start,
-                                         root->root_key.objectid,
-                                         btrfs_header_generation(buf),
-                                         level, 1);
-               }
+               btrfs_free_extent(trans, root, buf->start, buf->len,
+                                 parent_start, root->root_key.objectid,
+                                 level, 0);
                free_extent_buffer(buf);
                add_root_to_dirty_list(root);
        } else {
+               if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+                       parent_start = parent->start;
+               else
+                       parent_start = 0;
+
+               WARN_ON(trans->transid != btrfs_header_generation(parent));
                btrfs_set_node_blockptr(parent, parent_slot,
                                        cow->start);
-               WARN_ON(trans->transid == 0);
                btrfs_set_node_ptr_generation(parent, parent_slot,
                                              trans->transid);
                btrfs_mark_buffer_dirty(parent);
-               WARN_ON(btrfs_header_generation(parent) != trans->transid);
                btrfs_free_extent(trans, root, buf->start, buf->len,
-                                 parent_start, btrfs_header_owner(parent),
-                                 btrfs_header_generation(parent), level, 1);
+                                 parent_start, root->root_key.objectid,
+                                 level, 0);
        }
        if (unlock_orig)
                btrfs_tree_unlock(buf);
@@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+static inline int should_cow_block(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct extent_buffer *buf)
+{
+       if (btrfs_header_generation(buf) == trans->transid &&
+           !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
+           !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
+             btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+               return 0;
+       return 1;
+}
+
 /*
  * cows a single block, see __btrfs_cow_block for the real work.
  * This version of it has extra checks so that a block isn't cow'd more than
@@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
                WARN_ON(1);
        }
 
-       if (btrfs_header_generation(buf) == trans->transid &&
-           btrfs_header_owner(buf) == root->root_key.objectid &&
-           !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
+       if (!should_cow_block(trans, root, buf)) {
                *cow_ret = buf;
                return 0;
        }
@@ -451,25 +557,13 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 
        btrfs_disk_key_to_cpu(&k1, disk);
 
-       if (k1.objectid > k2->objectid)
-               return 1;
-       if (k1.objectid < k2->objectid)
-               return -1;
-       if (k1.type > k2->type)
-               return 1;
-       if (k1.type < k2->type)
-               return -1;
-       if (k1.offset > k2->offset)
-               return 1;
-       if (k1.offset < k2->offset)
-               return -1;
-       return 0;
+       return btrfs_comp_cpu_keys(&k1, k2);
 }
 
 /*
  * same as comp_keys only with two btrfs_key's
  */
-static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 {
        if (k1->objectid > k2->objectid)
                return 1;
@@ -845,6 +939,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
        return -1;
 }
 
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+                    int level, int *slot)
+{
+       return bin_search(eb, key, level, slot);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -921,13 +1021,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                root->node = child;
                spin_unlock(&root->node_lock);
 
-               ret = btrfs_update_extent_ref(trans, root, child->start,
-                                             child->len,
-                                             mid->start, child->start,
-                                             root->root_key.objectid,
-                                             trans->transid, level - 1);
-               BUG_ON(ret);
-
                add_root_to_dirty_list(root);
                btrfs_tree_unlock(child);
 
@@ -938,9 +1031,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                /* once for the path */
                free_extent_buffer(mid);
                ret = btrfs_free_extent(trans, root, mid->start, mid->len,
-                                       mid->start, root->root_key.objectid,
-                                       btrfs_header_generation(mid),
-                                       level, 1);
+                                       0, root->root_key.objectid, level, 1);
                /* once for the root ptr */
                free_extent_buffer(mid);
                return ret;
@@ -949,10 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
            BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
                return 0;
 
-       if (trans->transaction->delayed_refs.flushing &&
-           btrfs_header_nritems(mid) > 2)
-               return 0;
-
        if (btrfs_header_nritems(mid) < 2)
                err_on_enospc = 1;
 
@@ -998,7 +1085,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        ret = wret;
                if (btrfs_header_nritems(right) == 0) {
                        u64 bytenr = right->start;
-                       u64 generation = btrfs_header_generation(parent);
                        u32 blocksize = right->len;
 
                        clean_tree_block(trans, root, right);
@@ -1010,9 +1096,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        if (wret)
                                ret = wret;
                        wret = btrfs_free_extent(trans, root, bytenr,
-                                                blocksize, parent->start,
-                                                btrfs_header_owner(parent),
-                                                generation, level, 1);
+                                                blocksize, 0,
+                                                root->root_key.objectid,
+                                                level, 0);
                        if (wret)
                                ret = wret;
                } else {
@@ -1047,7 +1133,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
        }
        if (btrfs_header_nritems(mid) == 0) {
                /* we've managed to empty the middle node, drop it */
-               u64 root_gen = btrfs_header_generation(parent);
                u64 bytenr = mid->start;
                u32 blocksize = mid->len;
 
@@ -1059,9 +1144,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                if (wret)
                        ret = wret;
                wret = btrfs_free_extent(trans, root, bytenr, blocksize,
-                                        parent->start,
-                                        btrfs_header_owner(parent),
-                                        root_gen, level, 1);
+                                        0, root->root_key.objectid,
+                                        level, 0);
                if (wret)
                        ret = wret;
        } else {
@@ -1437,7 +1521,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 {
        int i;
 
-       if (path->keep_locks || path->lowest_level)
+       if (path->keep_locks)
                return;
 
        for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -1552,7 +1636,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
                }
                b = p->nodes[level];
        } else if (ins_len < 0 && btrfs_header_nritems(b) <
-                  BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
+                  BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
                int sret;
 
                sret = reada_for_balance(root, p, level);
@@ -1602,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        struct extent_buffer *b;
        int slot;
        int ret;
+       int err;
        int level;
        int lowest_unlock = 1;
        u8 lowest_level = 0;
@@ -1614,10 +1699,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
                lowest_unlock = 2;
 
 again:
-       if (p->skip_locking)
-               b = btrfs_root_node(root);
-       else
-               b = btrfs_lock_root_node(root);
+       if (p->search_commit_root) {
+               b = root->commit_root;
+               extent_buffer_get(b);
+               if (!p->skip_locking)
+                       btrfs_tree_lock(b);
+       } else {
+               if (p->skip_locking)
+                       b = btrfs_root_node(root);
+               else
+                       b = btrfs_lock_root_node(root);
+       }
 
        while (b) {
                level = btrfs_header_level(b);
@@ -1631,26 +1723,22 @@ again:
                        p->locks[level] = 1;
 
                if (cow) {
-                       int wret;
-
                        /*
                         * if we don't really need to cow this block
                         * then we don't want to set the path blocking,
                         * so we test it here
                         */
-                       if (btrfs_header_generation(b) == trans->transid &&
-                           btrfs_header_owner(b) == root->root_key.objectid &&
-                           !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
+                       if (!should_cow_block(trans, root, b))
                                goto cow_done;
-                       }
+
                        btrfs_set_path_blocking(p);
 
-                       wret = btrfs_cow_block(trans, root, b,
-                                              p->nodes[level + 1],
-                                              p->slots[level + 1], &b);
-                       if (wret) {
+                       err = btrfs_cow_block(trans, root, b,
+                                             p->nodes[level + 1],
+                                             p->slots[level + 1], &b);
+                       if (err) {
                                free_extent_buffer(b);
-                               ret = wret;
+                               ret = err;
                                goto done;
                        }
                }
@@ -1689,41 +1777,45 @@ cow_done:
                ret = bin_search(b, key, level, &slot);
 
                if (level != 0) {
-                       if (ret && slot > 0)
+                       int dec = 0;
+                       if (ret && slot > 0) {
+                               dec = 1;
                                slot -= 1;
+                       }
                        p->slots[level] = slot;
-                       ret = setup_nodes_for_search(trans, root, p, b, level,
+                       err = setup_nodes_for_search(trans, root, p, b, level,
                                                     ins_len);
-                       if (ret == -EAGAIN)
+                       if (err == -EAGAIN)
                                goto again;
-                       else if (ret)
+                       if (err) {
+                               ret = err;
                                goto done;
+                       }
                        b = p->nodes[level];
                        slot = p->slots[level];
 
                        unlock_up(p, level, lowest_unlock);
 
-                       /* this is only true while dropping a snapshot */
                        if (level == lowest_level) {
-                               ret = 0;
+                               if (dec)
+                                       p->slots[level]++;
                                goto done;
                        }
 
-                       ret = read_block_for_search(trans, root, p,
+                       err = read_block_for_search(trans, root, p,
                                                    &b, level, slot, key);
-                       if (ret == -EAGAIN)
+                       if (err == -EAGAIN)
                                goto again;
-
-                       if (ret == -EIO)
+                       if (err) {
+                               ret = err;
                                goto done;
+                       }
 
                        if (!p->skip_locking) {
-                               int lret;
-
                                btrfs_clear_path_blocking(p, NULL);
-                               lret = btrfs_try_spin_lock(b);
+                               err = btrfs_try_spin_lock(b);
 
-                               if (!lret) {
+                               if (!err) {
                                        btrfs_set_path_blocking(p);
                                        btrfs_tree_lock(b);
                                        btrfs_clear_path_blocking(p, b);
@@ -1733,16 +1825,14 @@ cow_done:
                        p->slots[level] = slot;
                        if (ins_len > 0 &&
                            btrfs_leaf_free_space(root, b) < ins_len) {
-                               int sret;
-
                                btrfs_set_path_blocking(p);
-                               sret = split_leaf(trans, root, key,
-                                                     p, ins_len, ret == 0);
+                               err = split_leaf(trans, root, key,
+                                                p, ins_len, ret == 0);
                                btrfs_clear_path_blocking(p, NULL);
 
-                               BUG_ON(sret > 0);
-                               if (sret) {
-                                       ret = sret;
+                               BUG_ON(err > 0);
+                               if (err) {
+                                       ret = err;
                                        goto done;
                                }
                        }
@@ -1764,138 +1854,6 @@ done:
        return ret;
 }
 
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-                    struct btrfs_root *root,
-                    struct btrfs_key *node_keys,
-                    u64 *nodes, int lowest_level)
-{
-       struct extent_buffer *eb;
-       struct extent_buffer *parent;
-       struct btrfs_key key;
-       u64 bytenr;
-       u64 generation;
-       u32 blocksize;
-       int level;
-       int slot;
-       int key_match;
-       int ret;
-
-       eb = btrfs_lock_root_node(root);
-       ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
-       BUG_ON(ret);
-
-       btrfs_set_lock_blocking(eb);
-
-       parent = eb;
-       while (1) {
-               level = btrfs_header_level(parent);
-               if (level == 0 || level <= lowest_level)
-                       break;
-
-               ret = bin_search(parent, &node_keys[lowest_level], level,
-                                &slot);
-               if (ret && slot > 0)
-                       slot--;
-
-               bytenr = btrfs_node_blockptr(parent, slot);
-               if (nodes[level - 1] == bytenr)
-                       break;
-
-               blocksize = btrfs_level_size(root, level - 1);
-               generation = btrfs_node_ptr_generation(parent, slot);
-               btrfs_node_key_to_cpu(eb, &key, slot);
-               key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
-
-               if (generation == trans->transid) {
-                       eb = read_tree_block(root, bytenr, blocksize,
-                                            generation);
-                       btrfs_tree_lock(eb);
-                       btrfs_set_lock_blocking(eb);
-               }
-
-               /*
-                * if node keys match and node pointer hasn't been modified
-                * in the running transaction, we can merge the path. for
-                * blocks owened by reloc trees, the node pointer check is
-                * skipped, this is because these blocks are fully controlled
-                * by the space balance code, no one else can modify them.
-                */
-               if (!nodes[level - 1] || !key_match ||
-                   (generation == trans->transid &&
-                    btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
-                       if (level == 1 || level == lowest_level + 1) {
-                               if (generation == trans->transid) {
-                                       btrfs_tree_unlock(eb);
-                                       free_extent_buffer(eb);
-                               }
-                               break;
-                       }
-
-                       if (generation != trans->transid) {
-                               eb = read_tree_block(root, bytenr, blocksize,
-                                               generation);
-                               btrfs_tree_lock(eb);
-                               btrfs_set_lock_blocking(eb);
-                       }
-
-                       ret = btrfs_cow_block(trans, root, eb, parent, slot,
-                                             &eb);
-                       BUG_ON(ret);
-
-                       if (root->root_key.objectid ==
-                           BTRFS_TREE_RELOC_OBJECTID) {
-                               if (!nodes[level - 1]) {
-                                       nodes[level - 1] = eb->start;
-                                       memcpy(&node_keys[level - 1], &key,
-                                              sizeof(node_keys[0]));
-                               } else {
-                                       WARN_ON(1);
-                               }
-                       }
-
-                       btrfs_tree_unlock(parent);
-                       free_extent_buffer(parent);
-                       parent = eb;
-                       continue;
-               }
-
-               btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
-               btrfs_set_node_ptr_generation(parent, slot, trans->transid);
-               btrfs_mark_buffer_dirty(parent);
-
-               ret = btrfs_inc_extent_ref(trans, root,
-                                       nodes[level - 1],
-                                       blocksize, parent->start,
-                                       btrfs_header_owner(parent),
-                                       btrfs_header_generation(parent),
-                                       level - 1);
-               BUG_ON(ret);
-
-               /*
-                * If the block was created in the running transaction,
-                * it's possible this is the last reference to it, so we
-                * should drop the subtree.
-                */
-               if (generation == trans->transid) {
-                       ret = btrfs_drop_subtree(trans, root, eb, parent);
-                       BUG_ON(ret);
-                       btrfs_tree_unlock(eb);
-                       free_extent_buffer(eb);
-               } else {
-                       ret = btrfs_free_extent(trans, root, bytenr,
-                                       blocksize, parent->start,
-                                       btrfs_header_owner(parent),
-                                       btrfs_header_generation(parent),
-                                       level - 1, 1);
-                       BUG_ON(ret);
-               }
-               break;
-       }
-       btrfs_tree_unlock(parent);
-       free_extent_buffer(parent);
-       return 0;
-}
-
 /*
  * adjust the pointers going up the tree, starting at level
  * making sure the right key of each node is points to 'key'.
@@ -2021,9 +1979,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);
 
-       ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
-       BUG_ON(ret);
-
        return ret;
 }
 
@@ -2083,9 +2038,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(src);
        btrfs_mark_buffer_dirty(dst);
 
-       ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
-       BUG_ON(ret);
-
        return ret;
 }
 
@@ -2105,7 +2057,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        struct extent_buffer *c;
        struct extent_buffer *old;
        struct btrfs_disk_key lower_key;
-       int ret;
 
        BUG_ON(path->nodes[level]);
        BUG_ON(path->nodes[level-1] != root->node);
@@ -2117,16 +2068,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
                btrfs_node_key(lower, &lower_key, 0);
 
        c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
-                                  root->root_key.objectid, trans->transid,
+                                  root->root_key.objectid, &lower_key,
                                   level, root->node->start, 0);
        if (IS_ERR(c))
                return PTR_ERR(c);
 
-       memset_extent_buffer(c, 0, 0, root->nodesize);
+       memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_nritems(c, 1);
        btrfs_set_header_level(c, level);
        btrfs_set_header_bytenr(c, c->start);
        btrfs_set_header_generation(c, trans->transid);
+       btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(c, root->root_key.objectid);
 
        write_extent_buffer(c, root->fs_info->fsid,
@@ -2151,12 +2103,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        root->node = c;
        spin_unlock(&root->node_lock);
 
-       ret = btrfs_update_extent_ref(trans, root, lower->start,
-                                     lower->len, lower->start, c->start,
-                                     root->root_key.objectid,
-                                     trans->transid, level - 1);
-       BUG_ON(ret);
-
        /* the super has an extra ref to root->node */
        free_extent_buffer(old);
 
@@ -2233,7 +2179,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                ret = insert_new_root(trans, root, path, level + 1);
                if (ret)
                        return ret;
-       } else if (!trans->transaction->delayed_refs.flushing) {
+       } else {
                ret = push_nodes_for_insert(trans, root, path, level);
                c = path->nodes[level];
                if (!ret && btrfs_header_nritems(c) <
@@ -2244,20 +2190,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        }
 
        c_nritems = btrfs_header_nritems(c);
+       mid = (c_nritems + 1) / 2;
+       btrfs_node_key(c, &disk_key, mid);
 
-       split = btrfs_alloc_free_block(trans, root, root->nodesize,
-                                       path->nodes[level + 1]->start,
+       split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
                                        root->root_key.objectid,
-                                       trans->transid, level, c->start, 0);
+                                       &disk_key, level, c->start, 0);
        if (IS_ERR(split))
                return PTR_ERR(split);
 
-       btrfs_set_header_flags(split, btrfs_header_flags(c));
+       memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_level(split, btrfs_header_level(c));
        btrfs_set_header_bytenr(split, split->start);
        btrfs_set_header_generation(split, trans->transid);
+       btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(split, root->root_key.objectid);
-       btrfs_set_header_flags(split, 0);
        write_extent_buffer(split, root->fs_info->fsid,
                            (unsigned long)btrfs_header_fsid(split),
                            BTRFS_FSID_SIZE);
@@ -2265,7 +2212,6 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
                            (unsigned long)btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
 
-       mid = (c_nritems + 1) / 2;
 
        copy_extent_buffer(split, c,
                           btrfs_node_key_ptr_offset(0),
@@ -2278,16 +2224,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(c);
        btrfs_mark_buffer_dirty(split);
 
-       btrfs_node_key(split, &disk_key, 0);
        wret = insert_ptr(trans, root, path, &disk_key, split->start,
                          path->slots[level + 1] + 1,
                          level + 1);
        if (wret)
                ret = wret;
 
-       ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
-       BUG_ON(ret);
-
        if (path->slots[level] >= mid) {
                path->slots[level] -= mid;
                btrfs_tree_unlock(c);
@@ -2360,7 +2302,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
        u32 right_nritems;
        u32 data_end;
        u32 this_item_size;
-       int ret;
 
        if (empty)
                nr = 0;
@@ -2473,9 +2414,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
                btrfs_mark_buffer_dirty(left);
        btrfs_mark_buffer_dirty(right);
 
-       ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
-       BUG_ON(ret);
-
        btrfs_item_key(right, &disk_key, 0);
        btrfs_set_node_key(upper, &disk_key, slot + 1);
        btrfs_mark_buffer_dirty(upper);
@@ -2720,10 +2658,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
        if (right_nritems)
                btrfs_mark_buffer_dirty(right);
 
-       ret = btrfs_update_ref(trans, root, right, left,
-                              old_left_nritems, push_items);
-       BUG_ON(ret);
-
        btrfs_item_key(right, &disk_key, 0);
        wret = fixup_low_keys(trans, root, path, &disk_key, 1);
        if (wret)
@@ -2880,9 +2814,6 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(l);
        BUG_ON(path->slots[0] != slot);
 
-       ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
-       BUG_ON(ret);
-
        if (mid <= slot) {
                btrfs_tree_unlock(path->nodes[0]);
                free_extent_buffer(path->nodes[0]);
@@ -2911,6 +2842,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                               struct btrfs_path *path, int data_size,
                               int extend)
 {
+       struct btrfs_disk_key disk_key;
        struct extent_buffer *l;
        u32 nritems;
        int mid;
@@ -2918,12 +2850,11 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
        struct extent_buffer *right;
        int ret = 0;
        int wret;
-       int double_split;
+       int split;
        int num_doubles = 0;
 
        /* first try to make some room by pushing left and right */
-       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY &&
-           !trans->transaction->delayed_refs.flushing) {
+       if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
                wret = push_leaf_right(trans, root, path, data_size, 0);
                if (wret < 0)
                        return wret;
@@ -2945,16 +2876,53 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
                        return ret;
        }
 again:
-       double_split = 0;
+       split = 1;
        l = path->nodes[0];
        slot = path->slots[0];
        nritems = btrfs_header_nritems(l);
        mid = (nritems + 1) / 2;
 
-       right = btrfs_alloc_free_block(trans, root, root->leafsize,
-                                       path->nodes[1]->start,
+       if (mid <= slot) {
+               if (nritems == 1 ||
+                   leaf_space_used(l, mid, nritems - mid) + data_size >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (slot >= nritems) {
+                               split = 0;
+                       } else {
+                               mid = slot;
+                               if (mid != nritems &&
+                                   leaf_space_used(l, mid, nritems - mid) +
+                                   data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       split = 2;
+                               }
+                       }
+               }
+       } else {
+               if (leaf_space_used(l, 0, mid) + data_size >
+                       BTRFS_LEAF_DATA_SIZE(root)) {
+                       if (!extend && data_size && slot == 0) {
+                               split = 0;
+                       } else if ((extend || !data_size) && slot == 0) {
+                               mid = 1;
+                       } else {
+                               mid = slot;
+                               if (mid != nritems &&
+                                   leaf_space_used(l, mid, nritems - mid) +
+                                   data_size > BTRFS_LEAF_DATA_SIZE(root)) {
+                                       split = 2 ;
+                               }
+                       }
+               }
+       }
+
+       if (split == 0)
+               btrfs_cpu_key_to_disk(&disk_key, ins_key);
+       else
+               btrfs_item_key(l, &disk_key, mid);
+
+       right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
                                        root->root_key.objectid,
-                                       trans->transid, 0, l->start, 0);
+                                       &disk_key, 0, l->start, 0);
        if (IS_ERR(right)) {
                BUG_ON(1);
                return PTR_ERR(right);
@@ -2963,6 +2931,7 @@ again:
        memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(right, right->start);
        btrfs_set_header_generation(right, trans->transid);
+       btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(right, root->root_key.objectid);
        btrfs_set_header_level(right, 0);
        write_extent_buffer(right, root->fs_info->fsid,
@@ -2973,79 +2942,47 @@ again:
                            (unsigned long)btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
 
-       if (mid <= slot) {
-               if (nritems == 1 ||
-                   leaf_space_used(l, mid, nritems - mid) + data_size >
-                       BTRFS_LEAF_DATA_SIZE(root)) {
-                       if (slot >= nritems) {
-                               struct btrfs_disk_key disk_key;
-
-                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
-                               btrfs_set_header_nritems(right, 0);
-                               wret = insert_ptr(trans, root, path,
-                                                 &disk_key, right->start,
-                                                 path->slots[1] + 1, 1);
-                               if (wret)
-                                       ret = wret;
+       if (split == 0) {
+               if (mid <= slot) {
+                       btrfs_set_header_nritems(right, 0);
+                       wret = insert_ptr(trans, root, path,
+                                         &disk_key, right->start,
+                                         path->slots[1] + 1, 1);
+                       if (wret)
+                               ret = wret;
 
-                               btrfs_tree_unlock(path->nodes[0]);
-                               free_extent_buffer(path->nodes[0]);
-                               path->nodes[0] = right;
-                               path->slots[0] = 0;
-                               path->slots[1] += 1;
-                               btrfs_mark_buffer_dirty(right);
-                               return ret;
-                       }
-                       mid = slot;
-                       if (mid != nritems &&
-                           leaf_space_used(l, mid, nritems - mid) +
-                           data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-                               double_split = 1;
-                       }
-               }
-       } else {
-               if (leaf_space_used(l, 0, mid) + data_size >
-                       BTRFS_LEAF_DATA_SIZE(root)) {
-                       if (!extend && data_size && slot == 0) {
-                               struct btrfs_disk_key disk_key;
-
-                               btrfs_cpu_key_to_disk(&disk_key, ins_key);
-                               btrfs_set_header_nritems(right, 0);
-                               wret = insert_ptr(trans, root, path,
-                                                 &disk_key,
-                                                 right->start,
-                                                 path->slots[1], 1);
+                       btrfs_tree_unlock(path->nodes[0]);
+                       free_extent_buffer(path->nodes[0]);
+                       path->nodes[0] = right;
+                       path->slots[0] = 0;
+                       path->slots[1] += 1;
+               } else {
+                       btrfs_set_header_nritems(right, 0);
+                       wret = insert_ptr(trans, root, path,
+                                         &disk_key,
+                                         right->start,
+                                         path->slots[1], 1);
+                       if (wret)
+                               ret = wret;
+                       btrfs_tree_unlock(path->nodes[0]);
+                       free_extent_buffer(path->nodes[0]);
+                       path->nodes[0] = right;
+                       path->slots[0] = 0;
+                       if (path->slots[1] == 0) {
+                               wret = fixup_low_keys(trans, root,
+                                               path, &disk_key, 1);
                                if (wret)
                                        ret = wret;
-                               btrfs_tree_unlock(path->nodes[0]);
-                               free_extent_buffer(path->nodes[0]);
-                               path->nodes[0] = right;
-                               path->slots[0] = 0;
-                               if (path->slots[1] == 0) {
-                                       wret = fixup_low_keys(trans, root,
-                                                     path, &disk_key, 1);
-                                       if (wret)
-                                               ret = wret;
-                               }
-                               btrfs_mark_buffer_dirty(right);
-                               return ret;
-                       } else if ((extend || !data_size) && slot == 0) {
-                               mid = 1;
-                       } else {
-                               mid = slot;
-                               if (mid != nritems &&
-                                   leaf_space_used(l, mid, nritems - mid) +
-                                   data_size > BTRFS_LEAF_DATA_SIZE(root)) {
-                                       double_split = 1;
-                               }
                        }
                }
+               btrfs_mark_buffer_dirty(right);
+               return ret;
        }
 
        ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
        BUG_ON(ret);
 
-       if (double_split) {
+       if (split == 2) {
                BUG_ON(num_doubles != 0);
                num_doubles++;
                goto again;
@@ -3447,7 +3384,7 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
                /* figure out how many keys we can insert in here */
                total_data = data_size[0];
                for (i = 1; i < nr; i++) {
-                       if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
+                       if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
                                break;
                        total_data += data_size[i];
                }
@@ -3745,9 +3682,7 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
 /*
  * a helper function to delete the leaf pointed to by path->slots[1] and
- * path->nodes[1].  bytenr is the node block pointer, but since the callers
- * already know it, it is faster to have them pass it down than to
- * read it out of the node again.
+ * path->nodes[1].
  *
  * This deletes the pointer in path->nodes[1] and frees the leaf
  * block extent.  zero is returned if it all worked out, < 0 otherwise.
@@ -3755,15 +3690,14 @@ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  * The path must have already been setup for deleting the leaf, including
  * all the proper balancing.  path->nodes[1] must be locked.
  */
-noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
-                           struct btrfs_root *root,
-                           struct btrfs_path *path, u64 bytenr)
+static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+                                  struct btrfs_root *root,
+                                  struct btrfs_path *path,
+                                  struct extent_buffer *leaf)
 {
        int ret;
-       u64 root_gen = btrfs_header_generation(path->nodes[1]);
-       u64 parent_start = path->nodes[1]->start;
-       u64 parent_owner = btrfs_header_owner(path->nodes[1]);
 
+       WARN_ON(btrfs_header_generation(leaf) != trans->transid);
        ret = del_ptr(trans, root, path, 1, path->slots[1]);
        if (ret)
                return ret;
@@ -3774,10 +3708,8 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
         */
        btrfs_unlock_up_safe(path, 0);
 
-       ret = btrfs_free_extent(trans, root, bytenr,
-                               btrfs_level_size(root, 0),
-                               parent_start, parent_owner,
-                               root_gen, 0, 1);
+       ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+                               0, root->root_key.objectid, 0, 0);
        return ret;
 }
 /*
@@ -3845,7 +3777,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                if (leaf == root->node) {
                        btrfs_set_header_level(leaf, 0);
                } else {
-                       ret = btrfs_del_leaf(trans, root, path, leaf->start);
+                       ret = btrfs_del_leaf(trans, root, path, leaf);
                        BUG_ON(ret);
                }
        } else {
@@ -3861,8 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                }
 
                /* delete the leaf if it is mostly empty */
-               if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 &&
-                   !trans->transaction->delayed_refs.flushing) {
+               if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
                        /* push_leaf_left fixes the path.
                         * make sure the path still points to our leaf
                         * for possible call to del_ptr below
@@ -3884,8 +3815,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 
                        if (btrfs_header_nritems(leaf) == 0) {
                                path->slots[1] = slot;
-                               ret = btrfs_del_leaf(trans, root, path,
-                                                    leaf->start);
+                               ret = btrfs_del_leaf(trans, root, path, leaf);
                                BUG_ON(ret);
                                free_extent_buffer(leaf);
                        } else {
@@ -4098,10 +4028,9 @@ out:
  * calling this function.
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-                       struct btrfs_key *key, int lowest_level,
+                       struct btrfs_key *key, int level,
                        int cache_only, u64 min_trans)
 {
-       int level = lowest_level;
        int slot;
        struct extent_buffer *c;
 
@@ -4114,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
                c = path->nodes[level];
 next:
                if (slot >= btrfs_header_nritems(c)) {
-                       level++;
-                       if (level == BTRFS_MAX_LEVEL)
+                       int ret;
+                       int orig_lowest;
+                       struct btrfs_key cur_key;
+                       if (level + 1 >= BTRFS_MAX_LEVEL ||
+                           !path->nodes[level + 1])
                                return 1;
-                       continue;
+
+                       if (path->locks[level + 1]) {
+                               level++;
+                               continue;
+                       }
+
+                       slot = btrfs_header_nritems(c) - 1;
+                       if (level == 0)
+                               btrfs_item_key_to_cpu(c, &cur_key, slot);
+                       else
+                               btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+                       orig_lowest = path->lowest_level;
+                       btrfs_release_path(root, path);
+                       path->lowest_level = level;
+                       ret = btrfs_search_slot(NULL, root, &cur_key, path,
+                                               0, 0);
+                       path->lowest_level = orig_lowest;
+                       if (ret < 0)
+                               return ret;
+
+                       c = path->nodes[level];
+                       slot = path->slots[level];
+                       if (ret == 0)
+                               slot++;
+                       goto next;
                }
+
                if (level == 0)
                        btrfs_item_key_to_cpu(c, key, slot);
                else {
@@ -4202,7 +4160,8 @@ again:
         * advance the path if there are now more items available.
         */
        if (nritems > 0 && path->slots[0] < nritems - 1) {
-               path->slots[0]++;
+               if (ret == 0)
+                       path->slots[0]++;
                ret = 0;
                goto done;
        }
@@ -4334,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
                        path->slots[0]--;
 
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-               if (found_key.type == type)
-                       return 0;
                if (found_key.objectid < min_objectid)
                        break;
+               if (found_key.type == type)
+                       return 0;
                if (found_key.objectid == min_objectid &&
                    found_key.type < type)
                        break;