Btrfs: use the commit_root for reading free_space_inode crcs
[pandora-kernel.git] / fs / btrfs / inode.c
index 7cd8ab0..4b9be28 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/posix_acl.h>
 #include <linux/falloc.h>
 #include <linux/slab.h>
+#include <linux/ratelimit.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -51,6 +52,7 @@
 #include "compression.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "inode-map.h"
 
 struct btrfs_iget_args {
        u64 ino;
@@ -136,9 +138,8 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->leave_spinning = 1;
-       btrfs_set_trans_block_group(trans, inode);
 
-       key.objectid = inode->i_ino;
+       key.objectid = btrfs_ino(inode);
        key.offset = start;
        btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
        datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@ -340,6 +341,10 @@ static noinline int compress_file_range(struct inode *inode,
        int will_compress;
        int compress_type = root->fs_info->compress_type;
 
+       /* if this is a small write inside eof, kick off a defragbot */
+       if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
+               btrfs_add_inode_defrag(NULL, inode);
+
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -420,9 +425,8 @@ again:
                }
        }
        if (start == 0) {
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
                trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
                /* lets try to make an inline extent */
@@ -617,8 +621,9 @@ retry:
                            async_extent->start + async_extent->ram_size - 1,
                            GFP_NOFS);
 
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                ret = btrfs_reserve_extent(trans, root,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
@@ -649,7 +654,7 @@ retry:
                                        async_extent->start +
                                        async_extent->ram_size - 1, 0);
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                BUG_ON(!em);
                em->start = async_extent->start;
                em->len = async_extent->ram_size;
@@ -777,10 +782,9 @@ static noinline int cow_file_range(struct inode *inode,
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
 
-       BUG_ON(root == root->fs_info->tree_root);
-       trans = btrfs_join_transaction(root, 1);
+       BUG_ON(btrfs_is_free_space_inode(root, inode));
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -788,6 +792,10 @@ static noinline int cow_file_range(struct inode *inode,
        disk_num_bytes = num_bytes;
        ret = 0;
 
+       /* if this is a small write inside eof, kick off defrag */
+       if (end <= BTRFS_I(inode)->disk_i_size && num_bytes < 64 * 1024)
+               btrfs_add_inode_defrag(trans, inode);
+
        if (start == 0) {
                /* lets try to make an inline extent */
                ret = cow_file_range_inline(trans, root, inode,
@@ -826,7 +834,7 @@ static noinline int cow_file_range(struct inode *inode,
                                           (u64)-1, &ins, 1);
                BUG_ON(ret);
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                BUG_ON(!em);
                em->start = start;
                em->orig_start = em->start;
@@ -1008,7 +1016,7 @@ static noinline int csum_exist_in_range(struct btrfs_root *root,
        LIST_HEAD(list);
 
        ret = btrfs_lookup_csums_range(root->fs_info->csum_root, bytenr,
-                                      bytenr + num_bytes - 1, &list);
+                                      bytenr + num_bytes - 1, &list, 0);
        if (ret == 0 && list_empty(&list))
                return 0;
 
@@ -1049,29 +1057,33 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        int type;
        int nocow;
        int check_prev = 1;
-       bool nolock = false;
+       bool nolock;
+       u64 ino = btrfs_ino(inode);
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       if (root == root->fs_info->tree_root) {
-               nolock = true;
-               trans = btrfs_join_transaction_nolock(root, 1);
-       } else {
-               trans = btrfs_join_transaction(root, 1);
-       }
+
+       nolock = btrfs_is_free_space_inode(root, inode);
+
+       if (nolock)
+               trans = btrfs_join_transaction_nolock(root);
+       else
+               trans = btrfs_join_transaction(root);
+
        BUG_ON(IS_ERR(trans));
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        cow_start = (u64)-1;
        cur_offset = start;
        while (1) {
-               ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+               ret = btrfs_lookup_file_extent(trans, root, path, ino,
                                               cur_offset, 0);
                BUG_ON(ret < 0);
                if (ret > 0 && path->slots[0] > 0 && check_prev) {
                        leaf = path->nodes[0];
                        btrfs_item_key_to_cpu(leaf, &found_key,
                                              path->slots[0] - 1);
-                       if (found_key.objectid == inode->i_ino &&
+                       if (found_key.objectid == ino &&
                            found_key.type == BTRFS_EXTENT_DATA_KEY)
                                path->slots[0]--;
                }
@@ -1092,7 +1104,7 @@ next_slot:
                num_bytes = 0;
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-               if (found_key.objectid > inode->i_ino ||
+               if (found_key.objectid > ino ||
                    found_key.type > BTRFS_EXTENT_DATA_KEY ||
                    found_key.offset > end)
                        break;
@@ -1127,7 +1139,7 @@ next_slot:
                                goto out_check;
                        if (btrfs_extent_readonly(root, disk_bytenr))
                                goto out_check;
-                       if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
+                       if (btrfs_cross_ref_exist(trans, root, ino,
                                                  found_key.offset -
                                                  extent_offset, disk_bytenr))
                                goto out_check;
@@ -1164,7 +1176,7 @@ out_check:
                        goto next_slot;
                }
 
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                if (cow_start != (u64)-1) {
                        ret = cow_file_range(inode, locked_page, cow_start,
                                        found_key.offset - 1, page_started,
@@ -1177,7 +1189,7 @@ out_check:
                        struct extent_map *em;
                        struct extent_map_tree *em_tree;
                        em_tree = &BTRFS_I(inode)->extent_tree;
-                       em = alloc_extent_map(GFP_NOFS);
+                       em = alloc_extent_map();
                        BUG_ON(!em);
                        em->start = cur_offset;
                        em->orig_start = em->start;
@@ -1222,7 +1234,7 @@ out_check:
                if (cur_offset > end)
                        break;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        if (cur_offset <= end && cow_start == (u64)-1)
                cow_start = cur_offset;
@@ -1277,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode,
        if (!(orig->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1295,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return 0;
 
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents--;
+       spin_unlock(&BTRFS_I(inode)->lock);
        return 0;
 }
 
@@ -1310,19 +1326,21 @@ static int btrfs_set_bit_hook(struct inode *inode,
 
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
-        * but in this case, we are only testeing for the DELALLOC
+        * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
        if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               int do_list = (root->root_key.objectid !=
-                              BTRFS_ROOT_TREE_OBJECTID);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else
-                       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+               } else {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents++;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                spin_lock(&root->fs_info->delalloc_lock);
                BTRFS_I(inode)->delalloc_bytes += len;
@@ -1344,19 +1362,21 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 {
        /*
         * set_bit and clear bit hooks normally require _irqsave/restore
-        * but in this case, we are only testeing for the DELALLOC
+        * but in this case, we are only testing for the DELALLOC
         * bit, which is only set or cleared with irqs on
         */
        if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                struct btrfs_root *root = BTRFS_I(inode)->root;
                u64 len = state->end + 1 - state->start;
-               int do_list = (root->root_key.objectid !=
-                              BTRFS_ROOT_TREE_OBJECTID);
+               bool do_list = !btrfs_is_free_space_inode(root, inode);
 
-               if (*bits & EXTENT_FIRST_DELALLOC)
+               if (*bits & EXTENT_FIRST_DELALLOC) {
                        *bits &= ~EXTENT_FIRST_DELALLOC;
-               else if (!(*bits & EXTENT_DO_ACCOUNTING))
-                       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+               } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->outstanding_extents--;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+               }
 
                if (*bits & EXTENT_DO_ACCOUNTING)
                        btrfs_delalloc_release_metadata(inode, len);
@@ -1458,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-       if (root == root->fs_info->tree_root)
+       if (btrfs_is_free_space_inode(root, inode))
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
        else
                ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1500,8 +1520,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 {
        struct btrfs_ordered_sum *sum;
 
-       btrfs_set_trans_block_group(trans, inode);
-
        list_for_each_entry(sum, list, list) {
                btrfs_csum_file_blocks(trans,
                       BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1644,7 +1662,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
                                 &hint, 0);
        BUG_ON(ret);
 
-       ins.objectid = inode->i_ino;
+       ins.objectid = btrfs_ino(inode);
        ins.offset = file_pos;
        ins.type = BTRFS_EXTENT_DATA_KEY;
        ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi));
@@ -1675,7 +1693,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        ins.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_alloc_reserved_file_extent(trans, root,
                                        root->root_key.objectid,
-                                       inode->i_ino, file_pos, &ins);
+                                       btrfs_ino(inode), file_pos, &ins);
        BUG_ON(ret);
        btrfs_free_path(path);
 
@@ -1701,7 +1719,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct extent_state *cached_state = NULL;
        int compress_type = 0;
        int ret;
-       bool nolock = false;
+       bool nolock;
 
        ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
                                             end - start + 1);
@@ -1709,18 +1727,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                return 0;
        BUG_ON(!ordered_extent);
 
-       nolock = (root == root->fs_info->tree_root);
+       nolock = btrfs_is_free_space_inode(root, inode);
 
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list));
                ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                if (!ret) {
                        if (nolock)
-                               trans = btrfs_join_transaction_nolock(root, 1);
+                               trans = btrfs_join_transaction_nolock(root);
                        else
-                               trans = btrfs_join_transaction(root, 1);
+                               trans = btrfs_join_transaction(root);
                        BUG_ON(IS_ERR(trans));
-                       btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
                        BUG_ON(ret);
@@ -1733,11 +1750,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                         0, &cached_state, GFP_NOFS);
 
        if (nolock)
-               trans = btrfs_join_transaction_nolock(root, 1);
+               trans = btrfs_join_transaction_nolock(root);
        else
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -1855,7 +1871,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
                }
                read_unlock(&em_tree->lock);
 
-               if (!em || IS_ERR(em)) {
+               if (IS_ERR_OR_NULL(em)) {
                        kfree(failrec);
                        return -EIO;
                }
@@ -1971,7 +1987,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        }
 
        if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
-               return 0;
+               goto good;
 
        if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID &&
            test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {
@@ -2004,12 +2020,11 @@ good:
        return 0;
 
 zeroit:
-       if (printk_ratelimit()) {
-               printk(KERN_INFO "btrfs csum failed ino %lu off %llu csum %u "
-                      "private %llu\n", page->mapping->host->i_ino,
+       printk_ratelimited(KERN_INFO "btrfs csum failed ino %llu off %llu csum %u "
+                      "private %llu\n",
+                      (unsigned long long)btrfs_ino(page->mapping->host),
                       (unsigned long long)start, csum,
                       (unsigned long long)private);
-       }
        memset(kaddr + offset, 1, end - start + 1);
        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
@@ -2244,7 +2259,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
 
        /* insert an orphan item to track this unlinked/truncated file */
        if (insert >= 1) {
-               ret = btrfs_insert_orphan_item(trans, root, inode->i_ino);
+               ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
                BUG_ON(ret);
        }
 
@@ -2281,7 +2296,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
        spin_unlock(&root->orphan_lock);
 
        if (trans && delete_item) {
-               ret = btrfs_del_orphan_item(trans, root, inode->i_ino);
+               ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
                BUG_ON(ret);
        }
 
@@ -2346,7 +2361,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                        break;
 
                /* release the path since we're done with it */
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
 
                /*
                 * this is where we are basically btrfs_lookup, without the
@@ -2413,7 +2428,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                        (u64)-1);
 
        if (root->orphan_block_rsv || root->orphan_item_inserted) {
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                if (!IS_ERR(trans))
                        btrfs_end_transaction(trans, root);
        }
@@ -2493,12 +2508,17 @@ static void btrfs_read_locked_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
        int maybe_acls;
-       u64 alloc_group_block;
        u32 rdev;
        int ret;
+       bool filled = false;
+
+       ret = btrfs_fill_inode(inode, &rdev);
+       if (!ret)
+               filled = true;
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
+       path->leave_spinning = 1;
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2506,9 +2526,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
                goto make_bad;
 
        leaf = path->nodes[0];
+
+       if (filled)
+               goto cache_acl;
+
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
-
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
        inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
@@ -2536,21 +2559,17 @@ static void btrfs_read_locked_inode(struct inode *inode)
 
        BTRFS_I(inode)->index_cnt = (u64)-1;
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
-
-       alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-
+cache_acl:
        /*
         * try to precache a NULL acl entry for files that don't have
         * any xattrs or acls
         */
-       maybe_acls = acls_after_inode_item(leaf, path->slots[0], inode->i_ino);
+       maybe_acls = acls_after_inode_item(leaf, path->slots[0],
+                                          btrfs_ino(inode));
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
-                                               alloc_group_block, 0);
        btrfs_free_path(path);
-       inode_item = NULL;
 
        switch (inode->i_mode & S_IFMT) {
        case S_IFREG:
@@ -2594,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
-       if (!leaf->map_token)
-               map_private_extent_buffer(leaf, (unsigned long)item,
-                                         sizeof(struct btrfs_inode_item),
-                                         &leaf->map_token, &leaf->kaddr,
-                                         &leaf->map_start, &leaf->map_len,
-                                         KM_USER1);
-
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2628,12 +2640,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_transid(leaf, item, trans->transid);
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-       btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
-
-       if (leaf->map_token) {
-               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-               leaf->map_token = NULL;
-       }
+       btrfs_set_inode_block_group(leaf, item, 0);
 }
 
 /*
@@ -2647,11 +2654,28 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        int ret;
 
+       /*
+        * If the inode is a free space inode, we can deadlock during commit
+        * if we put it into the delayed code.
+        *
+        * The data relocation inode should also be directly updated
+        * without delay
+        */
+       if (!btrfs_is_free_space_inode(root, inode)
+           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+               ret = btrfs_delayed_update_inode(trans, root, inode);
+               if (!ret)
+                       btrfs_set_inode_last_trans(trans, inode);
+               return ret;
+       }
+
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
+
        path->leave_spinning = 1;
-       ret = btrfs_lookup_inode(trans, root, path,
-                                &BTRFS_I(inode)->location, 1);
+       ret = btrfs_lookup_inode(trans, root, path, &BTRFS_I(inode)->location,
+                                1);
        if (ret) {
                if (ret > 0)
                        ret = -ENOENT;
@@ -2661,7 +2685,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        btrfs_unlock_up_safe(path, 1);
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
-                                 struct btrfs_inode_item);
+                                   struct btrfs_inode_item);
 
        fill_inode_item(trans, leaf, inode_item, inode);
        btrfs_mark_buffer_dirty(leaf);
@@ -2672,7 +2696,6 @@ failed:
        return ret;
 }
 
-
 /*
  * unlink helper that gets used here in inode.c and in the tree logging
  * recovery code.  It remove a link in a directory with a given name, and
@@ -2689,6 +2712,8 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        struct btrfs_dir_item *di;
        struct btrfs_key key;
        u64 index;
+       u64 ino = btrfs_ino(inode);
+       u64 dir_ino = btrfs_ino(dir);
 
        path = btrfs_alloc_path();
        if (!path) {
@@ -2697,7 +2722,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        }
 
        path->leave_spinning = 1;
-       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+       di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
                                    name, name_len, -1);
        if (IS_ERR(di)) {
                ret = PTR_ERR(di);
@@ -2712,33 +2737,23 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        ret = btrfs_delete_one_dir_name(trans, root, path, di);
        if (ret)
                goto err;
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
-       ret = btrfs_del_inode_ref(trans, root, name, name_len,
-                                 inode->i_ino,
-                                 dir->i_ino, &index);
+       ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,
+                                 dir_ino, &index);
        if (ret) {
                printk(KERN_INFO "btrfs failed to delete reference to %.*s, "
-                      "inode %lu parent %lu\n", name_len, name,
-                      inode->i_ino, dir->i_ino);
+                      "inode %llu parent %llu\n", name_len, name,
+                      (unsigned long long)ino, (unsigned long long)dir_ino);
                goto err;
        }
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
-                                        index, name, name_len, -1);
-       if (IS_ERR(di)) {
-               ret = PTR_ERR(di);
-               goto err;
-       }
-       if (!di) {
-               ret = -ENOENT;
+       ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
+       if (ret)
                goto err;
-       }
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
-       btrfs_release_path(root, path);
 
        ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
-                                        inode, dir->i_ino);
+                                        inode, dir_ino);
        BUG_ON(ret != 0 && ret != -ENOENT);
 
        ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
@@ -2816,12 +2831,14 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        int check_link = 1;
        int err = -ENOSPC;
        int ret;
+       u64 ino = btrfs_ino(inode);
+       u64 dir_ino = btrfs_ino(dir);
 
        trans = btrfs_start_transaction(root, 10);
        if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                return trans;
 
-       if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+       if (ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
                return ERR_PTR(-ENOSPC);
 
        /* check if there is someone else holds reference */
@@ -2862,7 +2879,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        } else {
                check_link = 0;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ret = btrfs_lookup_inode(trans, root, path,
                                &BTRFS_I(inode)->location, 0);
@@ -2876,11 +2893,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        } else {
                check_link = 0;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        if (ret == 0 && S_ISREG(inode->i_mode)) {
                ret = btrfs_lookup_file_extent(trans, root, path,
-                                              inode->i_ino, (u64)-1, 0);
+                                              ino, (u64)-1, 0);
                if (ret < 0) {
                        err = ret;
                        goto out;
@@ -2888,7 +2905,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                BUG_ON(ret == 0);
                if (check_path_shared(root, path))
                        goto out;
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
        }
 
        if (!check_link) {
@@ -2896,7 +2913,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                goto out;
        }
 
-       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+       di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
                                dentry->d_name.name, dentry->d_name.len, 0);
        if (IS_ERR(di)) {
                err = PTR_ERR(di);
@@ -2909,11 +2926,11 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                err = 0;
                goto out;
        }
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ref = btrfs_lookup_inode_ref(trans, root, path,
                                dentry->d_name.name, dentry->d_name.len,
-                               inode->i_ino, dir->i_ino, 0);
+                               ino, dir_ino, 0);
        if (IS_ERR(ref)) {
                err = PTR_ERR(ref);
                goto out;
@@ -2922,9 +2939,17 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        if (check_path_shared(root, path))
                goto out;
        index = btrfs_inode_ref_index(path->nodes[0], ref);
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index,
+       /*
+        * This is a commit root search.  If we can look up the inode item and
+        * the other related items in the commit root, the transaction that
+        * created the dir/file has been committed, and the dir index item
+        * whose insertion was delayed has also been inserted into the commit
+        * root.  So we needn't worry about the delayed insertion of the dir
+        * index item here.
+        */
+       di = btrfs_lookup_dir_index_item(trans, root, path, dir_ino, index,
                                dentry->d_name.name, dentry->d_name.len, 0);
        if (IS_ERR(di)) {
                err = PTR_ERR(di);
@@ -2969,8 +2994,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
 
        ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -2999,47 +3022,41 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        struct btrfs_key key;
        u64 index;
        int ret;
+       u64 dir_ino = btrfs_ino(dir);
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
+       di = btrfs_lookup_dir_item(trans, root, path, dir_ino,
                                   name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
+       BUG_ON(IS_ERR_OR_NULL(di));
 
        leaf = path->nodes[0];
        btrfs_dir_item_key_to_cpu(leaf, di, &key);
        WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
        ret = btrfs_delete_one_dir_name(trans, root, path, di);
        BUG_ON(ret);
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
 
        ret = btrfs_del_root_ref(trans, root->fs_info->tree_root,
                                 objectid, root->root_key.objectid,
-                                dir->i_ino, &index, name, name_len);
+                                dir_ino, &index, name, name_len);
        if (ret < 0) {
                BUG_ON(ret != -ENOENT);
-               di = btrfs_search_dir_index_item(root, path, dir->i_ino,
+               di = btrfs_search_dir_index_item(root, path, dir_ino,
                                                 name, name_len);
-               BUG_ON(!di || IS_ERR(di));
+               BUG_ON(IS_ERR_OR_NULL(di));
 
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-               btrfs_release_path(root, path);
+               btrfs_release_path(path);
                index = key.offset;
        }
+       btrfs_release_path(path);
 
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
-                                        index, name, name_len, -1);
-       BUG_ON(!di || IS_ERR(di));
-
-       leaf = path->nodes[0];
-       btrfs_dir_item_key_to_cpu(leaf, di, &key);
-       WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid);
-       ret = btrfs_delete_one_dir_name(trans, root, path, di);
+       ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);
        BUG_ON(ret);
-       btrfs_release_path(root, path);
 
        btrfs_i_size_write(dir, dir->i_size - name_len * 2);
        dir->i_mtime = dir->i_ctime = CURRENT_TIME;
@@ -3059,16 +3076,14 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        unsigned long nr = 0;
 
        if (inode->i_size > BTRFS_EMPTY_DIR_SIZE ||
-           inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+           btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID)
                return -ENOTEMPTY;
 
        trans = __unlink_start_trans(dir, dentry);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
-       if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
+       if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                err = btrfs_unlink_subvol(trans, root, dir,
                                          BTRFS_I(inode)->location.objectid,
                                          dentry->d_name.name,
@@ -3093,178 +3108,6 @@ out:
        return err;
 }
 
-#if 0
-/*
- * when truncating bytes in a file, it is possible to avoid reading
- * the leaves that contain only checksum items.  This can be the
- * majority of the IO required to delete a large file, but it must
- * be done carefully.
- *
- * The keys in the level just above the leaves are checked to make sure
- * the lowest key in a given leaf is a csum key, and starts at an offset
- * after the new  size.
- *
- * Then the key for the next leaf is checked to make sure it also has
- * a checksum item for the same file.  If it does, we know our target leaf
- * contains only checksum items, and it can be safely freed without reading
- * it.
- *
- * This is just an optimization targeted at large files.  It may do
- * nothing.  It will return 0 unless things went badly.
- */
-static noinline int drop_csum_leaves(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *root,
-                                    struct btrfs_path *path,
-                                    struct inode *inode, u64 new_size)
-{
-       struct btrfs_key key;
-       int ret;
-       int nritems;
-       struct btrfs_key found_key;
-       struct btrfs_key other_key;
-       struct btrfs_leaf_ref *ref;
-       u64 leaf_gen;
-       u64 leaf_start;
-
-       path->lowest_level = 1;
-       key.objectid = inode->i_ino;
-       key.type = BTRFS_CSUM_ITEM_KEY;
-       key.offset = new_size;
-again:
-       ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-       if (ret < 0)
-               goto out;
-
-       if (path->nodes[1] == NULL) {
-               ret = 0;
-               goto out;
-       }
-       ret = 0;
-       btrfs_node_key_to_cpu(path->nodes[1], &found_key, path->slots[1]);
-       nritems = btrfs_header_nritems(path->nodes[1]);
-
-       if (!nritems)
-               goto out;
-
-       if (path->slots[1] >= nritems)
-               goto next_node;
-
-       /* did we find a key greater than anything we want to delete? */
-       if (found_key.objectid > inode->i_ino ||
-          (found_key.objectid == inode->i_ino && found_key.type > key.type))
-               goto out;
-
-       /* we check the next key in the node to make sure the leave contains
-        * only checksum items.  This comparison doesn't work if our
-        * leaf is the last one in the node
-        */
-       if (path->slots[1] + 1 >= nritems) {
-next_node:
-               /* search forward from the last key in the node, this
-                * will bring us into the next node in the tree
-                */
-               btrfs_node_key_to_cpu(path->nodes[1], &found_key, nritems - 1);
-
-               /* unlikely, but we inc below, so check to be safe */
-               if (found_key.offset == (u64)-1)
-                       goto out;
-
-               /* search_forward needs a path with locks held, do the
-                * search again for the original key.  It is possible
-                * this will race with a balance and return a path that
-                * we could modify, but this drop is just an optimization
-                * and is allowed to miss some leaves.
-                */
-               btrfs_release_path(root, path);
-               found_key.offset++;
-
-               /* setup a max key for search_forward */
-               other_key.offset = (u64)-1;
-               other_key.type = key.type;
-               other_key.objectid = key.objectid;
-
-               path->keep_locks = 1;
-               ret = btrfs_search_forward(root, &found_key, &other_key,
-                                          path, 0, 0);
-               path->keep_locks = 0;
-               if (ret || found_key.objectid != key.objectid ||
-                   found_key.type != key.type) {
-                       ret = 0;
-                       goto out;
-               }
-
-               key.offset = found_key.offset;
-               btrfs_release_path(root, path);
-               cond_resched();
-               goto again;
-       }
-
-       /* we know there's one more slot after us in the tree,
-        * read that key so we can verify it is also a checksum item
-        */
-       btrfs_node_key_to_cpu(path->nodes[1], &other_key, path->slots[1] + 1);
-
-       if (found_key.objectid < inode->i_ino)
-               goto next_key;
-
-       if (found_key.type != key.type || found_key.offset < new_size)
-               goto next_key;
-
-       /*
-        * if the key for the next leaf isn't a csum key from this objectid,
-        * we can't be sure there aren't good items inside this leaf.
-        * Bail out
-        */
-       if (other_key.objectid != inode->i_ino || other_key.type != key.type)
-               goto out;
-
-       leaf_start = btrfs_node_blockptr(path->nodes[1], path->slots[1]);
-       leaf_gen = btrfs_node_ptr_generation(path->nodes[1], path->slots[1]);
-       /*
-        * it is safe to delete this leaf, it contains only
-        * csum items from this inode at an offset >= new_size
-        */
-       ret = btrfs_del_leaf(trans, root, path, leaf_start);
-       BUG_ON(ret);
-
-       if (root->ref_cows && leaf_gen < trans->transid) {
-               ref = btrfs_alloc_leaf_ref(root, 0);
-               if (ref) {
-                       ref->root_gen = root->root_key.offset;
-                       ref->bytenr = leaf_start;
-                       ref->owner = 0;
-                       ref->generation = leaf_gen;
-                       ref->nritems = 0;
-
-                       btrfs_sort_leaf_ref(ref);
-
-                       ret = btrfs_add_leaf_ref(root, ref, 0);
-                       WARN_ON(ret);
-                       btrfs_free_leaf_ref(root, ref);
-               } else {
-                       WARN_ON(1);
-               }
-       }
-next_key:
-       btrfs_release_path(root, path);
-
-       if (other_key.objectid == inode->i_ino &&
-           other_key.type == key.type && other_key.offset > key.offset) {
-               key.offset = other_key.offset;
-               cond_resched();
-               goto again;
-       }
-       ret = 0;
-out:
-       /* fixup any changes we've made to the path */
-       path->lowest_level = 0;
-       path->keep_locks = 0;
-       btrfs_release_path(root, path);
-       return ret;
-}
-
-#endif
-
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -3300,17 +3143,27 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        int encoding;
        int ret;
        int err = 0;
+       u64 ino = btrfs_ino(inode);
 
        BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
        if (root->ref_cows || root == root->fs_info->tree_root)
                btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 
+       /*
+        * This function is also used to drop the items in the log tree before
+        * we relog the inode, so if root != BTRFS_I(inode)->root, it means
+        * it is used to drop the logged items. So we shouldn't kill the delayed
+        * items.
+        */
+       if (min_type == 0 && root == BTRFS_I(inode)->root)
+               btrfs_kill_delayed_inode_items(inode);
+
        path = btrfs_alloc_path();
        BUG_ON(!path);
        path->reada = -1;
 
-       key.objectid = inode->i_ino;
+       key.objectid = ino;
        key.offset = (u64)-1;
        key.type = (u8)-1;
 
@@ -3338,7 +3191,7 @@ search_again:
                found_type = btrfs_key_type(&found_key);
                encoding = 0;
 
-               if (found_key.objectid != inode->i_ino)
+               if (found_key.objectid != ino)
                        break;
 
                if (found_type < min_type)
@@ -3428,7 +3281,6 @@ search_again:
                                    btrfs_file_extent_calc_inline_size(size);
                                ret = btrfs_truncate_item(trans, root, path,
                                                          size, 1);
-                               BUG_ON(ret);
                        } else if (root->ref_cows) {
                                inode_sub_bytes(inode, item_end + 1 -
                                                found_key.offset);
@@ -3457,7 +3309,7 @@ delete:
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_bytes, 0,
                                                btrfs_header_owner(leaf),
-                                               inode->i_ino, extent_offset);
+                                               ino, extent_offset);
                        BUG_ON(ret);
                }
 
@@ -3466,7 +3318,9 @@ delete:
 
                if (path->slots[0] == 0 ||
                    path->slots[0] != pending_del_slot) {
-                       if (root->ref_cows) {
+                       if (root->ref_cows &&
+                           BTRFS_I(inode)->location.objectid !=
+                                               BTRFS_FREE_INO_OBJECTID) {
                                err = -EAGAIN;
                                goto out;
                        }
@@ -3477,7 +3331,7 @@ delete:
                                BUG_ON(ret);
                                pending_del_nr = 0;
                        }
-                       btrfs_release_path(root, path);
+                       btrfs_release_path(path);
                        goto search_again;
                } else {
                        path->slots[0]--;
@@ -3521,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        ret = -ENOMEM;
 again:
-       page = grab_cache_page(mapping, index);
+       page = find_or_create_page(mapping, index, GFP_NOFS);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
@@ -3635,7 +3489,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
        while (1) {
                em = btrfs_get_extent(inode, NULL, 0, cur_offset,
                                block_end - cur_offset, 0);
-               BUG_ON(IS_ERR(em) || !em);
+               BUG_ON(IS_ERR_OR_NULL(em));
                last_byte = min(extent_map_end(em), block_end);
                last_byte = (last_byte + mask) & ~mask;
                if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
@@ -3647,7 +3501,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                err = PTR_ERR(trans);
                                break;
                        }
-                       btrfs_set_trans_block_group(trans, inode);
 
                        err = btrfs_drop_extents(trans, inode, cur_offset,
                                                 cur_offset + hole_size,
@@ -3656,7 +3509,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                break;
 
                        err = btrfs_insert_file_extent(trans, root,
-                                       inode->i_ino, cur_offset, 0,
+                                       btrfs_ino(inode), cur_offset, 0,
                                        0, hole_size, 0, hole_size,
                                        0, 0, 0);
                        if (err)
@@ -3758,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode)
 
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
-                              root == root->fs_info->tree_root))
+                              btrfs_is_free_space_inode(root, inode)))
                goto no_delete;
 
        if (is_bad_inode(inode)) {
@@ -3781,9 +3634,8 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_i_size_write(inode, 0);
 
        while (1) {
-               trans = btrfs_start_transaction(root, 0);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
                trans->block_rsv = root->orphan_block_rsv;
 
                ret = btrfs_block_rsv_check(trans, root,
@@ -3811,6 +3663,10 @@ void btrfs_evict_inode(struct inode *inode)
                BUG_ON(ret);
        }
 
+       if (!(root == root->fs_info->tree_root ||
+             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
+               btrfs_return_ino(root, btrfs_ino(inode));
+
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root, nr);
@@ -3836,12 +3692,12 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
-       di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
+       di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
                                    namelen, 0);
        if (IS_ERR(di))
                ret = PTR_ERR(di);
 
-       if (!di || IS_ERR(di))
+       if (IS_ERR_OR_NULL(di))
                goto out_err;
 
        btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
@@ -3889,7 +3745,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
 
        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref);
-       if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino ||
+       if (btrfs_root_ref_dirid(leaf, ref) != btrfs_ino(dir) ||
            btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len)
                goto out;
 
@@ -3899,7 +3755,7 @@ static int fixup_tree_root_location(struct btrfs_root *root,
        if (ret)
                goto out;
 
-       btrfs_release_path(root->fs_info->tree_root, path);
+       btrfs_release_path(path);
 
        new_root = btrfs_read_fs_root_no_name(root->fs_info, location);
        if (IS_ERR(new_root)) {
@@ -3928,6 +3784,7 @@ static void inode_tree_add(struct inode *inode)
        struct btrfs_inode *entry;
        struct rb_node **p;
        struct rb_node *parent;
+       u64 ino = btrfs_ino(inode);
 again:
        p = &root->inode_tree.rb_node;
        parent = NULL;
@@ -3940,9 +3797,9 @@ again:
                parent = *p;
                entry = rb_entry(parent, struct btrfs_inode, rb_node);
 
-               if (inode->i_ino < entry->vfs_inode.i_ino)
+               if (ino < btrfs_ino(&entry->vfs_inode))
                        p = &parent->rb_left;
-               else if (inode->i_ino > entry->vfs_inode.i_ino)
+               else if (ino > btrfs_ino(&entry->vfs_inode))
                        p = &parent->rb_right;
                else {
                        WARN_ON(!(entry->vfs_inode.i_state &
@@ -4006,9 +3863,9 @@ again:
                prev = node;
                entry = rb_entry(node, struct btrfs_inode, rb_node);
 
-               if (objectid < entry->vfs_inode.i_ino)
+               if (objectid < btrfs_ino(&entry->vfs_inode))
                        node = node->rb_left;
-               else if (objectid > entry->vfs_inode.i_ino)
+               else if (objectid > btrfs_ino(&entry->vfs_inode))
                        node = node->rb_right;
                else
                        break;
@@ -4016,7 +3873,7 @@ again:
        if (!node) {
                while (prev) {
                        entry = rb_entry(prev, struct btrfs_inode, rb_node);
-                       if (objectid <= entry->vfs_inode.i_ino) {
+                       if (objectid <= btrfs_ino(&entry->vfs_inode)) {
                                node = prev;
                                break;
                        }
@@ -4025,7 +3882,7 @@ again:
        }
        while (node) {
                entry = rb_entry(node, struct btrfs_inode, rb_node);
-               objectid = entry->vfs_inode.i_ino + 1;
+               objectid = btrfs_ino(&entry->vfs_inode) + 1;
                inode = igrab(&entry->vfs_inode);
                if (inode) {
                        spin_unlock(&root->inode_lock);
@@ -4063,7 +3920,7 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
 static int btrfs_find_actor(struct inode *inode, void *opaque)
 {
        struct btrfs_iget_args *args = opaque;
-       return args->ino == inode->i_ino &&
+       return args->ino == btrfs_ino(inode) &&
                args->root == BTRFS_I(inode)->root;
 }
 
@@ -4208,7 +4065,7 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
        return d_splice_alias(inode, dentry);
 }
 
-static unsigned char btrfs_filetype_table[] = {
+unsigned char btrfs_filetype_table[] = {
        DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
 };
 
@@ -4222,6 +4079,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
+       struct list_head ins_list;
+       struct list_head del_list;
        int ret;
        struct extent_buffer *leaf;
        int slot;
@@ -4234,6 +4093,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        char tmp_name[32];
        char *name_ptr;
        int name_len;
+       int is_curr = 0;        /* filp->f_pos points to the current index? */
 
        /* FIXME, use a real flag for deciding about the key type */
        if (root->fs_info->tree_root == root)
@@ -4241,9 +4101,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
        /* special case for "." */
        if (filp->f_pos == 0) {
-               over = filldir(dirent, ".", 1,
-                              1, inode->i_ino,
-                              DT_DIR);
+               over = filldir(dirent, ".", 1, 1, btrfs_ino(inode), DT_DIR);
                if (over)
                        return 0;
                filp->f_pos = 1;
@@ -4258,11 +4116,20 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                filp->f_pos = 2;
        }
        path = btrfs_alloc_path();
-       path->reada = 2;
+       if (!path)
+               return -ENOMEM;
+
+       path->reada = 1;
+
+       if (key_type == BTRFS_DIR_INDEX_KEY) {
+               INIT_LIST_HEAD(&ins_list);
+               INIT_LIST_HEAD(&del_list);
+               btrfs_get_delayed_items(inode, &ins_list, &del_list);
+       }
 
        btrfs_set_key_type(&key, key_type);
        key.offset = filp->f_pos;
-       key.objectid = inode->i_ino;
+       key.objectid = btrfs_ino(inode);
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
@@ -4289,8 +4156,13 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                        break;
                if (found_key.offset < filp->f_pos)
                        goto next;
+               if (key_type == BTRFS_DIR_INDEX_KEY &&
+                   btrfs_should_delete_dir_index(&del_list,
+                                                 found_key.offset))
+                       goto next;
 
                filp->f_pos = found_key.offset;
+               is_curr = 1;
 
                di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
                di_cur = 0;
@@ -4345,6 +4217,15 @@ next:
                path->slots[0]++;
        }
 
+       if (key_type == BTRFS_DIR_INDEX_KEY) {
+               if (is_curr)
+                       filp->f_pos++;
+               ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir,
+                                                     &ins_list);
+               if (ret)
+                       goto nopos;
+       }
+
        /* Reached end of directory/root. Bump pos past the last item. */
        if (key_type == BTRFS_DIR_INDEX_KEY)
                /*
@@ -4357,6 +4238,8 @@ next:
 nopos:
        ret = 0;
 err:
+       if (key_type == BTRFS_DIR_INDEX_KEY)
+               btrfs_put_delayed_items(&ins_list, &del_list);
        btrfs_free_path(path);
        return ret;
 }
@@ -4371,17 +4254,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
-       smp_mb();
-       nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+       if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
+               nolock = true;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
                if (nolock)
-                       trans = btrfs_join_transaction_nolock(root, 1);
+                       trans = btrfs_join_transaction_nolock(root);
                else
-                       trans = btrfs_join_transaction(root, 1);
+                       trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
-               btrfs_set_trans_block_group(trans, inode);
                if (nolock)
                        ret = btrfs_end_transaction_nolock(trans, root);
                else
@@ -4405,9 +4287,8 @@ void btrfs_dirty_inode(struct inode *inode)
        if (BTRFS_I(inode)->dummy_inode)
                return;
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
 
        ret = btrfs_update_inode(trans, root, inode);
        if (ret && ret == -ENOSPC) {
@@ -4415,25 +4296,24 @@ void btrfs_dirty_inode(struct inode *inode)
                btrfs_end_transaction(trans, root);
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
-                       if (printk_ratelimit()) {
-                               printk(KERN_ERR "btrfs: fail to "
-                                      "dirty  inode %lu error %ld\n",
-                                      inode->i_ino, PTR_ERR(trans));
-                       }
+                       printk_ratelimited(KERN_ERR "btrfs: fail to "
+                                      "dirty  inode %llu error %ld\n",
+                                      (unsigned long long)btrfs_ino(inode),
+                                      PTR_ERR(trans));
                        return;
                }
-               btrfs_set_trans_block_group(trans, inode);
 
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
-                       if (printk_ratelimit()) {
-                               printk(KERN_ERR "btrfs: fail to "
-                                      "dirty  inode %lu error %d\n",
-                                      inode->i_ino, ret);
-                       }
+                       printk_ratelimited(KERN_ERR "btrfs: fail to "
+                                      "dirty  inode %llu error %d\n",
+                                      (unsigned long long)btrfs_ino(inode),
+                                      ret);
                }
        }
        btrfs_end_transaction(trans, root);
+       if (BTRFS_I(inode)->delayed_node)
+               btrfs_balance_delayed_items(root);
 }
 
 /*
@@ -4449,7 +4329,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
        struct extent_buffer *leaf;
        int ret;
 
-       key.objectid = inode->i_ino;
+       key.objectid = btrfs_ino(inode);
        btrfs_set_key_type(&key, BTRFS_DIR_INDEX_KEY);
        key.offset = (u64)-1;
 
@@ -4481,7 +4361,7 @@ static int btrfs_set_inode_index_count(struct inode *inode)
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-       if (found_key.objectid != inode->i_ino ||
+       if (found_key.objectid != btrfs_ino(inode) ||
            btrfs_key_type(&found_key) != BTRFS_DIR_INDEX_KEY) {
                BTRFS_I(inode)->index_cnt = 2;
                goto out;
@@ -4502,9 +4382,12 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
        int ret = 0;
 
        if (BTRFS_I(dir)->index_cnt == (u64)-1) {
-               ret = btrfs_set_inode_index_count(dir);
-               if (ret)
-                       return ret;
+               ret = btrfs_inode_delayed_dir_index_count(dir);
+               if (ret) {
+                       ret = btrfs_set_inode_index_count(dir);
+                       if (ret)
+                               return ret;
+               }
        }
 
        *index = BTRFS_I(dir)->index_cnt;
@@ -4517,8 +4400,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
                                     const char *name, int name_len,
-                                    u64 ref_objectid, u64 objectid,
-                                    u64 alloc_hint, int mode, u64 *index)
+                                    u64 ref_objectid, u64 objectid, int mode,
+                                    u64 *index)
 {
        struct inode *inode;
        struct btrfs_inode_item *inode_item;
@@ -4540,6 +4423,12 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                return ERR_PTR(-ENOMEM);
        }
 
+       /*
+        * we have to initialize this early, so we can reclaim the inode
+        * number if we fail afterwards in this function.
+        */
+       inode->i_ino = objectid;
+
        if (dir) {
                trace_btrfs_inode_request(dir);
 
@@ -4565,8 +4454,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                owner = 0;
        else
                owner = 1;
-       BTRFS_I(inode)->block_group =
-                       btrfs_find_block_group(root, 0, alloc_hint, owner);
 
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4585,7 +4472,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                goto fail;
 
        inode_init_owner(inode, dir, mode);
-       inode->i_ino = objectid;
        inode_set_bytes(inode, 0);
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -4621,6 +4507,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        inode_tree_add(inode);
 
        trace_btrfs_inode_new(inode);
+       btrfs_set_inode_last_trans(trans, inode);
 
        return inode;
 fail:
@@ -4649,29 +4536,29 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
        int ret = 0;
        struct btrfs_key key;
        struct btrfs_root *root = BTRFS_I(parent_inode)->root;
+       u64 ino = btrfs_ino(inode);
+       u64 parent_ino = btrfs_ino(parent_inode);
 
-       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+       if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
                memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key));
        } else {
-               key.objectid = inode->i_ino;
+               key.objectid = ino;
                btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
                key.offset = 0;
        }
 
-       if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+       if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
                ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
                                         key.objectid, root->root_key.objectid,
-                                        parent_inode->i_ino,
-                                        index, name, name_len);
+                                        parent_ino, index, name, name_len);
        } else if (add_backref) {
-               ret = btrfs_insert_inode_ref(trans, root,
-                                            name, name_len, inode->i_ino,
-                                            parent_inode->i_ino, index);
+               ret = btrfs_insert_inode_ref(trans, root, name, name_len, ino,
+                                            parent_ino, index);
        }
 
        if (ret == 0) {
                ret = btrfs_insert_dir_item(trans, root, name, name_len,
-                                           parent_inode->i_ino, &key,
+                                           parent_inode, &key,
                                            btrfs_inode_type(inode), index);
                BUG_ON(ret);
 
@@ -4714,10 +4601,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (!new_valid_dev(rdev))
                return -EINVAL;
 
-       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
-       if (err)
-               return err;
-
        /*
         * 2 for inode item and ref
         * 2 for dir items
@@ -4727,11 +4610,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
+       err = btrfs_find_free_ino(root, &objectid);
+       if (err)
+               goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, dir->i_ino, objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               dentry->d_name.len, btrfs_ino(dir), objectid,
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -4743,7 +4628,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -4752,8 +4636,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                init_special_inode(inode, inode->i_mode, rdev);
                btrfs_update_inode(trans, root, inode);
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
@@ -4777,9 +4659,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        u64 objectid;
        u64 index = 0;
 
-       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
-       if (err)
-               return err;
        /*
         * 2 for inode item and ref
         * 2 for dir items
@@ -4789,11 +4668,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
+       err = btrfs_find_free_ino(root, &objectid);
+       if (err)
+               goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, dir->i_ino, objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               dentry->d_name.len, btrfs_ino(dir), objectid,
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -4805,7 +4686,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -4816,8 +4696,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
@@ -4864,8 +4742,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
        btrfs_inc_nlink(inode);
        inode->i_ctime = CURRENT_TIME;
-
-       btrfs_set_trans_block_group(trans, dir);
        ihold(inode);
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4874,7 +4750,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                drop_inode = 1;
        } else {
                struct dentry *parent = dget_parent(dentry);
-               btrfs_update_inode_block_group(trans, dir);
                err = btrfs_update_inode(trans, root, inode);
                BUG_ON(err);
                btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4903,10 +4778,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        u64 index = 0;
        unsigned long nr = 1;
 
-       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
-       if (err)
-               return err;
-
        /*
         * 2 items for inode and ref
         * 2 items for dir items
@@ -4915,12 +4786,14 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        trans = btrfs_start_transaction(root, 5);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
-       btrfs_set_trans_block_group(trans, dir);
+
+       err = btrfs_find_free_ino(root, &objectid);
+       if (err)
+               goto out_fail;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, dir->i_ino, objectid,
-                               BTRFS_I(dir)->block_group, S_IFDIR | mode,
-                               &index);
+                               dentry->d_name.len, btrfs_ino(dir), objectid,
+                               S_IFDIR | mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_fail;
@@ -4934,7 +4807,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        inode->i_op = &btrfs_dir_inode_operations;
        inode->i_fop = &btrfs_dir_file_operations;
-       btrfs_set_trans_block_group(trans, inode);
 
        btrfs_i_size_write(inode, 0);
        err = btrfs_update_inode(trans, root, inode);
@@ -4948,8 +4820,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        d_instantiate(dentry, inode);
        drop_on_err = 0;
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 
 out_fail:
        nr = trans->blocks_used;
@@ -5041,7 +4911,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
        u64 bytenr;
        u64 extent_start = 0;
        u64 extent_end = 0;
-       u64 objectid = inode->i_ino;
+       u64 objectid = btrfs_ino(inode);
        u32 found_type;
        struct btrfs_path *path = NULL;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5069,7 +4939,7 @@ again:
                else
                        goto out;
        }
-       em = alloc_extent_map(GFP_NOFS);
+       em = alloc_extent_map();
        if (!em) {
                err = -ENOMEM;
                goto out;
@@ -5082,7 +4952,15 @@ again:
 
        if (!path) {
                path = btrfs_alloc_path();
-               BUG_ON(!path);
+               if (!path) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               /*
+                * Chances are we'll be called again, so go ahead and do
+                * readahead
+                */
+               path->reada = 1;
        }
 
        ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5223,8 +5101,10 @@ again:
                                kunmap(page);
                                free_extent_map(em);
                                em = NULL;
-                               btrfs_release_path(root, path);
-                               trans = btrfs_join_transaction(root, 1);
+
+                               btrfs_release_path(path);
+                               trans = btrfs_join_transaction(root);
+
                                if (IS_ERR(trans))
                                        return ERR_CAST(trans);
                                goto again;
@@ -5249,7 +5129,7 @@ not_found_em:
        em->block_start = EXTENT_MAP_HOLE;
        set_bit(EXTENT_FLAG_VACANCY, &em->flags);
 insert:
-       btrfs_release_path(root, path);
+       btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
                printk(KERN_ERR "Btrfs: bad extent! em: [%llu %llu] passed "
                       "[%llu %llu]\n", (unsigned long long)em->start,
@@ -5382,7 +5262,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
                u64 hole_start = start;
                u64 hole_len = len;
 
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                if (!em) {
                        err = -ENOMEM;
                        goto out;
@@ -5468,10 +5348,13 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
        }
 
-       trans = btrfs_join_transaction(root, 0);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
+       if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
+               btrfs_add_inode_defrag(trans, inode);
+
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5483,7 +5366,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        }
 
        if (!em) {
-               em = alloc_extent_map(GFP_NOFS);
+               em = alloc_extent_map();
                if (!em) {
                        em = ERR_PTR(-ENOMEM);
                        goto out;
@@ -5549,7 +5432,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+       ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
                                       offset, 0);
        if (ret < 0)
                goto out;
@@ -5566,7 +5449,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
        ret = 0;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, slot);
-       if (key.objectid != inode->i_ino ||
+       if (key.objectid != btrfs_ino(inode) ||
            key.type != BTRFS_EXTENT_DATA_KEY) {
                /* not our file or wrong item type, must cow */
                goto out;
@@ -5600,7 +5483,7 @@ static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
         * look for other files referencing this extent, if we
         * find any we must cow
         */
-       if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
+       if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
                                  key.offset - backref_offset, disk_bytenr))
                goto out;
 
@@ -5701,7 +5584,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 * to make sure the current transaction stays open
                 * while we look for nocow cross refs
                 */
-               trans = btrfs_join_transaction(root, 0);
+               trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
                        goto must_cow;
 
@@ -5790,9 +5673,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 
                        flush_dcache_page(bvec->bv_page);
                        if (csum != *private) {
-                               printk(KERN_ERR "btrfs csum failed ino %lu off"
+                               printk(KERN_ERR "btrfs csum failed ino %llu off"
                                      " %llu csum %u private %u\n",
-                                     inode->i_ino, (unsigned long long)start,
+                                     (unsigned long long)btrfs_ino(inode),
+                                     (unsigned long long)start,
                                      csum, *private);
                                err = -EIO;
                        }
@@ -5839,7 +5723,7 @@ again:
 
        BUG_ON(!ordered);
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans)) {
                err = -ENOMEM;
                goto out;
@@ -5939,9 +5823,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        struct btrfs_dio_private *dip = bio->bi_private;
 
        if (err) {
-               printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
+               printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
                      "sector %#Lx len %u err no %d\n",
-                     dip->inode->i_ino, bio->bi_rw,
+                     (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
                      (unsigned long long)bio->bi_sector, bio->bi_size, err);
                dip->errors = 1;
 
@@ -6589,6 +6473,7 @@ out:
 static int btrfs_truncate(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_block_rsv *rsv;
        int ret;
        int err = 0;
        struct btrfs_trans_handle *trans;
@@ -6602,28 +6487,80 @@ static int btrfs_truncate(struct inode *inode)
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-       trans = btrfs_start_transaction(root, 5);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
+       /*
+        * Yes ladies and gentlemen, this is indeed ugly.  The fact is we have
+        * 3 things going on here
+        *
+        * 1) We need to reserve space for our orphan item and the space to
+        * delete our orphan item.  Lord knows we don't want to have a dangling
+        * orphan item because we didn't reserve space to remove it.
+        *
+        * 2) We need to reserve space to update our inode.
+        *
+        * 3) We need to have something to cache all the space that is going to
+        * be free'd up by the truncate operation, but also have some slack
+        * space reserved in case it uses space during the truncate (thank you
+        * very much snapshotting).
+        *
+        * And we need these to all be separate.  The fact is we can use a lot of
+        * space doing the truncate, and we have no earthly idea how much space
+        * we will use, so we need the truncate reservation to be separate so it
+        * doesn't end up using space reserved for updating the inode or
+        * removing the orphan item.  We also need to be able to stop the
+        * transaction and start a new one, which means we need to be able to
+        * update the inode several times, and we have no way of knowing how
+        * many times that will be, so we can't just reserve 1 item for the
+        * entirety of the operation, so that has to be done separately as well.
+        * Then there is the orphan item, which does indeed need to be held on
+        * to for the whole operation, and we need nobody to touch this reserved
+        * space except the orphan code.
+        *
+        * So that leaves us with
+        *
+        * 1) root->orphan_block_rsv - for the orphan deletion.
+        * 2) rsv - for the truncate reservation, which we will steal from the
+        * transaction reservation.
+        * 3) fs_info->trans_block_rsv - this will have 1 item's worth left for
+        * updating the inode.
+        */
+       rsv = btrfs_alloc_block_rsv(root);
+       if (!rsv)
+               return -ENOMEM;
+       btrfs_add_durable_block_rsv(root->fs_info, rsv);
 
-       btrfs_set_trans_block_group(trans, inode);
+       trans = btrfs_start_transaction(root, 4);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out;
+       }
+
+       /*
+        * Reserve space for the truncate process.  Truncate should be adding
+        * space, but if there are snapshots it may end up using space.
+        */
+       ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
+       BUG_ON(ret);
 
        ret = btrfs_orphan_add(trans, inode);
        if (ret) {
                btrfs_end_transaction(trans, root);
-               return ret;
+               goto out;
        }
 
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root, nr);
 
-       /* Now start a transaction for the truncate */
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
-       btrfs_set_trans_block_group(trans, inode);
-       trans->block_rsv = root->orphan_block_rsv;
+       /*
+        * Ok so we've already migrated our bytes over for the truncate, so here
+        * just reserve the one slot we need for updating the inode.
+        */
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out;
+       }
+       trans->block_rsv = rsv;
 
        /*
         * setattr is responsible for setting the ordered_data_close flag,
@@ -6647,24 +6584,17 @@ static int btrfs_truncate(struct inode *inode)
 
        while (1) {
                if (!trans) {
-                       trans = btrfs_start_transaction(root, 0);
-                       if (IS_ERR(trans))
-                               return PTR_ERR(trans);
-                       btrfs_set_trans_block_group(trans, inode);
-                       trans->block_rsv = root->orphan_block_rsv;
-               }
+                       trans = btrfs_start_transaction(root, 3);
+                       if (IS_ERR(trans)) {
+                               err = PTR_ERR(trans);
+                               goto out;
+                       }
 
-               ret = btrfs_block_rsv_check(trans, root,
-                                           root->orphan_block_rsv, 0, 5);
-               if (ret == -EAGAIN) {
-                       ret = btrfs_commit_transaction(trans, root);
-                       if (ret)
-                               return ret;
-                       trans = NULL;
-                       continue;
-               } else if (ret) {
-                       err = ret;
-                       break;
+                       ret = btrfs_truncate_reserve_metadata(trans, root,
+                                                             rsv);
+                       BUG_ON(ret);
+
+                       trans->block_rsv = rsv;
                }
 
                ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6675,6 +6605,7 @@ static int btrfs_truncate(struct inode *inode)
                        break;
                }
 
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
                        err = ret;
@@ -6688,6 +6619,7 @@ static int btrfs_truncate(struct inode *inode)
        }
 
        if (ret == 0 && inode->i_nlink > 0) {
+               trans->block_rsv = root->orphan_block_rsv;
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
@@ -6699,15 +6631,20 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(NULL, inode);
        }
 
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
        ret = btrfs_update_inode(trans, root, inode);
        if (ret && !err)
                err = ret;
 
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+
+out:
+       btrfs_free_block_rsv(root, rsv);
+
        if (ret && !err)
                err = ret;
-       btrfs_btree_balance_dirty(root, nr);
 
        return err;
 }
@@ -6716,15 +6653,14 @@ static int btrfs_truncate(struct inode *inode)
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root,
-                            u64 new_dirid, u64 alloc_hint)
+                            struct btrfs_root *new_root, u64 new_dirid)
 {
        struct inode *inode;
        int err;
        u64 index = 0;
 
        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
-                               new_dirid, alloc_hint, S_IFDIR | 0700, &index);
+                               new_dirid, S_IFDIR | 0700, &index);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        inode->i_op = &btrfs_dir_inode_operations;
@@ -6776,18 +6712,22 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
 
-       atomic_set(&ei->outstanding_extents, 0);
-       atomic_set(&ei->reserved_extents, 0);
+       spin_lock_init(&ei->lock);
+       ei->outstanding_extents = 0;
+       ei->reserved_extents = 0;
 
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
        ei->dummy_inode = 0;
+       ei->in_defrag = 0;
        ei->force_compress = BTRFS_COMPRESS_NONE;
 
+       ei->delayed_node = NULL;
+
        inode = &ei->vfs_inode;
-       extent_map_tree_init(&ei->extent_tree, GFP_NOFS);
-       extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS);
-       extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS);
+       extent_map_tree_init(&ei->extent_tree);
+       extent_io_tree_init(&ei->io_tree, &inode->i_data);
+       extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
        mutex_init(&ei->log_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->i_orphan);
@@ -6812,8 +6752,8 @@ void btrfs_destroy_inode(struct inode *inode)
 
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+       WARN_ON(BTRFS_I(inode)->outstanding_extents);
+       WARN_ON(BTRFS_I(inode)->reserved_extents);
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -6834,25 +6774,10 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
-       if (root == root->fs_info->tree_root) {
-               struct btrfs_block_group_cache *block_group;
-
-               block_group = btrfs_lookup_block_group(root->fs_info,
-                                               BTRFS_I(inode)->block_group);
-               if (block_group && block_group->inode == inode) {
-                       spin_lock(&block_group->lock);
-                       block_group->inode = NULL;
-                       spin_unlock(&block_group->lock);
-                       btrfs_put_block_group(block_group);
-               } else if (block_group) {
-                       btrfs_put_block_group(block_group);
-               }
-       }
-
        spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-               printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
-                      inode->i_ino);
+               printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
+                      (unsigned long long)btrfs_ino(inode));
                list_del_init(&BTRFS_I(inode)->i_orphan);
        }
        spin_unlock(&root->orphan_lock);
@@ -6874,6 +6799,7 @@ void btrfs_destroy_inode(struct inode *inode)
        inode_tree_del(inode);
        btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
 free:
+       btrfs_remove_delayed_node(inode);
        call_rcu(&inode->i_rcu, btrfs_i_callback);
 }
 
@@ -6882,7 +6808,7 @@ int btrfs_drop_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
 
        if (btrfs_root_refs(&root->root_item) == 0 &&
-           root != root->fs_info->tree_root)
+           !btrfs_is_free_space_inode(root, inode))
                return 1;
        else
                return generic_drop_inode(inode);
@@ -6991,16 +6917,17 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        u64 index = 0;
        u64 root_objectid;
        int ret;
+       u64 old_ino = btrfs_ino(old_inode);
 
-       if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+       if (btrfs_ino(new_dir) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
                return -EPERM;
 
        /* we only allow rename subvolume link between subvolumes */
-       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
+       if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
                return -EXDEV;
 
-       if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
-           (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))
+       if (old_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID ||
+           (new_inode && btrfs_ino(new_inode) == BTRFS_FIRST_FREE_OBJECTID))
                return -ENOTEMPTY;
 
        if (S_ISDIR(old_inode->i_mode) && new_inode &&
@@ -7016,7 +6943,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                filemap_flush(old_inode->i_mapping);
 
        /* close the racy window with snapshot create/destroy ioctl */
-       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+       if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
                down_read(&root->fs_info->subvol_sem);
        /*
         * We want to reserve the absolute worst case amount of items.  So if
@@ -7032,8 +6959,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 goto out_notrans;
         }
 
-       btrfs_set_trans_block_group(trans, new_dir);
-
        if (dest != root)
                btrfs_record_root_in_trans(trans, dest);
 
@@ -7041,15 +6966,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (ret)
                goto out_fail;
 
-       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+       if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                /* force full log commit if subvolume involved. */
                root->fs_info->last_trans_log_full_commit = trans->transid;
        } else {
                ret = btrfs_insert_inode_ref(trans, dest,
                                             new_dentry->d_name.name,
                                             new_dentry->d_name.len,
-                                            old_inode->i_ino,
-                                            new_dir->i_ino, index);
+                                            old_ino,
+                                            btrfs_ino(new_dir), index);
                if (ret)
                        goto out_fail;
                /*
@@ -7065,10 +6990,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         * make sure the inode gets flushed if it is replacing
         * something.
         */
-       if (new_inode && new_inode->i_size &&
-           old_inode && S_ISREG(old_inode->i_mode)) {
+       if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
                btrfs_add_ordered_operation(trans, root, old_inode);
-       }
 
        old_dir->i_ctime = old_dir->i_mtime = ctime;
        new_dir->i_ctime = new_dir->i_mtime = ctime;
@@ -7077,7 +7000,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (old_dentry->d_parent != new_dentry->d_parent)
                btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
 
-       if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) {
+       if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
                root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
                ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid,
                                        old_dentry->d_name.name,
@@ -7094,7 +7017,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (new_inode) {
                new_inode->i_ctime = CURRENT_TIME;
-               if (unlikely(new_inode->i_ino ==
+               if (unlikely(btrfs_ino(new_inode) ==
                             BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                        root_objectid = BTRFS_I(new_inode)->location.objectid;
                        ret = btrfs_unlink_subvol(trans, dest, new_dir,
@@ -7122,7 +7045,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                             new_dentry->d_name.len, 0, index);
        BUG_ON(ret);
 
-       if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
+       if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
                struct dentry *parent = dget_parent(new_dentry);
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
                dput(parent);
@@ -7131,7 +7054,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 out_fail:
        btrfs_end_transaction_throttle(trans, root);
 out_notrans:
-       if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)
+       if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
                up_read(&root->fs_info->subvol_sem);
 
        return ret;
@@ -7185,58 +7108,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
        return 0;
 }
 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
-                                  int sync)
-{
-       struct btrfs_inode *binode;
-       struct inode *inode = NULL;
-
-       spin_lock(&root->fs_info->delalloc_lock);
-       while (!list_empty(&root->fs_info->delalloc_inodes)) {
-               binode = list_entry(root->fs_info->delalloc_inodes.next,
-                                   struct btrfs_inode, delalloc_inodes);
-               inode = igrab(&binode->vfs_inode);
-               if (inode) {
-                       list_move_tail(&binode->delalloc_inodes,
-                                      &root->fs_info->delalloc_inodes);
-                       break;
-               }
-
-               list_del_init(&binode->delalloc_inodes);
-               cond_resched_lock(&root->fs_info->delalloc_lock);
-       }
-       spin_unlock(&root->fs_info->delalloc_lock);
-
-       if (inode) {
-               if (sync) {
-                       filemap_write_and_wait(inode->i_mapping);
-                       /*
-                        * We have to do this because compression doesn't
-                        * actually set PG_writeback until it submits the pages
-                        * for IO, which happens in an async thread, so we could
-                        * race and not actually wait for any writeback pages
-                        * because they've not been submitted yet.  Technically
-                        * this could still be the case for the ordered stuff
-                        * since the async thread may not have started to do its
-                        * work yet.  If this becomes the case then we need to
-                        * figure out a way to make sure that in writepage we
-                        * wait for any async pages to be submitted before
-                        * returning so that fdatawait does what its supposed to
-                        * do.
-                        */
-                       btrfs_wait_ordered_range(inode, 0, (u64)-1);
-               } else {
-                       filemap_flush(inode->i_mapping);
-               }
-               if (delay_iput)
-                       btrfs_add_delayed_iput(inode);
-               else
-                       iput(inode);
-               return 1;
-       }
-       return 0;
-}
-
 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                         const char *symname)
 {
@@ -7260,9 +7131,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
                return -ENAMETOOLONG;
 
-       err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid);
-       if (err)
-               return err;
        /*
         * 2 items for inode item and ref
         * 2 items for dir items
@@ -7272,12 +7140,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
+       err = btrfs_find_free_ino(root, &objectid);
+       if (err)
+               goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-                               dentry->d_name.len, dir->i_ino, objectid,
-                               BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
-                               &index);
+                               dentry->d_name.len, btrfs_ino(dir), objectid,
+                               S_IFLNK|S_IRWXUGO, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -7289,7 +7158,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -7300,14 +7168,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
        if (drop_inode)
                goto out_unlock;
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
-       key.objectid = inode->i_ino;
+       key.objectid = btrfs_ino(inode);
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
        datasize = btrfs_file_extent_calc_inline_size(name_len);
@@ -7315,6 +7181,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                                      datasize);
        if (err) {
                drop_inode = 1;
+               btrfs_free_path(path);
                goto out_unlock;
        }
        leaf = path->nodes[0];