btrfs: rename the option to nospace_cache
[pandora-kernel.git] / fs / btrfs / inode.c
index ba95176..e16215f 100644 (file)
 #include "btrfs_inode.h"
 #include "ioctl.h"
 #include "print-tree.h"
-#include "volumes.h"
 #include "ordered-data.h"
 #include "xattr.h"
 #include "tree-log.h"
+#include "volumes.h"
 #include "compression.h"
 #include "locking.h"
 #include "free-space-cache.h"
@@ -93,6 +93,8 @@ static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode);
 
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@ -393,7 +395,10 @@ again:
             (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
                WARN_ON(pages);
                pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
-               BUG_ON(!pages);
+               if (!pages) {
+                       /* just bail out to the uncompressed code */
+                       goto cont;
+               }
 
                if (BTRFS_I(inode)->force_compress)
                        compress_type = BTRFS_I(inode)->force_compress;
@@ -424,6 +429,7 @@ again:
                        will_compress = 1;
                }
        }
+cont:
        if (start == 0) {
                trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
@@ -820,7 +826,7 @@ static noinline int cow_file_range(struct inode *inode,
        }
 
        BUG_ON(disk_num_bytes >
-              btrfs_super_total_bytes(&root->fs_info->super_copy));
+              btrfs_super_total_bytes(root->fs_info->super_copy));
 
        alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
        btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
@@ -1737,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                trans = btrfs_join_transaction(root);
                        BUG_ON(IS_ERR(trans));
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode(trans, root, inode);
+                       ret = btrfs_update_inode_fallback(trans, root, inode);
                        BUG_ON(ret);
                }
                goto out;
@@ -1787,17 +1793,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
        ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
        if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode_fallback(trans, root, inode);
                BUG_ON(ret);
        }
        ret = 0;
 out:
-       if (nolock) {
-               if (trans)
-                       btrfs_end_transaction_nolock(trans, root);
-       } else {
+       if (root != root->fs_info->tree_root)
                btrfs_delalloc_release_metadata(inode, ordered_extent->len);
-               if (trans)
+       if (trans) {
+               if (nolock)
+                       btrfs_end_transaction_nolock(trans, root);
+               else
                        btrfs_end_transaction(trans, root);
        }
 
@@ -1818,154 +1824,10 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
        return btrfs_finish_ordered_io(page->mapping->host, start, end);
 }
 
-/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data.  This
- * io_failure_record is used to record state as we go through all the
- * mirrors.  If another mirror has good data, the page is set up to date
- * and things continue.  If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
-       struct page *page;
-       u64 start;
-       u64 len;
-       u64 logical;
-       unsigned long bio_flags;
-       int last_mirror;
-};
-
-static int btrfs_io_failed_hook(struct bio *failed_bio,
-                        struct page *page, u64 start, u64 end,
-                        struct extent_state *state)
-{
-       struct io_failure_record *failrec = NULL;
-       u64 private;
-       struct extent_map *em;
-       struct inode *inode = page->mapping->host;
-       struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
-       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct bio *bio;
-       int num_copies;
-       int ret;
-       int rw;
-       u64 logical;
-
-       ret = get_state_private(failure_tree, start, &private);
-       if (ret) {
-               failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
-               if (!failrec)
-                       return -ENOMEM;
-               failrec->start = start;
-               failrec->len = end - start + 1;
-               failrec->last_mirror = 0;
-               failrec->bio_flags = 0;
-
-               read_lock(&em_tree->lock);
-               em = lookup_extent_mapping(em_tree, start, failrec->len);
-               if (em->start > start || em->start + em->len < start) {
-                       free_extent_map(em);
-                       em = NULL;
-               }
-               read_unlock(&em_tree->lock);
-
-               if (IS_ERR_OR_NULL(em)) {
-                       kfree(failrec);
-                       return -EIO;
-               }
-               logical = start - em->start;
-               logical = em->block_start + logical;
-               if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-                       logical = em->block_start;
-                       failrec->bio_flags = EXTENT_BIO_COMPRESSED;
-                       extent_set_compress_type(&failrec->bio_flags,
-                                                em->compress_type);
-               }
-               failrec->logical = logical;
-               free_extent_map(em);
-               set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
-                               EXTENT_DIRTY, GFP_NOFS);
-               set_state_private(failure_tree, start,
-                                (u64)(unsigned long)failrec);
-       } else {
-               failrec = (struct io_failure_record *)(unsigned long)private;
-       }
-       num_copies = btrfs_num_copies(
-                             &BTRFS_I(inode)->root->fs_info->mapping_tree,
-                             failrec->logical, failrec->len);
-       failrec->last_mirror++;
-       if (!state) {
-               spin_lock(&BTRFS_I(inode)->io_tree.lock);
-               state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
-                                                   failrec->start,
-                                                   EXTENT_LOCKED);
-               if (state && state->start != failrec->start)
-                       state = NULL;
-               spin_unlock(&BTRFS_I(inode)->io_tree.lock);
-       }
-       if (!state || failrec->last_mirror > num_copies) {
-               set_state_private(failure_tree, failrec->start, 0);
-               clear_extent_bits(failure_tree, failrec->start,
-                                 failrec->start + failrec->len - 1,
-                                 EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
-               kfree(failrec);
-               return -EIO;
-       }
-       bio = bio_alloc(GFP_NOFS, 1);
-       bio->bi_private = state;
-       bio->bi_end_io = failed_bio->bi_end_io;
-       bio->bi_sector = failrec->logical >> 9;
-       bio->bi_bdev = failed_bio->bi_bdev;
-       bio->bi_size = 0;
-
-       bio_add_page(bio, page, failrec->len, start - page_offset(page));
-       if (failed_bio->bi_rw & REQ_WRITE)
-               rw = WRITE;
-       else
-               rw = READ;
-
-       ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
-                                                     failrec->last_mirror,
-                                                     failrec->bio_flags, 0);
-       return ret;
-}
-
-/*
- * each time an IO finishes, we do a fast check in the IO failure tree
- * to see if we need to process or clean up an io_failure_record
- */
-static int btrfs_clean_io_failures(struct inode *inode, u64 start)
-{
-       u64 private;
-       u64 private_failure;
-       struct io_failure_record *failure;
-       int ret;
-
-       private = 0;
-       if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                            (u64)-1, 1, EXTENT_DIRTY, 0)) {
-               ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
-                                       start, &private_failure);
-               if (ret == 0) {
-                       failure = (struct io_failure_record *)(unsigned long)
-                                  private_failure;
-                       set_state_private(&BTRFS_I(inode)->io_failure_tree,
-                                         failure->start, 0);
-                       clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
-                                         failure->start,
-                                         failure->start + failure->len - 1,
-                                         EXTENT_DIRTY | EXTENT_LOCKED,
-                                         GFP_NOFS);
-                       kfree(failure);
-               }
-       }
-       return 0;
-}
-
 /*
  * when reads are done, we need to check csums to verify the data is correct
- * if there's a match, we allow the bio to finish.  If not, we go through
- * the io_failure_record routines to find good copies
+ * if there's a match, we allow the bio to finish.  If not, the code in
+ * extent_io.c will try to find good copies for us.
  */
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                               struct extent_state *state)
@@ -2011,10 +1873,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 
        kunmap_atomic(kaddr, KM_USER0);
 good:
-       /* if the io failure tree for this inode is non-empty,
-        * check to see if we've recovered from a failed IO
-        */
-       btrfs_clean_io_failures(inode, start);
        return 0;
 
 zeroit:
@@ -2079,89 +1937,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
        up_read(&root->fs_info->cleanup_work_sem);
 }
 
-/*
- * calculate extra metadata reservation when snapshotting a subvolume
- * contains orphan files.
- */
-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
-                               struct btrfs_pending_snapshot *pending,
-                               u64 *bytes_to_reserve)
-{
-       struct btrfs_root *root;
-       struct btrfs_block_rsv *block_rsv;
-       u64 num_bytes;
-       int index;
-
-       root = pending->root;
-       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
-               return;
-
-       block_rsv = root->orphan_block_rsv;
-
-       /* orphan block reservation for the snapshot */
-       num_bytes = block_rsv->size;
-
-       /*
-        * after the snapshot is created, COWing tree blocks may use more
-        * space than it frees. So we should make sure there is enough
-        * reserved space.
-        */
-       index = trans->transid & 0x1;
-       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
-               num_bytes += block_rsv->size -
-                            (block_rsv->reserved + block_rsv->freed[index]);
-       }
-
-       *bytes_to_reserve += num_bytes;
-}
-
-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
-                               struct btrfs_pending_snapshot *pending)
-{
-       struct btrfs_root *root = pending->root;
-       struct btrfs_root *snap = pending->snap;
-       struct btrfs_block_rsv *block_rsv;
-       u64 num_bytes;
-       int index;
-       int ret;
-
-       if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
-               return;
-
-       /* refill source subvolume's orphan block reservation */
-       block_rsv = root->orphan_block_rsv;
-       index = trans->transid & 0x1;
-       if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
-               num_bytes = block_rsv->size -
-                           (block_rsv->reserved + block_rsv->freed[index]);
-               ret = btrfs_block_rsv_migrate(&pending->block_rsv,
-                                             root->orphan_block_rsv,
-                                             num_bytes);
-               BUG_ON(ret);
-       }
-
-       /* setup orphan block reservation for the snapshot */
-       block_rsv = btrfs_alloc_block_rsv(snap);
-       BUG_ON(!block_rsv);
-
-       btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
-       snap->orphan_block_rsv = block_rsv;
-
-       num_bytes = root->orphan_block_rsv->size;
-       ret = btrfs_block_rsv_migrate(&pending->block_rsv,
-                                     block_rsv, num_bytes);
-       BUG_ON(ret);
-
-#if 0
-       /* insert orphan item for the snapshot */
-       WARN_ON(!root->orphan_item_inserted);
-       ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
-                                      snap->root_key.objectid);
-       BUG_ON(ret);
-       snap->orphan_item_inserted = 1;
-#endif
-}
-
 enum btrfs_orphan_cleanup_state {
        ORPHAN_CLEANUP_STARTED  = 1,
        ORPHAN_CLEANUP_DONE     = 2,
@@ -2247,9 +2022,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        }
        spin_unlock(&root->orphan_lock);
 
-       if (block_rsv)
-               btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
-
        /* grab metadata reservation from transaction handle */
        if (reserve) {
                ret = btrfs_orphan_reserve_metadata(trans, inode);
@@ -2316,6 +2088,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
        struct btrfs_key key, found_key;
        struct btrfs_trans_handle *trans;
        struct inode *inode;
+       u64 last_objectid = 0;
        int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
        if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
@@ -2367,41 +2140,49 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * crossing root thing.  we store the inode number in the
                 * offset of the orphan item.
                 */
+
+               if (found_key.offset == last_objectid) {
+                       printk(KERN_ERR "btrfs: Error removing orphan entry, "
+                              "stopping orphan cleanup\n");
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               last_objectid = found_key.offset;
+
                found_key.objectid = found_key.offset;
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-               if (IS_ERR(inode)) {
-                       ret = PTR_ERR(inode);
+               ret = PTR_RET(inode);
+               if (ret && ret != -ESTALE)
                        goto out;
-               }
 
                /*
-                * add this inode to the orphan list so btrfs_orphan_del does
-                * the proper thing when we hit it
+                * Inode is already gone but the orphan item is still there,
+                * kill the orphan item.
                 */
-               spin_lock(&root->orphan_lock);
-               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-               spin_unlock(&root->orphan_lock);
-
-               /*
-                * if this is a bad inode, means we actually succeeded in
-                * removing the inode, but not the orphan record, which means
-                * we need to manually delete the orphan since iput will just
-                * do a destroy_inode
-                */
-               if (is_bad_inode(inode)) {
-                       trans = btrfs_start_transaction(root, 0);
+               if (ret == -ESTALE) {
+                       trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                goto out;
                        }
-                       btrfs_orphan_del(trans, inode);
+                       ret = btrfs_del_orphan_item(trans, root,
+                                                   found_key.objectid);
+                       BUG_ON(ret);
                        btrfs_end_transaction(trans, root);
-                       iput(inode);
                        continue;
                }
 
+               /*
+                * add this inode to the orphan list so btrfs_orphan_del does
+                * the proper thing when we hit it
+                */
+               spin_lock(&root->orphan_lock);
+               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+               spin_unlock(&root->orphan_lock);
+
                /* if we have links, this was a truncate, lets do that */
                if (inode->i_nlink) {
                        if (!S_ISREG(inode->i_mode)) {
@@ -2420,6 +2201,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                if (ret)
                        goto out;
        }
+       /* release the path since we're done with it */
+       btrfs_release_path(path);
+
        root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
 
        if (root->orphan_block_rsv)
@@ -2647,7 +2431,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
 /*
  * copy everything in the in-memory inode into the btree.
  */
-noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, struct inode *inode)
 {
        struct btrfs_inode_item *inode_item;
@@ -2655,21 +2439,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        int ret;
 
-       /*
-        * If the inode is a free space inode, we can deadlock during commit
-        * if we put it into the delayed code.
-        *
-        * The data relocation inode should also be directly updated
-        * without delay
-        */
-       if (!btrfs_is_free_space_inode(root, inode)
-           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
-               ret = btrfs_delayed_update_inode(trans, root, inode);
-               if (!ret)
-                       btrfs_set_inode_last_trans(trans, inode);
-               return ret;
-       }
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -2697,6 +2466,43 @@ failed:
        return ret;
 }
 
+/*
+ * copy everything in the in-memory inode into the btree.
+ */
+noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode)
+{
+       int ret;
+
+       /*
+        * If the inode is a free space inode, we can deadlock during commit
+        * if we put it into the delayed code.
+        *
+        * The data relocation inode should also be directly updated
+        * without delay
+        */
+       if (!btrfs_is_free_space_inode(root, inode)
+           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+               ret = btrfs_delayed_update_inode(trans, root, inode);
+               if (!ret)
+                       btrfs_set_inode_last_trans(trans, inode);
+               return ret;
+       }
+
+       return btrfs_update_inode_item(trans, root, inode);
+}
+
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode)
+{
+       int ret;
+
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret == -ENOSPC)
+               return btrfs_update_inode_item(trans, root, inode);
+       return ret;
+}
+
 /*
  * unlink helper that gets used here in inode.c and in the tree logging
  * recovery code.  It remove a link in a directory with a given name, and
@@ -2835,7 +2641,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        u64 ino = btrfs_ino(inode);
        u64 dir_ino = btrfs_ino(dir);
 
-       trans = btrfs_start_transaction(root, 10);
+       /*
+        * 1 for the possible orphan item
+        * 1 for the dir item
+        * 1 for the dir index
+        * 1 for the inode ref
+        * 1 for the inode ref in the tree log
+        * 2 for the dir entries in the log
+        * 1 for the inode
+        */
+       trans = btrfs_start_transaction(root, 8);
        if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
                return trans;
 
@@ -2858,7 +2673,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
                return ERR_PTR(-ENOMEM);
        }
 
-       trans = btrfs_start_transaction(root, 0);
+       /* 1 for the orphan item */
+       trans = btrfs_start_transaction(root, 1);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                root->fs_info->enospc_unlink = 0;
@@ -2963,6 +2779,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
        err = 0;
 out:
        btrfs_free_path(path);
+       /* Migrate the orphan reservation over */
+       if (!err)
+               err = btrfs_block_rsv_migrate(trans->block_rsv,
+                               &root->fs_info->global_block_rsv,
+                               trans->bytes_reserved);
+
        if (err) {
                btrfs_end_transaction(trans, root);
                root->fs_info->enospc_unlink = 0;
@@ -2977,6 +2799,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
        if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+               btrfs_block_rsv_release(root, trans->block_rsv,
+                                       trans->bytes_reserved);
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
                BUG_ON(!root->fs_info->enospc_unlink);
                root->fs_info->enospc_unlink = 0;
        }
@@ -3368,6 +3193,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
        pgoff_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
        struct page *page;
+       gfp_t mask = btrfs_alloc_write_mask(mapping);
        int ret = 0;
        u64 page_start;
        u64 page_end;
@@ -3380,7 +3206,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        ret = -ENOMEM;
 again:
-       page = find_or_create_page(mapping, index, GFP_NOFS);
+       page = find_or_create_page(mapping, index, mask);
        if (!page) {
                btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
                goto out;
@@ -3613,6 +3439,8 @@ void btrfs_evict_inode(struct inode *inode)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_block_rsv *rsv, *global_rsv;
+       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
        unsigned long nr;
        int ret;
 
@@ -3640,22 +3468,55 @@ void btrfs_evict_inode(struct inode *inode)
                goto no_delete;
        }
 
+       rsv = btrfs_alloc_block_rsv(root);
+       if (!rsv) {
+               btrfs_orphan_del(NULL, inode);
+               goto no_delete;
+       }
+       rsv->size = min_size;
+       global_rsv = &root->fs_info->global_block_rsv;
+
        btrfs_i_size_write(inode, 0);
 
+       /*
+        * This is a bit simpler than btrfs_truncate since
+        *
+        * 1) We've already reserved our space for our orphan item in the
+        *    unlink.
+        * 2) We're going to delete the inode item, so we don't need to update
+        *    it at all.
+        *
+        * So we just need to reserve some slack space in case we add bytes when
+        * doing the truncate.
+        */
        while (1) {
-               trans = btrfs_join_transaction(root);
-               BUG_ON(IS_ERR(trans));
-               trans->block_rsv = root->orphan_block_rsv;
+               ret = btrfs_block_rsv_refill(root, rsv, min_size);
+
+               /*
+                * Try and steal from the global reserve since we will
+                * likely not use this space anyway, we want to try as
+                * hard as possible to get this to work.
+                */
+               if (ret)
+                       ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
 
-               ret = btrfs_block_rsv_check(trans, root,
-                                           root->orphan_block_rsv, 0, 5);
                if (ret) {
-                       BUG_ON(ret != -EAGAIN);
-                       ret = btrfs_commit_transaction(trans, root);
-                       BUG_ON(ret);
-                       continue;
+                       printk(KERN_WARNING "Could not get space for a "
+                              "delete, will truncate on mount %d\n", ret);
+                       btrfs_orphan_del(NULL, inode);
+                       btrfs_free_block_rsv(root, rsv);
+                       goto no_delete;
+               }
+
+               trans = btrfs_start_transaction(root, 0);
+               if (IS_ERR(trans)) {
+                       btrfs_orphan_del(NULL, inode);
+                       btrfs_free_block_rsv(root, rsv);
+                       goto no_delete;
                }
 
+               trans->block_rsv = rsv;
+
                ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
                if (ret != -EAGAIN)
                        break;
@@ -3664,14 +3525,17 @@ void btrfs_evict_inode(struct inode *inode)
                btrfs_end_transaction(trans, root);
                trans = NULL;
                btrfs_btree_balance_dirty(root, nr);
-
        }
 
+       btrfs_free_block_rsv(root, rsv);
+
        if (ret == 0) {
+               trans->block_rsv = root->orphan_block_rsv;
                ret = btrfs_orphan_del(trans, inode);
                BUG_ON(ret);
        }
 
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
        if (!(root == root->fs_info->tree_root ||
              root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
                btrfs_return_ino(root, btrfs_ino(inode));
@@ -4009,12 +3873,20 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
        int index;
-       int ret;
+       int ret = 0;
 
        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
 
-       ret = btrfs_inode_by_name(dir, dentry, &location);
+       if (unlikely(d_need_lookup(dentry))) {
+               memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
+               kfree(dentry->d_fsdata);
+               dentry->d_fsdata = NULL;
+               /* This thing is hashed, drop it for now */
+               d_drop(dentry);
+       } else {
+               ret = btrfs_inode_by_name(dir, dentry, &location);
+       }
 
        if (ret < 0)
                return ERR_PTR(ret);
@@ -4069,16 +3941,24 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
        return 0;
 }
 
+static void btrfs_dentry_release(struct dentry *dentry)
+{
+       if (dentry->d_fsdata)
+               kfree(dentry->d_fsdata);
+}
+
 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   struct nameidata *nd)
 {
-       struct inode *inode;
-
-       inode = btrfs_lookup_dentry(dir, dentry);
-       if (IS_ERR(inode))
-               return ERR_CAST(inode);
+       struct dentry *ret;
 
-       return d_splice_alias(inode, dentry);
+       ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
+       if (unlikely(d_need_lookup(dentry))) {
+               spin_lock(&dentry->d_lock);
+               dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+               spin_unlock(&dentry->d_lock);
+       }
+       return ret;
 }
 
 unsigned char btrfs_filetype_table[] = {
@@ -4097,6 +3977,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_path *path;
        struct list_head ins_list;
        struct list_head del_list;
+       struct qstr q;
        int ret;
        struct extent_buffer *leaf;
        int slot;
@@ -4187,6 +4068,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
                while (di_cur < di_total) {
                        struct btrfs_key location;
+                       struct dentry *tmp;
 
                        if (verify_dir_item(root, leaf, di))
                                break;
@@ -4207,6 +4089,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                        d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
                        btrfs_dir_item_key_to_cpu(leaf, di, &location);
 
+                       q.name = name_ptr;
+                       q.len = name_len;
+                       q.hash = full_name_hash(q.name, q.len);
+                       tmp = d_lookup(filp->f_dentry, &q);
+                       if (!tmp) {
+                               struct btrfs_key *newkey;
+
+                               newkey = kzalloc(sizeof(struct btrfs_key),
+                                                GFP_NOFS);
+                               if (!newkey)
+                                       goto no_dentry;
+                               tmp = d_alloc(filp->f_dentry, &q);
+                               if (!tmp) {
+                                       kfree(newkey);
+                                       dput(tmp);
+                                       goto no_dentry;
+                               }
+                               memcpy(newkey, &location,
+                                      sizeof(struct btrfs_key));
+                               tmp->d_fsdata = newkey;
+                               tmp->d_flags |= DCACHE_NEED_LOOKUP;
+                               d_rehash(tmp);
+                               dput(tmp);
+                       } else {
+                               dput(tmp);
+                       }
+no_dentry:
                        /* is this a reference to our own snapshot? If so
                         * skip it
                         */
@@ -4468,7 +4377,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        inode->i_generation = BTRFS_I(inode)->generation;
        btrfs_set_inode_space_info(root, inode);
 
-       if (mode & S_IFDIR)
+       if (S_ISDIR(mode))
                owner = 0;
        else
                owner = 1;
@@ -4513,7 +4422,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 
        btrfs_inherit_iflags(inode, dir);
 
-       if ((mode & S_IFREG)) {
+       if (S_ISREG(mode)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
                if (btrfs_test_opt(root, NODATACOW) ||
@@ -4767,11 +4676,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (err) {
                drop_inode = 1;
        } else {
-               struct dentry *parent = dget_parent(dentry);
+               struct dentry *parent = dentry->d_parent;
                err = btrfs_update_inode(trans, root, inode);
                BUG_ON(err);
                btrfs_log_new_name(trans, inode, NULL, parent);
-               dput(parent);
        }
 
        nr = trans->blocks_used;
@@ -5751,8 +5659,7 @@ again:
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
                ret = btrfs_ordered_update_i_size(inode, 0, ordered);
                if (!ret)
-                       ret = btrfs_update_inode(trans, root, inode);
-               err = ret;
+                       err = btrfs_update_inode_fallback(trans, root, inode);
                goto out;
        }
 
@@ -5790,7 +5697,7 @@ again:
        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
        ret = btrfs_ordered_update_i_size(inode, 0, ordered);
        if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
-               btrfs_update_inode(trans, root, inode);
+               btrfs_update_inode_fallback(trans, root, inode);
        ret = 0;
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
@@ -6245,7 +6152,7 @@ int btrfs_readpage(struct file *file, struct page *page)
 {
        struct extent_io_tree *tree;
        tree = &BTRFS_I(page->mapping->host)->io_tree;
-       return extent_read_full_page(tree, page, btrfs_get_extent);
+       return extent_read_full_page(tree, page, btrfs_get_extent, 0);
 }
 
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -6497,6 +6404,7 @@ static int btrfs_truncate(struct inode *inode)
        struct btrfs_trans_handle *trans;
        unsigned long nr;
        u64 mask = root->sectorsize - 1;
+       u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
        ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
        if (ret)
@@ -6544,19 +6452,23 @@ static int btrfs_truncate(struct inode *inode)
        rsv = btrfs_alloc_block_rsv(root);
        if (!rsv)
                return -ENOMEM;
-       btrfs_add_durable_block_rsv(root->fs_info, rsv);
+       rsv->size = min_size;
 
+       /*
+        * 1 for the truncate slack space
+        * 1 for the orphan item we're going to add
+        * 1 for the orphan item deletion
+        * 1 for updating the inode.
+        */
        trans = btrfs_start_transaction(root, 4);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
                goto out;
        }
 
-       /*
-        * Reserve space for the truncate process.  Truncate should be adding
-        * space, but if there are snapshots it may end up using space.
-        */
-       ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
+       /* Migrate the slack space for the truncate to our reserve */
+       ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+                                     min_size);
        BUG_ON(ret);
 
        ret = btrfs_orphan_add(trans, inode);
@@ -6565,21 +6477,6 @@ static int btrfs_truncate(struct inode *inode)
                goto out;
        }
 
-       nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
-
-       /*
-        * Ok so we've already migrated our bytes over for the truncate, so here
-        * just reserve the one slot we need for updating the inode.
-        */
-       trans = btrfs_start_transaction(root, 1);
-       if (IS_ERR(trans)) {
-               err = PTR_ERR(trans);
-               goto out;
-       }
-       trans->block_rsv = rsv;
-
        /*
         * setattr is responsible for setting the ordered_data_close flag,
         * but that is only tested during the last file release.  That
@@ -6601,20 +6498,30 @@ static int btrfs_truncate(struct inode *inode)
                btrfs_add_ordered_operation(trans, root, inode);
 
        while (1) {
+               ret = btrfs_block_rsv_refill(root, rsv, min_size);
+               if (ret) {
+                       /*
+                        * This can only happen with the original transaction we
+                        * started above, every other time we shouldn't have a
+                        * transaction started yet.
+                        */
+                       if (ret == -EAGAIN)
+                               goto end_trans;
+                       err = ret;
+                       break;
+               }
+
                if (!trans) {
-                       trans = btrfs_start_transaction(root, 3);
+                       /* Just need the 1 for updating the inode */
+                       trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
                                goto out;
                        }
-
-                       ret = btrfs_truncate_reserve_metadata(trans, root,
-                                                             rsv);
-                       BUG_ON(ret);
-
-                       trans->block_rsv = rsv;
                }
 
+               trans->block_rsv = rsv;
+
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
@@ -6629,7 +6536,7 @@ static int btrfs_truncate(struct inode *inode)
                        err = ret;
                        break;
                }
-
+end_trans:
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
                trans = NULL;
@@ -6649,14 +6556,16 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(NULL, inode);
        }
 
-       trans->block_rsv = &root->fs_info->trans_block_rsv;
-       ret = btrfs_update_inode(trans, root, inode);
-       if (ret && !err)
-               err = ret;
+       if (trans) {
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+               ret = btrfs_update_inode(trans, root, inode);
+               if (ret && !err)
+                       err = ret;
 
-       nr = trans->blocks_used;
-       ret = btrfs_end_transaction_throttle(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+               nr = trans->blocks_used;
+               ret = btrfs_end_transaction_throttle(trans, root);
+               btrfs_btree_balance_dirty(root, nr);
+       }
 
 out:
        btrfs_free_block_rsv(root, rsv);
@@ -6711,9 +6620,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->last_sub_trans = 0;
        ei->logged_trans = 0;
        ei->delalloc_bytes = 0;
-       ei->reserved_bytes = 0;
        ei->disk_i_size = 0;
        ei->flags = 0;
+       ei->csum_bytes = 0;
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
 
@@ -6725,6 +6634,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->orphan_meta_reserved = 0;
        ei->dummy_inode = 0;
        ei->in_defrag = 0;
+       ei->delalloc_meta_reserved = 0;
        ei->force_compress = BTRFS_COMPRESS_NONE;
 
        ei->delayed_node = NULL;
@@ -6759,6 +6669,8 @@ void btrfs_destroy_inode(struct inode *inode)
        WARN_ON(inode->i_data.nrpages);
        WARN_ON(BTRFS_I(inode)->outstanding_extents);
        WARN_ON(BTRFS_I(inode)->reserved_extents);
+       WARN_ON(BTRFS_I(inode)->delalloc_bytes);
+       WARN_ON(BTRFS_I(inode)->csum_bytes);
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -6883,7 +6795,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
 {
        struct inode *inode = dentry->d_inode;
        generic_fillattr(inode, stat);
-       stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
+       stat->dev = BTRFS_I(inode)->root->anon_dev;
        stat->blksize = PAGE_CACHE_SIZE;
        stat->blocks = (inode_get_bytes(inode) +
                        BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7051,9 +6963,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        BUG_ON(ret);
 
        if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
-               struct dentry *parent = dget_parent(new_dentry);
+               struct dentry *parent = new_dentry->d_parent;
                btrfs_log_new_name(trans, old_inode, old_dir, parent);
-               dput(parent);
                btrfs_end_log_trans(root);
        }
 out_fail:
@@ -7318,15 +7229,19 @@ static int btrfs_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
-static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int btrfs_permission(struct inode *inode, int mask)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       umode_t mode = inode->i_mode;
 
-       if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
-               return -EROFS;
-       if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
-               return -EACCES;
-       return generic_permission(inode, mask, flags, btrfs_check_acl);
+       if (mask & MAY_WRITE &&
+           (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+               if (btrfs_root_readonly(root))
+                       return -EROFS;
+               if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
+                       return -EACCES;
+       }
+       return generic_permission(inode, mask);
 }
 
 static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7346,10 +7261,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
+       .get_acl        = btrfs_get_acl,
 };
 static const struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
        .permission     = btrfs_permission,
+       .get_acl        = btrfs_get_acl,
 };
 
 static const struct file_operations btrfs_dir_file_operations = {
@@ -7371,7 +7288,6 @@ static struct extent_io_ops btrfs_extent_io_ops = {
        .readpage_end_io_hook = btrfs_readpage_end_io_hook,
        .writepage_end_io_hook = btrfs_writepage_end_io_hook,
        .writepage_start_hook = btrfs_writepage_start_hook,
-       .readpage_io_failed_hook = btrfs_io_failed_hook,
        .set_bit_hook = btrfs_set_bit_hook,
        .clear_bit_hook = btrfs_clear_bit_hook,
        .merge_extent_hook = btrfs_merge_extent_hook,
@@ -7418,6 +7334,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
        .fiemap         = btrfs_fiemap,
+       .get_acl        = btrfs_get_acl,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
        .getattr        = btrfs_getattr,
@@ -7427,6 +7344,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
        .getxattr       = btrfs_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
+       .get_acl        = btrfs_get_acl,
 };
 static const struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
@@ -7438,8 +7356,10 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
        .getxattr       = btrfs_getxattr,
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
+       .get_acl        = btrfs_get_acl,
 };
 
 const struct dentry_operations btrfs_dentry_operations = {
        .d_delete       = btrfs_dentry_delete,
+       .d_release      = btrfs_dentry_release,
 };