Merge branch 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git...
authorLinus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Mon, 28 Mar 2011 22:31:05 +0000 (15:31 -0700)
* 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (45 commits)
  Btrfs: fix __btrfs_map_block on 32 bit machines
  btrfs: fix possible deadlock by clearing __GFP_FS flag
  btrfs: check link counter overflow in link(2)
  btrfs: don't mess with i_nlink of unlocked inode in rename()
  Btrfs: check return value of btrfs_alloc_path()
  Btrfs: fix OOPS of empty filesystem after balance
  Btrfs: fix memory leak of empty filesystem after balance
  Btrfs: fix return value of setflags ioctl
  Btrfs: fix uncheck memory allocations
  btrfs: make inode ref log recovery faster
  Btrfs: add btrfs_trim_fs() to handle FITRIM
  Btrfs: adjust btrfs_discard_extent() return errors and trimmed bytes
  Btrfs: make btrfs_map_block() return entire free extent for each device of RAID0/1/10/DUP
  Btrfs: make update_reserved_bytes() public
  btrfs: return EXDEV when linking from different subvolumes
  Btrfs: Per file/directory controls for COW and compression
  Btrfs: add datacow flag in inode flag
  btrfs: use GFP_NOFS instead of GFP_KERNEL
  Btrfs: check return value of read_tree_block()
  btrfs: properly access unaligned checksum buffer
  ...

Fix up trivial conflicts in fs/btrfs/volumes.c due to plug removal in
the block layer.

28 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/dir-item.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
include/linux/fs.h
include/trace/events/btrfs.h [new file with mode: 0644]

index ccc991c..57c3bb2 100644 (file)
@@ -136,9 +136,8 @@ struct btrfs_inode {
         * items we think we'll end up using, and reserved_extents is the number
         * of extent items we've reserved metadata for.
         */
-       spinlock_t accounting_lock;
        atomic_t outstanding_extents;
-       int reserved_extents;
+       atomic_t reserved_extents;
 
        /*
         * ordered_data_close is set by truncate when a file that used
index 4d2110e..41d1d7c 100644 (file)
@@ -340,6 +340,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 
        WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
+       if (!cb)
+               return -ENOMEM;
        atomic_set(&cb->pending_bios, 0);
        cb->errors = 0;
        cb->inode = inode;
@@ -354,6 +356,10 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
        bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
 
        bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
+       if(!bio) {
+               kfree(cb);
+               return -ENOMEM;
+       }
        bio->bi_private = cb;
        bio->bi_end_io = end_compressed_bio_write;
        atomic_inc(&cb->pending_bios);
@@ -657,8 +663,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        atomic_inc(&cb->pending_bios);
 
                        if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
-                               btrfs_lookup_bio_sums(root, inode, comp_bio,
-                                                     sums);
+                               ret = btrfs_lookup_bio_sums(root, inode,
+                                                       comp_bio, sums);
+                               BUG_ON(ret);
                        }
                        sums += (comp_bio->bi_size + root->sectorsize - 1) /
                                root->sectorsize;
@@ -683,8 +690,10 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
        BUG_ON(ret);
 
-       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-               btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+       if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
+               ret = btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
+               BUG_ON(ret);
+       }
 
        ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
        BUG_ON(ret);
index b5baff0..84d7ca1 100644 (file)
@@ -147,10 +147,11 @@ noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
 {
        struct extent_buffer *eb;
-       spin_lock(&root->node_lock);
-       eb = root->node;
+
+       rcu_read_lock();
+       eb = rcu_dereference(root->node);
        extent_buffer_get(eb);
-       spin_unlock(&root->node_lock);
+       rcu_read_unlock();
        return eb;
 }
 
@@ -165,14 +166,8 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
        while (1) {
                eb = btrfs_root_node(root);
                btrfs_tree_lock(eb);
-
-               spin_lock(&root->node_lock);
-               if (eb == root->node) {
-                       spin_unlock(&root->node_lock);
+               if (eb == root->node)
                        break;
-               }
-               spin_unlock(&root->node_lock);
-
                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);
        }
@@ -458,10 +453,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
                else
                        parent_start = 0;
 
-               spin_lock(&root->node_lock);
-               root->node = cow;
                extent_buffer_get(cow);
-               spin_unlock(&root->node_lock);
+               rcu_assign_pointer(root->node, cow);
 
                btrfs_free_tree_block(trans, root, buf, parent_start,
                                      last_ref);
@@ -542,6 +535,9 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 
        ret = __btrfs_cow_block(trans, root, buf, parent,
                                 parent_slot, cow_ret, search_start, 0);
+
+       trace_btrfs_cow_block(root, buf, *cow_ret);
+
        return ret;
 }
 
@@ -686,6 +682,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
                        if (!cur) {
                                cur = read_tree_block(root, blocknr,
                                                         blocksize, gen);
+                               if (!cur)
+                                       return -EIO;
                        } else if (!uptodate) {
                                btrfs_read_buffer(cur, gen);
                        }
@@ -732,122 +730,6 @@ static inline unsigned int leaf_data_end(struct btrfs_root *root,
        return btrfs_item_offset_nr(leaf, nr - 1);
 }
 
-/*
- * extra debugging checks to make sure all the items in a key are
- * well formed and in the proper order
- */
-static int check_node(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
-{
-       struct extent_buffer *parent = NULL;
-       struct extent_buffer *node = path->nodes[level];
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key node_key;
-       int parent_slot;
-       int slot;
-       struct btrfs_key cpukey;
-       u32 nritems = btrfs_header_nritems(node);
-
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
-
-       slot = path->slots[level];
-       BUG_ON(nritems == 0);
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_node_key(node, &node_key, 0);
-               BUG_ON(memcmp(&parent_key, &node_key,
-                             sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(node));
-       }
-       BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
-       if (slot != 0) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
-       }
-       if (slot < nritems - 1) {
-               btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
-               btrfs_node_key(node, &node_key, slot);
-               BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
-       }
-       return 0;
-}
-
-/*
- * extra checking to make sure all the items in a leaf are
- * well formed and in the proper order
- */
-static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
-                     int level)
-{
-       struct extent_buffer *leaf = path->nodes[level];
-       struct extent_buffer *parent = NULL;
-       int parent_slot;
-       struct btrfs_key cpukey;
-       struct btrfs_disk_key parent_key;
-       struct btrfs_disk_key leaf_key;
-       int slot = path->slots[0];
-
-       u32 nritems = btrfs_header_nritems(leaf);
-
-       if (path->nodes[level + 1])
-               parent = path->nodes[level + 1];
-
-       if (nritems == 0)
-               return 0;
-
-       if (parent) {
-               parent_slot = path->slots[level + 1];
-               btrfs_node_key(parent, &parent_key, parent_slot);
-               btrfs_item_key(leaf, &leaf_key, 0);
-
-               BUG_ON(memcmp(&parent_key, &leaf_key,
-                      sizeof(struct btrfs_disk_key)));
-               BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
-                      btrfs_header_bytenr(leaf));
-       }
-       if (slot != 0 && slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
-               if (comp_keys(&leaf_key, &cpukey) <= 0) {
-                       btrfs_print_leaf(root, leaf);
-                       printk(KERN_CRIT "slot %d offset bad key\n", slot);
-                       BUG_ON(1);
-               }
-               if (btrfs_item_offset_nr(leaf, slot - 1) !=
-                      btrfs_item_end_nr(leaf, slot)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk(KERN_CRIT "slot %d offset bad\n", slot);
-                       BUG_ON(1);
-               }
-       }
-       if (slot < nritems - 1) {
-               btrfs_item_key(leaf, &leaf_key, slot);
-               btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
-               BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
-               if (btrfs_item_offset_nr(leaf, slot) !=
-                       btrfs_item_end_nr(leaf, slot + 1)) {
-                       btrfs_print_leaf(root, leaf);
-                       printk(KERN_CRIT "slot %d offset bad\n", slot);
-                       BUG_ON(1);
-               }
-       }
-       BUG_ON(btrfs_item_offset_nr(leaf, 0) +
-              btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
-       return 0;
-}
-
-static noinline int check_block(struct btrfs_root *root,
-                               struct btrfs_path *path, int level)
-{
-       return 0;
-       if (level == 0)
-               return check_leaf(root, path, level);
-       return check_node(root, path, level);
-}
 
 /*
  * search for key in the extent_buffer.  The items start at offset p,
@@ -1046,9 +928,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                        goto enospc;
                }
 
-               spin_lock(&root->node_lock);
-               root->node = child;
-               spin_unlock(&root->node_lock);
+               rcu_assign_pointer(root->node, child);
 
                add_root_to_dirty_list(root);
                btrfs_tree_unlock(child);
@@ -1188,7 +1068,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
                }
        }
        /* double check we haven't messed things up */
-       check_block(root, path, level);
        if (orig_ptr !=
            btrfs_node_blockptr(path->nodes[level], path->slots[level]))
                BUG();
@@ -1798,12 +1677,6 @@ cow_done:
                if (!cow)
                        btrfs_unlock_up_safe(p, level + 1);
 
-               ret = check_block(root, p, level);
-               if (ret) {
-                       ret = -1;
-                       goto done;
-               }
-
                ret = bin_search(b, key, level, &slot);
 
                if (level != 0) {
@@ -2130,10 +2003,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 
        btrfs_mark_buffer_dirty(c);
 
-       spin_lock(&root->node_lock);
        old = root->node;
-       root->node = c;
-       spin_unlock(&root->node_lock);
+       rcu_assign_pointer(root->node, c);
 
        /* the super has an extra ref to root->node */
        free_extent_buffer(old);
@@ -3840,7 +3711,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
        unsigned long ptr;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
        ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
        if (!ret) {
                leaf = path->nodes[0];
@@ -4217,6 +4089,7 @@ find_next_key:
                }
                btrfs_set_path_blocking(path);
                cur = read_node_slot(root, cur, slot);
+               BUG_ON(!cur);
 
                btrfs_tree_lock(cur);
 
index 7f78cc7..d47ce83 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/wait.h>
 #include <linux/slab.h>
 #include <linux/kobject.h>
+#include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
 #include "extent_io.h"
 #include "extent_map.h"
@@ -40,6 +41,7 @@ extern struct kmem_cache *btrfs_trans_handle_cachep;
 extern struct kmem_cache *btrfs_transaction_cachep;
 extern struct kmem_cache *btrfs_bit_radix_cachep;
 extern struct kmem_cache *btrfs_path_cachep;
+extern struct kmem_cache *btrfs_free_space_cachep;
 struct btrfs_ordered_sum;
 
 #define BTRFS_MAGIC "_BHRfS_M"
@@ -782,9 +784,6 @@ struct btrfs_free_cluster {
        /* first extent starting offset */
        u64 window_start;
 
-       /* if this cluster simply points at a bitmap in the block group */
-       bool points_to_bitmap;
-
        struct btrfs_block_group_cache *block_group;
        /*
         * when a cluster is allocated from a block group, we put the
@@ -1283,6 +1282,7 @@ struct btrfs_root {
 #define BTRFS_INODE_NODUMP             (1 << 8)
 #define BTRFS_INODE_NOATIME            (1 << 9)
 #define BTRFS_INODE_DIRSYNC            (1 << 10)
+#define BTRFS_INODE_COMPRESS           (1 << 11)
 
 /* some macros to generate set/get funcs for the struct fields.  This
  * assumes there is a lefoo_to_cpu for every type, so lets make a simple
@@ -2157,6 +2157,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
                      u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                               u64 num_bytes, int reserve, int sinfo);
 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
@@ -2227,10 +2229,12 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
                                   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-                              u64 num_bytes);
+                              u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 type);
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
 
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -2392,6 +2396,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
                                          struct btrfs_path *path, u64 dir,
                                          const char *name, u16 name_len,
                                          int mod);
+int verify_dir_item(struct btrfs_root *root,
+                   struct extent_buffer *leaf,
+                   struct btrfs_dir_item *dir_item);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
@@ -2528,7 +2535,7 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans,
                              struct inode *inode);
 int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode);
 int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
-void btrfs_orphan_cleanup(struct btrfs_root *root);
+int btrfs_orphan_cleanup(struct btrfs_root *root);
 void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
                                struct btrfs_pending_snapshot *pending,
                                u64 *bytes_to_reserve);
@@ -2536,7 +2543,7 @@ void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
                                struct btrfs_pending_snapshot *pending);
 void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root);
-int btrfs_cont_expand(struct inode *inode, loff_t size);
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size);
 int btrfs_invalidate_inodes(struct btrfs_root *root);
 void btrfs_add_delayed_iput(struct inode *inode);
 void btrfs_run_delayed_iputs(struct btrfs_root *root);
index e807b14..bce28f6 100644 (file)
@@ -483,6 +483,8 @@ static noinline int add_delayed_ref_head(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&head_ref->cluster);
        mutex_init(&head_ref->mutex);
 
+       trace_btrfs_delayed_ref_head(ref, head_ref, action);
+
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
        if (existing) {
@@ -537,6 +539,8 @@ static noinline int add_delayed_tree_ref(struct btrfs_trans_handle *trans,
        }
        full_ref->level = level;
 
+       trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
        if (existing) {
@@ -591,6 +595,8 @@ static noinline int add_delayed_data_ref(struct btrfs_trans_handle *trans,
        full_ref->objectid = owner;
        full_ref->offset = offset;
 
+       trace_btrfs_delayed_data_ref(ref, full_ref, action);
+
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
        if (existing) {
index f0cad5a..c62f02f 100644 (file)
@@ -151,7 +151,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
                ret = PTR_ERR(dir_item);
                if (ret == -EEXIST)
                        goto second_insert;
-               goto out;
+               goto out_free;
        }
 
        leaf = path->nodes[0];
@@ -170,7 +170,7 @@ second_insert:
        /* FIXME, use some real flag for selecting the extra index */
        if (root == root->fs_info->tree_root) {
                ret = 0;
-               goto out;
+               goto out_free;
        }
        btrfs_release_path(root, path);
 
@@ -180,7 +180,7 @@ second_insert:
                                        name, name_len);
        if (IS_ERR(dir_item)) {
                ret2 = PTR_ERR(dir_item);
-               goto out;
+               goto out_free;
        }
        leaf = path->nodes[0];
        btrfs_cpu_key_to_disk(&disk_key, location);
@@ -192,7 +192,9 @@ second_insert:
        name_ptr = (unsigned long)(dir_item + 1);
        write_extent_buffer(leaf, name, name_ptr, name_len);
        btrfs_mark_buffer_dirty(leaf);
-out:
+
+out_free:
+
        btrfs_free_path(path);
        if (ret)
                return ret;
@@ -377,6 +379,9 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
 
        leaf = path->nodes[0];
        dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+       if (verify_dir_item(root, leaf, dir_item))
+               return NULL;
+
        total_len = btrfs_item_size_nr(leaf, path->slots[0]);
        while (cur < total_len) {
                this_len = sizeof(*dir_item) +
@@ -429,3 +434,35 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
        }
        return ret;
 }
+
+int verify_dir_item(struct btrfs_root *root,
+                   struct extent_buffer *leaf,
+                   struct btrfs_dir_item *dir_item)
+{
+       u16 namelen = BTRFS_NAME_LEN;
+       u8 type = btrfs_dir_type(leaf, dir_item);
+
+       if (type >= BTRFS_FT_MAX) {
+               printk(KERN_CRIT "btrfs: invalid dir item type: %d\n",
+                      (int)type);
+               return 1;
+       }
+
+       if (type == BTRFS_FT_XATTR)
+               namelen = XATTR_NAME_MAX;
+
+       if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+               printk(KERN_CRIT "btrfS: invalid dir item name len: %u\n",
+                      (unsigned)btrfs_dir_data_len(leaf, dir_item));
+               return 1;
+       }
+
+       /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+       if (btrfs_dir_data_len(leaf, dir_item) > BTRFS_MAX_XATTR_SIZE(root)) {
+               printk(KERN_CRIT "btrfs: invalid dir item data len: %u\n",
+                      (unsigned)btrfs_dir_data_len(leaf, dir_item));
+               return 1;
+       }
+
+       return 0;
+}
index 830d261..d7a7315 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
+#include <asm/unaligned.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -198,7 +199,7 @@ u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
 
 void btrfs_csum_final(u32 crc, char *result)
 {
-       *(__le32 *)result = ~cpu_to_le32(crc);
+       put_unaligned_le32(~crc, result);
 }
 
 /*
@@ -323,6 +324,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
        int num_copies = 0;
        int mirror_num = 0;
 
+       clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
        io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
        while (1) {
                ret = read_extent_buffer_pages(io_tree, eb, start, 1,
@@ -331,6 +333,14 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                    !verify_parent_transid(io_tree, eb, parent_transid))
                        return ret;
 
+               /*
+                * This buffer's crc is fine, but its contents are corrupted, so
+                * there is no reason to read the other copies, they won't be
+                * any less wrong.
+                */
+               if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
+                       return ret;
+
                num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
                                              eb->start, eb->len);
                if (num_copies == 1)
@@ -419,6 +429,73 @@ static int check_tree_block_fsid(struct btrfs_root *root,
        return ret;
 }
 
+#define CORRUPT(reason, eb, root, slot)                                \
+       printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
+              "root=%llu, slot=%d\n", reason,                  \
+              (unsigned long long)btrfs_header_bytenr(eb),     \
+              (unsigned long long)root->objectid, slot)
+
+static noinline int check_leaf(struct btrfs_root *root,
+                              struct extent_buffer *leaf)
+{
+       struct btrfs_key key;
+       struct btrfs_key leaf_key;
+       u32 nritems = btrfs_header_nritems(leaf);
+       int slot;
+
+       if (nritems == 0)
+               return 0;
+
+       /* Check the 0 item */
+       if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) !=
+           BTRFS_LEAF_DATA_SIZE(root)) {
+               CORRUPT("invalid item offset size pair", leaf, root, 0);
+               return -EIO;
+       }
+
+       /*
+        * Check to make sure each items keys are in the correct order and their
+        * offsets make sense.  We only have to loop through nritems-1 because
+        * we check the current slot against the next slot, which verifies the
+        * next slot's offset+size makes sense and that the current's slot
+        * offset is correct.
+        */
+       for (slot = 0; slot < nritems - 1; slot++) {
+               btrfs_item_key_to_cpu(leaf, &leaf_key, slot);
+               btrfs_item_key_to_cpu(leaf, &key, slot + 1);
+
+               /* Make sure the keys are in the right order */
+               if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) {
+                       CORRUPT("bad key order", leaf, root, slot);
+                       return -EIO;
+               }
+
+               /*
+                * Make sure the offset and ends are right, remember that the
+                * item data starts at the end of the leaf and grows towards the
+                * front.
+                */
+               if (btrfs_item_offset_nr(leaf, slot) !=
+                       btrfs_item_end_nr(leaf, slot + 1)) {
+                       CORRUPT("slot offset bad", leaf, root, slot);
+                       return -EIO;
+               }
+
+               /*
+                * Check to make sure that we don't point outside of the leaf,
+                * just incase all the items are consistent to eachother, but
+                * all point outside of the leaf.
+                */
+               if (btrfs_item_end_nr(leaf, slot) >
+                   BTRFS_LEAF_DATA_SIZE(root)) {
+                       CORRUPT("slot end outside of leaf", leaf, root, slot);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
 {
@@ -485,8 +562,20 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        btrfs_set_buffer_lockdep_class(eb, found_level);
 
        ret = csum_tree_block(root, eb, 1);
-       if (ret)
+       if (ret) {
                ret = -EIO;
+               goto err;
+       }
+
+       /*
+        * If this is a leaf block and it is corrupt, set the corrupt bit so
+        * that we don't try and read the other copies of this block, just
+        * return -EIO.
+        */
+       if (found_level == 0 && check_leaf(root, eb)) {
+               set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
+               ret = -EIO;
+       }
 
        end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
        end = eb->start + end - 1;
@@ -1159,7 +1248,10 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
                     root, fs_info, location->objectid);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               kfree(root);
+               return ERR_PTR(-ENOMEM);
+       }
        ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
        if (ret == 0) {
                l = path->nodes[0];
@@ -1553,6 +1645,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
                goto fail_bdi;
        }
 
+       fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS;
+
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
@@ -1683,6 +1777,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
 
+       /*
+        * In the long term, we'll store the compression type in the super
+        * block, and it'll be used for per file compression control.
+        */
+       fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+
        ret = btrfs_parse_options(tree_root, options);
        if (ret) {
                err = ret;
@@ -1888,6 +1988,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->metadata_alloc_profile = (u64)-1;
        fs_info->system_alloc_profile = fs_info->metadata_alloc_profile;
 
+       ret = btrfs_init_space_info(fs_info);
+       if (ret) {
+               printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+               goto fail_block_groups;
+       }
+
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
                printk(KERN_ERR "Failed to read block groups: %d\n", ret);
@@ -1979,9 +2085,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
        if (!(sb->s_flags & MS_RDONLY)) {
                down_read(&fs_info->cleanup_work_sem);
-               btrfs_orphan_cleanup(fs_info->fs_root);
-               btrfs_orphan_cleanup(fs_info->tree_root);
+               err = btrfs_orphan_cleanup(fs_info->fs_root);
+               if (!err)
+                       err = btrfs_orphan_cleanup(fs_info->tree_root);
                up_read(&fs_info->cleanup_work_sem);
+               if (err) {
+                       close_ctree(tree_root);
+                       return ERR_PTR(err);
+               }
        }
 
        return tree_root;
@@ -2356,8 +2467,12 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
 
                root_objectid = gang[ret - 1]->root_key.objectid + 1;
                for (i = 0; i < ret; i++) {
+                       int err;
+
                        root_objectid = gang[i]->root_key.objectid;
-                       btrfs_orphan_cleanup(gang[i]);
+                       err = btrfs_orphan_cleanup(gang[i]);
+                       if (err)
+                               return err;
                }
                root_objectid++;
        }
@@ -2868,7 +2983,10 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
                        break;
 
                /* opt_discard */
-               ret = btrfs_error_discard_extent(root, start, end + 1 - start);
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_error_discard_extent(root, start,
+                                                        end + 1 - start,
+                                                        NULL);
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                btrfs_error_unpin_extent_range(root, start, end);
index 7b3089b..f619c3c 100644 (file)
@@ -36,8 +36,6 @@
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc);
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                u64 num_bytes, int reserve, int sinfo);
 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                u64 bytenr, u64 num_bytes, u64 parent,
@@ -442,7 +440,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
         * allocate blocks for the tree root we can't do the fast caching since
         * we likely hold important locks.
         */
-       if (!trans->transaction->in_commit &&
+       if (trans && (!trans->transaction->in_commit) &&
            (root && root != root->fs_info->tree_root)) {
                spin_lock(&cache->lock);
                if (cache->cached != BTRFS_CACHE_NO) {
@@ -471,7 +469,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        if (load_cache_only)
                return 0;
 
-       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+       caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
        BUG_ON(!caching_ctl);
 
        INIT_LIST_HEAD(&caching_ctl->list);
@@ -1740,39 +1738,45 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
        return ret;
 }
 
-static void btrfs_issue_discard(struct block_device *bdev,
+static int btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
 {
-       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
+       return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-                               u64 num_bytes)
+                               u64 num_bytes, u64 *actual_bytes)
 {
        int ret;
-       u64 map_length = num_bytes;
+       u64 discarded_bytes = 0;
        struct btrfs_multi_bio *multi = NULL;
 
-       if (!btrfs_test_opt(root, DISCARD))
-               return 0;
 
        /* Tell the block device(s) that the sectors can be discarded */
-       ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
-                             bytenr, &map_length, &multi, 0);
+       ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
+                             bytenr, &num_bytes, &multi, 0);
        if (!ret) {
                struct btrfs_bio_stripe *stripe = multi->stripes;
                int i;
 
-               if (map_length > num_bytes)
-                       map_length = num_bytes;
 
                for (i = 0; i < multi->num_stripes; i++, stripe++) {
-                       btrfs_issue_discard(stripe->dev->bdev,
-                                           stripe->physical,
-                                           map_length);
+                       ret = btrfs_issue_discard(stripe->dev->bdev,
+                                                 stripe->physical,
+                                                 stripe->length);
+                       if (!ret)
+                               discarded_bytes += stripe->length;
+                       else if (ret != -EOPNOTSUPP)
+                               break;
                }
                kfree(multi);
        }
+       if (discarded_bytes && ret == -EOPNOTSUPP)
+               ret = 0;
+
+       if (actual_bytes)
+               *actual_bytes = discarded_bytes;
+
 
        return ret;
 }
@@ -3996,6 +4000,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
        u64 to_reserve;
        int nr_extents;
+       int reserved_extents;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
@@ -4003,25 +4008,24 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
        nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-       if (nr_extents > BTRFS_I(inode)->reserved_extents) {
-               nr_extents -= BTRFS_I(inode)->reserved_extents;
+       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+
+       if (nr_extents > reserved_extents) {
+               nr_extents -= reserved_extents;
                to_reserve = calc_trans_metadata_size(root, nr_extents);
        } else {
                nr_extents = 0;
                to_reserve = 0;
        }
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
        if (ret)
                return ret;
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       BTRFS_I(inode)->reserved_extents += nr_extents;
+       atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
        atomic_inc(&BTRFS_I(inode)->outstanding_extents);
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
@@ -4036,20 +4040,30 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 to_free;
        int nr_extents;
+       int reserved_extents;
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
        atomic_dec(&BTRFS_I(inode)->outstanding_extents);
        WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
 
-       spin_lock(&BTRFS_I(inode)->accounting_lock);
-       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-       if (nr_extents < BTRFS_I(inode)->reserved_extents) {
-               nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
-               BTRFS_I(inode)->reserved_extents -= nr_extents;
-       } else {
-               nr_extents = 0;
-       }
-       spin_unlock(&BTRFS_I(inode)->accounting_lock);
+       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+       do {
+               int old, new;
+
+               nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
+               if (nr_extents >= reserved_extents) {
+                       nr_extents = 0;
+                       break;
+               }
+               old = reserved_extents;
+               nr_extents = reserved_extents - nr_extents;
+               new = reserved_extents - nr_extents;
+               old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
+                                    reserved_extents, new);
+               if (likely(old == reserved_extents))
+                       break;
+               reserved_extents = old;
+       } while (1);
 
        to_free = calc_csum_metadata_size(inode, num_bytes);
        if (nr_extents > 0)
@@ -4223,8 +4237,8 @@ int btrfs_pin_extent(struct btrfs_root *root,
  * update size of reserved extents. this function may return -EAGAIN
  * if 'reserve' is true or 'sinfo' is false.
  */
-static int update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                u64 num_bytes, int reserve, int sinfo)
+int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
+                               u64 num_bytes, int reserve, int sinfo)
 {
        int ret = 0;
        if (sinfo) {
@@ -4363,7 +4377,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
                if (ret)
                        break;
 
-               ret = btrfs_discard_extent(root, start, end + 1 - start);
+               if (btrfs_test_opt(root, DISCARD))
+                       ret = btrfs_discard_extent(root, start,
+                                                  end + 1 - start, NULL);
 
                clear_extent_dirty(unpin, start, end, GFP_NOFS);
                unpin_extent_range(root, start, end);
@@ -4704,10 +4720,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
-               ret = update_reserved_bytes(cache, buf->len, 0, 0);
+               ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0);
                if (ret == -EAGAIN) {
                        /* block group became read-only */
-                       update_reserved_bytes(cache, buf->len, 0, 1);
+                       btrfs_update_reserved_bytes(cache, buf->len, 0, 1);
                        goto out;
                }
 
@@ -4744,6 +4760,11 @@ pin:
                }
        }
 out:
+       /*
+        * Deleting the buffer, clear the corrupt flag since it doesn't matter
+        * anymore.
+        */
+       clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
        btrfs_put_block_group(cache);
 }
 
@@ -5191,7 +5212,7 @@ checks:
                                             search_start - offset);
                BUG_ON(offset > search_start);
 
-               ret = update_reserved_bytes(block_group, num_bytes, 1,
+               ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1,
                                            (data & BTRFS_BLOCK_GROUP_DATA));
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
@@ -5397,6 +5418,8 @@ again:
                dump_space_info(sinfo, num_bytes, 1);
        }
 
+       trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset);
+
        return ret;
 }
 
@@ -5412,12 +5435,15 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
                return -ENOSPC;
        }
 
-       ret = btrfs_discard_extent(root, start, len);
+       if (btrfs_test_opt(root, DISCARD))
+               ret = btrfs_discard_extent(root, start, len, NULL);
 
        btrfs_add_free_space(cache, start, len);
-       update_reserved_bytes(cache, len, 0, 1);
+       btrfs_update_reserved_bytes(cache, len, 0, 1);
        btrfs_put_block_group(cache);
 
+       trace_btrfs_reserved_extent_free(root, start, len);
+
        return ret;
 }
 
@@ -5444,7 +5470,8 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5614,7 +5641,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                put_caching_control(caching_ctl);
        }
 
-       ret = update_reserved_bytes(block_group, ins->offset, 1, 1);
+       ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1);
        BUG_ON(ret);
        btrfs_put_block_group(block_group);
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
@@ -6047,6 +6074,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                if (reada && level == 1)
                        reada_walk_down(trans, root, wc, path);
                next = read_tree_block(root, bytenr, blocksize, generation);
+               if (!next)
+                       return -EIO;
                btrfs_tree_lock(next);
                btrfs_set_lock_blocking(next);
        }
@@ -6438,10 +6467,14 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        wc = kzalloc(sizeof(*wc), GFP_NOFS);
-       BUG_ON(!wc);
+       if (!wc) {
+               btrfs_free_path(path);
+               return -ENOMEM;
+       }
 
        btrfs_assert_tree_locked(parent);
        parent_level = btrfs_header_level(parent);
@@ -6899,7 +6932,11 @@ static noinline int get_new_locations(struct inode *reloc_inode,
        }
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               if (exts != *extents)
+                       kfree(exts);
+               return -ENOMEM;
+       }
 
        cur_pos = extent_key->objectid - offset;
        last_byte = extent_key->objectid + extent_key->offset;
@@ -6941,6 +6978,10 @@ static noinline int get_new_locations(struct inode *reloc_inode,
                        struct disk_extent *old = exts;
                        max *= 2;
                        exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
+                       if (!exts) {
+                               ret = -ENOMEM;
+                               goto out;
+                       }
                        memcpy(exts, old, sizeof(*exts) * nr);
                        if (old != *extents)
                                kfree(old);
@@ -7423,7 +7464,8 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
        int ret;
 
        new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
-       BUG_ON(!new_extent);
+       if (!new_extent)
+               return -ENOMEM;
 
        ref = btrfs_lookup_leaf_ref(root, leaf->start);
        BUG_ON(!ref);
@@ -7609,7 +7651,8 @@ int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
 
        reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
        BUG_ON(!reloc_root);
-       btrfs_orphan_cleanup(reloc_root);
+       ret = btrfs_orphan_cleanup(reloc_root);
+       BUG_ON(ret);
        return 0;
 }
 
@@ -7627,7 +7670,8 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
                return 0;
 
        root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
-       BUG_ON(!root_item);
+       if (!root_item)
+               return -ENOMEM;
 
        ret = btrfs_copy_root(trans, root, root->commit_root,
                              &eb, BTRFS_TREE_RELOC_OBJECTID);
@@ -7653,7 +7697,7 @@ static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
 
        reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
                                                 &root_key);
-       BUG_ON(!reloc_root);
+       BUG_ON(IS_ERR(reloc_root));
        reloc_root->last_trans = trans->transid;
        reloc_root->commit_root = NULL;
        reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
@@ -7906,6 +7950,10 @@ static noinline int relocate_one_extent(struct btrfs_root *extent_root,
 
                        eb = read_tree_block(found_root, block_start,
                                             block_size, 0);
+                       if (!eb) {
+                               ret = -EIO;
+                               goto out;
+                       }
                        btrfs_tree_lock(eb);
                        BUG_ON(level != btrfs_header_level(eb));
 
@@ -8621,6 +8669,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        BUG_ON(!block_group);
        BUG_ON(!block_group->ro);
 
+       /*
+        * Free the reserved super bytes from this block group before
+        * remove it.
+        */
+       free_excluded_extents(root, block_group);
+
        memcpy(&key, &block_group->key, sizeof(key));
        if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
                                  BTRFS_BLOCK_GROUP_RAID1 |
@@ -8724,13 +8778,84 @@ out:
        return ret;
 }
 
+int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_space_info *space_info;
+       int ret;
+
+       ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM, 0, 0,
+                                                                &space_info);
+       if (ret)
+               return ret;
+
+       ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA, 0, 0,
+                                                                &space_info);
+       if (ret)
+               return ret;
+
+       ret = update_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA, 0, 0,
+                                                                &space_info);
+       if (ret)
+               return ret;
+
+       return ret;
+}
+
 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
        return unpin_extent_range(root, start, end);
 }
 
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-                              u64 num_bytes)
+                              u64 num_bytes, u64 *actual_bytes)
+{
+       return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+}
+
+int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 {
-       return btrfs_discard_extent(root, bytenr, num_bytes);
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_block_group_cache *cache = NULL;
+       u64 group_trimmed;
+       u64 start;
+       u64 end;
+       u64 trimmed = 0;
+       int ret = 0;
+
+       cache = btrfs_lookup_block_group(fs_info, range->start);
+
+       while (cache) {
+               if (cache->key.objectid >= (range->start + range->len)) {
+                       btrfs_put_block_group(cache);
+                       break;
+               }
+
+               start = max(range->start, cache->key.objectid);
+               end = min(range->start + range->len,
+                               cache->key.objectid + cache->key.offset);
+
+               if (end - start >= range->minlen) {
+                       if (!block_group_cache_done(cache)) {
+                               ret = cache_block_group(cache, NULL, root, 0);
+                               if (!ret)
+                                       wait_block_group_cache_done(cache);
+                       }
+                       ret = btrfs_trim_block_group(cache,
+                                                    &group_trimmed,
+                                                    start,
+                                                    end,
+                                                    range->minlen);
+
+                       trimmed += group_trimmed;
+                       if (ret) {
+                               btrfs_put_block_group(cache);
+                               break;
+                       }
+               }
+
+               cache = next_block_group(fs_info->tree_root, cache);
+       }
+
+       range->len = trimmed;
+       return ret;
 }
index b5b9282..20ddb28 100644 (file)
@@ -2192,6 +2192,8 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        else
                write_flags = WRITE;
 
+       trace___extent_writepage(page, inode, wbc);
+
        WARN_ON(!PageLocked(page));
        pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
        if (page->index > end_index ||
@@ -3690,6 +3692,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
                       "wanted %lu %lu\n", (unsigned long long)eb->start,
                       eb->len, start, min_len);
                WARN_ON(1);
+               return -EINVAL;
        }
 
        p = extent_buffer_page(eb, i);
index 9318dfe..f62c544 100644 (file)
@@ -31,6 +31,7 @@
 #define EXTENT_BUFFER_UPTODATE 0
 #define EXTENT_BUFFER_BLOCKING 1
 #define EXTENT_BUFFER_DIRTY 2
+#define EXTENT_BUFFER_CORRUPT 3
 
 /* these are flags for extent_clear_unlock_delalloc */
 #define EXTENT_CLEAR_UNLOCK_PAGE 0x1
index 4f19a3e..a6a9d4e 100644 (file)
@@ -48,7 +48,8 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
        file_key.objectid = objectid;
        file_key.offset = pos;
        btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY);
@@ -169,6 +170,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 
        path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
        if (bio->bi_size > PAGE_CACHE_SIZE * 8)
                path->reada = 2;
 
index f447b78..656bc0a 100644 (file)
  * and be replaced with calls into generic code.
  */
 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
-                                        int write_bytes,
+                                        size_t write_bytes,
                                         struct page **prepared_pages,
                                         struct iov_iter *i)
 {
        size_t copied = 0;
+       size_t total_copied = 0;
        int pg = 0;
        int offset = pos & (PAGE_CACHE_SIZE - 1);
-       int total_copied = 0;
 
        while (write_bytes > 0) {
                size_t count = min_t(size_t,
@@ -88,9 +88,8 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
                total_copied += copied;
 
                /* Return to btrfs_file_aio_write to fault page */
-               if (unlikely(copied == 0)) {
+               if (unlikely(copied == 0))
                        break;
-               }
 
                if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
                        offset += copied;
@@ -109,8 +108,6 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
        size_t i;
        for (i = 0; i < num_pages; i++) {
-               if (!pages[i])
-                       break;
                /* page checked is some magic around finding pages that
                 * have been modified without going through btrfs_set_page_dirty
                 * clear it here
@@ -130,13 +127,12 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
-                                  struct btrfs_root *root,
-                                  struct file *file,
-                                  struct page **pages,
-                                  size_t num_pages,
-                                  loff_t pos,
-                                  size_t write_bytes)
+static noinline int dirty_and_release_pages(struct btrfs_root *root,
+                                           struct file *file,
+                                           struct page **pages,
+                                           size_t num_pages,
+                                           loff_t pos,
+                                           size_t write_bytes)
 {
        int err = 0;
        int i;
@@ -154,7 +150,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
                                        NULL);
-       BUG_ON(err);
+       if (err)
+               return err;
 
        for (i = 0; i < num_pages; i++) {
                struct page *p = pages[i];
@@ -162,13 +159,14 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
                ClearPageChecked(p);
                set_page_dirty(p);
        }
-       if (end_pos > isize) {
+
+       /*
+        * we've only changed i_size in ram, and we haven't updated
+        * the disk i_size.  There is no need to log the inode
+        * at this time.
+        */
+       if (end_pos > isize)
                i_size_write(inode, end_pos);
-               /* we've only changed i_size in ram, and we haven't updated
-                * the disk i_size.  There is no need to log the inode
-                * at this time.
-                */
-       }
        return 0;
 }
 
@@ -610,6 +608,8 @@ again:
        key.offset = split;
 
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+       if (ret < 0)
+               goto out;
        if (ret > 0 && path->slots[0] > 0)
                path->slots[0]--;
 
@@ -819,12 +819,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
        last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
 
        if (start_pos > inode->i_size) {
-               err = btrfs_cont_expand(inode, start_pos);
+               err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
                if (err)
                        return err;
        }
 
-       memset(pages, 0, num_pages * sizeof(struct page *));
 again:
        for (i = 0; i < num_pages; i++) {
                pages[i] = grab_cache_page(inode->i_mapping, index + i);
@@ -896,156 +895,71 @@ fail:
 
 }
 
-static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
-                                   const struct iovec *iov,
-                                   unsigned long nr_segs, loff_t pos)
+static noinline ssize_t __btrfs_buffered_write(struct file *file,
+                                              struct iov_iter *i,
+                                              loff_t pos)
 {
-       struct file *file = iocb->ki_filp;
        struct inode *inode = fdentry(file)->d_inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct page **pages = NULL;
-       struct iov_iter i;
-       loff_t *ppos = &iocb->ki_pos;
-       loff_t start_pos;
-       ssize_t num_written = 0;
-       ssize_t err = 0;
-       size_t count;
-       size_t ocount;
-       int ret = 0;
-       int nrptrs;
        unsigned long first_index;
        unsigned long last_index;
-       int will_write;
-       int buffered = 0;
-       int copied = 0;
-       int dirty_pages = 0;
-
-       will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
-                     (file->f_flags & O_DIRECT));
-
-       start_pos = pos;
-
-       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
-       mutex_lock(&inode->i_mutex);
-
-       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
-       if (err)
-               goto out;
-       count = ocount;
-
-       current->backing_dev_info = inode->i_mapping->backing_dev_info;
-       err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
-       if (err)
-               goto out;
-
-       if (count == 0)
-               goto out;
-
-       err = file_remove_suid(file);
-       if (err)
-               goto out;
-
-       /*
-        * If BTRFS flips readonly due to some impossible error
-        * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
-        * although we have opened a file as writable, we have
-        * to stop this write operation to ensure FS consistency.
-        */
-       if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
-               err = -EROFS;
-               goto out;
-       }
-
-       file_update_time(file);
-       BTRFS_I(inode)->sequence++;
-
-       if (unlikely(file->f_flags & O_DIRECT)) {
-               num_written = generic_file_direct_write(iocb, iov, &nr_segs,
-                                                       pos, ppos, count,
-                                                       ocount);
-               /*
-                * the generic O_DIRECT will update in-memory i_size after the
-                * DIOs are done.  But our endio handlers that update the on
-                * disk i_size never update past the in memory i_size.  So we
-                * need one more update here to catch any additions to the
-                * file
-                */
-               if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
-                       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
-                       mark_inode_dirty(inode);
-               }
-
-               if (num_written < 0) {
-                       ret = num_written;
-                       num_written = 0;
-                       goto out;
-               } else if (num_written == count) {
-                       /* pick up pos changes done by the generic code */
-                       pos = *ppos;
-                       goto out;
-               }
-               /*
-                * We are going to do buffered for the rest of the range, so we
-                * need to make sure to invalidate the buffered pages when we're
-                * done.
-                */
-               buffered = 1;
-               pos += num_written;
-       }
+       size_t num_written = 0;
+       int nrptrs;
+       int ret;
 
-       iov_iter_init(&i, iov, nr_segs, count, num_written);
-       nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
+       nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
                     PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
                     (sizeof(struct page *)));
        pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
-       if (!pages) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       /* generic_write_checks can change our pos */
-       start_pos = pos;
+       if (!pages)
+               return -ENOMEM;
 
        first_index = pos >> PAGE_CACHE_SHIFT;
-       last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
+       last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
 
-       while (iov_iter_count(&i) > 0) {
+       while (iov_iter_count(i) > 0) {
                size_t offset = pos & (PAGE_CACHE_SIZE - 1);
-               size_t write_bytes = min(iov_iter_count(&i),
+               size_t write_bytes = min(iov_iter_count(i),
                                         nrptrs * (size_t)PAGE_CACHE_SIZE -
                                         offset);
                size_t num_pages = (write_bytes + offset +
                                    PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+               size_t dirty_pages;
+               size_t copied;
 
                WARN_ON(num_pages > nrptrs);
-               memset(pages, 0, sizeof(struct page *) * nrptrs);
 
                /*
                 * Fault pages before locking them in prepare_pages
                 * to avoid recursive lock
                 */
-               if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+               if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
                        ret = -EFAULT;
-                       goto out;
+                       break;
                }
 
                ret = btrfs_delalloc_reserve_space(inode,
                                        num_pages << PAGE_CACHE_SHIFT);
                if (ret)
-                       goto out;
+                       break;
 
+               /*
+                * This is going to setup the pages array with the number of
+                * pages we want, so we don't really need to worry about the
+                * contents of pages from loop to loop
+                */
                ret = prepare_pages(root, file, pages, num_pages,
                                    pos, first_index, last_index,
                                    write_bytes);
                if (ret) {
                        btrfs_delalloc_release_space(inode,
                                        num_pages << PAGE_CACHE_SHIFT);
-                       goto out;
+                       break;
                }
 
                copied = btrfs_copy_from_user(pos, num_pages,
-                                          write_bytes, pages, &i);
+                                          write_bytes, pages, i);
 
                /*
                 * if we have trouble faulting in the pages, fall
@@ -1061,6 +975,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                                       PAGE_CACHE_SIZE - 1) >>
                                       PAGE_CACHE_SHIFT;
 
+               /*
+                * If we had a short copy we need to release the excess delaloc
+                * bytes we reserved.  We need to increment outstanding_extents
+                * because btrfs_delalloc_release_space will decrement it, but
+                * we still have an outstanding extent for the chunk we actually
+                * managed to copy.
+                */
                if (num_pages > dirty_pages) {
                        if (copied > 0)
                                atomic_inc(
@@ -1071,39 +992,157 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
                }
 
                if (copied > 0) {
-                       dirty_and_release_pages(NULL, root, file, pages,
-                                               dirty_pages, pos, copied);
+                       ret = dirty_and_release_pages(root, file, pages,
+                                                     dirty_pages, pos,
+                                                     copied);
+                       if (ret) {
+                               btrfs_delalloc_release_space(inode,
+                                       dirty_pages << PAGE_CACHE_SHIFT);
+                               btrfs_drop_pages(pages, num_pages);
+                               break;
+                       }
                }
 
                btrfs_drop_pages(pages, num_pages);
 
-               if (copied > 0) {
-                       if (will_write) {
-                               filemap_fdatawrite_range(inode->i_mapping, pos,
-                                                        pos + copied - 1);
-                       } else {
-                               balance_dirty_pages_ratelimited_nr(
-                                                       inode->i_mapping,
-                                                       dirty_pages);
-                               if (dirty_pages <
-                               (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-                                       btrfs_btree_balance_dirty(root, 1);
-                               btrfs_throttle(root);
-                       }
-               }
+               cond_resched();
+
+               balance_dirty_pages_ratelimited_nr(inode->i_mapping,
+                                                  dirty_pages);
+               if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+                       btrfs_btree_balance_dirty(root, 1);
+               btrfs_throttle(root);
 
                pos += copied;
                num_written += copied;
+       }
 
-               cond_resched();
+       kfree(pages);
+
+       return num_written ? num_written : ret;
+}
+
+static ssize_t __btrfs_direct_write(struct kiocb *iocb,
+                                   const struct iovec *iov,
+                                   unsigned long nr_segs, loff_t pos,
+                                   loff_t *ppos, size_t count, size_t ocount)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = fdentry(file)->d_inode;
+       struct iov_iter i;
+       ssize_t written;
+       ssize_t written_buffered;
+       loff_t endbyte;
+       int err;
+
+       written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
+                                           count, ocount);
+
+       /*
+        * the generic O_DIRECT will update in-memory i_size after the
+        * DIOs are done.  But our endio handlers that update the on
+        * disk i_size never update past the in memory i_size.  So we
+        * need one more update here to catch any additions to the
+        * file
+        */
+       if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
+               btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+               mark_inode_dirty(inode);
        }
+
+       if (written < 0 || written == count)
+               return written;
+
+       pos += written;
+       count -= written;
+       iov_iter_init(&i, iov, nr_segs, count, written);
+       written_buffered = __btrfs_buffered_write(file, &i, pos);
+       if (written_buffered < 0) {
+               err = written_buffered;
+               goto out;
+       }
+       endbyte = pos + written_buffered - 1;
+       err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
+       if (err)
+               goto out;
+       written += written_buffered;
+       *ppos = pos + written_buffered;
+       invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
+                                endbyte >> PAGE_CACHE_SHIFT);
 out:
-       mutex_unlock(&inode->i_mutex);
-       if (ret)
-               err = ret;
+       return written ? written : err;
+}
 
-       kfree(pages);
-       *ppos = pos;
+static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
+                                   const struct iovec *iov,
+                                   unsigned long nr_segs, loff_t pos)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = fdentry(file)->d_inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       loff_t *ppos = &iocb->ki_pos;
+       ssize_t num_written = 0;
+       ssize_t err = 0;
+       size_t count, ocount;
+
+       vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+       mutex_lock(&inode->i_mutex);
+
+       err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
+       if (err) {
+               mutex_unlock(&inode->i_mutex);
+               goto out;
+       }
+       count = ocount;
+
+       current->backing_dev_info = inode->i_mapping->backing_dev_info;
+       err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+       if (err) {
+               mutex_unlock(&inode->i_mutex);
+               goto out;
+       }
+
+       if (count == 0) {
+               mutex_unlock(&inode->i_mutex);
+               goto out;
+       }
+
+       err = file_remove_suid(file);
+       if (err) {
+               mutex_unlock(&inode->i_mutex);
+               goto out;
+       }
+
+       /*
+        * If BTRFS flips readonly due to some impossible error
+        * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
+        * although we have opened a file as writable, we have
+        * to stop this write operation to ensure FS consistency.
+        */
+       if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
+               mutex_unlock(&inode->i_mutex);
+               err = -EROFS;
+               goto out;
+       }
+
+       file_update_time(file);
+       BTRFS_I(inode)->sequence++;
+
+       if (unlikely(file->f_flags & O_DIRECT)) {
+               num_written = __btrfs_direct_write(iocb, iov, nr_segs,
+                                                  pos, ppos, count, ocount);
+       } else {
+               struct iov_iter i;
+
+               iov_iter_init(&i, iov, nr_segs, count, num_written);
+
+               num_written = __btrfs_buffered_write(file, &i, pos);
+               if (num_written > 0)
+                       *ppos = pos + num_written;
+       }
+
+       mutex_unlock(&inode->i_mutex);
 
        /*
         * we want to make sure fsync finds this change
@@ -1118,43 +1157,12 @@ out:
         * one running right now.
         */
        BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
-
-       if (num_written > 0 && will_write) {
-               struct btrfs_trans_handle *trans;
-
-               err = btrfs_wait_ordered_range(inode, start_pos, num_written);
-               if (err)
+       if (num_written > 0 || num_written == -EIOCBQUEUED) {
+               err = generic_write_sync(file, pos, num_written);
+               if (err < 0 && num_written > 0)
                        num_written = err;
-
-               if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
-                       trans = btrfs_start_transaction(root, 0);
-                       if (IS_ERR(trans)) {
-                               num_written = PTR_ERR(trans);
-                               goto done;
-                       }
-                       mutex_lock(&inode->i_mutex);
-                       ret = btrfs_log_dentry_safe(trans, root,
-                                                   file->f_dentry);
-                       mutex_unlock(&inode->i_mutex);
-                       if (ret == 0) {
-                               ret = btrfs_sync_log(trans, root);
-                               if (ret == 0)
-                                       btrfs_end_transaction(trans, root);
-                               else
-                                       btrfs_commit_transaction(trans, root);
-                       } else if (ret != BTRFS_NO_LOG_SYNC) {
-                               btrfs_commit_transaction(trans, root);
-                       } else {
-                               btrfs_end_transaction(trans, root);
-                       }
-               }
-               if (file->f_flags & O_DIRECT && buffered) {
-                       invalidate_mapping_pages(inode->i_mapping,
-                             start_pos >> PAGE_CACHE_SHIFT,
-                            (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
-               }
        }
-done:
+out:
        current->backing_dev_info = NULL;
        return num_written ? num_written : err;
 }
@@ -1197,6 +1205,7 @@ int btrfs_sync_file(struct file *file, int datasync)
        int ret = 0;
        struct btrfs_trans_handle *trans;
 
+       trace_btrfs_sync_file(file, datasync);
 
        /* we wait first, since the writeback may change the inode */
        root->log_batch++;
@@ -1324,7 +1333,8 @@ static long btrfs_fallocate(struct file *file, int mode,
                goto out;
 
        if (alloc_start > inode->i_size) {
-               ret = btrfs_cont_expand(inode, alloc_start);
+               ret = btrfs_cont_expand(inode, i_size_read(inode),
+                                       alloc_start);
                if (ret)
                        goto out;
        }
index a039065..0037427 100644 (file)
@@ -393,7 +393,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                                break;
 
                        need_loop = 1;
-                       e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+                       e = kmem_cache_zalloc(btrfs_free_space_cachep,
+                                             GFP_NOFS);
                        if (!e) {
                                kunmap(page);
                                unlock_page(page);
@@ -405,7 +406,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                        e->bytes = le64_to_cpu(entry->bytes);
                        if (!e->bytes) {
                                kunmap(page);
-                               kfree(e);
+                               kmem_cache_free(btrfs_free_space_cachep, e);
                                unlock_page(page);
                                page_cache_release(page);
                                goto free_cache;
@@ -420,7 +421,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
                                e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                                if (!e->bitmap) {
                                        kunmap(page);
-                                       kfree(e);
+                                       kmem_cache_free(
+                                               btrfs_free_space_cachep, e);
                                        unlock_page(page);
                                        page_cache_release(page);
                                        goto free_cache;
@@ -1187,7 +1189,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group,
 {
        unlink_free_space(block_group, bitmap_info);
        kfree(bitmap_info->bitmap);
-       kfree(bitmap_info);
+       kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
        block_group->total_bitmaps--;
        recalculate_thresholds(block_group);
 }
@@ -1285,9 +1287,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
-       if (block_group->free_extents < block_group->extents_thresh &&
-           info->bytes > block_group->sectorsize * 4)
-               return 0;
+       if (block_group->free_extents < block_group->extents_thresh) {
+               /*
+                * If this block group has some small extents we don't want to
+                * use up all of our free slots in the cache with them, we want
+                * to reserve them to larger extents, however if we have plent
+                * of cache left then go ahead an dadd them, no sense in adding
+                * the overhead of a bitmap if we don't have to.
+                */
+               if (info->bytes <= block_group->sectorsize * 4) {
+                       if (block_group->free_extents * 2 <=
+                           block_group->extents_thresh)
+                               return 0;
+               } else {
+                       return 0;
+               }
+       }
 
        /*
         * some block groups are so tiny they can't be enveloped by a bitmap, so
@@ -1342,8 +1357,8 @@ new_bitmap:
 
                /* no pre-allocated info, allocate a new one */
                if (!info) {
-                       info = kzalloc(sizeof(struct btrfs_free_space),
-                                      GFP_NOFS);
+                       info = kmem_cache_zalloc(btrfs_free_space_cachep,
+                                                GFP_NOFS);
                        if (!info) {
                                spin_lock(&block_group->tree_lock);
                                ret = -ENOMEM;
@@ -1365,7 +1380,7 @@ out:
        if (info) {
                if (info->bitmap)
                        kfree(info->bitmap);
-               kfree(info);
+               kmem_cache_free(btrfs_free_space_cachep, info);
        }
 
        return ret;
@@ -1398,7 +1413,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
                else
                        __unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
-               kfree(right_info);
+               kmem_cache_free(btrfs_free_space_cachep, right_info);
                merged = true;
        }
 
@@ -1410,7 +1425,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
                        __unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
-               kfree(left_info);
+               kmem_cache_free(btrfs_free_space_cachep, left_info);
                merged = true;
        }
 
@@ -1423,7 +1438,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
        struct btrfs_free_space *info;
        int ret = 0;
 
-       info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+       info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
        if (!info)
                return -ENOMEM;
 
@@ -1450,7 +1465,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 link:
        ret = link_free_space(block_group, info);
        if (ret)
-               kfree(info);
+               kmem_cache_free(btrfs_free_space_cachep, info);
 out:
        spin_unlock(&block_group->tree_lock);
 
@@ -1520,7 +1535,7 @@ again:
                        kfree(info->bitmap);
                        block_group->total_bitmaps--;
                }
-               kfree(info);
+               kmem_cache_free(btrfs_free_space_cachep, info);
                goto out_lock;
        }
 
@@ -1556,7 +1571,7 @@ again:
                        /* the hole we're creating ends at the end
                         * of the info struct, just free the info
                         */
-                       kfree(info);
+                       kmem_cache_free(btrfs_free_space_cachep, info);
                }
                spin_unlock(&block_group->tree_lock);
 
@@ -1629,30 +1644,28 @@ __btrfs_return_cluster_to_free_space(
 {
        struct btrfs_free_space *entry;
        struct rb_node *node;
-       bool bitmap;
 
        spin_lock(&cluster->lock);
        if (cluster->block_group != block_group)
                goto out;
 
-       bitmap = cluster->points_to_bitmap;
        cluster->block_group = NULL;
        cluster->window_start = 0;
        list_del_init(&cluster->block_group_list);
-       cluster->points_to_bitmap = false;
-
-       if (bitmap)
-               goto out;
 
        node = rb_first(&cluster->root);
        while (node) {
+               bool bitmap;
+
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
-               BUG_ON(entry->bitmap);
-               try_merge_free_space(block_group, entry, false);
+
+               bitmap = (entry->bitmap != NULL);
+               if (!bitmap)
+                       try_merge_free_space(block_group, entry, false);
                tree_insert_offset(&block_group->free_space_offset,
-                                  entry->offset, &entry->offset_index, 0);
+                                  entry->offset, &entry->offset_index, bitmap);
        }
        cluster->root = RB_ROOT;
 
@@ -1689,7 +1702,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
                unlink_free_space(block_group, info);
                if (info->bitmap)
                        kfree(info->bitmap);
-               kfree(info);
+               kmem_cache_free(btrfs_free_space_cachep, info);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
@@ -1722,7 +1735,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                entry->offset += bytes;
                entry->bytes -= bytes;
                if (!entry->bytes)
-                       kfree(entry);
+                       kmem_cache_free(btrfs_free_space_cachep, entry);
                else
                        link_free_space(block_group, entry);
        }
@@ -1775,50 +1788,24 @@ int btrfs_return_cluster_to_free_space(
 
 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                                   struct btrfs_free_cluster *cluster,
+                                  struct btrfs_free_space *entry,
                                   u64 bytes, u64 min_start)
 {
-       struct btrfs_free_space *entry;
        int err;
        u64 search_start = cluster->window_start;
        u64 search_bytes = bytes;
        u64 ret = 0;
 
-       spin_lock(&block_group->tree_lock);
-       spin_lock(&cluster->lock);
-
-       if (!cluster->points_to_bitmap)
-               goto out;
-
-       if (cluster->block_group != block_group)
-               goto out;
-
-       /*
-        * search_start is the beginning of the bitmap, but at some point it may
-        * be a good idea to point to the actual start of the free area in the
-        * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
-        * to 1 to make sure we get the bitmap entry
-        */
-       entry = tree_search_offset(block_group,
-                                  offset_to_bitmap(block_group, search_start),
-                                  1, 0);
-       if (!entry || !entry->bitmap)
-               goto out;
-
        search_start = min_start;
        search_bytes = bytes;
 
        err = search_bitmap(block_group, entry, &search_start,
                            &search_bytes);
        if (err)
-               goto out;
+               return 0;
 
        ret = search_start;
        bitmap_clear_bits(block_group, entry, ret, bytes);
-       if (entry->bytes == 0)
-               free_bitmap(block_group, entry);
-out:
-       spin_unlock(&cluster->lock);
-       spin_unlock(&block_group->tree_lock);
 
        return ret;
 }
@@ -1836,10 +1823,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
        struct rb_node *node;
        u64 ret = 0;
 
-       if (cluster->points_to_bitmap)
-               return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
-                                              min_start);
-
        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;
@@ -1852,9 +1835,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                goto out;
 
        entry = rb_entry(node, struct btrfs_free_space, offset_index);
-
        while(1) {
-               if (entry->bytes < bytes || entry->offset < min_start) {
+               if (entry->bytes < bytes ||
+                   (!entry->bitmap && entry->offset < min_start)) {
                        struct rb_node *node;
 
                        node = rb_next(&entry->offset_index);
@@ -1864,10 +1847,27 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                                         offset_index);
                        continue;
                }
-               ret = entry->offset;
 
-               entry->offset += bytes;
-               entry->bytes -= bytes;
+               if (entry->bitmap) {
+                       ret = btrfs_alloc_from_bitmap(block_group,
+                                                     cluster, entry, bytes,
+                                                     min_start);
+                       if (ret == 0) {
+                               struct rb_node *node;
+                               node = rb_next(&entry->offset_index);
+                               if (!node)
+                                       break;
+                               entry = rb_entry(node, struct btrfs_free_space,
+                                                offset_index);
+                               continue;
+                       }
+               } else {
+
+                       ret = entry->offset;
+
+                       entry->offset += bytes;
+                       entry->bytes -= bytes;
+               }
 
                if (entry->bytes == 0)
                        rb_erase(&entry->offset_index, &cluster->root);
@@ -1884,7 +1884,12 @@ out:
        block_group->free_space -= bytes;
        if (entry->bytes == 0) {
                block_group->free_extents--;
-               kfree(entry);
+               if (entry->bitmap) {
+                       kfree(entry->bitmap);
+                       block_group->total_bitmaps--;
+                       recalculate_thresholds(block_group);
+               }
+               kmem_cache_free(btrfs_free_space_cachep, entry);
        }
 
        spin_unlock(&block_group->tree_lock);
@@ -1904,12 +1909,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
+       int ret;
        bool found = false;
 
        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
-       search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
-       total_bits = bytes_to_bits(bytes, block_group->sectorsize);
+       search_bits = bytes_to_bits(bytes, block_group->sectorsize);
+       total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
 
 again:
        found_bits = 0;
@@ -1926,7 +1932,7 @@ again:
        }
 
        if (!found_bits)
-               return -1;
+               return -ENOSPC;
 
        if (!found) {
                start = i;
@@ -1950,189 +1956,208 @@ again:
 
        cluster->window_start = start * block_group->sectorsize +
                entry->offset;
-       cluster->points_to_bitmap = true;
+       rb_erase(&entry->offset_index, &block_group->free_space_offset);
+       ret = tree_insert_offset(&cluster->root, entry->offset,
+                                &entry->offset_index, 1);
+       BUG_ON(ret);
 
        return 0;
 }
 
 /*
- * here we try to find a cluster of blocks in a block group.  The goal
- * is to find at least bytes free and up to empty_size + bytes free.
- * We might not find them all in one contiguous area.
- *
- * returns zero and sets up cluster if things worked out, otherwise
- * it returns -enospc
+ * This searches the block group for just extents to fill the cluster with.
  */
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *root,
-                            struct btrfs_block_group_cache *block_group,
-                            struct btrfs_free_cluster *cluster,
-                            u64 offset, u64 bytes, u64 empty_size)
+static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+                                  struct btrfs_free_cluster *cluster,
+                                  u64 offset, u64 bytes, u64 min_bytes)
 {
+       struct btrfs_free_space *first = NULL;
        struct btrfs_free_space *entry = NULL;
+       struct btrfs_free_space *prev = NULL;
+       struct btrfs_free_space *last;
        struct rb_node *node;
-       struct btrfs_free_space *next;
-       struct btrfs_free_space *last = NULL;
-       u64 min_bytes;
        u64 window_start;
        u64 window_free;
-       u64 max_extent = 0;
-       bool found_bitmap = false;
-       int ret;
+       u64 max_extent;
+       u64 max_gap = 128 * 1024;
 
-       /* for metadata, allow allocates with more holes */
-       if (btrfs_test_opt(root, SSD_SPREAD)) {
-               min_bytes = bytes + empty_size;
-       } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
-               /*
-                * we want to do larger allocations when we are
-                * flushing out the delayed refs, it helps prevent
-                * making more work as we go along.
-                */
-               if (trans->transaction->delayed_refs.flushing)
-                       min_bytes = max(bytes, (bytes + empty_size) >> 1);
-               else
-                       min_bytes = max(bytes, (bytes + empty_size) >> 4);
-       } else
-               min_bytes = max(bytes, (bytes + empty_size) >> 2);
-
-       spin_lock(&block_group->tree_lock);
-       spin_lock(&cluster->lock);
-
-       /* someone already found a cluster, hooray */
-       if (cluster->block_group) {
-               ret = 0;
-               goto out;
-       }
-again:
-       entry = tree_search_offset(block_group, offset, found_bitmap, 1);
-       if (!entry) {
-               ret = -ENOSPC;
-               goto out;
-       }
+       entry = tree_search_offset(block_group, offset, 0, 1);
+       if (!entry)
+               return -ENOSPC;
 
        /*
-        * If found_bitmap is true, we exhausted our search for extent entries,
-        * and we just want to search all of the bitmaps that we can find, and
-        * ignore any extent entries we find.
+        * We don't want bitmaps, so just move along until we find a normal
+        * extent entry.
         */
-       while (entry->bitmap || found_bitmap ||
-              (!entry->bitmap && entry->bytes < min_bytes)) {
-               struct rb_node *node = rb_next(&entry->offset_index);
-
-               if (entry->bitmap && entry->bytes > bytes + empty_size) {
-                       ret = btrfs_bitmap_cluster(block_group, entry, cluster,
-                                                  offset, bytes + empty_size,
-                                                  min_bytes);
-                       if (!ret)
-                               goto got_it;
-               }
-
-               if (!node) {
-                       ret = -ENOSPC;
-                       goto out;
-               }
+       while (entry->bitmap) {
+               node = rb_next(&entry->offset_index);
+               if (!node)
+                       return -ENOSPC;
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }
 
-       /*
-        * We already searched all the extent entries from the passed in offset
-        * to the end and didn't find enough space for the cluster, and we also
-        * didn't find any bitmaps that met our criteria, just go ahead and exit
-        */
-       if (found_bitmap) {
-               ret = -ENOSPC;
-               goto out;
-       }
-
-       cluster->points_to_bitmap = false;
        window_start = entry->offset;
        window_free = entry->bytes;
-       last = entry;
        max_extent = entry->bytes;
+       first = entry;
+       last = entry;
+       prev = entry;
 
-       while (1) {
-               /* out window is just right, lets fill it */
-               if (window_free >= bytes + empty_size)
-                       break;
-
-               node = rb_next(&last->offset_index);
-               if (!node) {
-                       if (found_bitmap)
-                               goto again;
-                       ret = -ENOSPC;
-                       goto out;
-               }
-               next = rb_entry(node, struct btrfs_free_space, offset_index);
+       while (window_free <= min_bytes) {
+               node = rb_next(&entry->offset_index);
+               if (!node)
+                       return -ENOSPC;
+               entry = rb_entry(node, struct btrfs_free_space, offset_index);
 
-               /*
-                * we found a bitmap, so if this search doesn't result in a
-                * cluster, we know to go and search again for the bitmaps and
-                * start looking for space there
-                */
-               if (next->bitmap) {
-                       if (!found_bitmap)
-                               offset = next->offset;
-                       found_bitmap = true;
-                       last = next;
+               if (entry->bitmap)
                        continue;
-               }
-
                /*
                 * we haven't filled the empty size and the window is
                 * very large.  reset and try again
                 */
-               if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
-                   next->offset - window_start > (bytes + empty_size) * 2) {
-                       entry = next;
+               if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
+                   entry->offset - window_start > (min_bytes * 2)) {
+                       first = entry;
                        window_start = entry->offset;
                        window_free = entry->bytes;
                        last = entry;
                        max_extent = entry->bytes;
                } else {
-                       last = next;
-                       window_free += next->bytes;
+                       last = entry;
+                       window_free += entry->bytes;
                        if (entry->bytes > max_extent)
                                max_extent = entry->bytes;
                }
+               prev = entry;
        }
 
-       cluster->window_start = entry->offset;
+       cluster->window_start = first->offset;
+
+       node = &first->offset_index;
 
        /*
         * now we've found our entries, pull them out of the free space
         * cache and put them into the cluster rbtree
-        *
-        * The cluster includes an rbtree, but only uses the offset index
-        * of each free space cache entry.
         */
-       while (1) {
+       do {
+               int ret;
+
+               entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
-               if (entry->bitmap && node) {
-                       entry = rb_entry(node, struct btrfs_free_space,
-                                        offset_index);
+               if (entry->bitmap)
                        continue;
-               } else if (entry->bitmap && !node) {
-                       break;
-               }
 
                rb_erase(&entry->offset_index, &block_group->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                BUG_ON(ret);
+       } while (node && entry != last);
 
-               if (!node || entry == last)
-                       break;
+       cluster->max_size = max_extent;
+
+       return 0;
+}
+
+/*
+ * This specifically looks for bitmaps that may work in the cluster, we assume
+ * that we have already failed to find extents that will work.
+ */
+static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+                               struct btrfs_free_cluster *cluster,
+                               u64 offset, u64 bytes, u64 min_bytes)
+{
+       struct btrfs_free_space *entry;
+       struct rb_node *node;
+       int ret = -ENOSPC;
+
+       if (block_group->total_bitmaps == 0)
+               return -ENOSPC;
 
+       entry = tree_search_offset(block_group,
+                                  offset_to_bitmap(block_group, offset),
+                                  0, 1);
+       if (!entry)
+               return -ENOSPC;
+
+       node = &entry->offset_index;
+       do {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
+               node = rb_next(&entry->offset_index);
+               if (!entry->bitmap)
+                       continue;
+               if (entry->bytes < min_bytes)
+                       continue;
+               ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+                                          bytes, min_bytes);
+       } while (ret && node);
+
+       return ret;
+}
+
+/*
+ * here we try to find a cluster of blocks in a block group.  The goal
+ * is to find at least bytes free and up to empty_size + bytes free.
+ * We might not find them all in one contiguous area.
+ *
+ * returns zero and sets up cluster if things worked out, otherwise
+ * it returns -enospc
+ */
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
+                            struct btrfs_block_group_cache *block_group,
+                            struct btrfs_free_cluster *cluster,
+                            u64 offset, u64 bytes, u64 empty_size)
+{
+       u64 min_bytes;
+       int ret;
+
+       /* for metadata, allow allocates with more holes */
+       if (btrfs_test_opt(root, SSD_SPREAD)) {
+               min_bytes = bytes + empty_size;
+       } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+               /*
+                * we want to do larger allocations when we are
+                * flushing out the delayed refs, it helps prevent
+                * making more work as we go along.
+                */
+               if (trans->transaction->delayed_refs.flushing)
+                       min_bytes = max(bytes, (bytes + empty_size) >> 1);
+               else
+                       min_bytes = max(bytes, (bytes + empty_size) >> 4);
+       } else
+               min_bytes = max(bytes, (bytes + empty_size) >> 2);
+
+       spin_lock(&block_group->tree_lock);
+
+       /*
+        * If we know we don't have enough space to make a cluster don't even
+        * bother doing all the work to try and find one.
+        */
+       if (block_group->free_space < min_bytes) {
+               spin_unlock(&block_group->tree_lock);
+               return -ENOSPC;
        }
 
-       cluster->max_size = max_extent;
-got_it:
-       ret = 0;
-       atomic_inc(&block_group->count);
-       list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
-       cluster->block_group = block_group;
+       spin_lock(&cluster->lock);
+
+       /* someone already found a cluster, hooray */
+       if (cluster->block_group) {
+               ret = 0;
+               goto out;
+       }
+
+       ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
+                                     min_bytes);
+       if (ret)
+               ret = setup_cluster_bitmap(block_group, cluster, offset,
+                                          bytes, min_bytes);
+
+       if (!ret) {
+               atomic_inc(&block_group->count);
+               list_add_tail(&cluster->block_group_list,
+                             &block_group->cluster_list);
+               cluster->block_group = block_group;
+       }
 out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);
@@ -2149,8 +2174,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
        spin_lock_init(&cluster->refill_lock);
        cluster->root = RB_ROOT;
        cluster->max_size = 0;
-       cluster->points_to_bitmap = false;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
 }
 
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen)
+{
+       struct btrfs_free_space *entry = NULL;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       u64 bytes = 0;
+       u64 actually_trimmed;
+       int ret = 0;
+
+       *trimmed = 0;
+
+       while (start < end) {
+               spin_lock(&block_group->tree_lock);
+
+               if (block_group->free_space < minlen) {
+                       spin_unlock(&block_group->tree_lock);
+                       break;
+               }
+
+               entry = tree_search_offset(block_group, start, 0, 1);
+               if (!entry)
+                       entry = tree_search_offset(block_group,
+                                                  offset_to_bitmap(block_group,
+                                                                   start),
+                                                  1, 1);
+
+               if (!entry || entry->offset >= end) {
+                       spin_unlock(&block_group->tree_lock);
+                       break;
+               }
+
+               if (entry->bitmap) {
+                       ret = search_bitmap(block_group, entry, &start, &bytes);
+                       if (!ret) {
+                               if (start >= end) {
+                                       spin_unlock(&block_group->tree_lock);
+                                       break;
+                               }
+                               bytes = min(bytes, end - start);
+                               bitmap_clear_bits(block_group, entry,
+                                                 start, bytes);
+                               if (entry->bytes == 0)
+                                       free_bitmap(block_group, entry);
+                       } else {
+                               start = entry->offset + BITS_PER_BITMAP *
+                                       block_group->sectorsize;
+                               spin_unlock(&block_group->tree_lock);
+                               ret = 0;
+                               continue;
+                       }
+               } else {
+                       start = entry->offset;
+                       bytes = min(entry->bytes, end - start);
+                       unlink_free_space(block_group, entry);
+                       kfree(entry);
+               }
+
+               spin_unlock(&block_group->tree_lock);
+
+               if (bytes >= minlen) {
+                       int update_ret;
+                       update_ret = btrfs_update_reserved_bytes(block_group,
+                                                                bytes, 1, 1);
+
+                       ret = btrfs_error_discard_extent(fs_info->extent_root,
+                                                        start,
+                                                        bytes,
+                                                        &actually_trimmed);
+
+                       btrfs_add_free_space(block_group,
+                                            start, bytes);
+                       if (!update_ret)
+                               btrfs_update_reserved_bytes(block_group,
+                                                           bytes, 0, 1);
+
+                       if (ret)
+                               break;
+                       *trimmed += actually_trimmed;
+               }
+               start += bytes;
+               bytes = 0;
+
+               if (fatal_signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+
+               cond_resched();
+       }
+
+       return ret;
+}
index e49ca5c..65c3b93 100644 (file)
@@ -68,4 +68,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 int btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group_cache *block_group,
                               struct btrfs_free_cluster *cluster);
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+                          u64 *trimmed, u64 start, u64 end, u64 minlen);
 #endif
index c56eb59..c05a08f 100644 (file)
@@ -30,7 +30,8 @@ int btrfs_find_highest_inode(struct btrfs_root *root, u64 *objectid)
        int slot;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        search_key.objectid = BTRFS_LAST_FREE_OBJECTID;
        search_key.type = -1;
index 119520b..93c28a1 100644 (file)
@@ -50,6 +50,7 @@
 #include "tree-log.h"
 #include "compression.h"
 #include "locking.h"
+#include "free-space-cache.h"
 
 struct btrfs_iget_args {
        u64 ino;
@@ -70,6 +71,7 @@ static struct kmem_cache *btrfs_inode_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
 struct kmem_cache *btrfs_transaction_cachep;
 struct kmem_cache *btrfs_path_cachep;
+struct kmem_cache *btrfs_free_space_cachep;
 
 #define S_SHIFT 12
 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
@@ -82,7 +84,8 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
 };
 
-static void btrfs_truncate(struct inode *inode);
+static int btrfs_setsize(struct inode *inode, loff_t newsize);
+static int btrfs_truncate(struct inode *inode);
 static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end);
 static noinline int cow_file_range(struct inode *inode,
                                   struct page *locked_page,
@@ -288,6 +291,7 @@ static noinline int add_async_extent(struct async_cow *cow,
        struct async_extent *async_extent;
 
        async_extent = kmalloc(sizeof(*async_extent), GFP_NOFS);
+       BUG_ON(!async_extent);
        async_extent->start = start;
        async_extent->ram_size = ram_size;
        async_extent->compressed_size = compressed_size;
@@ -382,9 +386,11 @@ again:
         */
        if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
            (btrfs_test_opt(root, COMPRESS) ||
-            (BTRFS_I(inode)->force_compress))) {
+            (BTRFS_I(inode)->force_compress) ||
+            (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
                WARN_ON(pages);
                pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
+               BUG_ON(!pages);
 
                if (BTRFS_I(inode)->force_compress)
                        compress_type = BTRFS_I(inode)->force_compress;
@@ -1254,7 +1260,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
                ret = run_delalloc_nocow(inode, locked_page, start, end,
                                         page_started, 0, nr_written);
        else if (!btrfs_test_opt(root, COMPRESS) &&
-                !(BTRFS_I(inode)->force_compress))
+                !(BTRFS_I(inode)->force_compress) &&
+                !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))
                ret = cow_file_range(inode, locked_page, start, end,
                                      page_started, nr_written, 1);
        else
@@ -1461,8 +1468,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                if (bio_flags & EXTENT_BIO_COMPRESSED) {
                        return btrfs_submit_compressed_read(inode, bio,
                                                    mirror_num, bio_flags);
-               } else if (!skip_sum)
-                       btrfs_lookup_bio_sums(root, inode, bio, NULL);
+               } else if (!skip_sum) {
+                       ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
+                       if (ret)
+                               return ret;
+               }
                goto mapit;
        } else if (!skip_sum) {
                /* csum items have already been cloned */
@@ -1785,6 +1795,8 @@ out:
 static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
                                struct extent_state *state, int uptodate)
 {
+       trace_btrfs_writepage_end_io_hook(page, start, end, uptodate);
+
        ClearPagePrivate2(page);
        return btrfs_finish_ordered_io(page->mapping->host, start, end);
 }
@@ -1895,10 +1907,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
        else
                rw = READ;
 
-       BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
+       ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
                                                      failrec->last_mirror,
                                                      failrec->bio_flags, 0);
-       return 0;
+       return ret;
 }
 
 /*
@@ -2282,7 +2294,7 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
  * this cleans up any orphans that may be left on the list from the last use
  * of this root.
  */
-void btrfs_orphan_cleanup(struct btrfs_root *root)
+int btrfs_orphan_cleanup(struct btrfs_root *root)
 {
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -2292,10 +2304,13 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
        int ret = 0, nr_unlink = 0, nr_truncate = 0;
 
        if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
-               return;
+               return 0;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
        path->reada = -1;
 
        key.objectid = BTRFS_ORPHAN_OBJECTID;
@@ -2304,11 +2319,8 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
        while (1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-               if (ret < 0) {
-                       printk(KERN_ERR "Error searching slot for orphan: %d"
-                              "\n", ret);
-                       break;
-               }
+               if (ret < 0)
+                       goto out;
 
                /*
                 * if ret == 0 means we found what we were searching for, which
@@ -2316,6 +2328,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 * find the key and see if we have stuff that matches
                 */
                if (ret > 0) {
+                       ret = 0;
                        if (path->slots[0] == 0)
                                break;
                        path->slots[0]--;
@@ -2343,7 +2356,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
-               BUG_ON(IS_ERR(inode));
+               if (IS_ERR(inode)) {
+                       ret = PTR_ERR(inode);
+                       goto out;
+               }
 
                /*
                 * add this inode to the orphan list so btrfs_orphan_del does
@@ -2361,7 +2377,10 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
                 */
                if (is_bad_inode(inode)) {
                        trans = btrfs_start_transaction(root, 0);
-                       BUG_ON(IS_ERR(trans));
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
                        btrfs_orphan_del(trans, inode);
                        btrfs_end_transaction(trans, root);
                        iput(inode);
@@ -2370,17 +2389,22 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
                /* if we have links, this was a truncate, lets do that */
                if (inode->i_nlink) {
+                       if (!S_ISREG(inode->i_mode)) {
+                               WARN_ON(1);
+                               iput(inode);
+                               continue;
+                       }
                        nr_truncate++;
-                       btrfs_truncate(inode);
+                       ret = btrfs_truncate(inode);
                } else {
                        nr_unlink++;
                }
 
                /* this will do delete_inode and everything for us */
                iput(inode);
+               if (ret)
+                       goto out;
        }
-       btrfs_free_path(path);
-
        root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
 
        if (root->orphan_block_rsv)
@@ -2389,14 +2413,20 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
        if (root->orphan_block_rsv || root->orphan_item_inserted) {
                trans = btrfs_join_transaction(root, 1);
-               BUG_ON(IS_ERR(trans));
-               btrfs_end_transaction(trans, root);
+               if (!IS_ERR(trans))
+                       btrfs_end_transaction(trans, root);
        }
 
        if (nr_unlink)
                printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink);
        if (nr_truncate)
                printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate);
+
+out:
+       if (ret)
+               printk(KERN_CRIT "btrfs: could not do orphan cleanup %d\n", ret);
+       btrfs_free_path(path);
+       return ret;
 }
 
 /*
@@ -2507,6 +2537,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
        alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
+       if (location.objectid == BTRFS_FREE_SPACE_OBJECTID)
+               inode->i_mapping->flags &= ~__GFP_FS;
 
        /*
         * try to precache a NULL acl entry for files that don't have
@@ -2635,10 +2667,10 @@ failed:
  * recovery code.  It remove a link in a directory with a given name, and
  * also drops the back refs in the inode to the directory
  */
-int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
-                      struct btrfs_root *root,
-                      struct inode *dir, struct inode *inode,
-                      const char *name, int name_len)
+static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root,
+                               struct inode *dir, struct inode *inode,
+                               const char *name, int name_len)
 {
        struct btrfs_path *path;
        int ret = 0;
@@ -2710,12 +2742,25 @@ err:
        btrfs_i_size_write(dir, dir->i_size - name_len * 2);
        inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
        btrfs_update_inode(trans, root, dir);
-       btrfs_drop_nlink(inode);
-       ret = btrfs_update_inode(trans, root, inode);
 out:
        return ret;
 }
 
+int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
+                      struct btrfs_root *root,
+                      struct inode *dir, struct inode *inode,
+                      const char *name, int name_len)
+{
+       int ret;
+       ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
+       if (!ret) {
+               btrfs_drop_nlink(inode);
+               ret = btrfs_update_inode(trans, root, inode);
+       }
+       return ret;
+}
+               
+
 /* helper to check if there is any shared block in the path */
 static int check_path_shared(struct btrfs_root *root,
                             struct btrfs_path *path)
@@ -3537,7 +3582,13 @@ out:
        return ret;
 }
 
-int btrfs_cont_expand(struct inode *inode, loff_t size)
+/*
+ * This function puts in dummy file extents for the area we're creating a hole
+ * for.  So if we are truncating this file to a larger size we need to insert
+ * these file extents so that btrfs_get_extent will return a EXTENT_MAP_HOLE for
+ * the range between oldsize and size
+ */
+int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
 {
        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3545,7 +3596,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        u64 mask = root->sectorsize - 1;
-       u64 hole_start = (inode->i_size + mask) & ~mask;
+       u64 hole_start = (oldsize + mask) & ~mask;
        u64 block_end = (size + mask) & ~mask;
        u64 last_byte;
        u64 cur_offset;
@@ -3590,13 +3641,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
                        err = btrfs_drop_extents(trans, inode, cur_offset,
                                                 cur_offset + hole_size,
                                                 &hint_byte, 1);
-                       BUG_ON(err);
+                       if (err)
+                               break;
 
                        err = btrfs_insert_file_extent(trans, root,
                                        inode->i_ino, cur_offset, 0,
                                        0, hole_size, 0, hole_size,
                                        0, 0, 0);
-                       BUG_ON(err);
+                       if (err)
+                               break;
 
                        btrfs_drop_extent_cache(inode, hole_start,
                                        last_byte - 1, 0);
@@ -3616,81 +3669,41 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
        return err;
 }
 
-static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
+static int btrfs_setsize(struct inode *inode, loff_t newsize)
 {
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
-       unsigned long nr;
+       loff_t oldsize = i_size_read(inode);
        int ret;
 
-       if (attr->ia_size == inode->i_size)
+       if (newsize == oldsize)
                return 0;
 
-       if (attr->ia_size > inode->i_size) {
-               unsigned long limit;
-               limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
-               if (attr->ia_size > inode->i_sb->s_maxbytes)
-                       return -EFBIG;
-               if (limit != RLIM_INFINITY && attr->ia_size > limit) {
-                       send_sig(SIGXFSZ, current, 0);
-                       return -EFBIG;
-               }
-       }
-
-       trans = btrfs_start_transaction(root, 5);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
-
-       btrfs_set_trans_block_group(trans, inode);
-
-       ret = btrfs_orphan_add(trans, inode);
-       BUG_ON(ret);
-
-       nr = trans->blocks_used;
-       btrfs_end_transaction(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
-
-       if (attr->ia_size > inode->i_size) {
-               ret = btrfs_cont_expand(inode, attr->ia_size);
+       if (newsize > oldsize) {
+               i_size_write(inode, newsize);
+               btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+               truncate_pagecache(inode, oldsize, newsize);
+               ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret) {
-                       btrfs_truncate(inode);
+                       btrfs_setsize(inode, oldsize);
                        return ret;
                }
 
-               i_size_write(inode, attr->ia_size);
-               btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+               mark_inode_dirty(inode);
+       } else {
 
-               trans = btrfs_start_transaction(root, 0);
-               BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
-               trans->block_rsv = root->orphan_block_rsv;
-               BUG_ON(!trans->block_rsv);
+               /*
+                * We're truncating a file that used to have good data down to
+                * zero. Make sure it gets into the ordered flush list so that
+                * any new writes get down to disk quickly.
+                */
+               if (newsize == 0)
+                       BTRFS_I(inode)->ordered_data_close = 1;
 
-               ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
-               if (inode->i_nlink > 0) {
-                       ret = btrfs_orphan_del(trans, inode);
-                       BUG_ON(ret);
-               }
-               nr = trans->blocks_used;
-               btrfs_end_transaction(trans, root);
-               btrfs_btree_balance_dirty(root, nr);
-               return 0;
+               /* we don't support swapfiles, so vmtruncate shouldn't fail */
+               truncate_setsize(inode, newsize);
+               ret = btrfs_truncate(inode);
        }
 
-       /*
-        * We're truncating a file that used to have good data down to
-        * zero. Make sure it gets into the ordered flush list so that
-        * any new writes get down to disk quickly.
-        */
-       if (attr->ia_size == 0)
-               BTRFS_I(inode)->ordered_data_close = 1;
-
-       /* we don't support swapfiles, so vmtruncate shouldn't fail */
-       ret = vmtruncate(inode, attr->ia_size);
-       BUG_ON(ret);
-
-       return 0;
+       return ret;
 }
 
 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
@@ -3707,7 +3720,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
                return err;
 
        if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
-               err = btrfs_setattr_size(inode, attr);
+               err = btrfs_setsize(inode, attr->ia_size);
                if (err)
                        return err;
        }
@@ -3730,6 +3743,8 @@ void btrfs_evict_inode(struct inode *inode)
        unsigned long nr;
        int ret;
 
+       trace_btrfs_inode_evict(inode);
+
        truncate_inode_pages(&inode->i_data, 0);
        if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
                               root == root->fs_info->tree_root))
@@ -4072,7 +4087,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
                BTRFS_I(inode)->root = root;
                memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
                btrfs_read_locked_inode(inode);
-
                inode_tree_add(inode);
                unlock_new_inode(inode);
                if (new)
@@ -4147,8 +4161,10 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
        if (!IS_ERR(inode) && root != sub_root) {
                down_read(&root->fs_info->cleanup_work_sem);
                if (!(inode->i_sb->s_flags & MS_RDONLY))
-                       btrfs_orphan_cleanup(sub_root);
+                       ret = btrfs_orphan_cleanup(sub_root);
                up_read(&root->fs_info->cleanup_work_sem);
+               if (ret)
+                       inode = ERR_PTR(ret);
        }
 
        return inode;
@@ -4282,6 +4298,9 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                while (di_cur < di_total) {
                        struct btrfs_key location;
 
+                       if (verify_dir_item(root, leaf, di))
+                               break;
+
                        name_len = btrfs_dir_name_len(leaf, di);
                        if (name_len <= sizeof(tmp_name)) {
                                name_ptr = tmp_name;
@@ -4517,6 +4536,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                return ERR_PTR(-ENOMEM);
 
        if (dir) {
+               trace_btrfs_inode_request(dir);
+
                ret = btrfs_set_inode_index(dir, index);
                if (ret) {
                        iput(inode);
@@ -4585,12 +4606,16 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if ((mode & S_IFREG)) {
                if (btrfs_test_opt(root, NODATASUM))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
-               if (btrfs_test_opt(root, NODATACOW))
+               if (btrfs_test_opt(root, NODATACOW) ||
+                   (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
                        BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
        }
 
        insert_inode_hash(inode);
        inode_tree_add(inode);
+
+       trace_btrfs_inode_new(inode);
+
        return inode;
 fail:
        if (dir)
@@ -4809,7 +4834,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
        /* do not allow sys_link's with other subvols of the same device */
        if (root->objectid != BTRFS_I(inode)->root->objectid)
-               return -EPERM;
+               return -EXDEV;
+
+       if (inode->i_nlink == ~0U)
+               return -EMLINK;
 
        btrfs_inc_nlink(inode);
        inode->i_ctime = CURRENT_TIME;
@@ -5265,6 +5293,9 @@ insert:
        }
        write_unlock(&em_tree->lock);
 out:
+
+       trace_btrfs_get_extent(root, em);
+
        if (path)
                btrfs_free_path(path);
        if (trans) {
@@ -5748,6 +5779,10 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
 
        kfree(dip->csums);
        kfree(dip);
+
+       /* If we had a csum failure make sure to clear the uptodate flag */
+       if (err)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
        dio_end_io(bio, err);
 }
 
@@ -5849,6 +5884,10 @@ out_done:
 
        kfree(dip->csums);
        kfree(dip);
+
+       /* If we had an error make sure to clear the uptodate flag */
+       if (err)
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
        dio_end_io(bio, err);
 }
 
@@ -5922,9 +5961,12 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                   __btrfs_submit_bio_start_direct_io,
                                   __btrfs_submit_bio_done);
                goto err;
-       } else if (!skip_sum)
-               btrfs_lookup_bio_sums_dio(root, inode, bio,
+       } else if (!skip_sum) {
+               ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
                                          file_offset, csums);
+               if (ret)
+                       goto err;
+       }
 
        ret = btrfs_map_bio(root, rw, bio, 0, 1);
 err:
@@ -5948,6 +5990,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        int nr_pages = 0;
        u32 *csums = dip->csums;
        int ret = 0;
+       int write = rw & REQ_WRITE;
 
        bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
        if (!bio)
@@ -5984,7 +6027,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                                goto out_err;
                        }
 
-                       if (!skip_sum)
+                       /* Write's use the ordered csums */
+                       if (!write && !skip_sum)
                                csums = csums + nr_pages;
                        start_sector += submit_len >> 9;
                        file_offset += submit_len;
@@ -6052,7 +6096,8 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
        }
        dip->csums = NULL;
 
-       if (!skip_sum) {
+       /* Write's use the ordered csum stuff, so we don't need dip->csums */
+       if (!write && !skip_sum) {
                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
                if (!dip->csums) {
                        kfree(dip);
@@ -6474,28 +6519,42 @@ out:
        return ret;
 }
 
-static void btrfs_truncate(struct inode *inode)
+static int btrfs_truncate(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
+       int err = 0;
        struct btrfs_trans_handle *trans;
        unsigned long nr;
        u64 mask = root->sectorsize - 1;
 
-       if (!S_ISREG(inode->i_mode)) {
-               WARN_ON(1);
-               return;
-       }
-
        ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
        if (ret)
-               return;
+               return ret;
 
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
+       trans = btrfs_start_transaction(root, 5);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       btrfs_set_trans_block_group(trans, inode);
+
+       ret = btrfs_orphan_add(trans, inode);
+       if (ret) {
+               btrfs_end_transaction(trans, root);
+               return ret;
+       }
+
+       nr = trans->blocks_used;
+       btrfs_end_transaction(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+
+       /* Now start a transaction for the truncate */
        trans = btrfs_start_transaction(root, 0);
-       BUG_ON(IS_ERR(trans));
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
        btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = root->orphan_block_rsv;
 
@@ -6522,29 +6581,38 @@ static void btrfs_truncate(struct inode *inode)
        while (1) {
                if (!trans) {
                        trans = btrfs_start_transaction(root, 0);
-                       BUG_ON(IS_ERR(trans));
+                       if (IS_ERR(trans))
+                               return PTR_ERR(trans);
                        btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = root->orphan_block_rsv;
                }
 
                ret = btrfs_block_rsv_check(trans, root,
                                            root->orphan_block_rsv, 0, 5);
-               if (ret) {
-                       BUG_ON(ret != -EAGAIN);
+               if (ret == -EAGAIN) {
                        ret = btrfs_commit_transaction(trans, root);
-                       BUG_ON(ret);
+                       if (ret)
+                               return ret;
                        trans = NULL;
                        continue;
+               } else if (ret) {
+                       err = ret;
+                       break;
                }
 
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
-               if (ret != -EAGAIN)
+               if (ret != -EAGAIN) {
+                       err = ret;
                        break;
+               }
 
                ret = btrfs_update_inode(trans, root, inode);
-               BUG_ON(ret);
+               if (ret) {
+                       err = ret;
+                       break;
+               }
 
                nr = trans->blocks_used;
                btrfs_end_transaction(trans, root);
@@ -6554,16 +6622,27 @@ static void btrfs_truncate(struct inode *inode)
 
        if (ret == 0 && inode->i_nlink > 0) {
                ret = btrfs_orphan_del(trans, inode);
-               BUG_ON(ret);
+               if (ret)
+                       err = ret;
+       } else if (ret && inode->i_nlink > 0) {
+               /*
+                * Failed to do the truncate, remove us from the in memory
+                * orphan list.
+                */
+               ret = btrfs_orphan_del(NULL, inode);
        }
 
        ret = btrfs_update_inode(trans, root, inode);
-       BUG_ON(ret);
+       if (ret && !err)
+               err = ret;
 
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
-       BUG_ON(ret);
+       if (ret && !err)
+               err = ret;
        btrfs_btree_balance_dirty(root, nr);
+
+       return err;
 }
 
 /*
@@ -6630,9 +6709,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->index_cnt = (u64)-1;
        ei->last_unlink_trans = 0;
 
-       spin_lock_init(&ei->accounting_lock);
        atomic_set(&ei->outstanding_extents, 0);
-       ei->reserved_extents = 0;
+       atomic_set(&ei->reserved_extents, 0);
 
        ei->ordered_data_close = 0;
        ei->orphan_meta_reserved = 0;
@@ -6668,7 +6746,7 @@ void btrfs_destroy_inode(struct inode *inode)
        WARN_ON(!list_empty(&inode->i_dentry));
        WARN_ON(inode->i_data.nrpages);
        WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
-       WARN_ON(BTRFS_I(inode)->reserved_extents);
+       WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
 
        /*
         * This can happen where we create an inode, but somebody else also
@@ -6760,6 +6838,8 @@ void btrfs_destroy_cachep(void)
                kmem_cache_destroy(btrfs_transaction_cachep);
        if (btrfs_path_cachep)
                kmem_cache_destroy(btrfs_path_cachep);
+       if (btrfs_free_space_cachep)
+               kmem_cache_destroy(btrfs_free_space_cachep);
 }
 
 int btrfs_init_cachep(void)
@@ -6788,6 +6868,12 @@ int btrfs_init_cachep(void)
        if (!btrfs_path_cachep)
                goto fail;
 
+       btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space_cache",
+                       sizeof(struct btrfs_free_space), 0,
+                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+       if (!btrfs_free_space_cachep)
+               goto fail;
+
        return 0;
 fail:
        btrfs_destroy_cachep();
@@ -6806,6 +6892,26 @@ static int btrfs_getattr(struct vfsmount *mnt,
        return 0;
 }
 
+/*
+ * If a file is moved, it will inherit the cow and compression flags of the new
+ * directory.
+ */
+static void fixup_inode_flags(struct inode *dir, struct inode *inode)
+{
+       struct btrfs_inode *b_dir = BTRFS_I(dir);
+       struct btrfs_inode *b_inode = BTRFS_I(inode);
+
+       if (b_dir->flags & BTRFS_INODE_NODATACOW)
+               b_inode->flags |= BTRFS_INODE_NODATACOW;
+       else
+               b_inode->flags &= ~BTRFS_INODE_NODATACOW;
+
+       if (b_dir->flags & BTRFS_INODE_COMPRESS)
+               b_inode->flags |= BTRFS_INODE_COMPRESS;
+       else
+               b_inode->flags &= ~BTRFS_INODE_COMPRESS;
+}
+
 static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -6908,11 +7014,12 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                        old_dentry->d_name.name,
                                        old_dentry->d_name.len);
        } else {
-               btrfs_inc_nlink(old_dentry->d_inode);
-               ret = btrfs_unlink_inode(trans, root, old_dir,
-                                        old_dentry->d_inode,
-                                        old_dentry->d_name.name,
-                                        old_dentry->d_name.len);
+               ret = __btrfs_unlink_inode(trans, root, old_dir,
+                                       old_dentry->d_inode,
+                                       old_dentry->d_name.name,
+                                       old_dentry->d_name.len);
+               if (!ret)
+                       ret = btrfs_update_inode(trans, root, old_inode);
        }
        BUG_ON(ret);
 
@@ -6939,6 +7046,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                }
        }
 
+       fixup_inode_flags(new_dir, old_inode);
+
        ret = btrfs_add_link(trans, new_dir, old_inode,
                             new_dentry->d_name.name,
                             new_dentry->d_name.len, 0, index);
@@ -7355,7 +7464,6 @@ static const struct address_space_operations btrfs_symlink_aops = {
 };
 
 static const struct inode_operations btrfs_file_inode_operations = {
-       .truncate       = btrfs_truncate,
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
        .setxattr       = btrfs_setxattr,
index d1bace3..7c07fe2 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/xattr.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/blkdev.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -138,6 +139,24 @@ static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
        return 0;
 }
 
+static int check_flags(unsigned int flags)
+{
+       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+                     FS_NOATIME_FL | FS_NODUMP_FL | \
+                     FS_SYNC_FL | FS_DIRSYNC_FL | \
+                     FS_NOCOMP_FL | FS_COMPR_FL | \
+                     FS_NOCOW_FL | FS_COW_FL))
+               return -EOPNOTSUPP;
+
+       if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
+               return -EINVAL;
+
+       if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL))
+               return -EINVAL;
+
+       return 0;
+}
+
 static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
 {
        struct inode *inode = file->f_path.dentry->d_inode;
@@ -153,10 +172,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
 
-       if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
-                     FS_NOATIME_FL | FS_NODUMP_FL | \
-                     FS_SYNC_FL | FS_DIRSYNC_FL))
-               return -EOPNOTSUPP;
+       ret = check_flags(flags);
+       if (ret)
+               return ret;
 
        if (!inode_owner_or_capable(inode))
                return -EACCES;
@@ -201,6 +219,22 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        else
                ip->flags &= ~BTRFS_INODE_DIRSYNC;
 
+       /*
+        * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
+        * flag may be changed automatically if compression code won't make
+        * things smaller.
+        */
+       if (flags & FS_NOCOMP_FL) {
+               ip->flags &= ~BTRFS_INODE_COMPRESS;
+               ip->flags |= BTRFS_INODE_NOCOMPRESS;
+       } else if (flags & FS_COMPR_FL) {
+               ip->flags |= BTRFS_INODE_COMPRESS;
+               ip->flags &= ~BTRFS_INODE_NOCOMPRESS;
+       }
+       if (flags & FS_NOCOW_FL)
+               ip->flags |= BTRFS_INODE_NODATACOW;
+       else if (flags & FS_COW_FL)
+               ip->flags &= ~BTRFS_INODE_NODATACOW;
 
        trans = btrfs_join_transaction(root, 1);
        BUG_ON(IS_ERR(trans));
@@ -213,9 +247,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        btrfs_end_transaction(trans, root);
 
        mnt_drop_write(file->f_path.mnt);
+
+       ret = 0;
  out_unlock:
        mutex_unlock(&inode->i_mutex);
-       return 0;
+       return ret;
 }
 
 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
@@ -225,6 +261,49 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
        return put_user(inode->i_generation, arg);
 }
 
+static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
+{
+       struct btrfs_root *root = fdentry(file)->d_sb->s_fs_info;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_device *device;
+       struct request_queue *q;
+       struct fstrim_range range;
+       u64 minlen = ULLONG_MAX;
+       u64 num_devices = 0;
+       int ret;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       mutex_lock(&fs_info->fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+               if (!device->bdev)
+                       continue;
+               q = bdev_get_queue(device->bdev);
+               if (blk_queue_discard(q)) {
+                       num_devices++;
+                       minlen = min((u64)q->limits.discard_granularity,
+                                    minlen);
+               }
+       }
+       mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+       if (!num_devices)
+               return -EOPNOTSUPP;
+
+       if (copy_from_user(&range, arg, sizeof(range)))
+               return -EFAULT;
+
+       range.minlen = max(range.minlen, minlen);
+       ret = btrfs_trim_fs(root, &range);
+       if (ret < 0)
+               return ret;
+
+       if (copy_to_user(arg, &range, sizeof(range)))
+               return -EFAULT;
+
+       return 0;
+}
+
 static noinline int create_subvol(struct btrfs_root *root,
                                  struct dentry *dentry,
                                  char *name, int namelen,
@@ -409,7 +488,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        if (ret)
                goto fail;
 
-       btrfs_orphan_cleanup(pending_snapshot->snap);
+       ret = btrfs_orphan_cleanup(pending_snapshot->snap);
+       if (ret)
+               goto fail;
 
        parent = dget_parent(dentry);
        inode = btrfs_lookup_dentry(parent->d_inode, dentry);
@@ -2348,12 +2429,15 @@ static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp
        struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
        struct btrfs_trans_handle *trans;
        u64 transid;
+       int ret;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        transid = trans->transid;
-       btrfs_commit_transaction_async(trans, root, 0);
+       ret = btrfs_commit_transaction_async(trans, root, 0);
+       if (ret)
+               return ret;
 
        if (argp)
                if (copy_to_user(argp, &transid, sizeof(transid)))
@@ -2388,6 +2472,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_setflags(file, argp);
        case FS_IOC_GETVERSION:
                return btrfs_ioctl_getversion(file, argp);
+       case FITRIM:
+               return btrfs_ioctl_fitrim(file, argp);
        case BTRFS_IOC_SNAP_CREATE:
                return btrfs_ioctl_snap_create(file, argp, 0);
        case BTRFS_IOC_SNAP_CREATE_V2:
index 083a554..a1c9404 100644 (file)
@@ -202,6 +202,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        INIT_LIST_HEAD(&entry->list);
        INIT_LIST_HEAD(&entry->root_extent_list);
 
+       trace_btrfs_ordered_extent_add(inode, entry);
+
        spin_lock(&tree->lock);
        node = tree_insert(&tree->tree, file_offset,
                           &entry->rb_node);
@@ -387,6 +389,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
        struct list_head *cur;
        struct btrfs_ordered_sum *sum;
 
+       trace_btrfs_ordered_extent_put(entry->inode, entry);
+
        if (atomic_dec_and_test(&entry->refs)) {
                while (!list_empty(&entry->list)) {
                        cur = entry->list.next;
@@ -420,6 +424,8 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
        spin_lock(&root->fs_info->ordered_extent_lock);
        list_del_init(&entry->root_extent_list);
 
+       trace_btrfs_ordered_extent_remove(inode, entry);
+
        /*
         * we have no more ordered extents for this inode and
         * no dirty pages.  We can safely remove it from the
@@ -585,6 +591,8 @@ void btrfs_start_ordered_extent(struct inode *inode,
        u64 start = entry->file_offset;
        u64 end = start + entry->len - 1;
 
+       trace_btrfs_ordered_extent_start(inode, entry);
+
        /*
         * pages in the range can be dirty, clean or writeback.  We
         * start IO on any dirty ones so the wait doesn't stall waiting
index 31ade58..58250e0 100644 (file)
@@ -1724,6 +1724,7 @@ again:
 
                        eb = read_tree_block(dest, old_bytenr, blocksize,
                                             old_ptr_gen);
+                       BUG_ON(!eb);
                        btrfs_tree_lock(eb);
                        if (cow) {
                                ret = btrfs_cow_block(trans, dest, eb, parent,
@@ -2513,6 +2514,10 @@ static int do_relocation(struct btrfs_trans_handle *trans,
                blocksize = btrfs_level_size(root, node->level);
                generation = btrfs_node_ptr_generation(upper->eb, slot);
                eb = read_tree_block(root, bytenr, blocksize, generation);
+               if (!eb) {
+                       err = -EIO;
+                       goto next;
+               }
                btrfs_tree_lock(eb);
                btrfs_set_lock_blocking(eb);
 
@@ -2670,6 +2675,7 @@ static int get_tree_block_key(struct reloc_control *rc,
        BUG_ON(block->key_ready);
        eb = read_tree_block(rc->extent_root, block->bytenr,
                             block->key.objectid, block->key.offset);
+       BUG_ON(!eb);
        WARN_ON(btrfs_header_level(eb) != block->level);
        if (block->level == 0)
                btrfs_item_key_to_cpu(eb, &block->key, 0);
@@ -4209,7 +4215,7 @@ out:
                if (IS_ERR(fs_root))
                        err = PTR_ERR(fs_root);
                else
-                       btrfs_orphan_cleanup(fs_root);
+                       err = btrfs_orphan_cleanup(fs_root);
        }
        return err;
 }
index 6a1086e..29b2d7c 100644 (file)
@@ -88,7 +88,8 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
        search_key.offset = (u64)-1;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
        ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
        if (ret < 0)
                goto out;
@@ -332,7 +333,8 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        struct extent_buffer *leaf;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
        ret = btrfs_search_slot(trans, root, key, path, -1, 1);
        if (ret < 0)
                goto out;
index d39a989..2edfc03 100644 (file)
@@ -52,6 +52,9 @@
 #include "export.h"
 #include "compression.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/btrfs.h>
+
 static const struct super_operations btrfs_super_ops;
 
 static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno,
@@ -620,6 +623,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
        struct btrfs_root *root = btrfs_sb(sb);
        int ret;
 
+       trace_btrfs_sync_fs(wait);
+
        if (!wait) {
                filemap_flush(root->fs_info->btree_inode->i_mapping);
                return 0;
index 3d73c8d..ce48eb5 100644 (file)
@@ -57,7 +57,8 @@ static noinline int join_transaction(struct btrfs_root *root)
        if (!cur_trans) {
                cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
                                             GFP_NOFS);
-               BUG_ON(!cur_trans);
+               if (!cur_trans)
+                       return -ENOMEM;
                root->fs_info->generation++;
                cur_trans->num_writers = 1;
                cur_trans->num_joined = 0;
@@ -195,7 +196,11 @@ again:
                wait_current_trans(root);
 
        ret = join_transaction(root);
-       BUG_ON(ret);
+       if (ret < 0) {
+               if (type != TRANS_JOIN_NOLOCK)
+                       mutex_unlock(&root->fs_info->trans_mutex);
+               return ERR_PTR(ret);
+       }
 
        cur_trans = root->fs_info->running_transaction;
        cur_trans->use_count++;
@@ -1156,7 +1161,8 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        struct btrfs_transaction *cur_trans;
 
        ac = kmalloc(sizeof(*ac), GFP_NOFS);
-       BUG_ON(!ac);
+       if (!ac)
+               return -ENOMEM;
 
        INIT_DELAYED_WORK(&ac->work, do_async_commit);
        ac->root = root;
@@ -1389,6 +1395,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
+       trace_btrfs_transaction_commit(root);
+
        mutex_unlock(&root->fs_info->trans_mutex);
 
        if (current->journal_info == trans)
index a4bbb85..c50271a 100644 (file)
@@ -799,12 +799,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
        struct inode *dir;
        int ret;
        struct btrfs_inode_ref *ref;
-       struct btrfs_dir_item *di;
        struct inode *inode;
        char *name;
        int namelen;
        unsigned long ref_ptr;
        unsigned long ref_end;
+       int search_done = 0;
 
        /*
         * it is possible that we didn't log all the parent directories
@@ -845,7 +845,10 @@ again:
         * existing back reference, and we don't want to create
         * dangling pointers in the directory.
         */
-conflict_again:
+
+       if (search_done)
+               goto insert;
+
        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
        if (ret == 0) {
                char *victim_name;
@@ -886,37 +889,21 @@ conflict_again:
                                ret = btrfs_unlink_inode(trans, root, dir,
                                                         inode, victim_name,
                                                         victim_name_len);
-                               kfree(victim_name);
-                               btrfs_release_path(root, path);
-                               goto conflict_again;
                        }
                        kfree(victim_name);
                        ptr = (unsigned long)(victim_ref + 1) + victim_name_len;
                }
                BUG_ON(ret);
-       }
-       btrfs_release_path(root, path);
-
-       /* look for a conflicting sequence number */
-       di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
-                                        btrfs_inode_ref_index(eb, ref),
-                                        name, namelen, 0);
-       if (di && !IS_ERR(di)) {
-               ret = drop_one_dir_item(trans, root, path, dir, di);
-               BUG_ON(ret);
-       }
-       btrfs_release_path(root, path);
 
-
-       /* look for a conflicting name */
-       di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
-                                  name, namelen, 0);
-       if (di && !IS_ERR(di)) {
-               ret = drop_one_dir_item(trans, root, path, dir, di);
-               BUG_ON(ret);
+               /*
+                * NOTE: we have searched root tree and checked the
+                * coresponding ref, it does not need to check again.
+                */
+               search_done = 1;
        }
        btrfs_release_path(root, path);
 
+insert:
        /* insert our name */
        ret = btrfs_add_link(trans, dir, inode, name, namelen, 0,
                             btrfs_inode_ref_index(eb, ref));
@@ -1286,6 +1273,8 @@ static noinline int replay_one_dir_item(struct btrfs_trans_handle *trans,
        ptr_end = ptr + item_size;
        while (ptr < ptr_end) {
                di = (struct btrfs_dir_item *)ptr;
+               if (verify_dir_item(root, eb, di))
+                       return -EIO;
                name_len = btrfs_dir_name_len(eb, di);
                ret = replay_one_name(trans, root, path, eb, di, key);
                BUG_ON(ret);
@@ -1412,6 +1401,11 @@ again:
        ptr_end = ptr + item_size;
        while (ptr < ptr_end) {
                di = (struct btrfs_dir_item *)ptr;
+               if (verify_dir_item(root, eb, di)) {
+                       ret = -EIO;
+                       goto out;
+               }
+
                name_len = btrfs_dir_name_len(eb, di);
                name = kmalloc(name_len, GFP_NOFS);
                if (!name) {
@@ -1821,7 +1815,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
        int orig_level;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        level = btrfs_header_level(log->node);
        orig_level = level;
@@ -3107,9 +3102,11 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
                .stage = 0,
        };
 
-       fs_info->log_root_recovering = 1;
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
+
+       fs_info->log_root_recovering = 1;
 
        trans = btrfs_start_transaction(fs_info->tree_root, 0);
        BUG_ON(IS_ERR(trans));
@@ -3117,7 +3114,8 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
        wc.trans = trans;
        wc.pin = 1;
 
-       walk_log_tree(trans, log_root_tree, &wc);
+       ret = walk_log_tree(trans, log_root_tree, &wc);
+       BUG_ON(ret);
 
 again:
        key.objectid = BTRFS_TREE_LOG_OBJECTID;
@@ -3141,8 +3139,7 @@ again:
 
                log = btrfs_read_fs_root_no_radix(log_root_tree,
                                                  &found_key);
-               BUG_ON(!log);
-
+               BUG_ON(IS_ERR(log));
 
                tmp_key.objectid = found_key.offset;
                tmp_key.type = BTRFS_ROOT_ITEM_KEY;
index 9d554e8..309a57b 100644 (file)
 #include "volumes.h"
 #include "async-thread.h"
 
-struct map_lookup {
-       u64 type;
-       int io_align;
-       int io_width;
-       int stripe_len;
-       int sector_size;
-       int num_stripes;
-       int sub_stripes;
-       struct btrfs_bio_stripe stripes[];
-};
-
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_device *device);
@@ -1879,6 +1868,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 
        BUG_ON(ret);
 
+       trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                BUG_ON(ret);
@@ -2606,6 +2597,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        *num_bytes = chunk_bytes_by_type(type, calc_size,
                                         map->num_stripes, sub_stripes);
 
+       trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
                ret = -ENOMEM;
@@ -2714,6 +2707,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                             item_size);
                BUG_ON(ret);
        }
+
        kfree(chunk);
        return 0;
 }
@@ -2918,7 +2912,10 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        u64 offset;
        u64 stripe_offset;
+       u64 stripe_end_offset;
        u64 stripe_nr;
+       u64 stripe_nr_orig;
+       u64 stripe_nr_end;
        int stripes_allocated = 8;
        int stripes_required = 1;
        int stripe_index;
@@ -2927,7 +2924,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int max_errors = 0;
        struct btrfs_multi_bio *multi = NULL;
 
-       if (multi_ret && !(rw & REQ_WRITE))
+       if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
                stripes_allocated = 1;
 again:
        if (multi_ret) {
@@ -2968,7 +2965,15 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && (rw & REQ_WRITE) &&
+       if (rw & REQ_DISCARD) {
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                                BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_DUP |
+                                BTRFS_BLOCK_GROUP_RAID10)) {
+                       stripes_required = map->num_stripes;
+               }
+       }
+       if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
@@ -2988,12 +2993,15 @@ again:
        /* stripe_offset is the offset of this block in its stripe*/
        stripe_offset = offset - stripe_offset;
 
-       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
-                        BTRFS_BLOCK_GROUP_RAID10 |
-                        BTRFS_BLOCK_GROUP_DUP)) {
+       if (rw & REQ_DISCARD)
+               *length = min_t(u64, em->len - offset, *length);
+       else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                             BTRFS_BLOCK_GROUP_RAID1 |
+                             BTRFS_BLOCK_GROUP_RAID10 |
+                             BTRFS_BLOCK_GROUP_DUP)) {
                /* we limit the length of each bio to what fits in a stripe */
                *length = min_t(u64, em->len - offset,
-                             map->stripe_len - stripe_offset);
+                               map->stripe_len - stripe_offset);
        } else {
                *length = em->len - offset;
        }
@@ -3003,8 +3011,19 @@ again:
 
        num_stripes = 1;
        stripe_index = 0;
-       if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (rw & REQ_WRITE)
+       stripe_nr_orig = stripe_nr;
+       stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+                       (~(map->stripe_len - 1));
+       do_div(stripe_nr_end, map->stripe_len);
+       stripe_end_offset = stripe_nr_end * map->stripe_len -
+                           (offset + *length);
+       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+               if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->num_stripes,
+                                           stripe_nr_end - stripe_nr_orig);
+               stripe_index = do_div(stripe_nr, map->num_stripes);
+       } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+               if (rw & (REQ_WRITE | REQ_DISCARD))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -3015,7 +3034,7 @@ again:
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (rw & REQ_WRITE)
+               if (rw & (REQ_WRITE | REQ_DISCARD))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -3028,6 +3047,10 @@ again:
 
                if (rw & REQ_WRITE)
                        num_stripes = map->sub_stripes;
+               else if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->sub_stripes *
+                                           (stripe_nr_end - stripe_nr_orig),
+                                           map->num_stripes);
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
                else {
@@ -3045,12 +3068,101 @@ again:
        }
        BUG_ON(stripe_index >= map->num_stripes);
 
-       for (i = 0; i < num_stripes; i++) {
-               multi->stripes[i].physical =
-                       map->stripes[stripe_index].physical +
-                       stripe_offset + stripe_nr * map->stripe_len;
-               multi->stripes[i].dev = map->stripes[stripe_index].dev;
-               stripe_index++;
+       if (rw & REQ_DISCARD) {
+               for (i = 0; i < num_stripes; i++) {
+                       multi->stripes[i].physical =
+                               map->stripes[stripe_index].physical +
+                               stripe_offset + stripe_nr * map->stripe_len;
+                       multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+                       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+                               u64 stripes;
+                               u32 last_stripe = 0;
+                               int j;
+
+                               div_u64_rem(stripe_nr_end - 1,
+                                           map->num_stripes,
+                                           &last_stripe);
+
+                               for (j = 0; j < map->num_stripes; j++) {
+                                       u32 test;
+
+                                       div_u64_rem(stripe_nr_end - 1 - j,
+                                                   map->num_stripes, &test);
+                                       if (test == stripe_index)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, map->num_stripes);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+
+                               if (i == 0) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       stripe_offset = 0;
+                               }
+                               if (stripe_index == last_stripe)
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                       } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+                               u64 stripes;
+                               int j;
+                               int factor = map->num_stripes /
+                                            map->sub_stripes;
+                               u32 last_stripe = 0;
+
+                               div_u64_rem(stripe_nr_end - 1,
+                                           factor, &last_stripe);
+                               last_stripe *= map->sub_stripes;
+
+                               for (j = 0; j < factor; j++) {
+                                       u32 test;
+
+                                       div_u64_rem(stripe_nr_end - 1 - j,
+                                                   factor, &test);
+
+                                       if (test ==
+                                           stripe_index / map->sub_stripes)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, factor);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+
+                               if (i < map->sub_stripes) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       if (i == map->sub_stripes - 1)
+                                               stripe_offset = 0;
+                               }
+                               if (stripe_index >= last_stripe &&
+                                   stripe_index <= (last_stripe +
+                                                    map->sub_stripes - 1)) {
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                               }
+                       } else
+                               multi->stripes[i].length = *length;
+
+                       stripe_index++;
+                       if (stripe_index == map->num_stripes) {
+                               /* This could only happen for RAID0/10 */
+                               stripe_index = 0;
+                               stripe_nr++;
+                       }
+               }
+       } else {
+               for (i = 0; i < num_stripes; i++) {
+                       multi->stripes[i].physical =
+                               map->stripes[stripe_index].physical +
+                               stripe_offset +
+                               stripe_nr * map->stripe_len;
+                       multi->stripes[i].dev =
+                               map->stripes[stripe_index].dev;
+                       stripe_index++;
+               }
        }
        if (multi_ret) {
                *multi_ret = multi;
index 7fb59d4..cc2eada 100644 (file)
@@ -126,6 +126,7 @@ struct btrfs_fs_devices {
 struct btrfs_bio_stripe {
        struct btrfs_device *dev;
        u64 physical;
+       u64 length; /* only used for discard mappings */
 };
 
 struct btrfs_multi_bio {
@@ -145,6 +146,17 @@ struct btrfs_device_info {
        u64 max_avail;
 };
 
+struct map_lookup {
+       u64 type;
+       int io_align;
+       int io_width;
+       int stripe_len;
+       int sector_size;
+       int num_stripes;
+       int sub_stripes;
+       struct btrfs_bio_stripe stripes[];
+};
+
 /* Used to sort the devices by max_avail(descending sort) */
 int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2);
 
index d779cef..a5303b8 100644 (file)
@@ -242,6 +242,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
                        break;
 
                di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
+               if (verify_dir_item(root, leaf, di))
+                       continue;
 
                name_len = btrfs_dir_name_len(leaf, di);
                total_size += name_len + 1;
index b677bd7..52f283c 100644 (file)
@@ -357,6 +357,8 @@ struct inodes_stat_t {
 #define FS_TOPDIR_FL                   0x00020000 /* Top of directory hierarchies*/
 #define FS_EXTENT_FL                   0x00080000 /* Extents */
 #define FS_DIRECTIO_FL                 0x00100000 /* Use direct i/o */
+#define FS_NOCOW_FL                    0x00800000 /* Do not cow file */
+#define FS_COW_FL                      0x02000000 /* Cow file */
 #define FS_RESERVED_FL                 0x80000000 /* reserved for ext2 lib */
 
 #define FS_FL_USER_VISIBLE             0x0003DFFF /* User visible flags */
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
new file mode 100644 (file)
index 0000000..f445cff
--- /dev/null
@@ -0,0 +1,667 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM btrfs
+
+#if !defined(_TRACE_BTRFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BTRFS_H
+
+#include <linux/writeback.h>
+#include <linux/tracepoint.h>
+
+struct btrfs_root;
+struct btrfs_fs_info;
+struct btrfs_inode;
+struct extent_map;
+struct btrfs_ordered_extent;
+struct btrfs_delayed_ref_node;
+struct btrfs_delayed_tree_ref;
+struct btrfs_delayed_data_ref;
+struct btrfs_delayed_ref_head;
+struct map_lookup;
+struct extent_buffer;
+
+#define show_ref_type(type)                                            \
+       __print_symbolic(type,                                          \
+               { BTRFS_TREE_BLOCK_REF_KEY,     "TREE_BLOCK_REF" },     \
+               { BTRFS_EXTENT_DATA_REF_KEY,    "EXTENT_DATA_REF" },    \
+               { BTRFS_EXTENT_REF_V0_KEY,      "EXTENT_REF_V0" },      \
+               { BTRFS_SHARED_BLOCK_REF_KEY,   "SHARED_BLOCK_REF" },   \
+               { BTRFS_SHARED_DATA_REF_KEY,    "SHARED_DATA_REF" })
+
+#define __show_root_type(obj)                                          \
+       __print_symbolic(obj,                                           \
+               { BTRFS_ROOT_TREE_OBJECTID,     "ROOT_TREE"     },      \
+               { BTRFS_EXTENT_TREE_OBJECTID,   "EXTENT_TREE"   },      \
+               { BTRFS_CHUNK_TREE_OBJECTID,    "CHUNK_TREE"    },      \
+               { BTRFS_DEV_TREE_OBJECTID,      "DEV_TREE"      },      \
+               { BTRFS_FS_TREE_OBJECTID,       "FS_TREE"       },      \
+               { BTRFS_ROOT_TREE_DIR_OBJECTID, "ROOT_TREE_DIR" },      \
+               { BTRFS_CSUM_TREE_OBJECTID,     "CSUM_TREE"     },      \
+               { BTRFS_TREE_LOG_OBJECTID,      "TREE_LOG"      },      \
+               { BTRFS_TREE_RELOC_OBJECTID,    "TREE_RELOC"    },      \
+               { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
+
+#define show_root_type(obj)                                            \
+       obj, ((obj >= BTRFS_DATA_RELOC_TREE_OBJECTID) ||                \
+             (obj <= BTRFS_CSUM_TREE_OBJECTID )) ? __show_root_type(obj) : "-"
+
+TRACE_EVENT(btrfs_transaction_commit,
+
+       TP_PROTO(struct btrfs_root *root),
+
+       TP_ARGS(root),
+
+       TP_STRUCT__entry(
+               __field(        u64,  generation                )
+               __field(        u64,  root_objectid             )
+       ),
+
+       TP_fast_assign(
+               __entry->generation     = root->fs_info->generation;
+               __entry->root_objectid  = root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), gen = %llu",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->generation)
+);
+
+DECLARE_EVENT_CLASS(btrfs__inode,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        blkcnt_t,  blocks               )
+               __field(        u64,  disk_i_size               )
+               __field(        u64,  generation                )
+               __field(        u64,  last_trans                )
+               __field(        u64,  logged_trans              )
+               __field(        u64,  root_objectid             )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = inode->i_ino;
+               __entry->blocks = inode->i_blocks;
+               __entry->disk_i_size  = BTRFS_I(inode)->disk_i_size;
+               __entry->generation = BTRFS_I(inode)->generation;
+               __entry->last_trans = BTRFS_I(inode)->last_trans;
+               __entry->logged_trans = BTRFS_I(inode)->logged_trans;
+               __entry->root_objectid =
+                               BTRFS_I(inode)->root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), gen = %llu, ino = %lu, blocks = %llu, "
+                 "disk_i_size = %llu, last_trans = %llu, logged_trans = %llu",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->generation,
+                 (unsigned long)__entry->ino,
+                 (unsigned long long)__entry->blocks,
+                 (unsigned long long)__entry->disk_i_size,
+                 (unsigned long long)__entry->last_trans,
+                 (unsigned long long)__entry->logged_trans)
+);
+
+DEFINE_EVENT(btrfs__inode, btrfs_inode_new,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DEFINE_EVENT(btrfs__inode, btrfs_inode_request,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+DEFINE_EVENT(btrfs__inode, btrfs_inode_evict,
+
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode)
+);
+
+#define __show_map_type(type)                                          \
+       __print_symbolic(type,                                          \
+               { EXTENT_MAP_LAST_BYTE, "LAST_BYTE"     },              \
+               { EXTENT_MAP_HOLE,      "HOLE"          },              \
+               { EXTENT_MAP_INLINE,    "INLINE"        },              \
+               { EXTENT_MAP_DELALLOC,  "DELALLOC"      })
+
+#define show_map_type(type)                    \
+       type, (type >= EXTENT_MAP_LAST_BYTE) ? "-" :  __show_map_type(type)
+
+#define show_map_flags(flag)                                           \
+       __print_flags(flag, "|",                                        \
+               { EXTENT_FLAG_PINNED,           "PINNED"        },      \
+               { EXTENT_FLAG_COMPRESSED,       "COMPRESSED"    },      \
+               { EXTENT_FLAG_VACANCY,          "VACANCY"       },      \
+               { EXTENT_FLAG_PREALLOC,         "PREALLOC"      })
+
+TRACE_EVENT(btrfs_get_extent,
+
+       TP_PROTO(struct btrfs_root *root, struct extent_map *map),
+
+       TP_ARGS(root, map),
+
+       TP_STRUCT__entry(
+               __field(        u64,  root_objectid     )
+               __field(        u64,  start             )
+               __field(        u64,  len               )
+               __field(        u64,  orig_start        )
+               __field(        u64,  block_start       )
+               __field(        u64,  block_len         )
+               __field(        unsigned long,  flags   )
+               __field(        int,  refs              )
+               __field(        unsigned int,  compress_type    )
+       ),
+
+       TP_fast_assign(
+               __entry->root_objectid  = root->root_key.objectid;
+               __entry->start          = map->start;
+               __entry->len            = map->len;
+               __entry->orig_start     = map->orig_start;
+               __entry->block_start    = map->block_start;
+               __entry->block_len      = map->block_len;
+               __entry->flags          = map->flags;
+               __entry->refs           = atomic_read(&map->refs);
+               __entry->compress_type  = map->compress_type;
+       ),
+
+       TP_printk("root = %llu(%s), start = %llu, len = %llu, "
+                 "orig_start = %llu, block_start = %llu(%s), "
+                 "block_len = %llu, flags = %s, refs = %u, "
+                 "compress_type = %u",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->start,
+                 (unsigned long long)__entry->len,
+                 (unsigned long long)__entry->orig_start,
+                 show_map_type(__entry->block_start),
+                 (unsigned long long)__entry->block_len,
+                 show_map_flags(__entry->flags),
+                 __entry->refs, __entry->compress_type)
+);
+
+#define show_ordered_flags(flags)                                      \
+       __print_symbolic(flags,                                 \
+               { BTRFS_ORDERED_IO_DONE,        "IO_DONE"       },      \
+               { BTRFS_ORDERED_COMPLETE,       "COMPLETE"      },      \
+               { BTRFS_ORDERED_NOCOW,          "NOCOW"         },      \
+               { BTRFS_ORDERED_COMPRESSED,     "COMPRESSED"    },      \
+               { BTRFS_ORDERED_PREALLOC,       "PREALLOC"      },      \
+               { BTRFS_ORDERED_DIRECT,         "DIRECT"        })
+
+DECLARE_EVENT_CLASS(btrfs__ordered_extent,
+
+       TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered),
+
+       TP_ARGS(inode, ordered),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino             )
+               __field(        u64,  file_offset       )
+               __field(        u64,  start             )
+               __field(        u64,  len               )
+               __field(        u64,  disk_len          )
+               __field(        u64,  bytes_left        )
+               __field(        unsigned long,  flags   )
+               __field(        int,  compress_type     )
+               __field(        int,  refs              )
+               __field(        u64,  root_objectid     )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->file_offset    = ordered->file_offset;
+               __entry->start          = ordered->start;
+               __entry->len            = ordered->len;
+               __entry->disk_len       = ordered->disk_len;
+               __entry->bytes_left     = ordered->bytes_left;
+               __entry->flags          = ordered->flags;
+               __entry->compress_type  = ordered->compress_type;
+               __entry->refs           = atomic_read(&ordered->refs);
+               __entry->root_objectid  =
+                               BTRFS_I(inode)->root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), ino = %llu, file_offset = %llu, "
+                 "start = %llu, len = %llu, disk_len = %llu, "
+                 "bytes_left = %llu, flags = %s, compress_type = %d, "
+                 "refs = %d",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->ino,
+                 (unsigned long long)__entry->file_offset,
+                 (unsigned long long)__entry->start,
+                 (unsigned long long)__entry->len,
+                 (unsigned long long)__entry->disk_len,
+                 (unsigned long long)__entry->bytes_left,
+                 show_ordered_flags(__entry->flags),
+                 __entry->compress_type, __entry->refs)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_add,
+
+       TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered),
+
+       TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_remove,
+
+       TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered),
+
+       TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_start,
+
+       TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered),
+
+       TP_ARGS(inode, ordered)
+);
+
+DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_put,
+
+       TP_PROTO(struct inode *inode, struct btrfs_ordered_extent *ordered),
+
+       TP_ARGS(inode, ordered)
+);
+
+DECLARE_EVENT_CLASS(btrfs__writepage,
+
+       TP_PROTO(struct page *page, struct inode *inode,
+                struct writeback_control *wbc),
+
+       TP_ARGS(page, inode, wbc),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino                     )
+               __field(        pgoff_t,  index                 )
+               __field(        long,   nr_to_write             )
+               __field(        long,   pages_skipped           )
+               __field(        loff_t, range_start             )
+               __field(        loff_t, range_end               )
+               __field(        char,   nonblocking             )
+               __field(        char,   for_kupdate             )
+               __field(        char,   for_reclaim             )
+               __field(        char,   range_cyclic            )
+               __field(        pgoff_t,  writeback_index       )
+               __field(        u64,    root_objectid           )
+       ),
+
+       TP_fast_assign(
+               __entry->ino            = inode->i_ino;
+               __entry->index          = page->index;
+               __entry->nr_to_write    = wbc->nr_to_write;
+               __entry->pages_skipped  = wbc->pages_skipped;
+               __entry->range_start    = wbc->range_start;
+               __entry->range_end      = wbc->range_end;
+               __entry->nonblocking    = wbc->nonblocking;
+               __entry->for_kupdate    = wbc->for_kupdate;
+               __entry->for_reclaim    = wbc->for_reclaim;
+               __entry->range_cyclic   = wbc->range_cyclic;
+               __entry->writeback_index = inode->i_mapping->writeback_index;
+               __entry->root_objectid  =
+                                BTRFS_I(inode)->root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, "
+                 "nr_to_write = %ld, pages_skipped = %ld, range_start = %llu, "
+                 "range_end = %llu, nonblocking = %d, for_kupdate = %d, "
+                 "for_reclaim = %d, range_cyclic = %d, writeback_index = %lu",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long)__entry->ino, __entry->index,
+                 __entry->nr_to_write, __entry->pages_skipped,
+                 __entry->range_start, __entry->range_end,
+                 __entry->nonblocking, __entry->for_kupdate,
+                 __entry->for_reclaim, __entry->range_cyclic,
+                 (unsigned long)__entry->writeback_index)
+);
+
+DEFINE_EVENT(btrfs__writepage, __extent_writepage,
+
+       TP_PROTO(struct page *page, struct inode *inode,
+                struct writeback_control *wbc),
+
+       TP_ARGS(page, inode, wbc)
+);
+
+TRACE_EVENT(btrfs_writepage_end_io_hook,
+
+       TP_PROTO(struct page *page, u64 start, u64 end, int uptodate),
+
+       TP_ARGS(page, start, end, uptodate),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,   ino            )
+               __field(        pgoff_t, index          )
+               __field(        u64,     start          )
+               __field(        u64,     end            )
+               __field(        int,     uptodate       )
+               __field(        u64,    root_objectid   )
+       ),
+
+       TP_fast_assign(
+               __entry->ino    = page->mapping->host->i_ino;
+               __entry->index  = page->index;
+               __entry->start  = start;
+               __entry->end    = end;
+               __entry->uptodate = uptodate;
+               __entry->root_objectid  =
+                        BTRFS_I(page->mapping->host)->root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), ino = %lu, page_index = %lu, start = %llu, "
+                 "end = %llu, uptodate = %d",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long)__entry->ino, (unsigned long)__entry->index,
+                 (unsigned long long)__entry->start,
+                 (unsigned long long)__entry->end, __entry->uptodate)
+);
+
+TRACE_EVENT(btrfs_sync_file,
+
+       TP_PROTO(struct file *file, int datasync),
+
+       TP_ARGS(file, datasync),
+
+       TP_STRUCT__entry(
+               __field(        ino_t,  ino             )
+               __field(        ino_t,  parent          )
+               __field(        int,    datasync        )
+               __field(        u64,    root_objectid   )
+       ),
+
+       TP_fast_assign(
+               struct dentry *dentry = file->f_path.dentry;
+               struct inode *inode = dentry->d_inode;
+
+               __entry->ino            = inode->i_ino;
+               __entry->parent         = dentry->d_parent->d_inode->i_ino;
+               __entry->datasync       = datasync;
+               __entry->root_objectid  =
+                                BTRFS_I(inode)->root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), ino = %ld, parent = %ld, datasync = %d",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long)__entry->ino, (unsigned long)__entry->parent,
+                 __entry->datasync)
+);
+
+TRACE_EVENT(btrfs_sync_fs,
+
+       TP_PROTO(int wait),
+
+       TP_ARGS(wait),
+
+       TP_STRUCT__entry(
+               __field(        int,  wait              )
+       ),
+
+       TP_fast_assign(
+               __entry->wait   = wait;
+       ),
+
+       TP_printk("wait = %d", __entry->wait)
+);
+
+#define show_ref_action(action)                                                \
+       __print_symbolic(action,                                        \
+               { BTRFS_ADD_DELAYED_REF,    "ADD_DELAYED_REF" },        \
+               { BTRFS_DROP_DELAYED_REF,   "DROP_DELAYED_REF" },       \
+               { BTRFS_ADD_DELAYED_EXTENT, "ADD_DELAYED_EXTENT" },     \
+               { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" })
+                       
+
+TRACE_EVENT(btrfs_delayed_tree_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_tree_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action),
+
+       TP_STRUCT__entry(
+               __field(        u64,  bytenr            )
+               __field(        u64,  num_bytes         )
+               __field(        int,  action            ) 
+               __field(        u64,  parent            )
+               __field(        u64,  ref_root          )
+               __field(        int,  level             )
+               __field(        int,  type              )
+       ),
+
+       TP_fast_assign(
+               __entry->bytenr         = ref->bytenr;
+               __entry->num_bytes      = ref->num_bytes;
+               __entry->action         = action;
+               __entry->parent         = full_ref->parent;
+               __entry->ref_root       = full_ref->root;
+               __entry->level          = full_ref->level;
+               __entry->type           = ref->type;
+       ),
+
+       TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, "
+                 "parent = %llu(%s), ref_root = %llu(%s), level = %d, "
+                 "type = %s",
+                 (unsigned long long)__entry->bytenr,
+                 (unsigned long long)__entry->num_bytes,
+                 show_ref_action(__entry->action),
+                 show_root_type(__entry->parent),
+                 show_root_type(__entry->ref_root),
+                 __entry->level, show_ref_type(__entry->type))
+);
+
+TRACE_EVENT(btrfs_delayed_data_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_data_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action),
+
+       TP_STRUCT__entry(
+               __field(        u64,  bytenr            )
+               __field(        u64,  num_bytes         )
+               __field(        int,  action            ) 
+               __field(        u64,  parent            )
+               __field(        u64,  ref_root          )
+               __field(        u64,  owner             )
+               __field(        u64,  offset            )
+               __field(        int,  type              )
+       ),
+
+       TP_fast_assign(
+               __entry->bytenr         = ref->bytenr;
+               __entry->num_bytes      = ref->num_bytes;
+               __entry->action         = action;
+               __entry->parent         = full_ref->parent;
+               __entry->ref_root       = full_ref->root;
+               __entry->owner          = full_ref->objectid;
+               __entry->offset         = full_ref->offset;
+               __entry->type           = ref->type;
+       ),
+
+       TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, "
+                 "parent = %llu(%s), ref_root = %llu(%s), owner = %llu, "
+                 "offset = %llu, type = %s",
+                 (unsigned long long)__entry->bytenr,
+                 (unsigned long long)__entry->num_bytes,
+                 show_ref_action(__entry->action),
+                 show_root_type(__entry->parent),
+                 show_root_type(__entry->ref_root),
+                 (unsigned long long)__entry->owner,
+                 (unsigned long long)__entry->offset,
+                 show_ref_type(__entry->type))
+);
+
+TRACE_EVENT(btrfs_delayed_ref_head,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_ref_head *head_ref,
+                int action),
+
+       TP_ARGS(ref, head_ref, action),
+
+       TP_STRUCT__entry(
+               __field(        u64,  bytenr            )
+               __field(        u64,  num_bytes         )
+               __field(        int,  action            ) 
+               __field(        int,  is_data           )
+       ),
+
+       TP_fast_assign(
+               __entry->bytenr         = ref->bytenr;
+               __entry->num_bytes      = ref->num_bytes;
+               __entry->action         = action;
+               __entry->is_data        = head_ref->is_data;
+       ),
+
+       TP_printk("bytenr = %llu, num_bytes = %llu, action = %s, is_data = %d",
+                 (unsigned long long)__entry->bytenr,
+                 (unsigned long long)__entry->num_bytes,
+                 show_ref_action(__entry->action),
+                 __entry->is_data)
+);
+
+#define show_chunk_type(type)                                  \
+       __print_flags(type, "|",                                \
+               { BTRFS_BLOCK_GROUP_DATA,       "DATA"  },      \
+               { BTRFS_BLOCK_GROUP_SYSTEM,     "SYSTEM"},      \
+               { BTRFS_BLOCK_GROUP_METADATA,   "METADATA"},    \
+               { BTRFS_BLOCK_GROUP_RAID0,      "RAID0" },      \
+               { BTRFS_BLOCK_GROUP_RAID1,      "RAID1" },      \
+               { BTRFS_BLOCK_GROUP_DUP,        "DUP"   },      \
+               { BTRFS_BLOCK_GROUP_RAID10,     "RAID10"})
+
+DECLARE_EVENT_CLASS(btrfs__chunk,
+
+       TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
+                u64 offset, u64 size),
+
+       TP_ARGS(root, map, offset, size),
+
+       TP_STRUCT__entry(
+               __field(        int,  num_stripes               )
+               __field(        u64,  type                      )
+               __field(        int,  sub_stripes               )
+               __field(        u64,  offset                    )
+               __field(        u64,  size                      )
+               __field(        u64,  root_objectid             )
+       ),
+
+       TP_fast_assign(
+               __entry->num_stripes    = map->num_stripes;
+               __entry->type           = map->type;
+               __entry->sub_stripes    = map->sub_stripes;
+               __entry->offset         = offset;
+               __entry->size           = size;
+               __entry->root_objectid  = root->root_key.objectid;
+       ),
+
+       TP_printk("root = %llu(%s), offset = %llu, size = %llu, "
+                 "num_stripes = %d, sub_stripes = %d, type = %s",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->offset,
+                 (unsigned long long)__entry->size,
+                 __entry->num_stripes, __entry->sub_stripes,
+                 show_chunk_type(__entry->type))
+);
+
+DEFINE_EVENT(btrfs__chunk,  btrfs_chunk_alloc,
+
+       TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
+                u64 offset, u64 size),
+
+       TP_ARGS(root, map, offset, size)
+);
+
+DEFINE_EVENT(btrfs__chunk,  btrfs_chunk_free,
+
+       TP_PROTO(struct btrfs_root *root, struct map_lookup *map,
+                u64 offset, u64 size),
+
+       TP_ARGS(root, map, offset, size)
+);
+
+TRACE_EVENT(btrfs_cow_block,
+
+       TP_PROTO(struct btrfs_root *root, struct extent_buffer *buf,
+                struct extent_buffer *cow),
+
+       TP_ARGS(root, buf, cow),
+
+       TP_STRUCT__entry(
+               __field(        u64,  root_objectid             )
+               __field(        u64,  buf_start                 )
+               __field(        int,  refs                      )
+               __field(        u64,  cow_start                 )
+               __field(        int,  buf_level                 )
+               __field(        int,  cow_level                 )
+       ),
+
+       TP_fast_assign(
+               __entry->root_objectid  = root->root_key.objectid;
+               __entry->buf_start      = buf->start;
+               __entry->refs           = atomic_read(&buf->refs);
+               __entry->cow_start      = cow->start;
+               __entry->buf_level      = btrfs_header_level(buf);
+               __entry->cow_level      = btrfs_header_level(cow);
+       ),
+
+       TP_printk("root = %llu(%s), refs = %d, orig_buf = %llu "
+                 "(orig_level = %d), cow_buf = %llu (cow_level = %d)",
+                 show_root_type(__entry->root_objectid),
+                 __entry->refs,
+                 (unsigned long long)__entry->buf_start,
+                 __entry->buf_level,
+                 (unsigned long long)__entry->cow_start,
+                 __entry->cow_level)
+);
+
+DECLARE_EVENT_CLASS(btrfs__reserved_extent,
+
+       TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
+
+       TP_ARGS(root, start, len),
+
+       TP_STRUCT__entry(
+               __field(        u64,  root_objectid             )
+               __field(        u64,  start                     )
+               __field(        u64,  len                       )
+       ),
+
+       TP_fast_assign(
+               __entry->root_objectid  = root->root_key.objectid;
+               __entry->start          = start;
+               __entry->len            = len;
+       ),
+
+       TP_printk("root = %llu(%s), start = %llu, len = %llu",
+                 show_root_type(__entry->root_objectid),
+                 (unsigned long long)__entry->start,
+                 (unsigned long long)__entry->len)
+);
+
+DEFINE_EVENT(btrfs__reserved_extent,  btrfs_reserved_extent_alloc,
+
+       TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
+
+       TP_ARGS(root, start, len)
+);
+
+DEFINE_EVENT(btrfs__reserved_extent,  btrfs_reserved_extent_free,
+
+       TP_PROTO(struct btrfs_root *root, u64 start, u64 len),
+
+       TP_ARGS(root, start, len)
+);
+
+#endif /* _TRACE_BTRFS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>