Merge branch 'bug-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs...

author Chris Mason <chris.mason@oracle.com>
Fri, 29 Oct 2010 13:27:49 +0000 (09:27 -0400)
committer Chris Mason <chris.mason@oracle.com>
Fri, 29 Oct 2010 13:27:49 +0000 (09:27 -0400)
diff --combined fs/btrfs/ctree.h

index 633e559,47bc66e..88c0fb7
--- 1/fs/btrfs/ctree.h
--- 2/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@@ -99,9 -99,6 +99,9 @@@ struct btrfs_ordered_sum
    */
   #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
   
+ +/* For storing free space cache */
+ +#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+ +
   /* dummy objectid represents multiple objectids */
   #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
   
@@@ -268,22 -265,6 +268,22 @@@ struct btrfs_chunk 
         /* additional stripes go here */
   } __attribute__ ((__packed__));
   
+ +#define BTRFS_FREE_SPACE_EXTENT       1
+ +#define BTRFS_FREE_SPACE_BITMAP       2
+ +
+ +struct btrfs_free_space_entry {
+ +      __le64 offset;
+ +      __le64 bytes;
+ +      u8 type;
+ +} __attribute__ ((__packed__));
+ +
+ +struct btrfs_free_space_header {
+ +      struct btrfs_disk_key location;
+ +      __le64 generation;
+ +      __le64 num_entries;
+ +      __le64 num_bitmaps;
+ +} __attribute__ ((__packed__));
+ +
   static inline unsigned long btrfs_chunk_item_size(int num_stripes)
   {
         BUG_ON(num_stripes == 0);
@@@ -384,10 -365,8 +384,10 @@@ struct btrfs_super_block 
   
         char label[BTRFS_LABEL_SIZE];
   
+ +      __le64 cache_generation;
+ +
         /* future expansion */
- -      __le64 reserved[32];
+ +      __le64 reserved[31];
         u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
   } __attribute__ ((__packed__));
   
@@@ -396,15 -375,13 +396,15 @@@
    * ones specified below then we will fail to mount
    */
   #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF  (1ULL << 0)
- -#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
+ +#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+ +#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS   (1ULL << 2)
   
   #define BTRFS_FEATURE_COMPAT_SUPP             0ULL
   #define BTRFS_FEATURE_COMPAT_RO_SUPP          0ULL
- -#define BTRFS_FEATURE_INCOMPAT_SUPP           \
- -      (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- -       BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+ +#define BTRFS_FEATURE_INCOMPAT_SUPP                   \
+ +      (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |         \
+ +       BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |        \
+ +       BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
   
   /*
    * A leaf is full of items. offset and size tell us where to find
@@@ -698,7 -675,8 +698,8 @@@ struct btrfs_block_group_item 
   struct btrfs_space_info {
         u64 flags;
   
-       u64 total_bytes;        /* total bytes in the space */
+       u64 total_bytes;        /* total bytes in the space,
+                                  this doesn't take mirrors into account */
         u64 bytes_used;         /* total bytes used,
                                   this doesn't take mirrors into account */
         u64 bytes_pinned;       /* total bytes pinned, will be freed when the
@@@ -710,6 -688,8 +711,8 @@@
         u64 bytes_may_use;      /* number of bytes that may be used for
                                    delalloc/allocations */
         u64 disk_used;          /* total bytes used on disk */
+       u64 disk_total;         /* total bytes on disk, takes mirrors into
+                                  account */
   
         int full;               /* indicates that we cannot allocate any more
                                    chunks for this space */
@@@ -773,14 -753,6 +776,14 @@@ enum btrfs_caching_type 
         BTRFS_CACHE_FINISHED    = 2,
   };
   
+ +enum btrfs_disk_cache_state {
+ +      BTRFS_DC_WRITTEN        = 0,
+ +      BTRFS_DC_ERROR          = 1,
+ +      BTRFS_DC_CLEAR          = 2,
+ +      BTRFS_DC_SETUP          = 3,
+ +      BTRFS_DC_NEED_WRITE     = 4,
+ +};
+ +
   struct btrfs_caching_control {
         struct list_head list;
         struct mutex mutex;
@@@ -794,7 -766,6 +797,7 @@@ struct btrfs_block_group_cache 
         struct btrfs_key key;
         struct btrfs_block_group_item item;
         struct btrfs_fs_info *fs_info;
+ +      struct inode *inode;
         spinlock_t lock;
         u64 pinned;
         u64 reserved;
@@@ -805,11 -776,8 +808,11 @@@
         int extents_thresh;
         int free_extents;
         int total_bitmaps;
- -      int ro;
- -      int dirty;
+ +      int ro:1;
+ +      int dirty:1;
+ +      int iref:1;
+ +
+ +      int disk_cache_state;
   
         /* cache tracking stuff */
         int cached;
@@@ -984,7 -952,6 +987,7 @@@ struct btrfs_fs_info 
         struct btrfs_workers endio_meta_workers;
         struct btrfs_workers endio_meta_write_workers;
         struct btrfs_workers endio_write_workers;
+ +      struct btrfs_workers endio_freespace_worker;
         struct btrfs_workers submit_workers;
         /*
          * fixup workers take dirty pages that didn't properly go through
@@@ -1228,8 -1195,6 +1231,8 @@@ struct btrfs_root 
   #define BTRFS_MOUNT_NOSSD             (1 << 9)
   #define BTRFS_MOUNT_DISCARD           (1 << 10)
   #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
+ +#define BTRFS_MOUNT_SPACE_CACHE               (1 << 12)
+ +#define BTRFS_MOUNT_CLEAR_CACHE               (1 << 13)
   
   #define btrfs_clear_opt(o, opt)               ((o) &= ~BTRFS_MOUNT_##opt)
   #define btrfs_set_opt(o, opt)         ((o) |= BTRFS_MOUNT_##opt)
@@@ -1703,27 -1668,6 +1706,27 @@@ static inline void btrfs_set_dir_item_k
         write_eb_member(eb, item, struct btrfs_dir_item, location, key);
   }
   
+ +BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ +                 num_entries, 64);
+ +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ +                 num_bitmaps, 64);
+ +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ +                 generation, 64);
+ +
+ +static inline void btrfs_free_space_key(struct extent_buffer *eb,
+ +                                      struct btrfs_free_space_header *h,
+ +                                      struct btrfs_disk_key *key)
+ +{
+ +      read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+ +}
+ +
+ +static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+ +                                          struct btrfs_free_space_header *h,
+ +                                          struct btrfs_disk_key *key)
+ +{
+ +      write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+ +}
+ +
   /* struct btrfs_disk_key */
   BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
                          objectid, 64);
@@@ -1935,8 -1879,6 +1938,8 @@@ BTRFS_SETGET_STACK_FUNCS(super_incompat
                          incompat_flags, 64);
   BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
                          csum_type, 16);
+ +BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ +                       cache_generation, 64);
   
   static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
   {
@@@ -2049,12 -1991,6 +2052,12 @@@ static inline struct dentry *fdentry(st
         return file->f_path.dentry;
   }
   
+ +static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+ +{
+ +      return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ +              (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+ +}
+ +
   /* extent-tree.c */
   void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
   int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@@ -2146,7 -2082,7 +2149,7 @@@ int btrfs_check_data_free_space(struct 
   void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
   int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
-                               int num_items, int *retries);
+                               int num_items);
   void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root);
   int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@@ -2167,7 -2103,7 +2170,7 @@@ void btrfs_add_durable_block_rsv(struc
   int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes, int *retries);
+                       u64 num_bytes);
   int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_block_rsv *block_rsv,
@@@ -2182,7 -2118,6 +2185,7 @@@ int btrfs_set_block_group_ro(struct btr
                              struct btrfs_block_group_cache *cache);
   int btrfs_set_block_group_rw(struct btrfs_root *root,
                              struct btrfs_block_group_cache *cache);
+ +void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
   /* ctree.c */
   int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                      int level, int *slot);
@@@ -2441,7 -2376,8 +2444,8 @@@ int btrfs_truncate_inode_items(struct b
                                u32 min_type);
   
   int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
- int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync);
   int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
                               struct extent_state **cached_state);
   int btrfs_writepages(struct address_space *mapping,
@@@ -2457,13 -2393,13 +2461,13 @@@ unsigned long btrfs_force_ra(struct add
                               pgoff_t offset, pgoff_t last_index);
   int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
   int btrfs_readpage(struct file *file, struct page *page);
- -void btrfs_delete_inode(struct inode *inode);
+ +void btrfs_evict_inode(struct inode *inode);
   void btrfs_put_inode(struct inode *inode);
   int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
   void btrfs_dirty_inode(struct inode *inode);
   struct inode *btrfs_alloc_inode(struct super_block *sb);
   void btrfs_destroy_inode(struct inode *inode);
- -void btrfs_drop_inode(struct inode *inode);
+ +int btrfs_drop_inode(struct inode *inode);
   int btrfs_init_cachep(void);
   void btrfs_destroy_cachep(void);
   long btrfs_ioctl_trans_end(struct file *file);
@@@ -2494,10 -2430,6 +2498,10 @@@ void btrfs_run_delayed_iputs(struct btr
   int btrfs_prealloc_file_range(struct inode *inode, int mode,
                               u64 start, u64 num_bytes, u64 min_size,
                               loff_t actual_len, u64 *alloc_hint);
+ +int btrfs_prealloc_file_range_trans(struct inode *inode,
+ +                                  struct btrfs_trans_handle *trans, int mode,
+ +                                  u64 start, u64 num_bytes, u64 min_size,
+ +                                  loff_t actual_len, u64 *alloc_hint);
   extern const struct dentry_operations btrfs_dentry_operations;
   
   /* ioctl.c */
diff --combined fs/btrfs/extent-tree.c

index d2a7ff5,3378533..372fd22
--- 1/fs/btrfs/extent-tree.c
--- 2/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@@ -242,12 -242,6 +242,12 @@@ get_caching_control(struct btrfs_block_
                 return NULL;
         }
   
+ +      /* We're loading it the fast way, so we don't have a caching_ctl. */
+ +      if (!cache->caching_ctl) {
+ +              spin_unlock(&cache->lock);
+ +              return NULL;
+ +      }
+ +
         ctl = cache->caching_ctl;
         atomic_inc(&ctl->count);
         spin_unlock(&cache->lock);
@@@ -427,9 -421,7 +427,9 @@@ err
         return 0;
   }
   
- -static int cache_block_group(struct btrfs_block_group_cache *cache)
+ +static int cache_block_group(struct btrfs_block_group_cache *cache,
+ +                           struct btrfs_trans_handle *trans,
+ +                           int load_cache_only)
   {
         struct btrfs_fs_info *fs_info = cache->fs_info;
         struct btrfs_caching_control *caching_ctl;
@@@ -440,36 -432,6 +440,36 @@@
         if (cache->cached != BTRFS_CACHE_NO)
                 return 0;
   
+ +      /*
+ +       * We can't do the read from on-disk cache during a commit since we need
+ +       * to have the normal tree locking.
+ +       */
+ +      if (!trans->transaction->in_commit) {
+ +              spin_lock(&cache->lock);
+ +              if (cache->cached != BTRFS_CACHE_NO) {
+ +                      spin_unlock(&cache->lock);
+ +                      return 0;
+ +              }
+ +              cache->cached = BTRFS_CACHE_STARTED;
+ +              spin_unlock(&cache->lock);
+ +
+ +              ret = load_free_space_cache(fs_info, cache);
+ +
+ +              spin_lock(&cache->lock);
+ +              if (ret == 1) {
+ +                      cache->cached = BTRFS_CACHE_FINISHED;
+ +                      cache->last_byte_to_unpin = (u64)-1;
+ +              } else {
+ +                      cache->cached = BTRFS_CACHE_NO;
+ +              }
+ +              spin_unlock(&cache->lock);
+ +              if (ret == 1)
+ +                      return 0;
+ +      }
+ +
+ +      if (load_cache_only)
+ +              return 0;
+ +
         caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
         BUG_ON(!caching_ctl);
   
@@@ -547,7 -509,7 +547,7 @@@ static struct btrfs_space_info *__find_
   
         rcu_read_lock();
         list_for_each_entry_rcu(found, head, list) {
- -              if (found->flags == flags) {
+ +              if (found->flags & flags) {
                         rcu_read_unlock();
                         return found;
                 }
@@@ -2726,109 -2688,6 +2726,109 @@@ next_block_group(struct btrfs_root *roo
         return cache;
   }
   
+ +static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+ +                          struct btrfs_trans_handle *trans,
+ +                          struct btrfs_path *path)
+ +{
+ +      struct btrfs_root *root = block_group->fs_info->tree_root;
+ +      struct inode *inode = NULL;
+ +      u64 alloc_hint = 0;
+ +      int num_pages = 0;
+ +      int retries = 0;
+ +      int ret = 0;
+ +
+ +      /*
+ +       * If this block group is smaller than 100 megs don't bother caching the
+ +       * block group.
+ +       */
+ +      if (block_group->key.offset < (100 * 1024 * 1024)) {
+ +              spin_lock(&block_group->lock);
+ +              block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+ +              spin_unlock(&block_group->lock);
+ +              return 0;
+ +      }
+ +
+ +again:
+ +      inode = lookup_free_space_inode(root, block_group, path);
+ +      if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+ +              ret = PTR_ERR(inode);
+ +              btrfs_release_path(root, path);
+ +              goto out;
+ +      }
+ +
+ +      if (IS_ERR(inode)) {
+ +              BUG_ON(retries);
+ +              retries++;
+ +
+ +              if (block_group->ro)
+ +                      goto out_free;
+ +
+ +              ret = create_free_space_inode(root, trans, block_group, path);
+ +              if (ret)
+ +                      goto out_free;
+ +              goto again;
+ +      }
+ +
+ +      /*
+ +       * We want to set the generation to 0, that way if anything goes wrong
+ +       * from here on out we know not to trust this cache when we load up next
+ +       * time.
+ +       */
+ +      BTRFS_I(inode)->generation = 0;
+ +      ret = btrfs_update_inode(trans, root, inode);
+ +      WARN_ON(ret);
+ +
+ +      if (i_size_read(inode) > 0) {
+ +              ret = btrfs_truncate_free_space_cache(root, trans, path,
+ +                                                    inode);
+ +              if (ret)
+ +                      goto out_put;
+ +      }
+ +
+ +      spin_lock(&block_group->lock);
+ +      if (block_group->cached != BTRFS_CACHE_FINISHED) {
+ +              spin_unlock(&block_group->lock);
+ +              goto out_put;
+ +      }
+ +      spin_unlock(&block_group->lock);
+ +
+ +      num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+ +      if (!num_pages)
+ +              num_pages = 1;
+ +
+ +      /*
+ +       * Just to make absolutely sure we have enough space, we're going to
+ +       * preallocate 16 pages worth of space for each gigabyte of the block
+ +       * group (and at least 16 pages).  In practice we ought to use at most
+ +       * 8, but we need extra space so we can add our header and have a
+ +       * terminator between the extents and the bitmaps.
+ +       */
+ +      num_pages *= 16;
+ +      num_pages *= PAGE_CACHE_SIZE;
+ +
+ +      ret = btrfs_check_data_free_space(inode, num_pages);
+ +      if (ret)
+ +              goto out_put;
+ +
+ +      ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+ +                                            num_pages, num_pages,
+ +                                            &alloc_hint);
+ +      btrfs_free_reserved_data_space(inode, num_pages);
+ +out_put:
+ +      iput(inode);
+ +out_free:
+ +      btrfs_release_path(root, path);
+ +out:
+ +      spin_lock(&block_group->lock);
+ +      if (ret)
+ +              block_group->disk_cache_state = BTRFS_DC_ERROR;
+ +      else
+ +              block_group->disk_cache_state = BTRFS_DC_SETUP;
+ +      spin_unlock(&block_group->lock);
+ +
+ +      return ret;
+ +}
+ +
   int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
   {
@@@ -2841,25 -2700,6 +2841,25 @@@
         if (!path)
                 return -ENOMEM;
   
+ +again:
+ +      while (1) {
+ +              cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ +              while (cache) {
+ +                      if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+ +                              break;
+ +                      cache = next_block_group(root, cache);
+ +              }
+ +              if (!cache) {
+ +                      if (last == 0)
+ +                              break;
+ +                      last = 0;
+ +                      continue;
+ +              }
+ +              err = cache_save_setup(cache, trans, path);
+ +              last = cache->key.objectid + cache->key.offset;
+ +              btrfs_put_block_group(cache);
+ +      }
+ +
         while (1) {
                 if (last == 0) {
                         err = btrfs_run_delayed_refs(trans, root,
@@@ -2869,11 -2709,6 +2869,11 @@@
   
                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
                 while (cache) {
+ +                      if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+ +                              btrfs_put_block_group(cache);
+ +                              goto again;
+ +                      }
+ +
                         if (cache->dirty)
                                 break;
                         cache = next_block_group(root, cache);
@@@ -2885,8 -2720,6 +2885,8 @@@
                         continue;
                 }
   
+ +              if (cache->disk_cache_state == BTRFS_DC_SETUP)
+ +                      cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
                 cache->dirty = 0;
                 last = cache->key.objectid + cache->key.offset;
   
@@@ -2895,52 -2728,6 +2895,52 @@@
                 btrfs_put_block_group(cache);
         }
   
+ +      while (1) {
+ +              /*
+ +               * I don't think this is needed since we're just marking our
+ +               * preallocated extent as written, but just in case it can't
+ +               * hurt.
+ +               */
+ +              if (last == 0) {
+ +                      err = btrfs_run_delayed_refs(trans, root,
+ +                                                   (unsigned long)-1);
+ +                      BUG_ON(err);
+ +              }
+ +
+ +              cache = btrfs_lookup_first_block_group(root->fs_info, last);
+ +              while (cache) {
+ +                      /*
+ +                       * Really this shouldn't happen, but it could if we
+ +                       * couldn't write the entire preallocated extent and
+ +                       * splitting the extent resulted in a new block.
+ +                       */
+ +                      if (cache->dirty) {
+ +                              btrfs_put_block_group(cache);
+ +                              goto again;
+ +                      }
+ +                      if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ +                              break;
+ +                      cache = next_block_group(root, cache);
+ +              }
+ +              if (!cache) {
+ +                      if (last == 0)
+ +                              break;
+ +                      last = 0;
+ +                      continue;
+ +              }
+ +
+ +              btrfs_write_out_cache(root, trans, cache, path);
+ +
+ +              /*
+ +               * If we didn't have an error then the cache state is still
+ +               * NEED_WRITE, so we can set it to WRITTEN.
+ +               */
+ +              if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+ +                      cache->disk_cache_state = BTRFS_DC_WRITTEN;
+ +              last = cache->key.objectid + cache->key.offset;
+ +              btrfs_put_block_group(cache);
+ +      }
+ +
         btrfs_free_path(path);
         return 0;
   }
@@@ -2976,6 -2763,7 +2976,7 @@@ static int update_space_info(struct btr
         if (found) {
                 spin_lock(&found->lock);
                 found->total_bytes += total_bytes;
+               found->disk_total += total_bytes * factor;
                 found->bytes_used += bytes_used;
                 found->disk_used += bytes_used * factor;
                 found->full = 0;
@@@ -2995,6 -2783,7 +2996,7 @@@
                                 BTRFS_BLOCK_GROUP_SYSTEM |
                                 BTRFS_BLOCK_GROUP_METADATA);
         found->total_bytes = total_bytes;
+       found->disk_total = total_bytes * factor;
         found->bytes_used = bytes_used;
         found->disk_used = bytes_used * factor;
         found->bytes_pinned = 0;
@@@ -3096,16 -2885,11 +3098,16 @@@ int btrfs_check_data_free_space(struct 
         struct btrfs_space_info *data_sinfo;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 used;
- -      int ret = 0, committed = 0;
+ +      int ret = 0, committed = 0, alloc_chunk = 1;
   
         /* make sure bytes are sectorsize aligned */
         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
   
+ +      if (root == root->fs_info->tree_root) {
+ +              alloc_chunk = 0;
+ +              committed = 1;
+ +      }
+ +
         data_sinfo = BTRFS_I(inode)->space_info;
         if (!data_sinfo)
                 goto alloc;
@@@ -3124,7 -2908,7 +3126,7 @@@ again
                  * if we don't have enough free bytes in this space then we need
                  * to alloc a new chunk.
                  */
- -              if (!data_sinfo->full) {
+ +              if (!data_sinfo->full && alloc_chunk) {
                         u64 alloc_target;
   
                         data_sinfo->force_alloc = 1;
@@@ -3216,8 -3000,7 +3218,7 @@@ static void force_metadata_allocation(s
         rcu_read_unlock();
   }
   
- static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-                             u64 alloc_bytes)
+ static int should_alloc_chunk(struct btrfs_space_info *sinfo, u64 alloc_bytes)
   {
         u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
   
@@@ -3229,6 -3012,10 +3230,10 @@@
             alloc_bytes < div_factor(num_bytes, 8))
                 return 0;
   
+       if (num_bytes > 256 * 1024 * 1024 &&
+           sinfo->bytes_used < div_factor(num_bytes, 3))
+               return 0;
+ 
         return 1;
   }
   
@@@ -3266,13 -3053,6 +3271,13 @@@ static int do_chunk_alloc(struct btrfs_
         }
         spin_unlock(&space_info->lock);
   
+ +      /*
+ +       * If we have mixed data/metadata chunks we want to make sure we keep
+ +       * allocating mixed chunks instead of individual chunks.
+ +       */
+ +      if (btrfs_mixed_space_info(space_info))
+ +              flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+ +
         /*
          * if we're doing a data chunk, go ahead and make sure that
          * we keep a reasonable number of metadata chunks allocated in the
@@@ -3298,55 -3078,26 +3303,26 @@@ out
         return ret;
   }
   
- static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               struct btrfs_space_info *sinfo, u64 num_bytes)
- {
-       int ret;
-       int end_trans = 0;
- 
-       if (sinfo->full)
-               return 0;
- 
-       spin_lock(&sinfo->lock);
-       ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-       spin_unlock(&sinfo->lock);
-       if (!ret)
-               return 0;
- 
-       if (!trans) {
-               trans = btrfs_join_transaction(root, 1);
-               BUG_ON(IS_ERR(trans));
-               end_trans = 1;
-       }
- 
-       ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                            num_bytes + 2 * 1024 * 1024,
-                            get_alloc_profile(root, sinfo->flags), 0);
- 
-       if (end_trans)
-               btrfs_end_transaction(trans, root);
- 
-       return ret == 1 ? 1 : 0;
- }
- 
   /*
    * shrink metadata reservation for delalloc
    */
   static int shrink_delalloc(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root, u64 to_reclaim)
+                          struct btrfs_root *root, u64 to_reclaim, int sync)
   {
         struct btrfs_block_rsv *block_rsv;
+       struct btrfs_space_info *space_info;
         u64 reserved;
         u64 max_reclaim;
         u64 reclaimed = 0;
+       int no_reclaim = 0;
         int pause = 1;
         int ret;
   
         block_rsv = &root->fs_info->delalloc_block_rsv;
-       spin_lock(&block_rsv->lock);
-       reserved = block_rsv->reserved;
-       spin_unlock(&block_rsv->lock);
+       space_info = block_rsv->space_info;
+       spin_lock(&space_info->lock);
+       reserved = space_info->bytes_reserved;
+       spin_unlock(&space_info->lock);
   
         if (reserved == 0)
                 return 0;
@@@ -3354,22 -3105,26 +3330,26 @@@
         max_reclaim = min(reserved, to_reclaim);
   
         while (1) {
-               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
+               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0, sync);
                 if (!ret) {
+                       if (no_reclaim > 2)
+                               break;
+                       no_reclaim++;
                         __set_current_state(TASK_INTERRUPTIBLE);
                         schedule_timeout(pause);
                         pause <<= 1;
                         if (pause > HZ / 10)
                                 pause = HZ / 10;
                 } else {
+                       no_reclaim = 0;
                         pause = 1;
                 }
   
-               spin_lock(&block_rsv->lock);
-               if (reserved > block_rsv->reserved)
-                       reclaimed = reserved - block_rsv->reserved;
-               reserved = block_rsv->reserved;
-               spin_unlock(&block_rsv->lock);
+               spin_lock(&space_info->lock);
+               if (reserved > space_info->bytes_reserved)
+                       reclaimed += reserved - space_info->bytes_reserved;
+               reserved = space_info->bytes_reserved;
+               spin_unlock(&space_info->lock);
   
                 if (reserved == 0 || reclaimed >= max_reclaim)
                         break;
@@@ -3380,78 -3135,141 +3360,141 @@@
         return reclaimed >= to_reclaim;
   }
   
- static int should_retry_reserve(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root,
-                               struct btrfs_block_rsv *block_rsv,
-                               u64 num_bytes, int *retries)
+ /*
+  * Reserve orig_bytes of metadata space in block_rsv's space_info.  On the
+  * first pass, if the space isn't available, we add orig_bytes to
+  * bytes_reserved anyway to hold our place while we go and try to free up
+  * space.  On later passes we don't add it again, we only check that the
+  * accounted total still fits in total_bytes, meaning our reservation is valid.
+  *
+  * If flush is zero nothing is reclaimed.  A reservation that fails is undone
+  * before returning.  Returns 0 on success or a negative errno; -EAGAIN means
+  * enough space is pinned but no (further) transaction commit was done here.
+  */
+ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+                                 struct btrfs_block_rsv *block_rsv,
+                                 u64 orig_bytes, int flush)
   {
         struct btrfs_space_info *space_info = block_rsv->space_info;
-       int ret;
+       u64 unused;
+       u64 num_bytes = orig_bytes;
+       int retries = 0;
+       int ret = 0;
+       bool reserved = false;
+       bool committed = false;
   
-       if ((*retries) > 2)
-               return -ENOSPC;
+ again:
+       ret = -ENOSPC;
+       if (reserved)
+               num_bytes = 0;
   
-       ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-       if (ret)
-               return 1;
+       spin_lock(&space_info->lock);
+       unused = space_info->bytes_used + space_info->bytes_reserved +
+                space_info->bytes_pinned + space_info->bytes_readonly +
+                space_info->bytes_may_use;
   
-       if (trans && trans->transaction->in_commit)
-               return -ENOSPC;
+       /*
+        * The idea here is that we've not already over-reserved the block group
+        * then we can go ahead and save our reservation first and then start
+        * flushing if we need to.  Otherwise if we've already overcommitted
+        * lets start flushing stuff first and then come back and try to make
+        * our reservation.
+        */
+       if (unused <= space_info->total_bytes) {
+               unused = space_info->total_bytes - unused;
+               if (unused >= num_bytes) {
+                       if (!reserved)
+                               space_info->bytes_reserved += orig_bytes;
+                       ret = 0;
+               } else {
+                       /*
+                        * Ok set num_bytes to orig_bytes since we aren't
+                        * overcommitted, this way we only try and reclaim what
+                        * we need.
+                        */
+                       num_bytes = orig_bytes;
+               }
+       } else {
+               /*
+                * Ok we're over committed, set num_bytes to the overcommitted
+                * amount plus the amount of bytes that we need for this
+                * reservation.
+                */
+               num_bytes = unused - space_info->total_bytes +
+                       (orig_bytes * (retries + 1));
+       }
   
-       ret = shrink_delalloc(trans, root, num_bytes);
-       if (ret)
-               return ret;
+       /*
+        * Couldn't make our reservation, save our place so while we're trying
+        * to reclaim space we can actually use it instead of somebody else
+        * stealing it from us.
+        */
+       if (ret && !reserved) {
+               space_info->bytes_reserved += orig_bytes;
+               reserved = true;
+       }
   
-       spin_lock(&space_info->lock);
-       if (space_info->bytes_pinned < num_bytes)
-               ret = 1;
         spin_unlock(&space_info->lock);
-       if (ret)
-               return -ENOSPC;
   
-       (*retries)++;
- 
-       if (trans)
-               return -EAGAIN;
+       if (!ret)
+               return 0;
   
-       trans = btrfs_join_transaction(root, 1);
-       BUG_ON(IS_ERR(trans));
-       ret = btrfs_commit_transaction(trans, root);
-       BUG_ON(ret);
+       if (!flush)
+               goto out;
   
-       return 1;
- }
+       /*
+        * We do synchronous shrinking since we don't actually unreserve
+        * metadata until after the IO is completed.
+        */
+       ret = shrink_delalloc(trans, root, num_bytes, 1);
+       if (ret > 0)
+               return 0;
+       else if (ret < 0)
+               goto out;
   
- static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-                                 u64 num_bytes)
- {
-       struct btrfs_space_info *space_info = block_rsv->space_info;
-       u64 unused;
-       int ret = -ENOSPC;
+       /*
+        * So if we were overcommitted it's possible that somebody else flushed
+        * out enough space and we simply didn't have enough space to reclaim,
+        * so go back around and try again.
+        */
+       if (retries < 2) {
+               retries++;
+               goto again;
+       }
   
         spin_lock(&space_info->lock);
-       unused = space_info->bytes_used + space_info->bytes_reserved +
-                space_info->bytes_pinned + space_info->bytes_readonly;
+       /*
+        * Not enough space to be reclaimed, don't bother committing the
+        * transaction.
+        */
+       if (space_info->bytes_pinned < orig_bytes)
+               ret = -ENOSPC;
+       spin_unlock(&space_info->lock);
+       if (ret)
+               goto out;
   
-       if (unused < space_info->total_bytes)
-               unused = space_info->total_bytes - unused;
-       else
-               unused = 0;
+       ret = -EAGAIN;
+       if (trans || committed)
+               goto out;
   
-       if (unused >= num_bytes) {
-               if (block_rsv->priority >= 10) {
-                       space_info->bytes_reserved += num_bytes;
-                       ret = 0;
-               } else {
-                       if ((unused + block_rsv->reserved) *
-                           block_rsv->priority >=
-                           (num_bytes + block_rsv->reserved) * 10) {
-                               space_info->bytes_reserved += num_bytes;
-                               ret = 0;
-                       }
-               }
+       ret = -ENOSPC;
+       trans = btrfs_join_transaction(root, 1);
+       if (IS_ERR(trans))
+               goto out;
+       ret = btrfs_commit_transaction(trans, root);
+       if (!ret) {
+               trans = NULL;
+               committed = true;
+               goto again;
+       }
+ 
+ out:
+       if (reserved) {
+               spin_lock(&space_info->lock);
+               space_info->bytes_reserved -= orig_bytes;
+               spin_unlock(&space_info->lock);
         }
-       spin_unlock(&space_info->lock);
   
         return ret;
   }
@@@ -3595,23 -3413,19 +3638,19 @@@ void btrfs_add_durable_block_rsv(struc
   int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes, int *retries)
+                       u64 num_bytes)
   {
         int ret;
   
         if (num_bytes == 0)
                 return 0;
- again:
-       ret = reserve_metadata_bytes(block_rsv, num_bytes);
+       /* Last argument is flush: allow reclaiming space to satisfy this. */
+       ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
         if (!ret) {
                 block_rsv_add_bytes(block_rsv, num_bytes, 1);
                 return 0;
         }
   
-       ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-       if (ret > 0)
-               goto again;
- 
         return ret;
   }
   
@@@ -3646,7 -3460,8 +3685,8 @@@ int btrfs_block_rsv_check(struct btrfs_
                 return 0;
   
         if (block_rsv->refill_used) {
-               ret = reserve_metadata_bytes(block_rsv, num_bytes);
+               ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                            num_bytes, 0);
                 if (!ret) {
                         block_rsv_add_bytes(block_rsv, num_bytes, 0);
                         return 0;
@@@ -3725,6 -3540,8 +3765,8 @@@ static u64 calc_global_metadata_size(st
   
         sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
         spin_lock(&sinfo->lock);
+       if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+               data_used = 0;
         meta_used = sinfo->bytes_used;
         spin_unlock(&sinfo->lock);
   
@@@ -3752,7 -3569,8 +3794,8 @@@ static void update_global_block_rsv(str
         block_rsv->size = num_bytes;
   
         num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-                   sinfo->bytes_reserved + sinfo->bytes_readonly;
+                   sinfo->bytes_reserved + sinfo->bytes_readonly +
+                   sinfo->bytes_may_use;
   
         if (sinfo->total_bytes > num_bytes) {
                 num_bytes = sinfo->total_bytes - num_bytes;
@@@ -3823,7 -3641,7 +3866,7 @@@ static u64 calc_trans_metadata_size(str
   
   int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root,
-                                int num_items, int *retries)
+                                int num_items)
   {
         u64 num_bytes;
         int ret;
@@@ -3833,7 -3651,7 +3876,7 @@@
   
         num_bytes = calc_trans_metadata_size(root, num_items);
         ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                 num_bytes, retries);
+                                 num_bytes);
         if (!ret) {
                 trans->bytes_reserved += num_bytes;
                 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@@ -3907,14 -3725,13 +3950,13 @@@ int btrfs_delalloc_reserve_metadata(str
         struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
         u64 to_reserve;
         int nr_extents;
-       int retries = 0;
         int ret;
   
         if (btrfs_transaction_in_commit(root->fs_info))
                 schedule_timeout(1);
   
         num_bytes = ALIGN(num_bytes, root->sectorsize);
- again:
+ 
         spin_lock(&BTRFS_I(inode)->accounting_lock);
         nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
         if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@@ -3924,18 -3741,14 +3966,14 @@@
                 nr_extents = 0;
                 to_reserve = 0;
         }
+       spin_unlock(&BTRFS_I(inode)->accounting_lock);
   
         to_reserve += calc_csum_metadata_size(inode, num_bytes);
-       ret = reserve_metadata_bytes(block_rsv, to_reserve);
-       if (ret) {
-               spin_unlock(&BTRFS_I(inode)->accounting_lock);
-               ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-                                          &retries);
-               if (ret > 0)
-                       goto again;
+       ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+       if (ret)
                 return ret;
-       }
   
+       spin_lock(&BTRFS_I(inode)->accounting_lock);
         BTRFS_I(inode)->reserved_extents += nr_extents;
         atomic_inc(&BTRFS_I(inode)->outstanding_extents);
         spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@@ -3943,7 -3756,7 +3981,7 @@@
         block_rsv_add_bytes(block_rsv, to_reserve, 1);
   
         if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve);
+               shrink_delalloc(NULL, root, to_reserve, 0);
   
         return 0;
   }
@@@ -4002,12 -3815,12 +4040,12 @@@ static int update_block_group(struct bt
                               struct btrfs_root *root,
                               u64 bytenr, u64 num_bytes, int alloc)
   {
- -      struct btrfs_block_group_cache *cache;
+ +      struct btrfs_block_group_cache *cache = NULL;
         struct btrfs_fs_info *info = root->fs_info;
- -      int factor;
         u64 total = num_bytes;
         u64 old_val;
         u64 byte_in_group;
+ +      int factor;
   
         /* block accounting for super block */
         spin_lock(&info->delalloc_lock);
@@@ -4029,25 -3842,11 +4067,25 @@@
                         factor = 2;
                 else
                         factor = 1;
+ +              /*
+ +               * If this block group has free space cache written out, we
+ +               * need to make sure to load it if we are removing space.  This
+ +               * is because we need the unpinning stage to actually add the
+ +               * space back to the block group, otherwise we will leak space.
+ +               */
+ +              if (!alloc && cache->cached == BTRFS_CACHE_NO)
+ +                      cache_block_group(cache, trans, 1);
+ +
                 byte_in_group = bytenr - cache->key.objectid;
                 WARN_ON(byte_in_group > cache->key.offset);
   
                 spin_lock(&cache->space_info->lock);
                 spin_lock(&cache->lock);
+ +
+ +              if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+ +                  cache->disk_cache_state < BTRFS_DC_CLEAR)
+ +                      cache->disk_cache_state = BTRFS_DC_CLEAR;
+ +
                 cache->dirty = 1;
                 old_val = btrfs_block_group_used(&cache->item);
                 num_bytes = min(total, cache->key.offset - byte_in_group);
@@@ -4794,7 -4593,6 +4832,7 @@@ static noinline int find_free_extent(st
         bool found_uncached_bg = false;
         bool failed_cluster_refill = false;
         bool failed_alloc = false;
+ +      bool use_cluster = true;
         u64 ideal_cache_percent = 0;
         u64 ideal_cache_offset = 0;
   
@@@ -4809,24 -4607,16 +4847,24 @@@
                 return -ENOSPC;
         }
   
+ +      /*
+ +       * If the space info is for both data and metadata it means we have a
+ +       * small filesystem and we can't use the clustering stuff.
+ +       */
+ +      if (btrfs_mixed_space_info(space_info))
+ +              use_cluster = false;
+ +
         if (orig_root->ref_cows || empty_size)
                 allowed_chunk_alloc = 1;
   
- -      if (data & BTRFS_BLOCK_GROUP_METADATA) {
+ +      if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
                 last_ptr = &root->fs_info->meta_alloc_cluster;
                 if (!btrfs_test_opt(root, SSD))
                         empty_cluster = 64 * 1024;
         }
   
- -      if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+ +      if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+ +          btrfs_test_opt(root, SSD)) {
                 last_ptr = &root->fs_info->data_alloc_cluster;
         }
   
@@@ -4890,10 -4680,6 +4928,10 @@@ have_block_group
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                         u64 free_percent;
   
+ +                      ret = cache_block_group(block_group, trans, 1);
+ +                      if (block_group->cached == BTRFS_CACHE_FINISHED)
+ +                              goto have_block_group;
+ +
                         free_percent = btrfs_block_group_used(&block_group->item);
                         free_percent *= 100;
                         free_percent = div64_u64(free_percent,
@@@ -4914,7 -4700,7 +4952,7 @@@
                         if (loop > LOOP_CACHING_NOWAIT ||
                             (loop > LOOP_FIND_IDEAL &&
                              atomic_read(&space_info->caching_threads) < 2)) {
- -                              ret = cache_block_group(block_group);
+ +                              ret = cache_block_group(block_group, trans, 0);
                                 BUG_ON(ret);
                         }
                         found_uncached_bg = true;
@@@ -5471,7 -5257,7 +5509,7 @@@ int btrfs_alloc_logged_file_extent(stru
         u64 num_bytes = ins->offset;
   
         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
- -      cache_block_group(block_group);
+ +      cache_block_group(block_group, trans, 0);
         caching_ctl = get_caching_control(block_group);
   
         if (!caching_ctl) {
@@@ -5561,7 -5347,8 +5599,8 @@@ use_block_rsv(struct btrfs_trans_handl
         block_rsv = get_block_rsv(trans, root);
   
         if (block_rsv->size == 0) {
-               ret = reserve_metadata_bytes(block_rsv, blocksize);
+               ret = reserve_metadata_bytes(trans, root, block_rsv,
+                                            blocksize, 0);
                 if (ret)
                         return ERR_PTR(ret);
                 return block_rsv;
@@@ -5571,11 -5358,6 +5610,6 @@@
         if (!ret)
                 return block_rsv;
   
-       WARN_ON(1);
-       printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-               block_rsv->size, block_rsv->reserved,
-               block_rsv->freed[0], block_rsv->freed[1]);
- 
         return ERR_PTR(-ENOSPC);
   }
   
@@@ -8066,40 -7848,6 +8100,40 @@@ out
         return ret;
   }
   
+ +void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+ +{
+ +      struct btrfs_block_group_cache *block_group;
+ +      u64 last = 0;
+ +      /* Drop the inode reference of every block group that has iref set. */
+ +      while (1) {
+ +              struct inode *inode;
+ +              /* Scan from last for a block group that still has iref set. */
+ +              block_group = btrfs_lookup_first_block_group(info, last);
+ +              while (block_group) {
+ +                      spin_lock(&block_group->lock);
+ +                      if (block_group->iref)
+ +                              break; /* block_group->lock stays held */
+ +                      spin_unlock(&block_group->lock);
+ +                      block_group = next_block_group(info->tree_root,
+ +                                                     block_group);
+ +              }
+ +              if (!block_group) {
+ +                      if (last == 0)
+ +                              break; /* a scan from 0 found nothing: done */
+ +                      last = 0; /* nothing from last onward: rescan from 0 */
+ +                      continue;
+ +              }
+ +              /* Lock is held here; clear iref and iput() after unlocking. */
+ +              inode = block_group->inode;
+ +              block_group->iref = 0;
+ +              block_group->inode = NULL;
+ +              spin_unlock(&block_group->lock);
+ +              iput(inode);
+ +              last = block_group->key.objectid + block_group->key.offset;
+ +              btrfs_put_block_group(block_group);
+ +      }
+ +}
+ +
   int btrfs_free_block_groups(struct btrfs_fs_info *info)
   {
         struct btrfs_block_group_cache *block_group;
@@@ -8183,8 -7931,6 +8217,8 @@@ int btrfs_read_block_groups(struct btrf
         struct btrfs_key key;
         struct btrfs_key found_key;
         struct extent_buffer *leaf;
+ +      int need_clear = 0;
+ +      u64 cache_gen;
   
         root = info->extent_root;
         key.objectid = 0;
@@@ -8194,15 -7940,6 +8228,15 @@@
         if (!path)
                 return -ENOMEM;
   
+ +      cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+ +      if (cache_gen != 0 &&
+ +          btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+ +              need_clear = 1;
+ +      if (btrfs_test_opt(root, CLEAR_CACHE))
+ +              need_clear = 1;
+ +      if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+ +              printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+ +
         while (1) {
                 ret = find_first_block_group(root, path, &key);
                 if (ret > 0)
@@@ -8225,9 -7962,6 +8259,9 @@@
                 INIT_LIST_HEAD(&cache->list);
                 INIT_LIST_HEAD(&cache->cluster_list);
   
+ +              if (need_clear)
+ +                      cache->disk_cache_state = BTRFS_DC_CLEAR;
+ +
                 /*
                  * we only want to have 32k of ram per block group for keeping
                  * track of free space, and if we pass 1/2 of that we want to
@@@ -8332,7 -8066,6 +8366,7 @@@ int btrfs_make_block_group(struct btrfs
         cache->key.offset = size;
         cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
         cache->sectorsize = root->sectorsize;
+ +      cache->fs_info = root->fs_info;
   
         /*
          * we only want to have 32k of ram per block group for keeping track
@@@ -8389,10 -8122,9 +8423,11 @@@ int btrfs_remove_block_group(struct btr
         struct btrfs_path *path;
         struct btrfs_block_group_cache *block_group;
         struct btrfs_free_cluster *cluster;
+ +      struct btrfs_root *tree_root = root->fs_info->tree_root;
         struct btrfs_key key;
+ +      struct inode *inode;
         int ret;
+       int factor;
   
         root = root->fs_info->extent_root;
   
@@@ -8400,6 -8132,14 +8435,14 @@@
         BUG_ON(!block_group);
         BUG_ON(!block_group->ro);
   
+       memcpy(&key, &block_group->key, sizeof(key));
+       if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+                                 BTRFS_BLOCK_GROUP_RAID1 |
+                                 BTRFS_BLOCK_GROUP_RAID10))
+               factor = 2;
+       else
+               factor = 1;
+ 
         /* make sure this block group isn't part of an allocation cluster */
         cluster = &root->fs_info->data_alloc_cluster;
         spin_lock(&cluster->refill_lock);
@@@ -8418,40 -8158,6 +8461,40 @@@
         path = btrfs_alloc_path();
         BUG_ON(!path);
   
+ +      inode = lookup_free_space_inode(root, block_group, path);
+ +      if (!IS_ERR(inode)) {
+ +              btrfs_orphan_add(trans, inode);
+ +              clear_nlink(inode);
+ +              /* One for the block groups ref */
+ +              spin_lock(&block_group->lock);
+ +              if (block_group->iref) {
+ +                      block_group->iref = 0;
+ +                      block_group->inode = NULL;
+ +                      spin_unlock(&block_group->lock);
+ +                      iput(inode);
+ +              } else {
+ +                      spin_unlock(&block_group->lock);
+ +              }
+ +              /* One for our lookup ref */
+ +              iput(inode);
+ +      }
+ +
+ +      key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+ +      key.offset = block_group->key.objectid;
+ +      key.type = 0;
+ +
+ +      ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+ +      if (ret < 0)
+ +              goto out;
+ +      if (ret > 0)
+ +              btrfs_release_path(tree_root, path);
+ +      if (ret == 0) {
+ +              ret = btrfs_del_item(trans, tree_root, path);
+ +              if (ret)
+ +                      goto out;
+ +              btrfs_release_path(tree_root, path);
+ +      }
+ +
         spin_lock(&root->fs_info->block_group_cache_lock);
         rb_erase(&block_group->cache_node,
                  &root->fs_info->block_group_cache_tree);
@@@ -8473,10 -8179,9 +8516,11 @@@
         spin_lock(&block_group->space_info->lock);
         block_group->space_info->total_bytes -= block_group->key.offset;
         block_group->space_info->bytes_readonly -= block_group->key.offset;
+       block_group->space_info->disk_total -= block_group->key.offset * factor;
         spin_unlock(&block_group->space_info->lock);
   
+ +      memcpy(&key, &block_group->key, sizeof(key));
+ +
         btrfs_clear_space_info_full(root->fs_info);
   
         btrfs_put_block_group(block_group);
diff --combined fs/btrfs/inode.c

index f2fb974,5f9e4fc..9f08136
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -764,7 -764,6 +764,7 @@@ static noinline int cow_file_range(stru
         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
         int ret = 0;
   
+ +      BUG_ON(root == root->fs_info->tree_root);
         trans = btrfs_join_transaction(root, 1);
         BUG_ON(!trans);
         btrfs_set_trans_block_group(trans, inode);
@@@ -1036,16 -1035,10 +1036,16 @@@ static noinline int run_delalloc_nocow(
         int type;
         int nocow;
         int check_prev = 1;
+ +      bool nolock = false;
   
         path = btrfs_alloc_path();
         BUG_ON(!path);
- -      trans = btrfs_join_transaction(root, 1);
+ +      if (root == root->fs_info->tree_root) {
+ +              nolock = true;
+ +              trans = btrfs_join_transaction_nolock(root, 1);
+ +      } else {
+ +              trans = btrfs_join_transaction(root, 1);
+ +      }
         BUG_ON(!trans);
   
         cow_start = (u64)-1;
@@@ -1218,13 -1211,8 +1218,13 @@@ out_check
                 BUG_ON(ret);
         }
   
- -      ret = btrfs_end_transaction(trans, root);
- -      BUG_ON(ret);
+ +      if (nolock) {
+ +              ret = btrfs_end_transaction_nolock(trans, root);
+ +              BUG_ON(ret);
+ +      } else {
+ +              ret = btrfs_end_transaction(trans, root);
+ +              BUG_ON(ret);
+ +      }
         btrfs_free_path(path);
         return 0;
   }
@@@ -1301,8 -1289,6 +1301,8 @@@ static int btrfs_set_bit_hook(struct in
         if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                 struct btrfs_root *root = BTRFS_I(inode)->root;
                 u64 len = state->end + 1 - state->start;
+ +              int do_list = (root->root_key.objectid !=
+ +                             BTRFS_ROOT_TREE_OBJECTID);
   
                 if (*bits & EXTENT_FIRST_DELALLOC)
                         *bits &= ~EXTENT_FIRST_DELALLOC;
@@@ -1312,7 -1298,7 +1312,7 @@@
                 spin_lock(&root->fs_info->delalloc_lock);
                 BTRFS_I(inode)->delalloc_bytes += len;
                 root->fs_info->delalloc_bytes += len;
- -              if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+ +              if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                         list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
                                       &root->fs_info->delalloc_inodes);
                 }
@@@ -1335,8 -1321,6 +1335,8 @@@ static int btrfs_clear_bit_hook(struct 
         if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
                 struct btrfs_root *root = BTRFS_I(inode)->root;
                 u64 len = state->end + 1 - state->start;
+ +              int do_list = (root->root_key.objectid !=
+ +                             BTRFS_ROOT_TREE_OBJECTID);
   
                 if (*bits & EXTENT_FIRST_DELALLOC)
                         *bits &= ~EXTENT_FIRST_DELALLOC;
@@@ -1346,15 -1330,14 +1346,15 @@@
                 if (*bits & EXTENT_DO_ACCOUNTING)
                         btrfs_delalloc_release_metadata(inode, len);
   
- -              if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ +              if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ +                  && do_list)
                         btrfs_free_reserved_data_space(inode, len);
   
                 spin_lock(&root->fs_info->delalloc_lock);
                 root->fs_info->delalloc_bytes -= len;
                 BTRFS_I(inode)->delalloc_bytes -= len;
   
- -              if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+ +              if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
                     !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
                         list_del_init(&BTRFS_I(inode)->delalloc_inodes);
                 }
@@@ -1443,13 -1426,10 +1443,13 @@@ static int btrfs_submit_bio_hook(struc
   
         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
   
- -      ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+ +      if (root == root->fs_info->tree_root)
+ +              ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+ +      else
+ +              ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
         BUG_ON(ret);
   
- -      if (!(rw & (1 << BIO_RW))) {
+ +      if (!(rw & REQ_WRITE)) {
                 if (bio_flags & EXTENT_BIO_COMPRESSED) {
                         return btrfs_submit_compressed_read(inode, bio,
                                                     mirror_num, bio_flags);
@@@ -1682,7 -1662,6 +1682,7 @@@ static int btrfs_finish_ordered_io(stru
         struct extent_state *cached_state = NULL;
         int compressed = 0;
         int ret;
+ +      bool nolock = false;
   
         ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
                                              end - start + 1);
@@@ -1690,17 -1669,11 +1690,17 @@@
                 return 0;
         BUG_ON(!ordered_extent);
   
+ +      nolock = (root == root->fs_info->tree_root);
+ +
         if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                 BUG_ON(!list_empty(&ordered_extent->list));
                 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                 if (!ret) {
- -                      trans = btrfs_join_transaction(root, 1);
+ +                      if (nolock)
+ +                              trans = btrfs_join_transaction_nolock(root, 1);
+ +                      else
+ +                              trans = btrfs_join_transaction(root, 1);
+ +                      BUG_ON(!trans);
                         btrfs_set_trans_block_group(trans, inode);
                         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                         ret = btrfs_update_inode(trans, root, inode);
@@@ -1713,10 -1686,7 +1713,10 @@@
                          ordered_extent->file_offset + ordered_extent->len - 1,
                          0, &cached_state, GFP_NOFS);
   
- -      trans = btrfs_join_transaction(root, 1);
+ +      if (nolock)
+ +              trans = btrfs_join_transaction_nolock(root, 1);
+ +      else
+ +              trans = btrfs_join_transaction(root, 1);
         btrfs_set_trans_block_group(trans, inode);
         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
   
@@@ -1730,7 -1700,6 +1730,7 @@@
                                                 ordered_extent->len);
                 BUG_ON(ret);
         } else {
+ +              BUG_ON(root == root->fs_info->tree_root);
                 ret = insert_reserved_file_extent(trans, inode,
                                                 ordered_extent->file_offset,
                                                 ordered_extent->start,
@@@ -1755,15 -1724,9 +1755,15 @@@
         ret = btrfs_update_inode(trans, root, inode);
         BUG_ON(ret);
   out:
- -      btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- -      if (trans)
- -              btrfs_end_transaction(trans, root);
+ +      if (nolock) {
+ +              if (trans)
+ +                      btrfs_end_transaction_nolock(trans, root);
+ +      } else {
+ +              btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+ +              if (trans)
+ +                      btrfs_end_transaction(trans, root);
+ +      }
+ +
         /* once for us */
         btrfs_put_ordered_extent(ordered_extent);
         /* once for the tree */
@@@ -1878,7 -1841,7 +1878,7 @@@ static int btrfs_io_failed_hook(struct 
         bio->bi_size = 0;
   
         bio_add_page(bio, page, failrec->len, start - page_offset(page));
- -      if (failed_bio->bi_rw & (1 << BIO_RW))
+ +      if (failed_bio->bi_rw & REQ_WRITE)
                 rw = WRITE;
         else
                 rw = READ;
@@@ -2975,6 -2938,7 +2975,6 @@@ int btrfs_unlink_subvol(struct btrfs_tr
         dir->i_mtime = dir->i_ctime = CURRENT_TIME;
         ret = btrfs_update_inode(trans, root, dir);
         BUG_ON(ret);
- -      dir->i_sb->s_dirt = 1;
   
         btrfs_free_path(path);
         return 0;
@@@ -3233,7 -3197,7 +3233,7 @@@ int btrfs_truncate_inode_items(struct b
   
         BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
   
- -      if (root->ref_cows)
+ +      if (root->ref_cows || root == root->fs_info->tree_root)
                 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
   
         path = btrfs_alloc_path();
@@@ -3381,8 -3345,7 +3381,8 @@@ delete
                 } else {
                         break;
                 }
- -              if (found_extent && root->ref_cows) {
+ +              if (found_extent && (root->ref_cows ||
+ +                                   root == root->fs_info->tree_root)) {
                         btrfs_set_path_blocking(path);
                         ret = btrfs_free_extent(trans, root, extent_start,
                                                 extent_num_bytes, 0,
@@@ -3693,19 -3656,17 +3693,19 @@@ static int btrfs_setattr(struct dentry 
                 if (err)
                         return err;
         }
- -      attr->ia_valid &= ~ATTR_SIZE;
   
- -      if (attr->ia_valid)
- -              err = inode_setattr(inode, attr);
+ +      if (attr->ia_valid) {
+ +              setattr_copy(inode, attr);
+ +              mark_inode_dirty(inode);
+ +
+ +              if (attr->ia_valid & ATTR_MODE)
+ +                      err = btrfs_acl_chmod(inode);
+ +      }
   
- -      if (!err && ((attr->ia_valid & ATTR_MODE)))
- -              err = btrfs_acl_chmod(inode);
         return err;
   }
   
- -void btrfs_delete_inode(struct inode *inode)
+ +void btrfs_evict_inode(struct inode *inode)
   {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@@ -3713,15 -3674,10 +3713,15 @@@
         int ret;
   
         truncate_inode_pages(&inode->i_data, 0);
+ +      if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+ +                             root == root->fs_info->tree_root))
+ +              goto no_delete;
+ +
         if (is_bad_inode(inode)) {
                 btrfs_orphan_del(NULL, inode);
                 goto no_delete;
         }
+ +      /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
         btrfs_wait_ordered_range(inode, 0, (u64)-1);
   
         if (root->fs_info->log_root_recovering) {
@@@ -3771,7 -3727,7 +3771,7 @@@
         btrfs_end_transaction(trans, root);
         btrfs_btree_balance_dirty(root, nr);
   no_delete:
- -      clear_inode(inode);
+ +      end_writeback(inode);
         return;
   }
   
@@@ -3902,7 -3858,7 +3902,7 @@@ again
                         p = &parent->rb_right;
                 else {
                         WARN_ON(!(entry->vfs_inode.i_state &
- -                                (I_WILL_FREE | I_FREEING | I_CLEAR)));
+ +                                (I_WILL_FREE | I_FREEING)));
                         rb_erase(parent, &root->inode_tree);
                         RB_CLEAR_NODE(parent);
                         spin_unlock(&root->inode_lock);
@@@ -3927,14 -3883,7 +3927,14 @@@ static void inode_tree_del(struct inod
         }
         spin_unlock(&root->inode_lock);
   
- -      if (empty && btrfs_root_refs(&root->root_item) == 0) {
+ +      /*
+ +       * Free space cache has inodes in the tree root, but the tree root has a
+ +       * root_refs of 0, so this could end up dropping the tree root as a
+ +       * snapshot, so we need the extra !root->fs_info->tree_root check to
+ +       * make sure we don't drop it.
+ +       */
+ +      if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+ +          root != root->fs_info->tree_root) {
                 synchronize_srcu(&root->fs_info->subvol_srcu);
                 spin_lock(&root->inode_lock);
                 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@@ -3988,7 -3937,7 +3988,7 @@@ again
                         if (atomic_read(&inode->i_count) > 1)
                                 d_prune_aliases(inode);
                         /*
- -                       * btrfs_drop_inode will remove it from
+ +                       * btrfs_drop_inode will have it removed from
                          * the inode cache when its usage count
                          * hits zero.
                          */
@@@ -4328,24 -4277,14 +4328,24 @@@ int btrfs_write_inode(struct inode *ino
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_trans_handle *trans;
         int ret = 0;
+ +      bool nolock = false;
   
         if (BTRFS_I(inode)->dummy_inode)
                 return 0;
   
+ +      smp_mb();
+ +      nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+ +
         if (wbc->sync_mode == WB_SYNC_ALL) {
- -              trans = btrfs_join_transaction(root, 1);
+ +              if (nolock)
+ +                      trans = btrfs_join_transaction_nolock(root, 1);
+ +              else
+ +                      trans = btrfs_join_transaction(root, 1);
                 btrfs_set_trans_block_group(trans, inode);
- -              ret = btrfs_commit_transaction(trans, root);
+ +              if (nolock)
+ +                      ret = btrfs_end_transaction_nolock(trans, root);
+ +              else
+ +                      ret = btrfs_commit_transaction(trans, root);
         }
         return ret;
   }
@@@ -5703,7 -5642,7 +5703,7 @@@ static void btrfs_submit_direct(int rw
         struct bio_vec *bvec = bio->bi_io_vec;
         u64 start;
         int skip_sum;
- -      int write = rw & (1 << BIO_RW);
+ +      int write = rw & REQ_WRITE;
         int ret = 0;
   
         skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@@ -6364,21 -6303,6 +6364,21 @@@ void btrfs_destroy_inode(struct inode *
                 spin_unlock(&root->fs_info->ordered_extent_lock);
         }
   
+ +      if (root == root->fs_info->tree_root) {
+ +              struct btrfs_block_group_cache *block_group;
+ +
+ +              block_group = btrfs_lookup_block_group(root->fs_info,
+ +                                              BTRFS_I(inode)->block_group);
+ +              if (block_group && block_group->inode == inode) {
+ +                      spin_lock(&block_group->lock);
+ +                      block_group->inode = NULL;
+ +                      spin_unlock(&block_group->lock);
+ +                      btrfs_put_block_group(block_group);
+ +              } else if (block_group) {
+ +                      btrfs_put_block_group(block_group);
+ +              }
+ +      }
+ +
         spin_lock(&root->orphan_lock);
         if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@@ -6407,15 -6331,13 +6407,15 @@@ free
         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
   }
   
- -void btrfs_drop_inode(struct inode *inode)
+ +int btrfs_drop_inode(struct inode *inode)
   {
         struct btrfs_root *root = BTRFS_I(inode)->root;
- -      if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
- -              generic_delete_inode(inode);
+ +
+ +      /*
+ +       * Return 1 when the inode's root has no references left, unless
+ +       * that root is the tree root: the tree root has a root_refs of 0
+ +       * but still holds the free space cache inodes.  Everything else is
+ +       * left to generic_drop_inode().
+ +       * NOTE(review): 1 presumably tells the VFS to evict the inode
+ +       * right away -- confirm against the ->drop_inode contract.
+ +       */
+ +      if (btrfs_root_refs(&root->root_item) == 0 &&
+ +          root != root->fs_info->tree_root)
+ +              return 1;
         else
- -              generic_drop_inode(inode);
+ +              return generic_drop_inode(inode);
   }
   
   static void init_once(void *foo)
@@@ -6681,7 -6603,8 +6681,8 @@@ int btrfs_start_delalloc_inodes(struct 
         return 0;
   }
   
- int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+                                  int sync)
   {
         struct btrfs_inode *binode;
         struct inode *inode = NULL;
@@@ -6703,7 -6626,26 +6704,26 @@@
         spin_unlock(&root->fs_info->delalloc_lock);
   
         if (inode) {
-               write_inode_now(inode, 0);
+               if (sync) {
+                       filemap_write_and_wait(inode->i_mapping);
+                       /*
+                        * We have to do this because compression doesn't
+                        * actually set PG_writeback until it submits the pages
+                        * for IO, which happens in an async thread, so we could
+                        * race and not actually wait for any writeback pages
+                        * because they've not been submitted yet.  Technically
+                        * this could still be the case for the ordered stuff
+                        * since the async thread may not have started to do its
+                        * work yet.  If this becomes the case then we need to
+                        * figure out a way to make sure that in writepage we
+                        * wait for any async pages to be submitted before
+                        * returning so that fdatawait does what its supposed to
+                        * do.
+                        */
+                       btrfs_wait_ordered_range(inode, 0, (u64)-1);
+               } else {
+                       filemap_flush(inode->i_mapping);
+               }
                 if (delay_iput)
                         btrfs_add_delayed_iput(inode);
                 else
@@@ -6829,33 -6771,27 +6849,33 @@@ out_unlock
         return err;
   }
   
- -int btrfs_prealloc_file_range(struct inode *inode, int mode,
- -                            u64 start, u64 num_bytes, u64 min_size,
- -                            loff_t actual_len, u64 *alloc_hint)
+ +static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+ +                                     u64 start, u64 num_bytes, u64 min_size,
+ +                                     loff_t actual_len, u64 *alloc_hint,
+ +                                     struct btrfs_trans_handle *trans)
   {
- -      struct btrfs_trans_handle *trans;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         struct btrfs_key ins;
         u64 cur_offset = start;
         int ret = 0;
+ +      bool own_trans = true;
   
+ +      if (trans)
+ +              own_trans = false;
         while (num_bytes > 0) {
- -              trans = btrfs_start_transaction(root, 3);
- -              if (IS_ERR(trans)) {
- -                      ret = PTR_ERR(trans);
- -                      break;
+ +              if (own_trans) {
+ +                      trans = btrfs_start_transaction(root, 3);
+ +                      if (IS_ERR(trans)) {
+ +                              ret = PTR_ERR(trans);
+ +                              break;
+ +                      }
                 }
   
                 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
                                            0, *alloc_hint, (u64)-1, &ins, 1);
                 if (ret) {
- -                      btrfs_end_transaction(trans, root);
+ +                      if (own_trans)
+ +                              btrfs_end_transaction(trans, root);
                         break;
                 }
   
@@@ -6888,30 -6824,11 +6908,30 @@@
                 ret = btrfs_update_inode(trans, root, inode);
                 BUG_ON(ret);
   
- -              btrfs_end_transaction(trans, root);
+ +              if (own_trans)
+ +                      btrfs_end_transaction(trans, root);
         }
         return ret;
   }
   
+ +/*
+ + * Wrapper around __btrfs_prealloc_file_range() that passes a NULL
+ + * transaction handle.  With no handle supplied, the helper starts its own
+ + * transaction with btrfs_start_transaction() and ends it again on every
+ + * pass of its allocation loop.  Returns whatever the helper returns.
+ + */
+ +int btrfs_prealloc_file_range(struct inode *inode, int mode,
+ +                            u64 start, u64 num_bytes, u64 min_size,
+ +                            loff_t actual_len, u64 *alloc_hint)
+ +{
+ +      return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ +                                         min_size, actual_len, alloc_hint,
+ +                                         NULL);
+ +}
+ +
+ +/*
+ + * Same as btrfs_prealloc_file_range(), but runs inside the caller's
+ + * transaction: @trans is handed to __btrfs_prealloc_file_range(), which
+ + * then neither starts nor ends a transaction itself.  The caller keeps
+ + * ownership of @trans.  Returns whatever the helper returns.
+ + */
+ +int btrfs_prealloc_file_range_trans(struct inode *inode,
+ +                                  struct btrfs_trans_handle *trans, int mode,
+ +                                  u64 start, u64 num_bytes, u64 min_size,
+ +                                  loff_t actual_len, u64 *alloc_hint)
+ +{
+ +      return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+ +                                         min_size, actual_len, alloc_hint, trans);
+ +}
+ +
   static long btrfs_fallocate(struct inode *inode, int mode,
                             loff_t offset, loff_t len)
   {
diff --combined fs/btrfs/relocation.c

index af339ee,39adb68..fd07144
--- 1/fs/btrfs/relocation.c
--- 2/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@@ -29,7 -29,6 +29,7 @@@
   #include "locking.h"
   #include "btrfs_inode.h"
   #include "async-thread.h"
+ +#include "free-space-cache.h"
   
   /*
    * backref_node, mapping_node and tree_block start with this
@@@ -179,8 -178,6 +179,6 @@@ struct reloc_control 
         u64 search_start;
         u64 extents_found;
   
-       int block_rsv_retries;
- 
         unsigned int stage:8;
         unsigned int create_reloc_tree:1;
         unsigned int merge_reloc_tree:1;
@@@ -2134,7 -2131,6 +2132,6 @@@ int prepare_to_merge(struct reloc_contr
         LIST_HEAD(reloc_roots);
         u64 num_bytes = 0;
         int ret;
-       int retries = 0;
   
         mutex_lock(&root->fs_info->trans_mutex);
         rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@@ -2144,7 -2140,7 +2141,7 @@@ again
         if (!err) {
                 num_bytes = rc->merging_rsv_size;
                 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
-                                         num_bytes, &retries);
+                                         num_bytes);
                 if (ret)
                         err = ret;
         }
@@@ -2156,7 -2152,6 +2153,6 @@@
                         btrfs_end_transaction(trans, rc->extent_root);
                         btrfs_block_rsv_release(rc->extent_root,
                                                 rc->block_rsv, num_bytes);
-                       retries = 0;
                         goto again;
                 }
         }
@@@ -2406,15 -2401,13 +2402,13 @@@ static int reserve_metadata_space(struc
         num_bytes = calcu_metadata_size(rc, node, 1) * 2;
   
         trans->block_rsv = rc->block_rsv;
-       ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
-                                 &rc->block_rsv_retries);
+       ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
         if (ret) {
                 if (ret == -EAGAIN)
                         rc->commit_transaction = 1;
                 return ret;
         }
   
-       rc->block_rsv_retries = 0;
         return 0;
   }
   
@@@ -3192,54 -3185,6 +3186,54 @@@ static int block_use_full_backref(struc
         return ret;
   }
   
+ +/*
+ + * Truncate a free space cache inode that lives in the tree root.
+ + *
+ + * If @inode is NULL the inode is looked up in the tree root by @ino, and
+ + * -ENOENT is returned when it cannot be read or is a bad inode.  If @inode
+ + * is non-NULL it is used directly and @ino is ignored.
+ + *
+ + * The inode reference -- the caller's, or the one taken by the lookup --
+ + * is dropped with iput() before returning, so the caller must not use
+ + * @inode afterwards.
+ + *
+ + * Returns 0 on success, -ENOENT, -ENOMEM if no path could be allocated,
+ + * the error from joining the transaction, or the result of
+ + * btrfs_truncate_free_space_cache().
+ + */
+ +static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+ +                                  struct inode *inode, u64 ino)
+ +{
+ +      struct btrfs_key key;
+ +      struct btrfs_path *path;
+ +      struct btrfs_root *root = fs_info->tree_root;
+ +      struct btrfs_trans_handle *trans;
+ +      unsigned long nr;
+ +      int ret = 0;
+ +
+ +      if (inode)
+ +              goto truncate;
+ +
+ +      key.objectid = ino;
+ +      key.type = BTRFS_INODE_ITEM_KEY;
+ +      key.offset = 0;
+ +
+ +      inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ +      if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+ +              if (inode && !IS_ERR(inode))
+ +                      iput(inode);
+ +              return -ENOENT;
+ +      }
+ +
+ +truncate:
+ +      path = btrfs_alloc_path();
+ +      if (!path) {
+ +              ret = -ENOMEM;
+ +              goto out;
+ +      }
+ +
+ +      trans = btrfs_join_transaction(root, 0);
+ +      if (IS_ERR(trans)) {
+ +              /* report the failure instead of returning a stale 0 */
+ +              ret = PTR_ERR(trans);
+ +              btrfs_free_path(path);
+ +              goto out;
+ +      }
+ +
+ +      ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+ +
+ +      btrfs_free_path(path);
+ +      /* sample blocks_used before the handle is released */
+ +      nr = trans->blocks_used;
+ +      btrfs_end_transaction(trans, root);
+ +      btrfs_btree_balance_dirty(root, nr);
+ +out:
+ +      iput(inode);
+ +      return ret;
+ +}
+ +
   /*
    * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
    * this function scans fs tree to find blocks reference the data extent
@@@ -3266,27 -3211,15 +3260,27 @@@ static int find_data_references(struct 
         int counted;
         int ret;
   
- -      path = btrfs_alloc_path();
- -      if (!path)
- -              return -ENOMEM;
- -
         ref_root = btrfs_extent_data_ref_root(leaf, ref);
         ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
         ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
         ref_count = btrfs_extent_data_ref_count(leaf, ref);
   
+ +      /*
+ +       * This is an extent belonging to the free space cache, lets just delete
+ +       * it and redo the search.
+ +       */
+ +      if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+ +              ret = delete_block_group_cache(rc->extent_root->fs_info,
+ +                                             NULL, ref_objectid);
+ +              if (ret != -ENOENT)
+ +                      return ret;
+ +              ret = 0;
+ +      }
+ +
+ +      path = btrfs_alloc_path();
+ +      if (!path)
+ +              return -ENOMEM;
+ +
         root = read_fs_root(rc->extent_root->fs_info, ref_root);
         if (IS_ERR(root)) {
                 err = PTR_ERR(root);
@@@ -3615,8 -3548,7 +3609,7 @@@ int prepare_to_relocate(struct reloc_co
          * is no reservation in transaction handle.
          */
         ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
-                                 rc->extent_root->nodesize * 256,
-                                 &rc->block_rsv_retries);
+                                 rc->extent_root->nodesize * 256);
         if (ret)
                 return ret;
   
@@@ -3628,7 -3560,6 +3621,6 @@@
         rc->extents_found = 0;
         rc->nodes_relocated = 0;
         rc->merging_rsv_size = 0;
-       rc->block_rsv_retries = 0;
   
         rc->create_reloc_tree = 1;
         set_reloc_control(rc);
@@@ -3921,8 -3852,6 +3913,8 @@@ int btrfs_relocate_block_group(struct b
   {
         struct btrfs_fs_info *fs_info = extent_root->fs_info;
         struct reloc_control *rc;
+ +      struct inode *inode;
+ +      struct btrfs_path *path;
         int ret;
         int rw = 0;
         int err = 0;
@@@ -3945,26 -3874,6 +3937,26 @@@
                 rw = 1;
         }
   
+ +      path = btrfs_alloc_path();
+ +      if (!path) {
+ +              err = -ENOMEM;
+ +              goto out;
+ +      }
+ +
+ +      inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+ +                                      path);
+ +      btrfs_free_path(path);
+ +
+ +      if (!IS_ERR(inode))
+ +              ret = delete_block_group_cache(fs_info, inode, 0);
+ +      else
+ +              ret = PTR_ERR(inode);
+ +
+ +      if (ret && ret != -ENOENT) {
+ +              err = ret;
+ +              goto out;
+ +      }
+ +
         rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
         if (IS_ERR(rc->data_inode)) {
                 err = PTR_ERR(rc->data_inode);
diff --combined fs/btrfs/super.c

index 5f56213,d1867cd..65b62da
--- 1/fs/btrfs/super.c
--- 2/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@@ -68,7 -68,7 +68,7 @@@ enum 
         Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
         Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
         Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
- -      Opt_discard, Opt_err,
+ +      Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
   };
   
   static match_table_t tokens = {
@@@ -92,8 -92,6 +92,8 @@@
         {Opt_flushoncommit, "flushoncommit"},
         {Opt_ratio, "metadata_ratio=%d"},
         {Opt_discard, "discard"},
+ +      {Opt_space_cache, "space_cache"},
+ +      {Opt_clear_cache, "clear_cache"},
         {Opt_err, NULL},
   };
   
@@@ -237,13 -235,6 +237,13 @@@ int btrfs_parse_options(struct btrfs_ro
                 case Opt_discard:
                         btrfs_set_opt(info->mount_opt, DISCARD);
                         break;
+ +              case Opt_space_cache:
+ +                      printk(KERN_INFO "btrfs: enabling disk space caching\n");
+ +                      btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ +                      /* must not fall through and also set CLEAR_CACHE */
+ +                      break;
+ +              case Opt_clear_cache:
+ +                      printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+ +                      btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+ +                      break;
                 case Opt_err:
                         printk(KERN_INFO "btrfs: unrecognized mount option "
                                "'%s'\n", p);
@@@ -638,7 -629,7 +638,7 @@@ static int btrfs_get_sb(struct file_sys
         if (IS_ERR(root)) {
                 error = PTR_ERR(root);
                 deactivate_locked_super(s);
-               goto error;
+               goto error_free_subvol_name;
         }
         /* if they gave us a subvolume name bind mount into that */
         if (strcmp(subvol_name, ".")) {
@@@ -652,14 -643,14 +652,14 @@@
                         deactivate_locked_super(s);
                         error = PTR_ERR(new_root);
                         dput(root);
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                 }
                 if (!new_root->d_inode) {
                         dput(root);
                         dput(new_root);
                         deactivate_locked_super(s);
                         error = -ENXIO;
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                 }
                 dput(root);
                 root = new_root;
@@@ -677,7 -668,6 +677,6 @@@ error_close_devices
         btrfs_close_devices(fs_devices);
   error_free_subvol_name:
         kfree(subvol_name);
- error:
         return error;
   }
   
@@@ -725,18 -715,25 +724,25 @@@ static int btrfs_statfs(struct dentry *
         struct list_head *head = &root->fs_info->space_info;
         struct btrfs_space_info *found;
         u64 total_used = 0;
+       u64 total_used_data = 0;
         int bits = dentry->d_sb->s_blocksize_bits;
         __be32 *fsid = (__be32 *)root->fs_info->fsid;
   
         rcu_read_lock();
-       list_for_each_entry_rcu(found, head, list)
+       list_for_each_entry_rcu(found, head, list) {
+               if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+                                   BTRFS_BLOCK_GROUP_SYSTEM))
+                       total_used_data += found->disk_total;
+               else
+                       total_used_data += found->disk_used;
                 total_used += found->disk_used;
+       }
         rcu_read_unlock();
   
         buf->f_namelen = BTRFS_NAME_LEN;
         buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
         buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bavail = buf->f_bfree;
+       buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
         buf->f_bsize = dentry->d_sb->s_blocksize;
         buf->f_type = BTRFS_SUPER_MAGIC;
   
@@@ -806,7 -803,7 +812,7 @@@ static int btrfs_unfreeze(struct super_
   
   static const struct super_operations btrfs_super_ops = {
         .drop_inode     = btrfs_drop_inode,
- -      .delete_inode   = btrfs_delete_inode,
+ +      .evict_inode    = btrfs_evict_inode,
         .put_super      = btrfs_put_super,
         .sync_fs        = btrfs_sync_fs,
         .show_options   = btrfs_show_options,
diff --combined fs/btrfs/transaction.c

index e7144c4,abbec80..325d9a5
--- 1/fs/btrfs/transaction.c
--- 2/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@@ -163,7 -163,6 +163,7 @@@ enum btrfs_trans_type 
         TRANS_START,
         TRANS_JOIN,
         TRANS_USERSPACE,
+ +      TRANS_JOIN_NOLOCK,
   };
   
   static int may_wait_transaction(struct btrfs_root *root, int type)
@@@ -180,15 -179,13 +180,14 @@@ static struct btrfs_trans_handle *start
   {
         struct btrfs_trans_handle *h;
         struct btrfs_transaction *cur_trans;
-       int retries = 0;
         int ret;
   again:
         h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
         if (!h)
                 return ERR_PTR(-ENOMEM);
   
- -      mutex_lock(&root->fs_info->trans_mutex);
+ +      if (type != TRANS_JOIN_NOLOCK)
+ +              mutex_lock(&root->fs_info->trans_mutex);
         if (may_wait_transaction(root, type))
                 wait_current_trans(root);
   
@@@ -197,8 -194,7 +196,8 @@@
   
         cur_trans = root->fs_info->running_transaction;
         cur_trans->use_count++;
- -      mutex_unlock(&root->fs_info->trans_mutex);
+ +      if (type != TRANS_JOIN_NOLOCK)
+ +              mutex_unlock(&root->fs_info->trans_mutex);
   
         h->transid = cur_trans->transid;
         h->transaction = cur_trans;
@@@ -215,8 -211,7 +214,7 @@@
         }
   
         if (num_items > 0) {
-               ret = btrfs_trans_reserve_metadata(h, root, num_items,
-                                                  &retries);
+               ret = btrfs_trans_reserve_metadata(h, root, num_items);
                 if (ret == -EAGAIN) {
                         btrfs_commit_transaction(h, root);
                         goto again;
@@@ -227,11 -222,9 +225,11 @@@
                 }
         }
   
- -      mutex_lock(&root->fs_info->trans_mutex);
+ +      if (type != TRANS_JOIN_NOLOCK)
+ +              mutex_lock(&root->fs_info->trans_mutex);
         record_root_in_trans(h, root);
- -      mutex_unlock(&root->fs_info->trans_mutex);
+ +      if (type != TRANS_JOIN_NOLOCK)
+ +              mutex_unlock(&root->fs_info->trans_mutex);
   
         if (!current->journal_info && type != TRANS_USERSPACE)
                 current->journal_info = h;
@@@ -249,12 -242,6 +247,12 @@@ struct btrfs_trans_handle *btrfs_join_t
         return start_transaction(root, 0, TRANS_JOIN);
   }
   
+ +/*
+ + * Join the running transaction using TRANS_JOIN_NOLOCK, which makes
+ + * start_transaction() skip taking fs_info->trans_mutex.  @num_blocks is
+ + * not used: 0 items are reserved, as in btrfs_join_transaction().
+ + * NOTE(review): callers presumably must guarantee by other means that the
+ + * running transaction cannot change underneath them -- confirm.
+ + */
+ +struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+ +                                                      int num_blocks)
+ +{
+ +      return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+ +}
+ +
   struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
                                                          int num_blocks)
   {
@@@ -359,7 -346,7 +357,7 @@@ int btrfs_should_end_transaction(struc
   }
   
   static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
- -                        struct btrfs_root *root, int throttle)
+ +                        struct btrfs_root *root, int throttle, int lock)
   {
         struct btrfs_transaction *cur_trans = trans->transaction;
         struct btrfs_fs_info *info = root->fs_info;
@@@ -387,19 -374,18 +385,19 @@@
   
         btrfs_trans_release_metadata(trans, root);
   
- -      if (!root->fs_info->open_ioctl_trans &&
+ +      if (lock && !root->fs_info->open_ioctl_trans &&
             should_end_transaction(trans, root))
                 trans->transaction->blocked = 1;
   
- -      if (cur_trans->blocked && !cur_trans->in_commit) {
+ +      if (lock && cur_trans->blocked && !cur_trans->in_commit) {
                 if (throttle)
                         return btrfs_commit_transaction(trans, root);
                 else
                         wake_up_process(info->transaction_kthread);
         }
   
- -      mutex_lock(&info->trans_mutex);
+ +      if (lock)
+ +              mutex_lock(&info->trans_mutex);
         WARN_ON(cur_trans != info->running_transaction);
         WARN_ON(cur_trans->num_writers < 1);
         cur_trans->num_writers--;
@@@ -407,8 -393,7 +405,8 @@@
         if (waitqueue_active(&cur_trans->writer_wait))
                 wake_up(&cur_trans->writer_wait);
         put_transaction(cur_trans);
- -      mutex_unlock(&info->trans_mutex);
+ +      if (lock)
+ +              mutex_unlock(&info->trans_mutex);
   
         if (current->journal_info == trans)
                 current->journal_info = NULL;
@@@ -424,19 -409,13 +422,19 @@@
+ +/*
+ + * End a transaction handle with throttle = 0 and lock = 1:
+ + * __btrfs_end_transaction() takes info->trans_mutex while dropping the
+ + * writer count and, if the transaction is blocked and not yet in commit,
+ + * wakes the transaction kthread rather than committing here.
+ + */
   int btrfs_end_transaction(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root)
   {
- -      return __btrfs_end_transaction(trans, root, 0);
+ +      return __btrfs_end_transaction(trans, root, 0, 1);
   }
   
+ +/*
+ + * End a transaction handle with throttle = 1 and lock = 1: if the
+ + * transaction is blocked and not yet in commit, __btrfs_end_transaction()
+ + * commits it from this context via btrfs_commit_transaction().
+ + */
   int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root)
   {
- -      return __btrfs_end_transaction(trans, root, 1);
+ +      return __btrfs_end_transaction(trans, root, 1, 1);
+ +}
+ +
+ +/*
+ + * End a transaction handle with throttle = 0 and lock = 0:
+ + * __btrfs_end_transaction() does not take info->trans_mutex, and it also
+ + * skips marking the transaction blocked and the commit / kthread wakeup
+ + * step, since both are guarded by the lock flag.
+ + */
+ +int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+ +                               struct btrfs_root *root)
+ +{
+ +      return __btrfs_end_transaction(trans, root, 0, 0);
   }
   
   /*
@@@ -855,7 -834,6 +853,6 @@@ static noinline int create_pending_snap
         struct extent_buffer *tmp;
         struct extent_buffer *old;
         int ret;
-       int retries = 0;
         u64 to_reserve = 0;
         u64 index = 0;
         u64 objectid;
@@@ -877,7 -855,7 +874,7 @@@
   
         if (to_reserve > 0) {
                 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
-                                         to_reserve, &retries);
+                                         to_reserve);
                 if (ret) {
                         pending->error = ret;
                         goto fail;
@@@ -985,8 -963,6 +982,8 @@@ static void update_super_roots(struct b
         super->root = root_item->bytenr;
         super->generation = root_item->generation;
         super->root_level = root_item->level;
+ +      if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+ +              super->cache_generation = root_item->generation;
   }
   
   int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
author	Chris Mason <chris.mason@oracle.com>
	Fri, 29 Oct 2010 13:27:49 +0000 (09:27 -0400)
committer	Chris Mason <chris.mason@oracle.com>
	Fri, 29 Oct 2010 13:27:49 +0000 (09:27 -0400)
		1	2
fs/btrfs/ctree.h	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent-tree.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/relocation.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/transaction.c	patch \|	diff1 \|	diff2 \|	blob \| history