Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3378533..bcd59c7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
                return NULL;
        }
 
+       /* We're loading it the fast way, so we don't have a caching_ctl. */
+       if (!cache->caching_ctl) {
+               spin_unlock(&cache->lock);
+               return NULL;
+       }
+
        ctl = cache->caching_ctl;
        atomic_inc(&ctl->count);
        spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
        return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+                            struct btrfs_trans_handle *trans,
+                            int load_cache_only)
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
        if (cache->cached != BTRFS_CACHE_NO)
                return 0;
 
+       /*
+        * We can't read the free space cache from disk during a transaction
+        * commit, since that requires the normal tree locking.
+        */
+       if (!trans->transaction->in_commit) {
+               spin_lock(&cache->lock);
+               if (cache->cached != BTRFS_CACHE_NO) {
+                       spin_unlock(&cache->lock);
+                       return 0;
+               }
+               cache->cached = BTRFS_CACHE_STARTED;
+               spin_unlock(&cache->lock);
+
+               ret = load_free_space_cache(fs_info, cache);
+
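+               /*
+                * A return of 1 means the on-disk cache was loaded in full;
+                * otherwise mark the group uncached again so the slow caching
+                * path can populate it later.
+                */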
+               spin_lock(&cache->lock);
+               if (ret == 1) {
+                       cache->cached = BTRFS_CACHE_FINISHED;
+                       cache->last_byte_to_unpin = (u64)-1;
+               } else {
+                       cache->cached = BTRFS_CACHE_NO;
+               }
+               spin_unlock(&cache->lock);
+               if (ret == 1)
+                       return 0;
+       }
+
+       if (load_cache_only)
+               return 0;
+
        caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
        BUG_ON(!caching_ctl);
 
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
-               if (found->flags == flags) {
+               if (found->flags & flags) {
                        rcu_read_unlock();
                        return found;
                }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
        return num;
 }
 
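+/* Return factor percent of num; a factor of 100 returns num unchanged. */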
+static u64 div_factor_fine(u64 num, int factor)
+{
+       if (factor == 100)
+               return num;
+       num *= factor;
+       do_div(num, 100);
+       return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
 {
@@ -1695,8 +1742,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
 {
-       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
-                       BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+       blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
 }
 
 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@ -2688,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
        return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+                           struct btrfs_trans_handle *trans,
+                           struct btrfs_path *path)
+{
+       struct btrfs_root *root = block_group->fs_info->tree_root;
+       struct inode *inode = NULL;
+       u64 alloc_hint = 0;
+       int num_pages = 0;
+       int retries = 0;
+       int ret = 0;
+
+       /*
+        * If this block group is smaller than 100 megs, don't bother caching
+        * it on disk.
+        */
+       if (block_group->key.offset < (100 * 1024 * 1024)) {
+               spin_lock(&block_group->lock);
+               block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+
+again:
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+               ret = PTR_ERR(inode);
+               btrfs_release_path(root, path);
+               goto out;
+       }
+
+       if (IS_ERR(inode)) {
+               BUG_ON(retries);
+               retries++;
+
+               if (block_group->ro)
+                       goto out_free;
+
+               ret = create_free_space_inode(root, trans, block_group, path);
+               if (ret)
+                       goto out_free;
+               goto again;
+       }
+
+       /*
+        * We want to set the generation to 0 so that if anything goes wrong
+        * from here on out we know not to trust this cache the next time we
+        * load it.
+        */
+       BTRFS_I(inode)->generation = 0;
+       ret = btrfs_update_inode(trans, root, inode);
+       WARN_ON(ret);
+
+       if (i_size_read(inode) > 0) {
+               ret = btrfs_truncate_free_space_cache(root, trans, path,
+                                                     inode);
+               if (ret)
+                       goto out_put;
+       }
+
+       spin_lock(&block_group->lock);
+       if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               spin_unlock(&block_group->lock);
+               goto out_put;
+       }
+       spin_unlock(&block_group->lock);
+
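+       /*
+        * Start with one page of cache space per gigabyte of block group,
+        * with a minimum of one page.
+        */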
+       num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+       if (!num_pages)
+               num_pages = 1;
+
+       /*
+        * Just to make absolutely sure we have enough space, preallocate 16
+        * pages worth of space for each gigabyte of the block group.  In
+        * practice we ought to need at most 8, but we want extra room for the
+        * header and a terminator between the extents and the bitmaps.
+        */
+       num_pages *= 16;
+       num_pages *= PAGE_CACHE_SIZE;
+
+       ret = btrfs_check_data_free_space(inode, num_pages);
+       if (ret)
+               goto out_put;
+
+       ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+                                             num_pages, num_pages,
+                                             &alloc_hint);
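+       /* Release the data space reservation taken above. */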
+       btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+       iput(inode);
+out_free:
+       btrfs_release_path(root, path);
+out:
+       spin_lock(&block_group->lock);
+       if (ret)
+               block_group->disk_cache_state = BTRFS_DC_ERROR;
+       else
+               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       spin_unlock(&block_group->lock);
+
+       return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
@@ -2700,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
+again:
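+       /*
+        * First pass: set up a free space cache inode for every block group
+        * whose on-disk cache needs to be rebuilt (BTRFS_DC_CLEAR).
+        */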
+       while (1) {
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+               err = cache_save_setup(cache, trans, path);
+               last = cache->key.objectid + cache->key.offset;
+               btrfs_put_block_group(cache);
+       }
+
        while (1) {
                if (last == 0) {
                        err = btrfs_run_delayed_refs(trans, root,
@@ -2709,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                while (cache) {
+                       if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+                               btrfs_put_block_group(cache);
+                               goto again;
+                       }
+
                        if (cache->dirty)
                                break;
                        cache = next_block_group(root, cache);
@@ -2720,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                        continue;
                }
 
+               if (cache->disk_cache_state == BTRFS_DC_SETUP)
+                       cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
                cache->dirty = 0;
                last = cache->key.objectid + cache->key.offset;
 
@@ -2728,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                btrfs_put_block_group(cache);
        }
 
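+       /*
+        * Final pass: write out the free space cache of every block group
+        * that was marked BTRFS_DC_NEED_WRITE above.
+        */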
+       while (1) {
+               /*
+                * This probably isn't needed since we're just marking our
+                * preallocated extent as written, but it can't hurt.
+                */
+               if (last == 0) {
+                       err = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long)-1);
+                       BUG_ON(err);
+               }
+
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       /*
+                        * Really this shouldn't happen, but it could if we
+                        * couldn't write the entire preallocated extent and
+                        * splitting the extent resulted in a new block.
+                        */
+                       if (cache->dirty) {
+                               btrfs_put_block_group(cache);
+                               goto again;
+                       }
+                       if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+
+               btrfs_write_out_cache(root, trans, cache, path);
+
+               /*
+                * If we didn't have an error then the cache state is still
+                * NEED_WRITE, so we can set it to WRITTEN.
+                */
+               if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+                       cache->disk_cache_state = BTRFS_DC_WRITTEN;
+               last = cache->key.objectid + cache->key.offset;
+               btrfs_put_block_group(cache);
+       }
+
        btrfs_free_path(path);
        return 0;
 }
@@ -2885,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
        struct btrfs_space_info *data_sinfo;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 used;
-       int ret = 0, committed = 0;
+       int ret = 0, committed = 0, alloc_chunk = 1;
 
        /* make sure bytes are sectorsize aligned */
        bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
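+       /*
+        * The tree root holds the free space cache inodes; reservations
+        * against it must not allocate chunks or force a transaction commit.
+        */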
+       if (root == root->fs_info->tree_root) {
+               alloc_chunk = 0;
+               committed = 1;
+       }
+
        data_sinfo = BTRFS_I(inode)->space_info;
        if (!data_sinfo)
                goto alloc;
@@ -2908,7 +3134,7 @@ again:
                 * if we don't have enough free bytes in this space then we need
                 * to alloc a new chunk.
                 */
-               if (!data_sinfo->full) {
+               if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
 
                        data_sinfo->force_alloc = 1;
@@ -3000,9 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo, u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+                             struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+       u64 thresh;
 
        if (sinfo->bytes_used + sinfo->bytes_reserved +
            alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3012,8 +3240,10 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo, u64 alloc_bytes)
            alloc_bytes < div_factor(num_bytes, 8))
                return 0;
 
-       if (num_bytes > 256 * 1024 * 1024 &&
-           sinfo->bytes_used < div_factor(num_bytes, 3))
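+       /*
+        * If this space info already has more than max(256M, 5% of the fs)
+        * allocated and less than a third of that is actually used, don't
+        * allocate another chunk.
+        */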
+       thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+       thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+       if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
                return 0;
 
        return 1;
@@ -3047,12 +3277,20 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                goto out;
        }
 
-       if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+       if (!force && !should_alloc_chunk(extent_root, space_info,
+                                         alloc_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
        spin_unlock(&space_info->lock);
 
+       /*
+        * If we have mixed data/metadata chunks we want to make sure we keep
+        * allocating mixed chunks instead of individual chunks.
+        */
+       if (btrfs_mixed_space_info(space_info))
+               flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
        /*
         * if we're doing a data chunk, go ahead and make sure that
         * we keep a reasonable number of metadata chunks allocated in the
@@ -3089,15 +3327,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        u64 reserved;
        u64 max_reclaim;
        u64 reclaimed = 0;
-       int no_reclaim = 0;
        int pause = 1;
-       int ret;
+       int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 
        block_rsv = &root->fs_info->delalloc_block_rsv;
        space_info = block_rsv->space_info;
-       spin_lock(&space_info->lock);
+
+       smp_mb();
        reserved = space_info->bytes_reserved;
-       spin_unlock(&space_info->lock);
 
        if (reserved == 0)
                return 0;
@@ -3105,20 +3342,11 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        max_reclaim = min(reserved, to_reclaim);
 
        while (1) {
-               ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0, sync);
-               if (!ret) {
-                       if (no_reclaim > 2)
-                               break;
-                       no_reclaim++;
-                       __set_current_state(TASK_INTERRUPTIBLE);
-                       schedule_timeout(pause);
-                       pause <<= 1;
-                       if (pause > HZ / 10)
-                               pause = HZ / 10;
-               } else {
-                       no_reclaim = 0;
-                       pause = 1;
-               }
+               /* have the flusher threads jump in and do some IO */
+               smp_mb();
+               nr_pages = min_t(unsigned long, nr_pages,
+                      root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+               writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
                spin_lock(&space_info->lock);
                if (reserved > space_info->bytes_reserved)
@@ -3131,6 +3359,13 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 
                if (trans && trans->transaction->blocked)
                        return -EAGAIN;
+
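+               /* Back off between passes, doubling the delay up to HZ/10. */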
+               __set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(pause);
+               pause <<= 1;
+               if (pause > HZ / 10)
+                       pause = HZ / 10;
+
        }
        return reclaimed >= to_reclaim;
 }
@@ -3177,7 +3412,7 @@ again:
         * our reservation.
         */
        if (unused <= space_info->total_bytes) {
-               unused -= space_info->total_bytes;
+               unused = space_info->total_bytes - unused;
                if (unused >= num_bytes) {
                        if (!reserved)
                                space_info->bytes_reserved += orig_bytes;
@@ -3371,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;
-       u64 alloc_target;
 
        block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
        if (!block_rsv)
                return NULL;
 
        btrfs_init_block_rsv(block_rsv);
-
-       alloc_target = btrfs_get_alloc_profile(root, 0);
        block_rsv->space_info = __find_space_info(fs_info,
                                                  BTRFS_BLOCK_GROUP_METADATA);
-
        return block_rsv;
 }
 
@@ -3815,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc)
 {
-       struct btrfs_block_group_cache *cache;
+       struct btrfs_block_group_cache *cache = NULL;
        struct btrfs_fs_info *info = root->fs_info;
-       int factor;
        u64 total = num_bytes;
        u64 old_val;
        u64 byte_in_group;
+       int factor;
 
        /* block accounting for super block */
        spin_lock(&info->delalloc_lock);
@@ -3842,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
                        factor = 2;
                else
                        factor = 1;
+               /*
+                * If this block group has its free space cache written out, we
+                * need to make sure to load it if we are removing space.  This
+                * is because the unpinning stage actually adds the space back
+                * to the block group; otherwise we would leak space.
+                */
+               if (!alloc && cache->cached == BTRFS_CACHE_NO)
+                       cache_block_group(cache, trans, 1);
+
                byte_in_group = bytenr - cache->key.objectid;
                WARN_ON(byte_in_group > cache->key.offset);
 
                spin_lock(&cache->space_info->lock);
                spin_lock(&cache->lock);
+
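+               /*
+                * Once an on-disk cache exists (cache_generation is set), any
+                * usage change means this group's cache must be rewritten, so
+                * flag it BTRFS_DC_CLEAR.
+                */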
+               if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+                   cache->disk_cache_state < BTRFS_DC_CLEAR)
+                       cache->disk_cache_state = BTRFS_DC_CLEAR;
+
                cache->dirty = 1;
                old_val = btrfs_block_group_used(&cache->item);
                num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4593,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
        bool found_uncached_bg = false;
        bool failed_cluster_refill = false;
        bool failed_alloc = false;
+       bool use_cluster = true;
        u64 ideal_cache_percent = 0;
        u64 ideal_cache_offset = 0;
 
@@ -4607,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                return -ENOSPC;
        }
 
+       /*
+        * If the space info covers both data and metadata it means we have a
+        * small filesystem, so don't use the allocation clusters.
+        */
+       if (btrfs_mixed_space_info(space_info))
+               use_cluster = false;
+
        if (orig_root->ref_cows || empty_size)
                allowed_chunk_alloc = 1;
 
-       if (data & BTRFS_BLOCK_GROUP_METADATA) {
+       if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
                last_ptr = &root->fs_info->meta_alloc_cluster;
                if (!btrfs_test_opt(root, SSD))
                        empty_cluster = 64 * 1024;
        }
 
-       if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+       if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+           btrfs_test_opt(root, SSD)) {
                last_ptr = &root->fs_info->data_alloc_cluster;
        }
 
@@ -4680,6 +4934,10 @@ have_block_group:
                if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                        u64 free_percent;
 
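+                       /*
+                        * Try the fast on-disk cache load first; if it
+                        * finishes the caching, retry this block group.
+                        */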
+                       ret = cache_block_group(block_group, trans, 1);
+                       if (block_group->cached == BTRFS_CACHE_FINISHED)
+                               goto have_block_group;
+
                        free_percent = btrfs_block_group_used(&block_group->item);
                        free_percent *= 100;
                        free_percent = div64_u64(free_percent,
@@ -4700,7 +4958,7 @@ have_block_group:
                        if (loop > LOOP_CACHING_NOWAIT ||
                            (loop > LOOP_FIND_IDEAL &&
                             atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group);
+                               ret = cache_block_group(block_group, trans, 0);
                                BUG_ON(ret);
                        }
                        found_uncached_bg = true;
@@ -5257,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
        u64 num_bytes = ins->offset;
 
        block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group);
+       cache_block_group(block_group, trans, 0);
        caching_ctl = get_caching_control(block_group);
 
        if (!caching_ctl) {
@@ -5456,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
        u64 generation;
        u64 refs;
        u64 flags;
-       u64 last = 0;
        u32 nritems;
        u32 blocksize;
        struct btrfs_key key;
@@ -5524,7 +5781,6 @@ reada:
                                           generation);
                if (ret)
                        break;
-               last = bytenr + blocksize;
                nread++;
        }
        wc->reada_slot = slot;
@@ -7848,6 +8104,40 @@ out:
        return ret;
 }
 
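+/*
+ * Walk all block groups and drop the cached free space inode reference
+ * (iref) each of them may hold.
+ */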
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+       struct btrfs_block_group_cache *block_group;
+       u64 last = 0;
+
+       while (1) {
+               struct inode *inode;
+
+               block_group = btrfs_lookup_first_block_group(info, last);
+               while (block_group) {
+                       spin_lock(&block_group->lock);
+                       if (block_group->iref)
+                               break;
+                       spin_unlock(&block_group->lock);
+                       block_group = next_block_group(info->tree_root,
+                                                      block_group);
+               }
+               if (!block_group) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
+
+               inode = block_group->inode;
+               block_group->iref = 0;
+               block_group->inode = NULL;
+               spin_unlock(&block_group->lock);
+               iput(inode);
+               last = block_group->key.objectid + block_group->key.offset;
+               btrfs_put_block_group(block_group);
+       }
+}
+
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
        struct btrfs_block_group_cache *block_group;
@@ -7931,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
+       int need_clear = 0;
+       u64 cache_gen;
 
        root = info->extent_root;
        key.objectid = 0;
@@ -7940,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        if (!path)
                return -ENOMEM;
 
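+       /*
+        * If the super block's cache generation doesn't match the current
+        * generation, or we mounted with the CLEAR_CACHE option, the on-disk
+        * free space caches need to be cleared.
+        */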
+       cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+       if (cache_gen != 0 &&
+           btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+               need_clear = 1;
+       if (btrfs_test_opt(root, CLEAR_CACHE))
+               need_clear = 1;
+       if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+               printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
        while (1) {
                ret = find_first_block_group(root, path, &key);
                if (ret > 0)
@@ -7962,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
 
+               if (need_clear)
+                       cache->disk_cache_state = BTRFS_DC_CLEAR;
+
                /*
                 * we only want to have 32k of ram per block group for keeping
                 * track of free space, and if we pass 1/2 of that we want to
@@ -8066,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->key.offset = size;
        cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
        cache->sectorsize = root->sectorsize;
+       cache->fs_info = root->fs_info;
 
        /*
         * we only want to have 32k of ram per block group for keeping track
@@ -8122,7 +8427,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct btrfs_block_group_cache *block_group;
        struct btrfs_free_cluster *cluster;
+       struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_key key;
+       struct inode *inode;
        int ret;
        int factor;
 
@@ -8158,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
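+       /*
+        * Drop the free space cache inode for this block group and delete
+        * its free space item from the tree root.
+        */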
+       inode = lookup_free_space_inode(root, block_group, path);
+       if (!IS_ERR(inode)) {
+               btrfs_orphan_add(trans, inode);
+               clear_nlink(inode);
+               /* One for the block groups ref */
+               spin_lock(&block_group->lock);
+               if (block_group->iref) {
+                       block_group->iref = 0;
+                       block_group->inode = NULL;
+                       spin_unlock(&block_group->lock);
+                       iput(inode);
+               } else {
+                       spin_unlock(&block_group->lock);
+               }
+               /* One for our lookup ref */
+               iput(inode);
+       }
+
+       key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+       key.offset = block_group->key.objectid;
+       key.type = 0;
+
+       ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+       if (ret < 0)
+               goto out;
+       if (ret > 0)
+               btrfs_release_path(tree_root, path);
+       if (ret == 0) {
+               ret = btrfs_del_item(trans, tree_root, path);
+               if (ret)
+                       goto out;
+               btrfs_release_path(tree_root, path);
+       }
+
        spin_lock(&root->fs_info->block_group_cache_lock);
        rb_erase(&block_group->cache_node,
                 &root->fs_info->block_group_cache_tree);
@@ -8182,6 +8523,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        block_group->space_info->disk_total -= block_group->key.offset * factor;
        spin_unlock(&block_group->space_info->lock);
 
+       memcpy(&key, &block_group->key, sizeof(key));
+
        btrfs_clear_space_info_full(root->fs_info);
 
        btrfs_put_block_group(block_group);