Merge branch 'btrfs-3.0' into for-linus
[pandora-kernel.git] / fs / btrfs / extent-tree.c
index 1f61bf5..80d6148 100644 (file)
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        return total_added;
 }
 
-static int caching_kthread(void *data)
+static noinline void caching_thread(struct btrfs_work *work)
 {
-       struct btrfs_block_group_cache *block_group = data;
-       struct btrfs_fs_info *fs_info = block_group->fs_info;
-       struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
-       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_root *extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_key key;
@@ -334,9 +334,14 @@ static int caching_kthread(void *data)
        u32 nritems;
        int ret = 0;
 
+       caching_ctl = container_of(work, struct btrfs_caching_control, work);
+       block_group = caching_ctl->block_group;
+       fs_info = block_group->fs_info;
+       extent_root = fs_info->extent_root;
+
        path = btrfs_alloc_path();
        if (!path)
-               return -ENOMEM;
+               goto out;
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -433,13 +438,11 @@ err:
        free_excluded_extents(extent_root, block_group);
 
        mutex_unlock(&caching_ctl->mutex);
+out:
        wake_up(&caching_ctl->wait);
 
        put_caching_control(caching_ctl);
-       atomic_dec(&block_group->space_info->caching_threads);
        btrfs_put_block_group(block_group);
-
-       return 0;
 }
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 {
        struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_caching_control *caching_ctl;
-       struct task_struct *tsk;
        int ret = 0;
 
        smp_mb();
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        caching_ctl->progress = cache->key.objectid;
        /* one for caching kthread, one for caching block group list */
        atomic_set(&caching_ctl->count, 2);
+       caching_ctl->work.func = caching_thread;
 
        spin_lock(&cache->lock);
        if (cache->cached != BTRFS_CACHE_NO) {
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
        up_write(&fs_info->extent_commit_sem);
 
-       atomic_inc(&cache->space_info->caching_threads);
        btrfs_get_block_group(cache);
 
-       tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
-                         cache->key.objectid);
-       if (IS_ERR(tsk)) {
-               ret = PTR_ERR(tsk);
-               printk(KERN_ERR "error running thread %d\n", ret);
-               BUG();
-       }
+       btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work);
 
        return ret;
 }
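
For context on the conversion above: instead of spawning a dedicated kthread per
block group, the caching work is now described by a struct btrfs_work embedded in
the caching control and queued on the shared caching_workers pool; the handler
recovers its state with container_of(). A minimal sketch of that pattern
(illustrative struct and handler names, not the exact btrfs code):

        struct my_ctl {
                struct btrfs_work work;         /* embedded work item */
                /* ... whatever state the handler needs ... */
        };

        static void my_handler(struct btrfs_work *work)
        {
                /* recover the containing structure from the embedded work item */
                struct my_ctl *ctl = container_of(work, struct my_ctl, work);

                /* ... do the deferred work using ctl ... */
        }

        /* submission side: point the work item at the handler and queue it */
        ctl->work.func = my_handler;
        btrfs_queue_worker(&fs_info->caching_workers, &ctl->work);
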
@@ -667,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
        struct btrfs_path *path;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
+
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -1784,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 
 
                for (i = 0; i < multi->num_stripes; i++, stripe++) {
+                       if (!stripe->dev->can_discard)
+                               continue;
+
                        ret = btrfs_issue_discard(stripe->dev->bdev,
                                                  stripe->physical,
                                                  stripe->length);
@@ -1791,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                                discarded_bytes += stripe->length;
                        else if (ret != -EOPNOTSUPP)
                                break;
+
+                       /*
+                        * Just in case we get back EOPNOTSUPP for some reason,
+                        * ignore the return value so we don't screw up people
+                        * calling discard_extent.
+                        */
+                       ret = 0;
                }
                kfree(multi);
        }
-       if (discarded_bytes && ret == -EOPNOTSUPP)
-               ret = 0;
 
        if (actual_bytes)
                *actual_bytes = discarded_bytes;
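
The new stripe->dev->can_discard check lets btrfs skip devices that cannot discard
instead of relying on -EOPNOTSUPP coming back from the block layer. Presumably the
flag is populated when the device is opened, from whether the underlying request
queue advertises discard support; a rough sketch of how such a flag is typically
derived (an assumption, not necessarily the exact btrfs code):

        struct request_queue *q = bdev_get_queue(device->bdev);

        /* remember once at open time whether this device can discard */
        if (blk_queue_discard(q))
                device->can_discard = 1;
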
@@ -2932,9 +2938,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->full = 0;
        found->force_alloc = CHUNK_ALLOC_NO_FORCE;
        found->chunk_alloc = 0;
+       found->flush = 0;
+       init_waitqueue_head(&found->wait);
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
-       atomic_set(&found->caching_threads, 0);
        return 0;
 }
 
@@ -3275,6 +3282,9 @@ again:
        }
 
        ret = btrfs_alloc_chunk(trans, extent_root, flags);
+       if (ret < 0 && ret != -ENOSPC)
+               goto out;
+
        spin_lock(&space_info->lock);
        if (ret)
                space_info->full = 1;
@@ -3284,6 +3294,7 @@ again:
        space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
        space_info->chunk_alloc = 0;
        spin_unlock(&space_info->lock);
+out:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
        return ret;
 }
@@ -3314,6 +3325,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        if (reserved == 0)
                return 0;
 
+       smp_mb();
+       if (root->fs_info->delalloc_bytes == 0) {
+               if (trans)
+                       return 0;
+               btrfs_wait_ordered_extents(root, 0, 0);
+               return 0;
+       }
+
        max_reclaim = min(reserved, to_reclaim);
 
        while (loops < 1024) {
@@ -3356,6 +3375,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
                }
 
        }
+       if (reclaimed >= to_reclaim && !trans)
+               btrfs_wait_ordered_extents(root, 0, 0);
        return reclaimed >= to_reclaim;
 }
 
@@ -3380,15 +3401,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
        u64 num_bytes = orig_bytes;
        int retries = 0;
        int ret = 0;
-       bool reserved = false;
        bool committed = false;
+       bool flushing = false;
 
 again:
-       ret = -ENOSPC;
-       if (reserved)
-               num_bytes = 0;
-
+       ret = 0;
        spin_lock(&space_info->lock);
+       /*
+        * We only want to wait if somebody other than us is flushing and we are
+        * actually allowed to flush.
+        */
+       while (flush && !flushing && space_info->flush) {
+               spin_unlock(&space_info->lock);
+               /*
+                * If we have a trans handle we can't wait because the flusher
+                * may have to commit the transaction, which would mean we would
+                * deadlock since we are waiting for the flusher to finish, but
+                * hold the current transaction open.
+                */
+               if (trans)
+                       return -EAGAIN;
+               ret = wait_event_interruptible(space_info->wait,
+                                              !space_info->flush);
+               /* Must have been interrupted, return */
+               if (ret)
+                       return -EINTR;
+
+               spin_lock(&space_info->lock);
+       }
+
+       ret = -ENOSPC;
        unused = space_info->bytes_used + space_info->bytes_reserved +
                 space_info->bytes_pinned + space_info->bytes_readonly +
                 space_info->bytes_may_use;
@@ -3403,8 +3445,7 @@ again:
        if (unused <= space_info->total_bytes) {
                unused = space_info->total_bytes - unused;
                if (unused >= num_bytes) {
-                       if (!reserved)
-                               space_info->bytes_reserved += orig_bytes;
+                       space_info->bytes_reserved += orig_bytes;
                        ret = 0;
                } else {
                        /*
@@ -3429,17 +3470,14 @@ again:
         * to reclaim space we can actually use it instead of somebody else
         * stealing it from us.
         */
-       if (ret && !reserved) {
-               space_info->bytes_reserved += orig_bytes;
-               reserved = true;
+       if (ret && flush) {
+               flushing = true;
+               space_info->flush = 1;
        }
 
        spin_unlock(&space_info->lock);
 
-       if (!ret)
-               return 0;
-
-       if (!flush)
+       if (!ret || !flush)
                goto out;
 
        /*
@@ -3447,11 +3485,11 @@ again:
         * metadata until after the IO is completed.
         */
        ret = shrink_delalloc(trans, root, num_bytes, 1);
-       if (ret > 0)
-               return 0;
-       else if (ret < 0)
+       if (ret < 0)
                goto out;
 
+       ret = 0;
+
        /*
         * So if we were overcommitted it's possible that somebody else flushed
         * out enough space and we simply didn't have enough space to reclaim,
@@ -3462,11 +3500,11 @@ again:
                goto again;
        }
 
-       spin_lock(&space_info->lock);
        /*
         * Not enough space to be reclaimed, don't bother committing the
         * transaction.
         */
+       spin_lock(&space_info->lock);
        if (space_info->bytes_pinned < orig_bytes)
                ret = -ENOSPC;
        spin_unlock(&space_info->lock);
@@ -3474,10 +3512,13 @@ again:
                goto out;
 
        ret = -EAGAIN;
-       if (trans || committed)
+       if (trans)
                goto out;
 
        ret = -ENOSPC;
+       if (committed)
+               goto out;
+
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto out;
@@ -3489,12 +3530,12 @@ again:
        }
 
 out:
-       if (reserved) {
+       if (flushing) {
                spin_lock(&space_info->lock);
-               space_info->bytes_reserved -= orig_bytes;
+               space_info->flush = 0;
+               wake_up_all(&space_info->wait);
                spin_unlock(&space_info->lock);
        }
-
        return ret;
 }
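
Taken together, the reserve_metadata_bytes() hunks above serialize flushing behind
a single flusher: the first task that fails a reservation with flushing allowed
claims space_info->flush, later callers sleep on space_info->wait (or bail out with
-EAGAIN if they hold a transaction, to avoid deadlocking against a flusher that may
need to commit), and the flusher clears the flag and wakes everyone on the way out.
Pulled out of diff context, the hand-off is roughly (a sketch of the lines above,
error paths trimmed):

        spin_lock(&space_info->lock);
        while (flush && !flushing && space_info->flush) {
                spin_unlock(&space_info->lock);
                if (trans)
                        return -EAGAIN; /* can't wait while holding a transaction */
                if (wait_event_interruptible(space_info->wait, !space_info->flush))
                        return -EINTR;
                spin_lock(&space_info->lock);
        }

        /* ... try the reservation ... */

        if (ret && flush) {
                flushing = true;        /* we are now the flusher */
                space_info->flush = 1;
        }
        spin_unlock(&space_info->lock);

        /* ... shrink delalloc and/or commit the transaction ... */

        if (flushing) {
                spin_lock(&space_info->lock);
                space_info->flush = 0;
                wake_up_all(&space_info->wait);  /* hand off to the next waiter */
                spin_unlock(&space_info->lock);
        }
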
 
@@ -3704,7 +3745,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
        if (commit_trans) {
                if (trans)
                        return -EAGAIN;
-
                trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
@@ -3874,26 +3914,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                int num_items)
-{
-       u64 num_bytes;
-       int ret;
-
-       if (num_items == 0 || root->fs_info->chunk_root == root)
-               return 0;
-
-       num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
-       ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-                                 num_bytes);
-       if (!ret) {
-               trans->bytes_reserved += num_bytes;
-               trans->block_rsv = &root->fs_info->trans_block_rsv;
-       }
-       return ret;
-}
-
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root)
 {
@@ -3944,6 +3964,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+       unsigned dropped_extents = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
+       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+       BTRFS_I(inode)->outstanding_extents--;
+
+       /*
+        * If we have at least as many outstanding extents as we have
+        * reserved, then we need to leave the reserved extents count alone.
+        */
+       if (BTRFS_I(inode)->outstanding_extents >=
+           BTRFS_I(inode)->reserved_extents)
+               goto out;
+
+       dropped_extents = BTRFS_I(inode)->reserved_extents -
+               BTRFS_I(inode)->outstanding_extents;
+       BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
        return num_bytes >>= 3;
@@ -3953,9 +3997,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-       u64 to_reserve;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_reserve = 0;
+       unsigned nr_extents = 0;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
@@ -3963,66 +4006,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+
+       if (BTRFS_I(inode)->outstanding_extents >
+           BTRFS_I(inode)->reserved_extents) {
+               nr_extents = BTRFS_I(inode)->outstanding_extents -
+                       BTRFS_I(inode)->reserved_extents;
+               BTRFS_I(inode)->reserved_extents += nr_extents;
 
-       if (nr_extents > reserved_extents) {
-               nr_extents -= reserved_extents;
                to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-       } else {
-               nr_extents = 0;
-               to_reserve = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->lock);
 
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-       if (ret)
+       if (ret) {
+               unsigned dropped;
+               /*
+                * We don't need the return value since our reservation failed,
+                * we just need to clean up our counter.
+                */
+               dropped = drop_outstanding_extent(inode);
+               WARN_ON(dropped > 1);
                return ret;
-
-       atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       }
 
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-       if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve, 0);
-
        return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 to_free;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_free = 0;
+       unsigned dropped;
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-       do {
-               int old, new;
-
-               nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-               if (nr_extents >= reserved_extents) {
-                       nr_extents = 0;
-                       break;
-               }
-               old = reserved_extents;
-               nr_extents = reserved_extents - nr_extents;
-               new = reserved_extents - nr_extents;
-               old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-                                    reserved_extents, new);
-               if (likely(old == reserved_extents))
-                       break;
-               reserved_extents = old;
-       } while (1);
+       dropped = drop_outstanding_extent(inode);
 
        to_free = calc_csum_metadata_size(inode, num_bytes);
-       if (nr_extents > 0)
-               to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+       if (dropped > 0)
+               to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
        btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                to_free);
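
A quick worked example of the new per-inode accounting, with hypothetical numbers:
suppose an inode currently has outstanding_extents = 3 and reserved_extents = 2.

        btrfs_delalloc_reserve_metadata():
                outstanding_extents: 3 -> 4
                nr_extents = 4 - 2 = 2, so reserved_extents: 2 -> 4
                to_reserve = btrfs_calc_trans_metadata_size(root, 2) + csum bytes

        btrfs_delalloc_release_metadata():
                drop_outstanding_extent(): outstanding_extents: 4 -> 3
                reserved_extents (4) > outstanding_extents (3), so dropped = 1
                and reserved_extents: 4 -> 3
                to_free = csum bytes + btrfs_calc_trans_metadata_size(root, 1)
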
@@ -4444,7 +4470,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                printk(KERN_ERR "umm, got %d back from search"
                                       ", was looking for %llu\n", ret,
                                       (unsigned long long)bytenr);
-                               btrfs_print_leaf(extent_root, path->nodes[0]);
+                               if (ret > 0)
+                                       btrfs_print_leaf(extent_root,
+                                                        path->nodes[0]);
                        }
                        BUG_ON(ret);
                        extent_slot = path->slots[0];
@@ -4842,7 +4870,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                     u64 num_bytes, u64 empty_size,
                                     u64 search_start, u64 search_end,
                                     u64 hint_byte, struct btrfs_key *ins,
-                                    int data)
+                                    u64 data)
 {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -4869,7 +4897,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 
        space_info = __find_space_info(root->fs_info, data);
        if (!space_info) {
-               printk(KERN_ERR "No space info for %d\n", data);
+               printk(KERN_ERR "No space info for %llu\n", data);
                return -ENOSPC;
        }
 
@@ -4990,14 +5018,10 @@ have_block_group:
                        }
 
                        /*
-                        * We only want to start kthread caching if we are at
-                        * the point where we will wait for caching to make
-                        * progress, or if our ideal search is over and we've
-                        * found somebody to start caching.
+                        * The caching workers are limited to 2 threads, so we
+                        * can queue as much work as we care to.
                         */
-                       if (loop > LOOP_CACHING_NOWAIT ||
-                           (loop > LOOP_FIND_IDEAL &&
-                            atomic_read(&space_info->caching_threads) < 2)) {
+                       if (loop > LOOP_FIND_IDEAL) {
                                ret = cache_block_group(block_group, trans,
                                                        orig_root, 0);
                                BUG_ON(ret);
@@ -5065,7 +5089,9 @@ have_block_group:
                         * group it does point to and try again
                         */
                        if (!last_ptr_loop && last_ptr->block_group &&
-                           last_ptr->block_group != block_group) {
+                           last_ptr->block_group != block_group &&
+                           index <=
+                                get_block_group_index(last_ptr->block_group)) {
 
                                btrfs_put_block_group(block_group);
                                block_group = last_ptr->block_group;
@@ -5219,8 +5245,7 @@ loop:
                if (loop == LOOP_FIND_IDEAL && found_uncached_bg) {
                        found_uncached_bg = false;
                        loop++;
-                       if (!ideal_cache_percent &&
-                           atomic_read(&space_info->caching_threads))
+                       if (!ideal_cache_percent)
                                goto search;
 
                        /*
@@ -5494,7 +5519,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5623,7 +5649,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        if (!buf)
                return ERR_PTR(-ENOMEM);
        btrfs_set_header_generation(buf, trans->transid);
-       btrfs_set_buffer_lockdep_class(buf, level);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
        clean_tree_block(trans, root, buf);
 
@@ -5910,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                        return 1;
 
                if (path->locks[level] && !wc->keep_locks) {
-                       btrfs_tree_unlock(eb);
+                       btrfs_tree_unlock_rw(eb, path->locks[level]);
                        path->locks[level] = 0;
                }
                return 0;
@@ -5934,7 +5960,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
         * keep the tree lock
         */
        if (path->locks[level] && level > 0) {
-               btrfs_tree_unlock(eb);
+               btrfs_tree_unlock_rw(eb, path->locks[level]);
                path->locks[level] = 0;
        }
        return 0;
@@ -6047,7 +6073,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        BUG_ON(level != btrfs_header_level(next));
        path->nodes[level] = next;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
        wc->level = level;
        if (wc->level == 1)
                wc->reada_slot = 0;
@@ -6118,7 +6144,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(level == 0);
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
                        ret = btrfs_lookup_extent_info(trans, root,
                                                       eb->start, eb->len,
@@ -6127,8 +6153,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                        BUG_ON(wc->refs[level] == 0);
                        if (wc->refs[level] == 1) {
-                               btrfs_tree_unlock(eb);
-                               path->locks[level] = 0;
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
                                return 1;
                        }
                }
@@ -6150,7 +6175,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                    btrfs_header_generation(eb) == trans->transid) {
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
                clean_tree_block(trans, root, eb);
        }
@@ -6229,7 +6254,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
                                return 0;
 
                        if (path->locks[level]) {
-                               btrfs_tree_unlock(path->nodes[level]);
+                               btrfs_tree_unlock_rw(path->nodes[level],
+                                                    path->locks[level]);
                                path->locks[level] = 0;
                        }
                        free_extent_buffer(path->nodes[level]);
@@ -6251,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * also make sure backrefs for the shared block and all lower level
  * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv, int update_ref)
+void btrfs_drop_snapshot(struct btrfs_root *root,
+                        struct btrfs_block_rsv *block_rsv, int update_ref)
 {
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans;
@@ -6265,10 +6291,17 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        int level;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        wc = kzalloc(sizeof(*wc), GFP_NOFS);
-       BUG_ON(!wc);
+       if (!wc) {
+               btrfs_free_path(path);
+               err = -ENOMEM;
+               goto out;
+       }
 
        trans = btrfs_start_transaction(tree_root, 0);
        BUG_ON(IS_ERR(trans));
@@ -6281,7 +6314,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->nodes[level] = btrfs_lock_root_node(root);
                btrfs_set_lock_blocking(path->nodes[level]);
                path->slots[level] = 0;
-               path->locks[level] = 1;
+               path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                memset(&wc->update_progress, 0,
                       sizeof(wc->update_progress));
        } else {
@@ -6296,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->lowest_level = 0;
                if (ret < 0) {
                        err = ret;
-                       goto out;
+                       goto out_free;
                }
                WARN_ON(ret > 0);
 
@@ -6403,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                free_extent_buffer(root->commit_root);
                kfree(root);
        }
-out:
+out_free:
        btrfs_end_transaction_throttle(trans, tree_root);
        kfree(wc);
        btrfs_free_path(path);
-       return err;
+out:
+       if (err)
+               btrfs_std_error(root->fs_info, err);
+       return;
 }
 
 /*
@@ -6449,7 +6485,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        level = btrfs_header_level(node);
        path->nodes[level] = node;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
        wc->refs[parent_level] = 1;
        wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
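
The locking changes in the hunks above stop treating path->locks[level] as a plain
boolean: it now records which kind of lock is held (BTRFS_WRITE_LOCK_BLOCKING here),
so btrfs_tree_unlock_rw() can release the matching lock type. The recurring pattern
is simply:

        /* record the lock type when the node is locked ... */
        path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

        /* ... and pass it back when the lock is dropped */
        if (path->locks[level]) {
                btrfs_tree_unlock_rw(path->nodes[level], path->locks[level]);
                path->locks[level] = 0;
        }
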
@@ -6524,30 +6560,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
+       u64 min_allocable_bytes;
        int ret = -ENOSPC;
 
-       if (cache->ro)
-               return 0;
+
+       /*
+        * We need some metadata space and system metadata space for
+        * allocating chunks in some corner cases, so keep some spare
+        * space unless we are forced to mark the block group read-only.
+        */
+       if ((sinfo->flags &
+            (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+           !force)
+               min_allocable_bytes = 1 * 1024 * 1024;
+       else
+               min_allocable_bytes = 0;
 
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
+
+       if (cache->ro) {
+               ret = 0;
+               goto out;
+       }
+
        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
                    cache->bytes_super - btrfs_block_group_used(&cache->item);
 
        if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
            sinfo->bytes_may_use + sinfo->bytes_readonly +
-           cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+           cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+           sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                sinfo->bytes_reserved += cache->reserved_pinned;
                cache->reserved_pinned = 0;
                cache->ro = 1;
                ret = 0;
        }
-
+out:
        spin_unlock(&cache->lock);
        spin_unlock(&sinfo->lock);
        return ret;
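
With the force parameter, the read-only test becomes: used + reserved + pinned +
may_use + readonly + reserved_pinned + num_bytes + min_allocable_bytes <=
total_bytes, where min_allocable_bytes is 1 MB for metadata/system space unless
force is set. A hypothetical example for a metadata space_info:

        total_bytes     = 100.0 MB
        already counted =  95.0 MB  (used + reserved + pinned + may_use + readonly)
        num_bytes       =   4.5 MB  (unused space in this block group)

        force = 0:  95.0 + 4.5 + 1.0 = 100.5 > 100.0  -> -ENOSPC, group stays writable
        force = 1:  95.0 + 4.5 + 0.0 =  99.5 <= 100.0 -> group is marked read-only
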
@@ -6571,7 +6625,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
                               CHUNK_ALLOC_FORCE);
 
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6579,7 +6633,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
 out:
        btrfs_end_transaction(trans, root);
        return ret;
@@ -6680,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        struct btrfs_space_info *space_info;
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        struct btrfs_device *device;
+       u64 min_free;
+       int index;
+       int dev_nr = 0;
+       int dev_min = 1;
        int full = 0;
        int ret = 0;
 
@@ -6689,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        if (!block_group)
                return -1;
 
+       min_free = btrfs_block_group_used(&block_group->item);
+
        /* no bytes used, we're good */
-       if (!btrfs_block_group_used(&block_group->item))
+       if (!min_free)
                goto out;
 
        space_info = block_group->space_info;
@@ -6706,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
         * all of the extents from this block group.  If we can, we're good
         */
        if ((space_info->total_bytes != block_group->key.offset) &&
-          (space_info->bytes_used + space_info->bytes_reserved +
-           space_info->bytes_pinned + space_info->bytes_readonly +
-           btrfs_block_group_used(&block_group->item) <
-           space_info->total_bytes)) {
+           (space_info->bytes_used + space_info->bytes_reserved +
+            space_info->bytes_pinned + space_info->bytes_readonly +
+            min_free < space_info->total_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
@@ -6726,9 +6785,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        if (full)
                goto out;
 
+       /*
+        * index:
+        *      0: raid10
+        *      1: raid1
+        *      2: dup
+        *      3: raid0
+        *      4: single
+        */
+       index = get_block_group_index(block_group);
+       if (index == 0) {
+               dev_min = 4;
+               min_free /= 2;
+       } else if (index == 1) {
+               dev_min = 2;
+       } else if (index == 2) {
+               min_free *= 2;
+       } else if (index == 3) {
+               dev_min = fs_devices->rw_devices;
+               min_free /= dev_min;
+       }
+
        mutex_lock(&root->fs_info->chunk_mutex);
        list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
-               u64 min_free = btrfs_block_group_used(&block_group->item);
                u64 dev_offset;
 
                /*
@@ -6739,7 +6818,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                        ret = find_free_dev_extent(NULL, device, min_free,
                                                   &dev_offset, NULL);
                        if (!ret)
+                               dev_nr++;
+
+                       if (dev_nr >= dev_min)
                                break;
+
                        ret = -1;
                }
        }
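
A worked example of the new per-profile requirements, for a hypothetical block group
with 600 MB used (so min_free starts at 600 MB):

        raid10 (index 0): dev_min = 4, min_free = 300 MB -> 4 devices, 300 MB hole each
        raid1  (index 1): dev_min = 2, min_free = 600 MB -> 2 devices, 600 MB hole each
        dup    (index 2): dev_min = 1, min_free = 1.2 GB -> 1 device with a 1.2 GB hole
        raid0  (index 3): dev_min = rw_devices (say 4),
                          min_free = 600 / 4 = 150 MB   -> 150 MB hole on each device
        single (index 4): dev_min = 1, min_free = 600 MB -> 1 device with a 600 MB hole
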
@@ -7016,7 +7099,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid))
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7030,9 +7113,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * mirrored block groups.
                 */
                list_for_each_entry(cache, &space_info->block_groups[3], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
                list_for_each_entry(cache, &space_info->block_groups[4], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        init_global_block_rsv(info);
@@ -7162,11 +7245,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&cluster->refill_lock);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        inode = lookup_free_space_inode(root, block_group, path);
        if (!IS_ERR(inode)) {
-               btrfs_orphan_add(trans, inode);
+               ret = btrfs_orphan_add(trans, inode);
+               BUG_ON(ret);
                clear_nlink(inode);
                /* One for the block groups ref */
                spin_lock(&block_group->lock);