Merge branch 'btrfs-3.0' into for-linus
[pandora-kernel.git] / fs / btrfs / extent-tree.c
index 5ab31f7..80d6148 100644 (file)
@@ -663,7 +663,9 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
        struct btrfs_path *path;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
+
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
@@ -1780,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 
 
                for (i = 0; i < multi->num_stripes; i++, stripe++) {
+                       if (!stripe->dev->can_discard)
+                               continue;
+
                        ret = btrfs_issue_discard(stripe->dev->bdev,
                                                  stripe->physical,
                                                  stripe->length);
@@ -1787,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                                discarded_bytes += stripe->length;
                        else if (ret != -EOPNOTSUPP)
                                break;
+
+                       /*
+                        * Just in case we get back EOPNOTSUPP for some reason,
+                        * just ignore the return value so we don't screw up
+                        * people calling discard_extent.
+                        */
+                       ret = 0;
                }
                kfree(multi);
        }
-       if (discarded_bytes && ret == -EOPNOTSUPP)
-               ret = 0;
 
        if (actual_bytes)
                *actual_bytes = discarded_bytes;
@@ -3272,6 +3282,9 @@ again:
        }
 
        ret = btrfs_alloc_chunk(trans, extent_root, flags);
+       if (ret < 0 && ret != -ENOSPC)
+               goto out;
+
        spin_lock(&space_info->lock);
        if (ret)
                space_info->full = 1;
@@ -3281,6 +3294,7 @@ again:
        space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
        space_info->chunk_alloc = 0;
        spin_unlock(&space_info->lock);
+out:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
        return ret;
 }
@@ -3474,6 +3488,8 @@ again:
        if (ret < 0)
                goto out;
 
+       ret = 0;
+
        /*
         * So if we were overcommitted it's possible that somebody else flushed
         * out enough space and we simply didn't have enough space to reclaim,
@@ -3496,10 +3512,13 @@ again:
                goto out;
 
        ret = -EAGAIN;
-       if (trans || committed)
+       if (trans)
                goto out;
 
        ret = -ENOSPC;
+       if (committed)
+               goto out;
+
        trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto out;
@@ -3726,7 +3745,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
        if (commit_trans) {
                if (trans)
                        return -EAGAIN;
-
                trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
@@ -3946,6 +3964,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
+static unsigned drop_outstanding_extent(struct inode *inode)
+{
+       unsigned dropped_extents = 0;
+
+       spin_lock(&BTRFS_I(inode)->lock);
+       BUG_ON(!BTRFS_I(inode)->outstanding_extents);
+       BTRFS_I(inode)->outstanding_extents--;
+
+       /*
+        * If we have at least as many outstanding extents as we have
+        * reserved then we need to leave the reserved extents count alone.
+        */
+       if (BTRFS_I(inode)->outstanding_extents >=
+           BTRFS_I(inode)->reserved_extents)
+               goto out;
+
+       dropped_extents = BTRFS_I(inode)->reserved_extents -
+               BTRFS_I(inode)->outstanding_extents;
+       BTRFS_I(inode)->reserved_extents -= dropped_extents;
+out:
+       spin_unlock(&BTRFS_I(inode)->lock);
+       return dropped_extents;
+}
+
 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes)
 {
        return num_bytes >>= 3;
@@ -3955,9 +3997,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
-       u64 to_reserve;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_reserve = 0;
+       unsigned nr_extents = 0;
        int ret;
 
        if (btrfs_transaction_in_commit(root->fs_info))
@@ -3965,66 +4006,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
-       nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
+       spin_lock(&BTRFS_I(inode)->lock);
+       BTRFS_I(inode)->outstanding_extents++;
+
+       if (BTRFS_I(inode)->outstanding_extents >
+           BTRFS_I(inode)->reserved_extents) {
+               nr_extents = BTRFS_I(inode)->outstanding_extents -
+                       BTRFS_I(inode)->reserved_extents;
+               BTRFS_I(inode)->reserved_extents += nr_extents;
 
-       if (nr_extents > reserved_extents) {
-               nr_extents -= reserved_extents;
                to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
-       } else {
-               nr_extents = 0;
-               to_reserve = 0;
        }
+       spin_unlock(&BTRFS_I(inode)->lock);
 
        to_reserve += calc_csum_metadata_size(inode, num_bytes);
        ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
-       if (ret)
+       if (ret) {
+               unsigned dropped;
+               /*
+                * We don't need the return value since our reservation failed,
+                * we just need to clean up our counter.
+                */
+               dropped = drop_outstanding_extent(inode);
+               WARN_ON(dropped > 1);
                return ret;
-
-       atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents);
-       atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+       }
 
        block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
-       if (block_rsv->size > 512 * 1024 * 1024)
-               shrink_delalloc(NULL, root, to_reserve, 0);
-
        return 0;
 }
 
 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 to_free;
-       int nr_extents;
-       int reserved_extents;
+       u64 to_free = 0;
+       unsigned dropped;
 
        num_bytes = ALIGN(num_bytes, root->sectorsize);
-       atomic_dec(&BTRFS_I(inode)->outstanding_extents);
-       WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0);
-
-       reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents);
-       do {
-               int old, new;
-
-               nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
-               if (nr_extents >= reserved_extents) {
-                       nr_extents = 0;
-                       break;
-               }
-               old = reserved_extents;
-               nr_extents = reserved_extents - nr_extents;
-               new = reserved_extents - nr_extents;
-               old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents,
-                                    reserved_extents, new);
-               if (likely(old == reserved_extents))
-                       break;
-               reserved_extents = old;
-       } while (1);
+       dropped = drop_outstanding_extent(inode);
 
        to_free = calc_csum_metadata_size(inode, num_bytes);
-       if (nr_extents > 0)
-               to_free += btrfs_calc_trans_metadata_size(root, nr_extents);
+       if (dropped > 0)
+               to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
        btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
                                to_free);
@@ -4446,7 +4470,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                printk(KERN_ERR "umm, got %d back from search"
                                       ", was looking for %llu\n", ret,
                                       (unsigned long long)bytenr);
-                               btrfs_print_leaf(extent_root, path->nodes[0]);
+                               if (ret > 0)
+                                       btrfs_print_leaf(extent_root,
+                                                        path->nodes[0]);
                        }
                        BUG_ON(ret);
                        extent_slot = path->slots[0];
@@ -5063,7 +5089,9 @@ have_block_group:
                         * group is does point to and try again
                         */
                        if (!last_ptr_loop && last_ptr->block_group &&
-                           last_ptr->block_group != block_group) {
+                           last_ptr->block_group != block_group &&
+                           index <=
+                                get_block_group_index(last_ptr->block_group)) {
 
                                btrfs_put_block_group(block_group);
                                block_group = last_ptr->block_group;
@@ -5491,7 +5519,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path)
+               return -ENOMEM;
 
        path->leave_spinning = 1;
        ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
@@ -5620,7 +5649,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
        if (!buf)
                return ERR_PTR(-ENOMEM);
        btrfs_set_header_generation(buf, trans->transid);
-       btrfs_set_buffer_lockdep_class(buf, level);
+       btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
        btrfs_tree_lock(buf);
        clean_tree_block(trans, root, buf);
 
@@ -5907,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
                        return 1;
 
                if (path->locks[level] && !wc->keep_locks) {
-                       btrfs_tree_unlock(eb);
+                       btrfs_tree_unlock_rw(eb, path->locks[level]);
                        path->locks[level] = 0;
                }
                return 0;
@@ -5931,7 +5960,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
         * keep the tree lock
         */
        if (path->locks[level] && level > 0) {
-               btrfs_tree_unlock(eb);
+               btrfs_tree_unlock_rw(eb, path->locks[level]);
                path->locks[level] = 0;
        }
        return 0;
@@ -6044,7 +6073,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
        BUG_ON(level != btrfs_header_level(next));
        path->nodes[level] = next;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
        wc->level = level;
        if (wc->level == 1)
                wc->reada_slot = 0;
@@ -6115,7 +6144,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(level == 0);
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
                        ret = btrfs_lookup_extent_info(trans, root,
                                                       eb->start, eb->len,
@@ -6124,8 +6153,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                        BUG_ON(ret);
                        BUG_ON(wc->refs[level] == 0);
                        if (wc->refs[level] == 1) {
-                               btrfs_tree_unlock(eb);
-                               path->locks[level] = 0;
+                               btrfs_tree_unlock_rw(eb, path->locks[level]);
                                return 1;
                        }
                }
@@ -6147,7 +6175,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
                    btrfs_header_generation(eb) == trans->transid) {
                        btrfs_tree_lock(eb);
                        btrfs_set_lock_blocking(eb);
-                       path->locks[level] = 1;
+                       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                }
                clean_tree_block(trans, root, eb);
        }
@@ -6226,7 +6254,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
                                return 0;
 
                        if (path->locks[level]) {
-                               btrfs_tree_unlock(path->nodes[level]);
+                               btrfs_tree_unlock_rw(path->nodes[level],
+                                                    path->locks[level]);
                                path->locks[level] = 0;
                        }
                        free_extent_buffer(path->nodes[level]);
@@ -6248,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
  * also make sure backrefs for the shared block and all lower level
  * blocks are properly updated.
  */
-int btrfs_drop_snapshot(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv, int update_ref)
+void btrfs_drop_snapshot(struct btrfs_root *root,
+                        struct btrfs_block_rsv *block_rsv, int update_ref)
 {
        struct btrfs_path *path;
        struct btrfs_trans_handle *trans;
@@ -6262,10 +6291,17 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        int level;
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        wc = kzalloc(sizeof(*wc), GFP_NOFS);
-       BUG_ON(!wc);
+       if (!wc) {
+               btrfs_free_path(path);
+               err = -ENOMEM;
+               goto out;
+       }
 
        trans = btrfs_start_transaction(tree_root, 0);
        BUG_ON(IS_ERR(trans));
@@ -6278,7 +6314,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->nodes[level] = btrfs_lock_root_node(root);
                btrfs_set_lock_blocking(path->nodes[level]);
                path->slots[level] = 0;
-               path->locks[level] = 1;
+               path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                memset(&wc->update_progress, 0,
                       sizeof(wc->update_progress));
        } else {
@@ -6293,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                path->lowest_level = 0;
                if (ret < 0) {
                        err = ret;
-                       goto out;
+                       goto out_free;
                }
                WARN_ON(ret > 0);
 
@@ -6400,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                free_extent_buffer(root->commit_root);
                kfree(root);
        }
-out:
+out_free:
        btrfs_end_transaction_throttle(trans, tree_root);
        kfree(wc);
        btrfs_free_path(path);
-       return err;
+out:
+       if (err)
+               btrfs_std_error(root->fs_info, err);
+       return;
 }
 
 /*
@@ -6446,7 +6485,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
        level = btrfs_header_level(node);
        path->nodes[level] = node;
        path->slots[level] = 0;
-       path->locks[level] = 1;
+       path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
 
        wc->refs[parent_level] = 1;
        wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -6521,30 +6560,48 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
        return flags;
 }
 
-static int set_block_group_ro(struct btrfs_block_group_cache *cache)
+static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
 {
        struct btrfs_space_info *sinfo = cache->space_info;
        u64 num_bytes;
+       u64 min_allocable_bytes;
        int ret = -ENOSPC;
 
-       if (cache->ro)
-               return 0;
+
+       /*
+        * We need some metadata space and system metadata space for
+        * allocating chunks in some corner cases until we force to set
+        * it to be readonly.
+        */
+       if ((sinfo->flags &
+            (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
+           !force)
+               min_allocable_bytes = 1 * 1024 * 1024;
+       else
+               min_allocable_bytes = 0;
 
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
+
+       if (cache->ro) {
+               ret = 0;
+               goto out;
+       }
+
        num_bytes = cache->key.offset - cache->reserved - cache->pinned -
                    cache->bytes_super - btrfs_block_group_used(&cache->item);
 
        if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
            sinfo->bytes_may_use + sinfo->bytes_readonly +
-           cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
+           cache->reserved_pinned + num_bytes + min_allocable_bytes <=
+           sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                sinfo->bytes_reserved += cache->reserved_pinned;
                cache->reserved_pinned = 0;
                cache->ro = 1;
                ret = 0;
        }
-
+out:
        spin_unlock(&cache->lock);
        spin_unlock(&sinfo->lock);
        return ret;
@@ -6568,7 +6625,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
                               CHUNK_ALLOC_FORCE);
 
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
@@ -6576,7 +6633,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
                             CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
-       ret = set_block_group_ro(cache);
+       ret = set_block_group_ro(cache, 0);
 out:
        btrfs_end_transaction(trans, root);
        return ret;
@@ -6677,6 +6734,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        struct btrfs_space_info *space_info;
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        struct btrfs_device *device;
+       u64 min_free;
+       int index;
+       int dev_nr = 0;
+       int dev_min = 1;
        int full = 0;
        int ret = 0;
 
@@ -6686,8 +6747,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        if (!block_group)
                return -1;
 
+       min_free = btrfs_block_group_used(&block_group->item);
+
        /* no bytes used, we're good */
-       if (!btrfs_block_group_used(&block_group->item))
+       if (!min_free)
                goto out;
 
        space_info = block_group->space_info;
@@ -6703,10 +6766,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
         * all of the extents from this block group.  If we can, we're good
         */
        if ((space_info->total_bytes != block_group->key.offset) &&
-          (space_info->bytes_used + space_info->bytes_reserved +
-           space_info->bytes_pinned + space_info->bytes_readonly +
-           btrfs_block_group_used(&block_group->item) <
-           space_info->total_bytes)) {
+           (space_info->bytes_used + space_info->bytes_reserved +
+            space_info->bytes_pinned + space_info->bytes_readonly +
+            min_free < space_info->total_bytes)) {
                spin_unlock(&space_info->lock);
                goto out;
        }
@@ -6723,9 +6785,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
        if (full)
                goto out;
 
+       /*
+        * index:
+        *      0: raid10
+        *      1: raid1
+        *      2: dup
+        *      3: raid0
+        *      4: single
+        */
+       index = get_block_group_index(block_group);
+       if (index == 0) {
+               dev_min = 4;
+               min_free /= 2;
+       } else if (index == 1) {
+               dev_min = 2;
+       } else if (index == 2) {
+               min_free *= 2;
+       } else if (index == 3) {
+               dev_min = fs_devices->rw_devices;
+               min_free /= dev_min;
+       }
+
        mutex_lock(&root->fs_info->chunk_mutex);
        list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
-               u64 min_free = btrfs_block_group_used(&block_group->item);
                u64 dev_offset;
 
                /*
@@ -6736,7 +6818,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
                        ret = find_free_dev_extent(NULL, device, min_free,
                                                   &dev_offset, NULL);
                        if (!ret)
+                               dev_nr++;
+
+                       if (dev_nr >= dev_min)
                                break;
+
                        ret = -1;
                }
        }
@@ -7013,7 +7099,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 
                set_avail_alloc_bits(root->fs_info, cache->flags);
                if (btrfs_chunk_readonly(root, cache->key.objectid))
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
@@ -7027,9 +7113,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * mirrored block groups.
                 */
                list_for_each_entry(cache, &space_info->block_groups[3], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
                list_for_each_entry(cache, &space_info->block_groups[4], list)
-                       set_block_group_ro(cache);
+                       set_block_group_ro(cache, 1);
        }
 
        init_global_block_rsv(info);
@@ -7159,11 +7245,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&cluster->refill_lock);
 
        path = btrfs_alloc_path();
-       BUG_ON(!path);
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        inode = lookup_free_space_inode(root, block_group, path);
        if (!IS_ERR(inode)) {
-               btrfs_orphan_add(trans, inode);
+               ret = btrfs_orphan_add(trans, inode);
+               BUG_ON(ret);
                clear_nlink(inode);
                /* One for the block groups ref */
                spin_lock(&block_group->lock);