#include "locking.h"
#include "free-space-cache.h"
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, int alloc,
int mark_free);
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 parent, u64 root_objectid,
u64 flags, struct btrfs_disk_key *key,
int level, struct btrfs_key *ins);
-
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags, int force);
+static int pin_down_bytes(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
+ struct extent_buffer **must_clean);
+static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key);
static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
return ret;
}
-/*
- * We always set EXTENT_LOCKED for the super mirror extents so we don't
- * overwrite them, so those bits need to be unset. Also, if we are unmounting
- * with pinned extents still sitting there because we had a block group caching,
- * we need to clear those now, since we are done.
- */
-void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+static int add_excluded_extent(struct btrfs_root *root,
+ u64 start, u64 num_bytes)
{
- u64 start, end, last = 0;
- int ret;
+ u64 end = start + num_bytes - 1;
+ set_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ set_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ return 0;
+}
- while (1) {
- ret = find_first_extent_bit(&info->pinned_extents, last,
- &start, &end,
- EXTENT_LOCKED|EXTENT_DIRTY);
- if (ret)
- break;
+static void free_excluded_extents(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
+{
+ u64 start, end;
- clear_extent_bits(&info->pinned_extents, start, end,
- EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
- last = end+1;
- }
+ start = cache->key.objectid;
+ end = start + cache->key.offset - 1;
+
+ clear_extent_bits(&root->fs_info->freed_extents[0],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
+ clear_extent_bits(&root->fs_info->freed_extents[1],
+ start, end, EXTENT_UPTODATE, GFP_NOFS);
}
-static int remove_sb_from_cache(struct btrfs_root *root,
- struct btrfs_block_group_cache *cache)
+static int exclude_super_stripes(struct btrfs_root *root,
+ struct btrfs_block_group_cache *cache)
{
- struct btrfs_fs_info *fs_info = root->fs_info;
u64 bytenr;
u64 *logical;
int stripe_len;
cache->key.objectid, bytenr,
0, &logical, &nr, &stripe_len);
BUG_ON(ret);
+
while (nr--) {
- try_lock_extent(&fs_info->pinned_extents,
- logical[nr],
- logical[nr] + stripe_len - 1, GFP_NOFS);
+ ret = add_excluded_extent(root, logical[nr],
+ stripe_len);
+ BUG_ON(ret);
}
+
kfree(logical);
}
-
return 0;
}
+static struct btrfs_caching_control *
+get_caching_control(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *ctl;
+
+ spin_lock(&cache->lock);
+ if (cache->cached != BTRFS_CACHE_STARTED) {
+ spin_unlock(&cache->lock);
+ return NULL;
+ }
+
+ ctl = cache->caching_ctl;
+ atomic_inc(&ctl->count);
+ spin_unlock(&cache->lock);
+ return ctl;
+}
+
+static void put_caching_control(struct btrfs_caching_control *ctl)
+{
+ if (atomic_dec_and_test(&ctl->count))
+ kfree(ctl);
+}
+
/*
* this is only called by cache_block_group, since we could have freed extents
* we need to check the pinned_extents for any extents that can't be used yet
int ret;
while (start < end) {
- ret = find_first_extent_bit(&info->pinned_extents, start,
+ ret = find_first_extent_bit(info->pinned_extents, start,
&extent_start, &extent_end,
- EXTENT_DIRTY|EXTENT_LOCKED);
+ EXTENT_DIRTY | EXTENT_UPTODATE);
if (ret)
break;
{
struct btrfs_block_group_cache *block_group = data;
struct btrfs_fs_info *fs_info = block_group->fs_info;
- u64 last = 0;
+ struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
+ struct btrfs_root *extent_root = fs_info->extent_root;
struct btrfs_path *path;
- int ret = 0;
- struct btrfs_key key;
struct extent_buffer *leaf;
- int slot;
+ struct btrfs_key key;
u64 total_found = 0;
-
- BUG_ON(!fs_info);
+ u64 last = 0;
+ u32 nritems;
+ int ret = 0;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- atomic_inc(&block_group->space_info->caching_threads);
+ exclude_super_stripes(extent_root, block_group);
+
last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+
/*
* We don't want to deadlock with somebody trying to allocate a new
* extent for the extent root while also trying to search the extent
key.objectid = last;
key.offset = 0;
- btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+ key.type = BTRFS_EXTENT_ITEM_KEY;
again:
+ mutex_lock(&caching_ctl->mutex);
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
- ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
+ ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto err;
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+
while (1) {
smp_mb();
- if (block_group->fs_info->closing > 1) {
+ if (fs_info->closing > 1) {
last = (u64)-1;
break;
}
- leaf = path->nodes[0];
- slot = path->slots[0];
- if (slot >= btrfs_header_nritems(leaf)) {
- ret = btrfs_next_leaf(fs_info->extent_root, path);
- if (ret < 0)
- goto err;
- else if (ret)
+ if (path->slots[0] < nritems) {
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ } else {
+ ret = find_next_key(path, 0, &key);
+ if (ret)
break;
- if (need_resched() ||
- btrfs_transaction_in_commit(fs_info)) {
- leaf = path->nodes[0];
-
- /* this shouldn't happen, but if the
- * leaf is empty just move on.
- */
- if (btrfs_header_nritems(leaf) == 0)
- break;
- /*
- * we need to copy the key out so that
- * we are sure the next search advances
- * us forward in the btree.
- */
- btrfs_item_key_to_cpu(leaf, &key, 0);
- btrfs_release_path(fs_info->extent_root, path);
- up_read(&fs_info->extent_commit_sem);
+ caching_ctl->progress = last;
+ btrfs_release_path(extent_root, path);
+ up_read(&fs_info->extent_commit_sem);
+ mutex_unlock(&caching_ctl->mutex);
+ if (btrfs_transaction_in_commit(fs_info))
schedule_timeout(1);
- goto again;
- }
+ else
+ cond_resched();
+ goto again;
+ }
+ if (key.objectid < block_group->key.objectid) {
+ path->slots[0]++;
continue;
}
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (key.objectid < block_group->key.objectid)
- goto next;
if (key.objectid >= block_group->key.objectid +
block_group->key.offset)
break;
- if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+ if (key.type == BTRFS_EXTENT_ITEM_KEY) {
total_found += add_new_free_space(block_group,
fs_info, last,
key.objectid);
last = key.objectid + key.offset;
- }
- if (total_found > (1024 * 1024 * 2)) {
- total_found = 0;
- wake_up(&block_group->caching_q);
+ if (total_found > (1024 * 1024 * 2)) {
+ total_found = 0;
+ wake_up(&caching_ctl->wait);
+ }
}
-next:
path->slots[0]++;
}
ret = 0;
total_found += add_new_free_space(block_group, fs_info, last,
block_group->key.objectid +
block_group->key.offset);
+ caching_ctl->progress = (u64)-1;
spin_lock(&block_group->lock);
+ block_group->caching_ctl = NULL;
block_group->cached = BTRFS_CACHE_FINISHED;
spin_unlock(&block_group->lock);
err:
btrfs_free_path(path);
up_read(&fs_info->extent_commit_sem);
- atomic_dec(&block_group->space_info->caching_threads);
- wake_up(&block_group->caching_q);
+ free_excluded_extents(extent_root, block_group);
+
+ mutex_unlock(&caching_ctl->mutex);
+ wake_up(&caching_ctl->wait);
+
+ put_caching_control(caching_ctl);
+ atomic_dec(&block_group->space_info->caching_threads);
return 0;
}
static int cache_block_group(struct btrfs_block_group_cache *cache)
{
+ struct btrfs_fs_info *fs_info = cache->fs_info;
+ struct btrfs_caching_control *caching_ctl;
struct task_struct *tsk;
int ret = 0;
+ smp_mb();
+ if (cache->cached != BTRFS_CACHE_NO)
+ return 0;
+
+ caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
+ BUG_ON(!caching_ctl);
+
+ INIT_LIST_HEAD(&caching_ctl->list);
+ mutex_init(&caching_ctl->mutex);
+ init_waitqueue_head(&caching_ctl->wait);
+ caching_ctl->block_group = cache;
+ caching_ctl->progress = cache->key.objectid;
+ /* one for caching kthread, one for caching block group list */
+ atomic_set(&caching_ctl->count, 2);
+
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
- return ret;
+ kfree(caching_ctl);
+ return 0;
}
+ cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
spin_unlock(&cache->lock);
+ down_write(&fs_info->extent_commit_sem);
+ list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
+ up_write(&fs_info->extent_commit_sem);
+
+ atomic_inc(&cache->space_info->caching_threads);
+
tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
cache->key.objectid);
if (IS_ERR(tsk)) {
parent, ref_root, flags,
ref->objectid, ref->offset,
&ins, node->ref_mod);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent,
extent_op->flags_to_set,
&extent_op->key,
ref->level, &ins);
- update_reserved_extents(root, ins.objectid, ins.offset, 0);
} else if (node->action == BTRFS_ADD_DELAYED_REF) {
ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
node->num_bytes, parent, ref_root,
BUG_ON(extent_op);
head = btrfs_delayed_node_to_head(node);
if (insert_reserved) {
+ int mark_free = 0;
+ struct extent_buffer *must_clean = NULL;
+
+ ret = pin_down_bytes(trans, root, NULL,
+ node->bytenr, node->num_bytes,
+ head->is_data, 1, &must_clean);
+ if (ret > 0)
+ mark_free = 1;
+
+ if (must_clean) {
+ clean_tree_block(NULL, root, must_clean);
+ btrfs_tree_unlock(must_clean);
+ free_extent_buffer(must_clean);
+ }
if (head->is_data) {
ret = btrfs_del_csums(trans, root,
node->bytenr,
node->num_bytes);
BUG_ON(ret);
}
- btrfs_update_pinned_extents(root, node->bytenr,
- node->num_bytes, 1);
- update_reserved_extents(root, node->bytenr,
- node->num_bytes, 0);
+ if (mark_free) {
+ ret = btrfs_free_reserved_extent(root,
+ node->bytenr,
+ node->num_bytes);
+ BUG_ON(ret);
+ }
}
mutex_unlock(&head->mutex);
return 0;
num_bytes = min(total, cache->key.offset - byte_in_group);
if (alloc) {
old_val += num_bytes;
+ btrfs_set_block_group_used(&cache->item, old_val);
+ cache->reserved -= num_bytes;
cache->space_info->bytes_used += num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
- btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
} else {
return bytenr;
}
-int btrfs_update_pinned_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int pin)
+/*
+ * this function must be called within transaction
+ */
+int btrfs_pin_extent(struct btrfs_root *root,
+ u64 bytenr, u64 num_bytes, int reserved)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache;
- if (pin)
- set_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + num - 1, GFP_NOFS);
-
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
- if (pin) {
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- cache->pinned += len;
- cache->space_info->bytes_pinned += len;
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- fs_info->total_pinned += len;
- } else {
- int unpin = 0;
+ cache = btrfs_lookup_block_group(fs_info, bytenr);
+ BUG_ON(!cache);
- /*
- * in order to not race with the block group caching, we
- * only want to unpin the extent if we are cached. If
- * we aren't cached, we want to start async caching this
- * block group so we can free the extent the next time
- * around.
- */
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- unpin = (cache->cached == BTRFS_CACHE_FINISHED);
- if (likely(unpin)) {
- cache->pinned -= len;
- cache->space_info->bytes_pinned -= len;
- fs_info->total_pinned -= len;
- }
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned += num_bytes;
+ cache->space_info->bytes_pinned += num_bytes;
+ if (reserved) {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
+ }
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
- if (likely(unpin))
- clear_extent_dirty(&fs_info->pinned_extents,
- bytenr, bytenr + len -1,
- GFP_NOFS);
- else
- cache_block_group(cache);
+ btrfs_put_block_group(cache);
- if (unpin)
- btrfs_add_free_space(cache, bytenr, len);
- }
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
+ set_extent_dirty(fs_info->pinned_extents,
+ bytenr, bytenr + num_bytes - 1, GFP_NOFS);
+ return 0;
+}
+
+static int update_reserved_extents(struct btrfs_block_group_cache *cache,
+ u64 num_bytes, int reserve)
+{
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ if (reserve) {
+ cache->reserved += num_bytes;
+ cache->space_info->bytes_reserved += num_bytes;
+ } else {
+ cache->reserved -= num_bytes;
+ cache->space_info->bytes_reserved -= num_bytes;
}
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
return 0;
}
-static int update_reserved_extents(struct btrfs_root *root,
- u64 bytenr, u64 num, int reserve)
+int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
{
- u64 len;
- struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_caching_control *next;
+ struct btrfs_caching_control *caching_ctl;
+ struct btrfs_block_group_cache *cache;
- while (num > 0) {
- cache = btrfs_lookup_block_group(fs_info, bytenr);
- BUG_ON(!cache);
- len = min(num, cache->key.offset -
- (bytenr - cache->key.objectid));
+ down_write(&fs_info->extent_commit_sem);
- spin_lock(&cache->space_info->lock);
- spin_lock(&cache->lock);
- if (reserve) {
- cache->reserved += len;
- cache->space_info->bytes_reserved += len;
+ list_for_each_entry_safe(caching_ctl, next,
+ &fs_info->caching_block_groups, list) {
+ cache = caching_ctl->block_group;
+ if (block_group_cache_done(cache)) {
+ cache->last_byte_to_unpin = (u64)-1;
+ list_del_init(&caching_ctl->list);
+ put_caching_control(caching_ctl);
} else {
- cache->reserved -= len;
- cache->space_info->bytes_reserved -= len;
+ cache->last_byte_to_unpin = caching_ctl->progress;
}
- spin_unlock(&cache->lock);
- spin_unlock(&cache->space_info->lock);
- btrfs_put_block_group(cache);
- bytenr += len;
- num -= len;
}
+
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ fs_info->pinned_extents = &fs_info->freed_extents[1];
+ else
+ fs_info->pinned_extents = &fs_info->freed_extents[0];
+
+ up_write(&fs_info->extent_commit_sem);
return 0;
}
-int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- u64 last = 0;
- u64 start;
- u64 end;
- struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
- int ret;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_block_group_cache *cache = NULL;
+ u64 len;
- while (1) {
- ret = find_first_extent_bit(pinned_extents, last,
- &start, &end, EXTENT_DIRTY);
- if (ret)
- break;
+ while (start <= end) {
+ if (!cache ||
+ start >= cache->key.objectid + cache->key.offset) {
+ if (cache)
+ btrfs_put_block_group(cache);
+ cache = btrfs_lookup_block_group(fs_info, start);
+ BUG_ON(!cache);
+ }
+
+ len = cache->key.objectid + cache->key.offset - start;
+ len = min(len, end + 1 - start);
- set_extent_dirty(copy, start, end, GFP_NOFS);
- last = end + 1;
+ if (start < cache->last_byte_to_unpin) {
+ len = min(len, cache->last_byte_to_unpin - start);
+ btrfs_add_free_space(cache, start, len);
+ }
+
+ spin_lock(&cache->space_info->lock);
+ spin_lock(&cache->lock);
+ cache->pinned -= len;
+ cache->space_info->bytes_pinned -= len;
+ spin_unlock(&cache->lock);
+ spin_unlock(&cache->space_info->lock);
+
+ start += len;
}
+
+ if (cache)
+ btrfs_put_block_group(cache);
return 0;
}
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct extent_io_tree *unpin)
+ struct btrfs_root *root)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct extent_io_tree *unpin;
u64 start;
u64 end;
int ret;
+ if (fs_info->pinned_extents == &fs_info->freed_extents[0])
+ unpin = &fs_info->freed_extents[1];
+ else
+ unpin = &fs_info->freed_extents[0];
+
while (1) {
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY);
ret = btrfs_discard_extent(root, start, end + 1 - start);
- /* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
-
+ unpin_extent_range(root, start, end);
cond_resched();
}
static int pin_down_bytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
- u64 bytenr, u64 num_bytes, int is_data,
+ u64 bytenr, u64 num_bytes,
+ int is_data, int reserved,
struct extent_buffer **must_clean)
{
int err = 0;
}
free_extent_buffer(buf);
pinit:
- btrfs_set_path_blocking(path);
+ if (path)
+ btrfs_set_path_blocking(path);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
+ btrfs_pin_extent(root, bytenr, num_bytes, reserved);
BUG_ON(err < 0);
return 0;
}
-
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
}
ret = pin_down_bytes(trans, root, path, bytenr,
- num_bytes, is_data, &must_clean);
+ num_bytes, is_data, 0, &must_clean);
if (ret > 0)
mark_free = 1;
BUG_ON(ret < 0);
if (root_objectid == BTRFS_TREE_LOG_OBJECTID) {
WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
/* unlocks the pinned mutex */
- btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
- update_reserved_extents(root, bytenr, num_bytes, 0);
+ btrfs_pin_extent(root, bytenr, num_bytes, 1);
ret = 0;
} else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
u64 num_bytes)
{
+ struct btrfs_caching_control *caching_ctl;
DEFINE_WAIT(wait);
- prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
-
- if (block_group_cache_done(cache)) {
- finish_wait(&cache->caching_q, &wait);
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
return 0;
- }
- schedule();
- finish_wait(&cache->caching_q, &wait);
- wait_event(cache->caching_q, block_group_cache_done(cache) ||
+ wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
(cache->free_space >= num_bytes));
+
+ put_caching_control(caching_ctl);
+ return 0;
+}
+
+static noinline int
+wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+ struct btrfs_caching_control *caching_ctl;
+ DEFINE_WAIT(wait);
+
+ caching_ctl = get_caching_control(cache);
+ if (!caching_ctl)
+ return 0;
+
+ wait_event(caching_ctl->wait, block_group_cache_done(cache));
+
+ put_caching_control(caching_ctl);
return 0;
}
search_start - offset);
BUG_ON(offset > search_start);
+ update_reserved_extents(block_group, num_bytes, 1);
+
/* we are all good, lets return */
break;
loop:
up_read(&info->groups_sem);
}
-static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
+int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 num_bytes, u64 min_alloc_size,
+ u64 empty_size, u64 hint_byte,
+ u64 search_end, struct btrfs_key *ins,
+ u64 data)
{
int ret;
u64 search_start = 0;
ret = btrfs_discard_extent(root, start, len);
btrfs_add_free_space(cache, start, len);
+ update_reserved_extents(cache, len, 0);
btrfs_put_block_group(cache);
- update_reserved_extents(root, start, len, 0);
-
- return ret;
-}
-
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- u64 num_bytes, u64 min_alloc_size,
- u64 empty_size, u64 hint_byte,
- u64 search_end, struct btrfs_key *ins,
- u64 data)
-{
- int ret;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
- empty_size, hint_byte, search_end, ins,
- data);
- if (!ret)
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
return ret;
}
{
int ret;
struct btrfs_block_group_cache *block_group;
+ struct btrfs_caching_control *caching_ctl;
+ u64 start = ins->objectid;
+ u64 num_bytes = ins->offset;
block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
cache_block_group(block_group);
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ caching_ctl = get_caching_control(block_group);
- ret = btrfs_remove_free_space(block_group, ins->objectid,
- ins->offset);
- BUG_ON(ret);
+ if (!caching_ctl) {
+ BUG_ON(!block_group_cache_done(block_group));
+ ret = btrfs_remove_free_space(block_group, start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ mutex_lock(&caching_ctl->mutex);
+
+ if (start >= caching_ctl->progress) {
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ } else if (start + num_bytes <= caching_ctl->progress) {
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+ } else {
+ num_bytes = caching_ctl->progress - start;
+ ret = btrfs_remove_free_space(block_group,
+ start, num_bytes);
+ BUG_ON(ret);
+
+ start = caching_ctl->progress;
+ num_bytes = ins->objectid + ins->offset -
+ caching_ctl->progress;
+ ret = add_excluded_extent(root, start, num_bytes);
+ BUG_ON(ret);
+ }
+
+ mutex_unlock(&caching_ctl->mutex);
+ put_caching_control(caching_ctl);
+ }
+
+ update_reserved_extents(block_group, ins->offset, 1);
btrfs_put_block_group(block_group);
ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
0, owner, offset, ins, 1);
int ret;
u64 flags = 0;
- ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
- empty_size, hint_byte, search_end,
- ins, 0);
+ ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
+ empty_size, hint_byte, search_end,
+ ins, 0);
if (ret)
return ret;
} else
BUG_ON(parent > 0);
- update_reserved_extents(root, ins->objectid, ins->offset, 1);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
int shared_level;
int update_ref;
int keep_locks;
+ int reada_slot;
+ int reada_count;
};
#define DROP_REFERENCE 1
#define UPDATE_BACKREF 2
+static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct walk_control *wc,
+ struct btrfs_path *path)
+{
+ u64 bytenr;
+ u64 generation;
+ u64 refs;
+ u64 last = 0;
+ u32 nritems;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ int ret;
+ int slot;
+ int nread = 0;
+
+ if (path->slots[wc->level] < wc->reada_slot) {
+ wc->reada_count = wc->reada_count * 2 / 3;
+ wc->reada_count = max(wc->reada_count, 2);
+ } else {
+ wc->reada_count = wc->reada_count * 3 / 2;
+ wc->reada_count = min_t(int, wc->reada_count,
+ BTRFS_NODEPTRS_PER_BLOCK(root));
+ }
+
+ eb = path->nodes[wc->level];
+ nritems = btrfs_header_nritems(eb);
+ blocksize = btrfs_level_size(root, wc->level - 1);
+
+ for (slot = path->slots[wc->level]; slot < nritems; slot++) {
+ if (nread >= wc->reada_count)
+ break;
+
+ cond_resched();
+ bytenr = btrfs_node_blockptr(eb, slot);
+ generation = btrfs_node_ptr_generation(eb, slot);
+
+ if (slot == path->slots[wc->level])
+ goto reada;
+
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
+ continue;
+
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root,
+ bytenr, blocksize,
+ &refs, NULL);
+ BUG_ON(ret);
+ BUG_ON(refs == 0);
+ if (refs == 1)
+ goto reada;
+
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ continue;
+ btrfs_node_key_to_cpu(eb, &key, slot);
+ ret = btrfs_comp_cpu_keys(&key,
+ &wc->update_progress);
+ if (ret < 0)
+ continue;
+ }
+reada:
+ ret = readahead_tree_block(root, bytenr, blocksize,
+ generation);
+ if (ret)
+ break;
+ last = bytenr + blocksize;
+ nread++;
+ }
+ wc->reada_slot = slot;
+}
+
/*
* hepler to process tree block while walking down the tree.
*
- * when wc->stage == DROP_REFERENCE, this function checks
- * reference count of the block. if the block is shared and
- * we need update back refs for the subtree rooted at the
- * block, this function changes wc->stage to UPDATE_BACKREF
- *
* when wc->stage == UPDATE_BACKREF, this function updates
* back refs for pointers in the block.
*
{
int level = wc->level;
struct extent_buffer *eb = path->nodes[level];
- struct btrfs_key key;
u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
int ret;
BUG_ON(wc->refs[level] == 0);
}
- if (wc->stage == DROP_REFERENCE &&
- wc->update_ref && wc->refs[level] > 1) {
- BUG_ON(eb == root->node);
- BUG_ON(path->slots[level] > 0);
- if (level == 0)
- btrfs_item_key_to_cpu(eb, &key, path->slots[level]);
- else
- btrfs_node_key_to_cpu(eb, &key, path->slots[level]);
- if (btrfs_header_owner(eb) == root->root_key.objectid &&
- btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) {
- wc->stage = UPDATE_BACKREF;
- wc->shared_level = level;
- }
- }
-
if (wc->stage == DROP_REFERENCE) {
if (wc->refs[level] > 1)
return 1;
return 0;
}
+/*
+ * hepler to process tree block pointer.
+ *
+ * when wc->stage == DROP_REFERENCE, this function checks
+ * reference count of the block pointed to. if the block
+ * is shared and we need update back refs for the subtree
+ * rooted at the block, this function changes wc->stage to
+ * UPDATE_BACKREF. if the block is shared and there is no
+ * need to update back, this function drops the reference
+ * to the block.
+ *
+ * NOTE: return value 1 means we should stop walking down.
+ */
+static noinline int do_walk_down(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct walk_control *wc)
+{
+ u64 bytenr;
+ u64 generation;
+ u64 parent;
+ u32 blocksize;
+ struct btrfs_key key;
+ struct extent_buffer *next;
+ int level = wc->level;
+ int reada = 0;
+ int ret = 0;
+
+ generation = btrfs_node_ptr_generation(path->nodes[level],
+ path->slots[level]);
+ /*
+ * if the lower level block was created before the snapshot
+ * was created, we know there is no need to update back refs
+ * for the subtree
+ */
+ if (wc->stage == UPDATE_BACKREF &&
+ generation <= root->root_key.offset)
+ return 1;
+
+ bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
+ blocksize = btrfs_level_size(root, level - 1);
+
+ next = btrfs_find_tree_block(root, bytenr, blocksize);
+ if (!next) {
+ next = btrfs_find_create_tree_block(root, bytenr, blocksize);
+ reada = 1;
+ }
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+
+ if (wc->stage == DROP_REFERENCE) {
+ ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
+ &wc->refs[level - 1],
+ &wc->flags[level - 1]);
+ BUG_ON(ret);
+ BUG_ON(wc->refs[level - 1] == 0);
+
+ if (wc->refs[level - 1] > 1) {
+ if (!wc->update_ref ||
+ generation <= root->root_key.offset)
+ goto skip;
+
+ btrfs_node_key_to_cpu(path->nodes[level], &key,
+ path->slots[level]);
+ ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
+ if (ret < 0)
+ goto skip;
+
+ wc->stage = UPDATE_BACKREF;
+ wc->shared_level = level - 1;
+ }
+ }
+
+ if (!btrfs_buffer_uptodate(next, generation)) {
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ next = NULL;
+ }
+
+ if (!next) {
+ if (reada && level == 1)
+ reada_walk_down(trans, root, wc, path);
+ next = read_tree_block(root, bytenr, blocksize, generation);
+ btrfs_tree_lock(next);
+ btrfs_set_lock_blocking(next);
+ }
+
+ level--;
+ BUG_ON(level != btrfs_header_level(next));
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ path->locks[level] = 1;
+ wc->level = level;
+ if (wc->level == 1)
+ wc->reada_slot = 0;
+ return 0;
+skip:
+ wc->refs[level - 1] = 0;
+ wc->flags[level - 1] = 0;
+
+ if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
+ parent = path->nodes[level]->start;
+ } else {
+ BUG_ON(root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level]));
+ parent = 0;
+ }
+
+ ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
+ root->root_key.objectid, level - 1, 0);
+ BUG_ON(ret);
+
+ btrfs_tree_unlock(next);
+ free_extent_buffer(next);
+ return 1;
+}
+
/*
* hepler to process tree block while walking up the tree.
*
if (level < wc->shared_level)
goto out;
- BUG_ON(wc->refs[level] <= 1);
ret = find_next_key(path, level + 1, &wc->update_progress);
if (ret > 0)
wc->update_ref = 0;
path->locks[level] = 0;
return 1;
}
- } else {
- BUG_ON(level != 0);
}
}
struct btrfs_path *path,
struct walk_control *wc)
{
- struct extent_buffer *next;
- struct extent_buffer *cur;
- u64 bytenr;
- u64 ptr_gen;
- u32 blocksize;
int level = wc->level;
int ret;
while (level >= 0) {
- cur = path->nodes[level];
- BUG_ON(path->slots[level] >= btrfs_header_nritems(cur));
+ if (path->slots[level] >=
+ btrfs_header_nritems(path->nodes[level]))
+ break;
ret = walk_down_proc(trans, root, path, wc);
if (ret > 0)
if (level == 0)
break;
- bytenr = btrfs_node_blockptr(cur, path->slots[level]);
- blocksize = btrfs_level_size(root, level - 1);
- ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]);
-
- next = read_tree_block(root, bytenr, blocksize, ptr_gen);
- btrfs_tree_lock(next);
- btrfs_set_lock_blocking(next);
-
- level--;
- BUG_ON(level != btrfs_header_level(next));
- path->nodes[level] = next;
- path->slots[level] = 0;
- path->locks[level] = 1;
- wc->level = level;
+ ret = do_walk_down(trans, root, path, wc);
+ if (ret > 0) {
+ path->slots[level]++;
+ continue;
+ }
+ level = wc->level;
}
return 0;
}
err = ret;
goto out;
}
- btrfs_node_key_to_cpu(path->nodes[level], &key,
- path->slots[level]);
- WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key)));
+ WARN_ON(ret > 0);
/*
* unlock our path, this is safe because only this
wc->stage = DROP_REFERENCE;
wc->update_ref = update_ref;
wc->keep_locks = 0;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
ret = walk_down_tree(trans, root, path, wc);
ret = btrfs_del_root(trans, tree_root, &root->root_key);
BUG_ON(ret);
- free_extent_buffer(root->node);
- free_extent_buffer(root->commit_root);
- kfree(root);
+ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+ ret = btrfs_find_last_root(tree_root, root->root_key.objectid,
+ NULL, NULL);
+ BUG_ON(ret < 0);
+ if (ret > 0) {
+ ret = btrfs_del_orphan_item(trans, tree_root,
+ root->root_key.objectid);
+ BUG_ON(ret);
+ }
+ }
+
+ if (root->in_radix) {
+ btrfs_free_fs_root(tree_root->fs_info, root);
+ } else {
+ free_extent_buffer(root->node);
+ free_extent_buffer(root->commit_root);
+ kfree(root);
+ }
out:
btrfs_end_transaction(trans, tree_root);
kfree(wc);
wc->stage = DROP_REFERENCE;
wc->update_ref = 0;
wc->keep_locks = 1;
+ wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
while (1) {
wret = walk_down_tree(trans, root, path, wc);
lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
- spin_lock(&em_tree->lock);
+ write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
- spin_unlock(&em_tree->lock);
+ write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
{
struct btrfs_block_group_cache *block_group;
struct btrfs_space_info *space_info;
+ struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
+ down_write(&info->extent_commit_sem);
+ while (!list_empty(&info->caching_block_groups)) {
+ caching_ctl = list_entry(info->caching_block_groups.next,
+ struct btrfs_caching_control, list);
+ list_del(&caching_ctl->list);
+ put_caching_control(caching_ctl);
+ }
+ up_write(&info->extent_commit_sem);
+
spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
block_group = rb_entry(n, struct btrfs_block_group_cache,
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
cache->fs_info = info;
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
cache->flags = btrfs_block_group_flags(&cache->item);
cache->sectorsize = root->sectorsize;
- remove_sb_from_cache(root, cache);
-
/*
* check for two cases, either we are full, and therefore
* don't need to bother with the caching work since we won't
* time, particularly in the full case.
*/
if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
} else if (btrfs_block_group_used(&cache->item) == 0) {
+ exclude_super_stripes(root, cache);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, root->fs_info,
found_key.objectid,
found_key.objectid +
found_key.offset);
+ free_excluded_extents(root, cache);
}
ret = update_space_info(info, cache->flags, found_key.offset,
atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
spin_lock_init(&cache->tree_lock);
- init_waitqueue_head(&cache->caching_q);
INIT_LIST_HEAD(&cache->list);
INIT_LIST_HEAD(&cache->cluster_list);
cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+ cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
- remove_sb_from_cache(root, cache);
+ exclude_super_stripes(root, cache);
add_new_free_space(cache, root->fs_info, chunk_offset,
chunk_offset + size);
+ free_excluded_extents(root, cache);
+
ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
&cache->space_info);
BUG_ON(ret);
up_write(&block_group->space_info->groups_sem);
if (block_group->cached == BTRFS_CACHE_STARTED)
- wait_event(block_group->caching_q,
- block_group_cache_done(block_group));
+ wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);