* we likely hold important locks.
*/
if (trans && (!trans->transaction->in_commit) &&
- (root && root != root->fs_info->tree_root)) {
+ (root && root != root->fs_info->tree_root) &&
+ btrfs_test_opt(root, SPACE_CACHE)) {
spin_lock(&cache->lock);
if (cache->cached != BTRFS_CACHE_NO) {
spin_unlock(&cache->lock);
goto again;
}
+ /* We've already setup this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
num_pages *= 16;
num_pages *= PAGE_CACHE_SIZE;
- ret = btrfs_delalloc_reserve_space(inode, num_pages);
+ ret = btrfs_check_data_free_space(inode, num_pages);
if (ret)
goto out_put;
ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
num_pages, num_pages,
&alloc_hint);
- if (!ret) {
+ if (!ret)
dcs = BTRFS_DC_SETUP;
- btrfs_free_reserved_data_space(inode, num_pages);
- } else {
- btrfs_delalloc_release_space(inode, num_pages);
- }
+ btrfs_free_reserved_data_space(inode, num_pages);
out_put:
iput(inode);
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
+ if (!ret)
+ block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
* shrink metadata reservation for delalloc
*/
static int shrink_delalloc(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, u64 to_reclaim, int sync)
+ struct btrfs_root *root, u64 to_reclaim,
+ bool wait_ordered)
{
struct btrfs_block_rsv *block_rsv;
struct btrfs_space_info *space_info;
u64 max_reclaim;
u64 reclaimed = 0;
long time_left;
- int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
int loops = 0;
unsigned long progress;
}
max_reclaim = min(reserved, to_reclaim);
-
+ nr_pages = max_t(unsigned long, nr_pages,
+ max_reclaim >> PAGE_CACHE_SHIFT);
while (loops < 1024) {
/* have the flusher threads jump in and do some IO */
smp_mb();
if (trans && trans->transaction->blocked)
return -EAGAIN;
- time_left = schedule_timeout_interruptible(1);
+ if (wait_ordered && !trans) {
+ btrfs_wait_ordered_extents(root, 0, 0);
+ } else {
+ time_left = schedule_timeout_interruptible(1);
- /* We were interrupted, exit */
- if (time_left)
- break;
+ /* We were interrupted, exit */
+ if (time_left)
+ break;
+ }
/* we've kicked the IO a few times, if anything has been freed,
* exit. There is no sense in looping here for a long time
}
}
- if (reclaimed >= to_reclaim && !trans)
- btrfs_wait_ordered_extents(root, 0, 0);
+
return reclaimed >= to_reclaim;
}
-/*
- * Retries tells us how many times we've called reserve_metadata_bytes. The
- * idea is if this is the first call (retries == 0) then we will add to our
- * reserved count if we can't make the allocation in order to hold our place
- * while we go and try and free up space. That way for retries > 1 we don't try
- * and add space, we just check to see if the amount of unused space is >= the
- * total space, meaning that our reservation is valid.
+/**
+ * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
+ * @root - the root we're allocating for
+ * @block_rsv - the block_rsv we're allocating for
+ * @orig_bytes - the number of bytes we want
+ * @flush - wether or not we can flush to make our reservation
*
- * However if we don't intend to retry this reservation, pass -1 as retries so
- * that it short circuits this logic.
+ * This will reserve orgi_bytes number of bytes from the space info associated
+ * with the block_rsv. If there is not enough space it will make an attempt to
+ * flush out space to make room. It will do this by flushing delalloc if
+ * possible or committing the transaction. If flush is 0 then no attempts to
+ * regain reservations will be made and this will fail if there is not enough
+ * space already.
*/
-static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+static int reserve_metadata_bytes(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 orig_bytes, int flush)
{
struct btrfs_space_info *space_info = block_rsv->space_info;
- u64 unused;
+ struct btrfs_trans_handle *trans;
+ u64 used;
u64 num_bytes = orig_bytes;
int retries = 0;
int ret = 0;
bool committed = false;
bool flushing = false;
+ bool wait_ordered = false;
+
+ trans = (struct btrfs_trans_handle *)current->journal_info;
again:
ret = 0;
spin_lock(&space_info->lock);
}
ret = -ENOSPC;
- unused = space_info->bytes_used + space_info->bytes_reserved +
- space_info->bytes_pinned + space_info->bytes_readonly +
- space_info->bytes_may_use;
+ used = space_info->bytes_used + space_info->bytes_reserved +
+ space_info->bytes_pinned + space_info->bytes_readonly +
+ space_info->bytes_may_use;
/*
* The idea here is that we've not already over-reserved the block group
* lets start flushing stuff first and then come back and try to make
* our reservation.
*/
- if (unused <= space_info->total_bytes) {
- unused = space_info->total_bytes - unused;
- if (unused >= num_bytes) {
+ if (used <= space_info->total_bytes) {
+ if (used + orig_bytes <= space_info->total_bytes) {
space_info->bytes_may_use += orig_bytes;
ret = 0;
} else {
* amount plus the amount of bytes that we need for this
* reservation.
*/
- num_bytes = unused - space_info->total_bytes +
+ wait_ordered = true;
+ num_bytes = used - space_info->total_bytes +
(orig_bytes * (retries + 1));
}
+ if (ret) {
+ u64 profile = btrfs_get_alloc_profile(root, 0);
+ u64 avail;
+
+ /*
+ * If we have a lot of space that's pinned, don't bother doing
+ * the overcommit dance yet and just commit the transaction.
+ */
+ avail = (space_info->total_bytes - space_info->bytes_used) * 8;
+ do_div(avail, 10);
+ if (space_info->bytes_pinned >= avail && flush && !trans &&
+ !committed) {
+ space_info->flush = 1;
+ flushing = true;
+ spin_unlock(&space_info->lock);
+ goto commit;
+ }
+
+ spin_lock(&root->fs_info->free_chunk_lock);
+ avail = root->fs_info->free_chunk_space;
+
+ /*
+ * If we have dup, raid1 or raid10 then only half of the free
+ * space is actually useable.
+ */
+ if (profile & (BTRFS_BLOCK_GROUP_DUP |
+ BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID10))
+ avail >>= 1;
+
+ /*
+ * If we aren't flushing don't let us overcommit too much, say
+ * 1/8th of the space. If we can flush, let it overcommit up to
+ * 1/2 of the space.
+ */
+ if (flush)
+ avail >>= 3;
+ else
+ avail >>= 1;
+ spin_unlock(&root->fs_info->free_chunk_lock);
+
+ if (used + num_bytes < space_info->total_bytes + avail) {
+ space_info->bytes_may_use += orig_bytes;
+ ret = 0;
+ } else {
+ wait_ordered = true;
+ }
+ }
+
/*
* Couldn't make our reservation, save our place so while we're trying
* to reclaim space we can actually use it instead of somebody else
* We do synchronous shrinking since we don't actually unreserve
* metadata until after the IO is completed.
*/
- ret = shrink_delalloc(trans, root, num_bytes, 1);
+ ret = shrink_delalloc(trans, root, num_bytes, wait_ordered);
if (ret < 0)
goto out;
* so go back around and try again.
*/
if (retries < 2) {
+ wait_ordered = true;
retries++;
goto again;
}
- /*
- * Not enough space to be reclaimed, don't bother committing the
- * transaction.
- */
- spin_lock(&space_info->lock);
- if (space_info->bytes_pinned < orig_bytes)
- ret = -ENOSPC;
- spin_unlock(&space_info->lock);
- if (ret)
- goto out;
-
ret = -EAGAIN;
if (trans)
goto out;
+commit:
ret = -ENOSPC;
if (committed)
goto out;
static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
- struct btrfs_block_rsv *block_rsv;
- if (root->ref_cows)
+ struct btrfs_block_rsv *block_rsv = NULL;
+
+ if (root->ref_cows || root == root->fs_info->csum_root)
block_rsv = trans->block_rsv;
- else
+
+ if (!block_rsv)
block_rsv = root->block_rsv;
if (!block_rsv)
kfree(rsv);
}
-int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
+int btrfs_block_rsv_add(struct btrfs_root *root,
struct btrfs_block_rsv *block_rsv,
u64 num_bytes)
{
if (num_bytes == 0)
return 0;
- ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 1);
return 0;
return ret;
}
-int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_block_rsv *block_rsv,
- u64 min_reserved, int min_factor, int flush)
+int btrfs_block_rsv_check(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, int min_factor)
{
u64 num_bytes = 0;
int ret = -ENOSPC;
return 0;
spin_lock(&block_rsv->lock);
- if (min_factor > 0)
- num_bytes = div_factor(block_rsv->size, min_factor);
- if (min_reserved > num_bytes)
- num_bytes = min_reserved;
+ num_bytes = div_factor(block_rsv->size, min_factor);
+ if (block_rsv->reserved >= num_bytes)
+ ret = 0;
+ spin_unlock(&block_rsv->lock);
+
+ return ret;
+}
+
+int btrfs_block_rsv_refill(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv,
+ u64 min_reserved)
+{
+ u64 num_bytes = 0;
+ int ret = -ENOSPC;
+
+ if (!block_rsv)
+ return 0;
+ spin_lock(&block_rsv->lock);
+ num_bytes = min_reserved;
if (block_rsv->reserved >= num_bytes)
ret = 0;
else
if (!ret)
return 0;
- ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, flush);
+ ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
if (!ret) {
block_rsv_add_bytes(block_rsv, num_bytes, 0);
return 0;
if (!trans->bytes_reserved)
return;
- BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
- btrfs_block_rsv_release(root, trans->block_rsv,
- trans->bytes_reserved);
+ btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
trans->bytes_reserved = 0;
}
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
spin_unlock(&BTRFS_I(inode)->lock);
- ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, flush);
+ ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
if (ret) {
u64 to_free = 0;
unsigned dropped;
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
- if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+ if (btrfs_test_opt(root, SPACE_CACHE) &&
cache->disk_cache_state < BTRFS_DC_CLEAR)
cache->disk_cache_state = BTRFS_DC_CLEAR;
struct extent_buffer *buf,
u64 parent, int last_ref)
{
- struct btrfs_block_rsv *block_rsv;
struct btrfs_block_group_cache *cache = NULL;
int ret;
if (!last_ref)
return;
- block_rsv = get_block_rsv(trans, root);
cache = btrfs_lookup_block_group(root->fs_info, buf->start);
- if (block_rsv->space_info != cache->space_info)
- goto out;
if (btrfs_header_generation(buf) == trans->transid) {
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
bool failed_cluster_refill = false;
bool failed_alloc = false;
bool use_cluster = true;
+ bool have_caching_bg = false;
u64 ideal_cache_percent = 0;
u64 ideal_cache_offset = 0;
}
}
search:
+ have_caching_bg = false;
down_read(&space_info->groups_sem);
list_for_each_entry(block_group, &space_info->block_groups[index],
list) {
failed_alloc = true;
goto have_block_group;
} else if (!offset) {
+ if (!cached)
+ have_caching_bg = true;
goto loop;
}
checks:
}
up_read(&space_info->groups_sem);
+ if (!ins->objectid && loop >= LOOP_CACHING_WAIT && have_caching_bg)
+ goto search;
+
if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES)
goto search;
block_rsv = get_block_rsv(trans, root);
if (block_rsv->size == 0) {
- ret = reserve_metadata_bytes(trans, root, block_rsv,
- blocksize, 0);
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
/*
* If we couldn't reserve metadata bytes try and use some from
* the global reserve.
return block_rsv;
if (ret) {
WARN_ON(1);
- ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize,
- 0);
+ ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0);
if (!ret) {
return block_rsv;
} else if (ret && block_rsv != global_rsv) {
path->reada = 1;
cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
- if (cache_gen != 0 &&
+ if (btrfs_test_opt(root, SPACE_CACHE) &&
btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
need_clear = 1;
if (btrfs_test_opt(root, CLEAR_CACHE))
need_clear = 1;
- if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
- printk(KERN_INFO "btrfs: disk space caching is enabled\n");
while (1) {
ret = find_first_block_group(root, path, &key);
goto out;
}
- inode = lookup_free_space_inode(root, block_group, path);
+ inode = lookup_free_space_inode(tree_root, block_group, path);
if (!IS_ERR(inode)) {
ret = btrfs_orphan_add(trans, inode);
BUG_ON(ret);
spin_unlock(&block_group->lock);
}
/* One for our lookup ref */
- iput(inode);
+ btrfs_add_delayed_iput(inode);
}
key.objectid = BTRFS_FREE_SPACE_OBJECTID;