* to make sure the old root from before we joined the transaction is deleted
* when the transaction commits
*/
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->ref_cows && root->last_trans < trans->transid) {
WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->commit_root != root->node);
+ /*
+ * see below for in_trans_setup usage rules
+ * we have the reloc mutex held now, so there
+ * is only one writer in this function
+ */
+ root->in_trans_setup = 1;
+
+ /* make sure readers find in_trans_setup before
+ * they find our root->last_trans update
+ */
+ smp_wmb();
+
spin_lock(&root->fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid) {
spin_unlock(&root->fs_info->fs_roots_radix_lock);
return 0;
}
- root->last_trans = trans->transid;
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG);
spin_unlock(&root->fs_info->fs_roots_radix_lock);
+ root->last_trans = trans->transid;
+
+ /* this is pretty tricky. We don't want to
+ * take the relocation lock in btrfs_record_root_in_trans
+ * unless we're really doing the first setup for this root in
+ * this transaction.
+ *
+ * Normally we'd use root->last_trans as a flag to decide
+ * if we want to take the expensive mutex.
+ *
+ * But, we have to set root->last_trans before we
+ * init the relocation root, otherwise, we trip over warnings
+ * in ctree.c. The solution used here is to flag ourselves
+ * with root->in_trans_setup. When this is 1, we're still
+ * fixing up the reloc trees and everyone must wait.
+ *
+ * When this is zero, they can trust root->last_trans and fly
+ * through btrfs_record_root_in_trans without having to take the
+ * lock. smp_wmb() makes sure that all the writes above are
+ * done before we pop in the zero below
+ */
btrfs_init_reloc_root(trans, root);
+ smp_wmb();
+ root->in_trans_setup = 0;
}
return 0;
}
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ if (!root->ref_cows)
+ return 0;
+
+ /*
+ * see record_root_in_trans for comments about in_trans_setup usage
+ * and barriers
+ */
+ smp_rmb();
+ if (root->last_trans == trans->transid &&
+ !root->in_trans_setup)
+ return 0;
+
+ mutex_lock(&root->fs_info->reloc_mutex);
+ record_root_in_trans(trans, root);
+ mutex_unlock(&root->fs_info->reloc_mutex);
+
+ return 0;
+}
+
/* wait for commit against the current transaction to become unblocked
* when this is done, it is safe to start a new transaction, but the current
* transaction might not be fully on disk.
{
struct btrfs_trans_handle *h;
struct btrfs_transaction *cur_trans;
- int retries = 0;
+ u64 num_bytes = 0;
int ret;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
h->block_rsv = NULL;
goto got_it;
}
+
+ /*
+ * Do the reservation before we join the transaction so we can do all
+ * the appropriate flushing if need be.
+ */
+ if (num_items > 0 && root != root->fs_info->chunk_root) {
+ num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+ ret = btrfs_block_rsv_add(NULL, root,
+ &root->fs_info->trans_block_rsv,
+ num_bytes);
+ if (ret)
+ return ERR_PTR(ret);
+ }
again:
h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
if (!h)
goto again;
}
- if (num_items > 0) {
- ret = btrfs_trans_reserve_metadata(h, root, num_items);
- if (ret == -EAGAIN && !retries) {
- retries++;
- btrfs_commit_transaction(h, root);
- goto again;
- } else if (ret == -EAGAIN) {
- /*
- * We have already retried and got EAGAIN, so really we
- * don't have space, so set ret to -ENOSPC.
- */
- ret = -ENOSPC;
- }
-
- if (ret < 0) {
- btrfs_end_transaction(h, root);
- return ERR_PTR(ret);
- }
+ if (num_bytes) {
+ h->block_rsv = &root->fs_info->trans_block_rsv;
+ h->bytes_reserved = num_bytes;
}
got_it:
list) {
if (t->in_commit) {
if (t->commit_done)
- goto out;
+ break;
cur_trans = t;
atomic_inc(&cur_trans->use_count);
break;
}
if (lock && cur_trans->blocked && !cur_trans->in_commit) {
- if (throttle)
+ if (throttle) {
+ /*
+ * We may race with somebody else here so end up having
+ * to call end_transaction on ourselves again, so inc
+ * our use_count.
+ */
+ trans->use_count++;
return btrfs_commit_transaction(trans, root);
- else
+ } else {
wake_up_process(info->transaction_kthread);
+ }
}
WARN_ON(cur_trans != info->running_transaction);
btrfs_btree_balance_dirty(info->tree_root, nr);
cond_resched();
- if (root->fs_info->closing || ret != -EAGAIN)
+ if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
break;
}
root->defrag_running = 0;
parent = dget_parent(dentry);
parent_inode = parent->d_inode;
parent_root = BTRFS_I(parent_inode)->root;
- btrfs_record_root_in_trans(trans, parent_root);
+ record_root_in_trans(trans, parent_root);
/*
* insert the directory item
ret = btrfs_update_inode(trans, parent_root, parent_inode);
BUG_ON(ret);
- btrfs_record_root_in_trans(trans, root);
+ /*
+ * pull in the delayed directory update
+ * and the delayed inode item
+ * otherwise we corrupt the FS during
+ * snapshot
+ */
+ ret = btrfs_run_delayed_items(trans, root);
+ BUG_ON(ret);
+
+ record_root_in_trans(trans, root);
btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
btrfs_check_and_init_root_item(new_root_item);
int ret;
list_for_each_entry(pending, head, list) {
- /*
- * We must deal with the delayed items before creating
- * snapshots, or we will create a snapthot with inconsistent
- * information.
- */
- ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
- BUG_ON(ret);
-
ret = create_pending_snapshot(trans, fs_info, pending);
BUG_ON(ret);
}
wait_current_trans_commit_start_and_unblock(root, cur_trans);
else
wait_current_trans_commit_start(root, cur_trans);
- put_transaction(cur_trans);
+ if (current->journal_info == trans)
+ current->journal_info = NULL;
+
+ put_transaction(cur_trans);
return 0;
}
schedule_timeout(1);
finish_wait(&cur_trans->writer_wait, &wait);
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 1;
- spin_unlock(&root->fs_info->trans_lock);
} while (atomic_read(&cur_trans->num_writers) > 1 ||
(should_grow && cur_trans->num_joined != joined));
- ret = create_pending_snapshots(trans, root->fs_info);
- BUG_ON(ret);
+ /*
+ * Ok now we need to make sure to block out any other joins while we
+ * commit the transaction. We could have started a join before setting
+ * no_join so make sure to wait for num_writers to == 1 again.
+ */
+ spin_lock(&root->fs_info->trans_lock);
+ root->fs_info->trans_no_join = 1;
+ spin_unlock(&root->fs_info->trans_lock);
+ wait_event(cur_trans->writer_wait,
+ atomic_read(&cur_trans->num_writers) == 1);
+
+ /*
+ * the reloc mutex makes sure that we stop
+ * the balancing code from coming in and moving
+ * extents around in the middle of the commit
+ */
+ mutex_lock(&root->fs_info->reloc_mutex);
ret = btrfs_run_delayed_items(trans, root);
BUG_ON(ret);
+ ret = create_pending_snapshots(trans, root->fs_info);
+ BUG_ON(ret);
+
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
+ /*
+ * make sure none of the code above managed to slip in a
+ * delayed item
+ */
+ btrfs_assert_delayed_root_empty(root);
+
WARN_ON(cur_trans != trans->transaction);
btrfs_scrub_pause(root);
root->fs_info->running_transaction = NULL;
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
+ mutex_unlock(&root->fs_info->reloc_mutex);
wake_up(&root->fs_info->transaction_wait);