Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Jun 2011 15:58:53 +0000 (08:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 20 Jun 2011 15:58:53 +0000 (08:58 -0700)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: avoid delayed metadata items during commits
  btrfs: fix uninitialized return value
  btrfs: fix wrong reservation when doing delayed inode operations
  btrfs: Remove unused sysfs code
  btrfs: fix dereference of ERR_PTR value
  Btrfs: fix relocation races
  Btrfs: set no_trans_join after trying to expand the transaction
  Btrfs: protect the pending_snapshots list with trans_lock
  Btrfs: fix path leakage on subvol deletion
  Btrfs: drop the delalloc_bytes check in shrink_delalloc
  Btrfs: check the return value from set_anon_super

fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/sysfs.c
fs/btrfs/transaction.c
fs/btrfs/tree-log.c

index 378b5b4..3006287 100644 (file)
@@ -967,6 +967,12 @@ struct btrfs_fs_info {
        struct srcu_struct subvol_srcu;
 
        spinlock_t trans_lock;
+       /*
+        * the reloc mutex goes with the trans lock, it is taken
+        * during commit to protect us from the relocation code
+        */
+       struct mutex reloc_mutex;
+
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
@@ -1172,6 +1178,14 @@ struct btrfs_root {
        u32 type;
 
        u64 highest_objectid;
+
+       /* btrfs_record_root_in_trans is a multi-step process,
+        * and it can race with the balancing code.   But the
+        * race is very small, and only the first time the root
+        * is added to each transaction.  So in_trans_setup
+        * is used to tell us when more checks are required
+        */
+       unsigned long in_trans_setup;
        int ref_cows;
        int track_dirty;
        int in_radix;
@@ -1181,7 +1195,6 @@ struct btrfs_root {
        struct btrfs_key defrag_max;
        int defrag_running;
        char *name;
-       int in_sysfs;
 
        /* the dirty list is only used by non-reference counted roots */
        struct list_head dirty_list;
index 6462c29..f1cbd02 100644 (file)
@@ -297,7 +297,6 @@ struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
                item->data_len = data_len;
                item->ins_or_del = 0;
                item->bytes_reserved = 0;
-               item->block_rsv = NULL;
                item->delayed_node = NULL;
                atomic_set(&item->refs, 1);
        }
@@ -593,10 +592,8 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 
        num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
-       if (!ret) {
+       if (!ret)
                item->bytes_reserved = num_bytes;
-               item->block_rsv = dst_rsv;
-       }
 
        return ret;
 }
@@ -604,10 +601,13 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
                                                struct btrfs_delayed_item *item)
 {
+       struct btrfs_block_rsv *rsv;
+
        if (!item->bytes_reserved)
                return;
 
-       btrfs_block_rsv_release(root, item->block_rsv,
+       rsv = &root->fs_info->global_block_rsv;
+       btrfs_block_rsv_release(root, rsv,
                                item->bytes_reserved);
 }
 
@@ -1014,6 +1014,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
        struct btrfs_delayed_root *delayed_root;
        struct btrfs_delayed_node *curr_node, *prev_node;
        struct btrfs_path *path;
+       struct btrfs_block_rsv *block_rsv;
        int ret = 0;
 
        path = btrfs_alloc_path();
@@ -1021,6 +1022,9 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        path->leave_spinning = 1;
 
+       block_rsv = trans->block_rsv;
+       trans->block_rsv = &root->fs_info->global_block_rsv;
+
        delayed_root = btrfs_get_delayed_root(root);
 
        curr_node = btrfs_first_delayed_node(delayed_root);
@@ -1045,6 +1049,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
        }
 
        btrfs_free_path(path);
+       trans->block_rsv = block_rsv;
        return ret;
 }
 
@@ -1052,6 +1057,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                                              struct btrfs_delayed_node *node)
 {
        struct btrfs_path *path;
+       struct btrfs_block_rsv *block_rsv;
        int ret;
 
        path = btrfs_alloc_path();
@@ -1059,6 +1065,9 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                return -ENOMEM;
        path->leave_spinning = 1;
 
+       block_rsv = trans->block_rsv;
+       trans->block_rsv = &node->root->fs_info->global_block_rsv;
+
        ret = btrfs_insert_delayed_items(trans, path, node->root, node);
        if (!ret)
                ret = btrfs_delete_delayed_items(trans, path, node->root, node);
@@ -1066,6 +1075,7 @@ static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
                ret = btrfs_update_delayed_inode(trans, node->root, path, node);
        btrfs_free_path(path);
 
+       trans->block_rsv = block_rsv;
        return ret;
 }
 
@@ -1116,6 +1126,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        struct btrfs_path *path;
        struct btrfs_delayed_node *delayed_node = NULL;
        struct btrfs_root *root;
+       struct btrfs_block_rsv *block_rsv;
        unsigned long nr = 0;
        int need_requeue = 0;
        int ret;
@@ -1134,6 +1145,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        if (IS_ERR(trans))
                goto free_path;
 
+       block_rsv = trans->block_rsv;
+       trans->block_rsv = &root->fs_info->global_block_rsv;
+
        ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
        if (!ret)
                ret = btrfs_delete_delayed_items(trans, path, root,
@@ -1176,6 +1190,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 
        nr = trans->blocks_used;
 
+       trans->block_rsv = block_rsv;
        btrfs_end_transaction_dmeta(trans, root);
        __btrfs_btree_balance_dirty(root, nr);
 free_path:
@@ -1222,6 +1237,13 @@ again:
        return 0;
 }
 
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root)
+{
+       struct btrfs_delayed_root *delayed_root;
+       delayed_root = btrfs_get_delayed_root(root);
+       WARN_ON(btrfs_first_delayed_node(delayed_root));
+}
+
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
        struct btrfs_delayed_root *delayed_root;
index eb7d240..d1a6a29 100644 (file)
@@ -75,7 +75,6 @@ struct btrfs_delayed_item {
        struct list_head tree_list;     /* used for batch insert/delete items */
        struct list_head readdir_list;  /* used for readdir items */
        u64 bytes_reserved;
-       struct btrfs_block_rsv *block_rsv;
        struct btrfs_delayed_node *delayed_node;
        atomic_t refs;
        int ins_or_del;
@@ -138,4 +137,8 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void btrfs_delayed_inode_exit(void);
+
+/* for debugging */
+void btrfs_assert_delayed_root_empty(struct btrfs_root *root);
+
 #endif
index 9f68c68..1ac8db5 100644 (file)
@@ -1044,7 +1044,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->last_trans = 0;
        root->highest_objectid = 0;
        root->name = NULL;
-       root->in_sysfs = 0;
        root->inode_tree = RB_ROOT;
        INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
        root->block_rsv = NULL;
@@ -1300,19 +1299,21 @@ again:
                return root;
 
        root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
-       if (!root->free_ino_ctl)
-               goto fail;
        root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
                                        GFP_NOFS);
-       if (!root->free_ino_pinned)
+       if (!root->free_ino_pinned || !root->free_ino_ctl) {
+               ret = -ENOMEM;
                goto fail;
+       }
 
        btrfs_init_free_ino_ctl(root);
        mutex_init(&root->fs_commit_mutex);
        spin_lock_init(&root->cache_lock);
        init_waitqueue_head(&root->cache_wait);
 
-       set_anon_super(&root->anon_super, NULL);
+       ret = set_anon_super(&root->anon_super, NULL);
+       if (ret)
+               goto fail;
 
        if (btrfs_root_refs(&root->root_item) == 0) {
                ret = -ENOENT;
@@ -1618,6 +1619,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
        spin_lock_init(&fs_info->defrag_inodes_lock);
+       mutex_init(&fs_info->reloc_mutex);
 
        init_completion(&fs_info->kobj_unregister);
        fs_info->tree_root = tree_root;
index b42efc2..1f61bf5 100644 (file)
@@ -3314,10 +3314,6 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
        if (reserved == 0)
                return 0;
 
-       /* nothing to shrink - nothing to reclaim */
-       if (root->fs_info->delalloc_bytes == 0)
-               return 0;
-
        max_reclaim = min(reserved, to_reclaim);
 
        while (loops < 1024) {
index 751ddf8..0a9b10c 100644 (file)
@@ -3076,6 +3076,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, root, dir);
        BUG_ON(ret);
 
+       btrfs_free_path(path);
        return 0;
 }
 
index b793d11..a3c4751 100644 (file)
@@ -482,8 +482,10 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        ret = btrfs_snap_reserve_metadata(trans, pending_snapshot);
        BUG_ON(ret);
 
+       spin_lock(&root->fs_info->trans_lock);
        list_add(&pending_snapshot->list,
                 &trans->transaction->pending_snapshots);
+       spin_unlock(&root->fs_info->trans_lock);
        if (async_transid) {
                *async_transid = trans->transid;
                ret = btrfs_commit_transaction_async(trans,
index b1ef27c..5e0a3dc 100644 (file)
@@ -1368,7 +1368,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
        int ret;
 
        if (!root->reloc_root)
-               return 0;
+               goto out;
 
        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;
@@ -1390,6 +1390,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &reloc_root->root_key, root_item);
        BUG_ON(ret);
+
+out:
        return 0;
 }
 
@@ -2142,10 +2144,11 @@ int prepare_to_merge(struct reloc_control *rc, int err)
        u64 num_bytes = 0;
        int ret;
 
-       spin_lock(&root->fs_info->trans_lock);
+       mutex_lock(&root->fs_info->reloc_mutex);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        rc->merging_rsv_size += rc->nodes_relocated * 2;
-       spin_unlock(&root->fs_info->trans_lock);
+       mutex_unlock(&root->fs_info->reloc_mutex);
+
 again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
@@ -2214,9 +2217,16 @@ int merge_reloc_roots(struct reloc_control *rc)
        int ret;
 again:
        root = rc->extent_root;
-       spin_lock(&root->fs_info->trans_lock);
+
+       /*
+        * this serializes us with btrfs_record_root_in_transaction,
+        * we have to make sure nobody is in the middle of
+        * adding their roots to the list while we are
+        * doing this splice
+        */
+       mutex_lock(&root->fs_info->reloc_mutex);
        list_splice_init(&rc->reloc_roots, &reloc_roots);
-       spin_unlock(&root->fs_info->trans_lock);
+       mutex_unlock(&root->fs_info->reloc_mutex);
 
        while (!list_empty(&reloc_roots)) {
                found = 1;
@@ -3590,17 +3600,19 @@ next:
 static void set_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-       spin_lock(&fs_info->trans_lock);
+
+       mutex_lock(&fs_info->reloc_mutex);
        fs_info->reloc_ctl = rc;
-       spin_unlock(&fs_info->trans_lock);
+       mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static void unset_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-       spin_lock(&fs_info->trans_lock);
+
+       mutex_lock(&fs_info->reloc_mutex);
        fs_info->reloc_ctl = NULL;
-       spin_unlock(&fs_info->trans_lock);
+       mutex_unlock(&fs_info->reloc_mutex);
 }
 
 static int check_extent_flags(u64 flags)
index c3c223a..daac9ae 100644 (file)
 #include "disk-io.h"
 #include "transaction.h"
 
-static ssize_t root_blocks_used_show(struct btrfs_root *root, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (unsigned long long)btrfs_root_used(&root->root_item));
-}
-
-static ssize_t root_block_limit_show(struct btrfs_root *root, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (unsigned long long)btrfs_root_limit(&root->root_item));
-}
-
-static ssize_t super_blocks_used_show(struct btrfs_fs_info *fs, char *buf)
-{
-
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (unsigned long long)btrfs_super_bytes_used(&fs->super_copy));
-}
-
-static ssize_t super_total_blocks_show(struct btrfs_fs_info *fs, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (unsigned long long)btrfs_super_total_bytes(&fs->super_copy));
-}
-
-static ssize_t super_blocksize_show(struct btrfs_fs_info *fs, char *buf)
-{
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
-               (unsigned long long)btrfs_super_sectorsize(&fs->super_copy));
-}
-
-/* this is for root attrs (subvols/snapshots) */
-struct btrfs_root_attr {
-       struct attribute attr;
-       ssize_t (*show)(struct btrfs_root *, char *);
-       ssize_t (*store)(struct btrfs_root *, const char *, size_t);
-};
-
-#define ROOT_ATTR(name, mode, show, store) \
-static struct btrfs_root_attr btrfs_root_attr_##name = __ATTR(name, mode, \
-                                                             show, store)
-
-ROOT_ATTR(blocks_used, 0444,   root_blocks_used_show,  NULL);
-ROOT_ATTR(block_limit, 0644,   root_block_limit_show,  NULL);
-
-static struct attribute *btrfs_root_attrs[] = {
-       &btrfs_root_attr_blocks_used.attr,
-       &btrfs_root_attr_block_limit.attr,
-       NULL,
-};
-
-/* this is for super attrs (actual full fs) */
-struct btrfs_super_attr {
-       struct attribute attr;
-       ssize_t (*show)(struct btrfs_fs_info *, char *);
-       ssize_t (*store)(struct btrfs_fs_info *, const char *, size_t);
-};
-
-#define SUPER_ATTR(name, mode, show, store) \
-static struct btrfs_super_attr btrfs_super_attr_##name = __ATTR(name, mode, \
-                                                               show, store)
-
-SUPER_ATTR(blocks_used,                0444,   super_blocks_used_show,         NULL);
-SUPER_ATTR(total_blocks,       0444,   super_total_blocks_show,        NULL);
-SUPER_ATTR(blocksize,          0444,   super_blocksize_show,           NULL);
-
-static struct attribute *btrfs_super_attrs[] = {
-       &btrfs_super_attr_blocks_used.attr,
-       &btrfs_super_attr_total_blocks.attr,
-       &btrfs_super_attr_blocksize.attr,
-       NULL,
-};
-
-static ssize_t btrfs_super_attr_show(struct kobject *kobj,
-                                   struct attribute *attr, char *buf)
-{
-       struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                               super_kobj);
-       struct btrfs_super_attr *a = container_of(attr,
-                                                 struct btrfs_super_attr,
-                                                 attr);
-
-       return a->show ? a->show(fs, buf) : 0;
-}
-
-static ssize_t btrfs_super_attr_store(struct kobject *kobj,
-                                    struct attribute *attr,
-                                    const char *buf, size_t len)
-{
-       struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                               super_kobj);
-       struct btrfs_super_attr *a = container_of(attr,
-                                                 struct btrfs_super_attr,
-                                                 attr);
-
-       return a->store ? a->store(fs, buf, len) : 0;
-}
-
-static ssize_t btrfs_root_attr_show(struct kobject *kobj,
-                                   struct attribute *attr, char *buf)
-{
-       struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                               root_kobj);
-       struct btrfs_root_attr *a = container_of(attr,
-                                                struct btrfs_root_attr,
-                                                attr);
-
-       return a->show ? a->show(root, buf) : 0;
-}
-
-static ssize_t btrfs_root_attr_store(struct kobject *kobj,
-                                    struct attribute *attr,
-                                    const char *buf, size_t len)
-{
-       struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                               root_kobj);
-       struct btrfs_root_attr *a = container_of(attr,
-                                                struct btrfs_root_attr,
-                                                attr);
-       return a->store ? a->store(root, buf, len) : 0;
-}
-
-static void btrfs_super_release(struct kobject *kobj)
-{
-       struct btrfs_fs_info *fs = container_of(kobj, struct btrfs_fs_info,
-                                               super_kobj);
-       complete(&fs->kobj_unregister);
-}
-
-static void btrfs_root_release(struct kobject *kobj)
-{
-       struct btrfs_root *root = container_of(kobj, struct btrfs_root,
-                                               root_kobj);
-       complete(&root->kobj_unregister);
-}
-
-static const struct sysfs_ops btrfs_super_attr_ops = {
-       .show   = btrfs_super_attr_show,
-       .store  = btrfs_super_attr_store,
-};
-
-static const struct sysfs_ops btrfs_root_attr_ops = {
-       .show   = btrfs_root_attr_show,
-       .store  = btrfs_root_attr_store,
-};
-
 /* /sys/fs/btrfs/ entry */
 static struct kset *btrfs_kset;
 
index 2b3590b..51dcec8 100644 (file)
@@ -126,28 +126,85 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+static int record_root_in_trans(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root)
 {
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);
 
+               /*
+                * see below for in_trans_setup usage rules
+                * we have the reloc mutex held now, so there
+                * is only one writer in this function
+                */
+               root->in_trans_setup = 1;
+
+               /* make sure readers find in_trans_setup before
+                * they find our root->last_trans update
+                */
+               smp_wmb();
+
                spin_lock(&root->fs_info->fs_roots_radix_lock);
                if (root->last_trans == trans->transid) {
                        spin_unlock(&root->fs_info->fs_roots_radix_lock);
                        return 0;
                }
-               root->last_trans = trans->transid;
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
                spin_unlock(&root->fs_info->fs_roots_radix_lock);
+               root->last_trans = trans->transid;
+
+               /* this is pretty tricky.  We don't want to
+                * take the relocation lock in btrfs_record_root_in_trans
+                * unless we're really doing the first setup for this root in
+                * this transaction.
+                *
+                * Normally we'd use root->last_trans as a flag to decide
+                * if we want to take the expensive mutex.
+                *
+                * But, we have to set root->last_trans before we
+                * init the relocation root, otherwise, we trip over warnings
+                * in ctree.c.  The solution used here is to flag ourselves
+                * with root->in_trans_setup.  When this is 1, we're still
+                * fixing up the reloc trees and everyone must wait.
+                *
+                * When this is zero, they can trust root->last_trans and fly
+                * through btrfs_record_root_in_trans without having to take the
+                * lock.  smp_wmb() makes sure that all the writes above are
+                * done before we pop in the zero below
+                */
                btrfs_init_reloc_root(trans, root);
+               smp_wmb();
+               root->in_trans_setup = 0;
        }
        return 0;
 }
 
+
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root)
+{
+       if (!root->ref_cows)
+               return 0;
+
+       /*
+        * see record_root_in_trans for comments about in_trans_setup usage
+        * and barriers
+        */
+       smp_rmb();
+       if (root->last_trans == trans->transid &&
+           !root->in_trans_setup)
+               return 0;
+
+       mutex_lock(&root->fs_info->reloc_mutex);
+       record_root_in_trans(trans, root);
+       mutex_unlock(&root->fs_info->reloc_mutex);
+
+       return 0;
+}
+
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -882,7 +939,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        parent = dget_parent(dentry);
        parent_inode = parent->d_inode;
        parent_root = BTRFS_I(parent_inode)->root;
-       btrfs_record_root_in_trans(trans, parent_root);
+       record_root_in_trans(trans, parent_root);
 
        /*
         * insert the directory item
@@ -900,7 +957,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
-       btrfs_record_root_in_trans(trans, root);
+       /*
+        * pull in the delayed directory update
+        * and the delayed inode item
+        * otherwise we corrupt the FS during
+        * snapshot
+        */
+       ret = btrfs_run_delayed_items(trans, root);
+       BUG_ON(ret);
+
+       record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);
@@ -961,14 +1027,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
        int ret;
 
        list_for_each_entry(pending, head, list) {
-               /*
-                * We must deal with the delayed items before creating
-                * snapshots, or we will create a snapthot with inconsistent
-                * information.
-               */
-               ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-               BUG_ON(ret);
-
                ret = create_pending_snapshot(trans, fs_info, pending);
                BUG_ON(ret);
        }
@@ -1241,21 +1299,42 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                        schedule_timeout(1);
 
                finish_wait(&cur_trans->writer_wait, &wait);
-               spin_lock(&root->fs_info->trans_lock);
-               root->fs_info->trans_no_join = 1;
-               spin_unlock(&root->fs_info->trans_lock);
        } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
 
-       ret = create_pending_snapshots(trans, root->fs_info);
-       BUG_ON(ret);
+       /*
+        * Ok now we need to make sure to block out any other joins while we
+        * commit the transaction.  We could have started a join before setting
+        * no_join so make sure to wait for num_writers to == 1 again.
+        */
+       spin_lock(&root->fs_info->trans_lock);
+       root->fs_info->trans_no_join = 1;
+       spin_unlock(&root->fs_info->trans_lock);
+       wait_event(cur_trans->writer_wait,
+                  atomic_read(&cur_trans->num_writers) == 1);
+
+       /*
+        * the reloc mutex makes sure that we stop
+        * the balancing code from coming in and moving
+        * extents around in the middle of the commit
+        */
+       mutex_lock(&root->fs_info->reloc_mutex);
 
        ret = btrfs_run_delayed_items(trans, root);
        BUG_ON(ret);
 
+       ret = create_pending_snapshots(trans, root->fs_info);
+       BUG_ON(ret);
+
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        BUG_ON(ret);
 
+       /*
+        * make sure none of the code above managed to slip in a
+        * delayed item
+        */
+       btrfs_assert_delayed_root_empty(root);
+
        WARN_ON(cur_trans != trans->transaction);
 
        btrfs_scrub_pause(root);
@@ -1312,6 +1391,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        root->fs_info->running_transaction = NULL;
        root->fs_info->trans_no_join = 0;
        spin_unlock(&root->fs_info->trans_lock);
+       mutex_unlock(&root->fs_info->reloc_mutex);
 
        wake_up(&root->fs_info->transaction_wait);
 
index 592396c..4ce8a9f 100644 (file)
@@ -3177,7 +3177,7 @@ again:
                tmp_key.offset = (u64)-1;
 
                wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
-               BUG_ON(!wc.replay_dest);
+               BUG_ON(IS_ERR_OR_NULL(wc.replay_dest));
 
                wc.replay_dest->log_root = log;
                btrfs_record_root_in_trans(trans, wc.replay_dest);