Btrfs: async delayed refs
authorChris Mason <clm@fb.com>
Thu, 22 May 2014 23:18:52 +0000 (16:18 -0700)
committerChris Mason <clm@fb.com>
Tue, 10 Jun 2014 00:20:58 +0000 (17:20 -0700)
Delayed extent operations are triggered during transaction commits.
The goal is to queue up a healthly batch of changes to the extent
allocation tree and run through them in bulk.

This farms them off to async helper threads.  The goal is to have the
bulk of the delayed operations being done in the background, but this is
also important to limit our stack footprint.

Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/transaction.c

index 4896d7a..02895a1 100644 (file)
@@ -1546,6 +1546,9 @@ struct btrfs_fs_info {
         */
        struct btrfs_workqueue *fixup_workers;
        struct btrfs_workqueue *delayed_workers;
+
+       /* the extent workers do delayed refs on the extent allocation tree */
+       struct btrfs_workqueue *extent_workers;
        struct task_struct *transaction_kthread;
        struct task_struct *cleaner_kthread;
        int thread_pool_size;
@@ -3268,6 +3271,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count);
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+                                unsigned long count, int wait);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr,
index 0c0fa78..8bb4aa1 100644 (file)
@@ -2069,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
        btrfs_destroy_workqueue(fs_info->readahead_workers);
        btrfs_destroy_workqueue(fs_info->flush_workers);
        btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+       btrfs_destroy_workqueue(fs_info->extent_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2586,6 +2587,10 @@ int open_ctree(struct super_block *sb,
                btrfs_alloc_workqueue("readahead", flags, max_active, 2);
        fs_info->qgroup_rescan_workers =
                btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+       fs_info->extent_workers =
+               btrfs_alloc_workqueue("extent-refs", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 8);
 
        if (!(fs_info->workers && fs_info->delalloc_workers &&
              fs_info->submit_workers && fs_info->flush_workers &&
@@ -2595,6 +2600,7 @@ int open_ctree(struct super_block *sb,
              fs_info->endio_freespace_worker && fs_info->rmw_workers &&
              fs_info->caching_workers && fs_info->readahead_workers &&
              fs_info->fixup_workers && fs_info->delayed_workers &&
+             fs_info->fixup_workers && fs_info->extent_workers &&
              fs_info->qgroup_rescan_workers)) {
                err = -ENOMEM;
                goto fail_sb_buffer;
index bb5b306..6caddd5 100644 (file)
@@ -2674,15 +2674,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
        u64 num_entries =
                atomic_read(&trans->transaction->delayed_refs.num_entries);
        u64 avg_runtime;
+       u64 val;
 
        smp_mb();
        avg_runtime = fs_info->avg_delayed_ref_runtime;
+       val = num_entries * avg_runtime;
        if (num_entries * avg_runtime >= NSEC_PER_SEC)
                return 1;
+       if (val >= NSEC_PER_SEC / 2)
+               return 2;
 
        return btrfs_check_space_for_delayed_refs(trans, root);
 }
 
+struct async_delayed_refs {
+       struct btrfs_root *root;
+       int count;
+       int error;
+       int sync;
+       struct completion wait;
+       struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+       struct async_delayed_refs *async;
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       async = container_of(work, struct async_delayed_refs, work);
+
+       trans = btrfs_join_transaction(async->root);
+       if (IS_ERR(trans)) {
+               async->error = PTR_ERR(trans);
+               goto done;
+       }
+
+       /*
+        * trans->sync means that when we call end_transaciton, we won't
+        * wait on delayed refs
+        */
+       trans->sync = true;
+       ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+       if (ret)
+               async->error = ret;
+
+       ret = btrfs_end_transaction(trans, async->root);
+       if (ret && !async->error)
+               async->error = ret;
+done:
+       if (async->sync)
+               complete(&async->wait);
+       else
+               kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+                                unsigned long count, int wait)
+{
+       struct async_delayed_refs *async;
+       int ret;
+
+       async = kmalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               return -ENOMEM;
+
+       async->root = root->fs_info->tree_root;
+       async->count = count;
+       async->error = 0;
+       if (wait)
+               async->sync = 1;
+       else
+               async->sync = 0;
+       init_completion(&async->wait);
+
+       btrfs_init_work(&async->work, delayed_ref_async_start,
+                       NULL, NULL);
+
+       btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+       if (wait) {
+               wait_for_completion(&async->wait);
+               ret = async->error;
+               kfree(async);
+               return ret;
+       }
+       return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
index 992aae6..38d1e7b 100644 (file)
@@ -2678,6 +2678,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                trans = NULL;
                goto out_unlock;
        }
+
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
index 3aafbde..1c54e2e 100644 (file)
@@ -697,6 +697,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        unsigned long cur = trans->delayed_ref_updates;
        int lock = (trans->type != TRANS_JOIN_NOLOCK);
        int err = 0;
+       int must_run_delayed_refs = 0;
 
        if (trans->use_count > 1) {
                trans->use_count--;
@@ -711,10 +712,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                btrfs_create_pending_block_groups(trans, root);
 
        trans->delayed_ref_updates = 0;
-       if (!trans->sync && btrfs_should_throttle_delayed_refs(trans, root)) {
+       if (!trans->sync) {
+               must_run_delayed_refs =
+                       btrfs_should_throttle_delayed_refs(trans, root);
                cur = max_t(unsigned long, cur, 32);
-               trans->delayed_ref_updates = 0;
-               btrfs_run_delayed_refs(trans, root, cur);
+
+               /*
+                * don't make the caller wait if they are from a NOLOCK
+                * or ATTACH transaction, it will deadlock with commit
+                */
+               if (must_run_delayed_refs == 1 &&
+                   (trans->type & (__TRANS_JOIN_NOLOCK | __TRANS_ATTACH)))
+                       must_run_delayed_refs = 2;
        }
 
        if (trans->qgroup_reserved) {
@@ -775,6 +784,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        assert_qgroups_uptodate(trans);
 
        kmem_cache_free(btrfs_trans_handle_cachep, trans);
+       if (must_run_delayed_refs) {
+               btrfs_async_run_delayed_refs(root, cur,
+                                            must_run_delayed_refs == 1);
+       }
        return err;
 }