Btrfs: make sure all dirty blocks are written at commit time
authorYan Zheng <zheng.yan@oracle.com>
Wed, 22 Jul 2009 14:07:05 +0000 (10:07 -0400)
committerChris Mason <chris.mason@oracle.com>
Wed, 22 Jul 2009 14:07:05 +0000 (10:07 -0400)
Write dirty block groups may allocate new block, and so may add new delayed
back ref. btrfs_run_delayed_refs may make some block groups dirty.

commit_cowonly_roots does not handle the recursion properly, and some dirty
blocks can be left unwritten at commit time. This patch moves
btrfs_run_delayed_refs into the loop that writes dirty block groups, and makes
the code not break out of the loop until there are no dirty block groups or
delayed back refs.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent-tree.c
fs/btrfs/transaction.c

index a5aca39..62a332d 100644 (file)
@@ -2387,13 +2387,29 @@ fail:
 
 }
 
+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+                struct btrfs_block_group_cache *cache)
+{
+       struct rb_node *node;
+       spin_lock(&root->fs_info->block_group_cache_lock);
+       node = rb_next(&cache->cache_node);
+       btrfs_put_block_group(cache);
+       if (node) {
+               cache = rb_entry(node, struct btrfs_block_group_cache,
+                                cache_node);
+               atomic_inc(&cache->count);
+       } else
+               cache = NULL;
+       spin_unlock(&root->fs_info->block_group_cache_lock);
+       return cache;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
-       struct btrfs_block_group_cache *cache, *entry;
-       struct rb_node *n;
+       struct btrfs_block_group_cache *cache;
        int err = 0;
-       int werr = 0;
        struct btrfs_path *path;
        u64 last = 0;
 
@@ -2402,39 +2418,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        while (1) {
-               cache = NULL;
-               spin_lock(&root->fs_info->block_group_cache_lock);
-               for (n = rb_first(&root->fs_info->block_group_cache_tree);
-                    n; n = rb_next(n)) {
-                       entry = rb_entry(n, struct btrfs_block_group_cache,
-                                        cache_node);
-                       if (entry->dirty) {
-                               cache = entry;
-                               break;
-                       }
+               if (last == 0) {
+                       err = btrfs_run_delayed_refs(trans, root,
+                                                    (unsigned long)-1);
+                       BUG_ON(err);
                }
-               spin_unlock(&root->fs_info->block_group_cache_lock);
 
-               if (!cache)
-                       break;
+               cache = btrfs_lookup_first_block_group(root->fs_info, last);
+               while (cache) {
+                       if (cache->dirty)
+                               break;
+                       cache = next_block_group(root, cache);
+               }
+               if (!cache) {
+                       if (last == 0)
+                               break;
+                       last = 0;
+                       continue;
+               }
 
                cache->dirty = 0;
-               last += cache->key.offset;
+               last = cache->key.objectid + cache->key.offset;
 
-               err = write_one_cache_group(trans, root,
-                                           path, cache);
-               /*
-                * if we fail to write the cache group, we want
-                * to keep it marked dirty in hopes that a later
-                * write will work
-                */
-               if (err) {
-                       werr = err;
-                       continue;
-               }
+               err = write_one_cache_group(trans, root, path, cache);
+               BUG_ON(err);
+               btrfs_put_block_group(cache);
        }
+
        btrfs_free_path(path);
-       return werr;
+       return 0;
 }
 
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
index 2dbf1c1..81f7124 100644 (file)
@@ -444,9 +444,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
 
        btrfs_write_dirty_block_groups(trans, root);
 
-       ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-       BUG_ON(ret);
-
        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start)
@@ -457,9 +454,8 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                                        &root->root_key,
                                        &root->root_item);
                BUG_ON(ret);
-               btrfs_write_dirty_block_groups(trans, root);
 
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+               ret = btrfs_write_dirty_block_groups(trans, root);
                BUG_ON(ret);
        }
        free_extent_buffer(root->commit_root);
@@ -495,9 +491,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                root = list_entry(next, struct btrfs_root, dirty_list);
 
                update_cowonly_root(trans, root);
-
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-               BUG_ON(ret);
        }
        return 0;
 }