Merge branch 'for-chris' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-work into for-linus

author    Chris Mason <chris.mason@oracle.com>    Sat, 28 May 2011 11:00:39 +0000 (07:00 -0400)
committer Chris Mason <chris.mason@oracle.com>    Sat, 28 May 2011 11:00:39 +0000 (07:00 -0400)

Conflicts:
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/transaction.c

Signed-off-by: Chris Mason <chris.mason@oracle.com>
15 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/xattr.c

Simple merge
Simple merge
Simple merge
diff --cc fs/btrfs/delayed-inode.c
index 01e2950,0000000..b46d94d
mode 100644,000000..100644
--- /dev/null
+++ b/fs/btrfs/delayed-inode.c
@@@ -1,1695 -1,0 +1,1694 @@@
 +/*
 + * Copyright (C) 2011 Fujitsu.  All rights reserved.
 + * Written by Miao Xie <miaox@cn.fujitsu.com>
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public
 + * License v2 as published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public
 + * License along with this program; if not, write to the
 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 + * Boston, MA 02111-1307, USA.
 + */
 +
 +#include <linux/slab.h>
 +#include "delayed-inode.h"
 +#include "disk-io.h"
 +#include "transaction.h"
 +
 +#define BTRFS_DELAYED_WRITEBACK               400
 +#define BTRFS_DELAYED_BACKGROUND      100
 +
 +static struct kmem_cache *delayed_node_cache;
 +
 +int __init btrfs_delayed_inode_init(void)
 +{
 +      delayed_node_cache = kmem_cache_create("delayed_node",
 +                                      sizeof(struct btrfs_delayed_node),
 +                                      0,
 +                                      SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
 +                                      NULL);
 +      if (!delayed_node_cache)
 +              return -ENOMEM;
 +      return 0;
 +}
 +
 +void btrfs_delayed_inode_exit(void)
 +{
 +      if (delayed_node_cache)
 +              kmem_cache_destroy(delayed_node_cache);
 +}
 +
 +static inline void btrfs_init_delayed_node(
 +                              struct btrfs_delayed_node *delayed_node,
 +                              struct btrfs_root *root, u64 inode_id)
 +{
 +      delayed_node->root = root;
 +      delayed_node->inode_id = inode_id;
 +      atomic_set(&delayed_node->refs, 0);
 +      delayed_node->count = 0;
 +      delayed_node->in_list = 0;
 +      delayed_node->inode_dirty = 0;
 +      delayed_node->ins_root = RB_ROOT;
 +      delayed_node->del_root = RB_ROOT;
 +      mutex_init(&delayed_node->mutex);
 +      delayed_node->index_cnt = 0;
 +      INIT_LIST_HEAD(&delayed_node->n_list);
 +      INIT_LIST_HEAD(&delayed_node->p_list);
 +      delayed_node->bytes_reserved = 0;
 +}
 +
 +static inline int btrfs_is_continuous_delayed_item(
 +                                      struct btrfs_delayed_item *item1,
 +                                      struct btrfs_delayed_item *item2)
 +{
 +      if (item1->key.type == BTRFS_DIR_INDEX_KEY &&
 +          item1->key.objectid == item2->key.objectid &&
 +          item1->key.type == item2->key.type &&
 +          item1->key.offset + 1 == item2->key.offset)
 +              return 1;
 +      return 0;
 +}
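
As a hedged aside, "continuous" here means: both items carry DIR_INDEX keys of the same directory inode and their index offsets are adjacent. The inode number and offsets below are invented for illustration:

        /* Illustrative only: btrfs_is_continuous_delayed_item() reports key a
         * followed by key b as continuous; offsets 10 and 12 would not be. */
        struct btrfs_key a = { .objectid = 257, .type = BTRFS_DIR_INDEX_KEY,
                               .offset = 10 };
        struct btrfs_key b = { .objectid = 257, .type = BTRFS_DIR_INDEX_KEY,
                               .offset = 11 };
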
 +
 +static inline struct btrfs_delayed_root *btrfs_get_delayed_root(
 +                                                      struct btrfs_root *root)
 +{
 +      return root->fs_info->delayed_root;
 +}
 +
 +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
 +                                                      struct inode *inode)
 +{
 +      struct btrfs_delayed_node *node;
 +      struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 +      struct btrfs_root *root = btrfs_inode->root;
 +      u64 ino = btrfs_ino(inode);
 +      int ret;
 +
 +again:
 +      node = ACCESS_ONCE(btrfs_inode->delayed_node);
 +      if (node) {
 +              atomic_inc(&node->refs);        /* can be accessed */
 +              return node;
 +      }
 +
 +      spin_lock(&root->inode_lock);
 +      node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
 +      if (node) {
 +              if (btrfs_inode->delayed_node) {
 +                      spin_unlock(&root->inode_lock);
 +                      goto again;
 +              }
 +              btrfs_inode->delayed_node = node;
 +              atomic_inc(&node->refs);        /* can be accessed */
 +              atomic_inc(&node->refs);        /* cached in the inode */
 +              spin_unlock(&root->inode_lock);
 +              return node;
 +      }
 +      spin_unlock(&root->inode_lock);
 +
 +      node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
 +      if (!node)
 +              return ERR_PTR(-ENOMEM);
 +      btrfs_init_delayed_node(node, root, ino);
 +
 +      atomic_inc(&node->refs);        /* cached in the btrfs inode */
 +      atomic_inc(&node->refs);        /* can be accessed */
 +
 +      ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
 +      if (ret) {
 +              kmem_cache_free(delayed_node_cache, node);
 +              return ERR_PTR(ret);
 +      }
 +
 +      spin_lock(&root->inode_lock);
 +      ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
 +      if (ret == -EEXIST) {
 +              kmem_cache_free(delayed_node_cache, node);
 +              spin_unlock(&root->inode_lock);
 +              radix_tree_preload_end();
 +              goto again;
 +      }
 +      btrfs_inode->delayed_node = node;
 +      spin_unlock(&root->inode_lock);
 +      radix_tree_preload_end();
 +
 +      return node;
 +}
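
A minimal caller sketch, mirroring btrfs_insert_delayed_dir_index() further down in this file: a freshly cached node starts with two references (one held by the inode cache, one by the caller), and the caller's reference must always be dropped with btrfs_release_delayed_node():

        struct btrfs_delayed_node *node;

        node = btrfs_get_or_create_delayed_node(dir);
        if (IS_ERR(node))
                return PTR_ERR(node);
        /* ... queue delayed items under node->mutex ... */
        btrfs_release_delayed_node(node); /* drop the "can be accessed" ref */
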
 +
 +/*
 + * Call it when holding delayed_node->mutex
 + *
 + * If mod = 1, add this node into the prepared list.
 + */
 +static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
 +                                   struct btrfs_delayed_node *node,
 +                                   int mod)
 +{
 +      spin_lock(&root->lock);
 +      if (node->in_list) {
 +              if (!list_empty(&node->p_list))
 +                      list_move_tail(&node->p_list, &root->prepare_list);
 +              else if (mod)
 +                      list_add_tail(&node->p_list, &root->prepare_list);
 +      } else {
 +              list_add_tail(&node->n_list, &root->node_list);
 +              list_add_tail(&node->p_list, &root->prepare_list);
 +              atomic_inc(&node->refs);        /* inserted into list */
 +              root->nodes++;
 +              node->in_list = 1;
 +      }
 +      spin_unlock(&root->lock);
 +}
 +
 +/* Call it when holding delayed_node->mutex */
 +static void btrfs_dequeue_delayed_node(struct btrfs_delayed_root *root,
 +                                     struct btrfs_delayed_node *node)
 +{
 +      spin_lock(&root->lock);
 +      if (node->in_list) {
 +              root->nodes--;
 +              atomic_dec(&node->refs);        /* not in the list */
 +              list_del_init(&node->n_list);
 +              if (!list_empty(&node->p_list))
 +                      list_del_init(&node->p_list);
 +              node->in_list = 0;
 +      }
 +      spin_unlock(&root->lock);
 +}
 +
 +struct btrfs_delayed_node *btrfs_first_delayed_node(
 +                      struct btrfs_delayed_root *delayed_root)
 +{
 +      struct list_head *p;
 +      struct btrfs_delayed_node *node = NULL;
 +
 +      spin_lock(&delayed_root->lock);
 +      if (list_empty(&delayed_root->node_list))
 +              goto out;
 +
 +      p = delayed_root->node_list.next;
 +      node = list_entry(p, struct btrfs_delayed_node, n_list);
 +      atomic_inc(&node->refs);
 +out:
 +      spin_unlock(&delayed_root->lock);
 +
 +      return node;
 +}
 +
 +struct btrfs_delayed_node *btrfs_next_delayed_node(
 +                                              struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_delayed_root *delayed_root;
 +      struct list_head *p;
 +      struct btrfs_delayed_node *next = NULL;
 +
 +      delayed_root = node->root->fs_info->delayed_root;
 +      spin_lock(&delayed_root->lock);
 +      if (!node->in_list) {   /* not in the list */
 +              if (list_empty(&delayed_root->node_list))
 +                      goto out;
 +              p = delayed_root->node_list.next;
 +      } else if (list_is_last(&node->n_list, &delayed_root->node_list))
 +              goto out;
 +      else
 +              p = node->n_list.next;
 +
 +      next = list_entry(p, struct btrfs_delayed_node, n_list);
 +      atomic_inc(&next->refs);
 +out:
 +      spin_unlock(&delayed_root->lock);
 +
 +      return next;
 +}
 +
 +static void __btrfs_release_delayed_node(
 +                              struct btrfs_delayed_node *delayed_node,
 +                              int mod)
 +{
 +      struct btrfs_delayed_root *delayed_root;
 +
 +      if (!delayed_node)
 +              return;
 +
 +      delayed_root = delayed_node->root->fs_info->delayed_root;
 +
 +      mutex_lock(&delayed_node->mutex);
 +      if (delayed_node->count)
 +              btrfs_queue_delayed_node(delayed_root, delayed_node, mod);
 +      else
 +              btrfs_dequeue_delayed_node(delayed_root, delayed_node);
 +      mutex_unlock(&delayed_node->mutex);
 +
 +      if (atomic_dec_and_test(&delayed_node->refs)) {
 +              struct btrfs_root *root = delayed_node->root;
 +              spin_lock(&root->inode_lock);
 +              if (atomic_read(&delayed_node->refs) == 0) {
 +                      radix_tree_delete(&root->delayed_nodes_tree,
 +                                        delayed_node->inode_id);
 +                      kmem_cache_free(delayed_node_cache, delayed_node);
 +              }
 +              spin_unlock(&root->inode_lock);
 +      }
 +}
 +
 +static inline void btrfs_release_delayed_node(struct btrfs_delayed_node *node)
 +{
 +      __btrfs_release_delayed_node(node, 0);
 +}
 +
 +struct btrfs_delayed_node *btrfs_first_prepared_delayed_node(
 +                                      struct btrfs_delayed_root *delayed_root)
 +{
 +      struct list_head *p;
 +      struct btrfs_delayed_node *node = NULL;
 +
 +      spin_lock(&delayed_root->lock);
 +      if (list_empty(&delayed_root->prepare_list))
 +              goto out;
 +
 +      p = delayed_root->prepare_list.next;
 +      list_del_init(p);
 +      node = list_entry(p, struct btrfs_delayed_node, p_list);
 +      atomic_inc(&node->refs);
 +out:
 +      spin_unlock(&delayed_root->lock);
 +
 +      return node;
 +}
 +
 +static inline void btrfs_release_prepared_delayed_node(
 +                                      struct btrfs_delayed_node *node)
 +{
 +      __btrfs_release_delayed_node(node, 1);
 +}
 +
 +struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
 +{
 +      struct btrfs_delayed_item *item;
 +      item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
 +      if (item) {
 +              item->data_len = data_len;
 +              item->ins_or_del = 0;
 +              item->bytes_reserved = 0;
 +              item->block_rsv = NULL;
 +              item->delayed_node = NULL;
 +              atomic_set(&item->refs, 1);
 +      }
 +      return item;
 +}
 +
 +/*
 + * __btrfs_lookup_delayed_item - look up the delayed item by key
 + * @root:       the rb-tree (insertion or deletion tree) to search
 + * @key:        the key to look up
 + * @prev:       used to store the prev item if the right item isn't found
 + * @next:       used to store the next item if the right item isn't found
 + *
 + * Note: if we don't find the right item, we will return the prev item and
 + * the next item.
 + */
 +static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
 +                              struct rb_root *root,
 +                              struct btrfs_key *key,
 +                              struct btrfs_delayed_item **prev,
 +                              struct btrfs_delayed_item **next)
 +{
 +      struct rb_node *node, *prev_node = NULL;
 +      struct btrfs_delayed_item *delayed_item = NULL;
 +      int ret = 0;
 +
 +      node = root->rb_node;
 +
 +      while (node) {
 +              delayed_item = rb_entry(node, struct btrfs_delayed_item,
 +                                      rb_node);
 +              prev_node = node;
 +              ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
 +              if (ret < 0)
 +                      node = node->rb_right;
 +              else if (ret > 0)
 +                      node = node->rb_left;
 +              else
 +                      return delayed_item;
 +      }
 +
 +      if (prev) {
 +              if (!prev_node)
 +                      *prev = NULL;
 +              else if (ret < 0)
 +                      *prev = delayed_item;
 +              else if ((node = rb_prev(prev_node)) != NULL) {
 +                      *prev = rb_entry(node, struct btrfs_delayed_item,
 +                                       rb_node);
 +              } else
 +                      *prev = NULL;
 +      }
 +
 +      if (next) {
 +              if (!prev_node)
 +                      *next = NULL;
 +              else if (ret > 0)
 +                      *next = delayed_item;
 +              else if ((node = rb_next(prev_node)) != NULL) {
 +                      *next = rb_entry(node, struct btrfs_delayed_item,
 +                                       rb_node);
 +              } else
 +                      *next = NULL;
 +      }
 +      return NULL;
 +}
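
A hedged example of the prev/next contract; the tree contents are hypothetical. Looking up an absent key returns NULL and hands back the in-tree neighbours instead:

        /* Suppose ins_root holds DIR_INDEX offsets {10, 12} for inode 257. */
        struct btrfs_delayed_item *prev, *next;
        struct btrfs_key key = { .objectid = 257, .type = BTRFS_DIR_INDEX_KEY,
                                 .offset = 11 };

        if (!__btrfs_lookup_delayed_item(&node->ins_root, &key, &prev, &next)) {
                /* prev is the offset-10 item, next is the offset-12 item */
        }
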
 +
 +struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
 +                                      struct btrfs_delayed_node *delayed_node,
 +                                      struct btrfs_key *key)
 +{
 +      struct btrfs_delayed_item *item;
 +
 +      item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 +                                         NULL, NULL);
 +      return item;
 +}
 +
 +struct btrfs_delayed_item *__btrfs_lookup_delayed_deletion_item(
 +                                      struct btrfs_delayed_node *delayed_node,
 +                                      struct btrfs_key *key)
 +{
 +      struct btrfs_delayed_item *item;
 +
 +      item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
 +                                         NULL, NULL);
 +      return item;
 +}
 +
 +struct btrfs_delayed_item *__btrfs_search_delayed_insertion_item(
 +                                      struct btrfs_delayed_node *delayed_node,
 +                                      struct btrfs_key *key)
 +{
 +      struct btrfs_delayed_item *item, *next;
 +
 +      item = __btrfs_lookup_delayed_item(&delayed_node->ins_root, key,
 +                                         NULL, &next);
 +      if (!item)
 +              item = next;
 +
 +      return item;
 +}
 +
 +struct btrfs_delayed_item *__btrfs_search_delayed_deletion_item(
 +                                      struct btrfs_delayed_node *delayed_node,
 +                                      struct btrfs_key *key)
 +{
 +      struct btrfs_delayed_item *item, *next;
 +
 +      item = __btrfs_lookup_delayed_item(&delayed_node->del_root, key,
 +                                         NULL, &next);
 +      if (!item)
 +              item = next;
 +
 +      return item;
 +}
 +
 +static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
 +                                  struct btrfs_delayed_item *ins,
 +                                  int action)
 +{
 +      struct rb_node **p, *node;
 +      struct rb_node *parent_node = NULL;
 +      struct rb_root *root;
 +      struct btrfs_delayed_item *item;
 +      int cmp;
 +
 +      if (action == BTRFS_DELAYED_INSERTION_ITEM)
 +              root = &delayed_node->ins_root;
 +      else if (action == BTRFS_DELAYED_DELETION_ITEM)
 +              root = &delayed_node->del_root;
 +      else
 +              BUG();
 +      p = &root->rb_node;
 +      node = &ins->rb_node;
 +
 +      while (*p) {
 +              parent_node = *p;
 +              item = rb_entry(parent_node, struct btrfs_delayed_item,
 +                               rb_node);
 +
 +              cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
 +              if (cmp < 0)
 +                      p = &(*p)->rb_right;
 +              else if (cmp > 0)
 +                      p = &(*p)->rb_left;
 +              else
 +                      return -EEXIST;
 +      }
 +
 +      rb_link_node(node, parent_node, p);
 +      rb_insert_color(node, root);
 +      ins->delayed_node = delayed_node;
 +      ins->ins_or_del = action;
 +
 +      if (ins->key.type == BTRFS_DIR_INDEX_KEY &&
 +          action == BTRFS_DELAYED_INSERTION_ITEM &&
 +          ins->key.offset >= delayed_node->index_cnt)
 +                      delayed_node->index_cnt = ins->key.offset + 1;
 +
 +      delayed_node->count++;
 +      atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
 +      return 0;
 +}
 +
 +static int __btrfs_add_delayed_insertion_item(struct btrfs_delayed_node *node,
 +                                            struct btrfs_delayed_item *item)
 +{
 +      return __btrfs_add_delayed_item(node, item,
 +                                      BTRFS_DELAYED_INSERTION_ITEM);
 +}
 +
 +static int __btrfs_add_delayed_deletion_item(struct btrfs_delayed_node *node,
 +                                           struct btrfs_delayed_item *item)
 +{
 +      return __btrfs_add_delayed_item(node, item,
 +                                      BTRFS_DELAYED_DELETION_ITEM);
 +}
 +
 +static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
 +{
 +      struct rb_root *root;
 +      struct btrfs_delayed_root *delayed_root;
 +
 +      delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
 +
 +      BUG_ON(!delayed_root);
 +      BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
 +             delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
 +
 +      if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
 +              root = &delayed_item->delayed_node->ins_root;
 +      else
 +              root = &delayed_item->delayed_node->del_root;
 +
 +      rb_erase(&delayed_item->rb_node, root);
 +      delayed_item->delayed_node->count--;
 +      atomic_dec(&delayed_root->items);
 +      if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND &&
 +          waitqueue_active(&delayed_root->wait))
 +              wake_up(&delayed_root->wait);
 +}
 +
 +static void btrfs_release_delayed_item(struct btrfs_delayed_item *item)
 +{
 +      if (item) {
 +              __btrfs_remove_delayed_item(item);
 +              if (atomic_dec_and_test(&item->refs))
 +                      kfree(item);
 +      }
 +}
 +
 +struct btrfs_delayed_item *__btrfs_first_delayed_insertion_item(
 +                                      struct btrfs_delayed_node *delayed_node)
 +{
 +      struct rb_node *p;
 +      struct btrfs_delayed_item *item = NULL;
 +
 +      p = rb_first(&delayed_node->ins_root);
 +      if (p)
 +              item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 +
 +      return item;
 +}
 +
 +struct btrfs_delayed_item *__btrfs_first_delayed_deletion_item(
 +                                      struct btrfs_delayed_node *delayed_node)
 +{
 +      struct rb_node *p;
 +      struct btrfs_delayed_item *item = NULL;
 +
 +      p = rb_first(&delayed_node->del_root);
 +      if (p)
 +              item = rb_entry(p, struct btrfs_delayed_item, rb_node);
 +
 +      return item;
 +}
 +
 +struct btrfs_delayed_item *__btrfs_next_delayed_item(
 +                                              struct btrfs_delayed_item *item)
 +{
 +      struct rb_node *p;
 +      struct btrfs_delayed_item *next = NULL;
 +
 +      p = rb_next(&item->rb_node);
 +      if (p)
 +              next = rb_entry(p, struct btrfs_delayed_item, rb_node);
 +
 +      return next;
 +}
 +
 +static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
 +                                                      struct inode *inode)
 +{
 +      struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
 +      struct btrfs_delayed_node *delayed_node;
 +
 +      delayed_node = btrfs_inode->delayed_node;
 +      if (delayed_node)
 +              atomic_inc(&delayed_node->refs);
 +
 +      return delayed_node;
 +}
 +
 +static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
 +                                                 u64 root_id)
 +{
 +      struct btrfs_key root_key;
 +
 +      if (root->objectid == root_id)
 +              return root;
 +
 +      root_key.objectid = root_id;
 +      root_key.type = BTRFS_ROOT_ITEM_KEY;
 +      root_key.offset = (u64)-1;
 +      return btrfs_read_fs_root_no_name(root->fs_info, &root_key);
 +}
 +
 +static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
 +                                             struct btrfs_root *root,
 +                                             struct btrfs_delayed_item *item)
 +{
 +      struct btrfs_block_rsv *src_rsv;
 +      struct btrfs_block_rsv *dst_rsv;
 +      u64 num_bytes;
 +      int ret;
 +
 +      if (!trans->bytes_reserved)
 +              return 0;
 +
 +      src_rsv = trans->block_rsv;
 +      dst_rsv = &root->fs_info->global_block_rsv;
 +
 +      num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 +      ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 +      if (!ret) {
 +              item->bytes_reserved = num_bytes;
 +              item->block_rsv = dst_rsv;
 +      }
 +
 +      return ret;
 +}
 +
 +static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
 +                                              struct btrfs_delayed_item *item)
 +{
 +      if (!item->bytes_reserved)
 +              return;
 +
 +      btrfs_block_rsv_release(root, item->block_rsv,
 +                              item->bytes_reserved);
 +}
 +
 +static int btrfs_delayed_inode_reserve_metadata(
 +                                      struct btrfs_trans_handle *trans,
 +                                      struct btrfs_root *root,
 +                                      struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_block_rsv *src_rsv;
 +      struct btrfs_block_rsv *dst_rsv;
 +      u64 num_bytes;
 +      int ret;
 +
 +      if (!trans->bytes_reserved)
 +              return 0;
 +
 +      src_rsv = trans->block_rsv;
 +      dst_rsv = &root->fs_info->global_block_rsv;
 +
 +      num_bytes = btrfs_calc_trans_metadata_size(root, 1);
 +      ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
 +      if (!ret)
 +              node->bytes_reserved = num_bytes;
 +
 +      return ret;
 +}
 +
 +static void btrfs_delayed_inode_release_metadata(struct btrfs_root *root,
 +                                              struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_block_rsv *rsv;
 +
 +      if (!node->bytes_reserved)
 +              return;
 +
 +      rsv = &root->fs_info->global_block_rsv;
 +      btrfs_block_rsv_release(root, rsv,
 +                              node->bytes_reserved);
 +      node->bytes_reserved = 0;
 +}
 +
 +/*
 + * This helper will insert some continuous items into the same leaf according
 + * to the free space of the leaf.
 + */
 +static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
 +                              struct btrfs_root *root,
 +                              struct btrfs_path *path,
 +                              struct btrfs_delayed_item *item)
 +{
 +      struct btrfs_delayed_item *curr, *next;
 +      int free_space;
 +      int total_data_size = 0, total_size = 0;
 +      struct extent_buffer *leaf;
 +      char *data_ptr;
 +      struct btrfs_key *keys;
 +      u32 *data_size;
 +      struct list_head head;
 +      int slot;
 +      int nitems;
 +      int i;
 +      int ret = 0;
 +
 +      BUG_ON(!path->nodes[0]);
 +
 +      leaf = path->nodes[0];
 +      free_space = btrfs_leaf_free_space(root, leaf);
 +      INIT_LIST_HEAD(&head);
 +
 +      next = item;
 +      nitems = 0;
 +
 +      /*
 +       * count the number of continuous items that we can insert in a batch
 +       */
 +      while (total_size + next->data_len + sizeof(struct btrfs_item) <=
 +             free_space) {
 +              total_data_size += next->data_len;
 +              total_size += next->data_len + sizeof(struct btrfs_item);
 +              list_add_tail(&next->tree_list, &head);
 +              nitems++;
 +
 +              curr = next;
 +              next = __btrfs_next_delayed_item(curr);
 +              if (!next)
 +                      break;
 +
 +              if (!btrfs_is_continuous_delayed_item(curr, next))
 +                      break;
 +      }
 +
 +      if (!nitems) {
 +              ret = 0;
 +              goto out;
 +      }
 +
 +      /*
 +       * we need to allocate some memory, and that may cause the task
 +       * to sleep, so we set all the locked nodes in the path to blocking locks
 +       * first.
 +       */
 +      btrfs_set_path_blocking(path);
 +
 +      keys = kmalloc(sizeof(struct btrfs_key) * nitems, GFP_NOFS);
 +      if (!keys) {
 +              ret = -ENOMEM;
 +              goto out;
 +      }
 +
 +      data_size = kmalloc(sizeof(u32) * nitems, GFP_NOFS);
 +      if (!data_size) {
 +              ret = -ENOMEM;
 +              goto error;
 +      }
 +
 +      /* get keys of all the delayed items */
 +      i = 0;
 +      list_for_each_entry(next, &head, tree_list) {
 +              keys[i] = next->key;
 +              data_size[i] = next->data_len;
 +              i++;
 +      }
 +
 +      /* reset all the locked nodes in the path to spinning locks. */
 +      btrfs_clear_path_blocking(path, NULL);
 +
 +      /* insert the keys of the items */
 +      ret = setup_items_for_insert(trans, root, path, keys, data_size,
 +                                   total_data_size, total_size, nitems);
 +      if (ret)
 +              goto error;
 +
 +      /* insert the dir index items */
 +      slot = path->slots[0];
 +      list_for_each_entry_safe(curr, next, &head, tree_list) {
 +              data_ptr = btrfs_item_ptr(leaf, slot, char);
 +              write_extent_buffer(leaf, &curr->data,
 +                                  (unsigned long)data_ptr,
 +                                  curr->data_len);
 +              slot++;
 +
 +              btrfs_delayed_item_release_metadata(root, curr);
 +
 +              list_del(&curr->tree_list);
 +              btrfs_release_delayed_item(curr);
 +      }
 +
 +error:
 +      kfree(data_size);
 +      kfree(keys);
 +out:
 +      return ret;
 +}
 +
 +/*
 + * This helper handles simple insertions that don't need to extend an item
 + * for new data, such as directory name index insertion and inode insertion.
 + */
 +static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 +                                   struct btrfs_root *root,
 +                                   struct btrfs_path *path,
 +                                   struct btrfs_delayed_item *delayed_item)
 +{
 +      struct extent_buffer *leaf;
 +      struct btrfs_item *item;
 +      char *ptr;
 +      int ret;
 +
 +      ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
 +                                    delayed_item->data_len);
 +      if (ret < 0 && ret != -EEXIST)
 +              return ret;
 +
 +      leaf = path->nodes[0];
 +
 +      item = btrfs_item_nr(leaf, path->slots[0]);
 +      ptr = btrfs_item_ptr(leaf, path->slots[0], char);
 +
 +      write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
 +                          delayed_item->data_len);
 +      btrfs_mark_buffer_dirty(leaf);
 +
 +      btrfs_delayed_item_release_metadata(root, delayed_item);
 +      return 0;
 +}
 +
 +/*
 + * we insert an item first, then if there are some continuous items, we try
 + * to insert those items into the same leaf.
 + */
 +static int btrfs_insert_delayed_items(struct btrfs_trans_handle *trans,
 +                                    struct btrfs_path *path,
 +                                    struct btrfs_root *root,
 +                                    struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_delayed_item *curr, *prev;
 +      int ret = 0;
 +
 +do_again:
 +      mutex_lock(&node->mutex);
 +      curr = __btrfs_first_delayed_insertion_item(node);
 +      if (!curr)
 +              goto insert_end;
 +
 +      ret = btrfs_insert_delayed_item(trans, root, path, curr);
 +      if (ret < 0) {
 +              btrfs_release_path(path);
 +              goto insert_end;
 +      }
 +
 +      prev = curr;
 +      curr = __btrfs_next_delayed_item(prev);
 +      if (curr && btrfs_is_continuous_delayed_item(prev, curr)) {
 +              /* insert the continuous items into the same leaf */
 +              path->slots[0]++;
 +              btrfs_batch_insert_items(trans, root, path, curr);
 +      }
 +      btrfs_release_delayed_item(prev);
 +      btrfs_mark_buffer_dirty(path->nodes[0]);
 +
 +      btrfs_release_path(path);
 +      mutex_unlock(&node->mutex);
 +      goto do_again;
 +
 +insert_end:
 +      mutex_unlock(&node->mutex);
 +      return ret;
 +}
 +
 +static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
 +                                  struct btrfs_root *root,
 +                                  struct btrfs_path *path,
 +                                  struct btrfs_delayed_item *item)
 +{
 +      struct btrfs_delayed_item *curr, *next;
 +      struct extent_buffer *leaf;
 +      struct btrfs_key key;
 +      struct list_head head;
 +      int nitems, i, last_item;
 +      int ret = 0;
 +
 +      BUG_ON(!path->nodes[0]);
 +
 +      leaf = path->nodes[0];
 +
 +      i = path->slots[0];
 +      last_item = btrfs_header_nritems(leaf) - 1;
 +      if (i > last_item)
 +              return -ENOENT; /* FIXME: Is errno suitable? */
 +
 +      next = item;
 +      INIT_LIST_HEAD(&head);
 +      btrfs_item_key_to_cpu(leaf, &key, i);
 +      nitems = 0;
 +      /*
 +       * count the number of dir index items that we can delete in a batch
 +       */
 +      while (btrfs_comp_cpu_keys(&next->key, &key) == 0) {
 +              list_add_tail(&next->tree_list, &head);
 +              nitems++;
 +
 +              curr = next;
 +              next = __btrfs_next_delayed_item(curr);
 +              if (!next)
 +                      break;
 +
 +              if (!btrfs_is_continuous_delayed_item(curr, next))
 +                      break;
 +
 +              i++;
 +              if (i > last_item)
 +                      break;
 +              btrfs_item_key_to_cpu(leaf, &key, i);
 +      }
 +
 +      if (!nitems)
 +              return 0;
 +
 +      ret = btrfs_del_items(trans, root, path, path->slots[0], nitems);
 +      if (ret)
 +              goto out;
 +
 +      list_for_each_entry_safe(curr, next, &head, tree_list) {
 +              btrfs_delayed_item_release_metadata(root, curr);
 +              list_del(&curr->tree_list);
 +              btrfs_release_delayed_item(curr);
 +      }
 +
 +out:
 +      return ret;
 +}
 +
 +static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
 +                                    struct btrfs_path *path,
 +                                    struct btrfs_root *root,
 +                                    struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_delayed_item *curr, *prev;
 +      int ret = 0;
 +
 +do_again:
 +      mutex_lock(&node->mutex);
 +      curr = __btrfs_first_delayed_deletion_item(node);
 +      if (!curr)
 +              goto delete_fail;
 +
 +      ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
 +      if (ret < 0)
 +              goto delete_fail;
 +      else if (ret > 0) {
 +              /*
 +               * can't find the item which the node points to, so this node
 +               * is invalid, just drop it.
 +               */
 +              prev = curr;
 +              curr = __btrfs_next_delayed_item(prev);
 +              btrfs_release_delayed_item(prev);
 +              ret = 0;
 +              btrfs_release_path(path);
 +              if (curr)
 +                      goto do_again;
 +              else
 +                      goto delete_fail;
 +      }
 +
 +      btrfs_batch_delete_items(trans, root, path, curr);
 +      btrfs_release_path(path);
 +      mutex_unlock(&node->mutex);
 +      goto do_again;
 +
 +delete_fail:
 +      btrfs_release_path(path);
 +      mutex_unlock(&node->mutex);
 +      return ret;
 +}
 +
 +static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
 +{
 +      struct btrfs_delayed_root *delayed_root;
 +
 +      if (delayed_node && delayed_node->inode_dirty) {
 +              BUG_ON(!delayed_node->root);
 +              delayed_node->inode_dirty = 0;
 +              delayed_node->count--;
 +
 +              delayed_root = delayed_node->root->fs_info->delayed_root;
 +              atomic_dec(&delayed_root->items);
 +              if (atomic_read(&delayed_root->items) <
 +                  BTRFS_DELAYED_BACKGROUND &&
 +                  waitqueue_active(&delayed_root->wait))
 +                      wake_up(&delayed_root->wait);
 +      }
 +}
 +
 +static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 +                                    struct btrfs_root *root,
 +                                    struct btrfs_path *path,
 +                                    struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_key key;
 +      struct btrfs_inode_item *inode_item;
 +      struct extent_buffer *leaf;
 +      int ret;
 +
 +      mutex_lock(&node->mutex);
 +      if (!node->inode_dirty) {
 +              mutex_unlock(&node->mutex);
 +              return 0;
 +      }
 +
 +      key.objectid = node->inode_id;
 +      btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 +      key.offset = 0;
 +      ret = btrfs_lookup_inode(trans, root, path, &key, 1);
 +      if (ret > 0) {
 +              btrfs_release_path(path);
 +              mutex_unlock(&node->mutex);
 +              return -ENOENT;
 +      } else if (ret < 0) {
 +              mutex_unlock(&node->mutex);
 +              return ret;
 +      }
 +
 +      btrfs_unlock_up_safe(path, 1);
 +      leaf = path->nodes[0];
 +      inode_item = btrfs_item_ptr(leaf, path->slots[0],
 +                                  struct btrfs_inode_item);
 +      write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
 +                          sizeof(struct btrfs_inode_item));
 +      btrfs_mark_buffer_dirty(leaf);
 +      btrfs_release_path(path);
 +
 +      btrfs_delayed_inode_release_metadata(root, node);
 +      btrfs_release_delayed_inode(node);
 +      mutex_unlock(&node->mutex);
 +
 +      return 0;
 +}
 +
 +/* Called when committing the transaction. */
 +int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 +                          struct btrfs_root *root)
 +{
 +      struct btrfs_delayed_root *delayed_root;
 +      struct btrfs_delayed_node *curr_node, *prev_node;
 +      struct btrfs_path *path;
 +      int ret = 0;
 +
 +      path = btrfs_alloc_path();
 +      if (!path)
 +              return -ENOMEM;
 +      path->leave_spinning = 1;
 +
 +      delayed_root = btrfs_get_delayed_root(root);
 +
 +      curr_node = btrfs_first_delayed_node(delayed_root);
 +      while (curr_node) {
 +              root = curr_node->root;
 +              ret = btrfs_insert_delayed_items(trans, path, root,
 +                                               curr_node);
 +              if (!ret)
 +                      ret = btrfs_delete_delayed_items(trans, path, root,
 +                                                       curr_node);
 +              if (!ret)
 +                      ret = btrfs_update_delayed_inode(trans, root, path,
 +                                                       curr_node);
 +              if (ret) {
 +                      btrfs_release_delayed_node(curr_node);
 +                      break;
 +              }
 +
 +              prev_node = curr_node;
 +              curr_node = btrfs_next_delayed_node(curr_node);
 +              btrfs_release_delayed_node(prev_node);
 +      }
 +
 +      btrfs_free_path(path);
 +      return ret;
 +}
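
The per-node flush order used above is the same one the other flush paths in this file follow; summarized as a comment:

        /*
         * For each delayed node:
         *   1. btrfs_insert_delayed_items()  - apply the pending insertions
         *   2. btrfs_delete_delayed_items()  - apply the pending deletions
         *   3. btrfs_update_delayed_inode()  - write back the cached inode item
         * Any failure releases the current node and stops the walk.
         */
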
 +
 +static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 +                                            struct btrfs_delayed_node *node)
 +{
 +      struct btrfs_path *path;
 +      int ret;
 +
 +      path = btrfs_alloc_path();
 +      if (!path)
 +              return -ENOMEM;
 +      path->leave_spinning = 1;
 +
 +      ret = btrfs_insert_delayed_items(trans, path, node->root, node);
 +      if (!ret)
 +              ret = btrfs_delete_delayed_items(trans, path, node->root, node);
 +      if (!ret)
 +              ret = btrfs_update_delayed_inode(trans, node->root, path, node);
 +      btrfs_free_path(path);
 +
 +      return ret;
 +}
 +
 +int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 +                                   struct inode *inode)
 +{
 +      struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
 +      int ret;
 +
 +      if (!delayed_node)
 +              return 0;
 +
 +      mutex_lock(&delayed_node->mutex);
 +      if (!delayed_node->count) {
 +              mutex_unlock(&delayed_node->mutex);
 +              btrfs_release_delayed_node(delayed_node);
 +              return 0;
 +      }
 +      mutex_unlock(&delayed_node->mutex);
 +
 +      ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
 +      btrfs_release_delayed_node(delayed_node);
 +      return ret;
 +}
 +
 +void btrfs_remove_delayed_node(struct inode *inode)
 +{
 +      struct btrfs_delayed_node *delayed_node;
 +
 +      delayed_node = ACCESS_ONCE(BTRFS_I(inode)->delayed_node);
 +      if (!delayed_node)
 +              return;
 +
 +      BTRFS_I(inode)->delayed_node = NULL;
 +      btrfs_release_delayed_node(delayed_node);
 +}
 +
 +struct btrfs_async_delayed_node {
 +      struct btrfs_root *root;
 +      struct btrfs_delayed_node *delayed_node;
 +      struct btrfs_work work;
 +};
 +
 +static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 +{
 +      struct btrfs_async_delayed_node *async_node;
 +      struct btrfs_trans_handle *trans;
 +      struct btrfs_path *path;
 +      struct btrfs_delayed_node *delayed_node = NULL;
 +      struct btrfs_root *root;
 +      unsigned long nr = 0;
 +      int need_requeue = 0;
 +      int ret;
 +
 +      async_node = container_of(work, struct btrfs_async_delayed_node, work);
 +
 +      path = btrfs_alloc_path();
 +      if (!path)
 +              goto out;
 +      path->leave_spinning = 1;
 +
 +      delayed_node = async_node->delayed_node;
 +      root = delayed_node->root;
 +
-       trans = btrfs_join_transaction(root, 0);
++      trans = btrfs_join_transaction(root);
 +      if (IS_ERR(trans))
 +              goto free_path;
 +
 +      ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
 +      if (!ret)
 +              ret = btrfs_delete_delayed_items(trans, path, root,
 +                                               delayed_node);
 +
 +      if (!ret)
 +              btrfs_update_delayed_inode(trans, root, path, delayed_node);
 +
 +      /*
 +       * New delayed items may have been inserted, so we may need to requeue
 +       * the work. Besides that, we must dequeue the empty delayed nodes
 +       * to avoid the race between the delayed-item balance and the worker.
 +       * The race looks like this:
 +       *      Task1                           Worker thread
 +       *                                      count == 0, needn't requeue
 +       *                                        also needn't insert the
 +       *                                        delayed node into prepare
 +       *                                        list again.
 +       *      add lots of delayed items
 +       *      queue the delayed node
 +       *        already in the list,
 +       *        and not in the prepare
 +       *        list, it means the delayed
 +       *        node is being dealt with
 +       *        by the worker.
 +       *      do delayed items balance
 +       *        the delayed node is being
 +       *        dealt with by the worker
 +       *        now, just wait.
 +       *                                      the worker goes idle.
 +       * Task1 will sleep until the transaction is committed.
 +       */
 +      mutex_lock(&delayed_node->mutex);
 +      if (delayed_node->count)
 +              need_requeue = 1;
 +      else
 +              btrfs_dequeue_delayed_node(root->fs_info->delayed_root,
 +                                         delayed_node);
 +      mutex_unlock(&delayed_node->mutex);
 +
 +      nr = trans->blocks_used;
 +
 +      btrfs_end_transaction_dmeta(trans, root);
 +      __btrfs_btree_balance_dirty(root, nr);
 +free_path:
 +      btrfs_free_path(path);
 +out:
 +      if (need_requeue)
 +              btrfs_requeue_work(&async_node->work);
 +      else {
 +              btrfs_release_prepared_delayed_node(delayed_node);
 +              kfree(async_node);
 +      }
 +}
 +
 +static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
 +                                   struct btrfs_root *root, int all)
 +{
 +      struct btrfs_async_delayed_node *async_node;
 +      struct btrfs_delayed_node *curr;
 +      int count = 0;
 +
 +again:
 +      curr = btrfs_first_prepared_delayed_node(delayed_root);
 +      if (!curr)
 +              return 0;
 +
 +      async_node = kmalloc(sizeof(*async_node), GFP_NOFS);
 +      if (!async_node) {
 +              btrfs_release_prepared_delayed_node(curr);
 +              return -ENOMEM;
 +      }
 +
 +      async_node->root = root;
 +      async_node->delayed_node = curr;
 +
 +      async_node->work.func = btrfs_async_run_delayed_node_done;
 +      async_node->work.flags = 0;
 +
 +      btrfs_queue_worker(&root->fs_info->delayed_workers, &async_node->work);
 +      count++;
 +
 +      if (all || count < 4)
 +              goto again;
 +
 +      return 0;
 +}
 +
 +void btrfs_balance_delayed_items(struct btrfs_root *root)
 +{
 +      struct btrfs_delayed_root *delayed_root;
 +
 +      delayed_root = btrfs_get_delayed_root(root);
 +
 +      if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
 +              return;
 +
 +      if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
 +              int ret;
 +              ret = btrfs_wq_run_delayed_node(delayed_root, root, 1);
 +              if (ret)
 +                      return;
 +
 +              wait_event_interruptible_timeout(
 +                              delayed_root->wait,
 +                              (atomic_read(&delayed_root->items) <
 +                               BTRFS_DELAYED_BACKGROUND),
 +                              HZ);
 +              return;
 +      }
 +
 +      btrfs_wq_run_delayed_node(delayed_root, root, 0);
 +}
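
Restating the throttling policy in terms of the two defines at the top of the file (a summary of the code above, not new behavior):

        /*
         * items <  BTRFS_DELAYED_BACKGROUND (100): do nothing.
         * items >= BTRFS_DELAYED_WRITEBACK  (400): flush all prepared nodes,
         *          then wait up to HZ for the count to fall below 100.
         * otherwise: hand at most four prepared nodes to the async workers.
         */
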
 +
 +int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 +                                 struct btrfs_root *root, const char *name,
 +                                 int name_len, struct inode *dir,
 +                                 struct btrfs_disk_key *disk_key, u8 type,
 +                                 u64 index)
 +{
 +      struct btrfs_delayed_node *delayed_node;
 +      struct btrfs_delayed_item *delayed_item;
 +      struct btrfs_dir_item *dir_item;
 +      int ret;
 +
 +      delayed_node = btrfs_get_or_create_delayed_node(dir);
 +      if (IS_ERR(delayed_node))
 +              return PTR_ERR(delayed_node);
 +
 +      delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
 +      if (!delayed_item) {
 +              ret = -ENOMEM;
 +              goto release_node;
 +      }
 +
 +      ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
 +      /*
 +       * we have reserved enough space when we start a new transaction,
 +       * so a metadata reservation failure is impossible here
 +       */
 +      BUG_ON(ret);
 +
 +      delayed_item->key.objectid = btrfs_ino(dir);
 +      btrfs_set_key_type(&delayed_item->key, BTRFS_DIR_INDEX_KEY);
 +      delayed_item->key.offset = index;
 +
 +      dir_item = (struct btrfs_dir_item *)delayed_item->data;
 +      dir_item->location = *disk_key;
 +      dir_item->transid = cpu_to_le64(trans->transid);
 +      dir_item->data_len = 0;
 +      dir_item->name_len = cpu_to_le16(name_len);
 +      dir_item->type = type;
 +      memcpy((char *)(dir_item + 1), name, name_len);
 +
 +      mutex_lock(&delayed_node->mutex);
 +      ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
 +      if (unlikely(ret)) {
 +              printk(KERN_ERR "error adding delayed dir index item (name: %s) "
 +                              "into the insertion tree of the delayed node "
 +                              "(root id: %llu, inode id: %llu, errno: %d)\n",
 +                              name,
 +                              (unsigned long long)delayed_node->root->objectid,
 +                              (unsigned long long)delayed_node->inode_id,
 +                              ret);
 +              BUG();
 +      }
 +      mutex_unlock(&delayed_node->mutex);
 +
 +release_node:
 +      btrfs_release_delayed_node(delayed_node);
 +      return ret;
 +}
 +
 +static int btrfs_delete_delayed_insertion_item(struct btrfs_root *root,
 +                                             struct btrfs_delayed_node *node,
 +                                             struct btrfs_key *key)
 +{
 +      struct btrfs_delayed_item *item;
 +
 +      mutex_lock(&node->mutex);
 +      item = __btrfs_lookup_delayed_insertion_item(node, key);
 +      if (!item) {
 +              mutex_unlock(&node->mutex);
 +              return 1;
 +      }
 +
 +      btrfs_delayed_item_release_metadata(root, item);
 +      btrfs_release_delayed_item(item);
 +      mutex_unlock(&node->mutex);
 +      return 0;
 +}
 +
 +int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
 +                                 struct btrfs_root *root, struct inode *dir,
 +                                 u64 index)
 +{
 +      struct btrfs_delayed_node *node;
 +      struct btrfs_delayed_item *item;
 +      struct btrfs_key item_key;
 +      int ret;
 +
 +      node = btrfs_get_or_create_delayed_node(dir);
 +      if (IS_ERR(node))
 +              return PTR_ERR(node);
 +
 +      item_key.objectid = btrfs_ino(dir);
 +      btrfs_set_key_type(&item_key, BTRFS_DIR_INDEX_KEY);
 +      item_key.offset = index;
 +
 +      ret = btrfs_delete_delayed_insertion_item(root, node, &item_key);
 +      if (!ret)
 +              goto end;
 +
 +      item = btrfs_alloc_delayed_item(0);
 +      if (!item) {
 +              ret = -ENOMEM;
 +              goto end;
 +      }
 +
 +      item->key = item_key;
 +
 +      ret = btrfs_delayed_item_reserve_metadata(trans, root, item);
 +      /*
 +       * we have reserved enough space when we start a new transaction,
 +       * so a metadata reservation failure is impossible here.
 +       */
 +      BUG_ON(ret);
 +
 +      mutex_lock(&node->mutex);
 +      ret = __btrfs_add_delayed_deletion_item(node, item);
 +      if (unlikely(ret)) {
 +              printk(KERN_ERR "error adding delayed dir index item (index: %llu) "
 +                              "into the deletion tree of the delayed node "
 +                              "(root id: %llu, inode id: %llu, errno: %d)\n",
 +                              (unsigned long long)index,
 +                              (unsigned long long)node->root->objectid,
 +                              (unsigned long long)node->inode_id,
 +                              ret);
 +              BUG();
 +      }
 +      mutex_unlock(&node->mutex);
 +end:
 +      btrfs_release_delayed_node(node);
 +      return ret;
 +}
 +
 +int btrfs_inode_delayed_dir_index_count(struct inode *inode)
 +{
 +      struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
 +      int ret = 0;
 +
 +      if (!delayed_node)
 +              return -ENOENT;
 +
 +      /*
 +       * Since we hold the i_mutex of this directory, no new directory index
 +       * can be added to the delayed node and index_cnt cannot change under
 +       * us, so we needn't lock the delayed node.
 +       */
 +      if (!delayed_node->index_cnt)
 +              return -EINVAL;
 +
 +      BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
 +      return ret;
 +}
 +
 +void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
 +                           struct list_head *del_list)
 +{
 +      struct btrfs_delayed_node *delayed_node;
 +      struct btrfs_delayed_item *item;
 +
 +      delayed_node = btrfs_get_delayed_node(inode);
 +      if (!delayed_node)
 +              return;
 +
 +      mutex_lock(&delayed_node->mutex);
 +      item = __btrfs_first_delayed_insertion_item(delayed_node);
 +      while (item) {
 +              atomic_inc(&item->refs);
 +              list_add_tail(&item->readdir_list, ins_list);
 +              item = __btrfs_next_delayed_item(item);
 +      }
 +
 +      item = __btrfs_first_delayed_deletion_item(delayed_node);
 +      while (item) {
 +              atomic_inc(&item->refs);
 +              list_add_tail(&item->readdir_list, del_list);
 +              item = __btrfs_next_delayed_item(item);
 +      }
 +      mutex_unlock(&delayed_node->mutex);
 +      /*
 +       * This delayed node is still cached in the btrfs inode, so refs
 +       * must be > 1 now, and we needn't check whether it is going to be
 +       * freed or not.
 +       *
 +       * Besides that, this function is used to read dir, we do not
 +       * insert/delete delayed items in this period. So we also needn't
 +       * requeue or dequeue this delayed node.
 +       */
 +      atomic_dec(&delayed_node->refs);
 +}
 +
 +void btrfs_put_delayed_items(struct list_head *ins_list,
 +                           struct list_head *del_list)
 +{
 +      struct btrfs_delayed_item *curr, *next;
 +
 +      list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
 +              list_del(&curr->readdir_list);
 +              if (atomic_dec_and_test(&curr->refs))
 +                      kfree(curr);
 +      }
 +
 +      list_for_each_entry_safe(curr, next, del_list, readdir_list) {
 +              list_del(&curr->readdir_list);
 +              if (atomic_dec_and_test(&curr->refs))
 +                      kfree(curr);
 +      }
 +}
 +
 +int btrfs_should_delete_dir_index(struct list_head *del_list,
 +                                u64 index)
 +{
 +      struct btrfs_delayed_item *curr, *next;
 +      int ret;
 +
 +      if (list_empty(del_list))
 +              return 0;
 +
 +      list_for_each_entry_safe(curr, next, del_list, readdir_list) {
 +              if (curr->key.offset > index)
 +                      break;
 +
 +              list_del(&curr->readdir_list);
 +              ret = (curr->key.offset == index);
 +
 +              if (atomic_dec_and_test(&curr->refs))
 +                      kfree(curr);
 +
 +              if (ret)
 +                      return 1;
 +              else
 +                      continue;
 +      }
 +      return 0;
 +}
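
A hedged sketch of the intended readdir-side call; the surrounding loop and found_key belong to the caller (expected to live in inode.c), not to this file:

        /* while walking the on-disk DIR_INDEX items of the directory: */
        if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
                continue; /* a delayed deletion is pending for this index */
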
 +
 +/*
 + * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree
 + *
 + */
 +int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent,
 +                                  filldir_t filldir,
 +                                  struct list_head *ins_list)
 +{
 +      struct btrfs_dir_item *di;
 +      struct btrfs_delayed_item *curr, *next;
 +      struct btrfs_key location;
 +      char *name;
 +      int name_len;
 +      int over = 0;
 +      unsigned char d_type;
 +
 +      if (list_empty(ins_list))
 +              return 0;
 +
 +      /*
 +       * The data of a delayed item never changes, so we needn't
 +       * lock it. And since we hold the i_mutex of the directory,
 +       * nobody can delete any directory index now.
 +       */
 +      list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
 +              list_del(&curr->readdir_list);
 +
 +              if (curr->key.offset < filp->f_pos) {
 +                      if (atomic_dec_and_test(&curr->refs))
 +                              kfree(curr);
 +                      continue;
 +              }
 +
 +              filp->f_pos = curr->key.offset;
 +
 +              di = (struct btrfs_dir_item *)curr->data;
 +              name = (char *)(di + 1);
 +              name_len = le16_to_cpu(di->name_len);
 +
 +              d_type = btrfs_filetype_table[di->type];
 +              btrfs_disk_key_to_cpu(&location, &di->location);
 +
 +              over = filldir(dirent, name, name_len, curr->key.offset,
 +                             location.objectid, d_type);
 +
 +              if (atomic_dec_and_test(&curr->refs))
 +                      kfree(curr);
 +
 +              if (over)
 +                      return 1;
 +      }
 +      return 0;
 +}
 +
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
 +                       generation, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
 +                       sequence, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
 +                       transid, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
 +                       nbytes, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
 +                       block_group, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
 +
 +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
 +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
 +
 +static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
 +                                struct btrfs_inode_item *inode_item,
 +                                struct inode *inode)
 +{
 +      btrfs_set_stack_inode_uid(inode_item, inode->i_uid);
 +      btrfs_set_stack_inode_gid(inode_item, inode->i_gid);
 +      btrfs_set_stack_inode_size(inode_item, BTRFS_I(inode)->disk_i_size);
 +      btrfs_set_stack_inode_mode(inode_item, inode->i_mode);
 +      btrfs_set_stack_inode_nlink(inode_item, inode->i_nlink);
 +      btrfs_set_stack_inode_nbytes(inode_item, inode_get_bytes(inode));
 +      btrfs_set_stack_inode_generation(inode_item,
 +                                       BTRFS_I(inode)->generation);
 +      btrfs_set_stack_inode_sequence(inode_item, BTRFS_I(inode)->sequence);
 +      btrfs_set_stack_inode_transid(inode_item, trans->transid);
 +      btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
 +      btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
-       btrfs_set_stack_inode_block_group(inode_item,
-                                         BTRFS_I(inode)->block_group);
++      btrfs_set_stack_inode_block_group(inode_item, 0);
 +
 +      btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
 +                                   inode->i_atime.tv_sec);
 +      btrfs_set_stack_timespec_nsec(btrfs_inode_atime(inode_item),
 +                                    inode->i_atime.tv_nsec);
 +
 +      btrfs_set_stack_timespec_sec(btrfs_inode_mtime(inode_item),
 +                                   inode->i_mtime.tv_sec);
 +      btrfs_set_stack_timespec_nsec(btrfs_inode_mtime(inode_item),
 +                                    inode->i_mtime.tv_nsec);
 +
 +      btrfs_set_stack_timespec_sec(btrfs_inode_ctime(inode_item),
 +                                   inode->i_ctime.tv_sec);
 +      btrfs_set_stack_timespec_nsec(btrfs_inode_ctime(inode_item),
 +                                    inode->i_ctime.tv_nsec);
 +}
 +
 +int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
 +                             struct btrfs_root *root, struct inode *inode)
 +{
 +      struct btrfs_delayed_node *delayed_node;
 +      int ret;
 +
 +      delayed_node = btrfs_get_or_create_delayed_node(inode);
 +      if (IS_ERR(delayed_node))
 +              return PTR_ERR(delayed_node);
 +
 +      mutex_lock(&delayed_node->mutex);
 +      if (delayed_node->inode_dirty) {
 +              fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
 +              goto release_node;
 +      }
 +
 +      ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
 +      /*
 +       * we must have reserved enough space when we started this
 +       * transaction, so reserving metadata here cannot fail
 +       */
 +      BUG_ON(ret);
 +
 +      fill_stack_inode_item(trans, &delayed_node->inode_item, inode);
 +      delayed_node->inode_dirty = 1;
 +      delayed_node->count++;
 +      atomic_inc(&root->fs_info->delayed_root->items);
 +release_node:
 +      mutex_unlock(&delayed_node->mutex);
 +      btrfs_release_delayed_node(delayed_node);
 +      return ret;
 +}
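
btrfs_delayed_update_inode() charges the metadata reservation only the first time a node goes dirty; every later update just overwrites the cached item under the node's mutex. Stripped to the pattern (a sketch with illustrative helper names):

mutex_lock(&node->mutex);
if (!node->inode_dirty) {
        reserve_metadata(node);         /* paid exactly once */
        node->inode_dirty = 1;
        node->count++;                  /* one more pending item */
}
fill_stack_inode_item(trans, &node->inode_item, inode);
mutex_unlock(&node->mutex);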
 +
 +static void __btrfs_kill_delayed_node(struct btrfs_delayed_node *delayed_node)
 +{
 +      struct btrfs_root *root = delayed_node->root;
 +      struct btrfs_delayed_item *curr_item, *prev_item;
 +
 +      mutex_lock(&delayed_node->mutex);
 +      curr_item = __btrfs_first_delayed_insertion_item(delayed_node);
 +      while (curr_item) {
 +              btrfs_delayed_item_release_metadata(root, curr_item);
 +              prev_item = curr_item;
 +              curr_item = __btrfs_next_delayed_item(prev_item);
 +              btrfs_release_delayed_item(prev_item);
 +      }
 +
 +      curr_item = __btrfs_first_delayed_deletion_item(delayed_node);
 +      while (curr_item) {
 +              btrfs_delayed_item_release_metadata(root, curr_item);
 +              prev_item = curr_item;
 +              curr_item = __btrfs_next_delayed_item(prev_item);
 +              btrfs_release_delayed_item(prev_item);
 +      }
 +
 +      if (delayed_node->inode_dirty) {
 +              btrfs_delayed_inode_release_metadata(root, delayed_node);
 +              btrfs_release_delayed_inode(delayed_node);
 +      }
 +      mutex_unlock(&delayed_node->mutex);
 +}
 +
 +void btrfs_kill_delayed_inode_items(struct inode *inode)
 +{
 +      struct btrfs_delayed_node *delayed_node;
 +
 +      delayed_node = btrfs_get_delayed_node(inode);
 +      if (!delayed_node)
 +              return;
 +
 +      __btrfs_kill_delayed_node(delayed_node);
 +      btrfs_release_delayed_node(delayed_node);
 +}
 +
 +void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
 +{
 +      u64 inode_id = 0;
 +      struct btrfs_delayed_node *delayed_nodes[8];
 +      int i, n;
 +
 +      while (1) {
 +              spin_lock(&root->inode_lock);
 +              n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
 +                                         (void **)delayed_nodes, inode_id,
 +                                         ARRAY_SIZE(delayed_nodes));
 +              if (!n) {
 +                      spin_unlock(&root->inode_lock);
 +                      break;
 +              }
 +
 +              inode_id = delayed_nodes[n - 1]->inode_id + 1;
 +
 +              for (i = 0; i < n; i++)
 +                      atomic_inc(&delayed_nodes[i]->refs);
 +              spin_unlock(&root->inode_lock);
 +
 +              for (i = 0; i < n; i++) {
 +                      __btrfs_kill_delayed_node(delayed_nodes[i]);
 +                      btrfs_release_delayed_node(delayed_nodes[i]);
 +              }
 +      }
 +}
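
btrfs_kill_all_delayed_nodes() walks the per-root radix tree in batches: look up at most ARRAY_SIZE(delayed_nodes) entries, take a reference on each while still under the lock, then process them unlocked and resume past the highest id seen. The same batching idiom in isolation (hypothetical tree/item helpers):

void *batch[8];
unsigned long next = 0;
int i, n;

while (1) {
        spin_lock(&lock);
        n = radix_tree_gang_lookup(&tree, batch, next,
                                   ARRAY_SIZE(batch));
        if (!n) {
                spin_unlock(&lock);
                break;
        }
        next = id_of(batch[n - 1]) + 1; /* resume after this batch */
        for (i = 0; i < n; i++)
                get_ref(batch[i]);      /* pin before unlocking */
        spin_unlock(&lock);

        for (i = 0; i < n; i++)
                process_and_put(batch[i]);
}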
@@@ -1644,7 -1687,7 +1644,8 @@@ struct btrfs_root *open_ctree(struct su
        fs_info->sb = sb;
        fs_info->max_inline = 8192 * 1024;
        fs_info->metadata_ratio = 0;
 +      fs_info->defrag_inodes = RB_ROOT;
+       fs_info->trans_no_join = 0;
  
        fs_info->thread_pool_size = min_t(unsigned long,
                                          num_online_cpus() + 2, 8);
@@@ -379,15 -378,18 +379,18 @@@ again
                        if (ret)
                                break;
  
-                       caching_ctl->progress = last;
-                       btrfs_release_path(path);
-                       up_read(&fs_info->extent_commit_sem);
-                       mutex_unlock(&caching_ctl->mutex);
-                       if (btrfs_transaction_in_commit(fs_info))
-                               schedule_timeout(1);
-                       else
+                       if (need_resched() ||
+                           btrfs_next_leaf(extent_root, path)) {
+                               caching_ctl->progress = last;
 -                              btrfs_release_path(extent_root, path);
++                              btrfs_release_path(path);
+                               up_read(&fs_info->extent_commit_sem);
+                               mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
-                       goto again;
+                               goto again;
+                       }
+                       leaf = path->nodes[0];
+                       nritems = btrfs_header_nritems(leaf);
+                       continue;
                }
  
                if (key.objectid < block_group->key.objectid) {
@@@ -3837,6 -3977,43 +3841,37 @@@ static void release_global_block_rsv(st
        WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
  }
  
 -static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 -{
 -      return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
 -              3 * num_items;
 -}
 -
+ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   struct btrfs_block_rsv *rsv)
+ {
+       struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
+       u64 num_bytes;
+       int ret;
+       /*
+        * Truncate should be freeing data, but give us 2 items just in case it
+        * needs to use some space.  We may want to be smarter about this in the
+        * future.
+        */
 -      num_bytes = calc_trans_metadata_size(root, 2);
++      num_bytes = btrfs_calc_trans_metadata_size(root, 2);
+       /* We already have enough bytes, just return */
+       if (rsv->reserved >= num_bytes)
+               return 0;
+       num_bytes -= rsv->reserved;
+       /*
+        * You should have reserved enough space beforehand to do this, so
+        * this should not fail.
+        */
+       ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
+       BUG_ON(ret);
+       return 0;
+ }
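
block_rsv_migrate_bytes() (defined elsewhere in extent-tree.c) does not reserve anything new; it only moves already-reserved bytes from one reserve to another, which is why running out here indicates a caller bug. Conceptually (a sketch, assuming the usual reserved/lock fields of struct btrfs_block_rsv):

static int sketch_migrate_bytes(struct btrfs_block_rsv *src,
                                struct btrfs_block_rsv *dst,
                                u64 num_bytes)
{
        spin_lock(&src->lock);
        if (src->reserved < num_bytes) {
                spin_unlock(&src->lock);
                return -ENOSPC;         /* caller under-reserved */
        }
        src->reserved -= num_bytes;
        spin_unlock(&src->lock);

        spin_lock(&dst->lock);
        dst->reserved += num_bytes;
        spin_unlock(&dst->lock);
        return 0;
}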
  int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 int num_items)
@@@ -3877,23 -4054,18 +3912,18 @@@ int btrfs_orphan_reserve_metadata(struc
        struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
  
        /*
-        * one for deleting orphan item, one for updating inode and
-        * two for calling btrfs_truncate_inode_items.
-        *
-        * btrfs_truncate_inode_items is a delete operation, it frees
-        * more space than it uses in most cases. So two units of
-        * metadata space should be enough for calling it many times.
-        * If all of the metadata space is used, we can commit
-        * transaction and use space it freed.
+        * We need to hold space in order to delete our orphan item once we've
+        * added it, so this takes the reservation now; we release it later,
+        * when we are truly done with the orphan item.
         */
-       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
 -      u64 num_bytes = calc_trans_metadata_size(root, 1);
++      u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
  }
  
  void btrfs_orphan_release_metadata(struct inode *inode)
  {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
 -      u64 num_bytes = calc_trans_metadata_size(root, 1);
++      u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
  }
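
Both sides of this pair size the reservation with btrfs_calc_trans_metadata_size(). Assuming it keeps the formula of the static calc_trans_metadata_size() helper removed above, one item on a filesystem with 4 KiB leaves and nodes costs:

/* (leafsize + nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items
 * = (4096 + 4096 * 7) * 3 * 1
 * = 98304 bytes reserved per orphan item, released again in
 *   btrfs_orphan_release_metadata() once the item is gone.
 */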
  
@@@ -4987,6 -5159,14 +5017,15 @@@ have_block_group
                if (unlikely(block_group->ro))
                        goto loop;
  
 -              spin_lock(&block_group->tree_lock);
++              spin_lock(&block_group->free_space_ctl->tree_lock);
+               if (cached &&
 -                  block_group->free_space < num_bytes + empty_size) {
 -                      spin_unlock(&block_group->tree_lock);
++                  block_group->free_space_ctl->free_space <
++                  num_bytes + empty_size) {
++                      spin_unlock(&block_group->free_space_ctl->tree_lock);
+                       goto loop;
+               }
 -              spin_unlock(&block_group->tree_lock);
++              spin_unlock(&block_group->free_space_ctl->tree_lock);
                /*
                 * Ok we want to try and use the cluster allocator, so lets look
                 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@@ -6442,154 -6616,1662 +6475,154 @@@ int btrfs_drop_subtree(struct btrfs_tra
        return ret;
  }
  
 -#if 0
 -static unsigned long calc_ra(unsigned long start, unsigned long last,
 -                           unsigned long nr)
 -{
 -      return min(last, start + nr - 1);
 -}
 -
 -static noinline int relocate_inode_pages(struct inode *inode, u64 start,
 -                                       u64 len)
 +static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
  {
 -      u64 page_start;
 -      u64 page_end;
 -      unsigned long first_index;
 -      unsigned long last_index;
 -      unsigned long i;
 -      struct page *page;
 -      struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 -      struct file_ra_state *ra;
 -      struct btrfs_ordered_extent *ordered;
 -      unsigned int total_read = 0;
 -      unsigned int total_dirty = 0;
 -      int ret = 0;
 -
 -      ra = kzalloc(sizeof(*ra), GFP_NOFS);
 -      if (!ra)
 -              return -ENOMEM;
 +      u64 num_devices;
 +      u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
 +              BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
  
 -      mutex_lock(&inode->i_mutex);
 -      first_index = start >> PAGE_CACHE_SHIFT;
 -      last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
 +      /*
 +       * we add in the count of missing devices because we want
 +       * to make sure that any RAID levels on a degraded FS
 +       * continue to be honored.
 +       */
 +      num_devices = root->fs_info->fs_devices->rw_devices +
 +              root->fs_info->fs_devices->missing_devices;
  
 -      /* make sure the dirty trick played by the caller work */
 -      ret = invalidate_inode_pages2_range(inode->i_mapping,
 -                                          first_index, last_index);
 -      if (ret)
 -              goto out_unlock;
 +      if (num_devices == 1) {
 +              stripped |= BTRFS_BLOCK_GROUP_DUP;
 +              stripped = flags & ~stripped;
  
 -      file_ra_state_init(ra, inode->i_mapping);
 +              /* turn raid0 into single device chunks */
 +              if (flags & BTRFS_BLOCK_GROUP_RAID0)
 +                      return stripped;
  
 -      for (i = first_index ; i <= last_index; i++) {
 -              if (total_read % ra->ra_pages == 0) {
 -                      btrfs_force_ra(inode->i_mapping, ra, NULL, i,
 -                                     calc_ra(i, last_index, ra->ra_pages));
 -              }
 -              total_read++;
 -again:
 -              if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
 -                      BUG_ON(1);
 -              page = grab_cache_page(inode->i_mapping, i);
 -              if (!page) {
 -                      ret = -ENOMEM;
 -                      goto out_unlock;
 -              }
 -              if (!PageUptodate(page)) {
 -                      btrfs_readpage(NULL, page);
 -                      lock_page(page);
 -                      if (!PageUptodate(page)) {
 -                              unlock_page(page);
 -                              page_cache_release(page);
 -                              ret = -EIO;
 -                              goto out_unlock;
 -                      }
 -              }
 -              wait_on_page_writeback(page);
 -
 -              page_start = (u64)page->index << PAGE_CACHE_SHIFT;
 -              page_end = page_start + PAGE_CACHE_SIZE - 1;
 -              lock_extent(io_tree, page_start, page_end, GFP_NOFS);
 -
 -              ordered = btrfs_lookup_ordered_extent(inode, page_start);
 -              if (ordered) {
 -                      unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 -                      unlock_page(page);
 -                      page_cache_release(page);
 -                      btrfs_start_ordered_extent(inode, ordered, 1);
 -                      btrfs_put_ordered_extent(ordered);
 -                      goto again;
 -              }
 -              set_page_extent_mapped(page);
 +              /* turn mirroring into duplication */
 +              if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
 +                           BTRFS_BLOCK_GROUP_RAID10))
 +                      return stripped | BTRFS_BLOCK_GROUP_DUP;
 +              return flags;
 +      } else {
 +              /* they already had raid on here, just return */
 +              if (flags & stripped)
 +                      return flags;
  
 -              if (i == first_index)
 -                      set_extent_bits(io_tree, page_start, page_end,
 -                                      EXTENT_BOUNDARY, GFP_NOFS);
 -              btrfs_set_extent_delalloc(inode, page_start, page_end);
 +              stripped |= BTRFS_BLOCK_GROUP_DUP;
 +              stripped = flags & ~stripped;
  
 -              set_page_dirty(page);
 -              total_dirty++;
 +              /* switch duplicated blocks with raid1 */
 +              if (flags & BTRFS_BLOCK_GROUP_DUP)
 +                      return stripped | BTRFS_BLOCK_GROUP_RAID1;
  
 -              unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
 -              unlock_page(page);
 -              page_cache_release(page);
 +              /* turn single device chunks into raid0 */
 +              return stripped | BTRFS_BLOCK_GROUP_RAID0;
        }
 -
 -out_unlock:
 -      kfree(ra);
 -      mutex_unlock(&inode->i_mutex);
 -      balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
 -      return ret;
 +      return flags;
  }
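
Two concrete runs through update_block_group_flags(), following the branches above (standard BTRFS_BLOCK_GROUP_* bits assumed):

/* num_devices == 1: mirroring degrades to duplication.
 * flags    = METADATA | RAID1
 * stripped = flags & ~(RAID0|RAID1|RAID10|DUP) = METADATA
 * result   = stripped | DUP = METADATA | DUP
 *
 * num_devices > 1: single-device DUP upgrades back to RAID1.
 * flags    = METADATA | DUP
 * result   = METADATA | RAID1
 */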
  
 -static noinline int relocate_data_extent(struct inode *reloc_inode,
 -                                       struct btrfs_key *extent_key,
 -                                       u64 offset)
 +static int set_block_group_ro(struct btrfs_block_group_cache *cache)
  {
 -      struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
 -      struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
 -      struct extent_map *em;
 -      u64 start = extent_key->objectid - offset;
 -      u64 end = start + extent_key->offset - 1;
 +      struct btrfs_space_info *sinfo = cache->space_info;
 +      u64 num_bytes;
 +      int ret = -ENOSPC;
  
 -      em = alloc_extent_map(GFP_NOFS);
 -      BUG_ON(!em);
 +      if (cache->ro)
 +              return 0;
  
 -      em->start = start;
 -      em->len = extent_key->offset;
 -      em->block_len = extent_key->offset;
 -      em->block_start = extent_key->objectid;
 -      em->bdev = root->fs_info->fs_devices->latest_bdev;
 -      set_bit(EXTENT_FLAG_PINNED, &em->flags);
 +      spin_lock(&sinfo->lock);
 +      spin_lock(&cache->lock);
 +      num_bytes = cache->key.offset - cache->reserved - cache->pinned -
 +                  cache->bytes_super - btrfs_block_group_used(&cache->item);
  
 -      /* setup extent map to cheat btrfs_readpage */
 -      lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
 -      while (1) {
 -              int ret;
 -              write_lock(&em_tree->lock);
 -              ret = add_extent_mapping(em_tree, em);
 -              write_unlock(&em_tree->lock);
 -              if (ret != -EEXIST) {
 -                      free_extent_map(em);
 -                      break;
 -              }
 -              btrfs_drop_extent_cache(reloc_inode, start, end, 0);
 +      if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 +          sinfo->bytes_may_use + sinfo->bytes_readonly +
 +          cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
 +              sinfo->bytes_readonly += num_bytes;
 +              sinfo->bytes_reserved += cache->reserved_pinned;
 +              cache->reserved_pinned = 0;
 +              cache->ro = 1;
 +              ret = 0;
        }
 -      unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
  
 -      return relocate_inode_pages(reloc_inode, start, extent_key->offset);
 +      spin_unlock(&cache->lock);
 +      spin_unlock(&sinfo->lock);
 +      return ret;
  }
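
The check in set_block_group_ro() succeeds only if every existing commitment in the space_info, plus this group's idle bytes, still fits inside total_bytes. With illustrative numbers:

/* num_bytes = key.offset - reserved - pinned - bytes_super - used
 *           = 1024M - 64M - 32M - 16M - 640M = 272M idle.
 * The group flips ro only if
 *   used + reserved + pinned + may_use + readonly +
 *   reserved_pinned + num_bytes <= total_bytes,
 * i.e. those 272M can be written off as read-only without
 * overcommitting the rest of the space_info.
 */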
  
 -struct btrfs_ref_path {
 -      u64 extent_start;
 -      u64 nodes[BTRFS_MAX_LEVEL];
 -      u64 root_objectid;
 -      u64 root_generation;
 -      u64 owner_objectid;
 -      u32 num_refs;
 -      int lowest_level;
 -      int current_level;
 -      int shared_level;
 -
 -      struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
 -      u64 new_nodes[BTRFS_MAX_LEVEL];
 -};
 -
 -struct disk_extent {
 -      u64 ram_bytes;
 -      u64 disk_bytenr;
 -      u64 disk_num_bytes;
 -      u64 offset;
 -      u64 num_bytes;
 -      u8 compression;
 -      u8 encryption;
 -      u16 other_encoding;
 -};
 +int btrfs_set_block_group_ro(struct btrfs_root *root,
 +                           struct btrfs_block_group_cache *cache)
  
 -static int is_cowonly_root(u64 root_objectid)
  {
 -      if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
 -          root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
 -          root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
 -          root_objectid == BTRFS_DEV_TREE_OBJECTID ||
 -          root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 -          root_objectid == BTRFS_CSUM_TREE_OBJECTID)
 -              return 1;
 -      return 0;
 -}
 +      struct btrfs_trans_handle *trans;
 +      u64 alloc_flags;
 +      int ret;
  
 -static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
 -                                  struct btrfs_root *extent_root,
 -                                  struct btrfs_ref_path *ref_path,
 -                                  int first_time)
 -{
 -      struct extent_buffer *leaf;
 -      struct btrfs_path *path;
 -      struct btrfs_extent_ref *ref;
 -      struct btrfs_key key;
 -      struct btrfs_key found_key;
 -      u64 bytenr;
 -      u32 nritems;
 -      int level;
 -      int ret = 1;
 +      BUG_ON(cache->ro);
  
-       trans = btrfs_join_transaction(root, 1);
 -      path = btrfs_alloc_path();
 -      if (!path)
 -              return -ENOMEM;
++      trans = btrfs_join_transaction(root);
 +      BUG_ON(IS_ERR(trans));
  
 -      if (first_time) {
 -              ref_path->lowest_level = -1;
 -              ref_path->current_level = -1;
 -              ref_path->shared_level = -1;
 -              goto walk_up;
 -      }
 -walk_down:
 -      level = ref_path->current_level - 1;
 -      while (level >= -1) {
 -              u64 parent;
 -              if (level < ref_path->lowest_level)
 -                      break;
 +      alloc_flags = update_block_group_flags(root, cache->flags);
 +      if (alloc_flags != cache->flags)
 +              do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 +                             CHUNK_ALLOC_FORCE);
  
 -              if (level >= 0)
 -                      bytenr = ref_path->nodes[level];
 -              else
 -                      bytenr = ref_path->extent_start;
 -              BUG_ON(bytenr == 0);
 +      ret = set_block_group_ro(cache);
 +      if (!ret)
 +              goto out;
 +      alloc_flags = get_alloc_profile(root, cache->space_info->flags);
 +      ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 +                           CHUNK_ALLOC_FORCE);
 +      if (ret < 0)
 +              goto out;
 +      ret = set_block_group_ro(cache);
 +out:
 +      btrfs_end_transaction(trans, root);
 +      return ret;
 +}
  
 -              parent = ref_path->nodes[level + 1];
 -              ref_path->nodes[level + 1] = 0;
 -              ref_path->current_level = level;
 -              BUG_ON(parent == 0);
 +int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 +                          struct btrfs_root *root, u64 type)
 +{
 +      u64 alloc_flags = get_alloc_profile(root, type);
 +      return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 +                            CHUNK_ALLOC_FORCE);
 +}
  
 -              key.objectid = bytenr;
 -              key.offset = parent + 1;
 -              key.type = BTRFS_EXTENT_REF_KEY;
 +/*
 + * helper to account the unused space of all the readonly block groups in
 + * the list. takes mirrors into account.
 + */
 +static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
 +{
 +      struct btrfs_block_group_cache *block_group;
 +      u64 free_bytes = 0;
 +      int factor;
  
 -              ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
 -              if (ret < 0)
 -                      goto out;
 -              BUG_ON(ret == 0);
 +      list_for_each_entry(block_group, groups_list, list) {
 +              spin_lock(&block_group->lock);
  
 -              leaf = path->nodes[0];
 -              nritems = btrfs_header_nritems(leaf);
 -              if (path->slots[0] >= nritems) {
 -                      ret = btrfs_next_leaf(extent_root, path);
 -                      if (ret < 0)
 -                              goto out;
 -                      if (ret > 0)
 -                              goto next;
 -                      leaf = path->nodes[0];
 +              if (!block_group->ro) {
 +                      spin_unlock(&block_group->lock);
 +                      continue;
                }
  
 -              btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 -              if (found_key.objectid == bytenr &&
 -                  found_key.type == BTRFS_EXTENT_REF_KEY) {
 -                      if (level < ref_path->shared_level)
 -                              ref_path->shared_level = level;
 -                      goto found;
 -              }
 -next:
 -              level--;
 -              btrfs_release_path(extent_root, path);
 -              cond_resched();
 -      }
 -      /* reached lowest level */
 -      ret = 1;
 -      goto out;
 -walk_up:
 -      level = ref_path->current_level;
 -      while (level < BTRFS_MAX_LEVEL - 1) {
 -              u64 ref_objectid;
 -
 -              if (level >= 0)
 -                      bytenr = ref_path->nodes[level];
 +              if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
 +                                        BTRFS_BLOCK_GROUP_RAID10 |
 +                                        BTRFS_BLOCK_GROUP_DUP))
 +                      factor = 2;
                else
 -                      bytenr = ref_path->extent_start;
 +                      factor = 1;
  
 -              BUG_ON(bytenr == 0);
 +              free_bytes += (block_group->key.offset -
 +                             btrfs_block_group_used(&block_group->item)) *
 +                             factor;
  
 -              key.objectid = bytenr;
 -              key.offset = 0;
 -              key.type = BTRFS_EXTENT_REF_KEY;
 +              spin_unlock(&block_group->lock);
 +      }
  
 -              ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
 -              if (ret < 0)
 -                      goto out;
 -
 -              leaf = path->nodes[0];
 -              nritems = btrfs_header_nritems(leaf);
 -              if (path->slots[0] >= nritems) {
 -                      ret = btrfs_next_leaf(extent_root, path);
 -                      if (ret < 0)
 -                              goto out;
 -                      if (ret > 0) {
 -                              /* the extent was freed by someone */
 -                              if (ref_path->lowest_level == level)
 -                                      goto out;
 -                              btrfs_release_path(extent_root, path);
 -                              goto walk_down;
 -                      }
 -                      leaf = path->nodes[0];
 -              }
 -
 -              btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 -              if (found_key.objectid != bytenr ||
 -                              found_key.type != BTRFS_EXTENT_REF_KEY) {
 -                      /* the extent was freed by someone */
 -                      if (ref_path->lowest_level == level) {
 -                              ret = 1;
 -                              goto out;
 -                      }
 -                      btrfs_release_path(extent_root, path);
 -                      goto walk_down;
 -              }
 -found:
 -              ref = btrfs_item_ptr(leaf, path->slots[0],
 -                              struct btrfs_extent_ref);
 -              ref_objectid = btrfs_ref_objectid(leaf, ref);
 -              if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
 -                      if (first_time) {
 -                              level = (int)ref_objectid;
 -                              BUG_ON(level >= BTRFS_MAX_LEVEL);
 -                              ref_path->lowest_level = level;
 -                              ref_path->current_level = level;
 -                              ref_path->nodes[level] = bytenr;
 -                      } else {
 -                              WARN_ON(ref_objectid != level);
 -                      }
 -              } else {
 -                      WARN_ON(level != -1);
 -              }
 -              first_time = 0;
 -
 -              if (ref_path->lowest_level == level) {
 -                      ref_path->owner_objectid = ref_objectid;
 -                      ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
 -              }
 -
 -              /*
 -               * the block is tree root or the block isn't in reference
 -               * counted tree.
 -               */
 -              if (found_key.objectid == found_key.offset ||
 -                  is_cowonly_root(btrfs_ref_root(leaf, ref))) {
 -                      ref_path->root_objectid = btrfs_ref_root(leaf, ref);
 -                      ref_path->root_generation =
 -                              btrfs_ref_generation(leaf, ref);
 -                      if (level < 0) {
 -                              /* special reference from the tree log */
 -                              ref_path->nodes[0] = found_key.offset;
 -                              ref_path->current_level = 0;
 -                      }
 -                      ret = 0;
 -                      goto out;
 -              }
 -
 -              level++;
 -              BUG_ON(ref_path->nodes[level] != 0);
 -              ref_path->nodes[level] = found_key.offset;
 -              ref_path->current_level = level;
 -
 -              /*
 -               * the reference was created in the running transaction,
 -               * no need to continue walking up.
 -               */
 -              if (btrfs_ref_generation(leaf, ref) == trans->transid) {
 -                      ref_path->root_objectid = btrfs_ref_root(leaf, ref);
 -                      ref_path->root_generation =
 -                              btrfs_ref_generation(leaf, ref);
 -                      ret = 0;
 -                      goto out;
 -              }
 -
 -              btrfs_release_path(extent_root, path);
 -              cond_resched();
 -      }
 -      /* reached max tree level, but no tree root found. */
 -      BUG();
 -out:
 -      btrfs_free_path(path);
 -      return ret;
 -}
 -
 -static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
 -                              struct btrfs_root *extent_root,
 -                              struct btrfs_ref_path *ref_path,
 -                              u64 extent_start)
 -{
 -      memset(ref_path, 0, sizeof(*ref_path));
 -      ref_path->extent_start = extent_start;
 -
 -      return __next_ref_path(trans, extent_root, ref_path, 1);
 -}
 -
 -static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
 -                             struct btrfs_root *extent_root,
 -                             struct btrfs_ref_path *ref_path)
 -{
 -      return __next_ref_path(trans, extent_root, ref_path, 0);
 -}
 -
 -static noinline int get_new_locations(struct inode *reloc_inode,
 -                                    struct btrfs_key *extent_key,
 -                                    u64 offset, int no_fragment,
 -                                    struct disk_extent **extents,
 -                                    int *nr_extents)
 -{
 -      struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
 -      struct btrfs_path *path;
 -      struct btrfs_file_extent_item *fi;
 -      struct extent_buffer *leaf;
 -      struct disk_extent *exts = *extents;
 -      struct btrfs_key found_key;
 -      u64 cur_pos;
 -      u64 last_byte;
 -      u32 nritems;
 -      int nr = 0;
 -      int max = *nr_extents;
 -      int ret;
 -
 -      WARN_ON(!no_fragment && *extents);
 -      if (!exts) {
 -              max = 1;
 -              exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
 -              if (!exts)
 -                      return -ENOMEM;
 -      }
 -
 -      path = btrfs_alloc_path();
 -      if (!path) {
 -              if (exts != *extents)
 -                      kfree(exts);
 -              return -ENOMEM;
 -      }
 -
 -      cur_pos = extent_key->objectid - offset;
 -      last_byte = extent_key->objectid + extent_key->offset;
 -      ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
 -                                     cur_pos, 0);
 -      if (ret < 0)
 -              goto out;
 -      if (ret > 0) {
 -              ret = -ENOENT;
 -              goto out;
 -      }
 -
 -      while (1) {
 -              leaf = path->nodes[0];
 -              nritems = btrfs_header_nritems(leaf);
 -              if (path->slots[0] >= nritems) {
 -                      ret = btrfs_next_leaf(root, path);
 -                      if (ret < 0)
 -                              goto out;
 -                      if (ret > 0)
 -                              break;
 -                      leaf = path->nodes[0];
 -              }
 -
 -              btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 -              if (found_key.offset != cur_pos ||
 -                  found_key.type != BTRFS_EXTENT_DATA_KEY ||
 -                  found_key.objectid != reloc_inode->i_ino)
 -                      break;
 -
 -              fi = btrfs_item_ptr(leaf, path->slots[0],
 -                                  struct btrfs_file_extent_item);
 -              if (btrfs_file_extent_type(leaf, fi) !=
 -                  BTRFS_FILE_EXTENT_REG ||
 -                  btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
 -                      break;
 -
 -              if (nr == max) {
 -                      struct disk_extent *old = exts;
 -                      max *= 2;
 -                      exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
 -                      if (!exts) {
 -                              ret = -ENOMEM;
 -                              goto out;
 -                      }
 -                      memcpy(exts, old, sizeof(*exts) * nr);
 -                      if (old != *extents)
 -                              kfree(old);
 -              }
 -
 -              exts[nr].disk_bytenr =
 -                      btrfs_file_extent_disk_bytenr(leaf, fi);
 -              exts[nr].disk_num_bytes =
 -                      btrfs_file_extent_disk_num_bytes(leaf, fi);
 -              exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
 -              exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 -              exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
 -              exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
 -              exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
 -              exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
 -                                                                         fi);
 -              BUG_ON(exts[nr].offset > 0);
 -              BUG_ON(exts[nr].compression || exts[nr].encryption);
 -              BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
 -
 -              cur_pos += exts[nr].num_bytes;
 -              nr++;
 -
 -              if (cur_pos + offset >= last_byte)
 -                      break;
 -
 -              if (no_fragment) {
 -                      ret = 1;
 -                      goto out;
 -              }
 -              path->slots[0]++;
 -      }
 -
 -      BUG_ON(cur_pos + offset > last_byte);
 -      if (cur_pos + offset < last_byte) {
 -              ret = -ENOENT;
 -              goto out;
 -      }
 -      ret = 0;
 -out:
 -      btrfs_free_path(path);
 -      if (ret) {
 -              if (exts != *extents)
 -                      kfree(exts);
 -      } else {
 -              *extents = exts;
 -              *nr_extents = nr;
 -      }
 -      return ret;
 -}
 -
 -static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
 -                                      struct btrfs_root *root,
 -                                      struct btrfs_path *path,
 -                                      struct btrfs_key *extent_key,
 -                                      struct btrfs_key *leaf_key,
 -                                      struct btrfs_ref_path *ref_path,
 -                                      struct disk_extent *new_extents,
 -                                      int nr_extents)
 -{
 -      struct extent_buffer *leaf;
 -      struct btrfs_file_extent_item *fi;
 -      struct inode *inode = NULL;
 -      struct btrfs_key key;
 -      u64 lock_start = 0;
 -      u64 lock_end = 0;
 -      u64 num_bytes;
 -      u64 ext_offset;
 -      u64 search_end = (u64)-1;
 -      u32 nritems;
 -      int nr_scaned = 0;
 -      int extent_locked = 0;
 -      int extent_type;
 -      int ret;
 -
 -      memcpy(&key, leaf_key, sizeof(key));
 -      if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
 -              if (key.objectid < ref_path->owner_objectid ||
 -                  (key.objectid == ref_path->owner_objectid &&
 -                   key.type < BTRFS_EXTENT_DATA_KEY)) {
 -                      key.objectid = ref_path->owner_objectid;
 -                      key.type = BTRFS_EXTENT_DATA_KEY;
 -                      key.offset = 0;
 -              }
 -      }
 -
 -      while (1) {
 -              ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 -              if (ret < 0)
 -                      goto out;
 -
 -              leaf = path->nodes[0];
 -              nritems = btrfs_header_nritems(leaf);
 -next:
 -              if (extent_locked && ret > 0) {
 -                      /*
 -                       * the file extent item was modified by someone
 -                       * before the extent got locked.
 -                       */
 -                      unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 -                                    lock_end, GFP_NOFS);
 -                      extent_locked = 0;
 -              }
 -
 -              if (path->slots[0] >= nritems) {
 -                      if (++nr_scaned > 2)
 -                              break;
 -
 -                      BUG_ON(extent_locked);
 -                      ret = btrfs_next_leaf(root, path);
 -                      if (ret < 0)
 -                              goto out;
 -                      if (ret > 0)
 -                              break;
 -                      leaf = path->nodes[0];
 -                      nritems = btrfs_header_nritems(leaf);
 -              }
 -
 -              btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 -
 -              if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
 -                      if ((key.objectid > ref_path->owner_objectid) ||
 -                          (key.objectid == ref_path->owner_objectid &&
 -                           key.type > BTRFS_EXTENT_DATA_KEY) ||
 -                          key.offset >= search_end)
 -                              break;
 -              }
 -
 -              if (inode && key.objectid != inode->i_ino) {
 -                      BUG_ON(extent_locked);
 -                      btrfs_release_path(root, path);
 -                      mutex_unlock(&inode->i_mutex);
 -                      iput(inode);
 -                      inode = NULL;
 -                      continue;
 -              }
 -
 -              if (key.type != BTRFS_EXTENT_DATA_KEY) {
 -                      path->slots[0]++;
 -                      ret = 1;
 -                      goto next;
 -              }
 -              fi = btrfs_item_ptr(leaf, path->slots[0],
 -                                  struct btrfs_file_extent_item);
 -              extent_type = btrfs_file_extent_type(leaf, fi);
 -              if ((extent_type != BTRFS_FILE_EXTENT_REG &&
 -                   extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
 -                  (btrfs_file_extent_disk_bytenr(leaf, fi) !=
 -                   extent_key->objectid)) {
 -                      path->slots[0]++;
 -                      ret = 1;
 -                      goto next;
 -              }
 -
 -              num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 -              ext_offset = btrfs_file_extent_offset(leaf, fi);
 -
 -              if (search_end == (u64)-1) {
 -                      search_end = key.offset - ext_offset +
 -                              btrfs_file_extent_ram_bytes(leaf, fi);
 -              }
 -
 -              if (!extent_locked) {
 -                      lock_start = key.offset;
 -                      lock_end = lock_start + num_bytes - 1;
 -              } else {
 -                      if (lock_start > key.offset ||
 -                          lock_end + 1 < key.offset + num_bytes) {
 -                              unlock_extent(&BTRFS_I(inode)->io_tree,
 -                                            lock_start, lock_end, GFP_NOFS);
 -                              extent_locked = 0;
 -                      }
 -              }
 -
 -              if (!inode) {
 -                      btrfs_release_path(root, path);
 -
 -                      inode = btrfs_iget_locked(root->fs_info->sb,
 -                                                key.objectid, root);
 -                      if (inode->i_state & I_NEW) {
 -                              BTRFS_I(inode)->root = root;
 -                              BTRFS_I(inode)->location.objectid =
 -                                      key.objectid;
 -                              BTRFS_I(inode)->location.type =
 -                                      BTRFS_INODE_ITEM_KEY;
 -                              BTRFS_I(inode)->location.offset = 0;
 -                              btrfs_read_locked_inode(inode);
 -                              unlock_new_inode(inode);
 -                      }
 -                      /*
 -                       * some code call btrfs_commit_transaction while
 -                       * holding the i_mutex, so we can't use mutex_lock
 -                       * here.
 -                       */
 -                      if (is_bad_inode(inode) ||
 -                          !mutex_trylock(&inode->i_mutex)) {
 -                              iput(inode);
 -                              inode = NULL;
 -                              key.offset = (u64)-1;
 -                              goto skip;
 -                      }
 -              }
 -
 -              if (!extent_locked) {
 -                      struct btrfs_ordered_extent *ordered;
 -
 -                      btrfs_release_path(root, path);
 -
 -                      lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 -                                  lock_end, GFP_NOFS);
 -                      ordered = btrfs_lookup_first_ordered_extent(inode,
 -                                                                  lock_end);
 -                      if (ordered &&
 -                          ordered->file_offset <= lock_end &&
 -                          ordered->file_offset + ordered->len > lock_start) {
 -                              unlock_extent(&BTRFS_I(inode)->io_tree,
 -                                            lock_start, lock_end, GFP_NOFS);
 -                              btrfs_start_ordered_extent(inode, ordered, 1);
 -                              btrfs_put_ordered_extent(ordered);
 -                              key.offset += num_bytes;
 -                              goto skip;
 -                      }
 -                      if (ordered)
 -                              btrfs_put_ordered_extent(ordered);
 -
 -                      extent_locked = 1;
 -                      continue;
 -              }
 -
 -              if (nr_extents == 1) {
 -                      /* update extent pointer in place */
 -                      btrfs_set_file_extent_disk_bytenr(leaf, fi,
 -                                              new_extents[0].disk_bytenr);
 -                      btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 -                                              new_extents[0].disk_num_bytes);
 -                      btrfs_mark_buffer_dirty(leaf);
 -
 -                      btrfs_drop_extent_cache(inode, key.offset,
 -                                              key.offset + num_bytes - 1, 0);
 -
 -                      ret = btrfs_inc_extent_ref(trans, root,
 -                                              new_extents[0].disk_bytenr,
 -                                              new_extents[0].disk_num_bytes,
 -                                              leaf->start,
 -                                              root->root_key.objectid,
 -                                              trans->transid,
 -                                              key.objectid);
 -                      BUG_ON(ret);
 -
 -                      ret = btrfs_free_extent(trans, root,
 -                                              extent_key->objectid,
 -                                              extent_key->offset,
 -                                              leaf->start,
 -                                              btrfs_header_owner(leaf),
 -                                              btrfs_header_generation(leaf),
 -                                              key.objectid, 0);
 -                      BUG_ON(ret);
 -
 -                      btrfs_release_path(root, path);
 -                      key.offset += num_bytes;
 -              } else {
 -                      BUG_ON(1);
 -#if 0
 -                      u64 alloc_hint;
 -                      u64 extent_len;
 -                      int i;
 -                      /*
 -                       * drop old extent pointer at first, then insert the
 -                       * new pointers one bye one
 -                       */
 -                      btrfs_release_path(root, path);
 -                      ret = btrfs_drop_extents(trans, root, inode, key.offset,
 -                                               key.offset + num_bytes,
 -                                               key.offset, &alloc_hint);
 -                      BUG_ON(ret);
 -
 -                      for (i = 0; i < nr_extents; i++) {
 -                              if (ext_offset >= new_extents[i].num_bytes) {
 -                                      ext_offset -= new_extents[i].num_bytes;
 -                                      continue;
 -                              }
 -                              extent_len = min(new_extents[i].num_bytes -
 -                                               ext_offset, num_bytes);
 -
 -                              ret = btrfs_insert_empty_item(trans, root,
 -                                                            path, &key,
 -                                                            sizeof(*fi));
 -                              BUG_ON(ret);
 -
 -                              leaf = path->nodes[0];
 -                              fi = btrfs_item_ptr(leaf, path->slots[0],
 -                                              struct btrfs_file_extent_item);
 -                              btrfs_set_file_extent_generation(leaf, fi,
 -                                                      trans->transid);
 -                              btrfs_set_file_extent_type(leaf, fi,
 -                                                      BTRFS_FILE_EXTENT_REG);
 -                              btrfs_set_file_extent_disk_bytenr(leaf, fi,
 -                                              new_extents[i].disk_bytenr);
 -                              btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 -                                              new_extents[i].disk_num_bytes);
 -                              btrfs_set_file_extent_ram_bytes(leaf, fi,
 -                                              new_extents[i].ram_bytes);
 -
 -                              btrfs_set_file_extent_compression(leaf, fi,
 -                                              new_extents[i].compression);
 -                              btrfs_set_file_extent_encryption(leaf, fi,
 -                                              new_extents[i].encryption);
 -                              btrfs_set_file_extent_other_encoding(leaf, fi,
 -                                              new_extents[i].other_encoding);
 -
 -                              btrfs_set_file_extent_num_bytes(leaf, fi,
 -                                                      extent_len);
 -                              ext_offset += new_extents[i].offset;
 -                              btrfs_set_file_extent_offset(leaf, fi,
 -                                                      ext_offset);
 -                              btrfs_mark_buffer_dirty(leaf);
 -
 -                              btrfs_drop_extent_cache(inode, key.offset,
 -                                              key.offset + extent_len - 1, 0);
 -
 -                              ret = btrfs_inc_extent_ref(trans, root,
 -                                              new_extents[i].disk_bytenr,
 -                                              new_extents[i].disk_num_bytes,
 -                                              leaf->start,
 -                                              root->root_key.objectid,
 -                                              trans->transid, key.objectid);
 -                              BUG_ON(ret);
 -                              btrfs_release_path(root, path);
 -
 -                              inode_add_bytes(inode, extent_len);
 -
 -                              ext_offset = 0;
 -                              num_bytes -= extent_len;
 -                              key.offset += extent_len;
 -
 -                              if (num_bytes == 0)
 -                                      break;
 -                      }
 -                      BUG_ON(i >= nr_extents);
 -#endif
 -              }
 -
 -              if (extent_locked) {
 -                      unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 -                                    lock_end, GFP_NOFS);
 -                      extent_locked = 0;
 -              }
 -skip:
 -              if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
 -                  key.offset >= search_end)
 -                      break;
 -
 -              cond_resched();
 -      }
 -      ret = 0;
 -out:
 -      btrfs_release_path(root, path);
 -      if (inode) {
 -              mutex_unlock(&inode->i_mutex);
 -              if (extent_locked) {
 -                      unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
 -                                    lock_end, GFP_NOFS);
 -              }
 -              iput(inode);
 -      }
 -      return ret;
 -}
 -
 -int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
 -                             struct btrfs_root *root,
 -                             struct extent_buffer *buf, u64 orig_start)
 -{
 -      int level;
 -      int ret;
 -
 -      BUG_ON(btrfs_header_generation(buf) != trans->transid);
 -      BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
 -
 -      level = btrfs_header_level(buf);
 -      if (level == 0) {
 -              struct btrfs_leaf_ref *ref;
 -              struct btrfs_leaf_ref *orig_ref;
 -
 -              orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
 -              if (!orig_ref)
 -                      return -ENOENT;
 -
 -              ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
 -              if (!ref) {
 -                      btrfs_free_leaf_ref(root, orig_ref);
 -                      return -ENOMEM;
 -              }
 -
 -              ref->nritems = orig_ref->nritems;
 -              memcpy(ref->extents, orig_ref->extents,
 -                      sizeof(ref->extents[0]) * ref->nritems);
 -
 -              btrfs_free_leaf_ref(root, orig_ref);
 -
 -              ref->root_gen = trans->transid;
 -              ref->bytenr = buf->start;
 -              ref->owner = btrfs_header_owner(buf);
 -              ref->generation = btrfs_header_generation(buf);
 -
 -              ret = btrfs_add_leaf_ref(root, ref, 0);
 -              WARN_ON(ret);
 -              btrfs_free_leaf_ref(root, ref);
 -      }
 -      return 0;
 -}
 -
 -static noinline int invalidate_extent_cache(struct btrfs_root *root,
 -                                      struct extent_buffer *leaf,
 -                                      struct btrfs_block_group_cache *group,
 -                                      struct btrfs_root *target_root)
 -{
 -      struct btrfs_key key;
 -      struct inode *inode = NULL;
 -      struct btrfs_file_extent_item *fi;
 -      struct extent_state *cached_state = NULL;
 -      u64 num_bytes;
 -      u64 skip_objectid = 0;
 -      u32 nritems;
 -      u32 i;
 -
 -      nritems = btrfs_header_nritems(leaf);
 -      for (i = 0; i < nritems; i++) {
 -              btrfs_item_key_to_cpu(leaf, &key, i);
 -              if (key.objectid == skip_objectid ||
 -                  key.type != BTRFS_EXTENT_DATA_KEY)
 -                      continue;
 -              fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
 -              if (btrfs_file_extent_type(leaf, fi) ==
 -                  BTRFS_FILE_EXTENT_INLINE)
 -                      continue;
 -              if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
 -                      continue;
 -              if (!inode || inode->i_ino != key.objectid) {
 -                      iput(inode);
 -                      inode = btrfs_ilookup(target_root->fs_info->sb,
 -                                            key.objectid, target_root, 1);
 -              }
 -              if (!inode) {
 -                      skip_objectid = key.objectid;
 -                      continue;
 -              }
 -              num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
 -
 -              lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
 -                               key.offset + num_bytes - 1, 0, &cached_state,
 -                               GFP_NOFS);
 -              btrfs_drop_extent_cache(inode, key.offset,
 -                                      key.offset + num_bytes - 1, 1);
 -              unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
 -                                   key.offset + num_bytes - 1, &cached_state,
 -                                   GFP_NOFS);
 -              cond_resched();
 -      }
 -      iput(inode);
 -      return 0;
 -}
 -
 -static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
 -                                      struct btrfs_root *root,
 -                                      struct extent_buffer *leaf,
 -                                      struct btrfs_block_group_cache *group,
 -                                      struct inode *reloc_inode)
 -{
 -      struct btrfs_key key;
 -      struct btrfs_key extent_key;
 -      struct btrfs_file_extent_item *fi;
 -      struct btrfs_leaf_ref *ref;
 -      struct disk_extent *new_extent;
 -      u64 bytenr;
 -      u64 num_bytes;
 -      u32 nritems;
 -      u32 i;
 -      int ext_index;
 -      int nr_extent;
 -      int ret;
 -
 -      new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
 -      if (!new_extent)
 -              return -ENOMEM;
 -
 -      ref = btrfs_lookup_leaf_ref(root, leaf->start);
 -      BUG_ON(!ref);
 -
 -      ext_index = -1;
 -      nritems = btrfs_header_nritems(leaf);
 -      for (i = 0; i < nritems; i++) {
 -              btrfs_item_key_to_cpu(leaf, &key, i);
 -              if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
 -                      continue;
 -              fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
 -              if (btrfs_file_extent_type(leaf, fi) ==
 -                  BTRFS_FILE_EXTENT_INLINE)
 -                      continue;
 -              bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 -              num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
 -              if (bytenr == 0)
 -                      continue;
 -
 -              ext_index++;
 -              if (bytenr >= group->key.objectid + group->key.offset ||
 -                  bytenr + num_bytes <= group->key.objectid)
 -                      continue;
 -
 -              extent_key.objectid = bytenr;
 -              extent_key.offset = num_bytes;
 -              extent_key.type = BTRFS_EXTENT_ITEM_KEY;
 -              nr_extent = 1;
 -              ret = get_new_locations(reloc_inode, &extent_key,
 -                                      group->key.objectid, 1,
 -                                      &new_extent, &nr_extent);
 -              if (ret > 0)
 -                      continue;
 -              BUG_ON(ret < 0);
 -
 -              BUG_ON(ref->extents[ext_index].bytenr != bytenr);
 -              BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
 -              ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
 -              ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
 -
 -              btrfs_set_file_extent_disk_bytenr(leaf, fi,
 -                                              new_extent->disk_bytenr);
 -              btrfs_set_file_extent_disk_num_bytes(leaf, fi,
 -                                              new_extent->disk_num_bytes);
 -              btrfs_mark_buffer_dirty(leaf);
 -
 -              ret = btrfs_inc_extent_ref(trans, root,
 -                                      new_extent->disk_bytenr,
 -                                      new_extent->disk_num_bytes,
 -                                      leaf->start,
 -                                      root->root_key.objectid,
 -                                      trans->transid, key.objectid);
 -              BUG_ON(ret);
 -
 -              ret = btrfs_free_extent(trans, root,
 -                                      bytenr, num_bytes, leaf->start,
 -                                      btrfs_header_owner(leaf),
 -                                      btrfs_header_generation(leaf),
 -                                      key.objectid, 0);
 -              BUG_ON(ret);
 -              cond_resched();
 -      }
 -      kfree(new_extent);
 -      BUG_ON(ext_index + 1 != ref->nritems);
 -      btrfs_free_leaf_ref(root, ref);
 -      return 0;
 -}
 -
 -int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
 -                        struct btrfs_root *root)
 -{
 -      struct btrfs_root *reloc_root;
 -      int ret;
 -
 -      if (root->reloc_root) {
 -              reloc_root = root->reloc_root;
 -              root->reloc_root = NULL;
 -              list_add(&reloc_root->dead_list,
 -                       &root->fs_info->dead_reloc_roots);
 -
 -              btrfs_set_root_bytenr(&reloc_root->root_item,
 -                                    reloc_root->node->start);
 -              btrfs_set_root_level(&root->root_item,
 -                                   btrfs_header_level(reloc_root->node));
 -              memset(&reloc_root->root_item.drop_progress, 0,
 -                      sizeof(struct btrfs_disk_key));
 -              reloc_root->root_item.drop_level = 0;
 -
 -              ret = btrfs_update_root(trans, root->fs_info->tree_root,
 -                                      &reloc_root->root_key,
 -                                      &reloc_root->root_item);
 -              BUG_ON(ret);
 -      }
 -      return 0;
 -}
 -
 -int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
 -{
 -      struct btrfs_trans_handle *trans;
 -      struct btrfs_root *reloc_root;
 -      struct btrfs_root *prev_root = NULL;
 -      struct list_head dead_roots;
 -      int ret;
 -      unsigned long nr;
 -
 -      INIT_LIST_HEAD(&dead_roots);
 -      list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
 -
 -      while (!list_empty(&dead_roots)) {
 -              reloc_root = list_entry(dead_roots.prev,
 -                                      struct btrfs_root, dead_list);
 -              list_del_init(&reloc_root->dead_list);
 -
 -              BUG_ON(reloc_root->commit_root != NULL);
 -              while (1) {
 -                      trans = btrfs_join_transaction(root);
 -                      BUG_ON(IS_ERR(trans));
 -
 -                      mutex_lock(&root->fs_info->drop_mutex);
 -                      ret = btrfs_drop_snapshot(trans, reloc_root);
 -                      if (ret != -EAGAIN)
 -                              break;
 -                      mutex_unlock(&root->fs_info->drop_mutex);
 -
 -                      nr = trans->blocks_used;
 -                      ret = btrfs_end_transaction(trans, root);
 -                      BUG_ON(ret);
 -                      btrfs_btree_balance_dirty(root, nr);
 -              }
 -
 -              free_extent_buffer(reloc_root->node);
 -
 -              ret = btrfs_del_root(trans, root->fs_info->tree_root,
 -                                   &reloc_root->root_key);
 -              BUG_ON(ret);
 -              mutex_unlock(&root->fs_info->drop_mutex);
 -
 -              nr = trans->blocks_used;
 -              ret = btrfs_end_transaction(trans, root);
 -              BUG_ON(ret);
 -              btrfs_btree_balance_dirty(root, nr);
 -
 -              kfree(prev_root);
 -              prev_root = reloc_root;
 -      }
 -      if (prev_root) {
 -              btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
 -              kfree(prev_root);
 -      }
 -      return 0;
 -}
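
The drop loop above is a resumable-teardown shape: btrfs_drop_snapshot()
does a bounded chunk of work per call and returns -EAGAIN until the reloc
root is fully gone, and the caller ends the transaction between rounds so
progress is persisted and dirty metadata is throttled. A compilable
userspace sketch of that shape (all names here are hypothetical, not the
btrfs API):

	#include <errno.h>

	/* stand-ins for the btrfs calls used above */
	static int work_left = 3;
	static int drop_some(void) { return --work_left ? -EAGAIN : 0; }
	static void commit_progress(void) { /* end_transaction() above */ }

	static int drop_until_done(void)
	{
		int ret;

		for (;;) {
			ret = drop_some();	/* bounded chunk of teardown */
			if (ret != -EAGAIN)
				return ret;	/* done, or a hard error */
			commit_progress();	/* persist, then go again */
		}
	}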
 -
 -int btrfs_add_dead_reloc_root(struct btrfs_root *root)
 -{
 -      list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
 -      return 0;
 -}
 -
 -int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
 -{
 -      struct btrfs_root *reloc_root;
 -      struct btrfs_trans_handle *trans;
 -      struct btrfs_key location;
 -      int found;
 -      int ret;
 -
 -      mutex_lock(&root->fs_info->tree_reloc_mutex);
 -      ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
 -      BUG_ON(ret);
 -      found = !list_empty(&root->fs_info->dead_reloc_roots);
 -      mutex_unlock(&root->fs_info->tree_reloc_mutex);
 -
 -      if (found) {
 -              trans = btrfs_start_transaction(root, 1);
 -              BUG_ON(IS_ERR(trans));
 -              ret = btrfs_commit_transaction(trans, root);
 -              BUG_ON(ret);
 -      }
 -
 -      location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
 -      location.offset = (u64)-1;
 -      location.type = BTRFS_ROOT_ITEM_KEY;
 -
 -      reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
 -      BUG_ON(!reloc_root);
 -      ret = btrfs_orphan_cleanup(reloc_root);
 -      BUG_ON(ret);
 -      return 0;
 -}
 -
 -static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
 -                                  struct btrfs_root *root)
 -{
 -      struct btrfs_root *reloc_root;
 -      struct extent_buffer *eb;
 -      struct btrfs_root_item *root_item;
 -      struct btrfs_key root_key;
 -      int ret;
 -
 -      BUG_ON(!root->ref_cows);
 -      if (root->reloc_root)
 -              return 0;
 -
 -      root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
 -      if (!root_item)
 -              return -ENOMEM;
 -
 -      ret = btrfs_copy_root(trans, root, root->commit_root,
 -                            &eb, BTRFS_TREE_RELOC_OBJECTID);
 -      BUG_ON(ret);
 -
 -      root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
 -      root_key.offset = root->root_key.objectid;
 -      root_key.type = BTRFS_ROOT_ITEM_KEY;
 -
 -      memcpy(root_item, &root->root_item, sizeof(*root_item));
 -      btrfs_set_root_refs(root_item, 0);
 -      btrfs_set_root_bytenr(root_item, eb->start);
 -      btrfs_set_root_level(root_item, btrfs_header_level(eb));
 -      btrfs_set_root_generation(root_item, trans->transid);
 -
 -      btrfs_tree_unlock(eb);
 -      free_extent_buffer(eb);
 -
 -      ret = btrfs_insert_root(trans, root->fs_info->tree_root,
 -                              &root_key, root_item);
 -      BUG_ON(ret);
 -      kfree(root_item);
 -
 -      reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
 -                                               &root_key);
 -      BUG_ON(IS_ERR(reloc_root));
 -      reloc_root->last_trans = trans->transid;
 -      reloc_root->commit_root = NULL;
 -      reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
 -
 -      root->reloc_root = reloc_root;
 -      return 0;
 -}
 -
 -/*
 - * Core function of space balance.
 - *
 - * The idea is to use reloc trees to relocate tree blocks in reference
 - * counted roots. There is one reloc tree for each subvol, and all
 - * reloc trees share the same root key objectid. Reloc trees are
 - * snapshots of the latest committed roots of subvols (root->commit_root).
 - *
 - * Relocating a tree block referenced by a subvol takes two steps:
 - * COW the block through the subvol's reloc tree, then update the block
 - * pointer in the subvol to point to the new block. Since all reloc trees
 - * share the same root key objectid, special handling for tree blocks
 - * owned by them is easy. Once a tree block has been COWed in one reloc
 - * tree, we can use the resulting new block directly when the same block
 - * needs to be COWed again through other reloc trees. This way, relocated
 - * tree blocks are shared between reloc trees, and so they are also shared
 - * between subvols.
 - */
 -static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
 -                                    struct btrfs_root *root,
 -                                    struct btrfs_path *path,
 -                                    struct btrfs_key *first_key,
 -                                    struct btrfs_ref_path *ref_path,
 -                                    struct btrfs_block_group_cache *group,
 -                                    struct inode *reloc_inode)
 -{
 -      struct btrfs_root *reloc_root;
 -      struct extent_buffer *eb = NULL;
 -      struct btrfs_key *keys;
 -      u64 *nodes;
 -      int level;
 -      int shared_level;
 -      int lowest_level = 0;
 -      int ret;
 -
 -      if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
 -              lowest_level = ref_path->owner_objectid;
 -
 -      if (!root->ref_cows) {
 -              path->lowest_level = lowest_level;
 -              ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
 -              BUG_ON(ret < 0);
 -              path->lowest_level = 0;
 -              btrfs_release_path(root, path);
 -              return 0;
 -      }
 -
 -      mutex_lock(&root->fs_info->tree_reloc_mutex);
 -      ret = init_reloc_tree(trans, root);
 -      BUG_ON(ret);
 -      reloc_root = root->reloc_root;
 -
 -      shared_level = ref_path->shared_level;
 -      ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
 -
 -      keys = ref_path->node_keys;
 -      nodes = ref_path->new_nodes;
 -      memset(&keys[shared_level + 1], 0,
 -             sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
 -      memset(&nodes[shared_level + 1], 0,
 -             sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
 -
 -      if (nodes[lowest_level] == 0) {
 -              path->lowest_level = lowest_level;
 -              ret = btrfs_search_slot(trans, reloc_root, first_key, path,
 -                                      0, 1);
 -              BUG_ON(ret);
 -              for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
 -                      eb = path->nodes[level];
 -                      if (!eb || eb == reloc_root->node)
 -                              break;
 -                      nodes[level] = eb->start;
 -                      if (level == 0)
 -                              btrfs_item_key_to_cpu(eb, &keys[level], 0);
 -                      else
 -                              btrfs_node_key_to_cpu(eb, &keys[level], 0);
 -              }
 -              if (nodes[0] &&
 -                  ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 -                      eb = path->nodes[0];
 -                      ret = replace_extents_in_leaf(trans, reloc_root, eb,
 -                                                    group, reloc_inode);
 -                      BUG_ON(ret);
 -              }
 -              btrfs_release_path(reloc_root, path);
 -      } else {
 -              ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
 -                                     lowest_level);
 -              BUG_ON(ret);
 -      }
 -
 -      /*
 -       * replace tree blocks in the fs tree with tree blocks in
 -       * the reloc tree.
 -       */
 -      ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
 -      BUG_ON(ret < 0);
 -
 -      if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 -              ret = btrfs_search_slot(trans, reloc_root, first_key, path,
 -                                      0, 0);
 -              BUG_ON(ret);
 -              extent_buffer_get(path->nodes[0]);
 -              eb = path->nodes[0];
 -              btrfs_release_path(reloc_root, path);
 -              ret = invalidate_extent_cache(reloc_root, eb, group, root);
 -              BUG_ON(ret);
 -              free_extent_buffer(eb);
 -      }
 -
 -      mutex_unlock(&root->fs_info->tree_reloc_mutex);
 -      path->lowest_level = 0;
 -      return 0;
 -}
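
The block comment above ("core function of space balance") describes a
two-step COW flow. As a concrete reading, here is that flow in isolation;
this is a sketch with invented types and helpers, not the btrfs API:

	#include <stdlib.h>

	struct block {
		struct block *relocated;	/* shared COWed copy, if any */
	};

	/* step 1: COW through the reloc tree; because all reloc trees
	 * share one root objectid, a copy made for one subvol is reused
	 * when any other reloc tree needs the same block */
	static struct block *cow_via_reloc_tree(struct block *b)
	{
		if (!b->relocated)
			b->relocated = calloc(1, sizeof(*b->relocated));
		return b->relocated;
	}

	/* step 2: swing the subvol's block pointer to the shared copy */
	static void relocate_in_subvol(struct block **subvol_ptr,
				       struct block *b)
	{
		*subvol_ptr = cow_via_reloc_tree(b);
	}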
 -
 -static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
 -                                      struct btrfs_root *root,
 -                                      struct btrfs_path *path,
 -                                      struct btrfs_key *first_key,
 -                                      struct btrfs_ref_path *ref_path)
 -{
 -      int ret;
 -
 -      ret = relocate_one_path(trans, root, path, first_key,
 -                              ref_path, NULL, NULL);
 -      BUG_ON(ret);
 -
 -      return 0;
 -}
 -
 -static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
 -                                  struct btrfs_root *extent_root,
 -                                  struct btrfs_path *path,
 -                                  struct btrfs_key *extent_key)
 -{
 -      int ret;
 -
 -      ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
 -      if (ret)
 -              goto out;
 -      ret = btrfs_del_item(trans, extent_root, path);
 -out:
 -      btrfs_release_path(extent_root, path);
 -      return ret;
 -}
 -
 -static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
 -                                              struct btrfs_ref_path *ref_path)
 -{
 -      struct btrfs_key root_key;
 -
 -      root_key.objectid = ref_path->root_objectid;
 -      root_key.type = BTRFS_ROOT_ITEM_KEY;
 -      if (is_cowonly_root(ref_path->root_objectid))
 -              root_key.offset = 0;
 -      else
 -              root_key.offset = (u64)-1;
 -
 -      return btrfs_read_fs_root_no_name(fs_info, &root_key);
 -}
 -
 -static noinline int relocate_one_extent(struct btrfs_root *extent_root,
 -                                      struct btrfs_path *path,
 -                                      struct btrfs_key *extent_key,
 -                                      struct btrfs_block_group_cache *group,
 -                                      struct inode *reloc_inode, int pass)
 -{
 -      struct btrfs_trans_handle *trans;
 -      struct btrfs_root *found_root;
 -      struct btrfs_ref_path *ref_path = NULL;
 -      struct disk_extent *new_extents = NULL;
 -      int nr_extents = 0;
 -      int loops;
 -      int ret;
 -      int level;
 -      struct btrfs_key first_key;
 -      u64 prev_block = 0;
 -
 -      trans = btrfs_start_transaction(extent_root, 1);
 -      BUG_ON(IS_ERR(trans));
 -
 -      if (extent_key->objectid == 0) {
 -              ret = del_extent_zero(trans, extent_root, path, extent_key);
 -              goto out;
 -      }
 -
 -      ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
 -      if (!ref_path) {
 -              ret = -ENOMEM;
 -              goto out;
 -      }
 -
 -      for (loops = 0; ; loops++) {
 -              if (loops == 0) {
 -                      ret = btrfs_first_ref_path(trans, extent_root, ref_path,
 -                                                 extent_key->objectid);
 -              } else {
 -                      ret = btrfs_next_ref_path(trans, extent_root, ref_path);
 -              }
 -              if (ret < 0)
 -                      goto out;
 -              if (ret > 0)
 -                      break;
 -
 -              if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
 -                  ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
 -                      continue;
 -
 -              found_root = read_ref_root(extent_root->fs_info, ref_path);
 -              BUG_ON(!found_root);
 -              /*
 -               * for a reference counted tree, only process reference paths
 -               * rooted at the latest committed root.
 -               */
 -              if (found_root->ref_cows &&
 -                  ref_path->root_generation != found_root->root_key.offset)
 -                      continue;
 -
 -              if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 -                      if (pass == 0) {
 -                              /*
 -                               * copy data extents to new locations
 -                               */
 -                              u64 group_start = group->key.objectid;
 -                              ret = relocate_data_extent(reloc_inode,
 -                                                         extent_key,
 -                                                         group_start);
 -                              if (ret < 0)
 -                                      goto out;
 -                              break;
 -                      }
 -                      level = 0;
 -              } else {
 -                      level = ref_path->owner_objectid;
 -              }
 -
 -              if (prev_block != ref_path->nodes[level]) {
 -                      struct extent_buffer *eb;
 -                      u64 block_start = ref_path->nodes[level];
 -                      u64 block_size = btrfs_level_size(found_root, level);
 -
 -                      eb = read_tree_block(found_root, block_start,
 -                                           block_size, 0);
 -                      if (!eb) {
 -                              ret = -EIO;
 -                              goto out;
 -                      }
 -                      btrfs_tree_lock(eb);
 -                      BUG_ON(level != btrfs_header_level(eb));
 -
 -                      if (level == 0)
 -                              btrfs_item_key_to_cpu(eb, &first_key, 0);
 -                      else
 -                              btrfs_node_key_to_cpu(eb, &first_key, 0);
 -
 -                      btrfs_tree_unlock(eb);
 -                      free_extent_buffer(eb);
 -                      prev_block = block_start;
 -              }
 -
 -              mutex_lock(&extent_root->fs_info->trans_mutex);
 -              btrfs_record_root_in_trans(found_root);
 -              mutex_unlock(&extent_root->fs_info->trans_mutex);
 -              if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
 -                      /*
 -                       * try to update data extent references while
 -                       * keeping metadata shared between snapshots.
 -                       */
 -                      if (pass == 1) {
 -                              ret = relocate_one_path(trans, found_root,
 -                                              path, &first_key, ref_path,
 -                                              group, reloc_inode);
 -                              if (ret < 0)
 -                                      goto out;
 -                              continue;
 -                      }
 -                      /*
 -                       * use the fallback method to process the remaining
 -                       * references.
 -                       */
 -                      if (!new_extents) {
 -                              u64 group_start = group->key.objectid;
 -                              new_extents = kmalloc(sizeof(*new_extents),
 -                                                    GFP_NOFS);
 -                              if (!new_extents) {
 -                                      ret = -ENOMEM;
 -                                      goto out;
 -                              }
 -                              nr_extents = 1;
 -                              ret = get_new_locations(reloc_inode,
 -                                                      extent_key,
 -                                                      group_start, 1,
 -                                                      &new_extents,
 -                                                      &nr_extents);
 -                              if (ret)
 -                                      goto out;
 -                      }
 -                      ret = replace_one_extent(trans, found_root,
 -                                              path, extent_key,
 -                                              &first_key, ref_path,
 -                                              new_extents, nr_extents);
 -              } else {
 -                      ret = relocate_tree_block(trans, found_root, path,
 -                                                &first_key, ref_path);
 -              }
 -              if (ret < 0)
 -                      goto out;
 -      }
 -      ret = 0;
 -out:
 -      btrfs_end_transaction(trans, extent_root);
 -      kfree(new_extents);
 -      kfree(ref_path);
 -      return ret;
 -}
 -#endif
 -
 -static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 -{
 -      u64 num_devices;
 -      u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
 -              BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 -
 -      /*
 -       * we add in the count of missing devices because we want
 -       * to make sure that any RAID levels on a degraded FS
 -       * continue to be honored.
 -       */
 -      num_devices = root->fs_info->fs_devices->rw_devices +
 -              root->fs_info->fs_devices->missing_devices;
 -
 -      if (num_devices == 1) {
 -              stripped |= BTRFS_BLOCK_GROUP_DUP;
 -              stripped = flags & ~stripped;
 -
 -              /* turn raid0 into single device chunks */
 -              if (flags & BTRFS_BLOCK_GROUP_RAID0)
 -                      return stripped;
 -
 -              /* turn mirroring into duplication */
 -              if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
 -                           BTRFS_BLOCK_GROUP_RAID10))
 -                      return stripped | BTRFS_BLOCK_GROUP_DUP;
 -              return flags;
 -      } else {
 -              /* they already had raid on here, just return */
 -              if (flags & stripped)
 -                      return flags;
 -
 -              stripped |= BTRFS_BLOCK_GROUP_DUP;
 -              stripped = flags & ~stripped;
 -
 -              /* switch duplicated blocks with raid1 */
 -              if (flags & BTRFS_BLOCK_GROUP_DUP)
 -                      return stripped | BTRFS_BLOCK_GROUP_RAID1;
 -
 -              /* turn single device chunks into raid0 */
 -              return stripped | BTRFS_BLOCK_GROUP_RAID0;
 -      }
 -      return flags;
 -}
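
Reading the conversion rules out of update_block_group_flags() above,
noting that num_devices counts missing devices as well as rw ones, so a
degraded RAID1 with one live and one missing device keeps its profile:

	/*
	 *  num_devices == 1:  RAID0          -> single
	 *                     RAID1, RAID10  -> DUP
	 *                     anything else  -> unchanged
	 *
	 *  num_devices  > 1:  RAID0/1/10     -> unchanged
	 *                     DUP            -> RAID1
	 *                     single         -> RAID0
	 */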
 -
 -static int set_block_group_ro(struct btrfs_block_group_cache *cache)
 -{
 -      struct btrfs_space_info *sinfo = cache->space_info;
 -      u64 num_bytes;
 -      int ret = -ENOSPC;
 -
 -      if (cache->ro)
 -              return 0;
 -
 -      spin_lock(&sinfo->lock);
 -      spin_lock(&cache->lock);
 -      num_bytes = cache->key.offset - cache->reserved - cache->pinned -
 -                  cache->bytes_super - btrfs_block_group_used(&cache->item);
 -
 -      if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned +
 -          sinfo->bytes_may_use + sinfo->bytes_readonly +
 -          cache->reserved_pinned + num_bytes <= sinfo->total_bytes) {
 -              sinfo->bytes_readonly += num_bytes;
 -              sinfo->bytes_reserved += cache->reserved_pinned;
 -              cache->reserved_pinned = 0;
 -              cache->ro = 1;
 -              ret = 0;
 -      }
 -
 -      spin_unlock(&cache->lock);
 -      spin_unlock(&sinfo->lock);
 -      return ret;
 -}
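
Worked numbers for the admission check in set_block_group_ro() (values
invented for illustration): with sinfo->total_bytes = 10 GiB, 6 GiB
already committed (used + reserved + pinned + may_use + readonly +
reserved_pinned) and 3 GiB unused in this group, 6 + 3 <= 10 holds, so
the 3 GiB moves into bytes_readonly and cache->ro is set. With 8 GiB
already committed, 8 + 3 > 10 and the function returns -ENOSPC, since
making this group's free space read-only would overcommit the space_info.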
 -
 -int btrfs_set_block_group_ro(struct btrfs_root *root,
 -                           struct btrfs_block_group_cache *cache)
 -{
 -      struct btrfs_trans_handle *trans;
 -      u64 alloc_flags;
 -      int ret;
 -
 -      BUG_ON(cache->ro);
 -
 -      trans = btrfs_join_transaction(root);
 -      BUG_ON(IS_ERR(trans));
 -
 -      alloc_flags = update_block_group_flags(root, cache->flags);
 -      if (alloc_flags != cache->flags)
 -              do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 -                             CHUNK_ALLOC_FORCE);
 -
 -      ret = set_block_group_ro(cache);
 -      if (!ret)
 -              goto out;
 -      alloc_flags = get_alloc_profile(root, cache->space_info->flags);
 -      ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 -                           CHUNK_ALLOC_FORCE);
 -      if (ret < 0)
 -              goto out;
 -      ret = set_block_group_ro(cache);
 -out:
 -      btrfs_end_transaction(trans, root);
 -      return ret;
 -}
 -
 -int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 -                          struct btrfs_root *root, u64 type)
 -{
 -      u64 alloc_flags = get_alloc_profile(root, type);
 -      return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
 -                            CHUNK_ALLOC_FORCE);
 -}
 -
 -/*
 - * helper to account the unused space of all the readonly block groups in
 - * the list; takes mirrors into account.
 - */
 -static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
 -{
 -      struct btrfs_block_group_cache *block_group;
 -      u64 free_bytes = 0;
 -      int factor;
 -
 -      list_for_each_entry(block_group, groups_list, list) {
 -              spin_lock(&block_group->lock);
 -
 -              if (!block_group->ro) {
 -                      spin_unlock(&block_group->lock);
 -                      continue;
 -              }
 -
 -              if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
 -                                        BTRFS_BLOCK_GROUP_RAID10 |
 -                                        BTRFS_BLOCK_GROUP_DUP))
 -                      factor = 2;
 -              else
 -                      factor = 1;
 -
 -              free_bytes += (block_group->key.offset -
 -                             btrfs_block_group_used(&block_group->item)) *
 -                             factor;
 -
 -              spin_unlock(&block_group->lock);
 -      }
 -
 -      return free_bytes;
 -}
 +      return free_bytes;
 +}
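
Example of the mirror factor used above (numbers invented): a RAID1 block
group with key.offset = 1 GiB and 256 MiB recorded as used contributes
(1 GiB - 256 MiB) * 2 = 1.5 GiB of raw free space, because every logical
byte in a mirrored or DUP group occupies two bytes on disk; the same
group as single or RAID0 would contribute 768 MiB (factor 1).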
  
  /*
   * helper to account the unused space of all the readonly block groups in the
Simple merge
diff --cc fs/btrfs/file.c
Simple merge
@@@ -399,10 -417,17 +399,17 @@@ int __load_free_space_cache(struct btrf
                        }
  
                        if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
 -                              spin_lock(&block_group->tree_lock);
 -                              ret = link_free_space(block_group, e);
 -                              spin_unlock(&block_group->tree_lock);
 +                              spin_lock(&ctl->tree_lock);
 +                              ret = link_free_space(ctl, e);
 +                              spin_unlock(&ctl->tree_lock);
-                               BUG_ON(ret);
+                               if (ret) {
+                                       printk(KERN_ERR "Duplicate entries in "
+                                              "free space cache, dumping\n");
+                                       kunmap(page);
+                                       unlock_page(page);
+                                       page_cache_release(page);
+                                       goto free_cache;
+                               }
                        } else {
                                e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                                if (!e->bitmap) {
                                        page_cache_release(page);
                                        goto free_cache;
                                }
 -                              spin_lock(&block_group->tree_lock);
 -                              ret = link_free_space(block_group, e);
 -                              block_group->total_bitmaps++;
 -                              recalculate_thresholds(block_group);
 -                              spin_unlock(&block_group->tree_lock);
 +                              spin_lock(&ctl->tree_lock);
 +                              ret = link_free_space(ctl, e);
 +                              ctl->total_bitmaps++;
 +                              ctl->op->recalc_thresholds(ctl);
 +                              spin_unlock(&ctl->tree_lock);
                                list_add_tail(&e->list, &bitmaps);
+                               if (ret) {
+                                       printk(KERN_ERR "Duplicate entries in "
+                                              "free space cache, dumping\n");
+                                       kunmap(page);
+                                       unlock_page(page);
+                                       page_cache_release(page);
+                                       goto free_cache;
+                               }
                        }
  
                        num_entries--;
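
The hunk above replaces BUG_ON(ret) with graceful degradation: a
duplicate entry now logs an error and dumps the on-disk free space cache
so it can be rebuilt, instead of crashing the machine. The shape in
isolation, as a sketch with hypothetical helpers rather than the btrfs
API:

	#include <errno.h>

	struct cache;
	int  entries_remaining(struct cache *c);
	int  parse_next_entry(struct cache *c);	/* -EEXIST on duplicate */
	void discard_cache(struct cache *c);

	int load_cache(struct cache *c)
	{
		int ret = 0;

		while (entries_remaining(c)) {
			ret = parse_next_entry(c);
			if (ret)
				goto free_cache;  /* corrupt: don't trust it */
		}
		return 0;

	free_cache:
		discard_cache(c);	/* fall back to rebuilding it later */
		return ret;
	}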
@@@ -138,9 -136,8 +138,8 @@@ static noinline int insert_inline_exten
                return -ENOMEM;
  
        path->leave_spinning = 1;
-       btrfs_set_trans_block_group(trans, inode);
  
 -      key.objectid = inode->i_ino;
 +      key.objectid = btrfs_ino(inode);
        key.offset = start;
        btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
        datasize = btrfs_file_extent_calc_inline_size(cur_size);
@@@ -792,10 -776,9 +791,9 @@@ static noinline int cow_file_range(stru
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        int ret = 0;
  
 -      BUG_ON(root == root->fs_info->tree_root);
 +      BUG_ON(is_free_space_inode(root, inode));
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
  
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@@ -1073,14 -1051,14 +1071,16 @@@ static noinline int run_delalloc_nocow(
  
        path = btrfs_alloc_path();
        BUG_ON(!path);
 -      if (root == root->fs_info->tree_root) {
 -              nolock = true;
 +
 +      nolock = is_free_space_inode(root, inode);
 +
 +      if (nolock)
-               trans = btrfs_join_transaction_nolock(root, 1);
+               trans = btrfs_join_transaction_nolock(root);
 -      } else {
 +      else
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
 -      }
++
        BUG_ON(IS_ERR(trans));
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
  
        cow_start = (u64)-1;
        cur_offset = start;
@@@ -3094,9 -3066,7 +3095,7 @@@ static int btrfs_rmdir(struct inode *di
        if (IS_ERR(trans))
                return PTR_ERR(trans);
  
-       btrfs_set_trans_block_group(trans, dir);
 -      if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
 +      if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                err = btrfs_unlink_subvol(trans, root, dir,
                                          BTRFS_I(inode)->location.objectid,
                                          dentry->d_name.name,
@@@ -4133,17 -4256,11 +4130,18 @@@ static int btrfs_real_readdir(struct fi
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 2;
++
+       path->reada = 1;
  
 +      if (key_type == BTRFS_DIR_INDEX_KEY) {
 +              INIT_LIST_HEAD(&ins_list);
 +              INIT_LIST_HEAD(&del_list);
 +              btrfs_get_delayed_items(inode, &ins_list, &del_list);
 +      }
 +
        btrfs_set_key_type(&key, key_type);
        key.offset = filp->f_pos;
 -      key.objectid = inode->i_ino;
 +      key.objectid = btrfs_ino(inode);
  
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
@@@ -4313,13 -4411,13 +4309,12 @@@ void btrfs_dirty_inode(struct inode *in
                btrfs_end_transaction(trans, root);
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
 -                      if (printk_ratelimit()) {
 -                              printk(KERN_ERR "btrfs: fail to "
 -                                     "dirty  inode %lu error %ld\n",
 -                                     inode->i_ino, PTR_ERR(trans));
 -                      }
 +      printk_ratelimited(KERN_ERR "btrfs: failed to "
 +                                     "dirty inode %llu error %ld\n",
 +                                     (unsigned long long)btrfs_ino(inode),
 +                                     PTR_ERR(trans));
                        return;
                }
-               btrfs_set_trans_block_group(trans, inode);
  
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
@@@ -4629,15 -4720,9 +4622,13 @@@ static int btrfs_mknod(struct inode *di
        if (IS_ERR(trans))
                return PTR_ERR(trans);
  
-       btrfs_set_trans_block_group(trans, dir);
 +      err = btrfs_find_free_ino(root, &objectid);
 +      if (err)
 +              goto out_unlock;
 +
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 -                              dentry->d_name.len, dir->i_ino, objectid,
 +                              dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@@ -4692,15 -4777,9 +4680,13 @@@ static int btrfs_create(struct inode *d
        if (IS_ERR(trans))
                return PTR_ERR(trans);
  
-       btrfs_set_trans_block_group(trans, dir);
 +      err = btrfs_find_free_ino(root, &objectid);
 +      if (err)
 +              goto out_unlock;
 +
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 -                              dentry->d_name.len, dir->i_ino, objectid,
 +                              dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@@ -4818,16 -4895,10 +4798,14 @@@ static int btrfs_mkdir(struct inode *di
        trans = btrfs_start_transaction(root, 5);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
-       btrfs_set_trans_block_group(trans, dir);
  
 +      err = btrfs_find_free_ino(root, &objectid);
 +      if (err)
 +              goto out_fail;
 +
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 -                              dentry->d_name.len, dir->i_ino, objectid,
 +                              dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, S_IFDIR | mode,
-                               &index);
+                               S_IFDIR | mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_fail;
@@@ -5130,8 -5206,8 +5113,10 @@@ again
                                kunmap(page);
                                free_extent_map(em);
                                em = NULL;
 -                              btrfs_release_path(root, path);
++
 +                              btrfs_release_path(path);
-                               trans = btrfs_join_transaction(root, 1);
+                               trans = btrfs_join_transaction(root);
++
                                if (IS_ERR(trans))
                                        return ERR_CAST(trans);
                                goto again;
@@@ -6748,25 -6869,10 +6785,10 @@@ void btrfs_destroy_inode(struct inode *
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
  
-       if (root == root->fs_info->tree_root) {
-               struct btrfs_block_group_cache *block_group;
-               block_group = btrfs_lookup_block_group(root->fs_info,
-                                               BTRFS_I(inode)->block_group);
-               if (block_group && block_group->inode == inode) {
-                       spin_lock(&block_group->lock);
-                       block_group->inode = NULL;
-                       spin_unlock(&block_group->lock);
-                       btrfs_put_block_group(block_group);
-               } else if (block_group) {
-                       btrfs_put_block_group(block_group);
-               }
-       }
        spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 -              printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
 -                     inode->i_ino);
 +              printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
 +                     (unsigned long long)btrfs_ino(inode));
                list_del_init(&BTRFS_I(inode)->i_orphan);
        }
        spin_unlock(&root->orphan_lock);
@@@ -7131,16 -7290,9 +7151,13 @@@ static int btrfs_symlink(struct inode *
        if (IS_ERR(trans))
                return PTR_ERR(trans);
  
-       btrfs_set_trans_block_group(trans, dir);
 +      err = btrfs_find_free_ino(root, &objectid);
 +      if (err)
 +              goto out_unlock;
 +
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
 -                              dentry->d_name.len, dir->i_ino, objectid,
 +                              dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
-                               &index);
+                               S_IFLNK|S_IRWXUGO, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
Simple merge
Simple merge
@@@ -49,46 -49,73 +50,72 @@@ static noinline void switch_commit_root
  /*
   * either allocate a new transaction or hop into the existing one
   */
- static noinline int join_transaction(struct btrfs_root *root)
+ static noinline int join_transaction(struct btrfs_root *root, int nofail)
  {
        struct btrfs_transaction *cur_trans;
+       spin_lock(&root->fs_info->trans_lock);
+       if (root->fs_info->trans_no_join) {
+               if (!nofail) {
+                       spin_unlock(&root->fs_info->trans_lock);
+                       return -EBUSY;
+               }
+       }
        cur_trans = root->fs_info->running_transaction;
-       if (!cur_trans) {
-               cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
-                                            GFP_NOFS);
-               if (!cur_trans)
-                       return -ENOMEM;
-               root->fs_info->generation++;
-               atomic_set(&cur_trans->num_writers, 1);
-               cur_trans->num_joined = 0;
-               cur_trans->transid = root->fs_info->generation;
-               init_waitqueue_head(&cur_trans->writer_wait);
-               init_waitqueue_head(&cur_trans->commit_wait);
-               cur_trans->in_commit = 0;
-               cur_trans->blocked = 0;
-               atomic_set(&cur_trans->use_count, 1);
-               cur_trans->commit_done = 0;
-               cur_trans->start_time = get_seconds();
-               cur_trans->delayed_refs.root = RB_ROOT;
-               cur_trans->delayed_refs.num_entries = 0;
-               cur_trans->delayed_refs.num_heads_ready = 0;
-               cur_trans->delayed_refs.num_heads = 0;
-               cur_trans->delayed_refs.flushing = 0;
-               cur_trans->delayed_refs.run_delayed_start = 0;
-               spin_lock_init(&cur_trans->delayed_refs.lock);
-               INIT_LIST_HEAD(&cur_trans->pending_snapshots);
-               list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
-               extent_io_tree_init(&cur_trans->dirty_pages,
-                                    root->fs_info->btree_inode->i_mapping);
-               spin_lock(&root->fs_info->new_trans_lock);
-               root->fs_info->running_transaction = cur_trans;
-               spin_unlock(&root->fs_info->new_trans_lock);
-       } else {
+       if (cur_trans) {
+               atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
+               spin_unlock(&root->fs_info->trans_lock);
+               return 0;
        }
 -                           root->fs_info->btree_inode->i_mapping,
 -                           GFP_NOFS);
+       spin_unlock(&root->fs_info->trans_lock);
+       cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
+       if (!cur_trans)
+               return -ENOMEM;
+       spin_lock(&root->fs_info->trans_lock);
+       if (root->fs_info->running_transaction) {
+               kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+               cur_trans = root->fs_info->running_transaction;
+               atomic_inc(&cur_trans->use_count);
+               atomic_inc(&cur_trans->num_writers);
+               cur_trans->num_joined++;
+               spin_unlock(&root->fs_info->trans_lock);
+               return 0;
+       }
+       atomic_set(&cur_trans->num_writers, 1);
+       cur_trans->num_joined = 0;
+       init_waitqueue_head(&cur_trans->writer_wait);
+       init_waitqueue_head(&cur_trans->commit_wait);
+       cur_trans->in_commit = 0;
+       cur_trans->blocked = 0;
+       /*
+        * One for this trans handle, one so it will live on until we
+        * commit the transaction.
+        */
+       atomic_set(&cur_trans->use_count, 2);
+       cur_trans->commit_done = 0;
+       cur_trans->start_time = get_seconds();
+       cur_trans->delayed_refs.root = RB_ROOT;
+       cur_trans->delayed_refs.num_entries = 0;
+       cur_trans->delayed_refs.num_heads_ready = 0;
+       cur_trans->delayed_refs.num_heads = 0;
+       cur_trans->delayed_refs.flushing = 0;
+       cur_trans->delayed_refs.run_delayed_start = 0;
+       spin_lock_init(&cur_trans->commit_lock);
+       spin_lock_init(&cur_trans->delayed_refs.lock);
+       INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+       list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
+       extent_io_tree_init(&cur_trans->dirty_pages,
++                           root->fs_info->btree_inode->i_mapping);
+       root->fs_info->generation++;
+       cur_trans->transid = root->fs_info->generation;
+       root->fs_info->running_transaction = cur_trans;
+       spin_unlock(&root->fs_info->trans_lock);
  
        return 0;
  }
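
The rewritten join_transaction() above drops fs_info->trans_mutex in
favour of the trans_lock spinlock, so the GFP_NOFS allocation has to
happen with the lock released, at the cost of rechecking for a racing
winner after the lock is retaken. The same shape in userspace C, as a
sketch only, with every name hypothetical:

	#include <pthread.h>
	#include <stdlib.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static struct txn { int writers; } *running;

	static int join_txn(struct txn **out)
	{
		struct txn *fresh;

		pthread_mutex_lock(&lock);
		if (running) {			/* fast path: join */
			running->writers++;
			*out = running;
			pthread_mutex_unlock(&lock);
			return 0;
		}
		pthread_mutex_unlock(&lock);

		fresh = calloc(1, sizeof(*fresh));	/* lock dropped: may sleep */
		if (!fresh)
			return -1;

		pthread_mutex_lock(&lock);
		if (running) {			/* lost the race: join the winner */
			free(fresh);
			running->writers++;
			*out = running;
			pthread_mutex_unlock(&lock);
			return 0;
		}
		fresh->writers = 1;
		running = fresh;		/* we opened the transaction */
		*out = fresh;
		pthread_mutex_unlock(&lock);
		return 0;
	}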
@@@ -347,12 -368,53 +368,10 @@@ out
        return ret;
  }
  
 -#if 0
 -/*
 - * rate limit against the drop_snapshot code.  This helps to slow down new
 - * operations if the drop_snapshot code isn't able to keep up.
 - */
 -static void throttle_on_drops(struct btrfs_root *root)
 -{
 -      struct btrfs_fs_info *info = root->fs_info;
 -      int harder_count = 0;
 -
 -harder:
 -      if (atomic_read(&info->throttles)) {
 -              DEFINE_WAIT(wait);
 -              int thr;
 -              thr = atomic_read(&info->throttle_gen);
 -
 -              do {
 -                      prepare_to_wait(&info->transaction_throttle,
 -                                      &wait, TASK_UNINTERRUPTIBLE);
 -                      if (!atomic_read(&info->throttles)) {
 -                              finish_wait(&info->transaction_throttle, &wait);
 -                              break;
 -                      }
 -                      schedule();
 -                      finish_wait(&info->transaction_throttle, &wait);
 -              } while (thr == atomic_read(&info->throttle_gen));
 -              harder_count++;
 -
 -              if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
 -                  harder_count < 2)
 -                      goto harder;
 -
 -              if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
 -                  harder_count < 10)
 -                      goto harder;
 -
 -              if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
 -                  harder_count < 20)
 -                      goto harder;
 -      }
 -}
 -#endif
 -
  void btrfs_throttle(struct btrfs_root *root)
  {
-       mutex_lock(&root->fs_info->trans_mutex);
-       if (!root->fs_info->open_ioctl_trans)
+       if (!atomic_read(&root->fs_info->open_ioctl_trans))
                wait_current_trans(root);
-       mutex_unlock(&root->fs_info->trans_mutex);
  }
  
  static int should_end_transaction(struct btrfs_trans_handle *trans,
@@@ -1311,10 -1431,6 +1339,8 @@@ int btrfs_commit_transaction(struct btr
  
        trace_btrfs_transaction_commit(root);
  
-       mutex_unlock(&root->fs_info->trans_mutex);
 +      btrfs_scrub_continue(root);
 +
        if (current->journal_info == trans)
                current->journal_info = NULL;
  
Simple merge
Simple merge