Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 4 Jun 2011 21:17:23 +0000 (06:17 +0900)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 4 Jun 2011 21:17:23 +0000 (06:17 +0900)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (25 commits)
  btrfs: fix uninitialized variable warning
  btrfs: add helper for fs_info->closing
  Btrfs: add mount -o inode_cache
  btrfs: scrub: add explicit plugging
  btrfs: use btrfs_ino to access inode number
  Btrfs: don't save the inode cache if we are deleting this root
  btrfs: false BUG_ON when degraded
  Btrfs: don't save the inode cache in non-FS roots
  Btrfs: make sure we don't overflow the free space cache crc page
  Btrfs: fix uninit variable in the delayed inode code
  btrfs: scrub: don't reuse bios and pages
  Btrfs: leave spinning on lookup and map the leaf
  Btrfs: check for duplicate entries in the free space cache
  Btrfs: don't try to allocate from a block group that doesn't have enough space
  Btrfs: don't always do readahead
  Btrfs: try not to sleep as much when doing slow caching
  Btrfs: kill BTRFS_I(inode)->block_group
  Btrfs: don't look at the extent buffer level 3 times in a row
  Btrfs: map the node block when looking for readahead targets
  Btrfs: set range_start to the right start in count_range_bits
  ...

19 files changed:
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/volumes.c
fs/btrfs/xattr.c

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 93b1aa9..52d7eca 100644
@@ -121,9 +121,6 @@ struct btrfs_inode {
         */
        u64 index_cnt;
 
-       /* the start of block group preferred for allocations. */
-       u64 block_group;
-
        /* the fsync log has some corner cases that mean we have to check
         * directories to see if any unlinks have been done before
         * the directory was logged.  See tree-log.c for all the
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index b0e18d9..d840893 100644
@@ -43,8 +43,6 @@ struct btrfs_path *btrfs_alloc_path(void)
 {
        struct btrfs_path *path;
        path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
-       if (path)
-               path->reada = 1;
        return path;
 }
 
@@ -1224,6 +1222,7 @@ static void reada_for_search(struct btrfs_root *root,
        u64 search;
        u64 target;
        u64 nread = 0;
+       u64 gen;
        int direction = path->reada;
        struct extent_buffer *eb;
        u32 nr;
@@ -1251,6 +1250,15 @@ static void reada_for_search(struct btrfs_root *root,
        nritems = btrfs_header_nritems(node);
        nr = slot;
        while (1) {
+               if (!node->map_token) {
+                       unsigned long offset = btrfs_node_key_ptr_offset(nr);
+                       map_private_extent_buffer(node, offset,
+                                                 sizeof(struct btrfs_key_ptr),
+                                                 &node->map_token,
+                                                 &node->kaddr,
+                                                 &node->map_start,
+                                                 &node->map_len, KM_USER1);
+               }
                if (direction < 0) {
                        if (nr == 0)
                                break;
@@ -1268,14 +1276,23 @@ static void reada_for_search(struct btrfs_root *root,
                search = btrfs_node_blockptr(node, nr);
                if ((search <= target && target - search <= 65536) ||
                    (search > target && search - target <= 65536)) {
-                       readahead_tree_block(root, search, blocksize,
-                                    btrfs_node_ptr_generation(node, nr));
+                       gen = btrfs_node_ptr_generation(node, nr);
+                       if (node->map_token) {
+                               unmap_extent_buffer(node, node->map_token,
+                                                   KM_USER1);
+                               node->map_token = NULL;
+                       }
+                       readahead_tree_block(root, search, blocksize, gen);
                        nread += blocksize;
                }
                nscan++;
                if ((nread > 65536 || nscan > 32))
                        break;
        }
+       if (node->map_token) {
+               unmap_extent_buffer(node, node->map_token, KM_USER1);
+               node->map_token = NULL;
+       }
 }
 
 /*
@@ -1648,9 +1665,6 @@ again:
                }
 cow_done:
                BUG_ON(!cow && ins_len);
-               if (level != btrfs_header_level(b))
-                       WARN_ON(1);
-               level = btrfs_header_level(b);
 
                p->nodes[level] = b;
                if (!p->skip_locking)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 6c093fa..378b5b4 100644
@@ -930,7 +930,6 @@ struct btrfs_fs_info {
         * is required instead of the faster short fsync log commits
         */
        u64 last_trans_log_full_commit;
-       u64 open_ioctl_trans;
        unsigned long mount_opt:20;
        unsigned long compress_type:4;
        u64 max_inline;
@@ -947,7 +946,6 @@ struct btrfs_fs_info {
        struct super_block *sb;
        struct inode *btree_inode;
        struct backing_dev_info bdi;
-       struct mutex trans_mutex;
        struct mutex tree_log_mutex;
        struct mutex transaction_kthread_mutex;
        struct mutex cleaner_mutex;
@@ -968,6 +966,7 @@ struct btrfs_fs_info {
        struct rw_semaphore subvol_sem;
        struct srcu_struct subvol_srcu;
 
+       spinlock_t trans_lock;
        struct list_head trans_list;
        struct list_head hashers;
        struct list_head dead_roots;
@@ -980,6 +979,7 @@ struct btrfs_fs_info {
        atomic_t async_submit_draining;
        atomic_t nr_async_bios;
        atomic_t async_delalloc_pages;
+       atomic_t open_ioctl_trans;
 
        /*
         * this is used by the balancing code to wait for all the pending
@@ -1044,6 +1044,7 @@ struct btrfs_fs_info {
        int closing;
        int log_root_recovering;
        int enospc_unlink;
+       int trans_no_join;
 
        u64 total_pinned;
 
@@ -1065,7 +1066,6 @@ struct btrfs_fs_info {
        struct reloc_control *reloc_ctl;
 
        spinlock_t delalloc_lock;
-       spinlock_t new_trans_lock;
        u64 delalloc_bytes;
 
        /* data_alloc_cluster is only used in ssd mode */
@@ -1340,6 +1340,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
 #define BTRFS_MOUNT_ENOSPC_DEBUG        (1 << 15)
 #define BTRFS_MOUNT_AUTO_DEFRAG                (1 << 16)
+#define BTRFS_MOUNT_INODE_MAP_CACHE    (1 << 17)
 
 #define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
@@ -2238,6 +2239,9 @@ int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
 void btrfs_block_rsv_release(struct btrfs_root *root,
                             struct btrfs_block_rsv *block_rsv,
                             u64 num_bytes);
+int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   struct btrfs_block_rsv *rsv);
 int btrfs_set_block_group_ro(struct btrfs_root *root,
                             struct btrfs_block_group_cache *cache);
 int btrfs_set_block_group_rw(struct btrfs_root *root,
@@ -2350,6 +2354,15 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        struct extent_buffer *node,
                        struct extent_buffer *parent);
+static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
+{
+       /*
+        * Get synced with close_ctree()
+        */
+       smp_mb();
+       return fs_info->closing;
+}
+
 /* root-item.c */
 int btrfs_find_root_ref(struct btrfs_root *tree_root,
                        struct btrfs_path *path,
@@ -2512,8 +2525,7 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 int btrfs_writepages(struct address_space *mapping,
                     struct writeback_control *wbc);
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root,
-                            u64 new_dirid, u64 alloc_hint);
+                            struct btrfs_root *new_root, u64 new_dirid);
 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
                         size_t size, struct bio *bio, unsigned long bio_flags);
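Several hunks below convert open-coded "smp_mb(); if (fs_info->closing)" tests to the new btrfs_fs_closing() helper added above. A minimal sketch of the calling pattern, with an invented caller name; it is not part of the patch, and the writer side lives in close_ctree(), which is outside this excerpt:

/* Illustration only, not part of the patch.  close_ctree() (outside this
 * excerpt) stores a non-zero value into fs_info->closing while unmounting;
 * btrfs_fs_closing() hides the smp_mb() plus raw field read that callers
 * used to open-code.  The block group caching thread below checks for a
 * value > 1, other callers just check for non-zero.
 */
static int example_should_bail(struct btrfs_fs_info *fs_info)
{
	if (btrfs_fs_closing(fs_info))
		return 1;	/* unmount in progress, stop background work */
	return 0;
}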
 
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 01e2950..6462c29 100644
@@ -678,6 +678,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&head);
 
        next = item;
+       nitems = 0;
 
        /*
         * count the number of the continuous items that we can insert in batch
@@ -1129,7 +1130,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
        delayed_node = async_node->delayed_node;
        root = delayed_node->root;
 
-       trans = btrfs_join_transaction(root, 0);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto free_path;
 
@@ -1572,8 +1573,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_stack_inode_transid(inode_item, trans->transid);
        btrfs_set_stack_inode_rdev(inode_item, inode->i_rdev);
        btrfs_set_stack_inode_flags(inode_item, BTRFS_I(inode)->flags);
-       btrfs_set_stack_inode_block_group(inode_item,
-                                         BTRFS_I(inode)->block_group);
+       btrfs_set_stack_inode_block_group(inode_item, 0);
 
        btrfs_set_stack_timespec_sec(btrfs_inode_atime(inode_item),
                                     inode->i_atime.tv_sec);
@@ -1595,7 +1595,7 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
                               struct btrfs_root *root, struct inode *inode)
 {
        struct btrfs_delayed_node *delayed_node;
-       int ret;
+       int ret = 0;
 
        delayed_node = btrfs_get_or_create_delayed_node(inode);
        if (IS_ERR(delayed_node))
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 98b6a71..a203d36 100644
@@ -1505,24 +1505,24 @@ static int transaction_kthread(void *arg)
                vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
                mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-               spin_lock(&root->fs_info->new_trans_lock);
+               spin_lock(&root->fs_info->trans_lock);
                cur = root->fs_info->running_transaction;
                if (!cur) {
-                       spin_unlock(&root->fs_info->new_trans_lock);
+                       spin_unlock(&root->fs_info->trans_lock);
                        goto sleep;
                }
 
                now = get_seconds();
                if (!cur->blocked &&
                    (now < cur->start_time || now - cur->start_time < 30)) {
-                       spin_unlock(&root->fs_info->new_trans_lock);
+                       spin_unlock(&root->fs_info->trans_lock);
                        delay = HZ * 5;
                        goto sleep;
                }
                transid = cur->transid;
-               spin_unlock(&root->fs_info->new_trans_lock);
+               spin_unlock(&root->fs_info->trans_lock);
 
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                if (transid == trans->transid) {
                        ret = btrfs_commit_transaction(trans, root);
@@ -1613,7 +1613,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        INIT_LIST_HEAD(&fs_info->ordered_operations);
        INIT_LIST_HEAD(&fs_info->caching_block_groups);
        spin_lock_init(&fs_info->delalloc_lock);
-       spin_lock_init(&fs_info->new_trans_lock);
+       spin_lock_init(&fs_info->trans_lock);
        spin_lock_init(&fs_info->ref_cache_lock);
        spin_lock_init(&fs_info->fs_roots_radix_lock);
        spin_lock_init(&fs_info->delayed_iput_lock);
@@ -1645,6 +1645,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->max_inline = 8192 * 1024;
        fs_info->metadata_ratio = 0;
        fs_info->defrag_inodes = RB_ROOT;
+       fs_info->trans_no_join = 0;
 
        fs_info->thread_pool_size = min_t(unsigned long,
                                          num_online_cpus() + 2, 8);
@@ -1709,7 +1710,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        fs_info->do_barriers = 1;
 
 
-       mutex_init(&fs_info->trans_mutex);
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
        mutex_init(&fs_info->chunk_mutex);
@@ -2479,13 +2479,13 @@ int btrfs_commit_super(struct btrfs_root *root)
        down_write(&root->fs_info->cleanup_work_sem);
        up_write(&root->fs_info->cleanup_work_sem);
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        ret = btrfs_commit_transaction(trans, root);
        BUG_ON(ret);
        /* run commit again to drop the original snapshot */
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
        btrfs_commit_transaction(trans, root);
@@ -3024,10 +3024,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
 
        WARN_ON(1);
 
-       mutex_lock(&root->fs_info->trans_mutex);
        mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
+       spin_lock(&root->fs_info->trans_lock);
        list_splice_init(&root->fs_info->trans_list, &list);
+       root->fs_info->trans_no_join = 1;
+       spin_unlock(&root->fs_info->trans_lock);
+
        while (!list_empty(&list)) {
                t = list_entry(list.next, struct btrfs_transaction, list);
                if (!t)
@@ -3052,23 +3055,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                t->blocked = 0;
                if (waitqueue_active(&root->fs_info->transaction_wait))
                        wake_up(&root->fs_info->transaction_wait);
-               mutex_unlock(&root->fs_info->trans_mutex);
 
-               mutex_lock(&root->fs_info->trans_mutex);
                t->commit_done = 1;
                if (waitqueue_active(&t->commit_wait))
                        wake_up(&t->commit_wait);
-               mutex_unlock(&root->fs_info->trans_mutex);
-
-               mutex_lock(&root->fs_info->trans_mutex);
 
                btrfs_destroy_pending_snapshots(t);
 
                btrfs_destroy_delalloc_inodes(root);
 
-               spin_lock(&root->fs_info->new_trans_lock);
+               spin_lock(&root->fs_info->trans_lock);
                root->fs_info->running_transaction = NULL;
-               spin_unlock(&root->fs_info->new_trans_lock);
+               spin_unlock(&root->fs_info->trans_lock);
 
                btrfs_destroy_marked_extents(root, &t->dirty_pages,
                                             EXTENT_DIRTY);
@@ -3082,8 +3080,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                kmem_cache_free(btrfs_transaction_cachep, t);
        }
 
+       spin_lock(&root->fs_info->trans_lock);
+       root->fs_info->trans_no_join = 0;
+       spin_unlock(&root->fs_info->trans_lock);
        mutex_unlock(&root->fs_info->transaction_kthread_mutex);
-       mutex_unlock(&root->fs_info->trans_mutex);
 
        return 0;
 }
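The fs-wide trans_mutex is gone after this series; the new fs_info->trans_lock spinlock and trans_no_join flag (both added in the ctree.h hunk above) cover the short critical sections, as btrfs_cleanup_transaction() now shows. A rough sketch of what the flag is for, using an invented helper name; the real join logic lives in fs/btrfs/transaction.c, which is not part of this excerpt:

/* Illustration only, not part of the patch.  trans_no_join is raised under
 * trans_lock while a transaction is being committed or torn down, so
 * would-be joiners back off instead of serializing on the old trans_mutex.
 */
static int example_can_join(struct btrfs_fs_info *fs_info)
{
	int busy;

	spin_lock(&fs_info->trans_lock);
	busy = fs_info->trans_no_join;	/* commit/cleanup in progress? */
	spin_unlock(&fs_info->trans_lock);

	return !busy;
}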
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 169bd62..5b9b6b6 100644
@@ -348,7 +348,7 @@ static int caching_kthread(void *data)
         */
        path->skip_locking = 1;
        path->search_commit_root = 1;
-       path->reada = 2;
+       path->reada = 1;
 
        key.objectid = last;
        key.offset = 0;
@@ -366,8 +366,7 @@ again:
        nritems = btrfs_header_nritems(leaf);
 
        while (1) {
-               smp_mb();
-               if (fs_info->closing > 1) {
+               if (btrfs_fs_closing(fs_info) > 1) {
                        last = (u64)-1;
                        break;
                }
@@ -379,15 +378,18 @@ again:
                        if (ret)
                                break;
 
-                       caching_ctl->progress = last;
-                       btrfs_release_path(path);
-                       up_read(&fs_info->extent_commit_sem);
-                       mutex_unlock(&caching_ctl->mutex);
-                       if (btrfs_transaction_in_commit(fs_info))
-                               schedule_timeout(1);
-                       else
+                       if (need_resched() ||
+                           btrfs_next_leaf(extent_root, path)) {
+                               caching_ctl->progress = last;
+                               btrfs_release_path(path);
+                               up_read(&fs_info->extent_commit_sem);
+                               mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
-                       goto again;
+                               goto again;
+                       }
+                       leaf = path->nodes[0];
+                       nritems = btrfs_header_nritems(leaf);
+                       continue;
                }
 
                if (key.objectid < block_group->key.objectid) {
@@ -3065,7 +3067,7 @@ again:
                        spin_unlock(&data_sinfo->lock);
 alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
-                       trans = btrfs_join_transaction(root, 1);
+                       trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
 
@@ -3091,9 +3093,10 @@ alloc:
 
                /* commit the current transaction and try again */
 commit_trans:
-               if (!committed && !root->fs_info->open_ioctl_trans) {
+               if (!committed &&
+                   !atomic_read(&root->fs_info->open_ioctl_trans)) {
                        committed = 1;
-                       trans = btrfs_join_transaction(root, 1);
+                       trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans))
                                return PTR_ERR(trans);
                        ret = btrfs_commit_transaction(trans, root);
@@ -3472,7 +3475,7 @@ again:
                goto out;
 
        ret = -ENOSPC;
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                goto out;
        ret = btrfs_commit_transaction(trans, root);
@@ -3699,7 +3702,7 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
                if (trans)
                        return -EAGAIN;
 
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
                ret = btrfs_commit_transaction(trans, root);
                return 0;
@@ -3837,6 +3840,37 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
        WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
 }
 
+int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans,
+                                   struct btrfs_root *root,
+                                   struct btrfs_block_rsv *rsv)
+{
+       struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv;
+       u64 num_bytes;
+       int ret;
+
+       /*
+        * Truncate should be freeing data, but give us 2 items just in case it
+        * needs to use some space.  We may want to be smarter about this in the
+        * future.
+        */
+       num_bytes = btrfs_calc_trans_metadata_size(root, 2);
+
+       /* We already have enough bytes, just return */
+       if (rsv->reserved >= num_bytes)
+               return 0;
+
+       num_bytes -= rsv->reserved;
+
+       /*
+        * You should have reserved enough space beforehand to do this, so this
+        * should not fail.
+        */
+       ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes);
+       BUG_ON(ret);
+
+       return 0;
+}
+
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 int num_items)
@@ -3877,23 +3911,18 @@ int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;
 
        /*
-        * one for deleting orphan item, one for updating inode and
-        * two for calling btrfs_truncate_inode_items.
-        *
-        * btrfs_truncate_inode_items is a delete operation, it frees
-        * more space than it uses in most cases. So two units of
-        * metadata space should be enough for calling it many times.
-        * If all of the metadata space is used, we can commit
-        * transaction and use space it freed.
+        * We need to hold space in order to delete our orphan item once we've
+        * added it, so this takes the reservation so we can release it later
+        * when we are truly done with the orphan item.
         */
-       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
+       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
 
 void btrfs_orphan_release_metadata(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 4);
+       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1);
        btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
 }
 
@@ -4987,6 +5016,15 @@ have_block_group:
                if (unlikely(block_group->ro))
                        goto loop;
 
+               spin_lock(&block_group->free_space_ctl->tree_lock);
+               if (cached &&
+                   block_group->free_space_ctl->free_space <
+                   num_bytes + empty_size) {
+                       spin_unlock(&block_group->free_space_ctl->tree_lock);
+                       goto loop;
+               }
+               spin_unlock(&block_group->free_space_ctl->tree_lock);
+
                /*
                 * Ok we want to try and use the cluster allocator, so lets look
                 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
@@ -5150,6 +5188,7 @@ checks:
                        btrfs_add_free_space(block_group, offset,
                                             search_start - offset);
                BUG_ON(offset > search_start);
+               btrfs_put_block_group(block_group);
                break;
 loop:
                failed_cluster_refill = false;
@@ -5242,14 +5281,7 @@ loop:
                ret = -ENOSPC;
        } else if (!ins->objectid) {
                ret = -ENOSPC;
-       }
-
-       /* we found what we needed */
-       if (ins->objectid) {
-               if (!(data & BTRFS_BLOCK_GROUP_DATA))
-                       trans->block_group = block_group->key.objectid;
-
-               btrfs_put_block_group(block_group);
+       } else if (ins->objectid) {
                ret = 0;
        }
 
@@ -6526,7 +6558,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
        BUG_ON(cache->ro);
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
 
        alloc_flags = update_block_group_flags(root, cache->flags);
@@ -6882,6 +6914,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->reada = 1;
 
        cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
        if (cache_gen != 0 &&
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c5d9fbb..7055d11 100644
@@ -1476,7 +1476,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
                        if (total_bytes >= max_bytes)
                                break;
                        if (!found) {
-                               *start = state->start;
+                               *start = max(cur_start, state->start);
                                found = 1;
                        }
                        last = state->end;
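The count_range_bits() change above is a clamp: when the first matching extent state begins before the queried range, the reported start is now the range start rather than the state's start. A worked illustration with made-up numbers, not part of the patch:

/* Illustration only, not part of the patch.  With one state spanning
 * [0, 8191] and a query whose cur_start is 4096, the old code reported
 * *start = 0 (outside the queried range); the clamped version reports
 * *start = 4096.
 */
static inline u64 example_range_start(u64 cur_start, u64 state_start)
{
	return state_start > cur_start ? state_start : cur_start; /* max() */
}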
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c6a22d7..fa4ef18 100644
@@ -129,7 +129,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        if (!btrfs_test_opt(root, AUTO_DEFRAG))
                return 0;
 
-       if (root->fs_info->closing)
+       if (btrfs_fs_closing(root->fs_info))
                return 0;
 
        if (BTRFS_I(inode)->in_defrag)
@@ -144,7 +144,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
        if (!defrag)
                return -ENOMEM;
 
-       defrag->ino = inode->i_ino;
+       defrag->ino = btrfs_ino(inode);
        defrag->transid = transid;
        defrag->root = root->root_key.objectid;
 
@@ -229,7 +229,7 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info)
                first_ino = defrag->ino + 1;
                rb_erase(&defrag->rb_node, &fs_info->defrag_inodes);
 
-               if (fs_info->closing)
+               if (btrfs_fs_closing(fs_info))
                        goto next_free;
 
                spin_unlock(&fs_info->defrag_inodes_lock);
@@ -1480,14 +1480,12 @@ int btrfs_sync_file(struct file *file, int datasync)
         * the current transaction, we can bail out now without any
         * syncing
         */
-       mutex_lock(&root->fs_info->trans_mutex);
+       smp_mb();
        if (BTRFS_I(inode)->last_trans <=
            root->fs_info->last_trans_committed) {
                BTRFS_I(inode)->last_trans = 0;
-               mutex_unlock(&root->fs_info->trans_mutex);
                goto out;
        }
-       mutex_unlock(&root->fs_info->trans_mutex);
 
        /*
         * ok we haven't committed the transaction yet, lets do a commit
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 70d4579..ad14473 100644
@@ -98,7 +98,7 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
                return inode;
 
        spin_lock(&block_group->lock);
-       if (!root->fs_info->closing) {
+       if (!btrfs_fs_closing(root->fs_info)) {
                block_group->inode = igrab(inode);
                block_group->iref = 1;
        }
@@ -402,7 +402,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                spin_lock(&ctl->tree_lock);
                                ret = link_free_space(ctl, e);
                                spin_unlock(&ctl->tree_lock);
-                               BUG_ON(ret);
+                               if (ret) {
+                                       printk(KERN_ERR "Duplicate entries in "
+                                              "free space cache, dumping\n");
+                                       kunmap(page);
+                                       unlock_page(page);
+                                       page_cache_release(page);
+                                       goto free_cache;
+                               }
                        } else {
                                e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                                if (!e->bitmap) {
@@ -419,6 +426,14 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                ctl->op->recalc_thresholds(ctl);
                                spin_unlock(&ctl->tree_lock);
                                list_add_tail(&e->list, &bitmaps);
+                               if (ret) {
+                                       printk(KERN_ERR "Duplicate entries in "
+                                              "free space cache, dumping\n");
+                                       kunmap(page);
+                                       unlock_page(page);
+                                       page_cache_release(page);
+                                       goto free_cache;
+                               }
                        }
 
                        num_entries--;
@@ -478,8 +493,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
         * If we're unmounting then just return, since this does a search on the
         * normal root and not the commit root and we could deadlock.
         */
-       smp_mb();
-       if (fs_info->closing)
+       if (btrfs_fs_closing(fs_info))
                return 0;
 
        /*
@@ -575,10 +589,25 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
                PAGE_CACHE_SHIFT;
+
+       /* Since the first page has all of our checksums and our generation we
+        * need to calculate the offset into the page that we can start writing
+        * our entries.
+        */
+       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
+
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
 
+       /* make sure we don't overflow that first page */
+       if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) {
+               /* this is really the same as running out of space, where we also return 0 */
+               printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n");
+               ret = 0;
+               goto out_update;
+       }
+
        /* We need a checksum per page. */
        crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
        if (!crc)
@@ -590,12 +619,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                return -1;
        }
 
-       /* Since the first page has all of our checksums and our generation we
-        * need to calculate the offset into the page that we can start writing
-        * our entries.
-        */
-       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
-
        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list))
                cluster = list_entry(block_group->cluster_list.next,
@@ -857,12 +880,14 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        ret = 1;
 
 out_free:
+       kfree(checksums);
+       kfree(pages);
+
+out_update:
        if (ret != 1) {
                invalidate_inode_pages2_range(inode->i_mapping, 0, index);
                BTRFS_I(inode)->generation = 0;
        }
-       kfree(checksums);
-       kfree(pages);
        btrfs_update_inode(trans, root, inode);
        return ret;
 }
@@ -963,10 +988,16 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
                         * logically.
                         */
                        if (bitmap) {
-                               WARN_ON(info->bitmap);
+                               if (info->bitmap) {
+                                       WARN_ON_ONCE(1);
+                                       return -EEXIST;
+                               }
                                p = &(*p)->rb_right;
                        } else {
-                               WARN_ON(!info->bitmap);
+                               if (!info->bitmap) {
+                                       WARN_ON_ONCE(1);
+                                       return -EEXIST;
+                               }
                                p = &(*p)->rb_left;
                        }
                }
@@ -2481,7 +2512,7 @@ struct inode *lookup_free_ino_inode(struct btrfs_root *root,
                return inode;
 
        spin_lock(&root->cache_lock);
-       if (!root->fs_info->closing)
+       if (!btrfs_fs_closing(root->fs_info))
                root->cache_inode = igrab(inode);
        spin_unlock(&root->cache_lock);
 
@@ -2504,12 +2535,14 @@ int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
        int ret = 0;
        u64 root_gen = btrfs_root_generation(&root->root_item);
 
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return 0;
+
        /*
         * If we're unmounting then just return, since this does a search on the
         * normal root and not the commit root and we could deadlock.
         */
-       smp_mb();
-       if (fs_info->closing)
+       if (btrfs_fs_closing(fs_info))
                return 0;
 
        path = btrfs_alloc_path();
@@ -2543,6 +2576,9 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
        struct inode *inode;
        int ret;
 
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return 0;
+
        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode))
                return 0;
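The "crc page" guard in __btrfs_write_out_cache() above is plain arithmetic: page 0 of the cache file holds one u32 checksum per page plus a u64 generation, so entries can only start at offset sizeof(u32) * num_pages + sizeof(u64), and once that offset leaves no room for a single entry the write is skipped (ret = 0) instead of overflowing the page. A standalone userspace back-of-the-envelope check follows; the 4096-byte page and 17-byte on-disk free space entry are assumptions, not taken from this excerpt:

#include <stdio.h>

int main(void)
{
	const unsigned long page_size = 4096;	/* assumed PAGE_CACHE_SIZE */
	const unsigned long entry_size = 17;	/* assumed on-disk entry size */
	unsigned long num_pages;

	for (num_pages = 1; ; num_pages++) {
		/* mirrors: first_page_offset = sizeof(u32) * num_pages + sizeof(u64) */
		unsigned long first_page_offset = 4 * num_pages + 8;

		if (first_page_offset + entry_size >= page_size) {
			printf("guard trips at %lu pages (about %lu KiB of cache)\n",
			       num_pages, num_pages * page_size / 1024);
			break;
		}
	}
	return 0;
}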
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 3262cd1..b4087e0 100644
@@ -38,6 +38,9 @@ static int caching_kthread(void *data)
        int slot;
        int ret;
 
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return 0;
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -59,8 +62,7 @@ again:
                goto out;
 
        while (1) {
-               smp_mb();
-               if (fs_info->closing)
+               if (btrfs_fs_closing(fs_info))
                        goto out;
 
                leaf = path->nodes[0];
@@ -141,6 +143,9 @@ static void start_caching(struct btrfs_root *root)
        int ret;
        u64 objectid;
 
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return;
+
        spin_lock(&root->cache_lock);
        if (root->cached != BTRFS_CACHE_NO) {
                spin_unlock(&root->cache_lock);
@@ -178,6 +183,9 @@ static void start_caching(struct btrfs_root *root)
 
 int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
 {
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return btrfs_find_free_objectid(root, objectid);
+
 again:
        *objectid = btrfs_find_ino_for_alloc(root);
 
@@ -201,6 +209,10 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 {
        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
+
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return;
+
 again:
        if (root->cached == BTRFS_CACHE_FINISHED) {
                __btrfs_add_free_space(ctl, objectid, 1);
@@ -250,6 +262,9 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
        struct rb_node *n;
        u64 count;
 
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return;
+
        while (1) {
                n = rb_first(rbroot);
                if (!n)
@@ -388,9 +403,24 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
        int prealloc;
        bool retry = false;
 
+       /* only fs tree and subvol/snap need ino cache */
+       if (root->root_key.objectid != BTRFS_FS_TREE_OBJECTID &&
+           (root->root_key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
+            root->root_key.objectid > BTRFS_LAST_FREE_OBJECTID))
+               return 0;
+
+       /* Don't save inode cache if we are deleting this root */
+       if (btrfs_root_refs(&root->root_item) == 0 &&
+           root != root->fs_info->tree_root)
+               return 0;
+
+       if (!btrfs_test_opt(root, INODE_MAP_CACHE))
+               return 0;
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+
 again:
        inode = lookup_free_ino_inode(root, path);
        if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
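All of the btrfs_test_opt(root, INODE_MAP_CACHE) checks added above make the inode number cache opt-in via the new inode_cache mount option (see BTRFS_MOUNT_INODE_MAP_CACHE in the ctree.h hunk). For context, a userspace sketch equivalent to mount -o inode_cache; the device and mount point below are placeholders:

/* Illustration only, not part of the patch.  /dev/sdb and /mnt/btrfs are
 * placeholder paths. */
#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb", "/mnt/btrfs", "btrfs", 0, "inode_cache")) {
		perror("mount");
		return 1;
	}
	return 0;
}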
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 39a9d57..ebf95f7 100644
@@ -138,7 +138,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        path->leave_spinning = 1;
-       btrfs_set_trans_block_group(trans, inode);
 
        key.objectid = btrfs_ino(inode);
        key.offset = start;
@@ -426,9 +425,8 @@ again:
                }
        }
        if (start == 0) {
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
                trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
                /* lets try to make an inline extent */
@@ -623,8 +621,9 @@ retry:
                            async_extent->start + async_extent->ram_size - 1,
                            GFP_NOFS);
 
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                BUG_ON(IS_ERR(trans));
+               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                ret = btrfs_reserve_extent(trans, root,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
@@ -793,9 +792,8 @@ static noinline int cow_file_range(struct inode *inode,
        int ret = 0;
 
        BUG_ON(is_free_space_inode(root, inode));
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        num_bytes = (end - start + blocksize) & ~(blocksize - 1);
@@ -1077,10 +1075,12 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        nolock = is_free_space_inode(root, inode);
 
        if (nolock)
-               trans = btrfs_join_transaction_nolock(root, 1);
+               trans = btrfs_join_transaction_nolock(root);
        else
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
+
        BUG_ON(IS_ERR(trans));
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        cow_start = (u64)-1;
        cur_offset = start;
@@ -1519,8 +1519,6 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
 {
        struct btrfs_ordered_sum *sum;
 
-       btrfs_set_trans_block_group(trans, inode);
-
        list_for_each_entry(sum, list, list) {
                btrfs_csum_file_blocks(trans,
                       BTRFS_I(inode)->root->fs_info->csum_root, sum);
@@ -1735,11 +1733,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
                if (!ret) {
                        if (nolock)
-                               trans = btrfs_join_transaction_nolock(root, 1);
+                               trans = btrfs_join_transaction_nolock(root);
                        else
-                               trans = btrfs_join_transaction(root, 1);
+                               trans = btrfs_join_transaction(root);
                        BUG_ON(IS_ERR(trans));
-                       btrfs_set_trans_block_group(trans, inode);
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
                        ret = btrfs_update_inode(trans, root, inode);
                        BUG_ON(ret);
@@ -1752,11 +1749,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                         0, &cached_state, GFP_NOFS);
 
        if (nolock)
-               trans = btrfs_join_transaction_nolock(root, 1);
+               trans = btrfs_join_transaction_nolock(root);
        else
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
@@ -2431,7 +2427,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                        (u64)-1);
 
        if (root->orphan_block_rsv || root->orphan_item_inserted) {
-               trans = btrfs_join_transaction(root, 1);
+               trans = btrfs_join_transaction(root);
                if (!IS_ERR(trans))
                        btrfs_end_transaction(trans, root);
        }
@@ -2511,12 +2507,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key location;
        int maybe_acls;
-       u64 alloc_group_block;
        u32 rdev;
        int ret;
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
+       path->leave_spinning = 1;
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
@@ -2526,6 +2522,12 @@ static void btrfs_read_locked_inode(struct inode *inode)
        leaf = path->nodes[0];
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
+       if (!leaf->map_token)
+               map_private_extent_buffer(leaf, (unsigned long)inode_item,
+                                         sizeof(struct btrfs_inode_item),
+                                         &leaf->map_token, &leaf->kaddr,
+                                         &leaf->map_start, &leaf->map_len,
+                                         KM_USER1);
 
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
        inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
@@ -2555,8 +2557,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
        BTRFS_I(inode)->index_cnt = (u64)-1;
        BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
 
-       alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
-
        /*
         * try to precache a NULL acl entry for files that don't have
         * any xattrs or acls
@@ -2566,8 +2566,11 @@ static void btrfs_read_locked_inode(struct inode *inode)
        if (!maybe_acls)
                cache_no_acl(inode);
 
-       BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0,
-                                               alloc_group_block, 0);
+       if (leaf->map_token) {
+               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+               leaf->map_token = NULL;
+       }
+
        btrfs_free_path(path);
        inode_item = NULL;
 
@@ -2647,7 +2650,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_transid(leaf, item, trans->transid);
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
-       btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+       btrfs_set_inode_block_group(leaf, item, 0);
 
        if (leaf->map_token) {
                unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
@@ -3004,8 +3007,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
 
        ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
@@ -3094,8 +3095,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        if (unlikely(btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                err = btrfs_unlink_subvol(trans, root, dir,
                                          BTRFS_I(inode)->location.objectid,
@@ -3514,7 +3513,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
                                err = PTR_ERR(trans);
                                break;
                        }
-                       btrfs_set_trans_block_group(trans, inode);
 
                        err = btrfs_drop_extents(trans, inode, cur_offset,
                                                 cur_offset + hole_size,
@@ -3650,7 +3648,6 @@ void btrfs_evict_inode(struct inode *inode)
        while (1) {
                trans = btrfs_start_transaction(root, 0);
                BUG_ON(IS_ERR(trans));
-               btrfs_set_trans_block_group(trans, inode);
                trans->block_rsv = root->orphan_block_rsv;
 
                ret = btrfs_block_rsv_check(trans, root,
@@ -4133,7 +4130,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 2;
+
+       path->reada = 1;
 
        if (key_type == BTRFS_DIR_INDEX_KEY) {
                INIT_LIST_HEAD(&ins_list);
@@ -4268,18 +4266,16 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        if (BTRFS_I(inode)->dummy_inode)
                return 0;
 
-       smp_mb();
-       if (root->fs_info->closing && is_free_space_inode(root, inode))
+       if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
                nolock = true;
 
        if (wbc->sync_mode == WB_SYNC_ALL) {
                if (nolock)
-                       trans = btrfs_join_transaction_nolock(root, 1);
+                       trans = btrfs_join_transaction_nolock(root);
                else
-                       trans = btrfs_join_transaction(root, 1);
+                       trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
-               btrfs_set_trans_block_group(trans, inode);
                if (nolock)
                        ret = btrfs_end_transaction_nolock(trans, root);
                else
@@ -4303,9 +4299,8 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
        if (BTRFS_I(inode)->dummy_inode)
                return;
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
-       btrfs_set_trans_block_group(trans, inode);
 
        ret = btrfs_update_inode(trans, root, inode);
        if (ret && ret == -ENOSPC) {
@@ -4319,7 +4314,6 @@ void btrfs_dirty_inode(struct inode *inode, int flags)
                                       PTR_ERR(trans));
                        return;
                }
-               btrfs_set_trans_block_group(trans, inode);
 
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
@@ -4418,8 +4412,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
                                     const char *name, int name_len,
-                                    u64 ref_objectid, u64 objectid,
-                                    u64 alloc_hint, int mode, u64 *index)
+                                    u64 ref_objectid, u64 objectid, int mode,
+                                    u64 *index)
 {
        struct inode *inode;
        struct btrfs_inode_item *inode_item;
@@ -4472,8 +4466,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                owner = 0;
        else
                owner = 1;
-       BTRFS_I(inode)->block_group =
-                       btrfs_find_block_group(root, 0, alloc_hint, owner);
 
        key[0].objectid = objectid;
        btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
@@ -4629,15 +4621,13 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        err = btrfs_find_free_ino(root, &objectid);
        if (err)
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -4649,7 +4639,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -4658,8 +4647,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                init_special_inode(inode, inode->i_mode, rdev);
                btrfs_update_inode(trans, root, inode);
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
@@ -4692,15 +4679,13 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        err = btrfs_find_free_ino(root, &objectid);
        if (err)
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, mode, &index);
+                               mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -4712,7 +4697,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -4723,8 +4707,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 out_unlock:
        nr = trans->blocks_used;
        btrfs_end_transaction_throttle(trans, root);
@@ -4771,8 +4753,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 
        btrfs_inc_nlink(inode);
        inode->i_ctime = CURRENT_TIME;
-
-       btrfs_set_trans_block_group(trans, dir);
        ihold(inode);
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -4781,7 +4761,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                drop_inode = 1;
        } else {
                struct dentry *parent = dget_parent(dentry);
-               btrfs_update_inode_block_group(trans, dir);
                err = btrfs_update_inode(trans, root, inode);
                BUG_ON(err);
                btrfs_log_new_name(trans, inode, NULL, parent);
@@ -4818,7 +4797,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
        trans = btrfs_start_transaction(root, 5);
        if (IS_ERR(trans))
                return PTR_ERR(trans);
-       btrfs_set_trans_block_group(trans, dir);
 
        err = btrfs_find_free_ino(root, &objectid);
        if (err)
@@ -4826,8 +4804,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, S_IFDIR | mode,
-                               &index);
+                               S_IFDIR | mode, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_fail;
@@ -4841,7 +4818,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        inode->i_op = &btrfs_dir_inode_operations;
        inode->i_fop = &btrfs_dir_file_operations;
-       btrfs_set_trans_block_group(trans, inode);
 
        btrfs_i_size_write(inode, 0);
        err = btrfs_update_inode(trans, root, inode);
@@ -4855,8 +4831,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 
        d_instantiate(dentry, inode);
        drop_on_err = 0;
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
 
 out_fail:
        nr = trans->blocks_used;
@@ -4989,7 +4963,15 @@ again:
 
        if (!path) {
                path = btrfs_alloc_path();
-               BUG_ON(!path);
+               if (!path) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               /*
+                * Chances are we'll be called again, so go ahead and do
+                * readahead
+                */
+               path->reada = 1;
        }
 
        ret = btrfs_lookup_file_extent(trans, root, path,
@@ -5130,8 +5112,10 @@ again:
                                kunmap(page);
                                free_extent_map(em);
                                em = NULL;
+
                                btrfs_release_path(path);
-                               trans = btrfs_join_transaction(root, 1);
+                               trans = btrfs_join_transaction(root);
+
                                if (IS_ERR(trans))
                                        return ERR_CAST(trans);
                                goto again;
@@ -5375,7 +5359,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
        }
 
-       trans = btrfs_join_transaction(root, 0);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans))
                return ERR_CAST(trans);
 
@@ -5611,7 +5595,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 * to make sure the current transaction stays open
                 * while we look for nocow cross refs
                 */
-               trans = btrfs_join_transaction(root, 0);
+               trans = btrfs_join_transaction(root);
                if (IS_ERR(trans))
                        goto must_cow;
 
@@ -5750,7 +5734,7 @@ again:
 
        BUG_ON(!ordered);
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans)) {
                err = -ENOMEM;
                goto out;
@@ -6500,6 +6484,7 @@ out:
 static int btrfs_truncate(struct inode *inode)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_block_rsv *rsv;
        int ret;
        int err = 0;
        struct btrfs_trans_handle *trans;
@@ -6513,28 +6498,80 @@ static int btrfs_truncate(struct inode *inode)
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
-       trans = btrfs_start_transaction(root, 5);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
+       /*
+        * Yes, ladies and gentlemen, this is indeed ugly.  The fact is we have
+        * 3 things going on here
+        *
+        * 1) We need to reserve space for our orphan item and the space to
+        * delete our orphan item.  Lord knows we don't want to have a dangling
+        * orphan item because we didn't reserve space to remove it.
+        *
+        * 2) We need to reserve space to update our inode.
+        *
+        * 3) We need to have something to cache all the space that is going to
+        * be freed up by the truncate operation, but also have some slack
+        * space reserved in case it uses space during the truncate (thank you
+        * very much snapshotting).
+        *
+        * And we need these to all be separate.  The fact is we can use a lot of
+        * space doing the truncate, and we have no earthly idea how much space
+        * we will use, so we need the truncate reservation to be separate so it
+        * doesn't end up using space reserved for updating the inode or
+        * removing the orphan item.  We also need to be able to stop the
+        * transaction and start a new one, which means we need to be able to
+        * update the inode several times, and we have no way of knowing how
+        * many times that will be, so we can't just reserve 1 item for the
+        * entirety of the operation, so that has to be done separately as well.
+        * Then there is the orphan item, which does indeed need to be held on
+        * to for the whole operation, and we need nobody to touch this reserved
+        * space except the orphan code.
+        *
+        * So that leaves us with
+        *
+        * 1) root->orphan_block_rsv - for the orphan deletion.
+        * 2) rsv - for the truncate reservation, which we will steal from the
+        * transaction reservation.
+        * 3) fs_info->trans_block_rsv - this will have 1 items worth left for
+        * updating the inode.
+        */
+       rsv = btrfs_alloc_block_rsv(root);
+       if (!rsv)
+               return -ENOMEM;
+       btrfs_add_durable_block_rsv(root->fs_info, rsv);
+
+       trans = btrfs_start_transaction(root, 4);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out;
+       }
 
-       btrfs_set_trans_block_group(trans, inode);
+       /*
+        * Reserve space for the truncate process.  Truncate should be freeing
+        * space, but if there are snapshots it may end up consuming space instead.
+        */
+       ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
+       BUG_ON(ret);
 
        ret = btrfs_orphan_add(trans, inode);
        if (ret) {
                btrfs_end_transaction(trans, root);
-               return ret;
+               goto out;
        }
 
        nr = trans->blocks_used;
        btrfs_end_transaction(trans, root);
        btrfs_btree_balance_dirty(root, nr);
 
-       /* Now start a transaction for the truncate */
-       trans = btrfs_start_transaction(root, 0);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
-       btrfs_set_trans_block_group(trans, inode);
-       trans->block_rsv = root->orphan_block_rsv;
+       /*
+        * OK, we've already migrated our bytes over for the truncate, so here we
+        * just reserve the one slot we need for updating the inode.
+        */
+       trans = btrfs_start_transaction(root, 1);
+       if (IS_ERR(trans)) {
+               err = PTR_ERR(trans);
+               goto out;
+       }
+       trans->block_rsv = rsv;
 
        /*
         * setattr is responsible for setting the ordered_data_close flag,
@@ -6558,24 +6595,17 @@ static int btrfs_truncate(struct inode *inode)
 
        while (1) {
                if (!trans) {
-                       trans = btrfs_start_transaction(root, 0);
-                       if (IS_ERR(trans))
-                               return PTR_ERR(trans);
-                       btrfs_set_trans_block_group(trans, inode);
-                       trans->block_rsv = root->orphan_block_rsv;
-               }
+                       trans = btrfs_start_transaction(root, 3);
+                       if (IS_ERR(trans)) {
+                               err = PTR_ERR(trans);
+                               goto out;
+                       }
 
-               ret = btrfs_block_rsv_check(trans, root,
-                                           root->orphan_block_rsv, 0, 5);
-               if (ret == -EAGAIN) {
-                       ret = btrfs_commit_transaction(trans, root);
-                       if (ret)
-                               return ret;
-                       trans = NULL;
-                       continue;
-               } else if (ret) {
-                       err = ret;
-                       break;
+                       ret = btrfs_truncate_reserve_metadata(trans, root,
+                                                             rsv);
+                       BUG_ON(ret);
+
+                       trans->block_rsv = rsv;
                }
 
                ret = btrfs_truncate_inode_items(trans, root, inode,
@@ -6586,6 +6616,7 @@ static int btrfs_truncate(struct inode *inode)
                        break;
                }
 
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
                ret = btrfs_update_inode(trans, root, inode);
                if (ret) {
                        err = ret;
@@ -6599,6 +6630,7 @@ static int btrfs_truncate(struct inode *inode)
        }
 
        if (ret == 0 && inode->i_nlink > 0) {
+               trans->block_rsv = root->orphan_block_rsv;
                ret = btrfs_orphan_del(trans, inode);
                if (ret)
                        err = ret;
@@ -6610,15 +6642,20 @@ static int btrfs_truncate(struct inode *inode)
                ret = btrfs_orphan_del(NULL, inode);
        }
 
+       trans->block_rsv = &root->fs_info->trans_block_rsv;
        ret = btrfs_update_inode(trans, root, inode);
        if (ret && !err)
                err = ret;
 
        nr = trans->blocks_used;
        ret = btrfs_end_transaction_throttle(trans, root);
+       btrfs_btree_balance_dirty(root, nr);
+
+out:
+       btrfs_free_block_rsv(root, rsv);
+
        if (ret && !err)
                err = ret;
-       btrfs_btree_balance_dirty(root, nr);
 
        return err;
 }
@@ -6627,15 +6664,14 @@ static int btrfs_truncate(struct inode *inode)
  * create a new subvolume directory/inode (helper for the ioctl).
  */
 int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *new_root,
-                            u64 new_dirid, u64 alloc_hint)
+                            struct btrfs_root *new_root, u64 new_dirid)
 {
        struct inode *inode;
        int err;
        u64 index = 0;
 
        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
-                               new_dirid, alloc_hint, S_IFDIR | 0700, &index);
+                               new_dirid, S_IFDIR | 0700, &index);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        inode->i_op = &btrfs_dir_inode_operations;
@@ -6748,21 +6784,6 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
-       if (root == root->fs_info->tree_root) {
-               struct btrfs_block_group_cache *block_group;
-
-               block_group = btrfs_lookup_block_group(root->fs_info,
-                                               BTRFS_I(inode)->block_group);
-               if (block_group && block_group->inode == inode) {
-                       spin_lock(&block_group->lock);
-                       block_group->inode = NULL;
-                       spin_unlock(&block_group->lock);
-                       btrfs_put_block_group(block_group);
-               } else if (block_group) {
-                       btrfs_put_block_group(block_group);
-               }
-       }
-
        spin_lock(&root->orphan_lock);
        if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
                printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
@@ -6948,8 +6969,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                 goto out_notrans;
         }
 
-       btrfs_set_trans_block_group(trans, new_dir);
-
        if (dest != root)
                btrfs_record_root_in_trans(trans, dest);
 
@@ -7131,16 +7150,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, dir);
-
        err = btrfs_find_free_ino(root, &objectid);
        if (err)
                goto out_unlock;
 
        inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
                                dentry->d_name.len, btrfs_ino(dir), objectid,
-                               BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
-                               &index);
+                               S_IFLNK|S_IRWXUGO, &index);
        if (IS_ERR(inode)) {
                err = PTR_ERR(inode);
                goto out_unlock;
@@ -7152,7 +7168,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       btrfs_set_trans_block_group(trans, inode);
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
                drop_inode = 1;
@@ -7163,8 +7178,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                inode->i_op = &btrfs_file_inode_operations;
                BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
        }
-       btrfs_update_inode_block_group(trans, inode);
-       btrfs_update_inode_block_group(trans, dir);
        if (drop_inode)
                goto out_unlock;
 
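
The btrfs_truncate() hunks above replace the old single-reservation scheme with the three reservations described in the comment: root->orphan_block_rsv for the orphan item, a private rsv for the truncate itself, and fs_info->trans_block_rsv for the inode updates. Stripped of error handling, the orphan-deletion path and the ordered-data details, the resulting control flow looks roughly like the sketch below. It only uses the calls that appear in the diff and is an outline of the patch, not standalone or compilable code; the btrfs_truncate_inode_items() size/min_type arguments are assumed to stay as in the existing call.

static int btrfs_truncate_sketch(struct inode *inode)
{
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_rsv *rsv;
        struct btrfs_trans_handle *trans;
        int ret;

        /* one private, durable rsv to back the truncate itself */
        rsv = btrfs_alloc_block_rsv(root);
        if (!rsv)
                return -ENOMEM;
        btrfs_add_durable_block_rsv(root->fs_info, rsv);

        /* pass 1: fill rsv from the transaction reservation and add the
         * orphan item (whose space lives in root->orphan_block_rsv) */
        trans = btrfs_start_transaction(root, 4);
        btrfs_truncate_reserve_metadata(trans, root, rsv);
        btrfs_orphan_add(trans, inode);
        btrfs_end_transaction(trans, root);

        /* pass 2: drop items against rsv, update the inode against
         * fs_info->trans_block_rsv, restarting the transaction as needed */
        trans = btrfs_start_transaction(root, 1);
        trans->block_rsv = rsv;
        while (1) {
                ret = btrfs_truncate_inode_items(trans, root, inode,
                                                 inode->i_size,
                                                 BTRFS_EXTENT_DATA_KEY);
                if (ret != -EAGAIN)
                        break;
                trans->block_rsv = &root->fs_info->trans_block_rsv;
                btrfs_update_inode(trans, root, inode);
                btrfs_end_transaction(trans, root);

                trans = btrfs_start_transaction(root, 3);
                btrfs_truncate_reserve_metadata(trans, root, rsv);
                trans->block_rsv = rsv;
        }

        trans->block_rsv = &root->fs_info->trans_block_rsv;
        btrfs_update_inode(trans, root, inode);
        btrfs_end_transaction(trans, root);
        btrfs_free_block_rsv(root, rsv);
        return ret;
}

The point of the split is that the loop can end and restart the transaction as many times as btrfs_truncate_inode_items() returns -EAGAIN, refilling rsv each time, without ever dipping into the space reserved for the orphan item or the final inode update.
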
index 85e818c..ac37040 100644 (file)
@@ -243,7 +243,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
                ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
        }
 
-       trans = btrfs_join_transaction(root, 1);
+       trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
 
        ret = btrfs_update_inode(trans, root, inode);
@@ -414,8 +414,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 
        btrfs_record_root_in_trans(trans, new_root);
 
-       ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
-                                      BTRFS_I(dir)->block_group);
+       ret = btrfs_create_subvol_root(trans, new_root, new_dirid);
        /*
         * insert the directory item
         */
@@ -707,16 +706,17 @@ static int find_new_extents(struct btrfs_root *root,
        struct btrfs_file_extent_item *extent;
        int type;
        int ret;
+       u64 ino = btrfs_ino(inode);
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
-       min_key.objectid = inode->i_ino;
+       min_key.objectid = ino;
        min_key.type = BTRFS_EXTENT_DATA_KEY;
        min_key.offset = *off;
 
-       max_key.objectid = inode->i_ino;
+       max_key.objectid = ino;
        max_key.type = (u8)-1;
        max_key.offset = (u64)-1;
 
@@ -727,7 +727,7 @@ static int find_new_extents(struct btrfs_root *root,
                                           path, 0, newer_than);
                if (ret != 0)
                        goto none;
-               if (min_key.objectid != inode->i_ino)
+               if (min_key.objectid != ino)
                        goto none;
                if (min_key.type != BTRFS_EXTENT_DATA_KEY)
                        goto none;
@@ -2489,12 +2489,10 @@ static long btrfs_ioctl_trans_start(struct file *file)
        if (ret)
                goto out;
 
-       mutex_lock(&root->fs_info->trans_mutex);
-       root->fs_info->open_ioctl_trans++;
-       mutex_unlock(&root->fs_info->trans_mutex);
+       atomic_inc(&root->fs_info->open_ioctl_trans);
 
        ret = -ENOMEM;
-       trans = btrfs_start_ioctl_transaction(root, 0);
+       trans = btrfs_start_ioctl_transaction(root);
        if (IS_ERR(trans))
                goto out_drop;
 
@@ -2502,9 +2500,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
        return 0;
 
 out_drop:
-       mutex_lock(&root->fs_info->trans_mutex);
-       root->fs_info->open_ioctl_trans--;
-       mutex_unlock(&root->fs_info->trans_mutex);
+       atomic_dec(&root->fs_info->open_ioctl_trans);
        mnt_drop_write(file->f_path.mnt);
 out:
        return ret;
@@ -2738,9 +2734,7 @@ long btrfs_ioctl_trans_end(struct file *file)
 
        btrfs_end_transaction(trans, root);
 
-       mutex_lock(&root->fs_info->trans_mutex);
-       root->fs_info->open_ioctl_trans--;
-       mutex_unlock(&root->fs_info->trans_mutex);
+       atomic_dec(&root->fs_info->open_ioctl_trans);
 
        mnt_drop_write(file->f_path.mnt);
        return 0;
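
One smaller change in the ioctl paths: the open_ioctl_trans counter for userspace-held transactions is no longer protected by trans_mutex and becomes a bare atomic. The pattern reduces to paired increments and decrements around the lifetime of the handle; a fragment of the shape, assuming (as the rest of the series makes it) that the field is now an atomic_t in btrfs_fs_info:

        /* TRANS_START ioctl: account for the open userspace transaction */
        atomic_inc(&root->fs_info->open_ioctl_trans);
        trans = btrfs_start_ioctl_transaction(root);
        if (IS_ERR(trans)) {
                atomic_dec(&root->fs_info->open_ioctl_trans);
                return PTR_ERR(trans);
        }

        /* TRANS_END ioctl: drop the count again */
        btrfs_end_transaction(trans, root);
        atomic_dec(&root->fs_info->open_ioctl_trans);

Readers such as btrfs_throttle() can then use atomic_read() on the counter without taking any lock at all, which is exactly what the transaction.c hunks further down do.
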
index ca38eca..b1ef27c 100644 (file)
@@ -677,6 +677,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
                err = -ENOMEM;
                goto out;
        }
+       path1->reada = 1;
+       path2->reada = 2;
 
        node = alloc_backref_node(cache);
        if (!node) {
@@ -1999,6 +2001,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->reada = 1;
 
        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;
@@ -2139,10 +2142,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
        u64 num_bytes = 0;
        int ret;
 
-       mutex_lock(&root->fs_info->trans_mutex);
+       spin_lock(&root->fs_info->trans_lock);
        rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
        rc->merging_rsv_size += rc->nodes_relocated * 2;
-       mutex_unlock(&root->fs_info->trans_mutex);
+       spin_unlock(&root->fs_info->trans_lock);
 again:
        if (!err) {
                num_bytes = rc->merging_rsv_size;
@@ -2152,7 +2155,7 @@ again:
                        err = ret;
        }
 
-       trans = btrfs_join_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans)) {
                if (!err)
                        btrfs_block_rsv_release(rc->extent_root,
@@ -2211,9 +2214,9 @@ int merge_reloc_roots(struct reloc_control *rc)
        int ret;
 again:
        root = rc->extent_root;
-       mutex_lock(&root->fs_info->trans_mutex);
+       spin_lock(&root->fs_info->trans_lock);
        list_splice_init(&rc->reloc_roots, &reloc_roots);
-       mutex_unlock(&root->fs_info->trans_mutex);
+       spin_unlock(&root->fs_info->trans_lock);
 
        while (!list_empty(&reloc_roots)) {
                found = 1;
@@ -3236,7 +3239,7 @@ truncate:
                goto out;
        }
 
-       trans = btrfs_join_transaction(root, 0);
+       trans = btrfs_join_transaction(root);
        if (IS_ERR(trans)) {
                btrfs_free_path(path);
                ret = PTR_ERR(trans);
@@ -3300,6 +3303,7 @@ static int find_data_references(struct reloc_control *rc,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->reada = 1;
 
        root = read_fs_root(rc->extent_root->fs_info, ref_root);
        if (IS_ERR(root)) {
@@ -3586,17 +3590,17 @@ next:
 static void set_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-       mutex_lock(&fs_info->trans_mutex);
+       spin_lock(&fs_info->trans_lock);
        fs_info->reloc_ctl = rc;
-       mutex_unlock(&fs_info->trans_mutex);
+       spin_unlock(&fs_info->trans_lock);
 }
 
 static void unset_reloc_control(struct reloc_control *rc)
 {
        struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
-       mutex_lock(&fs_info->trans_mutex);
+       spin_lock(&fs_info->trans_lock);
        fs_info->reloc_ctl = NULL;
-       mutex_unlock(&fs_info->trans_mutex);
+       spin_unlock(&fs_info->trans_lock);
 }
 
 static int check_extent_flags(u64 flags)
@@ -3645,7 +3649,7 @@ int prepare_to_relocate(struct reloc_control *rc)
        rc->create_reloc_tree = 1;
        set_reloc_control(rc);
 
-       trans = btrfs_join_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root);
        BUG_ON(IS_ERR(trans));
        btrfs_commit_transaction(trans, rc->extent_root);
        return 0;
@@ -3668,6 +3672,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->reada = 1;
 
        ret = prepare_to_relocate(rc);
        if (ret) {
@@ -3834,7 +3839,7 @@ restart:
        btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
 
        /* get rid of pinned extents */
-       trans = btrfs_join_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans))
                err = PTR_ERR(trans);
        else
@@ -4093,6 +4098,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+       path->reada = -1;
 
        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
@@ -4159,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
 
        set_reloc_control(rc);
 
-       trans = btrfs_join_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans)) {
                unset_reloc_control(rc);
                err = PTR_ERR(trans);
@@ -4193,7 +4199,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
 
        unset_reloc_control(rc);
 
-       trans = btrfs_join_transaction(rc->extent_root, 1);
+       trans = btrfs_join_transaction(rc->extent_root);
        if (IS_ERR(trans))
                err = PTR_ERR(trans);
        else
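
With these hunks, the relocation code asks for readahead explicitly on each btrfs_path it allocates (reada = 1 or 2 here, -1 in btrfs_recover_relocation), and the reloc_ctl and merging_rsv_size updates move from trans_mutex to the trans_lock spinlock. The caller-side readahead pattern, as a minimal fragment rather than a complete function:

        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        path->reada = 1;        /* opt in to readahead for this search */

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        /* ... walk the results ... */
        btrfs_free_path(path);
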
index 6dfed0c..df50fd1 100644 (file)
@@ -117,33 +117,37 @@ static void scrub_free_csums(struct scrub_dev *sdev)
        }
 }
 
+static void scrub_free_bio(struct bio *bio)
+{
+       int i;
+       struct page *last_page = NULL;
+
+       if (!bio)
+               return;
+
+       for (i = 0; i < bio->bi_vcnt; ++i) {
+               if (bio->bi_io_vec[i].bv_page == last_page)
+                       continue;
+               last_page = bio->bi_io_vec[i].bv_page;
+               __free_page(last_page);
+       }
+       bio_put(bio);
+}
+
 static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev)
 {
        int i;
-       int j;
-       struct page *last_page;
 
        if (!sdev)
                return;
 
        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
                struct scrub_bio *sbio = sdev->bios[i];
-               struct bio *bio;
 
                if (!sbio)
                        break;
 
-               bio = sbio->bio;
-               if (bio) {
-                       last_page = NULL;
-                       for (j = 0; j < bio->bi_vcnt; ++j) {
-                               if (bio->bi_io_vec[j].bv_page == last_page)
-                                       continue;
-                               last_page = bio->bi_io_vec[j].bv_page;
-                               __free_page(last_page);
-                       }
-                       bio_put(bio);
-               }
+               scrub_free_bio(sbio->bio);
                kfree(sbio);
        }
 
@@ -156,8 +160,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
 {
        struct scrub_dev *sdev;
        int             i;
-       int             j;
-       int             ret;
        struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
 
        sdev = kzalloc(sizeof(*sdev), GFP_NOFS);
@@ -165,7 +167,6 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
                goto nomem;
        sdev->dev = dev;
        for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) {
-               struct bio *bio;
                struct scrub_bio *sbio;
 
                sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
@@ -173,32 +174,10 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
                        goto nomem;
                sdev->bios[i] = sbio;
 
-               bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
-               if (!bio)
-                       goto nomem;
-
                sbio->index = i;
                sbio->sdev = sdev;
-               sbio->bio = bio;
                sbio->count = 0;
                sbio->work.func = scrub_checksum;
-               bio->bi_private = sdev->bios[i];
-               bio->bi_end_io = scrub_bio_end_io;
-               bio->bi_sector = 0;
-               bio->bi_bdev = dev->bdev;
-               bio->bi_size = 0;
-
-               for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) {
-                       struct page *page;
-                       page = alloc_page(GFP_NOFS);
-                       if (!page)
-                               goto nomem;
-
-                       ret = bio_add_page(bio, page, PAGE_SIZE, 0);
-                       if (!ret)
-                               goto nomem;
-               }
-               WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO);
 
                if (i != SCRUB_BIOS_PER_DEV-1)
                        sdev->bios[i]->next_free = i + 1;
@@ -369,9 +348,6 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
        int ret;
        DECLARE_COMPLETION_ONSTACK(complete);
 
-       /* we are going to wait on this IO */
-       rw |= REQ_SYNC;
-
        bio = bio_alloc(GFP_NOFS, 1);
        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
@@ -380,6 +356,7 @@ static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
        bio->bi_private = &complete;
        submit_bio(rw, bio);
 
+       /* this will also unplug the queue */
        wait_for_completion(&complete);
 
        ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -394,6 +371,7 @@ static void scrub_bio_end_io(struct bio *bio, int err)
        struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
 
        sbio->err = err;
+       sbio->bio = bio;
 
        btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work);
 }
@@ -453,6 +431,8 @@ static void scrub_checksum(struct btrfs_work *work)
        }
 
 out:
+       scrub_free_bio(sbio->bio);
+       sbio->bio = NULL;
        spin_lock(&sdev->list_lock);
        sbio->next_free = sdev->first_free;
        sdev->first_free = sbio->index;
@@ -583,25 +563,50 @@ static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer)
 static int scrub_submit(struct scrub_dev *sdev)
 {
        struct scrub_bio *sbio;
+       struct bio *bio;
+       int i;
 
        if (sdev->curr == -1)
                return 0;
 
        sbio = sdev->bios[sdev->curr];
 
-       sbio->bio->bi_sector = sbio->physical >> 9;
-       sbio->bio->bi_size = sbio->count * PAGE_SIZE;
-       sbio->bio->bi_next = NULL;
-       sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
-       sbio->bio->bi_comp_cpu = -1;
-       sbio->bio->bi_bdev = sdev->dev->bdev;
+       bio = bio_alloc(GFP_NOFS, sbio->count);
+       if (!bio)
+               goto nomem;
+
+       bio->bi_private = sbio;
+       bio->bi_end_io = scrub_bio_end_io;
+       bio->bi_bdev = sdev->dev->bdev;
+       bio->bi_sector = sbio->physical >> 9;
+
+       for (i = 0; i < sbio->count; ++i) {
+               struct page *page;
+               int ret;
+
+               page = alloc_page(GFP_NOFS);
+               if (!page)
+                       goto nomem;
+
+               ret = bio_add_page(bio, page, PAGE_SIZE, 0);
+               if (!ret) {
+                       __free_page(page);
+                       goto nomem;
+               }
+       }
+
        sbio->err = 0;
        sdev->curr = -1;
        atomic_inc(&sdev->in_flight);
 
-       submit_bio(0, sbio->bio);
+       submit_bio(READ, bio);
 
        return 0;
+
+nomem:
+       scrub_free_bio(bio);
+
+       return -ENOMEM;
 }
 
 static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
@@ -633,7 +638,11 @@ again:
                sbio->logical = logical;
        } else if (sbio->physical + sbio->count * PAGE_SIZE != physical ||
                   sbio->logical + sbio->count * PAGE_SIZE != logical) {
-               scrub_submit(sdev);
+               int ret;
+
+               ret = scrub_submit(sdev);
+               if (ret)
+                       return ret;
                goto again;
        }
        sbio->spag[sbio->count].flags = flags;
@@ -645,8 +654,13 @@ again:
                memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size);
        }
        ++sbio->count;
-       if (sbio->count == SCRUB_PAGES_PER_BIO || force)
-               scrub_submit(sdev);
+       if (sbio->count == SCRUB_PAGES_PER_BIO || force) {
+               int ret;
+
+               ret = scrub_submit(sdev);
+               if (ret)
+                       return ret;
+       }
 
        return 0;
 }
@@ -727,6 +741,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
        struct btrfs_root *root = fs_info->extent_root;
        struct btrfs_root *csum_root = fs_info->csum_root;
        struct btrfs_extent_item *extent;
+       struct blk_plug plug;
        u64 flags;
        int ret;
        int slot;
@@ -831,6 +846,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
         * the scrub. This might currently (crc32) end up to be about 1MB
         */
        start_stripe = 0;
+       blk_start_plug(&plug);
 again:
        logical = base + offset + start_stripe * increment;
        for (i = start_stripe; i < nstripes; ++i) {
@@ -972,6 +988,7 @@ next:
        scrub_submit(sdev);
 
 out:
+       blk_finish_plug(&plug);
        btrfs_free_path(path);
        return ret < 0 ? ret : 0;
 }
@@ -1166,7 +1183,7 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
        int ret;
        struct btrfs_device *dev;
 
-       if (root->fs_info->closing)
+       if (btrfs_fs_closing(root->fs_info))
                return -EINVAL;
 
        /*
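
The scrub rework stops keeping a long-lived bio per scrub_bio: scrub_submit() now builds a fresh bio and its pages for every submission, the end_io handler hands the bio back via sbio->bio, and scrub_checksum() releases it with scrub_free_bio(). The stripe loop is additionally bracketed by blk_start_plug()/blk_finish_plug() so the queued reads are dispatched as a batch. Reduced to its core, the per-submission path is the fragment below (allocation-failure handling elided; the hunk's nomem path shows the full version):

        /* per submission: build a fresh bio instead of reusing one */
        struct bio *bio = bio_alloc(GFP_NOFS, sbio->count);
        int i;

        bio->bi_private = sbio;
        bio->bi_end_io = scrub_bio_end_io;      /* hands the bio back via sbio->bio */
        bio->bi_bdev = sdev->dev->bdev;
        bio->bi_sector = sbio->physical >> 9;   /* bytes -> 512-byte sectors */

        for (i = 0; i < sbio->count; ++i) {
                struct page *page = alloc_page(GFP_NOFS);

                bio_add_page(bio, page, PAGE_SIZE, 0);  /* nomem handling elided */
        }
        submit_bio(READ, bio);
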
index 9b2e7e5..117e74e 100644 (file)
@@ -161,7 +161,8 @@ enum {
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_err,
+       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag,
+       Opt_inode_cache, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -193,6 +194,7 @@ static match_table_t tokens = {
        {Opt_enospc_debug, "enospc_debug"},
        {Opt_subvolrootid, "subvolrootid=%d"},
        {Opt_defrag, "autodefrag"},
+       {Opt_inode_cache, "inode_cache"},
        {Opt_err, NULL},
 };
 
@@ -361,6 +363,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        printk(KERN_INFO "btrfs: enabling disk space caching\n");
                        btrfs_set_opt(info->mount_opt, SPACE_CACHE);
                        break;
+               case Opt_inode_cache:
+                       printk(KERN_INFO "btrfs: enabling inode map caching\n");
+                       btrfs_set_opt(info->mount_opt, INODE_MAP_CACHE);
+                       break;
                case Opt_clear_cache:
                        printk(KERN_INFO "btrfs: force clearing of disk cache\n");
                        btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
index dc80f71..dd71966 100644 (file)
@@ -35,6 +35,7 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
        WARN_ON(atomic_read(&transaction->use_count) == 0);
        if (atomic_dec_and_test(&transaction->use_count)) {
+               BUG_ON(!list_empty(&transaction->list));
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
@@ -49,46 +50,72 @@ static noinline void switch_commit_root(struct btrfs_root *root)
 /*
  * either allocate a new transaction or hop into the existing one
  */
-static noinline int join_transaction(struct btrfs_root *root)
+static noinline int join_transaction(struct btrfs_root *root, int nofail)
 {
        struct btrfs_transaction *cur_trans;
+
+       spin_lock(&root->fs_info->trans_lock);
+       if (root->fs_info->trans_no_join) {
+               if (!nofail) {
+                       spin_unlock(&root->fs_info->trans_lock);
+                       return -EBUSY;
+               }
+       }
+
        cur_trans = root->fs_info->running_transaction;
-       if (!cur_trans) {
-               cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
-                                            GFP_NOFS);
-               if (!cur_trans)
-                       return -ENOMEM;
-               root->fs_info->generation++;
-               atomic_set(&cur_trans->num_writers, 1);
-               cur_trans->num_joined = 0;
-               cur_trans->transid = root->fs_info->generation;
-               init_waitqueue_head(&cur_trans->writer_wait);
-               init_waitqueue_head(&cur_trans->commit_wait);
-               cur_trans->in_commit = 0;
-               cur_trans->blocked = 0;
-               atomic_set(&cur_trans->use_count, 1);
-               cur_trans->commit_done = 0;
-               cur_trans->start_time = get_seconds();
-
-               cur_trans->delayed_refs.root = RB_ROOT;
-               cur_trans->delayed_refs.num_entries = 0;
-               cur_trans->delayed_refs.num_heads_ready = 0;
-               cur_trans->delayed_refs.num_heads = 0;
-               cur_trans->delayed_refs.flushing = 0;
-               cur_trans->delayed_refs.run_delayed_start = 0;
-               spin_lock_init(&cur_trans->delayed_refs.lock);
-
-               INIT_LIST_HEAD(&cur_trans->pending_snapshots);
-               list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
-               extent_io_tree_init(&cur_trans->dirty_pages,
-                                    root->fs_info->btree_inode->i_mapping);
-               spin_lock(&root->fs_info->new_trans_lock);
-               root->fs_info->running_transaction = cur_trans;
-               spin_unlock(&root->fs_info->new_trans_lock);
-       } else {
+       if (cur_trans) {
+               atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
+               spin_unlock(&root->fs_info->trans_lock);
+               return 0;
        }
+       spin_unlock(&root->fs_info->trans_lock);
+
+       cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
+       if (!cur_trans)
+               return -ENOMEM;
+       spin_lock(&root->fs_info->trans_lock);
+       if (root->fs_info->running_transaction) {
+               kmem_cache_free(btrfs_transaction_cachep, cur_trans);
+               cur_trans = root->fs_info->running_transaction;
+               atomic_inc(&cur_trans->use_count);
+               atomic_inc(&cur_trans->num_writers);
+               cur_trans->num_joined++;
+               spin_unlock(&root->fs_info->trans_lock);
+               return 0;
+       }
+       atomic_set(&cur_trans->num_writers, 1);
+       cur_trans->num_joined = 0;
+       init_waitqueue_head(&cur_trans->writer_wait);
+       init_waitqueue_head(&cur_trans->commit_wait);
+       cur_trans->in_commit = 0;
+       cur_trans->blocked = 0;
+       /*
+        * One for this trans handle, one so it will live on until we
+        * commit the transaction.
+        */
+       atomic_set(&cur_trans->use_count, 2);
+       cur_trans->commit_done = 0;
+       cur_trans->start_time = get_seconds();
+
+       cur_trans->delayed_refs.root = RB_ROOT;
+       cur_trans->delayed_refs.num_entries = 0;
+       cur_trans->delayed_refs.num_heads_ready = 0;
+       cur_trans->delayed_refs.num_heads = 0;
+       cur_trans->delayed_refs.flushing = 0;
+       cur_trans->delayed_refs.run_delayed_start = 0;
+       spin_lock_init(&cur_trans->commit_lock);
+       spin_lock_init(&cur_trans->delayed_refs.lock);
+
+       INIT_LIST_HEAD(&cur_trans->pending_snapshots);
+       list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
+       extent_io_tree_init(&cur_trans->dirty_pages,
+                            root->fs_info->btree_inode->i_mapping);
+       root->fs_info->generation++;
+       cur_trans->transid = root->fs_info->generation;
+       root->fs_info->running_transaction = cur_trans;
+       spin_unlock(&root->fs_info->trans_lock);
 
        return 0;
 }
@@ -99,39 +126,28 @@ static noinline int join_transaction(struct btrfs_root *root)
  * to make sure the old root from before we joined the transaction is deleted
  * when the transaction commits
  */
-static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
-                                        struct btrfs_root *root)
+int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+                              struct btrfs_root *root)
 {
        if (root->ref_cows && root->last_trans < trans->transid) {
                WARN_ON(root == root->fs_info->extent_root);
                WARN_ON(root->commit_root != root->node);
 
+               spin_lock(&root->fs_info->fs_roots_radix_lock);
+               if (root->last_trans == trans->transid) {
+                       spin_unlock(&root->fs_info->fs_roots_radix_lock);
+                       return 0;
+               }
+               root->last_trans = trans->transid;
                radix_tree_tag_set(&root->fs_info->fs_roots_radix,
                           (unsigned long)root->root_key.objectid,
                           BTRFS_ROOT_TRANS_TAG);
-               root->last_trans = trans->transid;
+               spin_unlock(&root->fs_info->fs_roots_radix_lock);
                btrfs_init_reloc_root(trans, root);
        }
        return 0;
 }
 
-int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
-                              struct btrfs_root *root)
-{
-       if (!root->ref_cows)
-               return 0;
-
-       mutex_lock(&root->fs_info->trans_mutex);
-       if (root->last_trans == trans->transid) {
-               mutex_unlock(&root->fs_info->trans_mutex);
-               return 0;
-       }
-
-       record_root_in_trans(trans, root);
-       mutex_unlock(&root->fs_info->trans_mutex);
-       return 0;
-}
-
 /* wait for commit against the current transaction to become unblocked
  * when this is done, it is safe to start a new transaction, but the current
  * transaction might not be fully on disk.
@@ -140,21 +156,23 @@ static void wait_current_trans(struct btrfs_root *root)
 {
        struct btrfs_transaction *cur_trans;
 
+       spin_lock(&root->fs_info->trans_lock);
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                DEFINE_WAIT(wait);
                atomic_inc(&cur_trans->use_count);
+               spin_unlock(&root->fs_info->trans_lock);
                while (1) {
                        prepare_to_wait(&root->fs_info->transaction_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
                        if (!cur_trans->blocked)
                                break;
-                       mutex_unlock(&root->fs_info->trans_mutex);
                        schedule();
-                       mutex_lock(&root->fs_info->trans_mutex);
                }
                finish_wait(&root->fs_info->transaction_wait, &wait);
                put_transaction(cur_trans);
+       } else {
+               spin_unlock(&root->fs_info->trans_lock);
        }
 }
 
@@ -167,10 +185,16 @@ enum btrfs_trans_type {
 
 static int may_wait_transaction(struct btrfs_root *root, int type)
 {
-       if (!root->fs_info->log_root_recovering &&
-           ((type == TRANS_START && !root->fs_info->open_ioctl_trans) ||
-            type == TRANS_USERSPACE))
+       if (root->fs_info->log_root_recovering)
+               return 0;
+
+       if (type == TRANS_USERSPACE)
+               return 1;
+
+       if (type == TRANS_START &&
+           !atomic_read(&root->fs_info->open_ioctl_trans))
                return 1;
+
        return 0;
 }
 
@@ -184,36 +208,44 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
                return ERR_PTR(-EROFS);
+
+       if (current->journal_info) {
+               WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
+               h = current->journal_info;
+               h->use_count++;
+               h->orig_rsv = h->block_rsv;
+               h->block_rsv = NULL;
+               goto got_it;
+       }
 again:
        h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
        if (!h)
                return ERR_PTR(-ENOMEM);
 
-       if (type != TRANS_JOIN_NOLOCK)
-               mutex_lock(&root->fs_info->trans_mutex);
        if (may_wait_transaction(root, type))
                wait_current_trans(root);
 
-       ret = join_transaction(root);
+       do {
+               ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
+               if (ret == -EBUSY)
+                       wait_current_trans(root);
+       } while (ret == -EBUSY);
+
        if (ret < 0) {
                kmem_cache_free(btrfs_trans_handle_cachep, h);
-               if (type != TRANS_JOIN_NOLOCK)
-                       mutex_unlock(&root->fs_info->trans_mutex);
                return ERR_PTR(ret);
        }
 
        cur_trans = root->fs_info->running_transaction;
-       atomic_inc(&cur_trans->use_count);
-       if (type != TRANS_JOIN_NOLOCK)
-               mutex_unlock(&root->fs_info->trans_mutex);
 
        h->transid = cur_trans->transid;
        h->transaction = cur_trans;
        h->blocks_used = 0;
-       h->block_group = 0;
        h->bytes_reserved = 0;
        h->delayed_ref_updates = 0;
+       h->use_count = 1;
        h->block_rsv = NULL;
+       h->orig_rsv = NULL;
 
        smp_mb();
        if (cur_trans->blocked && may_wait_transaction(root, type)) {
@@ -241,11 +273,8 @@ again:
                }
        }
 
-       if (type != TRANS_JOIN_NOLOCK)
-               mutex_lock(&root->fs_info->trans_mutex);
-       record_root_in_trans(h, root);
-       if (type != TRANS_JOIN_NOLOCK)
-               mutex_unlock(&root->fs_info->trans_mutex);
+got_it:
+       btrfs_record_root_in_trans(h, root);
 
        if (!current->journal_info && type != TRANS_USERSPACE)
                current->journal_info = h;
@@ -257,22 +286,19 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 {
        return start_transaction(root, num_items, TRANS_START);
 }
-struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
-                                                  int num_blocks)
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
 {
        return start_transaction(root, 0, TRANS_JOIN);
 }
 
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
-                                                         int num_blocks)
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
 {
        return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
 }
 
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
-                                                        int num_blocks)
+struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
 {
-       return start_transaction(r, 0, TRANS_USERSPACE);
+       return start_transaction(root, 0, TRANS_USERSPACE);
 }
 
 /* wait for a transaction commit to be fully complete */
@@ -280,17 +306,13 @@ static noinline int wait_for_commit(struct btrfs_root *root,
                                    struct btrfs_transaction *commit)
 {
        DEFINE_WAIT(wait);
-       mutex_lock(&root->fs_info->trans_mutex);
        while (!commit->commit_done) {
                prepare_to_wait(&commit->commit_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
                if (commit->commit_done)
                        break;
-               mutex_unlock(&root->fs_info->trans_mutex);
                schedule();
-               mutex_lock(&root->fs_info->trans_mutex);
        }
-       mutex_unlock(&root->fs_info->trans_mutex);
        finish_wait(&commit->commit_wait, &wait);
        return 0;
 }
@@ -300,59 +322,56 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
        struct btrfs_transaction *cur_trans = NULL, *t;
        int ret;
 
-       mutex_lock(&root->fs_info->trans_mutex);
-
        ret = 0;
        if (transid) {
                if (transid <= root->fs_info->last_trans_committed)
-                       goto out_unlock;
+                       goto out;
 
                /* find specified transaction */
+               spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry(t, &root->fs_info->trans_list, list) {
                        if (t->transid == transid) {
                                cur_trans = t;
+                               atomic_inc(&cur_trans->use_count);
                                break;
                        }
                        if (t->transid > transid)
                                break;
                }
+               spin_unlock(&root->fs_info->trans_lock);
                ret = -EINVAL;
                if (!cur_trans)
-                       goto out_unlock;  /* bad transid */
+                       goto out;  /* bad transid */
        } else {
                /* find newest transaction that is committing | committed */
+               spin_lock(&root->fs_info->trans_lock);
                list_for_each_entry_reverse(t, &root->fs_info->trans_list,
                                            list) {
                        if (t->in_commit) {
                                if (t->commit_done)
-                                       goto out_unlock;
+                                       goto out;
                                cur_trans = t;
+                               atomic_inc(&cur_trans->use_count);
                                break;
                        }
                }
+               spin_unlock(&root->fs_info->trans_lock);
                if (!cur_trans)
-                       goto out_unlock;  /* nothing committing|committed */
+                       goto out;  /* nothing committing|committed */
        }
 
-       atomic_inc(&cur_trans->use_count);
-       mutex_unlock(&root->fs_info->trans_mutex);
-
        wait_for_commit(root, cur_trans);
 
-       mutex_lock(&root->fs_info->trans_mutex);
        put_transaction(cur_trans);
        ret = 0;
-out_unlock:
-       mutex_unlock(&root->fs_info->trans_mutex);
+out:
        return ret;
 }
 
 void btrfs_throttle(struct btrfs_root *root)
 {
-       mutex_lock(&root->fs_info->trans_mutex);
-       if (!root->fs_info->open_ioctl_trans)
+       if (!atomic_read(&root->fs_info->open_ioctl_trans))
                wait_current_trans(root);
-       mutex_unlock(&root->fs_info->trans_mutex);
 }
 
 static int should_end_transaction(struct btrfs_trans_handle *trans,
@@ -370,6 +389,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
        struct btrfs_transaction *cur_trans = trans->transaction;
        int updates;
 
+       smp_mb();
        if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
                return 1;
 
@@ -388,6 +408,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
        struct btrfs_fs_info *info = root->fs_info;
        int count = 0;
 
+       if (--trans->use_count) {
+               trans->block_rsv = trans->orig_rsv;
+               return 0;
+       }
+
        while (count < 4) {
                unsigned long cur = trans->delayed_ref_updates;
                trans->delayed_ref_updates = 0;
@@ -410,9 +435,11 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_trans_release_metadata(trans, root);
 
-       if (lock && !root->fs_info->open_ioctl_trans &&
-           should_end_transaction(trans, root))
+       if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
+           should_end_transaction(trans, root)) {
                trans->transaction->blocked = 1;
+               smp_wmb();
+       }
 
        if (lock && cur_trans->blocked && !cur_trans->in_commit) {
                if (throttle)
@@ -703,9 +730,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
  */
 int btrfs_add_dead_root(struct btrfs_root *root)
 {
-       mutex_lock(&root->fs_info->trans_mutex);
+       spin_lock(&root->fs_info->trans_lock);
        list_add(&root->root_list, &root->fs_info->dead_roots);
-       mutex_unlock(&root->fs_info->trans_mutex);
+       spin_unlock(&root->fs_info->trans_lock);
        return 0;
 }
 
@@ -721,6 +748,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
        int ret;
        int err = 0;
 
+       spin_lock(&fs_info->fs_roots_radix_lock);
        while (1) {
                ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
                                                 (void **)gang, 0,
@@ -733,6 +761,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
 
                        btrfs_free_log(trans, root);
                        btrfs_update_reloc_root(trans, root);
@@ -753,10 +782,12 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        err = btrfs_update_root(trans, fs_info->tree_root,
                                                &root->root_key,
                                                &root->root_item);
+                       spin_lock(&fs_info->fs_roots_radix_lock);
                        if (err)
                                break;
                }
        }
+       spin_unlock(&fs_info->fs_roots_radix_lock);
        return err;
 }
 
@@ -786,7 +817,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
                btrfs_btree_balance_dirty(info->tree_root, nr);
                cond_resched();
 
-               if (root->fs_info->closing || ret != -EAGAIN)
+               if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
                        break;
        }
        root->defrag_running = 0;
@@ -851,7 +882,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        parent = dget_parent(dentry);
        parent_inode = parent->d_inode;
        parent_root = BTRFS_I(parent_inode)->root;
-       record_root_in_trans(trans, parent_root);
+       btrfs_record_root_in_trans(trans, parent_root);
 
        /*
         * insert the directory item
@@ -869,7 +900,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        ret = btrfs_update_inode(trans, parent_root, parent_inode);
        BUG_ON(ret);
 
-       record_root_in_trans(trans, root);
+       btrfs_record_root_in_trans(trans, root);
        btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
        memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
        btrfs_check_and_init_root_item(new_root_item);
@@ -967,20 +998,20 @@ static void update_super_roots(struct btrfs_root *root)
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
 {
        int ret = 0;
-       spin_lock(&info->new_trans_lock);
+       spin_lock(&info->trans_lock);
        if (info->running_transaction)
                ret = info->running_transaction->in_commit;
-       spin_unlock(&info->new_trans_lock);
+       spin_unlock(&info->trans_lock);
        return ret;
 }
 
 int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 {
        int ret = 0;
-       spin_lock(&info->new_trans_lock);
+       spin_lock(&info->trans_lock);
        if (info->running_transaction)
                ret = info->running_transaction->blocked;
-       spin_unlock(&info->new_trans_lock);
+       spin_unlock(&info->trans_lock);
        return ret;
 }
 
@@ -1004,9 +1035,7 @@ static void wait_current_trans_commit_start(struct btrfs_root *root,
                                    &wait);
                        break;
                }
-               mutex_unlock(&root->fs_info->trans_mutex);
                schedule();
-               mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
        }
 }
@@ -1032,9 +1061,7 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
                                    &wait);
                        break;
                }
-               mutex_unlock(&root->fs_info->trans_mutex);
                schedule();
-               mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&root->fs_info->transaction_wait,
                            &wait);
        }
@@ -1072,7 +1099,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
 
        INIT_DELAYED_WORK(&ac->work, do_async_commit);
        ac->root = root;
-       ac->newtrans = btrfs_join_transaction(root, 0);
+       ac->newtrans = btrfs_join_transaction(root);
        if (IS_ERR(ac->newtrans)) {
                int err = PTR_ERR(ac->newtrans);
                kfree(ac);
@@ -1080,22 +1107,18 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        }
 
        /* take transaction reference */
-       mutex_lock(&root->fs_info->trans_mutex);
        cur_trans = trans->transaction;
        atomic_inc(&cur_trans->use_count);
-       mutex_unlock(&root->fs_info->trans_mutex);
 
        btrfs_end_transaction(trans, root);
        schedule_delayed_work(&ac->work, 0);
 
        /* wait for transaction to start and unblock */
-       mutex_lock(&root->fs_info->trans_mutex);
        if (wait_for_unblock)
                wait_current_trans_commit_start_and_unblock(root, cur_trans);
        else
                wait_current_trans_commit_start(root, cur_trans);
        put_transaction(cur_trans);
-       mutex_unlock(&root->fs_info->trans_mutex);
 
        return 0;
 }
@@ -1139,38 +1162,41 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = btrfs_run_delayed_refs(trans, root, 0);
        BUG_ON(ret);
 
-       mutex_lock(&root->fs_info->trans_mutex);
+       spin_lock(&cur_trans->commit_lock);
        if (cur_trans->in_commit) {
+               spin_unlock(&cur_trans->commit_lock);
                atomic_inc(&cur_trans->use_count);
-               mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
 
                ret = wait_for_commit(root, cur_trans);
                BUG_ON(ret);
 
-               mutex_lock(&root->fs_info->trans_mutex);
                put_transaction(cur_trans);
-               mutex_unlock(&root->fs_info->trans_mutex);
 
                return 0;
        }
 
        trans->transaction->in_commit = 1;
        trans->transaction->blocked = 1;
+       spin_unlock(&cur_trans->commit_lock);
        wake_up(&root->fs_info->transaction_blocked_wait);
 
+       spin_lock(&root->fs_info->trans_lock);
        if (cur_trans->list.prev != &root->fs_info->trans_list) {
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
                        atomic_inc(&prev_trans->use_count);
-                       mutex_unlock(&root->fs_info->trans_mutex);
+                       spin_unlock(&root->fs_info->trans_lock);
 
                        wait_for_commit(root, prev_trans);
 
-                       mutex_lock(&root->fs_info->trans_mutex);
                        put_transaction(prev_trans);
+               } else {
+                       spin_unlock(&root->fs_info->trans_lock);
                }
+       } else {
+               spin_unlock(&root->fs_info->trans_lock);
        }
 
        if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
@@ -1178,12 +1204,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        do {
                int snap_pending = 0;
+
                joined = cur_trans->num_joined;
                if (!list_empty(&trans->transaction->pending_snapshots))
                        snap_pending = 1;
 
                WARN_ON(cur_trans != trans->transaction);
-               mutex_unlock(&root->fs_info->trans_mutex);
 
                if (flush_on_commit || snap_pending) {
                        btrfs_start_delalloc_inodes(root, 1);
@@ -1206,14 +1232,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
 
-               smp_mb();
                if (atomic_read(&cur_trans->num_writers) > 1)
                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
                else if (should_grow)
                        schedule_timeout(1);
 
-               mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
+               spin_lock(&root->fs_info->trans_lock);
+               root->fs_info->trans_no_join = 1;
+               spin_unlock(&root->fs_info->trans_lock);
        } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
 
@@ -1258,9 +1285,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        btrfs_prepare_extent_commit(trans, root);
 
        cur_trans = root->fs_info->running_transaction;
-       spin_lock(&root->fs_info->new_trans_lock);
-       root->fs_info->running_transaction = NULL;
-       spin_unlock(&root->fs_info->new_trans_lock);
 
        btrfs_set_root_node(&root->fs_info->tree_root->root_item,
                            root->fs_info->tree_root->node);
@@ -1281,10 +1305,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
               sizeof(root->fs_info->super_copy));
 
        trans->transaction->blocked = 0;
+       spin_lock(&root->fs_info->trans_lock);
+       root->fs_info->running_transaction = NULL;
+       root->fs_info->trans_no_join = 0;
+       spin_unlock(&root->fs_info->trans_lock);
 
        wake_up(&root->fs_info->transaction_wait);
 
-       mutex_unlock(&root->fs_info->trans_mutex);
        ret = btrfs_write_and_wait_transaction(trans, root);
        BUG_ON(ret);
        write_ctree_super(trans, root, 0);
@@ -1297,22 +1324,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_finish_extent_commit(trans, root);
 
-       mutex_lock(&root->fs_info->trans_mutex);
-
        cur_trans->commit_done = 1;
 
        root->fs_info->last_trans_committed = cur_trans->transid;
 
        wake_up(&cur_trans->commit_wait);
 
+       spin_lock(&root->fs_info->trans_lock);
        list_del_init(&cur_trans->list);
+       spin_unlock(&root->fs_info->trans_lock);
+
        put_transaction(cur_trans);
        put_transaction(cur_trans);
 
        trace_btrfs_transaction_commit(root);
 
-       mutex_unlock(&root->fs_info->trans_mutex);
-
        btrfs_scrub_continue(root);
 
        if (current->journal_info == trans)
@@ -1334,9 +1360,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
        LIST_HEAD(list);
        struct btrfs_fs_info *fs_info = root->fs_info;
 
-       mutex_lock(&fs_info->trans_mutex);
+       spin_lock(&fs_info->trans_lock);
        list_splice_init(&fs_info->dead_roots, &list);
-       mutex_unlock(&fs_info->trans_mutex);
+       spin_unlock(&fs_info->trans_lock);
 
        while (!list_empty(&list)) {
                root = list_entry(list.next, struct btrfs_root, root_list);
index 804c886..02564e6 100644 (file)
@@ -28,10 +28,12 @@ struct btrfs_transaction {
         * transaction can end
         */
        atomic_t num_writers;
+       atomic_t use_count;
 
        unsigned long num_joined;
+
+       spinlock_t commit_lock;
        int in_commit;
-       atomic_t use_count;
        int commit_done;
        int blocked;
        struct list_head list;
@@ -45,13 +47,14 @@ struct btrfs_transaction {
 
 struct btrfs_trans_handle {
        u64 transid;
-       u64 block_group;
        u64 bytes_reserved;
+       unsigned long use_count;
        unsigned long blocks_reserved;
        unsigned long blocks_used;
        unsigned long delayed_ref_updates;
        struct btrfs_transaction *transaction;
        struct btrfs_block_rsv *block_rsv;
+       struct btrfs_block_rsv *orig_rsv;
 };
 
 struct btrfs_pending_snapshot {
@@ -66,19 +69,6 @@ struct btrfs_pending_snapshot {
        struct list_head list;
 };
 
-static inline void btrfs_set_trans_block_group(struct btrfs_trans_handle *trans,
-                                              struct inode *inode)
-{
-       trans->block_group = BTRFS_I(inode)->block_group;
-}
-
-static inline void btrfs_update_inode_block_group(
-                                         struct btrfs_trans_handle *trans,
-                                         struct inode *inode)
-{
-       BTRFS_I(inode)->block_group = trans->block_group;
-}
-
 static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
                                              struct inode *inode)
 {
@@ -92,12 +82,9 @@ int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
                                                   int num_items);
-struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
-                                                 int num_blocks);
-struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
-                                                         int num_blocks);
-struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
-                                                        int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
 int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
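
Transaction handles are also nestable now: if the current task already holds one, start_transaction() hands back the same handle from current->journal_info, bumps h->use_count and parks the caller's block_rsv in orig_rsv, and __btrfs_end_transaction() only really ends things once use_count drops to zero. A short usage sketch of that behaviour (same task throughout; the handle names are purely illustrative):

        struct btrfs_trans_handle *outer, *inner;

        outer = btrfs_start_transaction(root, 1);

        /*
         * A nested join from the same task reuses the handle stored in
         * current->journal_info: inner == outer, use_count goes to 2 and
         * outer's block_rsv is parked in orig_rsv for the duration.
         */
        inner = btrfs_join_transaction(root);

        btrfs_end_transaction(inner, root);     /* drops use_count, restores block_rsv */
        btrfs_end_transaction(outer, root);     /* use_count hits zero: real end */
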
index c48214e..da541df 100644 (file)
@@ -504,7 +504,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
                BUG_ON(!new_device);
                memcpy(new_device, device, sizeof(*new_device));
                new_device->name = kstrdup(device->name, GFP_NOFS);
-               BUG_ON(!new_device->name);
+               BUG_ON(device->name && !new_device->name);
                new_device->bdev = NULL;
                new_device->writeable = 0;
                new_device->in_fs_metadata = 0;
index f3107e4..5366fe4 100644 (file)
@@ -158,8 +158,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       btrfs_set_trans_block_group(trans, inode);
-
        ret = do_setxattr(trans, inode, name, value, size, flags);
        if (ret)
                goto out;