Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Nov 2011 01:47:06 +0000 (23:47 -0200)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 12 Nov 2011 01:47:06 +0000 (23:47 -0200)
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: rename the option to nospace_cache
  Btrfs: handle bio_add_page failure gracefully in scrub
  Btrfs: fix deadlock caused by the race between relocation
  Btrfs: only map pages if we know we need them when reading the space cache
  Btrfs: fix orphan backref nodes
  Btrfs: Abstract similar code for btrfs_block_rsv_add{, _noflush}
  Btrfs: fix unreleased path in btrfs_orphan_cleanup()
  Btrfs: fix no reserved space for writing out inode cache
  Btrfs: fix nocow when deleting the item
  Btrfs: tweak the delayed inode reservations again
  Btrfs: rework error handling in btrfs_mount()
  Btrfs: close devices on all error paths in open_ctree()
  Btrfs: avoid null dereference and leaks when bailing from open_ctree()
  Btrfs: fix subvol_name leak on error in btrfs_mount()
  Btrfs: fix memory leak in btrfs_parse_early_options()
  Btrfs: fix our reservations for updating an inode when completing io
  Btrfs: fix oops on NULL trans handle in btrfs_truncate
  btrfs: fix double-free 'tree_root' in 'btrfs_mount()'

1  2 
fs/btrfs/delayed-inode.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c

diff --combined fs/btrfs/delayed-inode.c
@@@ -617,12 -617,14 +617,14 @@@ static void btrfs_delayed_item_release_
  static int btrfs_delayed_inode_reserve_metadata(
                                        struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
+                                       struct inode *inode,
                                        struct btrfs_delayed_node *node)
  {
        struct btrfs_block_rsv *src_rsv;
        struct btrfs_block_rsv *dst_rsv;
        u64 num_bytes;
        int ret;
+       int release = false;
  
        src_rsv = trans->block_rsv;
        dst_rsv = &root->fs_info->delayed_block_rsv;
                if (!ret)
                        node->bytes_reserved = num_bytes;
                return ret;
+       } else if (src_rsv == &root->fs_info->delalloc_block_rsv) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               if (BTRFS_I(inode)->delalloc_meta_reserved) {
+                       BTRFS_I(inode)->delalloc_meta_reserved = 0;
+                       spin_unlock(&BTRFS_I(inode)->lock);
+                       release = true;
+                       goto migrate;
+               }
+               spin_unlock(&BTRFS_I(inode)->lock);
+               /* Ok we didn't have space pre-reserved.  This shouldn't happen
+                * too often but it can happen if we do delalloc to an existing
+                * inode which gets dirtied because of the time update, and then
+                * isn't touched again until after the transaction commits and
+                * then we try to write out the data.  First try to be nice and
+                * reserve something strictly for us.  If not be a pain and try
+                * to steal from the delalloc block rsv.
+                */
+               ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes);
+               if (!ret)
+                       goto out;
+               ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+               if (!ret)
+                       goto out;
+               /*
+                * Ok this is a problem, let's just steal from the global rsv
+                * since this really shouldn't happen that often.
+                */
+               WARN_ON(1);
+               ret = btrfs_block_rsv_migrate(&root->fs_info->global_block_rsv,
+                                             dst_rsv, num_bytes);
+               goto out;
        }
  
+ migrate:
        ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
+ out:
+       /*
+        * Migrate only takes a reservation, it doesn't touch the size of the
+        * block_rsv.  This is to simplify people who don't normally have things
+        * migrated from their block rsv.  If they go to release their
+        * reservation, that will decrease the size as well, so if migrate
+        * reduced size we'd end up with a negative size.  But for the
+        * delalloc_meta_reserved stuff we will only know to drop 1 reservation,
+        * but we could in fact do this reserve/migrate dance several times
+        * between the time we did the original reservation and we'd clean it
+        * up.  So to take care of this, release the space for the meta
+        * reservation here.  I think it may be time for a documentation page on
+        * how block rsvs. work.
+        */
        if (!ret)
                node->bytes_reserved = num_bytes;
  
+       if (release)
+               btrfs_block_rsv_release(root, src_rsv, num_bytes);
        return ret;
  }
  
@@@ -1664,7 -1719,7 +1719,7 @@@ int btrfs_fill_inode(struct inode *inod
        inode->i_gid = btrfs_stack_inode_gid(inode_item);
        btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
        inode->i_mode = btrfs_stack_inode_mode(inode_item);
 -      inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
 +      set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
        inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
        BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
        BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
@@@ -1708,7 -1763,8 +1763,8 @@@ int btrfs_delayed_update_inode(struct b
                goto release_node;
        }
  
-       ret = btrfs_delayed_inode_reserve_metadata(trans, root, delayed_node);
+       ret = btrfs_delayed_inode_reserve_metadata(trans, root, inode,
+                                                  delayed_node);
        if (ret)
                goto release_node;
  
diff --combined fs/btrfs/disk-io.c
@@@ -1890,31 -1890,32 +1890,32 @@@ struct btrfs_root *open_ctree(struct su
        u64 features;
        struct btrfs_key location;
        struct buffer_head *bh;
-       struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
-                                                GFP_NOFS);
-       struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
-                                                GFP_NOFS);
+       struct btrfs_super_block *disk_super;
        struct btrfs_root *tree_root = btrfs_sb(sb);
-       struct btrfs_fs_info *fs_info = NULL;
-       struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
-                                               GFP_NOFS);
-       struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
-                                             GFP_NOFS);
+       struct btrfs_fs_info *fs_info = tree_root->fs_info;
+       struct btrfs_root *extent_root;
+       struct btrfs_root *csum_root;
+       struct btrfs_root *chunk_root;
+       struct btrfs_root *dev_root;
        struct btrfs_root *log_tree_root;
        int ret;
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
  
-       struct btrfs_super_block *disk_super;
+       extent_root = fs_info->extent_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       csum_root = fs_info->csum_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       chunk_root = fs_info->chunk_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       dev_root = fs_info->dev_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
  
-       if (!extent_root || !tree_root || !tree_root->fs_info ||
-           !chunk_root || !dev_root || !csum_root) {
+       if (!extent_root || !csum_root || !chunk_root || !dev_root) {
                err = -ENOMEM;
                goto fail;
        }
-       fs_info = tree_root->fs_info;
  
        ret = init_srcu_struct(&fs_info->subvol_srcu);
        if (ret) {
        mutex_init(&fs_info->reloc_mutex);
  
        init_completion(&fs_info->kobj_unregister);
-       fs_info->tree_root = tree_root;
-       fs_info->extent_root = extent_root;
-       fs_info->csum_root = csum_root;
-       fs_info->chunk_root = chunk_root;
-       fs_info->dev_root = dev_root;
-       fs_info->fs_devices = fs_devices;
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        btrfs_mapping_init(&fs_info->mapping_tree);
        sb->s_bdi = &fs_info->bdi;
  
        fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
 -      fs_info->btree_inode->i_nlink = 1;
 +      set_nlink(fs_info->btree_inode, 1);
        /*
         * we set the i_size on the btree inode to the max possible int.
         * the real end of the address space is determined by all of
@@@ -2465,21 -2460,20 +2460,20 @@@ fail_sb_buffer
        btrfs_stop_workers(&fs_info->caching_workers);
  fail_alloc:
  fail_iput:
+       btrfs_mapping_tree_free(&fs_info->mapping_tree);
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
-       btrfs_close_devices(fs_info->fs_devices);
-       btrfs_mapping_tree_free(&fs_info->mapping_tree);
  fail_bdi:
        bdi_destroy(&fs_info->bdi);
  fail_srcu:
        cleanup_srcu_struct(&fs_info->subvol_srcu);
  fail:
+       btrfs_close_devices(fs_info->fs_devices);
        free_fs_info(fs_info);
        return ERR_PTR(err);
  
  recovery_tree_root:
        if (!btrfs_test_opt(tree_root, RECOVERY))
                goto fail_tree_roots;
  
diff --combined fs/btrfs/extent-tree.c
@@@ -3375,8 -3375,7 +3375,8 @@@ static int shrink_delalloc(struct btrfs
                smp_mb();
                nr_pages = min_t(unsigned long, nr_pages,
                       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
 -              writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 +              writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
 +                                              WB_REASON_FS_FREE_SPACE);
  
                spin_lock(&space_info->lock);
                if (reserved > space_info->bytes_may_use)
@@@ -3797,16 -3796,16 +3797,16 @@@ void btrfs_free_block_rsv(struct btrfs_
        kfree(rsv);
  }
  
int btrfs_block_rsv_add(struct btrfs_root *root,
-                       struct btrfs_block_rsv *block_rsv,
-                       u64 num_bytes)
static inline int __block_rsv_add(struct btrfs_root *root,
+                                 struct btrfs_block_rsv *block_rsv,
+                                 u64 num_bytes, int flush)
  {
        int ret;
  
        if (num_bytes == 0)
                return 0;
  
-       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 1);
+       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush);
        if (!ret) {
                block_rsv_add_bytes(block_rsv, num_bytes, 1);
                return 0;
        return ret;
  }
  
+ int btrfs_block_rsv_add(struct btrfs_root *root,
+                       struct btrfs_block_rsv *block_rsv,
+                       u64 num_bytes)
+ {
+       return __block_rsv_add(root, block_rsv, num_bytes, 1);
+ }
  int btrfs_block_rsv_add_noflush(struct btrfs_root *root,
                                struct btrfs_block_rsv *block_rsv,
                                u64 num_bytes)
  {
-       int ret;
-       if (num_bytes == 0)
-               return 0;
-       ret = reserve_metadata_bytes(root, block_rsv, num_bytes, 0);
-       if (!ret) {
-               block_rsv_add_bytes(block_rsv, num_bytes, 1);
-               return 0;
-       }
-       return ret;
+       return __block_rsv_add(root, block_rsv, num_bytes, 0);
  }
  
  int btrfs_block_rsv_check(struct btrfs_root *root,
@@@ -4064,23 -4059,30 +4060,30 @@@ int btrfs_snap_reserve_metadata(struct 
   */
  static unsigned drop_outstanding_extent(struct inode *inode)
  {
+       unsigned drop_inode_space = 0;
        unsigned dropped_extents = 0;
  
        BUG_ON(!BTRFS_I(inode)->outstanding_extents);
        BTRFS_I(inode)->outstanding_extents--;
  
+       if (BTRFS_I(inode)->outstanding_extents == 0 &&
+           BTRFS_I(inode)->delalloc_meta_reserved) {
+               drop_inode_space = 1;
+               BTRFS_I(inode)->delalloc_meta_reserved = 0;
+       }
        /*
         * If we have more or the same amount of outsanding extents than we have
         * reserved then we need to leave the reserved extents count alone.
         */
        if (BTRFS_I(inode)->outstanding_extents >=
            BTRFS_I(inode)->reserved_extents)
-               return 0;
+               return drop_inode_space;
  
        dropped_extents = BTRFS_I(inode)->reserved_extents -
                BTRFS_I(inode)->outstanding_extents;
        BTRFS_I(inode)->reserved_extents -= dropped_extents;
-       return dropped_extents;
+       return dropped_extents + drop_inode_space;
  }
  
  /**
@@@ -4166,9 -4168,18 +4169,18 @@@ int btrfs_delalloc_reserve_metadata(str
                nr_extents = BTRFS_I(inode)->outstanding_extents -
                        BTRFS_I(inode)->reserved_extents;
                BTRFS_I(inode)->reserved_extents += nr_extents;
+       }
  
-               to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
+       /*
+        * Add an item to reserve for updating the inode when we complete the
+        * delalloc io.
+        */
+       if (!BTRFS_I(inode)->delalloc_meta_reserved) {
+               nr_extents++;
+               BTRFS_I(inode)->delalloc_meta_reserved = 1;
        }
+       to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
        to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
        spin_unlock(&BTRFS_I(inode)->lock);
  
diff --combined fs/btrfs/inode.c
@@@ -93,6 -93,8 +93,8 @@@ static noinline int cow_file_range(stru
                                   struct page *locked_page,
                                   u64 start, u64 end, int *page_started,
                                   unsigned long *nr_written, int unlock);
+ static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode);
  
  static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
@@@ -1741,7 -1743,7 +1743,7 @@@ static int btrfs_finish_ordered_io(stru
                                trans = btrfs_join_transaction(root);
                        BUG_ON(IS_ERR(trans));
                        trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_update_inode(trans, root, inode);
+                       ret = btrfs_update_inode_fallback(trans, root, inode);
                        BUG_ON(ret);
                }
                goto out;
  
        ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
        if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
-               ret = btrfs_update_inode(trans, root, inode);
+               ret = btrfs_update_inode_fallback(trans, root, inode);
                BUG_ON(ret);
        }
        ret = 0;
@@@ -2199,6 -2201,9 +2201,9 @@@ int btrfs_orphan_cleanup(struct btrfs_r
                if (ret)
                        goto out;
        }
+       /* release the path since we're done with it */
+       btrfs_release_path(path);
        root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
  
        if (root->orphan_block_rsv)
@@@ -2313,7 -2318,7 +2318,7 @@@ static void btrfs_read_locked_inode(str
        inode_item = btrfs_item_ptr(leaf, path->slots[0],
                                    struct btrfs_inode_item);
        inode->i_mode = btrfs_inode_mode(leaf, inode_item);
 -      inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
 +      set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
        inode->i_uid = btrfs_inode_uid(leaf, inode_item);
        inode->i_gid = btrfs_inode_gid(leaf, inode_item);
        btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@@ -2426,7 -2431,7 +2431,7 @@@ static void fill_inode_item(struct btrf
  /*
   * copy everything in the in-memory inode into the btree.
   */
noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root, struct inode *inode)
  {
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
        int ret;
  
-       /*
-        * If the inode is a free space inode, we can deadlock during commit
-        * if we put it into the delayed code.
-        *
-        * The data relocation inode should also be directly updated
-        * without delay
-        */
-       if (!btrfs_is_free_space_inode(root, inode)
-           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
-               ret = btrfs_delayed_update_inode(trans, root, inode);
-               if (!ret)
-                       btrfs_set_inode_last_trans(trans, inode);
-               return ret;
-       }
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@@ -2476,6 -2466,43 +2466,43 @@@ failed
        return ret;
  }
  
+ /*
+  * copy everything in the in-memory inode into the btree.
+  */
+ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode)
+ {
+       int ret;
+       /*
+        * If the inode is a free space inode, we can deadlock during commit
+        * if we put it into the delayed code.
+        *
+        * The data relocation inode should also be directly updated
+        * without delay
+        */
+       if (!btrfs_is_free_space_inode(root, inode)
+           && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+               ret = btrfs_delayed_update_inode(trans, root, inode);
+               if (!ret)
+                       btrfs_set_inode_last_trans(trans, inode);
+               return ret;
+       }
+       return btrfs_update_inode_item(trans, root, inode);
+ }
+ static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root, struct inode *inode)
+ {
+       int ret;
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret == -ENOSPC)
+               return btrfs_update_inode_item(trans, root, inode);
+       return ret;
+ }
  /*
   * unlink helper that gets used here in inode.c and in the tree logging
   * recovery code.  It remove a link in a directory with a given name, and
@@@ -5632,7 -5659,7 +5659,7 @@@ again
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
                ret = btrfs_ordered_update_i_size(inode, 0, ordered);
                if (!ret)
-                       err = btrfs_update_inode(trans, root, inode);
+                       err = btrfs_update_inode_fallback(trans, root, inode);
                goto out;
        }
  
        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
        ret = btrfs_ordered_update_i_size(inode, 0, ordered);
        if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
-               btrfs_update_inode(trans, root, inode);
+               btrfs_update_inode_fallback(trans, root, inode);
        ret = 0;
  out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
@@@ -6529,14 -6556,16 +6556,16 @@@ end_trans
                ret = btrfs_orphan_del(NULL, inode);
        }
  
-       trans->block_rsv = &root->fs_info->trans_block_rsv;
-       ret = btrfs_update_inode(trans, root, inode);
-       if (ret && !err)
-               err = ret;
+       if (trans) {
+               trans->block_rsv = &root->fs_info->trans_block_rsv;
+               ret = btrfs_update_inode(trans, root, inode);
+               if (ret && !err)
+                       err = ret;
  
-       nr = trans->blocks_used;
-       ret = btrfs_end_transaction_throttle(trans, root);
-       btrfs_btree_balance_dirty(root, nr);
+               nr = trans->blocks_used;
+               ret = btrfs_end_transaction_throttle(trans, root);
+               btrfs_btree_balance_dirty(root, nr);
+       }
  
  out:
        btrfs_free_block_rsv(root, rsv);
@@@ -6564,7 -6593,7 +6593,7 @@@ int btrfs_create_subvol_root(struct btr
        inode->i_op = &btrfs_dir_inode_operations;
        inode->i_fop = &btrfs_dir_file_operations;
  
 -      inode->i_nlink = 1;
 +      set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
  
        err = btrfs_update_inode(trans, new_root, inode);
@@@ -6605,6 -6634,7 +6634,7 @@@ struct inode *btrfs_alloc_inode(struct 
        ei->orphan_meta_reserved = 0;
        ei->dummy_inode = 0;
        ei->in_defrag = 0;
+       ei->delalloc_meta_reserved = 0;
        ei->force_compress = BTRFS_COMPRESS_NONE;
  
        ei->delayed_node = NULL;