Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 14 Dec 2010 19:08:13 +0000 (11:08 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 14 Dec 2010 19:08:13 +0000 (11:08 -0800)
diff --combined fs/btrfs/disk-io.c

index c547cca,a5d2249..51d2e4d
--- 1/fs/btrfs/disk-io.c
--- 2/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -696,6 -696,7 +696,7 @@@ static int btree_submit_bio_hook(struc
                                    __btree_submit_bio_done);
   }
   
+ #ifdef CONFIG_MIGRATION
   static int btree_migratepage(struct address_space *mapping,
                         struct page *newpage, struct page *page)
   {
@@@ -712,12 -713,9 +713,9 @@@
         if (page_has_private(page) &&
             !try_to_release_page(page, GFP_KERNEL))
                 return -EAGAIN;
- #ifdef CONFIG_MIGRATION
         return migrate_page(mapping, newpage, page);
- #else
-       return -ENOSYS;
- #endif
   }
+ #endif
   
   static int btree_writepage(struct page *page, struct writeback_control *wbc)
   {
@@@ -1009,7 -1007,10 +1007,10 @@@ static int find_and_setup_root(struct b
         blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
         root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
                                      blocksize, generation);
-       BUG_ON(!root->node);
+       if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+               free_extent_buffer(root->node);
+               return -EIO;
+       }
         root->commit_root = btrfs_root_node(root);
         return 0;
   }
@@@ -2093,7 -2094,7 +2094,7 @@@ static void btrfs_end_buffer_write_sync
         if (uptodate) {
                 set_buffer_uptodate(bh);
         } else {
- -              if (!buffer_eopnotsupp(bh) && printk_ratelimit()) {
+ +              if (printk_ratelimit()) {
                         printk(KERN_WARNING "lost page write due to "
                                         "I/O error on %s\n",
                                        bdevname(bh->b_bdev, b));
@@@ -2230,10 -2231,21 +2231,10 @@@ static int write_dev_supers(struct btrf
                         bh->b_end_io = btrfs_end_buffer_write_sync;
                 }
   
- -              if (i == last_barrier && do_barriers && device->barriers) {
- -                      ret = submit_bh(WRITE_BARRIER, bh);
- -                      if (ret == -EOPNOTSUPP) {
- -                              printk("btrfs: disabling barriers on dev %s\n",
- -                                     device->name);
- -                              set_buffer_uptodate(bh);
- -                              device->barriers = 0;
- -                              /* one reference for submit_bh */
- -                              get_bh(bh);
- -                              lock_buffer(bh);
- -                              ret = submit_bh(WRITE_SYNC, bh);
- -                      }
- -              } else {
+ +              if (i == last_barrier && do_barriers)
+ +                      ret = submit_bh(WRITE_FLUSH_FUA, bh);
+ +              else
                         ret = submit_bh(WRITE_SYNC, bh);
- -              }
   
                 if (ret)
                         errors++;
diff --combined fs/btrfs/extent-tree.c

index bcd59c7,7e5162e..227e581
--- 1/fs/btrfs/extent-tree.c
--- 2/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@@ -429,6 -429,7 +429,7 @@@ err
   
   static int cache_block_group(struct btrfs_block_group_cache *cache,
                              struct btrfs_trans_handle *trans,
+                            struct btrfs_root *root,
                              int load_cache_only)
   {
         struct btrfs_fs_info *fs_info = cache->fs_info;
@@@ -442,9 -443,12 +443,12 @@@
   
         /*
          * We can't do the read from on-disk cache during a commit since we need
-        * to have the normal tree locking.
+        * to have the normal tree locking.  Also if we are currently trying to
+        * allocate blocks for the tree root we can't do the fast caching since
+        * we likely hold important locks.
          */
-       if (!trans->transaction->in_commit) {
+       if (!trans->transaction->in_commit &&
+           (root && root != root->fs_info->tree_root)) {
                 spin_lock(&cache->lock);
                 if (cache->cached != BTRFS_CACHE_NO) {
                         spin_unlock(&cache->lock);
@@@ -1742,7 -1746,8 +1746,7 @@@ static int remove_extent_backref(struc
   static void btrfs_issue_discard(struct block_device *bdev,
                                 u64 start, u64 len)
   {
- -      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
- -                      BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+ +      blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0);
   }
   
   static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@@@ -2741,6 -2746,7 +2745,7 @@@ static int cache_save_setup(struct btrf
         struct btrfs_root *root = block_group->fs_info->tree_root;
         struct inode *inode = NULL;
         u64 alloc_hint = 0;
+       int dcs = BTRFS_DC_ERROR;
         int num_pages = 0;
         int retries = 0;
         int ret = 0;
@@@ -2795,6 -2801,8 +2800,8 @@@ again
   
         spin_lock(&block_group->lock);
         if (block_group->cached != BTRFS_CACHE_FINISHED) {
+               /* We're not cached, don't bother trying to write stuff out */
+               dcs = BTRFS_DC_WRITTEN;
                 spin_unlock(&block_group->lock);
                 goto out_put;
         }
@@@ -2821,6 -2829,8 +2828,8 @@@
         ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
                                               num_pages, num_pages,
                                               &alloc_hint);
+       if (!ret)
+               dcs = BTRFS_DC_SETUP;
         btrfs_free_reserved_data_space(inode, num_pages);
   out_put:
         iput(inode);
@@@ -2828,10 -2838,7 +2837,7 @@@ out_free
         btrfs_release_path(root, path);
   out:
         spin_lock(&block_group->lock);
-       if (ret)
-               block_group->disk_cache_state = BTRFS_DC_ERROR;
-       else
-               block_group->disk_cache_state = BTRFS_DC_SETUP;
+       block_group->disk_cache_state = dcs;
         spin_unlock(&block_group->lock);
   
         return ret;
@@@ -3037,7 -3044,13 +3043,13 @@@ static void set_avail_alloc_bits(struc
   
   u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
   {
-       u64 num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       u64 num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
   
         if (num_devices == 1)
                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@@ -4080,7 -4093,7 +4092,7 @@@ static int update_block_group(struct bt
                  * space back to the block group, otherwise we will leak space.
                  */
                 if (!alloc && cache->cached == BTRFS_CACHE_NO)
-                       cache_block_group(cache, trans, 1);
+                       cache_block_group(cache, trans, NULL, 1);
   
                 byte_in_group = bytenr - cache->key.objectid;
                 WARN_ON(byte_in_group > cache->key.offset);
@@@ -4930,11 -4943,31 +4942,31 @@@ search
                 btrfs_get_block_group(block_group);
                 search_start = block_group->key.objectid;
   
+               /*
+                * this can happen if we end up cycling through all the
+                * raid types, but we want to make sure we only allocate
+                * for the proper type.
+                */
+               if (!block_group_bits(block_group, data)) {
+                   u64 extra = BTRFS_BLOCK_GROUP_DUP |
+                               BTRFS_BLOCK_GROUP_RAID1 |
+                               BTRFS_BLOCK_GROUP_RAID10;
+ 
+                       /*
+                        * if they asked for extra copies and this block group
+                        * doesn't provide them, bail.  This does allow us to
+                        * fill raid0 from raid1.
+                        */
+                       if ((data & extra) && !(block_group->flags & extra))
+                               goto loop;
+               }
+ 
   have_block_group:
                 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
                         u64 free_percent;
   
-                       ret = cache_block_group(block_group, trans, 1);
+                       ret = cache_block_group(block_group, trans,
+                                               orig_root, 1);
                         if (block_group->cached == BTRFS_CACHE_FINISHED)
                                 goto have_block_group;
   
@@@ -4958,7 -4991,8 +4990,8 @@@
                         if (loop > LOOP_CACHING_NOWAIT ||
                             (loop > LOOP_FIND_IDEAL &&
                              atomic_read(&space_info->caching_threads) < 2)) {
-                               ret = cache_block_group(block_group, trans, 0);
+                               ret = cache_block_group(block_group, trans,
+                                                       orig_root, 0);
                                 BUG_ON(ret);
                         }
                         found_uncached_bg = true;
@@@ -5515,7 -5549,7 +5548,7 @@@ int btrfs_alloc_logged_file_extent(stru
         u64 num_bytes = ins->offset;
   
         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-       cache_block_group(block_group, trans, 0);
+       cache_block_group(block_group, trans, NULL, 0);
         caching_ctl = get_caching_control(block_group);
   
         if (!caching_ctl) {
@@@ -6300,9 -6334,13 +6333,13 @@@ int btrfs_drop_snapshot(struct btrfs_ro
                                            NULL, NULL);
                 BUG_ON(ret < 0);
                 if (ret > 0) {
-                       ret = btrfs_del_orphan_item(trans, tree_root,
-                                                   root->root_key.objectid);
-                       BUG_ON(ret);
+                       /* if we fail to delete the orphan item this time
+                        * around, it'll get picked up the next time.
+                        *
+                        * The most common failure here is just -ENOENT.
+                        */
+                       btrfs_del_orphan_item(trans, tree_root,
+                                             root->root_key.objectid);
                 }
         }
   
@@@ -7878,7 -7916,14 +7915,14 @@@ static u64 update_block_group_flags(str
         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
   
-       num_devices = root->fs_info->fs_devices->rw_devices;
+       /*
+        * we add in the count of missing devices because we want
+        * to make sure that any RAID levels on a degraded FS
+        * continue to be honored.
+        */
+       num_devices = root->fs_info->fs_devices->rw_devices +
+               root->fs_info->fs_devices->missing_devices;
+ 
         if (num_devices == 1) {
                 stripped |= BTRFS_BLOCK_GROUP_DUP;
                 stripped = flags & ~stripped;
@@@ -8247,7 -8292,6 +8291,6 @@@ int btrfs_read_block_groups(struct btrf
                         break;
                 if (ret != 0)
                         goto error;
- 
                 leaf = path->nodes[0];
                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
diff --combined fs/btrfs/inode.c

index 8039390,5f91944..72f31ec
--- 1/fs/btrfs/inode.c
--- 2/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@@ -495,7 -495,7 +495,7 @@@ again
                 add_async_extent(async_cow, start, num_bytes,
                                  total_compressed, pages, nr_pages_ret);
   
-               if (start + num_bytes < end && start + num_bytes < actual_end) {
+               if (start + num_bytes < end) {
                         start += num_bytes;
                         pages = NULL;
                         cond_resched();
@@@ -3877,7 -3877,7 +3877,7 @@@ again
         p = &root->inode_tree.rb_node;
         parent = NULL;
   
- -      if (hlist_unhashed(&inode->i_hash))
+ +      if (inode_unhashed(inode))
                 return;
   
         spin_lock(&root->inode_lock);
@@@ -4802,7 -4802,7 +4802,7 @@@ static int btrfs_link(struct dentry *ol
         }
   
         btrfs_set_trans_block_group(trans, dir);
- -      atomic_inc(&inode->i_count);
+ +      ihold(inode);
   
         err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
   
@@@ -5712,9 -5712,9 +5712,9 @@@ static void btrfs_end_dio_bio(struct bi
   
         if (err) {
                 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
-                     "disk_bytenr %lu len %u err no %d\n",
-                     dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
-                     bio->bi_size, err);
+                     "sector %#Lx len %u err no %d\n",
+                     dip->inode->i_ino, bio->bi_rw,
+                     (unsigned long long)bio->bi_sector, bio->bi_size, err);
                 dip->errors = 1;
   
                 /*
@@@ -5934,8 -5934,7 +5934,7 @@@ free_ordered
          */
         if (write) {
                 struct btrfs_ordered_extent *ordered;
-               ordered = btrfs_lookup_ordered_extent(inode,
-                                                     dip->logical_offset);
+               ordered = btrfs_lookup_ordered_extent(inode, file_offset);
                 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
                     !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
                         btrfs_free_reserved_extent(root, ordered->start,
diff --combined fs/btrfs/super.c

index dbb51ea,61bd79a..883c6fa
--- 1/fs/btrfs/super.c
--- 2/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@@ -589,8 -589,8 +589,8 @@@ static int btrfs_set_super(struct super
    * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
    *      for multiple device setup.  Make sure to keep it in sync.
    */
- -static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
- -              const char *dev_name, void *data, struct vfsmount *mnt)
+ +static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+ +              const char *dev_name, void *data)
   {
         struct block_device *bdev = NULL;
         struct super_block *s;
@@@ -610,7 -610,7 +610,7 @@@
                                           &subvol_name, &subvol_objectid,
                                           &fs_devices);
         if (error)
- -              return error;
+ +              return ERR_PTR(error);
   
         error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
         if (error)
@@@ -685,9 -685,9 +685,9 @@@
                 mutex_unlock(&root->d_inode->i_mutex);
   
                 if (IS_ERR(new_root)) {
+                       dput(root);
                         deactivate_locked_super(s);
                         error = PTR_ERR(new_root);
-                       dput(root);
                         goto error_free_subvol_name;
                 }
                 if (!new_root->d_inode) {
@@@ -701,8 -701,11 +701,8 @@@
                 root = new_root;
         }
   
- -      mnt->mnt_sb = s;
- -      mnt->mnt_root = root;
- -
         kfree(subvol_name);
- -      return 0;
+ +      return root;
   
   error_s:
         error = PTR_ERR(s);
@@@ -712,7 -715,7 +712,7 @@@ error_close_devices
         kfree(tree_root);
   error_free_subvol_name:
         kfree(subvol_name);
- -      return error;
+ +      return ERR_PTR(error);
   }
   
   static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@@ -796,7 -799,7 +796,7 @@@ static int btrfs_statfs(struct dentry *
   static struct file_system_type btrfs_fs_type = {
         .owner          = THIS_MODULE,
         .name           = "btrfs",
- -      .get_sb         = btrfs_get_sb,
+ +      .mount          = btrfs_mount,
         .kill_sb        = kill_anon_super,
         .fs_flags       = FS_REQUIRES_DEV,
   };
@@@ -865,7 -868,6 +865,7 @@@ static const struct file_operations btr
         .unlocked_ioctl  = btrfs_control_ioctl,
         .compat_ioctl = btrfs_control_ioctl,
         .owner   = THIS_MODULE,
+ +      .llseek = noop_llseek,
   };
   
   static struct miscdevice btrfs_misc = {
diff --combined fs/btrfs/volumes.c

index cc04dc1,177b731..6b98845
--- 1/fs/btrfs/volumes.c
--- 2/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@@ -398,6 -398,7 +398,6 @@@ static noinline int device_list_add(con
                 device->work.func = pending_bios_fn;
                 memcpy(device->uuid, disk_super->dev_item.uuid,
                        BTRFS_UUID_SIZE);
- -              device->barriers = 1;
                 spin_lock_init(&device->io_lock);
                 device->name = kstrdup(path, GFP_NOFS);
                 if (!device->name) {
@@@ -412,12 -413,16 +412,16 @@@
   
                 device->fs_devices = fs_devices;
                 fs_devices->num_devices++;
-       } else if (strcmp(device->name, path)) {
+       } else if (!device->name || strcmp(device->name, path)) {
                 name = kstrdup(path, GFP_NOFS);
                 if (!name)
                         return -ENOMEM;
                 kfree(device->name);
                 device->name = name;
+               if (device->missing) {
+                       fs_devices->missing_devices--;
+                       device->missing = 0;
+               }
         }
   
         if (found_transid > fs_devices->latest_trans) {
@@@ -461,6 -466,7 +465,6 @@@ static struct btrfs_fs_devices *clone_f
                 device->devid = orig_dev->devid;
                 device->work.func = pending_bios_fn;
                 memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
- -              device->barriers = 1;
                 spin_lock_init(&device->io_lock);
                 INIT_LIST_HEAD(&device->dev_list);
                 INIT_LIST_HEAD(&device->dev_alloc_list);
@@@ -1236,6 -1242,9 +1240,9 @@@ int btrfs_rm_device(struct btrfs_root *
   
         device->fs_devices->num_devices--;
   
+       if (device->missing)
+               root->fs_info->fs_devices->missing_devices--;
+ 
         next_device = list_entry(root->fs_info->fs_devices->devices.next,
                                  struct btrfs_device, dev_list);
         if (device->bdev == root->fs_info->sb->s_bdev)
@@@ -1487,6 -1496,7 +1494,6 @@@ int btrfs_init_new_device(struct btrfs_
         trans = btrfs_start_transaction(root, 0);
         lock_chunks(root);
   
- -      device->barriers = 1;
         device->writeable = 1;
         device->work.func = pending_bios_fn;
         generate_random_uuid(device->uuid);
@@@ -3076,11 -3086,14 +3083,13 @@@ static struct btrfs_device *add_missing
                 return NULL;
         list_add(&device->dev_list,
                  &fs_devices->devices);
- -      device->barriers = 1;
         device->dev_root = root->fs_info->dev_root;
         device->devid = devid;
         device->work.func = pending_bios_fn;
         device->fs_devices = fs_devices;
+       device->missing = 1;
         fs_devices->num_devices++;
+       fs_devices->missing_devices++;
         spin_lock_init(&device->io_lock);
         INIT_LIST_HEAD(&device->dev_alloc_list);
         memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@@ -3278,6 -3291,15 +3287,15 @@@ static int read_one_dev(struct btrfs_ro
                         device = add_missing_dev(root, devid, dev_uuid);
                         if (!device)
                                 return -ENOMEM;
+               } else if (!device->missing) {
+                       /*
+                        * this happens when a device that was properly setup
+                        * in the device info lists suddenly goes bad.
+                        * device->bdev is NULL, and so we have to set
+                        * device->missing to one here
+                        */
+                       root->fs_info->fs_devices->missing_devices++;
+                       device->missing = 1;
                 }
         }
   
diff --combined fs/btrfs/volumes.h

index 2b638b6,a668c01..2740db4
--- 1/fs/btrfs/volumes.h
--- 2/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@@ -42,8 -42,10 +42,9 @@@ struct btrfs_device 
         int running_pending;
         u64 generation;
   
- -      int barriers;
         int writeable;
         int in_fs_metadata;
+       int missing;
   
         spinlock_t io_lock;
   
@@@ -93,6 -95,7 +94,7 @@@ struct btrfs_fs_devices 
         u64 num_devices;
         u64 open_devices;
         u64 rw_devices;
+       u64 missing_devices;
         u64 total_rw_bytes;
         struct block_device *latest_bdev;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 14 Dec 2010 19:08:13 +0000 (11:08 -0800)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Tue, 14 Dec 2010 19:08:13 +0000 (11:08 -0800)
		1	2
fs/btrfs/disk-io.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/extent-tree.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/super.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/btrfs/volumes.h	patch \|	diff1 \|	diff2 \|	blob \| history