Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tyhicks...
[pandora-kernel.git] fs/btrfs/disk-io.c
index 102c176..632f8f3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -620,7 +620,7 @@ out:
 
 static int btree_io_failed_hook(struct bio *failed_bio,
                         struct page *page, u64 start, u64 end,
-                        u64 mirror_num, struct extent_state *state)
+                        int mirror_num, struct extent_state *state)
 {
        struct extent_io_tree *tree;
        unsigned long len;
@@ -1890,31 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        u64 features;
        struct btrfs_key location;
        struct buffer_head *bh;
-       struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
-                                                GFP_NOFS);
-       struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
-                                                GFP_NOFS);
+       struct btrfs_super_block *disk_super;
        struct btrfs_root *tree_root = btrfs_sb(sb);
-       struct btrfs_fs_info *fs_info = NULL;
-       struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
-                                               GFP_NOFS);
-       struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
-                                             GFP_NOFS);
+       struct btrfs_fs_info *fs_info = tree_root->fs_info;
+       struct btrfs_root *extent_root;
+       struct btrfs_root *csum_root;
+       struct btrfs_root *chunk_root;
+       struct btrfs_root *dev_root;
        struct btrfs_root *log_tree_root;
-
        int ret;
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
 
-       struct btrfs_super_block *disk_super;
+       extent_root = fs_info->extent_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       csum_root = fs_info->csum_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       chunk_root = fs_info->chunk_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       dev_root = fs_info->dev_root =
+               kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
 
-       if (!extent_root || !tree_root || !tree_root->fs_info ||
-           !chunk_root || !dev_root || !csum_root) {
+       if (!extent_root || !csum_root || !chunk_root || !dev_root) {
                err = -ENOMEM;
                goto fail;
        }
-       fs_info = tree_root->fs_info;
 
        ret = init_srcu_struct(&fs_info->subvol_srcu);
        if (ret) {
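
The hunk above switches open_ctree() to wiring each root into fs_info the moment it is allocated, so a single NULL check and one teardown path cover all four kzalloc() calls. Below is a stand-alone sketch of that allocate-and-link pattern; struct ctx, its fields and the helpers are illustrative stand-ins, not btrfs APIs.

#include <stdlib.h>

struct root { int dummy; };

/* hypothetical stand-in for btrfs_fs_info; only the four roots matter here */
struct ctx {
        struct root *extent_root;
        struct root *csum_root;
        struct root *chunk_root;
        struct root *dev_root;
};

static void free_ctx_roots(struct ctx *c)
{
        /* free(NULL) is a no-op, so this is safe after a partial failure */
        free(c->extent_root);
        free(c->csum_root);
        free(c->chunk_root);
        free(c->dev_root);
}

static int alloc_ctx_roots(struct ctx *c)
{
        /* link each root into its long-lived home as soon as it exists */
        c->extent_root = calloc(1, sizeof(*c->extent_root));
        c->csum_root = calloc(1, sizeof(*c->csum_root));
        c->chunk_root = calloc(1, sizeof(*c->chunk_root));
        c->dev_root = calloc(1, sizeof(*c->dev_root));

        /* one check covers all four allocations */
        if (!c->extent_root || !c->csum_root ||
            !c->chunk_root || !c->dev_root) {
                free_ctx_roots(c);
                return -1;
        }
        return 0;
}

int main(void)
{
        struct ctx c = { 0 };
        int err = alloc_ctx_roots(&c);

        if (!err)
                free_ctx_roots(&c);
        return err ? 1 : 0;
}
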
@@ -1954,12 +1955,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
        mutex_init(&fs_info->reloc_mutex);
 
        init_completion(&fs_info->kobj_unregister);
-       fs_info->tree_root = tree_root;
-       fs_info->extent_root = extent_root;
-       fs_info->csum_root = csum_root;
-       fs_info->chunk_root = chunk_root;
-       fs_info->dev_root = dev_root;
-       fs_info->fs_devices = fs_devices;
        INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
        INIT_LIST_HEAD(&fs_info->space_info);
        btrfs_mapping_init(&fs_info->mapping_tree);
@@ -2465,21 +2460,20 @@ fail_sb_buffer:
        btrfs_stop_workers(&fs_info->caching_workers);
 fail_alloc:
 fail_iput:
+       btrfs_mapping_tree_free(&fs_info->mapping_tree);
+
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        iput(fs_info->btree_inode);
-
-       btrfs_close_devices(fs_info->fs_devices);
-       btrfs_mapping_tree_free(&fs_info->mapping_tree);
 fail_bdi:
        bdi_destroy(&fs_info->bdi);
 fail_srcu:
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 fail:
+       btrfs_close_devices(fs_info->fs_devices);
        free_fs_info(fs_info);
        return ERR_PTR(err);
 
 recovery_tree_root:
-
        if (!btrfs_test_opt(tree_root, RECOVERY))
                goto fail_tree_roots;
 
@@ -2579,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
        int errors = 0;
        u32 crc;
        u64 bytenr;
-       int last_barrier = 0;
 
        if (max_mirrors == 0)
                max_mirrors = BTRFS_SUPER_MIRROR_MAX;
 
-       /* make sure only the last submit_bh does a barrier */
-       if (do_barriers) {
-               for (i = 0; i < max_mirrors; i++) {
-                       bytenr = btrfs_sb_offset(i);
-                       if (bytenr + BTRFS_SUPER_INFO_SIZE >=
-                           device->total_bytes)
-                               break;
-                       last_barrier = i;
-               }
-       }
-
        for (i = 0; i < max_mirrors; i++) {
                bytenr = btrfs_sb_offset(i);
                if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
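
The loop above walks the fixed on-disk locations of the super block copies and skips any copy that would fall past the end of the device. A small user-space sketch of that offset walk, assuming the usual btrfs layout (64K primary super, mirrors at 16K << (12 * n)); the defines below only approximate the real BTRFS_SUPER_* values and btrfs_sb_offset():

#include <stdio.h>

#define SUPER_INFO_OFFSET       (64 * 1024ULL)  /* primary super at 64K */
#define SUPER_INFO_SIZE         4096ULL
#define SUPER_MIRROR_MAX        3
#define SUPER_MIRROR_SHIFT      12

/* mirrors the shape of btrfs_sb_offset(): 64K, then 16K << (12 * n) */
static unsigned long long sb_offset(int mirror)
{
        unsigned long long start = 16 * 1024ULL;

        if (mirror)
                return start << (SUPER_MIRROR_SHIFT * mirror);
        return SUPER_INFO_OFFSET;
}

int main(void)
{
        unsigned long long total_bytes = 500ULL * 1024 * 1024 * 1024; /* 500G device */
        int i;

        for (i = 0; i < SUPER_MIRROR_MAX; i++) {
                unsigned long long bytenr = sb_offset(i);

                /* same bounds check as write_dev_supers() */
                if (bytenr + SUPER_INFO_SIZE >= total_bytes)
                        break;
                printf("super copy %d at %llu\n", i, bytenr);
        }
        return 0;
}
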
@@ -2640,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
                        bh->b_end_io = btrfs_end_buffer_write_sync;
                }
 
-               if (i == last_barrier && do_barriers)
-                       ret = submit_bh(WRITE_FLUSH_FUA, bh);
-               else
-                       ret = submit_bh(WRITE_SYNC, bh);
-
+               /*
+                * write each super copy with FUA so it reaches stable
+                * storage instead of relying on a trailing barrier
+                */
+               ret = submit_bh(WRITE_FUA, bh);
                if (ret)
                        errors++;
        }
        return errors < i ? 0 : -1;
 }
 
+/*
+ * endio for write_dev_flush; this wakes anyone waiting for the
+ * barrier once the flush is done
+ */
+static void btrfs_end_empty_barrier(struct bio *bio, int err)
+{
+       if (err) {
+               if (err == -EOPNOTSUPP)
+                       set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+               clear_bit(BIO_UPTODATE, &bio->bi_flags);
+       }
+       if (bio->bi_private)
+               complete(bio->bi_private);
+       bio_put(bio);
+}
+
+/*
+ * trigger a flush for one of the devices.  With wait == 0 the flush is sent
+ * down without waiting; with wait == 1 it waits for that earlier flush.
+ *
+ * any device where the flush fails with -EOPNOTSUPP is flagged as not
+ * barrier-capable
+ */
+static int write_dev_flush(struct btrfs_device *device, int wait)
+{
+       struct bio *bio;
+       int ret = 0;
+
+       if (device->nobarriers)
+               return 0;
+
+       if (wait) {
+               bio = device->flush_bio;
+               if (!bio)
+                       return 0;
+
+               wait_for_completion(&device->flush_wait);
+
+               if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
+                       printk(KERN_WARNING "btrfs: disabling barriers on dev %s\n",
+                              device->name);
+                       device->nobarriers = 1;
+               }
+               if (!bio_flagged(bio, BIO_UPTODATE)) {
+                       ret = -EIO;
+               }
+
+               /* drop the reference from the wait == 0 run */
+               bio_put(bio);
+               device->flush_bio = NULL;
+
+               return ret;
+       }
+
+       /*
+        * the allocation holds one reference, dropped by the endio;
+        * bio_get() below adds a second one for the wait == 1 caller
+        */
+       device->flush_bio = NULL;
+       bio = bio_alloc(GFP_NOFS, 0);
+       if (!bio)
+               return -ENOMEM;
+
+       bio->bi_end_io = btrfs_end_empty_barrier;
+       bio->bi_bdev = device->bdev;
+       init_completion(&device->flush_wait);
+       bio->bi_private = &device->flush_wait;
+       device->flush_bio = bio;
+
+       bio_get(bio);
+       submit_bio(WRITE_FLUSH, bio);
+
+       return 0;
+}
+
+/*
+ * send an empty flush down to each device in parallel,
+ * then wait for them
+ */
+static int barrier_all_devices(struct btrfs_fs_info *info)
+{
+       struct list_head *head;
+       struct btrfs_device *dev;
+       int errors = 0;
+       int ret;
+
+       /* send down all the barriers */
+       head = &info->fs_devices->devices;
+       list_for_each_entry_rcu(dev, head, dev_list) {
+               if (!dev->bdev) {
+                       errors++;
+                       continue;
+               }
+               if (!dev->in_fs_metadata || !dev->writeable)
+                       continue;
+
+               ret = write_dev_flush(dev, 0);
+               if (ret)
+                       errors++;
+       }
+
+       /* wait for all the barriers */
+       list_for_each_entry_rcu(dev, head, dev_list) {
+               if (!dev->bdev) {
+                       errors++;
+                       continue;
+               }
+               if (!dev->in_fs_metadata || !dev->writeable)
+                       continue;
+
+               ret = write_dev_flush(dev, 1);
+               if (ret)
+                       errors++;
+       }
+       if (errors)
+               return -EIO;
+       return 0;
+}
+
 int write_all_supers(struct btrfs_root *root, int max_mirrors)
 {
        struct list_head *head;
@@ -2672,6 +2773,10 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        head = &root->fs_info->fs_devices->devices;
+
+       if (do_barriers)
+               barrier_all_devices(root->fs_info);
+
        list_for_each_entry_rcu(dev, head, dev_list) {
                if (!dev->bdev) {
                        total_errors++;
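
Taken together, barrier_all_devices() and write_dev_flush() implement a two-phase flush: every device first gets its empty WRITE_FLUSH bio submitted, and only then does the code wait on each completion, so the cache flushes overlap instead of running back to back. A rough user-space analogue of that submit-then-wait shape, using threads and fsync() in place of bios and completions; the device list and paths are illustrative only:

#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define NDEV 3

struct dev {
        const char *path;
        int fd;
        int error;
        pthread_t tid;
};

/* stands in for the empty WRITE_FLUSH bio plus its endio/completion */
static void *flush_one(void *arg)
{
        struct dev *d = arg;

        if (fsync(d->fd))
                d->error = 1;
        return NULL;
}

int main(void)
{
        /* illustrative stand-ins for the devices on fs_devices->devices */
        struct dev devs[NDEV] = {
                { "/dev/null" }, { "/dev/null" }, { "/dev/null" },
        };
        int errors = 0;
        int i;

        for (i = 0; i < NDEV; i++)
                devs[i].fd = open(devs[i].path, O_WRONLY);

        /* phase 1: send down all the flushes without waiting (wait == 0) */
        for (i = 0; i < NDEV; i++)
                pthread_create(&devs[i].tid, NULL, flush_one, &devs[i]);

        /* phase 2: wait for every flush and collect errors (wait == 1) */
        for (i = 0; i < NDEV; i++) {
                pthread_join(devs[i].tid, NULL);
                if (devs[i].error)
                        errors++;
        }

        for (i = 0; i < NDEV; i++)
                close(devs[i].fd);

        printf("%d flush errors\n", errors);
        return errors ? 1 : 0;
}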