Btrfs: make btrfs_map_block() return entire free extent for each device of RAID0...
[pandora-kernel.git] / fs / btrfs / volumes.c
index d158530..c440c89 100644 (file)
 #include "volumes.h"
 #include "async-thread.h"
 
-struct map_lookup {
-       u64 type;
-       int io_align;
-       int io_width;
-       int stripe_len;
-       int sector_size;
-       int num_stripes;
-       int sub_stripes;
-       struct btrfs_bio_stripe stripes[];
-};
-
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_device *device);
@@ -1213,6 +1202,10 @@ static int btrfs_rm_dev_item(struct btrfs_root *root,
                return -ENOMEM;
 
        trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               btrfs_free_path(path);
+               return PTR_ERR(trans);
+       }
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = device->devid;
@@ -1334,11 +1327,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
        ret = btrfs_shrink_device(device, 0);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
 
        ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
 
        device->in_fs_metadata = 0;
 
@@ -1412,6 +1405,13 @@ out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
        return ret;
+error_undo:
+       if (device->writeable) {
+               list_add(&device->dev_alloc_list,
+                        &root->fs_info->fs_devices->alloc_list);
+               root->fs_info->fs_devices->rw_devices++;
+       }
+       goto error_brelse;
 }
 
 /*
@@ -1601,11 +1601,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
        ret = find_next_devid(root, &device->devid);
        if (ret) {
+               kfree(device->name);
                kfree(device);
                goto error;
        }
 
        trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               kfree(device->name);
+               kfree(device);
+               ret = PTR_ERR(trans);
+               goto error;
+       }
+
        lock_chunks(root);
 
        device->writeable = 1;
@@ -1621,7 +1629,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
-       device->mode = 0;
+       device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);
 
        if (seeding_dev) {
@@ -1873,7 +1881,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
                return ret;
 
        trans = btrfs_start_transaction(root, 0);
-       BUG_ON(!trans);
+       BUG_ON(IS_ERR(trans));
 
        lock_chunks(root);
 
@@ -1904,6 +1912,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
 
        BUG_ON(ret);
 
+       trace_btrfs_chunk_free(root, map, chunk_offset, em->len);
+
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                BUG_ON(ret);
@@ -2047,7 +2057,7 @@ int btrfs_balance(struct btrfs_root *dev_root)
                BUG_ON(ret);
 
                trans = btrfs_start_transaction(dev_root, 0);
-               BUG_ON(!trans);
+               BUG_ON(IS_ERR(trans));
 
                ret = btrfs_grow_device(trans, device, old_size);
                BUG_ON(ret);
@@ -2213,6 +2223,11 @@ again:
 
        /* Shrinking succeeded, else we would be at "done". */
        trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto done;
+       }
+
        lock_chunks(root);
 
        device->disk_total_bytes = new_size;
@@ -2626,6 +2641,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        *num_bytes = chunk_bytes_by_type(type, calc_size,
                                         map->num_stripes, sub_stripes);
 
+       trace_btrfs_chunk_alloc(info->chunk_root, map, start, *num_bytes);
+
        em = alloc_extent_map(GFP_NOFS);
        if (!em) {
                ret = -ENOMEM;
@@ -2734,6 +2751,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
                                             item_size);
                BUG_ON(ret);
        }
+
        kfree(chunk);
        return 0;
 }
@@ -2938,7 +2956,10 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        struct extent_map_tree *em_tree = &map_tree->map_tree;
        u64 offset;
        u64 stripe_offset;
+       u64 stripe_end_offset;
        u64 stripe_nr;
+       u64 stripe_nr_orig;
+       u64 stripe_nr_end;
        int stripes_allocated = 8;
        int stripes_required = 1;
        int stripe_index;
@@ -2947,7 +2968,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int max_errors = 0;
        struct btrfs_multi_bio *multi = NULL;
 
-       if (multi_ret && !(rw & REQ_WRITE))
+       if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
                stripes_allocated = 1;
 again:
        if (multi_ret) {
@@ -2993,7 +3014,15 @@ again:
                        max_errors = 1;
                }
        }
-       if (multi_ret && (rw & REQ_WRITE) &&
+       if (rw & REQ_DISCARD) {
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                                BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_DUP |
+                                BTRFS_BLOCK_GROUP_RAID10)) {
+                       stripes_required = map->num_stripes;
+               }
+       }
+       if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
@@ -3013,12 +3042,15 @@ again:
        /* stripe_offset is the offset of this block in its stripe*/
        stripe_offset = offset - stripe_offset;
 
-       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
-                        BTRFS_BLOCK_GROUP_RAID10 |
-                        BTRFS_BLOCK_GROUP_DUP)) {
+       if (rw & REQ_DISCARD)
+               *length = min_t(u64, em->len - offset, *length);
+       else if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                             BTRFS_BLOCK_GROUP_RAID1 |
+                             BTRFS_BLOCK_GROUP_RAID10 |
+                             BTRFS_BLOCK_GROUP_DUP)) {
                /* we limit the length of each bio to what fits in a stripe */
                *length = min_t(u64, em->len - offset,
-                             map->stripe_len - stripe_offset);
+                               map->stripe_len - stripe_offset);
        } else {
                *length = em->len - offset;
        }
@@ -3028,8 +3060,19 @@ again:
 
        num_stripes = 1;
        stripe_index = 0;
-       if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-               if (unplug_page || (rw & REQ_WRITE))
+       stripe_nr_orig = stripe_nr;
+       stripe_nr_end = (offset + *length + map->stripe_len - 1) &
+                       (~(map->stripe_len - 1));
+       do_div(stripe_nr_end, map->stripe_len);
+       stripe_end_offset = stripe_nr_end * map->stripe_len -
+                           (offset + *length);
+       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+               if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->num_stripes,
+                                           stripe_nr_end - stripe_nr_orig);
+               stripe_index = do_div(stripe_nr, map->num_stripes);
+       } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
+               if (unplug_page || (rw & (REQ_WRITE | REQ_DISCARD)))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -3040,7 +3083,7 @@ again:
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (rw & REQ_WRITE)
+               if (rw & (REQ_WRITE | REQ_DISCARD))
                        num_stripes = map->num_stripes;
                else if (mirror_num)
                        stripe_index = mirror_num - 1;
@@ -3053,6 +3096,10 @@ again:
 
                if (unplug_page || (rw & REQ_WRITE))
                        num_stripes = map->sub_stripes;
+               else if (rw & REQ_DISCARD)
+                       num_stripes = min_t(u64, map->sub_stripes *
+                                           (stripe_nr_end - stripe_nr_orig),
+                                           map->num_stripes);
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
                else {
@@ -3070,24 +3117,101 @@ again:
        }
        BUG_ON(stripe_index >= map->num_stripes);
 
-       for (i = 0; i < num_stripes; i++) {
-               if (unplug_page) {
-                       struct btrfs_device *device;
-                       struct backing_dev_info *bdi;
-
-                       device = map->stripes[stripe_index].dev;
-                       if (device->bdev) {
-                               bdi = blk_get_backing_dev_info(device->bdev);
-                               if (bdi->unplug_io_fn)
-                                       bdi->unplug_io_fn(bdi, unplug_page);
-                       }
-               } else {
+       if (rw & REQ_DISCARD) {
+               for (i = 0; i < num_stripes; i++) {
                        multi->stripes[i].physical =
                                map->stripes[stripe_index].physical +
                                stripe_offset + stripe_nr * map->stripe_len;
                        multi->stripes[i].dev = map->stripes[stripe_index].dev;
+
+                       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
+                               u64 stripes;
+                               int last_stripe = (stripe_nr_end - 1) %
+                                       map->num_stripes;
+                               int j;
+
+                               for (j = 0; j < map->num_stripes; j++) {
+                                       if ((stripe_nr_end - 1 - j) %
+                                             map->num_stripes == stripe_index)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, map->num_stripes);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+
+                               if (i == 0) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       stripe_offset = 0;
+                               }
+                               if (stripe_index == last_stripe)
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                       } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+                               u64 stripes;
+                               int j;
+                               int factor = map->num_stripes /
+                                            map->sub_stripes;
+                               int last_stripe = (stripe_nr_end - 1) % factor;
+                               last_stripe *= map->sub_stripes;
+
+                               for (j = 0; j < factor; j++) {
+                                       if ((stripe_nr_end - 1 - j) % factor ==
+                                           stripe_index / map->sub_stripes)
+                                               break;
+                               }
+                               stripes = stripe_nr_end - 1 - j;
+                               do_div(stripes, factor);
+                               multi->stripes[i].length = map->stripe_len *
+                                       (stripes - stripe_nr + 1);
+
+                               if (i < map->sub_stripes) {
+                                       multi->stripes[i].length -=
+                                               stripe_offset;
+                                       if (i == map->sub_stripes - 1)
+                                               stripe_offset = 0;
+                               }
+                               if (stripe_index >= last_stripe &&
+                                   stripe_index <= (last_stripe +
+                                                    map->sub_stripes - 1)) {
+                                       multi->stripes[i].length -=
+                                               stripe_end_offset;
+                               }
+                       } else
+                               multi->stripes[i].length = *length;
+
+                       stripe_index++;
+                       if (stripe_index == map->num_stripes) {
+                               /* This could only happen for RAID0/10 */
+                               stripe_index = 0;
+                               stripe_nr++;
+                       }
+               }
+       } else {
+               for (i = 0; i < num_stripes; i++) {
+                       if (unplug_page) {
+                               struct btrfs_device *device;
+                               struct backing_dev_info *bdi;
+
+                               device = map->stripes[stripe_index].dev;
+                               if (device->bdev) {
+                                       bdi = blk_get_backing_dev_info(device->
+                                                                      bdev);
+                                       if (bdi->unplug_io_fn)
+                                               bdi->unplug_io_fn(bdi,
+                                                                 unplug_page);
+                               }
+                       } else {
+                               multi->stripes[i].physical =
+                                       map->stripes[stripe_index].physical +
+                                       stripe_offset +
+                                       stripe_nr * map->stripe_len;
+                               multi->stripes[i].dev =
+                                       map->stripes[stripe_index].dev;
+                       }
+                       stripe_index++;
                }
-               stripe_index++;
        }
        if (multi_ret) {
                *multi_ret = multi;