Merge branch 'pm-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
[pandora-kernel.git] / fs / btrfs / volumes.c
index f2a4cc7..c37433d 100644 (file)
@@ -366,6 +366,14 @@ static noinline int device_list_add(const char *path,
                }
                INIT_LIST_HEAD(&device->dev_alloc_list);
 
+               /* init readahead state */
+               spin_lock_init(&device->reada_lock);
+               device->reada_curr_zone = NULL;
+               atomic_set(&device->reada_in_flight, 0);
+               device->reada_next = 0;
+               INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+               INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+
                mutex_lock(&fs_devices->device_list_mutex);
                list_add_rcu(&device->dev_list, &fs_devices->devices);
                mutex_unlock(&fs_devices->device_list_mutex);
@@ -597,10 +605,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
                set_blocksize(bdev, 4096);
 
                bh = btrfs_read_dev_super(bdev);
-               if (!bh) {
-                       ret = -EINVAL;
+               if (!bh)
                        goto error_close;
-               }
 
                disk_super = (struct btrfs_super_block *)bh->b_data;
                devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -655,7 +661,7 @@ error:
                continue;
        }
        if (fs_devices->open_devices == 0) {
-               ret = -EIO;
+               ret = -EINVAL;
                goto out;
        }
        fs_devices->seeding = seeding;
@@ -993,7 +999,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        key.objectid = device->devid;
        key.offset = start;
        key.type = BTRFS_DEV_EXTENT_KEY;
-
+again:
        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        if (ret > 0) {
                ret = btrfs_previous_item(root, path, key.objectid,
@@ -1006,6 +1012,9 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
                                        struct btrfs_dev_extent);
                BUG_ON(found_key.offset > start || found_key.offset +
                       btrfs_dev_extent_length(leaf, extent) < start);
+               key = found_key;
+               btrfs_release_path(path);
+               goto again;
        } else if (ret == 0) {
                leaf = path->nodes[0];
                extent = btrfs_item_ptr(leaf, path->slots[0],
@@ -1013,8 +1022,13 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
        }
        BUG_ON(ret);
 
-       if (device->bytes_used > 0)
-               device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
+       if (device->bytes_used > 0) {
+               u64 len = btrfs_dev_extent_length(leaf, extent);
+               device->bytes_used -= len;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += len;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+       }
        ret = btrfs_del_item(trans, root, path);
 
 out:
@@ -1356,6 +1370,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        if (ret)
                goto error_undo;
 
+       spin_lock(&root->fs_info->free_chunk_lock);
+       root->fs_info->free_chunk_space = device->total_bytes -
+               device->bytes_used;
+       spin_unlock(&root->fs_info->free_chunk_lock);
+
        device->in_fs_metadata = 0;
        btrfs_scrub_cancel_dev(root, device);
 
@@ -1387,8 +1406,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        call_rcu(&device->rcu, free_device);
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
-       num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
-       btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices);
+       num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
+       btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
 
        if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
@@ -1450,7 +1469,7 @@ static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        struct btrfs_fs_devices *old_devices;
        struct btrfs_fs_devices *seed_devices;
-       struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
+       struct btrfs_super_block *disk_super = root->fs_info->super_copy;
        struct btrfs_device *device;
        u64 super_flags;
 
@@ -1691,15 +1710,19 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                root->fs_info->fs_devices->num_can_discard++;
        root->fs_info->fs_devices->total_rw_bytes += device->total_bytes;
 
+       spin_lock(&root->fs_info->free_chunk_lock);
+       root->fs_info->free_chunk_space += device->total_bytes;
+       spin_unlock(&root->fs_info->free_chunk_lock);
+
        if (!blk_queue_nonrot(bdev_get_queue(bdev)))
                root->fs_info->fs_devices->rotating = 1;
 
-       total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
-       btrfs_set_super_total_bytes(&root->fs_info->super_copy,
+       total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
+       btrfs_set_super_total_bytes(root->fs_info->super_copy,
                                    total_bytes + device->total_bytes);
 
-       total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
-       btrfs_set_super_num_devices(&root->fs_info->super_copy,
+       total_bytes = btrfs_super_num_devices(root->fs_info->super_copy);
+       btrfs_set_super_num_devices(root->fs_info->super_copy,
                                    total_bytes + 1);
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
@@ -1790,7 +1813,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
                      struct btrfs_device *device, u64 new_size)
 {
        struct btrfs_super_block *super_copy =
-               &device->dev_root->fs_info->super_copy;
+               device->dev_root->fs_info->super_copy;
        u64 old_total = btrfs_super_total_bytes(super_copy);
        u64 diff = new_size - device->total_bytes;
 
@@ -1849,7 +1872,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
 static int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
                        chunk_offset)
 {
-       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct btrfs_super_block *super_copy = root->fs_info->super_copy;
        struct btrfs_disk_key *disk_key;
        struct btrfs_chunk *chunk;
        u8 *ptr;
@@ -2175,7 +2198,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        bool retried = false;
        struct extent_buffer *l;
        struct btrfs_key key;
-       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct btrfs_super_block *super_copy = root->fs_info->super_copy;
        u64 old_total = btrfs_super_total_bytes(super_copy);
        u64 old_size = device->total_bytes;
        u64 diff = device->total_bytes - new_size;
@@ -2192,8 +2215,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        lock_chunks(root);
 
        device->total_bytes = new_size;
-       if (device->writeable)
+       if (device->writeable) {
                device->fs_devices->total_rw_bytes -= diff;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space -= diff;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+       }
        unlock_chunks(root);
 
 again:
@@ -2257,6 +2284,9 @@ again:
                device->total_bytes = old_size;
                if (device->writeable)
                        device->fs_devices->total_rw_bytes += diff;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += diff;
+               spin_unlock(&root->fs_info->free_chunk_lock);
                unlock_chunks(root);
                goto done;
        }
@@ -2292,7 +2322,7 @@ static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
                           struct btrfs_key *key,
                           struct btrfs_chunk *chunk, int item_size)
 {
-       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct btrfs_super_block *super_copy = root->fs_info->super_copy;
        struct btrfs_disk_key disk_key;
        u32 array_size;
        u8 *ptr;
@@ -2615,6 +2645,11 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
                index++;
        }
 
+       spin_lock(&extent_root->fs_info->free_chunk_lock);
+       extent_root->fs_info->free_chunk_space -= (stripe_size *
+                                                  map->num_stripes);
+       spin_unlock(&extent_root->fs_info->free_chunk_lock);
+
        index = 0;
        stripe = &chunk->stripe;
        while (index < map->num_stripes) {
@@ -2848,7 +2883,7 @@ static int find_live_mirror(struct map_lookup *map, int first, int num,
 
 static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                             u64 logical, u64 *length,
-                            struct btrfs_multi_bio **multi_ret,
+                            struct btrfs_bio **bbio_ret,
                             int mirror_num)
 {
        struct extent_map *em;
@@ -2866,18 +2901,18 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        int i;
        int num_stripes;
        int max_errors = 0;
-       struct btrfs_multi_bio *multi = NULL;
+       struct btrfs_bio *bbio = NULL;
 
-       if (multi_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
+       if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
                stripes_allocated = 1;
 again:
-       if (multi_ret) {
-               multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+       if (bbio_ret) {
+               bbio = kzalloc(btrfs_bio_size(stripes_allocated),
                                GFP_NOFS);
-               if (!multi)
+               if (!bbio)
                        return -ENOMEM;
 
-               atomic_set(&multi->error, 0);
+               atomic_set(&bbio->error, 0);
        }
 
        read_lock(&em_tree->lock);
@@ -2898,7 +2933,7 @@ again:
        if (mirror_num > map->num_stripes)
                mirror_num = 0;
 
-       /* if our multi bio struct is too small, back off and try again */
+       /* if our btrfs_bio struct is too small, back off and try again */
        if (rw & REQ_WRITE) {
                if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
                                 BTRFS_BLOCK_GROUP_DUP)) {
@@ -2917,11 +2952,11 @@ again:
                        stripes_required = map->num_stripes;
                }
        }
-       if (multi_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
+       if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
            stripes_allocated < stripes_required) {
                stripes_allocated = map->num_stripes;
                free_extent_map(em);
-               kfree(multi);
+               kfree(bbio);
                goto again;
        }
        stripe_nr = offset;
@@ -2950,7 +2985,7 @@ again:
                *length = em->len - offset;
        }
 
-       if (!multi_ret)
+       if (!bbio_ret)
                goto out;
 
        num_stripes = 1;
@@ -2975,13 +3010,17 @@ again:
                        stripe_index = find_live_mirror(map, 0,
                                            map->num_stripes,
                                            current->pid % map->num_stripes);
+                       mirror_num = stripe_index + 1;
                }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-               if (rw & (REQ_WRITE | REQ_DISCARD))
+               if (rw & (REQ_WRITE | REQ_DISCARD)) {
                        num_stripes = map->num_stripes;
-               else if (mirror_num)
+               } else if (mirror_num) {
                        stripe_index = mirror_num - 1;
+               } else {
+                       mirror_num = 1;
+               }
 
        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                int factor = map->num_stripes / map->sub_stripes;
@@ -3001,6 +3040,7 @@ again:
                        stripe_index = find_live_mirror(map, stripe_index,
                                              map->sub_stripes, stripe_index +
                                              current->pid % map->sub_stripes);
+                       mirror_num = stripe_index + 1;
                }
        } else {
                /*
@@ -3009,15 +3049,16 @@ again:
                 * stripe_index is the number of our device in the stripe array
                 */
                stripe_index = do_div(stripe_nr, map->num_stripes);
+               mirror_num = stripe_index + 1;
        }
        BUG_ON(stripe_index >= map->num_stripes);
 
        if (rw & REQ_DISCARD) {
                for (i = 0; i < num_stripes; i++) {
-                       multi->stripes[i].physical =
+                       bbio->stripes[i].physical =
                                map->stripes[stripe_index].physical +
                                stripe_offset + stripe_nr * map->stripe_len;
-                       multi->stripes[i].dev = map->stripes[stripe_index].dev;
+                       bbio->stripes[i].dev = map->stripes[stripe_index].dev;
 
                        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
                                u64 stripes;
@@ -3038,16 +3079,16 @@ again:
                                }
                                stripes = stripe_nr_end - 1 - j;
                                do_div(stripes, map->num_stripes);
-                               multi->stripes[i].length = map->stripe_len *
+                               bbio->stripes[i].length = map->stripe_len *
                                        (stripes - stripe_nr + 1);
 
                                if (i == 0) {
-                                       multi->stripes[i].length -=
+                                       bbio->stripes[i].length -=
                                                stripe_offset;
                                        stripe_offset = 0;
                                }
                                if (stripe_index == last_stripe)
-                                       multi->stripes[i].length -=
+                                       bbio->stripes[i].length -=
                                                stripe_end_offset;
                        } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
                                u64 stripes;
@@ -3072,11 +3113,11 @@ again:
                                }
                                stripes = stripe_nr_end - 1 - j;
                                do_div(stripes, factor);
-                               multi->stripes[i].length = map->stripe_len *
+                               bbio->stripes[i].length = map->stripe_len *
                                        (stripes - stripe_nr + 1);
 
                                if (i < map->sub_stripes) {
-                                       multi->stripes[i].length -=
+                                       bbio->stripes[i].length -=
                                                stripe_offset;
                                        if (i == map->sub_stripes - 1)
                                                stripe_offset = 0;
@@ -3084,11 +3125,11 @@ again:
                                if (stripe_index >= last_stripe &&
                                    stripe_index <= (last_stripe +
                                                     map->sub_stripes - 1)) {
-                                       multi->stripes[i].length -=
+                                       bbio->stripes[i].length -=
                                                stripe_end_offset;
                                }
                        } else
-                               multi->stripes[i].length = *length;
+                               bbio->stripes[i].length = *length;
 
                        stripe_index++;
                        if (stripe_index == map->num_stripes) {
@@ -3099,19 +3140,20 @@ again:
                }
        } else {
                for (i = 0; i < num_stripes; i++) {
-                       multi->stripes[i].physical =
+                       bbio->stripes[i].physical =
                                map->stripes[stripe_index].physical +
                                stripe_offset +
                                stripe_nr * map->stripe_len;
-                       multi->stripes[i].dev =
+                       bbio->stripes[i].dev =
                                map->stripes[stripe_index].dev;
                        stripe_index++;
                }
        }
-       if (multi_ret) {
-               *multi_ret = multi;
-               multi->num_stripes = num_stripes;
-               multi->max_errors = max_errors;
+       if (bbio_ret) {
+               *bbio_ret = bbio;
+               bbio->num_stripes = num_stripes;
+               bbio->max_errors = max_errors;
+               bbio->mirror_num = mirror_num;
        }
 out:
        free_extent_map(em);
@@ -3120,9 +3162,9 @@ out:
 
 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                      u64 logical, u64 *length,
-                     struct btrfs_multi_bio **multi_ret, int mirror_num)
+                     struct btrfs_bio **bbio_ret, int mirror_num)
 {
-       return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
+       return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret,
                                 mirror_num);
 }
 
@@ -3191,28 +3233,30 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
        return 0;
 }
 
-static void end_bio_multi_stripe(struct bio *bio, int err)
+static void btrfs_end_bio(struct bio *bio, int err)
 {
-       struct btrfs_multi_bio *multi = bio->bi_private;
+       struct btrfs_bio *bbio = bio->bi_private;
        int is_orig_bio = 0;
 
        if (err)
-               atomic_inc(&multi->error);
+               atomic_inc(&bbio->error);
 
-       if (bio == multi->orig_bio)
+       if (bio == bbio->orig_bio)
                is_orig_bio = 1;
 
-       if (atomic_dec_and_test(&multi->stripes_pending)) {
+       if (atomic_dec_and_test(&bbio->stripes_pending)) {
                if (!is_orig_bio) {
                        bio_put(bio);
-                       bio = multi->orig_bio;
+                       bio = bbio->orig_bio;
                }
-               bio->bi_private = multi->private;
-               bio->bi_end_io = multi->end_io;
+               bio->bi_private = bbio->private;
+               bio->bi_end_io = bbio->end_io;
+               bio->bi_bdev = (struct block_device *)
+                                       (unsigned long)bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the multi-bio
                 */
-               if (atomic_read(&multi->error) > multi->max_errors) {
+               if (atomic_read(&bbio->error) > bbio->max_errors) {
                        err = -EIO;
                } else if (err) {
                        /*
@@ -3222,7 +3266,7 @@ static void end_bio_multi_stripe(struct bio *bio, int err)
                        set_bit(BIO_UPTODATE, &bio->bi_flags);
                        err = 0;
                }
-               kfree(multi);
+               kfree(bbio);
 
                bio_endio(bio, err);
        } else if (!is_orig_bio) {
@@ -3302,20 +3346,20 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
        u64 logical = (u64)bio->bi_sector << 9;
        u64 length = 0;
        u64 map_length;
-       struct btrfs_multi_bio *multi = NULL;
        int ret;
        int dev_nr = 0;
        int total_devs = 1;
+       struct btrfs_bio *bbio = NULL;
 
        length = bio->bi_size;
        map_tree = &root->fs_info->mapping_tree;
        map_length = length;
 
-       ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
+       ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio,
                              mirror_num);
        BUG_ON(ret);
 
-       total_devs = multi->num_stripes;
+       total_devs = bbio->num_stripes;
        if (map_length < length) {
                printk(KERN_CRIT "mapping failed logical %llu bio len %llu "
                       "len %llu\n", (unsigned long long)logical,
@@ -3323,25 +3367,28 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                       (unsigned long long)map_length);
                BUG();
        }
-       multi->end_io = first_bio->bi_end_io;
-       multi->private = first_bio->bi_private;
-       multi->orig_bio = first_bio;
-       atomic_set(&multi->stripes_pending, multi->num_stripes);
+
+       bbio->orig_bio = first_bio;
+       bbio->private = first_bio->bi_private;
+       bbio->end_io = first_bio->bi_end_io;
+       atomic_set(&bbio->stripes_pending, bbio->num_stripes);
 
        while (dev_nr < total_devs) {
-               if (total_devs > 1) {
-                       if (dev_nr < total_devs - 1) {
-                               bio = bio_clone(first_bio, GFP_NOFS);
-                               BUG_ON(!bio);
-                       } else {
-                               bio = first_bio;
-                       }
-                       bio->bi_private = multi;
-                       bio->bi_end_io = end_bio_multi_stripe;
+               if (dev_nr < total_devs - 1) {
+                       bio = bio_clone(first_bio, GFP_NOFS);
+                       BUG_ON(!bio);
+               } else {
+                       bio = first_bio;
                }
-               bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
-               dev = multi->stripes[dev_nr].dev;
+               bio->bi_private = bbio;
+               bio->bi_end_io = btrfs_end_bio;
+               bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
+               dev = bbio->stripes[dev_nr].dev;
                if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
+                       pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
+                                "(%s id %llu), size=%u\n", rw,
+                                (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
+                                dev->name, dev->devid, bio->bi_size);
                        bio->bi_bdev = dev->bdev;
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
@@ -3354,8 +3401,6 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                }
                dev_nr++;
        }
-       if (total_devs == 1)
-               kfree(multi);
        return 0;
 }
 
@@ -3616,15 +3661,20 @@ static int read_one_dev(struct btrfs_root *root,
        fill_device_from_item(leaf, dev_item, device);
        device->dev_root = root->fs_info->dev_root;
        device->in_fs_metadata = 1;
-       if (device->writeable)
+       if (device->writeable) {
                device->fs_devices->total_rw_bytes += device->total_bytes;
+               spin_lock(&root->fs_info->free_chunk_lock);
+               root->fs_info->free_chunk_space += device->total_bytes -
+                       device->bytes_used;
+               spin_unlock(&root->fs_info->free_chunk_lock);
+       }
        ret = 0;
        return ret;
 }
 
 int btrfs_read_sys_array(struct btrfs_root *root)
 {
-       struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
+       struct btrfs_super_block *super_copy = root->fs_info->super_copy;
        struct extent_buffer *sb;
        struct btrfs_disk_key *disk_key;
        struct btrfs_chunk *chunk;