btrfs: fix wrong free space information of btrfs
[pandora-kernel.git] / fs / btrfs / super.c
index 1776dbd..2963376 100644 (file)
@@ -61,14 +61,17 @@ static void btrfs_put_super(struct super_block *sb)
 
        ret = close_ctree(root);
        sb->s_fs_info = NULL;
+
+       (void)ret; /* FIXME: need to fix VFS to return error? */
 }
 
 enum {
        Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
        Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
-       Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-       Opt_discard, Opt_err,
+       Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
+       Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
+       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -83,7 +86,9 @@ static match_table_t tokens = {
        {Opt_alloc_start, "alloc_start=%s"},
        {Opt_thread_pool, "thread_pool=%d"},
        {Opt_compress, "compress"},
+       {Opt_compress_type, "compress=%s"},
        {Opt_compress_force, "compress-force"},
+       {Opt_compress_force_type, "compress-force=%s"},
        {Opt_ssd, "ssd"},
        {Opt_ssd_spread, "ssd_spread"},
        {Opt_nossd, "nossd"},
@@ -92,6 +97,9 @@ static match_table_t tokens = {
        {Opt_flushoncommit, "flushoncommit"},
        {Opt_ratio, "metadata_ratio=%d"},
        {Opt_discard, "discard"},
+       {Opt_space_cache, "space_cache"},
+       {Opt_clear_cache, "clear_cache"},
+       {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_err, NULL},
 };
 
@@ -106,6 +114,8 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        char *p, *num, *orig;
        int intarg;
        int ret = 0;
+       char *compress_type;
+       bool compress_force = false;
 
        if (!options)
                return 0;
@@ -148,14 +158,32 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        btrfs_set_opt(info->mount_opt, NODATACOW);
                        btrfs_set_opt(info->mount_opt, NODATASUM);
                        break;
-               case Opt_compress:
-                       printk(KERN_INFO "btrfs: use compression\n");
-                       btrfs_set_opt(info->mount_opt, COMPRESS);
-                       break;
                case Opt_compress_force:
-                       printk(KERN_INFO "btrfs: forcing compression\n");
-                       btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+               case Opt_compress_force_type:
+                       compress_force = true;
+               case Opt_compress:
+               case Opt_compress_type:
+                       if (token == Opt_compress ||
+                           token == Opt_compress_force ||
+                           strcmp(args[0].from, "zlib") == 0) {
+                               compress_type = "zlib";
+                               info->compress_type = BTRFS_COMPRESS_ZLIB;
+                       } else if (strcmp(args[0].from, "lzo") == 0) {
+                               compress_type = "lzo";
+                               info->compress_type = BTRFS_COMPRESS_LZO;
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
+
                        btrfs_set_opt(info->mount_opt, COMPRESS);
+                       if (compress_force) {
+                               btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+                               pr_info("btrfs: force %s compression\n",
+                                       compress_type);
+                       } else
+                               pr_info("btrfs: use %s compression\n",
+                                       compress_type);
                        break;
                case Opt_ssd:
                        printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
@@ -235,6 +263,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_discard:
                        btrfs_set_opt(info->mount_opt, DISCARD);
                        break;
+               case Opt_space_cache:
+                       printk(KERN_INFO "btrfs: enabling disk space caching\n");
+                       btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+                       break;
+               case Opt_clear_cache:
+                       printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+                       btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+                       break;
+               case Opt_user_subvol_rm_allowed:
+                       btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -380,7 +419,7 @@ static struct dentry *get_default_root(struct super_block *sb,
 find_root:
        new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
        if (IS_ERR(new_root))
-               return ERR_PTR(PTR_ERR(new_root));
+               return ERR_CAST(new_root);
 
        if (btrfs_root_refs(&new_root->root_item) == 0)
                return ERR_PTR(-ENOENT);
@@ -436,7 +475,6 @@ static int btrfs_fill_super(struct super_block *sb,
 {
        struct inode *inode;
        struct dentry *root_dentry;
-       struct btrfs_super_block *disk_super;
        struct btrfs_root *tree_root;
        struct btrfs_key key;
        int err;
@@ -458,7 +496,6 @@ static int btrfs_fill_super(struct super_block *sb,
                return PTR_ERR(tree_root);
        }
        sb->s_fs_info = tree_root;
-       disk_super = &tree_root->fs_info->super_copy;
 
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_INODE_ITEM_KEY;
@@ -548,12 +585,26 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 static int btrfs_test_super(struct super_block *s, void *data)
 {
-       struct btrfs_fs_devices *test_fs_devices = data;
+       struct btrfs_root *test_root = data;
        struct btrfs_root *root = btrfs_sb(s);
 
-       return root->fs_info->fs_devices == test_fs_devices;
+       /*
+        * If this super block is going away, return false as it
+        * can't match as an existing super block.
+        */
+       if (!atomic_read(&s->s_active))
+               return 0;
+       return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+}
+
+static int btrfs_set_super(struct super_block *s, void *data)
+{
+       s->s_fs_info = data;
+
+       return set_anon_super(s, data);
 }
 
+
 /*
  * Find a superblock for the given device / mount point.
  *
@@ -567,11 +618,12 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
        struct super_block *s;
        struct dentry *root;
        struct btrfs_fs_devices *fs_devices = NULL;
+       struct btrfs_root *tree_root = NULL;
+       struct btrfs_fs_info *fs_info = NULL;
        fmode_t mode = FMODE_READ;
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
        int error = 0;
-       int found = 0;
 
        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
@@ -595,8 +647,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                goto error_close_devices;
        }
 
+       /*
+        * Setup a dummy root and fs_info for test/set super.  This is because
+        * we don't actually fill this stuff out until open_ctree, but we need
+        * it for searching for existing supers, so this lets us do that and
+        * then open_ctree will properly initialize everything later.
+        */
+       fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+       tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+       if (!fs_info || !tree_root) {
+               error = -ENOMEM;
+               goto error_close_devices;
+       }
+       fs_info->tree_root = tree_root;
+       fs_info->fs_devices = fs_devices;
+       tree_root->fs_info = fs_info;
+
        bdev = fs_devices->latest_bdev;
-       s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+       s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
        if (IS_ERR(s))
                goto error_s;
 
@@ -607,7 +675,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                        goto error_close_devices;
                }
 
-               found = 1;
                btrfs_close_devices(fs_devices);
        } else {
                char b[BDEVNAME_SIZE];
@@ -629,7 +696,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
        if (IS_ERR(root)) {
                error = PTR_ERR(root);
                deactivate_locked_super(s);
-               goto error;
+               goto error_free_subvol_name;
        }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
@@ -640,17 +707,17 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
                mutex_unlock(&root->d_inode->i_mutex);
 
                if (IS_ERR(new_root)) {
+                       dput(root);
                        deactivate_locked_super(s);
                        error = PTR_ERR(new_root);
-                       dput(root);
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                }
                if (!new_root->d_inode) {
                        dput(root);
                        dput(new_root);
                        deactivate_locked_super(s);
                        error = -ENXIO;
-                       goto error_close_devices;
+                       goto error_free_subvol_name;
                }
                dput(root);
                root = new_root;
@@ -666,9 +733,10 @@ error_s:
        error = PTR_ERR(s);
 error_close_devices:
        btrfs_close_devices(fs_devices);
+       kfree(fs_info);
+       kfree(tree_root);
 error_free_subvol_name:
        kfree(subvol_name);
-error:
        return error;
 }
 
@@ -709,6 +777,127 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        return 0;
 }
 
+/*
+ * The helper to calc the free space on the devices that can be used to store
+ * file data.
+ */
+static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
+{
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       struct btrfs_device_info *devices_info;
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_device *device;
+       u64 skip_space;
+       u64 type;
+       u64 avail_space;
+       u64 used_space;
+       u64 min_stripe_size;
+       int min_stripes = 1;
+       int i = 0, nr_devices;
+       int ret;
+
+       nr_devices = fs_info->fs_devices->rw_devices;
+       BUG_ON(!nr_devices);
+
+       devices_info = kmalloc(sizeof(*devices_info) * nr_devices,
+                              GFP_NOFS);
+       if (!devices_info)
+               return -ENOMEM;
+
+       /* calc min stripe number for data space alloction */
+       type = btrfs_get_alloc_profile(root, 1);
+       if (type & BTRFS_BLOCK_GROUP_RAID0)
+               min_stripes = 2;
+       else if (type & BTRFS_BLOCK_GROUP_RAID1)
+               min_stripes = 2;
+       else if (type & BTRFS_BLOCK_GROUP_RAID10)
+               min_stripes = 4;
+
+       if (type & BTRFS_BLOCK_GROUP_DUP)
+               min_stripe_size = 2 * BTRFS_STRIPE_LEN;
+       else
+               min_stripe_size = BTRFS_STRIPE_LEN;
+
+       list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
+               if (!device->in_fs_metadata)
+                       continue;
+
+               avail_space = device->total_bytes - device->bytes_used;
+
+               /* align with stripe_len */
+               do_div(avail_space, BTRFS_STRIPE_LEN);
+               avail_space *= BTRFS_STRIPE_LEN;
+
+               /*
+                * In order to avoid overwritting the superblock on the drive,
+                * btrfs starts at an offset of at least 1MB when doing chunk
+                * allocation.
+                */
+               skip_space = 1024 * 1024;
+
+               /* user can set the offset in fs_info->alloc_start. */
+               if (fs_info->alloc_start + BTRFS_STRIPE_LEN <=
+                   device->total_bytes)
+                       skip_space = max(fs_info->alloc_start, skip_space);
+
+               /*
+                * btrfs can not use the free space in [0, skip_space - 1],
+                * we must subtract it from the total. In order to implement
+                * it, we account the used space in this range first.
+                */
+               ret = btrfs_account_dev_extents_size(device, 0, skip_space - 1,
+                                                    &used_space);
+               if (ret) {
+                       kfree(devices_info);
+                       return ret;
+               }
+
+               /* calc the free space in [0, skip_space - 1] */
+               skip_space -= used_space;
+
+               /*
+                * we can use the free space in [0, skip_space - 1], subtract
+                * it from the total.
+                */
+               if (avail_space && avail_space >= skip_space)
+                       avail_space -= skip_space;
+               else
+                       avail_space = 0;
+
+               if (avail_space < min_stripe_size)
+                       continue;
+
+               devices_info[i].dev = device;
+               devices_info[i].max_avail = avail_space;
+
+               i++;
+       }
+
+       nr_devices = i;
+
+       btrfs_descending_sort_devices(devices_info, nr_devices);
+
+       i = nr_devices - 1;
+       avail_space = 0;
+       while (nr_devices >= min_stripes) {
+               if (devices_info[i].max_avail >= min_stripe_size) {
+                       int j;
+                       u64 alloc_size;
+
+                       avail_space += devices_info[i].max_avail * min_stripes;
+                       alloc_size = devices_info[i].max_avail;
+                       for (j = i + 1 - min_stripes; j <= i; j++)
+                               devices_info[j].max_avail -= alloc_size;
+               }
+               i--;
+               nr_devices--;
+       }
+
+       kfree(devices_info);
+       *free_bytes = avail_space;
+       return 0;
+}
+
 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct btrfs_root *root = btrfs_sb(dentry->d_sb);
@@ -716,20 +905,39 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct list_head *head = &root->fs_info->space_info;
        struct btrfs_space_info *found;
        u64 total_used = 0;
+       u64 total_free_data = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)root->fs_info->fsid;
+       int ret;
 
+       /* holding chunk_muext to avoid allocating new chunks */
+       mutex_lock(&root->fs_info->chunk_mutex);
        rcu_read_lock();
-       list_for_each_entry_rcu(found, head, list)
+       list_for_each_entry_rcu(found, head, list) {
+               if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+                       total_free_data += found->disk_total - found->disk_used;
+                       total_free_data -=
+                               btrfs_account_ro_block_groups_free_space(found);
+               }
+
                total_used += found->disk_used;
+       }
        rcu_read_unlock();
 
        buf->f_namelen = BTRFS_NAME_LEN;
        buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
        buf->f_bfree = buf->f_blocks - (total_used >> bits);
-       buf->f_bavail = buf->f_bfree;
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_type = BTRFS_SUPER_MAGIC;
+       buf->f_bavail = total_free_data;
+       ret = btrfs_calc_avail_data_space(root, &total_free_data);
+       if (ret) {
+               mutex_unlock(&root->fs_info->chunk_mutex);
+               return ret;
+       }
+       buf->f_bavail += total_free_data;
+       buf->f_bavail = buf->f_bavail >> bits;
+       mutex_unlock(&root->fs_info->chunk_mutex);
 
        /* We treat it as constant endianness (it doesn't matter _which_)
           because we want the fsid to come out the same whether mounted
@@ -845,10 +1053,14 @@ static int __init init_btrfs_fs(void)
        if (err)
                return err;
 
-       err = btrfs_init_cachep();
+       err = btrfs_init_compress();
        if (err)
                goto free_sysfs;
 
+       err = btrfs_init_cachep();
+       if (err)
+               goto free_compress;
+
        err = extent_io_init();
        if (err)
                goto free_cachep;
@@ -876,6 +1088,8 @@ free_extent_io:
        extent_io_exit();
 free_cachep:
        btrfs_destroy_cachep();
+free_compress:
+       btrfs_exit_compress();
 free_sysfs:
        btrfs_exit_sysfs();
        return err;
@@ -890,7 +1104,7 @@ static void __exit exit_btrfs_fs(void)
        unregister_filesystem(&btrfs_fs_type);
        btrfs_exit_sysfs();
        btrfs_cleanup_fs_uuids();
-       btrfs_zlib_exit();
+       btrfs_exit_compress();
 }
 
 module_init(init_btrfs_fs)