Merge git://git.jan-o-sch.net/btrfs-unstable into integration
[pandora-kernel.git] / fs / btrfs / ioctl.c
index 8bfb514..cc98939 100644 (file)
@@ -51,6 +51,7 @@
 #include "volumes.h"
 #include "locking.h"
 #include "inode-map.h"
+#include "backref.h"
 
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
@@ -117,7 +118,7 @@ void btrfs_update_iflags(struct inode *inode)
 /*
  * Inherit flags from the parent inode.
  *
- * Unlike extN we don't have any flags we don't want to inherit currently.
+ * Currently only the compression flags and the cow flags are inherited.
  */
 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 {
@@ -128,12 +129,17 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
 
        flags = BTRFS_I(dir)->flags;
 
-       if (S_ISREG(inode->i_mode))
-               flags &= ~BTRFS_INODE_DIRSYNC;
-       else if (!S_ISDIR(inode->i_mode))
-               flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
+       if (flags & BTRFS_INODE_NOCOMPRESS) {
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+       } else if (flags & BTRFS_INODE_COMPRESS) {
+               BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
+               BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
+       }
+
+       if (flags & BTRFS_INODE_NODATACOW)
+               BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
 
-       BTRFS_I(inode)->flags = flags;
        btrfs_update_iflags(inode);
 }
 
@@ -277,6 +283,7 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
        struct fstrim_range range;
        u64 minlen = ULLONG_MAX;
        u64 num_devices = 0;
+       u64 total_bytes = btrfs_super_total_bytes(root->fs_info->super_copy);
        int ret;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -295,12 +302,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
                }
        }
        rcu_read_unlock();
+
        if (!num_devices)
                return -EOPNOTSUPP;
-
        if (copy_from_user(&range, arg, sizeof(range)))
                return -EFAULT;
+       if (range.start > total_bytes)
+               return -EINVAL;
 
+       range.len = min(range.len, total_bytes - range.start);
        range.minlen = max(range.minlen, minlen);
        ret = btrfs_trim_fs(root, &range);
        if (ret < 0)
@@ -323,7 +333,7 @@ static noinline int create_subvol(struct btrfs_root *root,
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
        struct btrfs_root *new_root;
-       struct dentry *parent = dget_parent(dentry);
+       struct dentry *parent = dentry->d_parent;
        struct inode *dir;
        int ret;
        int err;
@@ -332,10 +342,8 @@ static noinline int create_subvol(struct btrfs_root *root,
        u64 index = 0;
 
        ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
-       if (ret) {
-               dput(parent);
+       if (ret)
                return ret;
-       }
 
        dir = parent->d_inode;
 
@@ -346,10 +354,8 @@ static noinline int create_subvol(struct btrfs_root *root,
         * 2 - dir items
         */
        trans = btrfs_start_transaction(root, 6);
-       if (IS_ERR(trans)) {
-               dput(parent);
+       if (IS_ERR(trans))
                return PTR_ERR(trans);
-       }
 
        leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
                                      0, objectid, NULL, 0, 0, 0);
@@ -439,7 +445,6 @@ static noinline int create_subvol(struct btrfs_root *root,
 
        d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
-       dput(parent);
        if (async_transid) {
                *async_transid = trans->transid;
                err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +461,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
                           bool readonly)
 {
        struct inode *inode;
-       struct dentry *parent;
        struct btrfs_pending_snapshot *pending_snapshot;
        struct btrfs_trans_handle *trans;
        int ret;
@@ -504,9 +508,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
        if (ret)
                goto fail;
 
-       parent = dget_parent(dentry);
-       inode = btrfs_lookup_dentry(parent->d_inode, dentry);
-       dput(parent);
+       inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
        if (IS_ERR(inode)) {
                ret = PTR_ERR(inode);
                goto fail;
@@ -768,7 +770,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
        int ret = 1;
 
        /*
-        * make sure that once we start defragging and extent, we keep on
+        * make sure that once we start defragging an extent, we keep on
         * defragging it
         */
        if (start < *defrag_end)
@@ -813,7 +815,6 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len,
         * extent will force at least part of that big extent to be defragged.
         */
        if (ret) {
-               *last_len += len;
                *defrag_end = extent_map_end(em);
        } else {
                *last_len = 0;
@@ -851,6 +852,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
        int i_done;
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
+       gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
 
        if (isize == 0)
                return 0;
@@ -868,7 +870,7 @@ again:
        for (i = 0; i < num_pages; i++) {
                struct page *page;
                page = find_or_create_page(inode->i_mapping,
-                                           start_index + i, GFP_NOFS);
+                                           start_index + i, mask);
                if (!page)
                        break;
 
@@ -980,18 +982,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
        struct btrfs_super_block *disk_super;
        struct file_ra_state *ra = NULL;
        unsigned long last_index;
+       u64 isize = i_size_read(inode);
        u64 features;
        u64 last_len = 0;
        u64 skip = 0;
        u64 defrag_end = 0;
        u64 newer_off = range->start;
-       int newer_left = 0;
        unsigned long i;
+       unsigned long ra_index = 0;
        int ret;
        int defrag_count = 0;
        int compress_type = BTRFS_COMPRESS_ZLIB;
        int extent_thresh = range->extent_thresh;
-       int newer_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+       int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
+       int cluster = max_cluster;
        u64 new_align = ~((u64)128 * 1024 - 1);
        struct page **pages = NULL;
 
@@ -1005,7 +1009,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        compress_type = range->compress_type;
        }
 
-       if (inode->i_size == 0)
+       if (isize == 0)
                return 0;
 
        /*
@@ -1021,7 +1025,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                ra = &file->f_ra;
        }
 
-       pages = kmalloc(sizeof(struct page *) * newer_cluster,
+       pages = kmalloc(sizeof(struct page *) * max_cluster,
                        GFP_NOFS);
        if (!pages) {
                ret = -ENOMEM;
@@ -1030,10 +1034,10 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
 
        /* find the last page to defrag */
        if (range->start + range->len > range->start) {
-               last_index = min_t(u64, inode->i_size - 1,
+               last_index = min_t(u64, isize - 1,
                         range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
        } else {
-               last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+               last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
        }
 
        if (newer_than) {
@@ -1046,16 +1050,24 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                         * the extents in the file evenly spaced
                         */
                        i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
-                       newer_left = newer_cluster;
                } else
                        goto out_ra;
        } else {
                i = range->start >> PAGE_CACHE_SHIFT;
        }
        if (!max_to_defrag)
-               max_to_defrag = last_index - 1;
+               max_to_defrag = last_index;
 
-       while (i <= last_index && defrag_count < max_to_defrag) {
+       /*
+        * make writeback starts from i, so the defrag range can be
+        * written sequentially.
+        */
+       if (i < inode->i_mapping->writeback_index)
+               inode->i_mapping->writeback_index = i;
+
+       while (i <= last_index && defrag_count < max_to_defrag &&
+              (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+               PAGE_CACHE_SHIFT)) {
                /*
                 * make sure we stop running if someone unmounts
                 * the FS
@@ -1078,18 +1090,31 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        i = max(i + 1, next);
                        continue;
                }
+
+               if (!newer_than) {
+                       cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
+                                  PAGE_CACHE_SHIFT) - i;
+                       cluster = min(cluster, max_cluster);
+               } else {
+                       cluster = max_cluster;
+               }
+
                if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
                        BTRFS_I(inode)->force_compress = compress_type;
 
-               btrfs_force_ra(inode->i_mapping, ra, file, i, newer_cluster);
+               if (i + cluster > ra_index) {
+                       ra_index = max(i, ra_index);
+                       btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
+                                      cluster);
+                       ra_index += max_cluster;
+               }
 
-               ret = cluster_pages_for_defrag(inode, pages, i, newer_cluster);
+               ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0)
                        goto out_ra;
 
                defrag_count += ret;
                balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret);
-               i += ret;
 
                if (newer_than) {
                        if (newer_off == (u64)-1)
@@ -1104,12 +1129,17 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        if (!ret) {
                                range->start = newer_off;
                                i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
-                               newer_left = newer_cluster;
                        } else {
                                break;
                        }
                } else {
-                       i++;
+                       if (ret > 0) {
+                               i += ret;
+                               last_len += ret << PAGE_CACHE_SHIFT;
+                       } else {
+                               i++;
+                               last_len = 0;
+                       }
                }
        }
 
@@ -1135,16 +1165,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                mutex_unlock(&inode->i_mutex);
        }
 
-       disk_super = &root->fs_info->super_copy;
+       disk_super = root->fs_info->super_copy;
        features = btrfs_super_incompat_flags(disk_super);
        if (range->compress_type == BTRFS_COMPRESS_LZO) {
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
                btrfs_set_super_incompat_flags(disk_super, features);
        }
 
-       if (!file)
-               kfree(ra);
-       return defrag_count;
+       ret = defrag_count;
 
 out_ra:
        if (!file)
@@ -2185,6 +2213,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        if (!(src_file->f_mode & FMODE_READ))
                goto out_fput;
 
+       /* don't make the dst file partly checksummed */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
+               goto out_fput;
+
        ret = -EISDIR;
        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
                goto out_fput;
@@ -2331,7 +2364,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        else
                                new_key.offset = destoff;
 
-                       trans = btrfs_start_transaction(root, 1);
+                       /*
+                        * 1 - adjusting old extent (we may have to split it)
+                        * 1 - add new extent
+                        * 1 - inode update
+                        */
+                       trans = btrfs_start_transaction(root, 3);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                goto out;
@@ -2450,7 +2488,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                        if (endoff > inode->i_size)
                                btrfs_i_size_write(inode, endoff);
 
-                       BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
                        ret = btrfs_update_inode(trans, root, inode);
                        BUG_ON(ret);
                        btrfs_end_transaction(trans, root);
@@ -2577,7 +2614,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
                return PTR_ERR(trans);
        }
 
-       dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
+       dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
        di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
                                   dir_id, "default", 7, 1);
        if (IS_ERR_OR_NULL(di)) {
@@ -2593,7 +2630,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       disk_super = &root->fs_info->super_copy;
+       disk_super = root->fs_info->super_copy;
        features = btrfs_super_incompat_flags(disk_super);
        if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
                features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
@@ -2854,6 +2891,144 @@ static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
        return ret;
 }
 
+static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
+{
+       int ret = 0;
+       int i;
+       unsigned long rel_ptr;
+       int size;
+       struct btrfs_ioctl_ino_path_args *ipa = NULL;
+       struct inode_fs_paths *ipath = NULL;
+       struct btrfs_path *path;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ipa = memdup_user(arg, sizeof(*ipa));
+       if (IS_ERR(ipa)) {
+               ret = PTR_ERR(ipa);
+               ipa = NULL;
+               goto out;
+       }
+
+       size = min_t(u32, ipa->size, 4096);
+       ipath = init_ipath(size, root, path);
+       if (IS_ERR(ipath)) {
+               ret = PTR_ERR(ipath);
+               ipath = NULL;
+               goto out;
+       }
+
+       ret = paths_from_inode(ipa->inum, ipath);
+       if (ret < 0)
+               goto out;
+
+       for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
+               rel_ptr = ipath->fspath->str[i] - (char *)ipath->fspath->str;
+               ipath->fspath->str[i] = (void *)rel_ptr;
+       }
+
+       ret = copy_to_user(ipa->fspath, ipath->fspath, size);
+       if (ret) {
+               ret = -EFAULT;
+               goto out;
+       }
+
+out:
+       btrfs_free_path(path);
+       free_ipath(ipath);
+       kfree(ipa);
+
+       return ret;
+}
+
+static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
+{
+       struct btrfs_data_container *inodes = ctx;
+       const size_t c = 3 * sizeof(u64);
+
+       if (inodes->bytes_left >= c) {
+               inodes->bytes_left -= c;
+               inodes->val[inodes->elem_cnt] = inum;
+               inodes->val[inodes->elem_cnt + 1] = offset;
+               inodes->val[inodes->elem_cnt + 2] = root;
+               inodes->elem_cnt += 3;
+       } else {
+               inodes->bytes_missing += c - inodes->bytes_left;
+               inodes->bytes_left = 0;
+               inodes->elem_missed += 3;
+       }
+
+       return 0;
+}
+
+static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
+                                       void __user *arg)
+{
+       int ret = 0;
+       int size;
+       u64 extent_offset;
+       struct btrfs_ioctl_logical_ino_args *loi;
+       struct btrfs_data_container *inodes = NULL;
+       struct btrfs_path *path = NULL;
+       struct btrfs_key key;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       loi = memdup_user(arg, sizeof(*loi));
+       if (IS_ERR(loi)) {
+               ret = PTR_ERR(loi);
+               loi = NULL;
+               goto out;
+       }
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       size = min_t(u32, loi->size, 4096);
+       inodes = init_data_container(size);
+       if (IS_ERR(inodes)) {
+               ret = PTR_ERR(inodes);
+               inodes = NULL;
+               goto out;
+       }
+
+       ret = extent_from_logical(root->fs_info, loi->logical, path, &key);
+
+       if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+               ret = -ENOENT;
+       if (ret < 0)
+               goto out;
+
+       extent_offset = loi->logical - key.objectid;
+       ret = iterate_extent_inodes(root->fs_info, path, key.objectid,
+                                       extent_offset, build_ino_list, inodes);
+
+       if (ret < 0)
+               goto out;
+
+       ret = copy_to_user(loi->inodes, inodes, size);
+       if (ret)
+               ret = -EFAULT;
+
+out:
+       btrfs_free_path(path);
+       kfree(inodes);
+       kfree(loi);
+
+       return ret;
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
                cmd, unsigned long arg)
 {
@@ -2911,6 +3086,10 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_tree_search(file, argp);
        case BTRFS_IOC_INO_LOOKUP:
                return btrfs_ioctl_ino_lookup(file, argp);
+       case BTRFS_IOC_INO_PATHS:
+               return btrfs_ioctl_ino_to_path(root, argp);
+       case BTRFS_IOC_LOGICAL_INO:
+               return btrfs_ioctl_logical_to_ino(root, argp);
        case BTRFS_IOC_SPACE_INFO:
                return btrfs_ioctl_space_info(root, argp);
        case BTRFS_IOC_SYNC: