btrfs: ensure that file descriptor used with subvol ioctls is a dir
[pandora-kernel.git] / fs / btrfs / ioctl.c
index 72d4616..360072f 100644 (file)
@@ -252,11 +252,11 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        trans = btrfs_join_transaction(root);
        BUG_ON(IS_ERR(trans));
 
+       btrfs_update_iflags(inode);
+       inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode(trans, root, inode);
        BUG_ON(ret);
 
-       btrfs_update_iflags(inode);
-       inode->i_ctime = CURRENT_TIME;
        btrfs_end_transaction(trans, root);
 
        mnt_drop_write(file->f_path.mnt);
@@ -858,8 +858,10 @@ static int cluster_pages_for_defrag(struct inode *inode,
                return 0;
        file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
 
+       mutex_lock(&inode->i_mutex);
        ret = btrfs_delalloc_reserve_space(inode,
                                           num_pages << PAGE_CACHE_SHIFT);
+       mutex_unlock(&inode->i_mutex);
        if (ret)
                return ret;
 again:
@@ -1300,6 +1302,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
        int namelen;
        int ret = 0;
 
+       if (!S_ISDIR(file->f_dentry->d_inode->i_mode))
+               return -ENOTDIR;
+
        if (root->fs_info->sb->s_flags & MS_RDONLY)
                return -EROFS;
 
@@ -1325,12 +1330,17 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
                        printk(KERN_INFO "btrfs: Snapshot src from "
                               "another FS\n");
                        ret = -EINVAL;
-                       fput(src_file);
-                       goto out;
+               } else if (!inode_owner_or_capable(src_inode)) {
+                       /*
+                        * Subvolume creation is not restricted, but snapshots
+                        * are limited to own subvolumes only
+                        */
+                       ret = -EPERM;
+               } else {
+                       ret = btrfs_mksubvol(&file->f_path, name, namelen,
+                                            BTRFS_I(src_inode)->root,
+                                            transid, readonly);
                }
-               ret = btrfs_mksubvol(&file->f_path, name, namelen,
-                                    BTRFS_I(src_inode)->root,
-                                    transid, readonly);
                fput(src_file);
        }
 out:
@@ -1343,6 +1353,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
        struct btrfs_ioctl_vol_args *vol_args;
        int ret;
 
+       if (!S_ISDIR(file->f_dentry->d_inode->i_mode))
+               return -ENOTDIR;
+
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args))
                return PTR_ERR(vol_args);
@@ -1365,6 +1378,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
        u64 *ptr = NULL;
        bool readonly = false;
 
+       if (!S_ISDIR(file->f_dentry->d_inode->i_mode))
+               return -ENOTDIR;
+
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args))
                return PTR_ERR(vol_args);
@@ -1569,7 +1585,11 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                item_off = btrfs_item_ptr_offset(leaf, i);
                item_len = btrfs_item_size_nr(leaf, i);
 
-               if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
+               btrfs_item_key_to_cpu(leaf, key, i);
+               if (!key_in_sk(key, sk))
+                       continue;
+
+               if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
                        item_len = 0;
 
                if (sizeof(sh) + item_len + *sk_offset >
@@ -1578,10 +1598,6 @@ static noinline int copy_to_sk(struct btrfs_root *root,
                        goto overflow;
                }
 
-               btrfs_item_key_to_cpu(leaf, key, i);
-               if (!key_in_sk(key, sk))
-                       continue;
-
                sh.objectid = key->objectid;
                sh.offset = key->offset;
                sh.type = key->type;
@@ -1841,6 +1857,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        int ret;
        int err = 0;
 
+       if (!S_ISDIR(dir->i_mode))
+               return -ENOTDIR;
+
        vol_args = memdup_user(arg, sizeof(*vol_args));
        if (IS_ERR(vol_args))
                return PTR_ERR(vol_args);
@@ -2157,6 +2176,151 @@ out:
        return ret;
 }
 
+/*
+ * Make sure we do not end up inserting an inline extent into a file that has
+ * already other (non-inline) extents. If a file has an inline extent it can
+ * not have any other extents and the (single) inline extent must start at the
+ * file offset 0. Failing to respect these rules will lead to file corruption,
+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
+ *
+ * We can have extents that have been already written to disk or we can have
+ * dirty ranges still in delalloc, in which case the extent maps and items are
+ * created only when we run delalloc, and the delalloc ranges might fall outside
+ * the range we are currently locking in the inode's io tree. So we check the
+ * inode's i_size because of that (i_size updates are done while holding the
+ * i_mutex, which we are holding here).
+ * We also check to see if the inode has a size not greater than "datal" but has
+ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
+ * protected against such concurrent fallocate calls by the i_mutex).
+ *
+ * If the file has no extents but a size greater than datal, do not allow the
+ * copy because we would need turn the inline extent into a non-inline one (even
+ * with NO_HOLES enabled). If we find our destination inode only has one inline
+ * extent, just overwrite it with the source inline extent if its size is less
+ * than the source extent's size, or we could copy the source inline extent's
+ * data into the destination inode's inline extent if the later is greater then
+ * the former.
+ */
+static int clone_copy_inline_extent(struct inode *src,
+                                   struct inode *dst,
+                                   struct btrfs_trans_handle *trans,
+                                   struct btrfs_path *path,
+                                   struct btrfs_key *new_key,
+                                   const u64 drop_start,
+                                   const u64 datal,
+                                   const u64 skip,
+                                   const u64 size,
+                                   char *inline_data)
+{
+       struct btrfs_root *root = BTRFS_I(dst)->root;
+       const u64 aligned_end = ALIGN(new_key->offset + datal,
+                                     root->sectorsize);
+       int ret;
+       struct btrfs_key key;
+       u64 hint_byte;
+
+       if (new_key->offset > 0)
+               return -EOPNOTSUPP;
+
+       key.objectid = btrfs_ino(dst);
+       key.type = BTRFS_EXTENT_DATA_KEY;
+       key.offset = 0;
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0) {
+               return ret;
+       } else if (ret > 0) {
+               if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               return ret;
+                       else if (ret > 0)
+                               goto copy_inline_extent;
+               }
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (key.objectid == btrfs_ino(dst) &&
+                   key.type == BTRFS_EXTENT_DATA_KEY) {
+                       return -EOPNOTSUPP;
+               }
+       } else if (i_size_read(dst) <= datal) {
+               struct btrfs_file_extent_item *ei;
+               u64 ext_len;
+
+               /*
+                * If the file size is <= datal, make sure there are no other
+                * extents following (can happen do to an fallocate call with
+                * the flag FALLOC_FL_KEEP_SIZE).
+                */
+               ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               /*
+                * If it's an inline extent, it can not have other extents
+                * following it.
+                */
+               if (btrfs_file_extent_type(path->nodes[0], ei) ==
+                   BTRFS_FILE_EXTENT_INLINE)
+                       goto copy_inline_extent;
+
+               ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+               if (ext_len > aligned_end)
+                       return -EOPNOTSUPP;
+
+               ret = btrfs_next_item(root, path);
+               if (ret < 0) {
+                       return ret;
+               } else if (ret == 0) {
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == btrfs_ino(dst) &&
+                           key.type == BTRFS_EXTENT_DATA_KEY)
+                               return -EOPNOTSUPP;
+               }
+       }
+
+copy_inline_extent:
+       /*
+        * We have no extent items, or we have an extent at offset 0 which may
+        * or may not be inlined. All these cases are dealt the same way.
+        */
+       if (i_size_read(dst) > datal) {
+               /*
+                * If the destination inode has an inline extent...
+                * This would require copying the data from the source inline
+                * extent into the beginning of the destination's inline extent.
+                * But this is really complex, both extents can be compressed
+                * or just one of them, which would require decompressing and
+                * re-compressing data (which could increase the new compressed
+                * size, not allowing the compressed data to fit anymore in an
+                * inline extent).
+                * So just don't support this case for now (it should be rare,
+                * we are not really saving space when cloning inline extents).
+                */
+               return -EOPNOTSUPP;
+       }
+
+       btrfs_release_path(path);
+       ret = btrfs_drop_extents(trans, dst, drop_start, aligned_end,
+                                &hint_byte, 1);
+       if (ret)
+               return ret;
+       ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
+       if (ret)
+               return ret;
+
+       if (skip) {
+               const u32 start = btrfs_file_extent_calc_inline_size(0);
+
+               memmove(inline_data + start, inline_data + start + skip, datal);
+       }
+
+       write_extent_buffer(path->nodes[0], inline_data,
+                           btrfs_item_ptr_offset(path->nodes[0],
+                                                 path->slots[0]),
+                           size);
+       inode_add_bytes(dst, datal);
+
+       return 0;
+}
+
 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                       u64 off, u64 olen, u64 destoff)
 {
@@ -2256,6 +2420,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
        if (off + len == src->i_size)
                len = ALIGN(src->i_size, bs) - off;
 
+       if (len == 0) {
+               ret = 0;
+               goto out_unlock;
+       }
+
        /* verify the end result is block aligned */
        if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
            !IS_ALIGNED(destoff, bs))
@@ -2447,29 +2616,20 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                                size -= skip + trim;
                                datal -= skip + trim;
 
-                               ret = btrfs_drop_extents(trans, inode,
-                                                        new_key.offset,
-                                                        new_key.offset + datal,
-                                                        &hint_byte, 1);
-                               BUG_ON(ret);
-
-                               ret = btrfs_insert_empty_item(trans, root, path,
-                                                             &new_key, size);
-                               BUG_ON(ret);
-
-                               if (skip) {
-                                       u32 start =
-                                         btrfs_file_extent_calc_inline_size(0);
-                                       memmove(buf+start, buf+start+skip,
-                                               datal);
+                               ret = clone_copy_inline_extent(src, inode,
+                                                              trans, path,
+                                                              &new_key,
+                                                              new_key.offset,
+                                                              datal,
+                                                              skip, size, buf);
+                               if (ret) {
+                                       BUG_ON(ret != -EOPNOTSUPP);
+                                       btrfs_end_transaction(trans, root);
+                                       goto out;
                                }
 
                                leaf = path->nodes[0];
                                slot = path->slots[0];
-                               write_extent_buffer(leaf, buf,
-                                           btrfs_item_ptr_offset(leaf, slot),
-                                           size);
-                               inode_add_bytes(inode, datal);
                        }
 
                        btrfs_mark_buffer_dirty(leaf);
@@ -3112,3 +3272,24 @@ long btrfs_ioctl(struct file *file, unsigned int
 
        return -ENOTTY;
 }
+
+#ifdef CONFIG_COMPAT
+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       switch (cmd) {
+       case FS_IOC32_GETFLAGS:
+               cmd = FS_IOC_GETFLAGS;
+               break;
+       case FS_IOC32_SETFLAGS:
+               cmd = FS_IOC_SETFLAGS;
+               break;
+       case FS_IOC32_GETVERSION:
+               cmd = FS_IOC_GETVERSION;
+               break;
+       default:
+               return -ENOIOCTLCMD;
+       }
+
+       return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif