Btrfs: remove duplicate include in ioctl.c
[pandora-kernel.git] / fs / btrfs / ioctl.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/buffer_head.h>
22 #include <linux/file.h>
23 #include <linux/fs.h>
24 #include <linux/fsnotify.h>
25 #include <linux/pagemap.h>
26 #include <linux/highmem.h>
27 #include <linux/time.h>
28 #include <linux/init.h>
29 #include <linux/string.h>
30 #include <linux/backing-dev.h>
31 #include <linux/mount.h>
32 #include <linux/mpage.h>
33 #include <linux/namei.h>
34 #include <linux/swap.h>
35 #include <linux/writeback.h>
36 #include <linux/statfs.h>
37 #include <linux/compat.h>
38 #include <linux/bit_spinlock.h>
39 #include <linux/security.h>
40 #include <linux/xattr.h>
41 #include <linux/vmalloc.h>
42 #include "compat.h"
43 #include "ctree.h"
44 #include "disk-io.h"
45 #include "transaction.h"
46 #include "btrfs_inode.h"
47 #include "ioctl.h"
48 #include "print-tree.h"
49 #include "volumes.h"
50 #include "locking.h"
51
52 /* Mask out flags that are inappropriate for the given type of inode. */
53 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
54 {
55         if (S_ISDIR(mode))
56                 return flags;
57         else if (S_ISREG(mode))
58                 return flags & ~FS_DIRSYNC_FL;
59         else
60                 return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
61 }
62
63 /*
64  * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl.
65  */
66 static unsigned int btrfs_flags_to_ioctl(unsigned int flags)
67 {
68         unsigned int iflags = 0;
69
70         if (flags & BTRFS_INODE_SYNC)
71                 iflags |= FS_SYNC_FL;
72         if (flags & BTRFS_INODE_IMMUTABLE)
73                 iflags |= FS_IMMUTABLE_FL;
74         if (flags & BTRFS_INODE_APPEND)
75                 iflags |= FS_APPEND_FL;
76         if (flags & BTRFS_INODE_NODUMP)
77                 iflags |= FS_NODUMP_FL;
78         if (flags & BTRFS_INODE_NOATIME)
79                 iflags |= FS_NOATIME_FL;
80         if (flags & BTRFS_INODE_DIRSYNC)
81                 iflags |= FS_DIRSYNC_FL;
82
83         return iflags;
84 }
85
86 /*
87  * Update inode->i_flags based on the btrfs internal flags.
88  */
89 void btrfs_update_iflags(struct inode *inode)
90 {
91         struct btrfs_inode *ip = BTRFS_I(inode);
92
93         inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
94
95         if (ip->flags & BTRFS_INODE_SYNC)
96                 inode->i_flags |= S_SYNC;
97         if (ip->flags & BTRFS_INODE_IMMUTABLE)
98                 inode->i_flags |= S_IMMUTABLE;
99         if (ip->flags & BTRFS_INODE_APPEND)
100                 inode->i_flags |= S_APPEND;
101         if (ip->flags & BTRFS_INODE_NOATIME)
102                 inode->i_flags |= S_NOATIME;
103         if (ip->flags & BTRFS_INODE_DIRSYNC)
104                 inode->i_flags |= S_DIRSYNC;
105 }
106
107 /*
108  * Inherit flags from the parent inode.
109  *
110  * Unlike extN we don't have any flags we don't want to inherit currently.
111  */
112 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
113 {
114         unsigned int flags;
115
116         if (!dir)
117                 return;
118
119         flags = BTRFS_I(dir)->flags;
120
121         if (S_ISREG(inode->i_mode))
122                 flags &= ~BTRFS_INODE_DIRSYNC;
123         else if (!S_ISDIR(inode->i_mode))
124                 flags &= (BTRFS_INODE_NODUMP | BTRFS_INODE_NOATIME);
125
126         BTRFS_I(inode)->flags = flags;
127         btrfs_update_iflags(inode);
128 }
129
130 static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
131 {
132         struct btrfs_inode *ip = BTRFS_I(file->f_path.dentry->d_inode);
133         unsigned int flags = btrfs_flags_to_ioctl(ip->flags);
134
135         if (copy_to_user(arg, &flags, sizeof(flags)))
136                 return -EFAULT;
137         return 0;
138 }
139
140 static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
141 {
142         struct inode *inode = file->f_path.dentry->d_inode;
143         struct btrfs_inode *ip = BTRFS_I(inode);
144         struct btrfs_root *root = ip->root;
145         struct btrfs_trans_handle *trans;
146         unsigned int flags, oldflags;
147         int ret;
148
149         if (copy_from_user(&flags, arg, sizeof(flags)))
150                 return -EFAULT;
151
152         if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
153                       FS_NOATIME_FL | FS_NODUMP_FL | \
154                       FS_SYNC_FL | FS_DIRSYNC_FL))
155                 return -EOPNOTSUPP;
156
157         if (!is_owner_or_cap(inode))
158                 return -EACCES;
159
160         mutex_lock(&inode->i_mutex);
161
162         flags = btrfs_mask_flags(inode->i_mode, flags);
163         oldflags = btrfs_flags_to_ioctl(ip->flags);
164         if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
165                 if (!capable(CAP_LINUX_IMMUTABLE)) {
166                         ret = -EPERM;
167                         goto out_unlock;
168                 }
169         }
170
171         ret = mnt_want_write(file->f_path.mnt);
172         if (ret)
173                 goto out_unlock;
174
175         if (flags & FS_SYNC_FL)
176                 ip->flags |= BTRFS_INODE_SYNC;
177         else
178                 ip->flags &= ~BTRFS_INODE_SYNC;
179         if (flags & FS_IMMUTABLE_FL)
180                 ip->flags |= BTRFS_INODE_IMMUTABLE;
181         else
182                 ip->flags &= ~BTRFS_INODE_IMMUTABLE;
183         if (flags & FS_APPEND_FL)
184                 ip->flags |= BTRFS_INODE_APPEND;
185         else
186                 ip->flags &= ~BTRFS_INODE_APPEND;
187         if (flags & FS_NODUMP_FL)
188                 ip->flags |= BTRFS_INODE_NODUMP;
189         else
190                 ip->flags &= ~BTRFS_INODE_NODUMP;
191         if (flags & FS_NOATIME_FL)
192                 ip->flags |= BTRFS_INODE_NOATIME;
193         else
194                 ip->flags &= ~BTRFS_INODE_NOATIME;
195         if (flags & FS_DIRSYNC_FL)
196                 ip->flags |= BTRFS_INODE_DIRSYNC;
197         else
198                 ip->flags &= ~BTRFS_INODE_DIRSYNC;
199
200
201         trans = btrfs_join_transaction(root, 1);
202         BUG_ON(!trans);
203
204         ret = btrfs_update_inode(trans, root, inode);
205         BUG_ON(ret);
206
207         btrfs_update_iflags(inode);
208         inode->i_ctime = CURRENT_TIME;
209         btrfs_end_transaction(trans, root);
210
211         mnt_drop_write(file->f_path.mnt);
212  out_unlock:
213         mutex_unlock(&inode->i_mutex);
214         return 0;
215 }
216
217 static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
218 {
219         struct inode *inode = file->f_path.dentry->d_inode;
220
221         return put_user(inode->i_generation, arg);
222 }
223
224 static noinline int create_subvol(struct btrfs_root *root,
225                                   struct dentry *dentry,
226                                   char *name, int namelen)
227 {
228         struct btrfs_trans_handle *trans;
229         struct btrfs_key key;
230         struct btrfs_root_item root_item;
231         struct btrfs_inode_item *inode_item;
232         struct extent_buffer *leaf;
233         struct btrfs_root *new_root;
234         struct inode *dir = dentry->d_parent->d_inode;
235         int ret;
236         int err;
237         u64 objectid;
238         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
239         u64 index = 0;
240
241         /*
242          * 1 - inode item
243          * 2 - refs
244          * 1 - root item
245          * 2 - dir items
246          */
247         ret = btrfs_reserve_metadata_space(root, 6);
248         if (ret)
249                 return ret;
250
251         trans = btrfs_start_transaction(root, 1);
252         BUG_ON(!trans);
253
254         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
255                                        0, &objectid);
256         if (ret)
257                 goto fail;
258
259         leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
260                                       0, objectid, NULL, 0, 0, 0);
261         if (IS_ERR(leaf)) {
262                 ret = PTR_ERR(leaf);
263                 goto fail;
264         }
265
266         memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
267         btrfs_set_header_bytenr(leaf, leaf->start);
268         btrfs_set_header_generation(leaf, trans->transid);
269         btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
270         btrfs_set_header_owner(leaf, objectid);
271
272         write_extent_buffer(leaf, root->fs_info->fsid,
273                             (unsigned long)btrfs_header_fsid(leaf),
274                             BTRFS_FSID_SIZE);
275         write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
276                             (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
277                             BTRFS_UUID_SIZE);
278         btrfs_mark_buffer_dirty(leaf);
279
280         inode_item = &root_item.inode;
281         memset(inode_item, 0, sizeof(*inode_item));
282         inode_item->generation = cpu_to_le64(1);
283         inode_item->size = cpu_to_le64(3);
284         inode_item->nlink = cpu_to_le32(1);
285         inode_item->nbytes = cpu_to_le64(root->leafsize);
286         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
287
288         btrfs_set_root_bytenr(&root_item, leaf->start);
289         btrfs_set_root_generation(&root_item, trans->transid);
290         btrfs_set_root_level(&root_item, 0);
291         btrfs_set_root_refs(&root_item, 1);
292         btrfs_set_root_used(&root_item, leaf->len);
293         btrfs_set_root_last_snapshot(&root_item, 0);
294
295         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
296         root_item.drop_level = 0;
297
298         btrfs_tree_unlock(leaf);
299         free_extent_buffer(leaf);
300         leaf = NULL;
301
302         btrfs_set_root_dirid(&root_item, new_dirid);
303
304         key.objectid = objectid;
305         key.offset = 0;
306         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
307         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
308                                 &root_item);
309         if (ret)
310                 goto fail;
311
312         key.offset = (u64)-1;
313         new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
314         BUG_ON(IS_ERR(new_root));
315
316         btrfs_record_root_in_trans(trans, new_root);
317
318         ret = btrfs_create_subvol_root(trans, new_root, new_dirid,
319                                        BTRFS_I(dir)->block_group);
320         /*
321          * insert the directory item
322          */
323         ret = btrfs_set_inode_index(dir, &index);
324         BUG_ON(ret);
325
326         ret = btrfs_insert_dir_item(trans, root,
327                                     name, namelen, dir->i_ino, &key,
328                                     BTRFS_FT_DIR, index);
329         if (ret)
330                 goto fail;
331
332         btrfs_i_size_write(dir, dir->i_size + namelen * 2);
333         ret = btrfs_update_inode(trans, root, dir);
334         BUG_ON(ret);
335
336         ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
337                                  objectid, root->root_key.objectid,
338                                  dir->i_ino, index, name, namelen);
339
340         BUG_ON(ret);
341
342         d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
343 fail:
344         err = btrfs_commit_transaction(trans, root);
345         if (err && !ret)
346                 ret = err;
347
348         btrfs_unreserve_metadata_space(root, 6);
349         return ret;
350 }
351
352 static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
353                            char *name, int namelen)
354 {
355         struct inode *inode;
356         struct btrfs_pending_snapshot *pending_snapshot;
357         struct btrfs_trans_handle *trans;
358         int ret;
359
360         if (!root->ref_cows)
361                 return -EINVAL;
362
363         /*
364          * 1 - inode item
365          * 2 - refs
366          * 1 - root item
367          * 2 - dir items
368          */
369         ret = btrfs_reserve_metadata_space(root, 6);
370         if (ret)
371                 goto fail;
372
373         pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
374         if (!pending_snapshot) {
375                 ret = -ENOMEM;
376                 btrfs_unreserve_metadata_space(root, 6);
377                 goto fail;
378         }
379         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
380         if (!pending_snapshot->name) {
381                 ret = -ENOMEM;
382                 kfree(pending_snapshot);
383                 btrfs_unreserve_metadata_space(root, 6);
384                 goto fail;
385         }
386         memcpy(pending_snapshot->name, name, namelen);
387         pending_snapshot->name[namelen] = '\0';
388         pending_snapshot->dentry = dentry;
389         trans = btrfs_start_transaction(root, 1);
390         BUG_ON(!trans);
391         pending_snapshot->root = root;
392         list_add(&pending_snapshot->list,
393                  &trans->transaction->pending_snapshots);
394         ret = btrfs_commit_transaction(trans, root);
395         BUG_ON(ret);
396         btrfs_unreserve_metadata_space(root, 6);
397
398         inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
399         if (IS_ERR(inode)) {
400                 ret = PTR_ERR(inode);
401                 goto fail;
402         }
403         BUG_ON(!inode);
404         d_instantiate(dentry, inode);
405         ret = 0;
406 fail:
407         return ret;
408 }
409
410 /* copy of may_create in fs/namei.c() */
411 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
412 {
413         if (child->d_inode)
414                 return -EEXIST;
415         if (IS_DEADDIR(dir))
416                 return -ENOENT;
417         return inode_permission(dir, MAY_WRITE | MAY_EXEC);
418 }
419
420 /*
421  * Create a new subvolume below @parent.  This is largely modeled after
422  * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
423  * inside this filesystem so it's quite a bit simpler.
424  */
425 static noinline int btrfs_mksubvol(struct path *parent,
426                                    char *name, int namelen,
427                                    struct btrfs_root *snap_src)
428 {
429         struct inode *dir  = parent->dentry->d_inode;
430         struct dentry *dentry;
431         int error;
432
433         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
434
435         dentry = lookup_one_len(name, parent->dentry, namelen);
436         error = PTR_ERR(dentry);
437         if (IS_ERR(dentry))
438                 goto out_unlock;
439
440         error = -EEXIST;
441         if (dentry->d_inode)
442                 goto out_dput;
443
444         error = mnt_want_write(parent->mnt);
445         if (error)
446                 goto out_dput;
447
448         error = btrfs_may_create(dir, dentry);
449         if (error)
450                 goto out_drop_write;
451
452         down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
453
454         if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
455                 goto out_up_read;
456
457         if (snap_src) {
458                 error = create_snapshot(snap_src, dentry,
459                                         name, namelen);
460         } else {
461                 error = create_subvol(BTRFS_I(dir)->root, dentry,
462                                       name, namelen);
463         }
464         if (!error)
465                 fsnotify_mkdir(dir, dentry);
466 out_up_read:
467         up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem);
468 out_drop_write:
469         mnt_drop_write(parent->mnt);
470 out_dput:
471         dput(dentry);
472 out_unlock:
473         mutex_unlock(&dir->i_mutex);
474         return error;
475 }
476
477 static int should_defrag_range(struct inode *inode, u64 start, u64 len,
478                                int thresh, u64 *last_len, u64 *skip,
479                                u64 *defrag_end)
480 {
481         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
482         struct extent_map *em = NULL;
483         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
484         int ret = 1;
485
486
487         if (thresh == 0)
488                 thresh = 256 * 1024;
489
490         /*
491          * make sure that once we start defragging and extent, we keep on
492          * defragging it
493          */
494         if (start < *defrag_end)
495                 return 1;
496
497         *skip = 0;
498
499         /*
500          * hopefully we have this extent in the tree already, try without
501          * the full extent lock
502          */
503         read_lock(&em_tree->lock);
504         em = lookup_extent_mapping(em_tree, start, len);
505         read_unlock(&em_tree->lock);
506
507         if (!em) {
508                 /* get the big lock and read metadata off disk */
509                 lock_extent(io_tree, start, start + len - 1, GFP_NOFS);
510                 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
511                 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);
512
513                 if (!em)
514                         return 0;
515         }
516
517         /* this will cover holes, and inline extents */
518         if (em->block_start >= EXTENT_MAP_LAST_BYTE)
519                 ret = 0;
520
521         /*
522          * we hit a real extent, if it is big don't bother defragging it again
523          */
524         if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
525                 ret = 0;
526
527         /*
528          * last_len ends up being a counter of how many bytes we've defragged.
529          * every time we choose not to defrag an extent, we reset *last_len
530          * so that the next tiny extent will force a defrag.
531          *
532          * The end result of this is that tiny extents before a single big
533          * extent will force at least part of that big extent to be defragged.
534          */
535         if (ret) {
536                 *last_len += len;
537                 *defrag_end = extent_map_end(em);
538         } else {
539                 *last_len = 0;
540                 *skip = extent_map_end(em);
541                 *defrag_end = 0;
542         }
543
544         free_extent_map(em);
545         return ret;
546 }
547
548 static int btrfs_defrag_file(struct file *file,
549                              struct btrfs_ioctl_defrag_range_args *range)
550 {
551         struct inode *inode = fdentry(file)->d_inode;
552         struct btrfs_root *root = BTRFS_I(inode)->root;
553         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
554         struct btrfs_ordered_extent *ordered;
555         struct page *page;
556         unsigned long last_index;
557         unsigned long ra_pages = root->fs_info->bdi.ra_pages;
558         unsigned long total_read = 0;
559         u64 page_start;
560         u64 page_end;
561         u64 last_len = 0;
562         u64 skip = 0;
563         u64 defrag_end = 0;
564         unsigned long i;
565         int ret;
566
567         if (inode->i_size == 0)
568                 return 0;
569
570         if (range->start + range->len > range->start) {
571                 last_index = min_t(u64, inode->i_size - 1,
572                          range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
573         } else {
574                 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
575         }
576
577         i = range->start >> PAGE_CACHE_SHIFT;
578         while (i <= last_index) {
579                 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
580                                         PAGE_CACHE_SIZE,
581                                         range->extent_thresh,
582                                         &last_len, &skip,
583                                         &defrag_end)) {
584                         unsigned long next;
585                         /*
586                          * the should_defrag function tells us how much to skip
587                          * bump our counter by the suggested amount
588                          */
589                         next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
590                         i = max(i + 1, next);
591                         continue;
592                 }
593
594                 if (total_read % ra_pages == 0) {
595                         btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
596                                        min(last_index, i + ra_pages - 1));
597                 }
598                 total_read++;
599                 mutex_lock(&inode->i_mutex);
600                 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
601                         BTRFS_I(inode)->force_compress = 1;
602
603                 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
604                 if (ret) {
605                         ret = -ENOSPC;
606                         break;
607                 }
608
609                 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
610                 if (ret) {
611                         btrfs_free_reserved_data_space(root, inode,
612                                                        PAGE_CACHE_SIZE);
613                         ret = -ENOSPC;
614                         break;
615                 }
616 again:
617                 if (inode->i_size == 0 ||
618                     i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
619                         ret = 0;
620                         goto err_reservations;
621                 }
622
623                 page = grab_cache_page(inode->i_mapping, i);
624                 if (!page)
625                         goto err_reservations;
626
627                 if (!PageUptodate(page)) {
628                         btrfs_readpage(NULL, page);
629                         lock_page(page);
630                         if (!PageUptodate(page)) {
631                                 unlock_page(page);
632                                 page_cache_release(page);
633                                 goto err_reservations;
634                         }
635                 }
636
637                 if (page->mapping != inode->i_mapping) {
638                         unlock_page(page);
639                         page_cache_release(page);
640                         goto again;
641                 }
642
643                 wait_on_page_writeback(page);
644
645                 if (PageDirty(page)) {
646                         btrfs_free_reserved_data_space(root, inode,
647                                                        PAGE_CACHE_SIZE);
648                         goto loop_unlock;
649                 }
650
651                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
652                 page_end = page_start + PAGE_CACHE_SIZE - 1;
653                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
654
655                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
656                 if (ordered) {
657                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
658                         unlock_page(page);
659                         page_cache_release(page);
660                         btrfs_start_ordered_extent(inode, ordered, 1);
661                         btrfs_put_ordered_extent(ordered);
662                         goto again;
663                 }
664                 set_page_extent_mapped(page);
665
666                 /*
667                  * this makes sure page_mkwrite is called on the
668                  * page if it is dirtied again later
669                  */
670                 clear_page_dirty_for_io(page);
671                 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
672                                   page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
673                                   EXTENT_DO_ACCOUNTING, GFP_NOFS);
674
675                 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
676                 ClearPageChecked(page);
677                 set_page_dirty(page);
678                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
679
680 loop_unlock:
681                 unlock_page(page);
682                 page_cache_release(page);
683                 mutex_unlock(&inode->i_mutex);
684
685                 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
686                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
687                 i++;
688         }
689
690         if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
691                 filemap_flush(inode->i_mapping);
692
693         if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
694                 /* the filemap_flush will queue IO into the worker threads, but
695                  * we have to make sure the IO is actually started and that
696                  * ordered extents get created before we return
697                  */
698                 atomic_inc(&root->fs_info->async_submit_draining);
699                 while (atomic_read(&root->fs_info->nr_async_submits) ||
700                       atomic_read(&root->fs_info->async_delalloc_pages)) {
701                         wait_event(root->fs_info->async_submit_wait,
702                            (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
703                             atomic_read(&root->fs_info->async_delalloc_pages) == 0));
704                 }
705                 atomic_dec(&root->fs_info->async_submit_draining);
706
707                 mutex_lock(&inode->i_mutex);
708                 BTRFS_I(inode)->force_compress = 0;
709                 mutex_unlock(&inode->i_mutex);
710         }
711
712         return 0;
713
714 err_reservations:
715         mutex_unlock(&inode->i_mutex);
716         btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
717         btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
718         return ret;
719 }
720
721 static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
722                                         void __user *arg)
723 {
724         u64 new_size;
725         u64 old_size;
726         u64 devid = 1;
727         struct btrfs_ioctl_vol_args *vol_args;
728         struct btrfs_trans_handle *trans;
729         struct btrfs_device *device = NULL;
730         char *sizestr;
731         char *devstr = NULL;
732         int ret = 0;
733         int namelen;
734         int mod = 0;
735
736         if (root->fs_info->sb->s_flags & MS_RDONLY)
737                 return -EROFS;
738
739         if (!capable(CAP_SYS_ADMIN))
740                 return -EPERM;
741
742         vol_args = memdup_user(arg, sizeof(*vol_args));
743         if (IS_ERR(vol_args))
744                 return PTR_ERR(vol_args);
745
746         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
747         namelen = strlen(vol_args->name);
748
749         mutex_lock(&root->fs_info->volume_mutex);
750         sizestr = vol_args->name;
751         devstr = strchr(sizestr, ':');
752         if (devstr) {
753                 char *end;
754                 sizestr = devstr + 1;
755                 *devstr = '\0';
756                 devstr = vol_args->name;
757                 devid = simple_strtoull(devstr, &end, 10);
758                 printk(KERN_INFO "resizing devid %llu\n",
759                        (unsigned long long)devid);
760         }
761         device = btrfs_find_device(root, devid, NULL, NULL);
762         if (!device) {
763                 printk(KERN_INFO "resizer unable to find device %llu\n",
764                        (unsigned long long)devid);
765                 ret = -EINVAL;
766                 goto out_unlock;
767         }
768         if (!strcmp(sizestr, "max"))
769                 new_size = device->bdev->bd_inode->i_size;
770         else {
771                 if (sizestr[0] == '-') {
772                         mod = -1;
773                         sizestr++;
774                 } else if (sizestr[0] == '+') {
775                         mod = 1;
776                         sizestr++;
777                 }
778                 new_size = memparse(sizestr, NULL);
779                 if (new_size == 0) {
780                         ret = -EINVAL;
781                         goto out_unlock;
782                 }
783         }
784
785         old_size = device->total_bytes;
786
787         if (mod < 0) {
788                 if (new_size > old_size) {
789                         ret = -EINVAL;
790                         goto out_unlock;
791                 }
792                 new_size = old_size - new_size;
793         } else if (mod > 0) {
794                 new_size = old_size + new_size;
795         }
796
797         if (new_size < 256 * 1024 * 1024) {
798                 ret = -EINVAL;
799                 goto out_unlock;
800         }
801         if (new_size > device->bdev->bd_inode->i_size) {
802                 ret = -EFBIG;
803                 goto out_unlock;
804         }
805
806         do_div(new_size, root->sectorsize);
807         new_size *= root->sectorsize;
808
809         printk(KERN_INFO "new size for %s is %llu\n",
810                 device->name, (unsigned long long)new_size);
811
812         if (new_size > old_size) {
813                 trans = btrfs_start_transaction(root, 1);
814                 ret = btrfs_grow_device(trans, device, new_size);
815                 btrfs_commit_transaction(trans, root);
816         } else {
817                 ret = btrfs_shrink_device(device, new_size);
818         }
819
820 out_unlock:
821         mutex_unlock(&root->fs_info->volume_mutex);
822         kfree(vol_args);
823         return ret;
824 }
825
826 static noinline int btrfs_ioctl_snap_create(struct file *file,
827                                             void __user *arg, int subvol)
828 {
829         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
830         struct btrfs_ioctl_vol_args *vol_args;
831         struct file *src_file;
832         int namelen;
833         int ret = 0;
834
835         if (root->fs_info->sb->s_flags & MS_RDONLY)
836                 return -EROFS;
837
838         vol_args = memdup_user(arg, sizeof(*vol_args));
839         if (IS_ERR(vol_args))
840                 return PTR_ERR(vol_args);
841
842         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
843         namelen = strlen(vol_args->name);
844         if (strchr(vol_args->name, '/')) {
845                 ret = -EINVAL;
846                 goto out;
847         }
848
849         if (subvol) {
850                 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
851                                      NULL);
852         } else {
853                 struct inode *src_inode;
854                 src_file = fget(vol_args->fd);
855                 if (!src_file) {
856                         ret = -EINVAL;
857                         goto out;
858                 }
859
860                 src_inode = src_file->f_path.dentry->d_inode;
861                 if (src_inode->i_sb != file->f_path.dentry->d_inode->i_sb) {
862                         printk(KERN_INFO "btrfs: Snapshot src from "
863                                "another FS\n");
864                         ret = -EINVAL;
865                         fput(src_file);
866                         goto out;
867                 }
868                 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
869                                      BTRFS_I(src_inode)->root);
870                 fput(src_file);
871         }
872 out:
873         kfree(vol_args);
874         return ret;
875 }
876
877 /*
878  * helper to check if the subvolume references other subvolumes
879  */
880 static noinline int may_destroy_subvol(struct btrfs_root *root)
881 {
882         struct btrfs_path *path;
883         struct btrfs_key key;
884         int ret;
885
886         path = btrfs_alloc_path();
887         if (!path)
888                 return -ENOMEM;
889
890         key.objectid = root->root_key.objectid;
891         key.type = BTRFS_ROOT_REF_KEY;
892         key.offset = (u64)-1;
893
894         ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
895                                 &key, path, 0, 0);
896         if (ret < 0)
897                 goto out;
898         BUG_ON(ret == 0);
899
900         ret = 0;
901         if (path->slots[0] > 0) {
902                 path->slots[0]--;
903                 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
904                 if (key.objectid == root->root_key.objectid &&
905                     key.type == BTRFS_ROOT_REF_KEY)
906                         ret = -ENOTEMPTY;
907         }
908 out:
909         btrfs_free_path(path);
910         return ret;
911 }
912
913 static noinline int key_in_sk(struct btrfs_key *key,
914                               struct btrfs_ioctl_search_key *sk)
915 {
916         struct btrfs_key test;
917         int ret;
918
919         test.objectid = sk->min_objectid;
920         test.type = sk->min_type;
921         test.offset = sk->min_offset;
922
923         ret = btrfs_comp_cpu_keys(key, &test);
924         if (ret < 0)
925                 return 0;
926
927         test.objectid = sk->max_objectid;
928         test.type = sk->max_type;
929         test.offset = sk->max_offset;
930
931         ret = btrfs_comp_cpu_keys(key, &test);
932         if (ret > 0)
933                 return 0;
934         return 1;
935 }
936
937 static noinline int copy_to_sk(struct btrfs_root *root,
938                                struct btrfs_path *path,
939                                struct btrfs_key *key,
940                                struct btrfs_ioctl_search_key *sk,
941                                char *buf,
942                                unsigned long *sk_offset,
943                                int *num_found)
944 {
945         u64 found_transid;
946         struct extent_buffer *leaf;
947         struct btrfs_ioctl_search_header sh;
948         unsigned long item_off;
949         unsigned long item_len;
950         int nritems;
951         int i;
952         int slot;
953         int found = 0;
954         int ret = 0;
955
956         leaf = path->nodes[0];
957         slot = path->slots[0];
958         nritems = btrfs_header_nritems(leaf);
959
960         if (btrfs_header_generation(leaf) > sk->max_transid) {
961                 i = nritems;
962                 goto advance_key;
963         }
964         found_transid = btrfs_header_generation(leaf);
965
966         for (i = slot; i < nritems; i++) {
967                 item_off = btrfs_item_ptr_offset(leaf, i);
968                 item_len = btrfs_item_size_nr(leaf, i);
969
970                 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
971                         item_len = 0;
972
973                 if (sizeof(sh) + item_len + *sk_offset >
974                     BTRFS_SEARCH_ARGS_BUFSIZE) {
975                         ret = 1;
976                         goto overflow;
977                 }
978
979                 btrfs_item_key_to_cpu(leaf, key, i);
980                 if (!key_in_sk(key, sk))
981                         continue;
982
983                 sh.objectid = key->objectid;
984                 sh.offset = key->offset;
985                 sh.type = key->type;
986                 sh.len = item_len;
987                 sh.transid = found_transid;
988
989                 /* copy search result header */
990                 memcpy(buf + *sk_offset, &sh, sizeof(sh));
991                 *sk_offset += sizeof(sh);
992
993                 if (item_len) {
994                         char *p = buf + *sk_offset;
995                         /* copy the item */
996                         read_extent_buffer(leaf, p,
997                                            item_off, item_len);
998                         *sk_offset += item_len;
999                 }
1000                 found++;
1001
1002                 if (*num_found >= sk->nr_items)
1003                         break;
1004         }
1005 advance_key:
1006         ret = 0;
1007         if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1008                 key->offset++;
1009         else if (key->type < (u8)-1 && key->type < sk->max_type) {
1010                 key->offset = 0;
1011                 key->type++;
1012         } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1013                 key->offset = 0;
1014                 key->type = 0;
1015                 key->objectid++;
1016         } else
1017                 ret = 1;
1018 overflow:
1019         *num_found += found;
1020         return ret;
1021 }
1022
1023 static noinline int search_ioctl(struct inode *inode,
1024                                  struct btrfs_ioctl_search_args *args)
1025 {
1026         struct btrfs_root *root;
1027         struct btrfs_key key;
1028         struct btrfs_key max_key;
1029         struct btrfs_path *path;
1030         struct btrfs_ioctl_search_key *sk = &args->key;
1031         struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1032         int ret;
1033         int num_found = 0;
1034         unsigned long sk_offset = 0;
1035
1036         path = btrfs_alloc_path();
1037         if (!path)
1038                 return -ENOMEM;
1039
1040         if (sk->tree_id == 0) {
1041                 /* search the root of the inode that was passed */
1042                 root = BTRFS_I(inode)->root;
1043         } else {
1044                 key.objectid = sk->tree_id;
1045                 key.type = BTRFS_ROOT_ITEM_KEY;
1046                 key.offset = (u64)-1;
1047                 root = btrfs_read_fs_root_no_name(info, &key);
1048                 if (IS_ERR(root)) {
1049                         printk(KERN_ERR "could not find root %llu\n",
1050                                sk->tree_id);
1051                         btrfs_free_path(path);
1052                         return -ENOENT;
1053                 }
1054         }
1055
1056         key.objectid = sk->min_objectid;
1057         key.type = sk->min_type;
1058         key.offset = sk->min_offset;
1059
1060         max_key.objectid = sk->max_objectid;
1061         max_key.type = sk->max_type;
1062         max_key.offset = sk->max_offset;
1063
1064         path->keep_locks = 1;
1065
1066         while(1) {
1067                 ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1068                                            sk->min_transid);
1069                 if (ret != 0) {
1070                         if (ret > 0)
1071                                 ret = 0;
1072                         goto err;
1073                 }
1074                 ret = copy_to_sk(root, path, &key, sk, args->buf,
1075                                  &sk_offset, &num_found);
1076                 btrfs_release_path(root, path);
1077                 if (ret || num_found >= sk->nr_items)
1078                         break;
1079
1080         }
1081         ret = 0;
1082 err:
1083         sk->nr_items = num_found;
1084         btrfs_free_path(path);
1085         return ret;
1086 }
1087
1088 static noinline int btrfs_ioctl_tree_search(struct file *file,
1089                                            void __user *argp)
1090 {
1091          struct btrfs_ioctl_search_args *args;
1092          struct inode *inode;
1093          int ret;
1094
1095         if (!capable(CAP_SYS_ADMIN))
1096                 return -EPERM;
1097
1098         args = kmalloc(sizeof(*args), GFP_KERNEL);
1099         if (!args)
1100                 return -ENOMEM;
1101
1102         if (copy_from_user(args, argp, sizeof(*args))) {
1103                 kfree(args);
1104                 return -EFAULT;
1105         }
1106         inode = fdentry(file)->d_inode;
1107         ret = search_ioctl(inode, args);
1108         if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1109                 ret = -EFAULT;
1110         kfree(args);
1111         return ret;
1112 }
1113
1114 /*
1115  * Search INODE_REFs to identify path name of 'dirid' directory
1116  * in a 'tree_id' tree. and sets path name to 'name'.
1117  */
1118 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1119                                 u64 tree_id, u64 dirid, char *name)
1120 {
1121         struct btrfs_root *root;
1122         struct btrfs_key key;
1123         char *ptr;
1124         int ret = -1;
1125         int slot;
1126         int len;
1127         int total_len = 0;
1128         struct btrfs_inode_ref *iref;
1129         struct extent_buffer *l;
1130         struct btrfs_path *path;
1131
1132         if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1133                 name[0]='\0';
1134                 return 0;
1135         }
1136
1137         path = btrfs_alloc_path();
1138         if (!path)
1139                 return -ENOMEM;
1140
1141         ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1142
1143         key.objectid = tree_id;
1144         key.type = BTRFS_ROOT_ITEM_KEY;
1145         key.offset = (u64)-1;
1146         root = btrfs_read_fs_root_no_name(info, &key);
1147         if (IS_ERR(root)) {
1148                 printk(KERN_ERR "could not find root %llu\n", tree_id);
1149                 ret = -ENOENT;
1150                 goto out;
1151         }
1152
1153         key.objectid = dirid;
1154         key.type = BTRFS_INODE_REF_KEY;
1155         key.offset = (u64)-1;
1156
1157         while(1) {
1158                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1159                 if (ret < 0)
1160                         goto out;
1161
1162                 l = path->nodes[0];
1163                 slot = path->slots[0];
1164                 if (ret > 0 && slot > 0)
1165                         slot--;
1166                 btrfs_item_key_to_cpu(l, &key, slot);
1167
1168                 if (ret > 0 && (key.objectid != dirid ||
1169                                 key.type != BTRFS_INODE_REF_KEY)) {
1170                         ret = -ENOENT;
1171                         goto out;
1172                 }
1173
1174                 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1175                 len = btrfs_inode_ref_name_len(l, iref);
1176                 ptr -= len + 1;
1177                 total_len += len + 1;
1178                 if (ptr < name)
1179                         goto out;
1180
1181                 *(ptr + len) = '/';
1182                 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1183
1184                 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1185                         break;
1186
1187                 btrfs_release_path(root, path);
1188                 key.objectid = key.offset;
1189                 key.offset = (u64)-1;
1190                 dirid = key.objectid;
1191
1192         }
1193         if (ptr < name)
1194                 goto out;
1195         memcpy(name, ptr, total_len);
1196         name[total_len]='\0';
1197         ret = 0;
1198 out:
1199         btrfs_free_path(path);
1200         return ret;
1201 }
1202
1203 static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1204                                            void __user *argp)
1205 {
1206          struct btrfs_ioctl_ino_lookup_args *args;
1207          struct inode *inode;
1208          int ret;
1209
1210         if (!capable(CAP_SYS_ADMIN))
1211                 return -EPERM;
1212
1213         args = kmalloc(sizeof(*args), GFP_KERNEL);
1214         if (copy_from_user(args, argp, sizeof(*args))) {
1215                 kfree(args);
1216                 return -EFAULT;
1217         }
1218         inode = fdentry(file)->d_inode;
1219
1220         if (args->treeid == 0)
1221                 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1222
1223         ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1224                                         args->treeid, args->objectid,
1225                                         args->name);
1226
1227         if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1228                 ret = -EFAULT;
1229
1230         kfree(args);
1231         return ret;
1232 }
1233
1234 static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1235                                              void __user *arg)
1236 {
1237         struct dentry *parent = fdentry(file);
1238         struct dentry *dentry;
1239         struct inode *dir = parent->d_inode;
1240         struct inode *inode;
1241         struct btrfs_root *root = BTRFS_I(dir)->root;
1242         struct btrfs_root *dest = NULL;
1243         struct btrfs_ioctl_vol_args *vol_args;
1244         struct btrfs_trans_handle *trans;
1245         int namelen;
1246         int ret;
1247         int err = 0;
1248
1249         if (!capable(CAP_SYS_ADMIN))
1250                 return -EPERM;
1251
1252         vol_args = memdup_user(arg, sizeof(*vol_args));
1253         if (IS_ERR(vol_args))
1254                 return PTR_ERR(vol_args);
1255
1256         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1257         namelen = strlen(vol_args->name);
1258         if (strchr(vol_args->name, '/') ||
1259             strncmp(vol_args->name, "..", namelen) == 0) {
1260                 err = -EINVAL;
1261                 goto out;
1262         }
1263
1264         err = mnt_want_write(file->f_path.mnt);
1265         if (err)
1266                 goto out;
1267
1268         mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
1269         dentry = lookup_one_len(vol_args->name, parent, namelen);
1270         if (IS_ERR(dentry)) {
1271                 err = PTR_ERR(dentry);
1272                 goto out_unlock_dir;
1273         }
1274
1275         if (!dentry->d_inode) {
1276                 err = -ENOENT;
1277                 goto out_dput;
1278         }
1279
1280         inode = dentry->d_inode;
1281         if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
1282                 err = -EINVAL;
1283                 goto out_dput;
1284         }
1285
1286         dest = BTRFS_I(inode)->root;
1287
1288         mutex_lock(&inode->i_mutex);
1289         err = d_invalidate(dentry);
1290         if (err)
1291                 goto out_unlock;
1292
1293         down_write(&root->fs_info->subvol_sem);
1294
1295         err = may_destroy_subvol(dest);
1296         if (err)
1297                 goto out_up_write;
1298
1299         trans = btrfs_start_transaction(root, 1);
1300         ret = btrfs_unlink_subvol(trans, root, dir,
1301                                 dest->root_key.objectid,
1302                                 dentry->d_name.name,
1303                                 dentry->d_name.len);
1304         BUG_ON(ret);
1305
1306         btrfs_record_root_in_trans(trans, dest);
1307
1308         memset(&dest->root_item.drop_progress, 0,
1309                 sizeof(dest->root_item.drop_progress));
1310         dest->root_item.drop_level = 0;
1311         btrfs_set_root_refs(&dest->root_item, 0);
1312
1313         ret = btrfs_insert_orphan_item(trans,
1314                                 root->fs_info->tree_root,
1315                                 dest->root_key.objectid);
1316         BUG_ON(ret);
1317
1318         ret = btrfs_commit_transaction(trans, root);
1319         BUG_ON(ret);
1320         inode->i_flags |= S_DEAD;
1321 out_up_write:
1322         up_write(&root->fs_info->subvol_sem);
1323 out_unlock:
1324         mutex_unlock(&inode->i_mutex);
1325         if (!err) {
1326                 shrink_dcache_sb(root->fs_info->sb);
1327                 btrfs_invalidate_inodes(dest);
1328                 d_delete(dentry);
1329         }
1330 out_dput:
1331         dput(dentry);
1332 out_unlock_dir:
1333         mutex_unlock(&dir->i_mutex);
1334         mnt_drop_write(file->f_path.mnt);
1335 out:
1336         kfree(vol_args);
1337         return err;
1338 }
1339
1340 static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
1341 {
1342         struct inode *inode = fdentry(file)->d_inode;
1343         struct btrfs_root *root = BTRFS_I(inode)->root;
1344         struct btrfs_ioctl_defrag_range_args *range;
1345         int ret;
1346
1347         ret = mnt_want_write(file->f_path.mnt);
1348         if (ret)
1349                 return ret;
1350
1351         switch (inode->i_mode & S_IFMT) {
1352         case S_IFDIR:
1353                 if (!capable(CAP_SYS_ADMIN)) {
1354                         ret = -EPERM;
1355                         goto out;
1356                 }
1357                 btrfs_defrag_root(root, 0);
1358                 btrfs_defrag_root(root->fs_info->extent_root, 0);
1359                 break;
1360         case S_IFREG:
1361                 if (!(file->f_mode & FMODE_WRITE)) {
1362                         ret = -EINVAL;
1363                         goto out;
1364                 }
1365
1366                 range = kzalloc(sizeof(*range), GFP_KERNEL);
1367                 if (!range) {
1368                         ret = -ENOMEM;
1369                         goto out;
1370                 }
1371
1372                 if (argp) {
1373                         if (copy_from_user(range, argp,
1374                                            sizeof(*range))) {
1375                                 ret = -EFAULT;
1376                                 kfree(range);
1377                         }
1378                         /* compression requires us to start the IO */
1379                         if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1380                                 range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
1381                                 range->extent_thresh = (u32)-1;
1382                         }
1383                 } else {
1384                         /* the rest are all set to zero by kzalloc */
1385                         range->len = (u64)-1;
1386                 }
1387                 btrfs_defrag_file(file, range);
1388                 kfree(range);
1389                 break;
1390         }
1391 out:
1392         mnt_drop_write(file->f_path.mnt);
1393         return ret;
1394 }
1395
1396 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
1397 {
1398         struct btrfs_ioctl_vol_args *vol_args;
1399         int ret;
1400
1401         if (!capable(CAP_SYS_ADMIN))
1402                 return -EPERM;
1403
1404         vol_args = memdup_user(arg, sizeof(*vol_args));
1405         if (IS_ERR(vol_args))
1406                 return PTR_ERR(vol_args);
1407
1408         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1409         ret = btrfs_init_new_device(root, vol_args->name);
1410
1411         kfree(vol_args);
1412         return ret;
1413 }
1414
1415 static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
1416 {
1417         struct btrfs_ioctl_vol_args *vol_args;
1418         int ret;
1419
1420         if (!capable(CAP_SYS_ADMIN))
1421                 return -EPERM;
1422
1423         if (root->fs_info->sb->s_flags & MS_RDONLY)
1424                 return -EROFS;
1425
1426         vol_args = memdup_user(arg, sizeof(*vol_args));
1427         if (IS_ERR(vol_args))
1428                 return PTR_ERR(vol_args);
1429
1430         vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
1431         ret = btrfs_rm_device(root, vol_args->name);
1432
1433         kfree(vol_args);
1434         return ret;
1435 }
1436
1437 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1438                                        u64 off, u64 olen, u64 destoff)
1439 {
1440         struct inode *inode = fdentry(file)->d_inode;
1441         struct btrfs_root *root = BTRFS_I(inode)->root;
1442         struct file *src_file;
1443         struct inode *src;
1444         struct btrfs_trans_handle *trans;
1445         struct btrfs_path *path;
1446         struct extent_buffer *leaf;
1447         char *buf;
1448         struct btrfs_key key;
1449         u32 nritems;
1450         int slot;
1451         int ret;
1452         u64 len = olen;
1453         u64 bs = root->fs_info->sb->s_blocksize;
1454         u64 hint_byte;
1455
1456         /*
1457          * TODO:
1458          * - split compressed inline extents.  annoying: we need to
1459          *   decompress into destination's address_space (the file offset
1460          *   may change, so source mapping won't do), then recompress (or
1461          *   otherwise reinsert) a subrange.
1462          * - allow ranges within the same file to be cloned (provided
1463          *   they don't overlap)?
1464          */
1465
1466         /* the destination must be opened for writing */
1467         if (!(file->f_mode & FMODE_WRITE))
1468                 return -EINVAL;
1469
1470         ret = mnt_want_write(file->f_path.mnt);
1471         if (ret)
1472                 return ret;
1473
1474         src_file = fget(srcfd);
1475         if (!src_file) {
1476                 ret = -EBADF;
1477                 goto out_drop_write;
1478         }
1479         src = src_file->f_dentry->d_inode;
1480
1481         ret = -EINVAL;
1482         if (src == inode)
1483                 goto out_fput;
1484
1485         ret = -EISDIR;
1486         if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
1487                 goto out_fput;
1488
1489         ret = -EXDEV;
1490         if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
1491                 goto out_fput;
1492
1493         ret = -ENOMEM;
1494         buf = vmalloc(btrfs_level_size(root, 0));
1495         if (!buf)
1496                 goto out_fput;
1497
1498         path = btrfs_alloc_path();
1499         if (!path) {
1500                 vfree(buf);
1501                 goto out_fput;
1502         }
1503         path->reada = 2;
1504
1505         if (inode < src) {
1506                 mutex_lock(&inode->i_mutex);
1507                 mutex_lock(&src->i_mutex);
1508         } else {
1509                 mutex_lock(&src->i_mutex);
1510                 mutex_lock(&inode->i_mutex);
1511         }
1512
1513         /* determine range to clone */
1514         ret = -EINVAL;
1515         if (off >= src->i_size || off + len > src->i_size)
1516                 goto out_unlock;
1517         if (len == 0)
1518                 olen = len = src->i_size - off;
1519         /* if we extend to eof, continue to block boundary */
1520         if (off + len == src->i_size)
1521                 len = ((src->i_size + bs-1) & ~(bs-1))
1522                         - off;
1523
1524         /* verify the end result is block aligned */
1525         if ((off & (bs-1)) ||
1526             ((off + len) & (bs-1)))
1527                 goto out_unlock;
1528
1529         /* do any pending delalloc/csum calc on src, one way or
1530            another, and lock file content */
1531         while (1) {
1532                 struct btrfs_ordered_extent *ordered;
1533                 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1534                 ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
1535                 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
1536                         break;
1537                 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1538                 if (ordered)
1539                         btrfs_put_ordered_extent(ordered);
1540                 btrfs_wait_ordered_range(src, off, off+len);
1541         }
1542
1543         trans = btrfs_start_transaction(root, 1);
1544         BUG_ON(!trans);
1545
1546         /* punch hole in destination first */
1547         btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
1548
1549         /* clone data */
1550         key.objectid = src->i_ino;
1551         key.type = BTRFS_EXTENT_DATA_KEY;
1552         key.offset = 0;
1553
1554         while (1) {
1555                 /*
1556                  * note the key will change type as we walk through the
1557                  * tree.
1558                  */
1559                 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
1560                 if (ret < 0)
1561                         goto out;
1562
1563                 nritems = btrfs_header_nritems(path->nodes[0]);
1564                 if (path->slots[0] >= nritems) {
1565                         ret = btrfs_next_leaf(root, path);
1566                         if (ret < 0)
1567                                 goto out;
1568                         if (ret > 0)
1569                                 break;
1570                         nritems = btrfs_header_nritems(path->nodes[0]);
1571                 }
1572                 leaf = path->nodes[0];
1573                 slot = path->slots[0];
1574
1575                 btrfs_item_key_to_cpu(leaf, &key, slot);
1576                 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
1577                     key.objectid != src->i_ino)
1578                         break;
1579
1580                 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
1581                         struct btrfs_file_extent_item *extent;
1582                         int type;
1583                         u32 size;
1584                         struct btrfs_key new_key;
1585                         u64 disko = 0, diskl = 0;
1586                         u64 datao = 0, datal = 0;
1587                         u8 comp;
1588
1589                         size = btrfs_item_size_nr(leaf, slot);
1590                         read_extent_buffer(leaf, buf,
1591                                            btrfs_item_ptr_offset(leaf, slot),
1592                                            size);
1593
1594                         extent = btrfs_item_ptr(leaf, slot,
1595                                                 struct btrfs_file_extent_item);
1596                         comp = btrfs_file_extent_compression(leaf, extent);
1597                         type = btrfs_file_extent_type(leaf, extent);
1598                         if (type == BTRFS_FILE_EXTENT_REG ||
1599                             type == BTRFS_FILE_EXTENT_PREALLOC) {
1600                                 disko = btrfs_file_extent_disk_bytenr(leaf,
1601                                                                       extent);
1602                                 diskl = btrfs_file_extent_disk_num_bytes(leaf,
1603                                                                  extent);
1604                                 datao = btrfs_file_extent_offset(leaf, extent);
1605                                 datal = btrfs_file_extent_num_bytes(leaf,
1606                                                                     extent);
1607                         } else if (type == BTRFS_FILE_EXTENT_INLINE) {
1608                                 /* take upper bound, may be compressed */
1609                                 datal = btrfs_file_extent_ram_bytes(leaf,
1610                                                                     extent);
1611                         }
1612                         btrfs_release_path(root, path);
1613
1614                         if (key.offset + datal < off ||
1615                             key.offset >= off+len)
1616                                 goto next;
1617
1618                         memcpy(&new_key, &key, sizeof(new_key));
1619                         new_key.objectid = inode->i_ino;
1620                         new_key.offset = key.offset + destoff - off;
1621
1622                         if (type == BTRFS_FILE_EXTENT_REG ||
1623                             type == BTRFS_FILE_EXTENT_PREALLOC) {
1624                                 ret = btrfs_insert_empty_item(trans, root, path,
1625                                                               &new_key, size);
1626                                 if (ret)
1627                                         goto out;
1628
1629                                 leaf = path->nodes[0];
1630                                 slot = path->slots[0];
1631                                 write_extent_buffer(leaf, buf,
1632                                             btrfs_item_ptr_offset(leaf, slot),
1633                                             size);
1634
1635                                 extent = btrfs_item_ptr(leaf, slot,
1636                                                 struct btrfs_file_extent_item);
1637
1638                                 if (off > key.offset) {
1639                                         datao += off - key.offset;
1640                                         datal -= off - key.offset;
1641                                 }
1642
1643                                 if (key.offset + datal > off + len)
1644                                         datal = off + len - key.offset;
1645
1646                                 /* disko == 0 means it's a hole */
1647                                 if (!disko)
1648                                         datao = 0;
1649
1650                                 btrfs_set_file_extent_offset(leaf, extent,
1651                                                              datao);
1652                                 btrfs_set_file_extent_num_bytes(leaf, extent,
1653                                                                 datal);
1654                                 if (disko) {
1655                                         inode_add_bytes(inode, datal);
1656                                         ret = btrfs_inc_extent_ref(trans, root,
1657                                                         disko, diskl, 0,
1658                                                         root->root_key.objectid,
1659                                                         inode->i_ino,
1660                                                         new_key.offset - datao);
1661                                         BUG_ON(ret);
1662                                 }
1663                         } else if (type == BTRFS_FILE_EXTENT_INLINE) {
1664                                 u64 skip = 0;
1665                                 u64 trim = 0;
1666                                 if (off > key.offset) {
1667                                         skip = off - key.offset;
1668                                         new_key.offset += skip;
1669                                 }
1670
1671                                 if (key.offset + datal > off+len)
1672                                         trim = key.offset + datal - (off+len);
1673
1674                                 if (comp && (skip || trim)) {
1675                                         ret = -EINVAL;
1676                                         goto out;
1677                                 }
1678                                 size -= skip + trim;
1679                                 datal -= skip + trim;
1680                                 ret = btrfs_insert_empty_item(trans, root, path,
1681                                                               &new_key, size);
1682                                 if (ret)
1683                                         goto out;
1684
1685                                 if (skip) {
1686                                         u32 start =
1687                                           btrfs_file_extent_calc_inline_size(0);
1688                                         memmove(buf+start, buf+start+skip,
1689                                                 datal);
1690                                 }
1691
1692                                 leaf = path->nodes[0];
1693                                 slot = path->slots[0];
1694                                 write_extent_buffer(leaf, buf,
1695                                             btrfs_item_ptr_offset(leaf, slot),
1696                                             size);
1697                                 inode_add_bytes(inode, datal);
1698                         }
1699
1700                         btrfs_mark_buffer_dirty(leaf);
1701                 }
1702
1703 next:
1704                 btrfs_release_path(root, path);
1705                 key.offset++;
1706         }
1707         ret = 0;
1708 out:
1709         btrfs_release_path(root, path);
1710         if (ret == 0) {
1711                 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
1712                 if (destoff + olen > inode->i_size)
1713                         btrfs_i_size_write(inode, destoff + olen);
1714                 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
1715                 ret = btrfs_update_inode(trans, root, inode);
1716         }
1717         btrfs_end_transaction(trans, root);
1718         unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1719         if (ret)
1720                 vmtruncate(inode, 0);
1721 out_unlock:
1722         mutex_unlock(&src->i_mutex);
1723         mutex_unlock(&inode->i_mutex);
1724         vfree(buf);
1725         btrfs_free_path(path);
1726 out_fput:
1727         fput(src_file);
1728 out_drop_write:
1729         mnt_drop_write(file->f_path.mnt);
1730         return ret;
1731 }
1732
1733 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp)
1734 {
1735         struct btrfs_ioctl_clone_range_args args;
1736
1737         if (copy_from_user(&args, argp, sizeof(args)))
1738                 return -EFAULT;
1739         return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
1740                                  args.src_length, args.dest_offset);
1741 }
1742
1743 /*
1744  * there are many ways the trans_start and trans_end ioctls can lead
1745  * to deadlocks.  They should only be used by applications that
1746  * basically own the machine, and have a very in depth understanding
1747  * of all the possible deadlocks and enospc problems.
1748  */
1749 static long btrfs_ioctl_trans_start(struct file *file)
1750 {
1751         struct inode *inode = fdentry(file)->d_inode;
1752         struct btrfs_root *root = BTRFS_I(inode)->root;
1753         struct btrfs_trans_handle *trans;
1754         int ret;
1755
1756         ret = -EPERM;
1757         if (!capable(CAP_SYS_ADMIN))
1758                 goto out;
1759
1760         ret = -EINPROGRESS;
1761         if (file->private_data)
1762                 goto out;
1763
1764         ret = mnt_want_write(file->f_path.mnt);
1765         if (ret)
1766                 goto out;
1767
1768         mutex_lock(&root->fs_info->trans_mutex);
1769         root->fs_info->open_ioctl_trans++;
1770         mutex_unlock(&root->fs_info->trans_mutex);
1771
1772         ret = -ENOMEM;
1773         trans = btrfs_start_ioctl_transaction(root, 0);
1774         if (!trans)
1775                 goto out_drop;
1776
1777         file->private_data = trans;
1778         return 0;
1779
1780 out_drop:
1781         mutex_lock(&root->fs_info->trans_mutex);
1782         root->fs_info->open_ioctl_trans--;
1783         mutex_unlock(&root->fs_info->trans_mutex);
1784         mnt_drop_write(file->f_path.mnt);
1785 out:
1786         return ret;
1787 }
1788
1789 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1790 {
1791         struct inode *inode = fdentry(file)->d_inode;
1792         struct btrfs_root *root = BTRFS_I(inode)->root;
1793         struct btrfs_root *new_root;
1794         struct btrfs_dir_item *di;
1795         struct btrfs_trans_handle *trans;
1796         struct btrfs_path *path;
1797         struct btrfs_key location;
1798         struct btrfs_disk_key disk_key;
1799         struct btrfs_super_block *disk_super;
1800         u64 features;
1801         u64 objectid = 0;
1802         u64 dir_id;
1803
1804         if (!capable(CAP_SYS_ADMIN))
1805                 return -EPERM;
1806
1807         if (copy_from_user(&objectid, argp, sizeof(objectid)))
1808                 return -EFAULT;
1809
1810         if (!objectid)
1811                 objectid = root->root_key.objectid;
1812
1813         location.objectid = objectid;
1814         location.type = BTRFS_ROOT_ITEM_KEY;
1815         location.offset = (u64)-1;
1816
1817         new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
1818         if (IS_ERR(new_root))
1819                 return PTR_ERR(new_root);
1820
1821         if (btrfs_root_refs(&new_root->root_item) == 0)
1822                 return -ENOENT;
1823
1824         path = btrfs_alloc_path();
1825         if (!path)
1826                 return -ENOMEM;
1827         path->leave_spinning = 1;
1828
1829         trans = btrfs_start_transaction(root, 1);
1830         if (!trans) {
1831                 btrfs_free_path(path);
1832                 return -ENOMEM;
1833         }
1834
1835         dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
1836         di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
1837                                    dir_id, "default", 7, 1);
1838         if (!di) {
1839                 btrfs_free_path(path);
1840                 btrfs_end_transaction(trans, root);
1841                 printk(KERN_ERR "Umm, you don't have the default dir item, "
1842                        "this isn't going to work\n");
1843                 return -ENOENT;
1844         }
1845
1846         btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
1847         btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
1848         btrfs_mark_buffer_dirty(path->nodes[0]);
1849         btrfs_free_path(path);
1850
1851         disk_super = &root->fs_info->super_copy;
1852         features = btrfs_super_incompat_flags(disk_super);
1853         if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
1854                 features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
1855                 btrfs_set_super_incompat_flags(disk_super, features);
1856         }
1857         btrfs_end_transaction(trans, root);
1858
1859         return 0;
1860 }
1861
1862 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1863 {
1864         struct btrfs_ioctl_space_args space_args;
1865         struct btrfs_ioctl_space_info space;
1866         struct btrfs_ioctl_space_info *dest;
1867         struct btrfs_ioctl_space_info *dest_orig;
1868         struct btrfs_ioctl_space_info *user_dest;
1869         struct btrfs_space_info *info;
1870         int alloc_size;
1871         int ret = 0;
1872         int slot_count = 0;
1873
1874         if (copy_from_user(&space_args,
1875                            (struct btrfs_ioctl_space_args __user *)arg,
1876                            sizeof(space_args)))
1877                 return -EFAULT;
1878
1879         /* first we count slots */
1880         rcu_read_lock();
1881         list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
1882                 slot_count++;
1883         rcu_read_unlock();
1884
1885         /* space_slots == 0 means they are asking for a count */
1886         if (space_args.space_slots == 0) {
1887                 space_args.total_spaces = slot_count;
1888                 goto out;
1889         }
1890         alloc_size = sizeof(*dest) * slot_count;
1891         /* we generally have at most 6 or so space infos, one for each raid
1892          * level.  So, a whole page should be more than enough for everyone
1893          */
1894         if (alloc_size > PAGE_CACHE_SIZE)
1895                 return -ENOMEM;
1896
1897         space_args.total_spaces = 0;
1898         dest = kmalloc(alloc_size, GFP_NOFS);
1899         if (!dest)
1900                 return -ENOMEM;
1901         dest_orig = dest;
1902
1903         /* now we have a buffer to copy into */
1904         rcu_read_lock();
1905         list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
1906                 /* make sure we don't copy more than we allocated
1907                  * in our buffer
1908                  */
1909                 if (slot_count == 0)
1910                         break;
1911                 slot_count--;
1912
1913                 /* make sure userland has enough room in their buffer */
1914                 if (space_args.total_spaces >= space_args.space_slots)
1915                         break;
1916
1917                 space.flags = info->flags;
1918                 space.total_bytes = info->total_bytes;
1919                 space.used_bytes = info->bytes_used;
1920                 memcpy(dest, &space, sizeof(space));
1921                 dest++;
1922                 space_args.total_spaces++;
1923         }
1924         rcu_read_unlock();
1925
1926         user_dest = (struct btrfs_ioctl_space_info *)
1927                 (arg + sizeof(struct btrfs_ioctl_space_args));
1928
1929         if (copy_to_user(user_dest, dest_orig, alloc_size))
1930                 ret = -EFAULT;
1931
1932         kfree(dest_orig);
1933 out:
1934         if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
1935                 ret = -EFAULT;
1936
1937         return ret;
1938 }
1939
1940 /*
1941  * there are many ways the trans_start and trans_end ioctls can lead
1942  * to deadlocks.  They should only be used by applications that
1943  * basically own the machine, and have a very in depth understanding
1944  * of all the possible deadlocks and enospc problems.
1945  */
1946 long btrfs_ioctl_trans_end(struct file *file)
1947 {
1948         struct inode *inode = fdentry(file)->d_inode;
1949         struct btrfs_root *root = BTRFS_I(inode)->root;
1950         struct btrfs_trans_handle *trans;
1951
1952         trans = file->private_data;
1953         if (!trans)
1954                 return -EINVAL;
1955         file->private_data = NULL;
1956
1957         btrfs_end_transaction(trans, root);
1958
1959         mutex_lock(&root->fs_info->trans_mutex);
1960         root->fs_info->open_ioctl_trans--;
1961         mutex_unlock(&root->fs_info->trans_mutex);
1962
1963         mnt_drop_write(file->f_path.mnt);
1964         return 0;
1965 }
1966
1967 long btrfs_ioctl(struct file *file, unsigned int
1968                 cmd, unsigned long arg)
1969 {
1970         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
1971         void __user *argp = (void __user *)arg;
1972
1973         switch (cmd) {
1974         case FS_IOC_GETFLAGS:
1975                 return btrfs_ioctl_getflags(file, argp);
1976         case FS_IOC_SETFLAGS:
1977                 return btrfs_ioctl_setflags(file, argp);
1978         case FS_IOC_GETVERSION:
1979                 return btrfs_ioctl_getversion(file, argp);
1980         case BTRFS_IOC_SNAP_CREATE:
1981                 return btrfs_ioctl_snap_create(file, argp, 0);
1982         case BTRFS_IOC_SUBVOL_CREATE:
1983                 return btrfs_ioctl_snap_create(file, argp, 1);
1984         case BTRFS_IOC_SNAP_DESTROY:
1985                 return btrfs_ioctl_snap_destroy(file, argp);
1986         case BTRFS_IOC_DEFAULT_SUBVOL:
1987                 return btrfs_ioctl_default_subvol(file, argp);
1988         case BTRFS_IOC_DEFRAG:
1989                 return btrfs_ioctl_defrag(file, NULL);
1990         case BTRFS_IOC_DEFRAG_RANGE:
1991                 return btrfs_ioctl_defrag(file, argp);
1992         case BTRFS_IOC_RESIZE:
1993                 return btrfs_ioctl_resize(root, argp);
1994         case BTRFS_IOC_ADD_DEV:
1995                 return btrfs_ioctl_add_dev(root, argp);
1996         case BTRFS_IOC_RM_DEV:
1997                 return btrfs_ioctl_rm_dev(root, argp);
1998         case BTRFS_IOC_BALANCE:
1999                 return btrfs_balance(root->fs_info->dev_root);
2000         case BTRFS_IOC_CLONE:
2001                 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
2002         case BTRFS_IOC_CLONE_RANGE:
2003                 return btrfs_ioctl_clone_range(file, argp);
2004         case BTRFS_IOC_TRANS_START:
2005                 return btrfs_ioctl_trans_start(file);
2006         case BTRFS_IOC_TRANS_END:
2007                 return btrfs_ioctl_trans_end(file);
2008         case BTRFS_IOC_TREE_SEARCH:
2009                 return btrfs_ioctl_tree_search(file, argp);
2010         case BTRFS_IOC_INO_LOOKUP:
2011                 return btrfs_ioctl_ino_lookup(file, argp);
2012         case BTRFS_IOC_SPACE_INFO:
2013                 return btrfs_ioctl_space_info(root, argp);
2014         case BTRFS_IOC_SYNC:
2015                 btrfs_sync_fs(file->f_dentry->d_sb, 1);
2016                 return 0;
2017         }
2018
2019         return -ENOTTY;
2020 }