Btrfs: Always use 64bit inode number
[pandora-kernel.git] fs/btrfs/file.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/fs.h>
20 #include <linux/pagemap.h>
21 #include <linux/highmem.h>
22 #include <linux/time.h>
23 #include <linux/init.h>
24 #include <linux/string.h>
25 #include <linux/backing-dev.h>
26 #include <linux/mpage.h>
27 #include <linux/falloc.h>
28 #include <linux/swap.h>
29 #include <linux/writeback.h>
30 #include <linux/statfs.h>
31 #include <linux/compat.h>
32 #include <linux/slab.h>
33 #include "ctree.h"
34 #include "disk-io.h"
35 #include "transaction.h"
36 #include "btrfs_inode.h"
37 #include "ioctl.h"
38 #include "print-tree.h"
39 #include "tree-log.h"
40 #include "locking.h"
41 #include "compat.h"
42
43
44 /* simple helper to fault in pages and copy.  This should go away
45  * and be replaced with calls into generic code.
46  */
47 static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48                                          size_t write_bytes,
49                                          struct page **prepared_pages,
50                                          struct iov_iter *i)
51 {
52         size_t copied = 0;
53         size_t total_copied = 0;
54         int pg = 0;
55         int offset = pos & (PAGE_CACHE_SIZE - 1);
56
57         while (write_bytes > 0) {
58                 size_t count = min_t(size_t,
59                                      PAGE_CACHE_SIZE - offset, write_bytes);
60                 struct page *page = prepared_pages[pg];
61                 /*
62                  * Copy data from userspace to the current page
63                  *
64                  * Disable pagefault to avoid recursive lock since
65                  * the pages are already locked
66                  */
67                 pagefault_disable();
68                 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
69                 pagefault_enable();
70
71                 /* Flush processor's dcache for this page */
72                 flush_dcache_page(page);
73
74                 /*
75                  * if we get a partial write, we can end up with
76                  * partially up to date pages.  These add
77                  * a lot of complexity, so make sure they don't
78                  * happen by forcing this copy to be retried.
79                  *
80                  * The rest of the btrfs_file_write code will fall
81                  * back to page at a time copies after we return 0.
82                  */
83                 if (!PageUptodate(page) && copied < count)
84                         copied = 0;
85
86                 iov_iter_advance(i, copied);
87                 write_bytes -= copied;
88                 total_copied += copied;
89
90                 /* Return to btrfs_file_aio_write to fault page */
91                 if (unlikely(copied == 0))
92                         break;
93
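                   /* a short copy leaves us mid-page; stay on this page
                    * at the new offset, otherwise move to the next page
                    */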
94                 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
95                         offset += copied;
96                 } else {
97                         pg++;
98                         offset = 0;
99                 }
100         }
101         return total_copied;
102 }
103
104 /*
105  * unlocks pages after btrfs_file_write is done with them
106  */
107 void btrfs_drop_pages(struct page **pages, size_t num_pages)
108 {
109         size_t i;
110         for (i = 0; i < num_pages; i++) {
111                 /* page checked is some magic around finding pages that
112                  * have been modified without going through btrfs_set_page_dirty;
113                  * clear it here
114                  */
115                 ClearPageChecked(pages[i]);
116                 unlock_page(pages[i]);
117                 mark_page_accessed(pages[i]);
118                 page_cache_release(pages[i]);
119         }
120 }
121
122 /*
123  * after copy_from_user, pages need to be dirtied and we need to make
124  * sure holes are created between the current EOF and the start of
125  * any next extents (if required).
126  *
127  * this also makes the decision about creating an inline extent vs
128  * doing real data extents, marking pages dirty and delalloc as required.
129  */
130 int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
131                       struct page **pages, size_t num_pages,
132                       loff_t pos, size_t write_bytes,
133                       struct extent_state **cached)
134 {
135         int err = 0;
136         int i;
137         u64 num_bytes;
138         u64 start_pos;
139         u64 end_of_last_block;
140         u64 end_pos = pos + write_bytes;
141         loff_t isize = i_size_read(inode);
142
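            /*
             * round start_pos down to a sector boundary and num_bytes up
             * so the delalloc range covers whole sectors.  e.g. with a
             * 4K sectorsize, a 100 byte write at offset 10 sets up
             * delalloc on [0, 4096)
             */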
143         start_pos = pos & ~((u64)root->sectorsize - 1);
144         num_bytes = (write_bytes + pos - start_pos +
145                     root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
146
147         end_of_last_block = start_pos + num_bytes - 1;
148         err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
149                                         cached);
150         if (err)
151                 return err;
152
153         for (i = 0; i < num_pages; i++) {
154                 struct page *p = pages[i];
155                 SetPageUptodate(p);
156                 ClearPageChecked(p);
157                 set_page_dirty(p);
158         }
159
160         /*
161          * we've only changed i_size in ram, and we haven't updated
162          * the disk i_size.  There is no need to log the inode
163          * at this time.
164          */
165         if (end_pos > isize)
166                 i_size_write(inode, end_pos);
167         return 0;
168 }
169
170 /*
171  * this drops all the extents in the cache that intersect the range
172  * [start, end].  Existing extents are split as required.
173  */
174 int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
175                             int skip_pinned)
176 {
177         struct extent_map *em;
178         struct extent_map *split = NULL;
179         struct extent_map *split2 = NULL;
180         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
181         u64 len = end - start + 1;
182         int ret;
183         int testend = 1;
184         unsigned long flags;
185         int compressed = 0;
186
187         WARN_ON(end < start);
188         if (end == (u64)-1) {
189                 len = (u64)-1;
190                 testend = 0;
191         }
192         while (1) {
193                 if (!split)
194                         split = alloc_extent_map(GFP_NOFS);
195                 if (!split2)
196                         split2 = alloc_extent_map(GFP_NOFS);
197                 BUG_ON(!split || !split2);
198
199                 write_lock(&em_tree->lock);
200                 em = lookup_extent_mapping(em_tree, start, len);
201                 if (!em) {
202                         write_unlock(&em_tree->lock);
203                         break;
204                 }
205                 flags = em->flags;
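                    /*
                     * pinned extents must survive until their ordered IO
                     * completes; when skip_pinned is set, step over them
                     * and shrink the range instead of dropping them
                     */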
206                 if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
207                         if (testend && em->start + em->len >= start + len) {
208                                 free_extent_map(em);
209                                 write_unlock(&em_tree->lock);
210                                 break;
211                         }
212                         start = em->start + em->len;
213                         if (testend)
214                                 len = start + len - (em->start + em->len);
215                         free_extent_map(em);
216                         write_unlock(&em_tree->lock);
217                         continue;
218                 }
219                 compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
220                 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
221                 remove_extent_mapping(em_tree, em);
222
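                    /*
                     * a real (non-hole) mapping starts in front of the
                     * dropped range; keep the leading piece as a new
                     * mapping
                     */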
223                 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
224                     em->start < start) {
225                         split->start = em->start;
226                         split->len = start - em->start;
227                         split->orig_start = em->orig_start;
228                         split->block_start = em->block_start;
229
230                         if (compressed)
231                                 split->block_len = em->block_len;
232                         else
233                                 split->block_len = split->len;
234
235                         split->bdev = em->bdev;
236                         split->flags = flags;
237                         split->compress_type = em->compress_type;
238                         ret = add_extent_mapping(em_tree, split);
239                         BUG_ON(ret);
240                         free_extent_map(split);
241                         split = split2;
242                         split2 = NULL;
243                 }
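                    /*
                     * the mapping extends past the end of the dropped
                     * range; keep the trailing piece as a new mapping
                     */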
244                 if (em->block_start < EXTENT_MAP_LAST_BYTE &&
245                     testend && em->start + em->len > start + len) {
246                         u64 diff = start + len - em->start;
247
248                         split->start = start + len;
249                         split->len = em->start + em->len - (start + len);
250                         split->bdev = em->bdev;
251                         split->flags = flags;
252                         split->compress_type = em->compress_type;
253
254                         if (compressed) {
255                                 split->block_len = em->block_len;
256                                 split->block_start = em->block_start;
257                                 split->orig_start = em->orig_start;
258                         } else {
259                                 split->block_len = split->len;
260                                 split->block_start = em->block_start + diff;
261                                 split->orig_start = split->start;
262                         }
263
264                         ret = add_extent_mapping(em_tree, split);
265                         BUG_ON(ret);
266                         free_extent_map(split);
267                         split = NULL;
268                 }
269                 write_unlock(&em_tree->lock);
270
271                 /* once for us */
272                 free_extent_map(em);
273                 /* once for the tree */
274                 free_extent_map(em);
275         }
276         if (split)
277                 free_extent_map(split);
278         if (split2)
279                 free_extent_map(split2);
280         return 0;
281 }
282
283 /*
284  * this is very complex, but the basic idea is to drop all extents
285  * in the range start - end.  hint_byte is filled in with a disk byte offset
286  * that would be a good hint to the block allocator for this file.
287  *
288  * If an extent intersects the range but is not entirely inside the range
289  * it is either truncated or split.  Anything entirely inside the range
290  * is deleted from the tree.
291  */
292 int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
293                        u64 start, u64 end, u64 *hint_byte, int drop_cache)
294 {
295         struct btrfs_root *root = BTRFS_I(inode)->root;
296         struct extent_buffer *leaf;
297         struct btrfs_file_extent_item *fi;
298         struct btrfs_path *path;
299         struct btrfs_key key;
300         struct btrfs_key new_key;
301         u64 ino = btrfs_ino(inode);
302         u64 search_start = start;
303         u64 disk_bytenr = 0;
304         u64 num_bytes = 0;
305         u64 extent_offset = 0;
306         u64 extent_end = 0;
307         int del_nr = 0;
308         int del_slot = 0;
309         int extent_type;
310         int recow;
311         int ret;
312
313         if (drop_cache)
314                 btrfs_drop_extent_cache(inode, start, end - 1, 0);
315
316         path = btrfs_alloc_path();
317         if (!path)
318                 return -ENOMEM;
319
320         while (1) {
321                 recow = 0;
322                 ret = btrfs_lookup_file_extent(trans, root, path, ino,
323                                                search_start, -1);
324                 if (ret < 0)
325                         break;
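                    /*
                     * no exact match was found; the item just before the
                     * insertion point may be an extent that spans
                     * search_start, so back up one slot and check it
                     */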
326                 if (ret > 0 && path->slots[0] > 0 && search_start == start) {
327                         leaf = path->nodes[0];
328                         btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
329                         if (key.objectid == ino &&
330                             key.type == BTRFS_EXTENT_DATA_KEY)
331                                 path->slots[0]--;
332                 }
333                 ret = 0;
334 next_slot:
335                 leaf = path->nodes[0];
336                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
337                         BUG_ON(del_nr > 0);
338                         ret = btrfs_next_leaf(root, path);
339                         if (ret < 0)
340                                 break;
341                         if (ret > 0) {
342                                 ret = 0;
343                                 break;
344                         }
345                         leaf = path->nodes[0];
346                         recow = 1;
347                 }
348
349                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
350                 if (key.objectid > ino ||
351                     key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
352                         break;
353
354                 fi = btrfs_item_ptr(leaf, path->slots[0],
355                                     struct btrfs_file_extent_item);
356                 extent_type = btrfs_file_extent_type(leaf, fi);
357
358                 if (extent_type == BTRFS_FILE_EXTENT_REG ||
359                     extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
360                         disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
361                         num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
362                         extent_offset = btrfs_file_extent_offset(leaf, fi);
363                         extent_end = key.offset +
364                                 btrfs_file_extent_num_bytes(leaf, fi);
365                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
366                         extent_end = key.offset +
367                                 btrfs_file_extent_inline_len(leaf, fi);
368                 } else {
369                         WARN_ON(1);
370                         extent_end = search_start;
371                 }
372
373                 if (extent_end <= search_start) {
374                         path->slots[0]++;
375                         goto next_slot;
376                 }
377
378                 search_start = max(key.offset, start);
379                 if (recow) {
380                         btrfs_release_path(root, path);
381                         continue;
382                 }
383
384                 /*
385                  *     | - range to drop - |
386                  *  | -------- extent -------- |
387                  */
388                 if (start > key.offset && end < extent_end) {
389                         BUG_ON(del_nr > 0);
390                         BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
391
392                         memcpy(&new_key, &key, sizeof(new_key));
393                         new_key.offset = start;
394                         ret = btrfs_duplicate_item(trans, root, path,
395                                                    &new_key);
396                         if (ret == -EAGAIN) {
397                                 btrfs_release_path(root, path);
398                                 continue;
399                         }
400                         if (ret < 0)
401                                 break;
402
403                         leaf = path->nodes[0];
404                         fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
405                                             struct btrfs_file_extent_item);
406                         btrfs_set_file_extent_num_bytes(leaf, fi,
407                                                         start - key.offset);
408
409                         fi = btrfs_item_ptr(leaf, path->slots[0],
410                                             struct btrfs_file_extent_item);
411
412                         extent_offset += start - key.offset;
413                         btrfs_set_file_extent_offset(leaf, fi, extent_offset);
414                         btrfs_set_file_extent_num_bytes(leaf, fi,
415                                                         extent_end - start);
416                         btrfs_mark_buffer_dirty(leaf);
417
418                         if (disk_bytenr > 0) {
419                                 ret = btrfs_inc_extent_ref(trans, root,
420                                                 disk_bytenr, num_bytes, 0,
421                                                 root->root_key.objectid,
422                                                 new_key.objectid,
423                                                 start - extent_offset);
424                                 BUG_ON(ret);
425                                 *hint_byte = disk_bytenr;
426                         }
427                         key.offset = start;
428                 }
429                 /*
430                  *  | ---- range to drop ----- |
431                  *      | -------- extent -------- |
432                  */
433                 if (start <= key.offset && end < extent_end) {
434                         BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
435
436                         memcpy(&new_key, &key, sizeof(new_key));
437                         new_key.offset = end;
438                         btrfs_set_item_key_safe(trans, root, path, &new_key);
439
440                         extent_offset += end - key.offset;
441                         btrfs_set_file_extent_offset(leaf, fi, extent_offset);
442                         btrfs_set_file_extent_num_bytes(leaf, fi,
443                                                         extent_end - end);
444                         btrfs_mark_buffer_dirty(leaf);
445                         if (disk_bytenr > 0) {
446                                 inode_sub_bytes(inode, end - key.offset);
447                                 *hint_byte = disk_bytenr;
448                         }
449                         break;
450                 }
451
452                 search_start = extent_end;
453                 /*
454                  *       | ---- range to drop ----- |
455                  *  | -------- extent -------- |
456                  */
457                 if (start > key.offset && end >= extent_end) {
458                         BUG_ON(del_nr > 0);
459                         BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
460
461                         btrfs_set_file_extent_num_bytes(leaf, fi,
462                                                         start - key.offset);
463                         btrfs_mark_buffer_dirty(leaf);
464                         if (disk_bytenr > 0) {
465                                 inode_sub_bytes(inode, extent_end - start);
466                                 *hint_byte = disk_bytenr;
467                         }
468                         if (end == extent_end)
469                                 break;
470
471                         path->slots[0]++;
472                         goto next_slot;
473                 }
474
475                 /*
476                  *  | ---- range to drop ----- |
477                  *    | ------ extent ------ |
478                  */
479                 if (start <= key.offset && end >= extent_end) {
480                         if (del_nr == 0) {
481                                 del_slot = path->slots[0];
482                                 del_nr = 1;
483                         } else {
484                                 BUG_ON(del_slot + del_nr != path->slots[0]);
485                                 del_nr++;
486                         }
487
488                         if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
489                                 inode_sub_bytes(inode,
490                                                 extent_end - key.offset);
491                                 extent_end = ALIGN(extent_end,
492                                                    root->sectorsize);
493                         } else if (disk_bytenr > 0) {
494                                 ret = btrfs_free_extent(trans, root,
495                                                 disk_bytenr, num_bytes, 0,
496                                                 root->root_key.objectid,
497                                                 key.objectid, key.offset -
498                                                 extent_offset);
499                                 BUG_ON(ret);
500                                 inode_sub_bytes(inode,
501                                                 extent_end - key.offset);
502                                 *hint_byte = disk_bytenr;
503                         }
504
505                         if (end == extent_end)
506                                 break;
507
508                         if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
509                                 path->slots[0]++;
510                                 goto next_slot;
511                         }
512
513                         ret = btrfs_del_items(trans, root, path, del_slot,
514                                               del_nr);
515                         BUG_ON(ret);
516
517                         del_nr = 0;
518                         del_slot = 0;
519
520                         btrfs_release_path(root, path);
521                         continue;
522                 }
523
524                 BUG_ON(1);
525         }
526
527         if (del_nr > 0) {
528                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
529                 BUG_ON(ret);
530         }
531
532         btrfs_free_path(path);
533         return ret;
534 }
535
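    /*
     * returns 1 if the extent item at 'slot' is a plain (uncompressed,
     * unencoded) regular extent that points into the same disk extent
     * described by bytenr/orig_offset, filling *start and *end with the
     * file range it covers
     */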
536 static int extent_mergeable(struct extent_buffer *leaf, int slot,
537                             u64 objectid, u64 bytenr, u64 orig_offset,
538                             u64 *start, u64 *end)
539 {
540         struct btrfs_file_extent_item *fi;
541         struct btrfs_key key;
542         u64 extent_end;
543
544         if (slot < 0 || slot >= btrfs_header_nritems(leaf))
545                 return 0;
546
547         btrfs_item_key_to_cpu(leaf, &key, slot);
548         if (key.objectid != objectid || key.type != BTRFS_EXTENT_DATA_KEY)
549                 return 0;
550
551         fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
552         if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
553             btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
554             btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
555             btrfs_file_extent_compression(leaf, fi) ||
556             btrfs_file_extent_encryption(leaf, fi) ||
557             btrfs_file_extent_other_encoding(leaf, fi))
558                 return 0;
559
560         extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
561         if ((*start && *start != key.offset) || (*end && *end != extent_end))
562                 return 0;
563
564         *start = key.offset;
565         *end = extent_end;
566         return 1;
567 }
568
569 /*
570  * Mark extent in the range start - end as written.
571  *
572  * This changes extent type from 'pre-allocated' to 'regular'. If only
573  * part of extent is marked as written, the extent will be split into
574  * two or three.
575  */
576 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
577                               struct inode *inode, u64 start, u64 end)
578 {
579         struct btrfs_root *root = BTRFS_I(inode)->root;
580         struct extent_buffer *leaf;
581         struct btrfs_path *path;
582         struct btrfs_file_extent_item *fi;
583         struct btrfs_key key;
584         struct btrfs_key new_key;
585         u64 bytenr;
586         u64 num_bytes;
587         u64 extent_end;
588         u64 orig_offset;
589         u64 other_start;
590         u64 other_end;
591         u64 split;
592         int del_nr = 0;
593         int del_slot = 0;
594         int recow;
595         int ret;
596         u64 ino = btrfs_ino(inode);
597
598         btrfs_drop_extent_cache(inode, start, end - 1, 0);
599
600         path = btrfs_alloc_path();
601         BUG_ON(!path);
602 again:
603         recow = 0;
604         split = start;
605         key.objectid = ino;
606         key.type = BTRFS_EXTENT_DATA_KEY;
607         key.offset = split;
608
609         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
610         if (ret < 0)
611                 goto out;
612         if (ret > 0 && path->slots[0] > 0)
613                 path->slots[0]--;
614
615         leaf = path->nodes[0];
616         btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
617         BUG_ON(key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY);
618         fi = btrfs_item_ptr(leaf, path->slots[0],
619                             struct btrfs_file_extent_item);
620         BUG_ON(btrfs_file_extent_type(leaf, fi) !=
621                BTRFS_FILE_EXTENT_PREALLOC);
622         extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
623         BUG_ON(key.offset > start || extent_end < end);
624
625         bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
626         num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
627         orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
628         memcpy(&new_key, &key, sizeof(new_key));
629
630         if (start == key.offset && end < extent_end) {
631                 other_start = 0;
632                 other_end = start;
633                 if (extent_mergeable(leaf, path->slots[0] - 1,
634                                      ino, bytenr, orig_offset,
635                                      &other_start, &other_end)) {
636                         new_key.offset = end;
637                         btrfs_set_item_key_safe(trans, root, path, &new_key);
638                         fi = btrfs_item_ptr(leaf, path->slots[0],
639                                             struct btrfs_file_extent_item);
640                         btrfs_set_file_extent_num_bytes(leaf, fi,
641                                                         extent_end - end);
642                         btrfs_set_file_extent_offset(leaf, fi,
643                                                      end - orig_offset);
644                         fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
645                                             struct btrfs_file_extent_item);
646                         btrfs_set_file_extent_num_bytes(leaf, fi,
647                                                         end - other_start);
648                         btrfs_mark_buffer_dirty(leaf);
649                         goto out;
650                 }
651         }
652
653         if (start > key.offset && end == extent_end) {
654                 other_start = end;
655                 other_end = 0;
656                 if (extent_mergeable(leaf, path->slots[0] + 1,
657                                      ino, bytenr, orig_offset,
658                                      &other_start, &other_end)) {
659                         fi = btrfs_item_ptr(leaf, path->slots[0],
660                                             struct btrfs_file_extent_item);
661                         btrfs_set_file_extent_num_bytes(leaf, fi,
662                                                         start - key.offset);
663                         path->slots[0]++;
664                         new_key.offset = start;
665                         btrfs_set_item_key_safe(trans, root, path, &new_key);
666
667                         fi = btrfs_item_ptr(leaf, path->slots[0],
668                                             struct btrfs_file_extent_item);
669                         btrfs_set_file_extent_num_bytes(leaf, fi,
670                                                         other_end - start);
671                         btrfs_set_file_extent_offset(leaf, fi,
672                                                      start - orig_offset);
673                         btrfs_mark_buffer_dirty(leaf);
674                         goto out;
675                 }
676         }
677
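            /*
             * split the preallocated extent item until [start, end) is
             * covered by an item of its own.  every split leaves two
             * items sharing the disk extent, so a reference is added
             * for each one
             */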
678         while (start > key.offset || end < extent_end) {
679                 if (key.offset == start)
680                         split = end;
681
682                 new_key.offset = split;
683                 ret = btrfs_duplicate_item(trans, root, path, &new_key);
684                 if (ret == -EAGAIN) {
685                         btrfs_release_path(root, path);
686                         goto again;
687                 }
688                 BUG_ON(ret < 0);
689
690                 leaf = path->nodes[0];
691                 fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
692                                     struct btrfs_file_extent_item);
693                 btrfs_set_file_extent_num_bytes(leaf, fi,
694                                                 split - key.offset);
695
696                 fi = btrfs_item_ptr(leaf, path->slots[0],
697                                     struct btrfs_file_extent_item);
698
699                 btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
700                 btrfs_set_file_extent_num_bytes(leaf, fi,
701                                                 extent_end - split);
702                 btrfs_mark_buffer_dirty(leaf);
703
704                 ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
705                                            root->root_key.objectid,
706                                            ino, orig_offset);
707                 BUG_ON(ret);
708
709                 if (split == start) {
710                         key.offset = start;
711                 } else {
712                         BUG_ON(start != key.offset);
713                         path->slots[0]--;
714                         extent_end = end;
715                 }
716                 recow = 1;
717         }
718
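            /*
             * now try to merge the written piece with its neighbours: an
             * adjacent item pointing into the same disk extent can absorb
             * this range, and the extra reference is dropped
             */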
719         other_start = end;
720         other_end = 0;
721         if (extent_mergeable(leaf, path->slots[0] + 1,
722                              ino, bytenr, orig_offset,
723                              &other_start, &other_end)) {
724                 if (recow) {
725                         btrfs_release_path(root, path);
726                         goto again;
727                 }
728                 extent_end = other_end;
729                 del_slot = path->slots[0] + 1;
730                 del_nr++;
731                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
732                                         0, root->root_key.objectid,
733                                         ino, orig_offset);
734                 BUG_ON(ret);
735         }
736         other_start = 0;
737         other_end = start;
738         if (extent_mergeable(leaf, path->slots[0] - 1,
739                              ino, bytenr, orig_offset,
740                              &other_start, &other_end)) {
741                 if (recow) {
742                         btrfs_release_path(root, path);
743                         goto again;
744                 }
745                 key.offset = other_start;
746                 del_slot = path->slots[0];
747                 del_nr++;
748                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
749                                         0, root->root_key.objectid,
750                                         ino, orig_offset);
751                 BUG_ON(ret);
752         }
753         if (del_nr == 0) {
754                 fi = btrfs_item_ptr(leaf, path->slots[0],
755                            struct btrfs_file_extent_item);
756                 btrfs_set_file_extent_type(leaf, fi,
757                                            BTRFS_FILE_EXTENT_REG);
758                 btrfs_mark_buffer_dirty(leaf);
759         } else {
760                 fi = btrfs_item_ptr(leaf, del_slot - 1,
761                            struct btrfs_file_extent_item);
762                 btrfs_set_file_extent_type(leaf, fi,
763                                            BTRFS_FILE_EXTENT_REG);
764                 btrfs_set_file_extent_num_bytes(leaf, fi,
765                                                 extent_end - key.offset);
766                 btrfs_mark_buffer_dirty(leaf);
767
768                 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
769                 BUG_ON(ret);
770         }
771 out:
772         btrfs_free_path(path);
773         return 0;
774 }
775
776 /*
777  * on error we return an unlocked page and the error value
778  * on success we return a locked page and 0
779  */
780 static int prepare_uptodate_page(struct page *page, u64 pos)
781 {
782         int ret = 0;
783
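            /*
             * a write that doesn't start (or end) on a page boundary only
             * partially overwrites the page, so the rest of it must be
             * read in and up to date before the copy
             */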
784         if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
785                 ret = btrfs_readpage(NULL, page);
786                 if (ret)
787                         return ret;
788                 lock_page(page);
789                 if (!PageUptodate(page)) {
790                         unlock_page(page);
791                         return -EIO;
792                 }
793         }
794         return 0;
795 }
796
797 /*
798  * this gets pages into the page cache and locks them down, it also properly
799  * waits for data=ordered extents to finish before allowing the pages to be
800  * modified.
801  */
802 static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
803                          struct page **pages, size_t num_pages,
804                          loff_t pos, unsigned long first_index,
805                          unsigned long last_index, size_t write_bytes)
806 {
807         struct extent_state *cached_state = NULL;
808         int i;
809         unsigned long index = pos >> PAGE_CACHE_SHIFT;
810         struct inode *inode = fdentry(file)->d_inode;
811         int err = 0;
812         int faili = 0;
813         u64 start_pos;
814         u64 last_pos;
815
816         start_pos = pos & ~((u64)root->sectorsize - 1);
817         last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
818
819         if (start_pos > inode->i_size) {
820                 err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
821                 if (err)
822                         return err;
823         }
824
825 again:
826         for (i = 0; i < num_pages; i++) {
827                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
828                 if (!pages[i]) {
829                         faili = i - 1;
830                         err = -ENOMEM;
831                         goto fail;
832                 }
833
834                 if (i == 0)
835                         err = prepare_uptodate_page(pages[i], pos);
836                 if (i == num_pages - 1)
837                         err = prepare_uptodate_page(pages[i],
838                                                     pos + write_bytes);
839                 if (err) {
840                         page_cache_release(pages[i]);
841                         faili = i - 1;
842                         goto fail;
843                 }
844                 wait_on_page_writeback(pages[i]);
845         }
846         err = 0;
847         if (start_pos < inode->i_size) {
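                    /*
                     * an ordered extent still running in our range could
                     * redirty the pages under us; unlock everything, wait
                     * for it to finish and retry
                     */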
848                 struct btrfs_ordered_extent *ordered;
849                 lock_extent_bits(&BTRFS_I(inode)->io_tree,
850                                  start_pos, last_pos - 1, 0, &cached_state,
851                                  GFP_NOFS);
852                 ordered = btrfs_lookup_first_ordered_extent(inode,
853                                                             last_pos - 1);
854                 if (ordered &&
855                     ordered->file_offset + ordered->len > start_pos &&
856                     ordered->file_offset < last_pos) {
857                         btrfs_put_ordered_extent(ordered);
858                         unlock_extent_cached(&BTRFS_I(inode)->io_tree,
859                                              start_pos, last_pos - 1,
860                                              &cached_state, GFP_NOFS);
861                         for (i = 0; i < num_pages; i++) {
862                                 unlock_page(pages[i]);
863                                 page_cache_release(pages[i]);
864                         }
865                         btrfs_wait_ordered_range(inode, start_pos,
866                                                  last_pos - start_pos);
867                         goto again;
868                 }
869                 if (ordered)
870                         btrfs_put_ordered_extent(ordered);
871
872                 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
873                                   last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
874                                   EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
875                                   GFP_NOFS);
876                 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
877                                      start_pos, last_pos - 1, &cached_state,
878                                      GFP_NOFS);
879         }
880         for (i = 0; i < num_pages; i++) {
881                 clear_page_dirty_for_io(pages[i]);
882                 set_page_extent_mapped(pages[i]);
883                 WARN_ON(!PageLocked(pages[i]));
884         }
885         return 0;
886 fail:
887         while (faili >= 0) {
888                 unlock_page(pages[faili]);
889                 page_cache_release(pages[faili]);
890                 faili--;
891         }
892         return err;
893
894 }
895
896 static noinline ssize_t __btrfs_buffered_write(struct file *file,
897                                                struct iov_iter *i,
898                                                loff_t pos)
899 {
900         struct inode *inode = fdentry(file)->d_inode;
901         struct btrfs_root *root = BTRFS_I(inode)->root;
902         struct page **pages = NULL;
903         unsigned long first_index;
904         unsigned long last_index;
905         size_t num_written = 0;
906         int nrptrs;
907         int ret = 0;
908
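            /*
             * batch size: enough page pointers to cover the whole iov,
             * capped so the pointer array itself fits in a single page
             */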
909         nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
910                      PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
911                      (sizeof(struct page *)));
912         pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
913         if (!pages)
914                 return -ENOMEM;
915
916         first_index = pos >> PAGE_CACHE_SHIFT;
917         last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
918
919         while (iov_iter_count(i) > 0) {
920                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
921                 size_t write_bytes = min(iov_iter_count(i),
922                                          nrptrs * (size_t)PAGE_CACHE_SIZE -
923                                          offset);
924                 size_t num_pages = (write_bytes + offset +
925                                     PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
926                 size_t dirty_pages;
927                 size_t copied;
928
929                 WARN_ON(num_pages > nrptrs);
930
931                 /*
932                  * Fault pages before locking them in prepare_pages
933                  * to avoid recursive lock
934                  */
935                 if (unlikely(iov_iter_fault_in_readable(i, write_bytes))) {
936                         ret = -EFAULT;
937                         break;
938                 }
939
940                 ret = btrfs_delalloc_reserve_space(inode,
941                                         num_pages << PAGE_CACHE_SHIFT);
942                 if (ret)
943                         break;
944
945                 /*
946                  * This is going to setup the pages array with the number of
947                  * pages we want, so we don't really need to worry about the
948                  * contents of pages from loop to loop
949                  */
950                 ret = prepare_pages(root, file, pages, num_pages,
951                                     pos, first_index, last_index,
952                                     write_bytes);
953                 if (ret) {
954                         btrfs_delalloc_release_space(inode,
955                                         num_pages << PAGE_CACHE_SHIFT);
956                         break;
957                 }
958
959                 copied = btrfs_copy_from_user(pos, num_pages,
960                                            write_bytes, pages, i);
961
962                 /*
963                  * if we have trouble faulting in the pages, fall
964                  * back to one page at a time
965                  */
966                 if (copied < write_bytes)
967                         nrptrs = 1;
968
969                 if (copied == 0)
970                         dirty_pages = 0;
971                 else
972                         dirty_pages = (copied + offset +
973                                        PAGE_CACHE_SIZE - 1) >>
974                                        PAGE_CACHE_SHIFT;
975
976                 /*
977                  * If we had a short copy we need to release the excess delalloc
978                  * bytes we reserved.  We need to increment outstanding_extents
979                  * because btrfs_delalloc_release_space will decrement it, but
980                  * we still have an outstanding extent for the chunk we actually
981                  * managed to copy.
982                  */
983                 if (num_pages > dirty_pages) {
984                         if (copied > 0)
985                                 atomic_inc(
986                                         &BTRFS_I(inode)->outstanding_extents);
987                         btrfs_delalloc_release_space(inode,
988                                         (num_pages - dirty_pages) <<
989                                         PAGE_CACHE_SHIFT);
990                 }
991
992                 if (copied > 0) {
993                         ret = btrfs_dirty_pages(root, inode, pages,
994                                                 dirty_pages, pos, copied,
995                                                 NULL);
996                         if (ret) {
997                                 btrfs_delalloc_release_space(inode,
998                                         dirty_pages << PAGE_CACHE_SHIFT);
999                                 btrfs_drop_pages(pages, num_pages);
1000                                 break;
1001                         }
1002                 }
1003
1004                 btrfs_drop_pages(pages, num_pages);
1005
1006                 cond_resched();
1007
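                     /*
                      * throttle against the global dirty limits and give
                      * the btree a chance to write out dirty metadata so
                      * a big buffered write can't flood memory
                      */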
1008                 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
1009                                                    dirty_pages);
1010                 if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1011                         btrfs_btree_balance_dirty(root, 1);
1012                 btrfs_throttle(root);
1013
1014                 pos += copied;
1015                 num_written += copied;
1016         }
1017
1018         kfree(pages);
1019
1020         return num_written ? num_written : ret;
1021 }
1022
1023 static ssize_t __btrfs_direct_write(struct kiocb *iocb,
1024                                     const struct iovec *iov,
1025                                     unsigned long nr_segs, loff_t pos,
1026                                     loff_t *ppos, size_t count, size_t ocount)
1027 {
1028         struct file *file = iocb->ki_filp;
1029         struct inode *inode = fdentry(file)->d_inode;
1030         struct iov_iter i;
1031         ssize_t written;
1032         ssize_t written_buffered;
1033         loff_t endbyte;
1034         int err;
1035
1036         written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos,
1037                                             count, ocount);
1038
1039         /*
1040          * the generic O_DIRECT will update in-memory i_size after the
1041          * DIOs are done.  But our endio handlers that update the on-disk
1042          * i_size never update past the in-memory i_size.  So we
1043          * need one more update here to catch any additions to the
1044          * file
1045          */
1046         if (inode->i_size != BTRFS_I(inode)->disk_i_size) {
1047                 btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
1048                 mark_inode_dirty(inode);
1049         }
1050
1051         if (written < 0 || written == count)
1052                 return written;
1053
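             /*
              * the direct write came up short; push the remainder through
              * the page cache, then flush and invalidate those pages so
              * the end state matches what a full O_DIRECT write would
              * have left behind
              */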
1054         pos += written;
1055         count -= written;
1056         iov_iter_init(&i, iov, nr_segs, count, written);
1057         written_buffered = __btrfs_buffered_write(file, &i, pos);
1058         if (written_buffered < 0) {
1059                 err = written_buffered;
1060                 goto out;
1061         }
1062         endbyte = pos + written_buffered - 1;
1063         err = filemap_write_and_wait_range(file->f_mapping, pos, endbyte);
1064         if (err)
1065                 goto out;
1066         written += written_buffered;
1067         *ppos = pos + written_buffered;
1068         invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
1069                                  endbyte >> PAGE_CACHE_SHIFT);
1070 out:
1071         return written ? written : err;
1072 }
1073
1074 static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1075                                     const struct iovec *iov,
1076                                     unsigned long nr_segs, loff_t pos)
1077 {
1078         struct file *file = iocb->ki_filp;
1079         struct inode *inode = fdentry(file)->d_inode;
1080         struct btrfs_root *root = BTRFS_I(inode)->root;
1081         loff_t *ppos = &iocb->ki_pos;
1082         ssize_t num_written = 0;
1083         ssize_t err = 0;
1084         size_t count, ocount;
1085
1086         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1087
1088         mutex_lock(&inode->i_mutex);
1089
1090         err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
1091         if (err) {
1092                 mutex_unlock(&inode->i_mutex);
1093                 goto out;
1094         }
1095         count = ocount;
1096
1097         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1098         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1099         if (err) {
1100                 mutex_unlock(&inode->i_mutex);
1101                 goto out;
1102         }
1103
1104         if (count == 0) {
1105                 mutex_unlock(&inode->i_mutex);
1106                 goto out;
1107         }
1108
1109         err = file_remove_suid(file);
1110         if (err) {
1111                 mutex_unlock(&inode->i_mutex);
1112                 goto out;
1113         }
1114
1115         /*
1116          * If BTRFS flips readonly due to some impossible error
1117          * (fs_info->fs_state now has BTRFS_SUPER_FLAG_ERROR),
1118          * although we have opened a file as writable, we have
1119          * to stop this write operation to ensure FS consistency.
1120          */
1121         if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
1122                 mutex_unlock(&inode->i_mutex);
1123                 err = -EROFS;
1124                 goto out;
1125         }
1126
1127         file_update_time(file);
1128         BTRFS_I(inode)->sequence++;
1129
1130         if (unlikely(file->f_flags & O_DIRECT)) {
1131                 num_written = __btrfs_direct_write(iocb, iov, nr_segs,
1132                                                    pos, ppos, count, ocount);
1133         } else {
1134                 struct iov_iter i;
1135
1136                 iov_iter_init(&i, iov, nr_segs, count, num_written);
1137
1138                 num_written = __btrfs_buffered_write(file, &i, pos);
1139                 if (num_written > 0)
1140                         *ppos = pos + num_written;
1141         }
1142
1143         mutex_unlock(&inode->i_mutex);
1144
1145         /*
1146          * we want to make sure fsync finds this change
1147          * but we haven't joined a transaction running right now.
1148          *
1149          * Later on, someone is sure to update the inode and get the
1150          * real transid recorded.
1151          *
1152          * We set last_trans now to the fs_info generation + 1,
1153          * this will either be one more than the running transaction
1154          * or the generation used for the next transaction if there isn't
1155          * one running right now.
1156          */
1157         BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1158         if (num_written > 0 || num_written == -EIOCBQUEUED) {
1159                 err = generic_write_sync(file, pos, num_written);
1160                 if (err < 0 && num_written > 0)
1161                         num_written = err;
1162         }
1163 out:
1164         current->backing_dev_info = NULL;
1165         return num_written ? num_written : err;
1166 }
1167
1168 int btrfs_release_file(struct inode *inode, struct file *filp)
1169 {
1170         /*
1171          * ordered_data_close is set by setattr when we are about to truncate
1172          * a file from a non-zero size to a zero size.  This tries to
1173          * flush down new bytes that may have been written if the
1174          * application were using truncate to replace a file in place.
1175          */
1176         if (BTRFS_I(inode)->ordered_data_close) {
1177                 BTRFS_I(inode)->ordered_data_close = 0;
1178                 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode);
1179                 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1180                         filemap_flush(inode->i_mapping);
1181         }
1182         if (filp->private_data)
1183                 btrfs_ioctl_trans_end(filp);
1184         return 0;
1185 }
1186
1187 /*
1188  * fsync call for both files and directories.  This logs the inode into
1189  * the tree log instead of forcing full commits whenever possible.
1190  *
1191  * It needs to call filemap_fdatawait so that all ordered extent updates
1192  * in the metadata btree are up to date for copying to the log.
1193  *
1194  * It drops the inode mutex before doing the tree log commit.  This is an
1195  * important optimization for directories because holding the mutex prevents
1196  * new operations on the dir while we write to disk.
1197  */
1198 int btrfs_sync_file(struct file *file, int datasync)
1199 {
1200         struct dentry *dentry = file->f_path.dentry;
1201         struct inode *inode = dentry->d_inode;
1202         struct btrfs_root *root = BTRFS_I(inode)->root;
1203         int ret = 0;
1204         struct btrfs_trans_handle *trans;
1205
1206         trace_btrfs_sync_file(file, datasync);
1207
1208         /* we wait first, since the writeback may change the inode */
1209         root->log_batch++;
1210         /* the VFS called filemap_fdatawrite for us */
1211         btrfs_wait_ordered_range(inode, 0, (u64)-1);
1212         root->log_batch++;
1213
1214         /*
1215          * check the transaction that last modified this inode
1216          * and see if its already been committed
1217          */
1218         if (!BTRFS_I(inode)->last_trans)
1219                 goto out;
1220
1221         /*
1222          * if the last transaction that changed this file was before
1223          * the current transaction, we can bail out now without any
1224          * syncing
1225          */
1226         mutex_lock(&root->fs_info->trans_mutex);
1227         if (BTRFS_I(inode)->last_trans <=
1228             root->fs_info->last_trans_committed) {
1229                 BTRFS_I(inode)->last_trans = 0;
1230                 mutex_unlock(&root->fs_info->trans_mutex);
1231                 goto out;
1232         }
1233         mutex_unlock(&root->fs_info->trans_mutex);
1234
1235         /*
1236          * ok we haven't committed the transaction yet, lets do a commit
1237          * ok, we haven't committed the transaction yet; let's do a commit
1238         if (file->private_data)
1239                 btrfs_ioctl_trans_end(file);
1240
1241         trans = btrfs_start_transaction(root, 0);
1242         if (IS_ERR(trans)) {
1243                 ret = PTR_ERR(trans);
1244                 goto out;
1245         }
1246
1247         ret = btrfs_log_dentry_safe(trans, root, dentry);
1248         if (ret < 0)
1249                 goto out;
1250
1251         /* we've logged all the items and now have a consistent
1252          * version of the file in the log.  It is possible that
1253          * someone will come in and modify the file, but that's
1254          * fine because the log is consistent on disk, and we
1255          * have references to all of the file's extents
1256          *
1257          * It is possible that someone will come in and log the
1258          * file again, but that will end up using the synchronization
1259          * inside btrfs_sync_log to keep things safe.
1260          */
1261         mutex_unlock(&dentry->d_inode->i_mutex);
1262
1263         if (ret != BTRFS_NO_LOG_SYNC) {
1264                 if (ret > 0) {
1265                         ret = btrfs_commit_transaction(trans, root);
1266                 } else {
1267                         ret = btrfs_sync_log(trans, root);
1268                         if (ret == 0)
1269                                 ret = btrfs_end_transaction(trans, root);
1270                         else
1271                                 ret = btrfs_commit_transaction(trans, root);
1272                 }
1273         } else {
1274                 ret = btrfs_end_transaction(trans, root);
1275         }
1276         mutex_lock(&dentry->d_inode->i_mutex);
1277 out:
1278         return ret > 0 ? -EIO : ret;
1279 }
1280
1281 static const struct vm_operations_struct btrfs_file_vm_ops = {
1282         .fault          = filemap_fault,
1283         .page_mkwrite   = btrfs_page_mkwrite,
1284 };
1285
1286 static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
1287 {
1288         struct address_space *mapping = filp->f_mapping;
1289
1290         if (!mapping->a_ops->readpage)
1291                 return -ENOEXEC;
1292
1293         file_accessed(filp);
1294         vma->vm_ops = &btrfs_file_vm_ops;
1295         vma->vm_flags |= VM_CAN_NONLINEAR;
1296
1297         return 0;
1298 }
1299
1300 static long btrfs_fallocate(struct file *file, int mode,
1301                             loff_t offset, loff_t len)
1302 {
1303         struct inode *inode = file->f_path.dentry->d_inode;
1304         struct extent_state *cached_state = NULL;
1305         u64 cur_offset;
1306         u64 last_byte;
1307         u64 alloc_start;
1308         u64 alloc_end;
1309         u64 alloc_hint = 0;
1310         u64 locked_end;
1311         u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
1312         struct extent_map *em;
1313         int ret;
1314
1315         alloc_start = offset & ~mask;
1316         alloc_end = (offset + len + mask) & ~mask;
1317
1318         /* We only support the FALLOC_FL_KEEP_SIZE mode */
1319         if (mode & ~FALLOC_FL_KEEP_SIZE)
1320                 return -EOPNOTSUPP;
1321
1322         /*
1323          * wait for ordered IO before we have any locks.  We'll loop again
1324          * below with the locks held.
1325          */
1326         btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
1327
1328         mutex_lock(&inode->i_mutex);
1329         ret = inode_newsize_ok(inode, alloc_end);
1330         if (ret)
1331                 goto out;
1332
1333         if (alloc_start > inode->i_size) {
1334                 ret = btrfs_cont_expand(inode, i_size_read(inode),
1335                                         alloc_start);
1336                 if (ret)
1337                         goto out;
1338         }
1339
1340         ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
1341         if (ret)
1342                 goto out;
1343
1344         locked_end = alloc_end - 1;
1345         while (1) {
1346                 struct btrfs_ordered_extent *ordered;
1347
1348                 /* the extent lock is ordered inside the running
1349                  * transaction
1350                  */
1351                 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
1352                                  locked_end, 0, &cached_state, GFP_NOFS);
1353                 ordered = btrfs_lookup_first_ordered_extent(inode,
1354                                                             alloc_end - 1);
1355                 if (ordered &&
1356                     ordered->file_offset + ordered->len > alloc_start &&
1357                     ordered->file_offset < alloc_end) {
1358                         btrfs_put_ordered_extent(ordered);
1359                         unlock_extent_cached(&BTRFS_I(inode)->io_tree,
1360                                              alloc_start, locked_end,
1361                                              &cached_state, GFP_NOFS);
1362                         /*
1363                          * we can't wait on the range with the transaction
1364                          * running or with the extent lock held
1365                          */
1366                         btrfs_wait_ordered_range(inode, alloc_start,
1367                                                  alloc_end - alloc_start);
1368                 } else {
1369                         if (ordered)
1370                                 btrfs_put_ordered_extent(ordered);
1371                         break;
1372                 }
1373         }
1374
1375         cur_offset = alloc_start;
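             /*
              * walk the extents in the range; any hole, or any region past
              * EOF that isn't already preallocated, gets a preallocated
              * extent
              */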
1376         while (1) {
1377                 em = btrfs_get_extent(inode, NULL, 0, cur_offset,
1378                                       alloc_end - cur_offset, 0);
1379                 BUG_ON(IS_ERR(em) || !em);
1380                 last_byte = min(extent_map_end(em), alloc_end);
1381                 last_byte = (last_byte + mask) & ~mask;
1382                 if (em->block_start == EXTENT_MAP_HOLE ||
1383                     (cur_offset >= inode->i_size &&
1384                      !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
1385                         ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
1386                                                         last_byte - cur_offset,
1387                                                         1 << inode->i_blkbits,
1388                                                         offset + len,
1389                                                         &alloc_hint);
1390                         if (ret < 0) {
1391                                 free_extent_map(em);
1392                                 break;
1393                         }
1394                 }
1395                 free_extent_map(em);
1396
1397                 cur_offset = last_byte;
1398                 if (cur_offset >= alloc_end) {
1399                         ret = 0;
1400                         break;
1401                 }
1402         }
1403         unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
1404                              &cached_state, GFP_NOFS);
1405
1406         btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
1407 out:
1408         mutex_unlock(&inode->i_mutex);
1409         return ret;
1410 }
1411
1412 const struct file_operations btrfs_file_operations = {
1413         .llseek         = generic_file_llseek,
1414         .read           = do_sync_read,
1415         .write          = do_sync_write,
1416         .aio_read       = generic_file_aio_read,
1417         .splice_read    = generic_file_splice_read,
1418         .aio_write      = btrfs_file_aio_write,
1419         .mmap           = btrfs_file_mmap,
1420         .open           = generic_file_open,
1421         .release        = btrfs_release_file,
1422         .fsync          = btrfs_sync_file,
1423         .fallocate      = btrfs_fallocate,
1424         .unlocked_ioctl = btrfs_ioctl,
1425 #ifdef CONFIG_COMPAT
1426         .compat_ioctl   = btrfs_ioctl,
1427 #endif
1428 };