/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "ref-cache.h"

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
        int type;
        u64 bytenr;
        u64 num_bytes;
        u64 parent;
        u64 orig_parent;
        u64 generation;
        u64 orig_generation;
        int level;
        struct list_head list;
        int del;
};

static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root, u64 parent,
                                         u64 root_objectid, u64 ref_generation,
                                         u64 owner, struct btrfs_key *ins,
                                         int ref_mod);
static int update_reserved_extents(struct btrfs_root *root,
                                   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc,
                              int mark_free);
static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        u64 bytenr, u64 num_bytes, u64 parent,
                                        u64 root_objectid, u64 ref_generation,
                                        u64 owner_objectid, int pin,
                                        int ref_to_drop);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 alloc_bytes,
                          u64 flags, int force);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
        return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                struct btrfs_block_group_cache *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group_cache *cache;

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group_cache,
                                 cache_node);
                if (block_group->key.objectid < cache->key.objectid) {
                        p = &(*p)->rb_left;
                } else if (block_group->key.objectid > cache->key.objectid) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);
        spin_unlock(&info->block_group_cache_lock);

        return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                              int contains)
{
        struct btrfs_block_group_cache *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group_cache,
                                 cache_node);
                end = cache->key.objectid + cache->key.offset - 1;
                start = cache->key.objectid;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->key.objectid))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        if (ret)
                atomic_inc(&ret->count);
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

/*
 * this is only called by cache_block_group; since we could have freed
 * extents, we need to check the pinned_extents tree for any extents that
 * can't be used yet, because their free space will be released as soon
 * as the transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_fs_info *info, u64 start, u64 end)
{
        u64 extent_start, extent_end, size;
        int ret;

        while (start < end) {
                ret = find_first_extent_bit(&info->pinned_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;

                if (extent_start == start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
                        ret = btrfs_add_free_space(block_group, start,
                                                   size);
                        BUG_ON(ret);
                        start = extent_end + 1;
                } else {
                        break;
                }
        }

        if (start < end) {
                size = end - start;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret);
        }

        return 0;
}

static int remove_sb_from_cache(struct btrfs_root *root,
                                struct btrfs_block_group_cache *cache)
{
        u64 bytenr;
        u64 *logical;
        int stripe_len;
        int i, nr, ret;

        for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
                bytenr = btrfs_sb_offset(i);
                ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
                                       cache->key.objectid, bytenr, 0,
                                       &logical, &nr, &stripe_len);
                BUG_ON(ret);
                while (nr--) {
                        btrfs_remove_free_space(cache, logical[nr],
                                                stripe_len);
                }
                kfree(logical);
        }
        return 0;
}

static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
{
        struct btrfs_path *path;
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
        u64 last;

        if (!block_group)
                return 0;

        root = root->fs_info->extent_root;

        if (block_group->cached)
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        /*
         * we get into deadlocks with paths held by callers of this function;
         * since the alloc_mutex is protecting things right now, just
         * skip the locking here
         */
        path->skip_locking = 1;
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
        key.objectid = last;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;

        while (1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto err;
                        if (ret == 0)
                                continue;
                        else
                                break;
                }
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid < block_group->key.objectid)
                        goto next;

                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;

                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
                        add_new_free_space(block_group, root->fs_info, last,
                                           key.objectid);

                        last = key.objectid + key.offset;
                }
next:
                path->slots[0]++;
        }

        add_new_free_space(block_group, root->fs_info, last,
                           block_group->key.objectid +
                           block_group->key.offset);

        block_group->cached = 1;
        remove_sb_from_cache(root, block_group);
        ret = 0;
err:
        btrfs_free_path(path);
        return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 0);

        return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
                                                 struct btrfs_fs_info *info,
                                                 u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 1);

        return cache;
}

static inline void put_block_group(struct btrfs_block_group_cache *cache)
{
        if (atomic_dec_and_test(&cache->count))
                kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
{
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;

        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags == flags) {
                        rcu_read_unlock();
                        return found;
                }
        }
        rcu_read_unlock();
        return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
        struct list_head *head = &info->space_info;
        struct btrfs_space_info *found;

        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list)
                found->full = 0;
        rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
        if (factor == 10)
                return num;
        num *= factor;
        do_div(num, 10);
        return num;
}
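
/*
 * Worked example (editorial note, not in the original source): with
 * factor == 9, div_factor(1024, 9) computes 1024 * 9 / 10 == 921,
 * i.e. roughly 90% of the input.  btrfs_find_block_group() below uses
 * div_factor(cache->key.offset, factor) as a "mostly full" threshold
 * when scanning for a metadata block group to hint allocations at.
 */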

u64 btrfs_find_block_group(struct btrfs_root *root,
                           u64 search_start, u64 search_hint, int owner)
{
        struct btrfs_block_group_cache *cache;
        u64 used;
        u64 last = max(search_hint, search_start);
        u64 group_start = 0;
        int full_search = 0;
        int factor = 9;
        int wrapped = 0;
again:
        while (1) {
                cache = btrfs_lookup_first_block_group(root->fs_info, last);
                if (!cache)
                        break;

                spin_lock(&cache->lock);
                last = cache->key.objectid + cache->key.offset;
                used = btrfs_block_group_used(&cache->item);

                if ((full_search || !cache->ro) &&
                    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
                        if (used + cache->pinned + cache->reserved <
                            div_factor(cache->key.offset, factor)) {
                                group_start = cache->key.objectid;
                                spin_unlock(&cache->lock);
                                put_block_group(cache);
                                goto found;
                        }
                }
                spin_unlock(&cache->lock);
                put_block_group(cache);
                cond_resched();
        }
        if (!wrapped) {
                last = search_start;
                wrapped = 1;
                goto again;
        }
        if (!full_search && factor < 10) {
                last = search_start;
                full_search = 1;
                factor = 10;
                goto again;
        }
found:
        return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_path *path;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        btrfs_free_path(path);
        return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid,
 *      number of references in the leaf)
 *
 * When a file extent is removed either during snapshot deletion or
 * file truncation, we find the corresponding back reference and check
 * the following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
 *      inode objectid)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, 1)
 *
 * When a backref is being deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself), level)
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent), level)
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */
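
/*
 * Editorial sketch (not part of the original source): composing the back
 * reference key described above for an extent starting at byte 135168
 * whose parent tree block starts at byte 131072 would look roughly like:
 *
 *        struct btrfs_key key;
 *
 *        key.objectid = 135168;
 *        key.type = BTRFS_EXTENT_REF_KEY;
 *        key.offset = 131072;
 *
 * If the extent were itself a tree root, key.offset would be set to
 * key.objectid instead.  lookup_extent_backref() below builds exactly
 * this kind of key before calling btrfs_search_slot().
 */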

static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 ref_root, u64 ref_generation,
                                          u64 owner_objectid, int del)
{
        struct btrfs_key key;
        struct btrfs_extent_ref *ref;
        struct extent_buffer *leaf;
        u64 ref_objectid;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        ref_objectid = btrfs_ref_objectid(leaf, ref);
        if (btrfs_ref_root(leaf, ref) != ref_root ||
            btrfs_ref_generation(leaf, ref) != ref_generation ||
            (ref_objectid != owner_objectid &&
             ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) {
                ret = -EIO;
                WARN_ON(1);
                goto out;
        }
        ret = 0;
out:
        return ret;
}

static noinline int insert_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 ref_root, u64 ref_generation,
                                          u64 owner_objectid,
                                          int refs_to_add)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
        if (ret == 0) {
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                btrfs_set_ref_root(leaf, ref, ref_root);
                btrfs_set_ref_generation(leaf, ref, ref_generation);
                btrfs_set_ref_objectid(leaf, ref, owner_objectid);
                btrfs_set_ref_num_refs(leaf, ref, refs_to_add);
        } else if (ret == -EEXIST) {
                u64 existing_owner;

                BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                if (btrfs_ref_root(leaf, ref) != ref_root ||
                    btrfs_ref_generation(leaf, ref) != ref_generation) {
                        ret = -EIO;
                        WARN_ON(1);
                        goto out;
                }

                num_refs = btrfs_ref_num_refs(leaf, ref);
                BUG_ON(num_refs == 0);
                btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add);

                existing_owner = btrfs_ref_objectid(leaf, ref);
                if (existing_owner != owner_objectid &&
                    existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
                        btrfs_set_ref_objectid(leaf, ref,
                                        BTRFS_MULTIPLE_OBJECTIDS);
                }
                ret = 0;
        } else {
                goto out;
        }
        btrfs_unlock_up_safe(path, 1);
        btrfs_mark_buffer_dirty(path->nodes[0]);
out:
        btrfs_release_path(root, path);
        return ret;
}

static noinline int remove_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          int refs_to_drop)
{
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret = 0;

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        num_refs = btrfs_ref_num_refs(leaf, ref);
        BUG_ON(num_refs < refs_to_drop);
        num_refs -= refs_to_drop;
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
        } else {
                btrfs_set_ref_num_refs(leaf, ref, num_refs);
                btrfs_mark_buffer_dirty(leaf);
        }
        btrfs_release_path(root, path);
        return ret;
}

#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
                                u64 start, u64 len)
{
        blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                                u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
        int ret;
        u64 map_length = num_bytes;
        struct btrfs_multi_bio *multi = NULL;

        /* Tell the block device(s) that the sectors can be discarded */
        ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
                              bytenr, &map_length, &multi, 0);
        if (!ret) {
                struct btrfs_bio_stripe *stripe = multi->stripes;
                int i;

                if (map_length > num_bytes)
                        map_length = num_bytes;

                for (i = 0; i < multi->num_stripes; i++, stripe++) {
                        btrfs_issue_discard(stripe->dev->bdev,
                                            stripe->physical,
                                            map_length);
                }
                kfree(multi);
        }

        return ret;
#else
        return 0;
#endif
}
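
/*
 * Editorial note (not in the original source): btrfs_issue_discard()
 * shifts byte offsets right by 9 because blkdev_issue_discard() works
 * in 512-byte sectors.  For example, discarding 16384 bytes at byte
 * offset 1048576 becomes a discard of 32 sectors starting at sector
 * 2048.
 */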

static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root, u64 bytenr,
                                     u64 num_bytes,
                                     u64 orig_parent, u64 parent,
                                     u64 orig_root, u64 ref_root,
                                     u64 orig_generation, u64 ref_generation,
                                     u64 owner_objectid)
{
        int ret;
        int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID;

        ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes,
                                       orig_parent, parent, orig_root,
                                       ref_root, orig_generation,
                                       ref_generation, owner_objectid, pin);
        BUG_ON(ret);
        return ret;
}

int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 num_bytes, u64 orig_parent, u64 parent,
                            u64 ref_root, u64 ref_generation,
                            u64 owner_objectid)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;

        ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes,
                                        orig_parent, parent, ref_root,
                                        ref_root, ref_generation,
                                        ref_generation, owner_objectid);
        return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root, u64 bytenr,
                                  u64 num_bytes,
                                  u64 orig_parent, u64 parent,
                                  u64 orig_root, u64 ref_root,
                                  u64 orig_generation, u64 ref_generation,
                                  u64 owner_objectid)
{
        int ret;

        ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root,
                                    ref_generation, owner_objectid,
                                    BTRFS_ADD_DELAYED_REF, 0);
        BUG_ON(ret);
        return ret;
}

static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, u64 bytenr,
                          u64 num_bytes, u64 parent, u64 ref_root,
                          u64 ref_generation, u64 owner_objectid,
                          int refs_to_add)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;
        u32 refs;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 1;
        path->leave_spinning = 1;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = num_bytes;

        /* first find the extent item and update its reference count */
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
                                path, 0, 1);
        if (ret < 0) {
                /* free the path here; returning without doing so leaks it */
                btrfs_free_path(path);
                return ret;
        }

        if (ret > 0) {
                WARN_ON(1);
                btrfs_free_path(path);
                return -EIO;
        }
        l = path->nodes[0];

        btrfs_item_key_to_cpu(l, &key, path->slots[0]);
        if (key.objectid != bytenr) {
                btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]);
                printk(KERN_ERR "btrfs wanted %llu found %llu\n",
                       (unsigned long long)bytenr,
                       (unsigned long long)key.objectid);
                BUG();
        }
        BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);

        refs = btrfs_extent_refs(l, item);
        btrfs_set_extent_refs(l, item, refs + refs_to_add);
        btrfs_unlock_up_safe(path, 1);

        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_release_path(root->fs_info->extent_root, path);

        path->reada = 1;
        path->leave_spinning = 1;

        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent,
                                    ref_root, ref_generation,
                                    owner_objectid, refs_to_add);
        BUG_ON(ret);
        btrfs_free_path(path);
        return 0;
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 ref_root, u64 ref_generation,
                         u64 owner_objectid)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;

        ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent,
                                     0, ref_root, 0, ref_generation,
                                     owner_objectid);
        return ret;
}

static int drop_delayed_ref(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        struct btrfs_delayed_ref_node *node)
{
        int ret = 0;
        struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node);

        BUG_ON(node->ref_mod == 0);
        ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes,
                                  node->parent, ref->root, ref->generation,
                                  ref->owner_objectid, ref->pin, node->ref_mod);

        return ret;
}

/* helper function to actually process a single delayed ref entry */
static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                                        struct btrfs_root *root,
                                        struct btrfs_delayed_ref_node *node,
                                        int insert_reserved)
{
        int ret;
        struct btrfs_delayed_ref *ref;

        if (node->parent == (u64)-1) {
                struct btrfs_delayed_ref_head *head;
                /*
                 * we've hit the end of the chain and we were supposed
                 * to insert this extent into the tree.  But, it got
                 * deleted before we ever needed to insert it, so all
                 * we have to do is clean up the accounting
                 */
                if (insert_reserved) {
                        update_reserved_extents(root, node->bytenr,
                                                node->num_bytes, 0);
                }
                head = btrfs_delayed_node_to_head(node);
                mutex_unlock(&head->mutex);
                return 0;
        }

        ref = btrfs_delayed_node_to_ref(node);
        if (ref->action == BTRFS_ADD_DELAYED_REF) {
                if (insert_reserved) {
                        struct btrfs_key ins;

                        ins.objectid = node->bytenr;
                        ins.offset = node->num_bytes;
                        ins.type = BTRFS_EXTENT_ITEM_KEY;

                        /* record the full extent allocation */
                        ret = __btrfs_alloc_reserved_extent(trans, root,
                                        node->parent, ref->root,
                                        ref->generation, ref->owner_objectid,
                                        &ins, node->ref_mod);
                        update_reserved_extents(root, node->bytenr,
                                                node->num_bytes, 0);
                } else {
                        /* just add one backref */
                        ret = add_extent_ref(trans, root, node->bytenr,
                                     node->num_bytes,
                                     node->parent, ref->root, ref->generation,
                                     ref->owner_objectid, node->ref_mod);
                }
                BUG_ON(ret);
        } else if (ref->action == BTRFS_DROP_DELAYED_REF) {
                WARN_ON(insert_reserved);
                ret = drop_delayed_ref(trans, root, node);
        }
        return 0;
}

static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
        struct rb_node *node;
        struct btrfs_delayed_ref_node *ref;
        int action = BTRFS_ADD_DELAYED_REF;
again:
        /*
         * select delayed refs of type BTRFS_ADD_DELAYED_REF first.
         * this prevents the ref count from going down to zero when
         * there are still pending delayed refs.
         */
        node = rb_prev(&head->node.rb_node);
        while (1) {
                if (!node)
                        break;
                ref = rb_entry(node, struct btrfs_delayed_ref_node,
                                rb_node);
                if (ref->bytenr != head->node.bytenr)
                        break;
                if (btrfs_delayed_node_to_ref(ref)->action == action)
                        return ref;
                node = rb_prev(node);
        }
        if (action == BTRFS_ADD_DELAYED_REF) {
                action = BTRFS_DROP_DELAYED_REF;
                goto again;
        }
        return NULL;
}
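
/*
 * Editorial example (not in the original source): suppose a tree block
 * with an on-disk reference count of 1 gains a reference from a new
 * snapshot and loses its original reference in the same transaction,
 * so the rbtree holds both an ADD and a DROP delayed ref for the same
 * bytenr.  Because select_delayed_ref() returns the ADD first, the
 * count goes 1 -> 2 -> 1 instead of briefly hitting zero and freeing
 * an extent that is still referenced.
 */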

static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                                       struct btrfs_root *root,
                                       struct list_head *cluster)
{
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct btrfs_delayed_ref_head *locked_ref = NULL;
        int ret;
        int count = 0;
        int must_insert_reserved = 0;

        delayed_refs = &trans->transaction->delayed_refs;
        while (1) {
                if (!locked_ref) {
                        /* pick a new head ref from the cluster list */
                        if (list_empty(cluster))
                                break;

                        locked_ref = list_entry(cluster->next,
                                     struct btrfs_delayed_ref_head, cluster);

                        /* grab the lock that says we are going to process
                         * all the refs for this head */
                        ret = btrfs_delayed_ref_lock(trans, locked_ref);

                        /*
                         * we may have dropped the spin lock to get the head
                         * mutex lock, and that might have given someone else
                         * time to free the head.  If that's true, it has been
                         * removed from our list and we can move on.
                         */
                        if (ret == -EAGAIN) {
                                locked_ref = NULL;
                                count++;
                                continue;
                        }
                }

                /*
                 * record the must insert reserved flag before we
                 * drop the spin lock.
                 */
                must_insert_reserved = locked_ref->must_insert_reserved;
                locked_ref->must_insert_reserved = 0;

                /*
                 * locked_ref is the head node, so we have to go one
                 * node back for any delayed ref updates
                 */
                ref = select_delayed_ref(locked_ref);
                if (!ref) {
                        /*
                         * all delayed refs have been processed; go ahead
                         * and send the head node to run_one_delayed_ref,
                         * so that any accounting fixes can happen
                         */
                        ref = &locked_ref->node;
                        list_del_init(&locked_ref->cluster);
                        locked_ref = NULL;
                }

                ref->in_tree = 0;
                rb_erase(&ref->rb_node, &delayed_refs->root);
                delayed_refs->num_entries--;
                spin_unlock(&delayed_refs->lock);

                ret = run_one_delayed_ref(trans, root, ref,
                                          must_insert_reserved);
                BUG_ON(ret);
                btrfs_put_delayed_ref(ref);

                count++;
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }
        return count;
}

/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, unsigned long count)
{
        struct rb_node *node;
        struct btrfs_delayed_ref_root *delayed_refs;
        struct btrfs_delayed_ref_node *ref;
        struct list_head cluster;
        int ret;
        int run_all = count == (unsigned long)-1;
        int run_most = 0;

        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;

        delayed_refs = &trans->transaction->delayed_refs;
        INIT_LIST_HEAD(&cluster);
again:
        spin_lock(&delayed_refs->lock);
        if (count == 0) {
                count = delayed_refs->num_entries * 2;
                run_most = 1;
        }
        while (1) {
                if (!(run_all || run_most) &&
                    delayed_refs->num_heads_ready < 64)
                        break;

                /*
                 * go find something we can process in the rbtree.  We start at
                 * the beginning of the tree, and then build a cluster
                 * of refs to process starting at the first one we are able to
                 * lock
                 */
                ret = btrfs_find_ref_cluster(trans, &cluster,
                                             delayed_refs->run_delayed_start);
                if (ret)
                        break;

                ret = run_clustered_refs(trans, root, &cluster);
                BUG_ON(ret < 0);

                count -= min_t(unsigned long, ret, count);

                if (count == 0)
                        break;
        }

        if (run_all) {
                node = rb_first(&delayed_refs->root);
                if (!node)
                        goto out;
                count = (unsigned long)-1;

                while (node) {
                        ref = rb_entry(node, struct btrfs_delayed_ref_node,
                                       rb_node);
                        if (btrfs_delayed_ref_is_head(ref)) {
                                struct btrfs_delayed_ref_head *head;

                                head = btrfs_delayed_node_to_head(ref);
                                atomic_inc(&ref->refs);

                                spin_unlock(&delayed_refs->lock);
                                mutex_lock(&head->mutex);
                                mutex_unlock(&head->mutex);

                                btrfs_put_delayed_ref(ref);
                                cond_resched();
                                goto again;
                        }
                        node = rb_next(node);
                }
                spin_unlock(&delayed_refs->lock);
                schedule_timeout(1);
                goto again;
        }
out:
        spin_unlock(&delayed_refs->lock);
        return 0;
}
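
/*
 * Editorial usage sketch (not part of the original source), following
 * the count semantics described above:
 *
 *        btrfs_run_delayed_refs(trans, root, 32);
 *                (process roughly 32 entries, then return)
 *
 *        btrfs_run_delayed_refs(trans, root, 0);
 *                (process everything queued at the start of the run,
 *                 but not newly added entries)
 *
 *        btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
 *                (flush everything, waiting on in-flight heads as well)
 */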

int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root, u64 objectid, u64 bytenr)
{
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref_item;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u64 ref_root;
        u64 last_snapshot;
        u32 nritems;
        int ret;

        key.objectid = bytenr;
        key.offset = (u64)-1;
        key.type = BTRFS_EXTENT_ITEM_KEY;

        path = btrfs_alloc_path();
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        BUG_ON(ret == 0);

        ret = -ENOENT;
        if (path->slots[0] == 0)
                goto out;

        path->slots[0]--;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

        if (found_key.objectid != bytenr ||
            found_key.type != BTRFS_EXTENT_ITEM_KEY)
                goto out;

        last_snapshot = btrfs_root_last_snapshot(&root->root_item);
        while (1) {
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                goto out;
                        if (ret == 0)
                                continue;
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid != bytenr)
                        break;

                if (found_key.type != BTRFS_EXTENT_REF_KEY) {
                        path->slots[0]++;
                        continue;
                }

                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref);
                ref_root = btrfs_ref_root(leaf, ref_item);
                if ((ref_root != root->root_key.objectid &&
                     ref_root != BTRFS_TREE_LOG_OBJECTID) ||
                     objectid != btrfs_ref_objectid(leaf, ref_item)) {
                        ret = 1;
                        goto out;
                }
                if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) {
                        ret = 1;
                        goto out;
                }

                path->slots[0]++;
        }
        ret = 0;
out:
        btrfs_free_path(path);
        return ret;
}

int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                    struct extent_buffer *buf, u32 nr_extents)
{
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
        u64 root_gen;
        u32 nritems;
        int i;
        int level;
        int ret = 0;
        int shared = 0;

        if (!root->ref_cows)
                return 0;

        if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
                shared = 0;
                root_gen = root->root_key.offset;
        } else {
                shared = 1;
                root_gen = trans->transid - 1;
        }

        level = btrfs_header_level(buf);
        nritems = btrfs_header_nritems(buf);

        if (level == 0) {
                struct btrfs_leaf_ref *ref;
                struct btrfs_extent_info *info;

                ref = btrfs_alloc_leaf_ref(root, nr_extents);
                if (!ref) {
                        ret = -ENOMEM;
                        goto out;
                }

                ref->root_gen = root_gen;
                ref->bytenr = buf->start;
                ref->owner = btrfs_header_owner(buf);
                ref->generation = btrfs_header_generation(buf);
                ref->nritems = nr_extents;
                info = ref->extents;

                for (i = 0; nr_extents > 0 && i < nritems; i++) {
                        u64 disk_bytenr;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
                                continue;
                        fi = btrfs_item_ptr(buf, i,
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(buf, fi) ==
                            BTRFS_FILE_EXTENT_INLINE)
                                continue;
                        disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
                        if (disk_bytenr == 0)
                                continue;

                        info->bytenr = disk_bytenr;
                        info->num_bytes =
                                btrfs_file_extent_disk_num_bytes(buf, fi);
                        info->objectid = key.objectid;
                        info->offset = key.offset;
                        info++;
                }

                ret = btrfs_add_leaf_ref(root, ref, shared);
                if (ret == -EEXIST && shared) {
                        struct btrfs_leaf_ref *old;
                        old = btrfs_lookup_leaf_ref(root, ref->bytenr);
                        BUG_ON(!old);
                        btrfs_remove_leaf_ref(root, old);
                        btrfs_free_leaf_ref(root, old);
                        ret = btrfs_add_leaf_ref(root, ref, shared);
                }
                WARN_ON(ret);
                btrfs_free_leaf_ref(root, ref);
        }
out:
        return ret;
}

/* when a block goes through cow, we update the reference counts of
 * everything that block points to.  The internal pointers of the block
 * can be in just about any order, and it is likely to have clusters of
 * things that are close together and clusters of things that are not.
 *
 * To help reduce the seeks that come with updating all of these reference
 * counts, sort them by byte number before actual updates are done.
 *
 * struct refsort is used to match byte number to slot in the btree block.
 * we sort based on the byte number and then use the slot to actually
 * find the item.
 *
 * struct refsort is smaller than struct btrfs_item and smaller than
 * struct btrfs_key_ptr.  Since we're currently limited to the page size
 * for a btree block, there's no way for a kmalloc of refsorts for a
 * single node to be bigger than a page.
 */
struct refsort {
        u64 bytenr;
        u32 slot;
};

/*
 * for passing into sort()
 */
static int refsort_cmp(const void *a_void, const void *b_void)
{
        const struct refsort *a = a_void;
        const struct refsort *b = b_void;

        if (a->bytenr < b->bytenr)
                return -1;
        if (a->bytenr > b->bytenr)
                return 1;
        return 0;
}
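
/*
 * Editorial sketch (not part of the original source) of the two-pass
 * pattern struct refsort enables, as used by btrfs_inc_ref() below:
 *
 *        sorted[refi].bytenr = bytenr;   (pass one: record byte number)
 *        sorted[refi].slot = i;          (... and the slot it came from)
 *        refi++;
 *        ...
 *        sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
 *
 * After sorting, the reference updates walk the extents in on-disk
 * byte order, turning scattered btree updates into mostly sequential
 * I/O.
 */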
1293
1294
1295 noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans,
1296                            struct btrfs_root *root,
1297                            struct extent_buffer *orig_buf,
1298                            struct extent_buffer *buf, u32 *nr_extents)
1299 {
1300         u64 bytenr;
1301         u64 ref_root;
1302         u64 orig_root;
1303         u64 ref_generation;
1304         u64 orig_generation;
1305         struct refsort *sorted;
1306         u32 nritems;
1307         u32 nr_file_extents = 0;
1308         struct btrfs_key key;
1309         struct btrfs_file_extent_item *fi;
1310         int i;
1311         int level;
1312         int ret = 0;
1313         int faili = 0;
1314         int refi = 0;
1315         int slot;
1316         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1317                             u64, u64, u64, u64, u64, u64, u64, u64, u64);
1318
1319         ref_root = btrfs_header_owner(buf);
1320         ref_generation = btrfs_header_generation(buf);
1321         orig_root = btrfs_header_owner(orig_buf);
1322         orig_generation = btrfs_header_generation(orig_buf);
1323
1324         nritems = btrfs_header_nritems(buf);
1325         level = btrfs_header_level(buf);
1326
1327         sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS);
1328         BUG_ON(!sorted);
1329
1330         if (root->ref_cows) {
1331                 process_func = __btrfs_inc_extent_ref;
1332         } else {
1333                 if (level == 0 &&
1334                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1335                         goto out;
1336                 if (level != 0 &&
1337                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1338                         goto out;
1339                 process_func = __btrfs_update_extent_ref;
1340         }
1341
1342         /*
1343          * we make two passes through the items.  In the first pass we
1344          * only record the byte number and slot.  Then we sort based on
1345          * byte number and do the actual work based on the sorted results
1346          */
1347         for (i = 0; i < nritems; i++) {
1348                 cond_resched();
1349                 if (level == 0) {
1350                         btrfs_item_key_to_cpu(buf, &key, i);
1351                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1352                                 continue;
1353                         fi = btrfs_item_ptr(buf, i,
1354                                             struct btrfs_file_extent_item);
1355                         if (btrfs_file_extent_type(buf, fi) ==
1356                             BTRFS_FILE_EXTENT_INLINE)
1357                                 continue;
1358                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1359                         if (bytenr == 0)
1360                                 continue;
1361
1362                         nr_file_extents++;
1363                         sorted[refi].bytenr = bytenr;
1364                         sorted[refi].slot = i;
1365                         refi++;
1366                 } else {
1367                         bytenr = btrfs_node_blockptr(buf, i);
1368                         sorted[refi].bytenr = bytenr;
1369                         sorted[refi].slot = i;
1370                         refi++;
1371                 }
1372         }
1373         /*
1374          * if refi == 0, we didn't actually put anything into the sorted
1375          * array and we're done
1376          */
1377         if (refi == 0)
1378                 goto out;
1379
1380         sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
1381
1382         for (i = 0; i < refi; i++) {
1383                 cond_resched();
1384                 slot = sorted[i].slot;
1385                 bytenr = sorted[i].bytenr;
1386
1387                 if (level == 0) {
1388                         btrfs_item_key_to_cpu(buf, &key, slot);
1389                         fi = btrfs_item_ptr(buf, slot,
1390                                             struct btrfs_file_extent_item);
1391
1392                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1393                         if (bytenr == 0)
1394                                 continue;
1395
1396                         ret = process_func(trans, root, bytenr,
1397                                    btrfs_file_extent_disk_num_bytes(buf, fi),
1398                                    orig_buf->start, buf->start,
1399                                    orig_root, ref_root,
1400                                    orig_generation, ref_generation,
1401                                    key.objectid);
1402
1403                         if (ret) {
1404                                 faili = slot;
1405                                 WARN_ON(1);
1406                                 goto fail;
1407                         }
1408                 } else {
1409                         ret = process_func(trans, root, bytenr, buf->len,
1410                                            orig_buf->start, buf->start,
1411                                            orig_root, ref_root,
1412                                            orig_generation, ref_generation,
1413                                            level - 1);
1414                         if (ret) {
1415                                 faili = slot;
1416                                 WARN_ON(1);
1417                                 goto fail;
1418                         }
1419                 }
1420         }
1421 out:
1422         kfree(sorted);
1423         if (nr_extents) {
1424                 if (level == 0)
1425                         *nr_extents = nr_file_extents;
1426                 else
1427                         *nr_extents = nritems;
1428         }
1429         return 0;
1430 fail:
1431         kfree(sorted);
1432         WARN_ON(1);
1433         return ret;
1434 }
1435
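/*
 * update back refs for a range of slots that were just copied from
 * orig_buf into buf: every file extent or block pointer in
 * [start_slot, start_slot + nr) gets its ref repointed from the old
 * block to its new home in buf
 */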
1436 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1437                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1438                      struct extent_buffer *buf, int start_slot, int nr)
1440 {
1441         u64 bytenr;
1442         u64 ref_root;
1443         u64 orig_root;
1444         u64 ref_generation;
1445         u64 orig_generation;
1446         struct btrfs_key key;
1447         struct btrfs_file_extent_item *fi;
1448         int i;
1449         int ret;
1450         int slot;
1451         int level;
1452
1453         BUG_ON(start_slot < 0);
1454         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1455
1456         ref_root = btrfs_header_owner(buf);
1457         ref_generation = btrfs_header_generation(buf);
1458         orig_root = btrfs_header_owner(orig_buf);
1459         orig_generation = btrfs_header_generation(orig_buf);
1460         level = btrfs_header_level(buf);
1461
1462         if (!root->ref_cows) {
1463                 if (level == 0 &&
1464                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1465                         return 0;
1466                 if (level != 0 &&
1467                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1468                         return 0;
1469         }
1470
1471         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1472                 cond_resched();
1473                 if (level == 0) {
1474                         btrfs_item_key_to_cpu(buf, &key, slot);
1475                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1476                                 continue;
1477                         fi = btrfs_item_ptr(buf, slot,
1478                                             struct btrfs_file_extent_item);
1479                         if (btrfs_file_extent_type(buf, fi) ==
1480                             BTRFS_FILE_EXTENT_INLINE)
1481                                 continue;
1482                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1483                         if (bytenr == 0)
1484                                 continue;
1485                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1486                                     btrfs_file_extent_disk_num_bytes(buf, fi),
1487                                     orig_buf->start, buf->start,
1488                                     orig_root, ref_root, orig_generation,
1489                                     ref_generation, key.objectid);
1490                         if (ret)
1491                                 goto fail;
1492                 } else {
1493                         bytenr = btrfs_node_blockptr(buf, slot);
1494                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1495                                             buf->len, orig_buf->start,
1496                                             buf->start, orig_root, ref_root,
1497                                             orig_generation, ref_generation,
1498                                             level - 1);
1499                         if (ret)
1500                                 goto fail;
1501                 }
1502         }
1503         return 0;
1504 fail:
1505         WARN_ON(1);
1506         return ret;
1507 }
1508
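/*
 * write one block group's in-memory item back over its copy in the
 * extent tree; the caller supplies the path we search and release
 */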
1509 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1510                                  struct btrfs_root *root,
1511                                  struct btrfs_path *path,
1512                                  struct btrfs_block_group_cache *cache)
1513 {
1514         int ret;
1515         struct btrfs_root *extent_root = root->fs_info->extent_root;
1516         unsigned long bi;
1517         struct extent_buffer *leaf;
1518
1519         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1520         if (ret < 0)
1521                 goto fail;
1522         BUG_ON(ret);
1523
1524         leaf = path->nodes[0];
1525         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1526         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1527         btrfs_mark_buffer_dirty(leaf);
1528         btrfs_release_path(extent_root, path);
1529 fail:
1530         return ret;
1531 }
1535
1536 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1537                                    struct btrfs_root *root)
1538 {
1539         struct btrfs_block_group_cache *cache, *entry;
1540         struct rb_node *n;
1541         int err = 0;
1542         int werr = 0;
1543         struct btrfs_path *path;
1545
1546         path = btrfs_alloc_path();
1547         if (!path)
1548                 return -ENOMEM;
1549
1550         while (1) {
1551                 cache = NULL;
1552                 spin_lock(&root->fs_info->block_group_cache_lock);
1553                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1554                      n; n = rb_next(n)) {
1555                         entry = rb_entry(n, struct btrfs_block_group_cache,
1556                                          cache_node);
1557                         if (entry->dirty) {
1558                                 cache = entry;
1559                                 break;
1560                         }
1561                 }
1562                 spin_unlock(&root->fs_info->block_group_cache_lock);
1563
1564                 if (!cache)
1565                         break;
1566
1567                 cache->dirty = 0;
1569
1570                 err = write_one_cache_group(trans, root,
1571                                             path, cache);
1572                 /*
1573                  * if we fail to write the cache group, remember the
1574                  * error so it can be returned, but keep going so the
1575                  * rest of the dirty groups still get written
1576                  */
1577                 if (err) {
1578                         werr = err;
1579                         continue;
1580                 }
1581         }
1582         btrfs_free_path(path);
1583         return werr;
1584 }
1585
1586 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
1587 {
1588         struct btrfs_block_group_cache *block_group;
1589         int readonly = 0;
1590
1591         block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
1592         if (!block_group || block_group->ro)
1593                 readonly = 1;
1594         if (block_group)
1595                 put_block_group(block_group);
1596         return readonly;
1597 }
1598
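/*
 * find the space_info for the given allocation flags and fold the new
 * bytes into it, or create and register a fresh one on first use
 */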
1599 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1600                              u64 total_bytes, u64 bytes_used,
1601                              struct btrfs_space_info **space_info)
1602 {
1603         struct btrfs_space_info *found;
1604
1605         found = __find_space_info(info, flags);
1606         if (found) {
1607                 spin_lock(&found->lock);
1608                 found->total_bytes += total_bytes;
1609                 found->bytes_used += bytes_used;
1610                 found->full = 0;
1611                 spin_unlock(&found->lock);
1612                 *space_info = found;
1613                 return 0;
1614         }
1615         found = kzalloc(sizeof(*found), GFP_NOFS);
1616         if (!found)
1617                 return -ENOMEM;
1618
1619         INIT_LIST_HEAD(&found->block_groups);
1620         init_rwsem(&found->groups_sem);
1621         spin_lock_init(&found->lock);
1622         found->flags = flags;
1623         found->total_bytes = total_bytes;
1624         found->bytes_used = bytes_used;
1625         found->bytes_pinned = 0;
1626         found->bytes_reserved = 0;
1627         found->bytes_readonly = 0;
1628         found->bytes_delalloc = 0;
1629         found->full = 0;
1630         found->force_alloc = 0;
1631         *space_info = found;
1632         list_add_rcu(&found->list, &info->space_info);
1633         return 0;
1634 }
1635
1636 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1637 {
1638         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1639                                    BTRFS_BLOCK_GROUP_RAID1 |
1640                                    BTRFS_BLOCK_GROUP_RAID10 |
1641                                    BTRFS_BLOCK_GROUP_DUP);
1642         if (extra_flags) {
1643                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1644                         fs_info->avail_data_alloc_bits |= extra_flags;
1645                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1646                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1647                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1648                         fs_info->avail_system_alloc_bits |= extra_flags;
1649         }
1650 }
1651
1652 static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
1653 {
1654         spin_lock(&cache->space_info->lock);
1655         spin_lock(&cache->lock);
1656         if (!cache->ro) {
1657                 cache->space_info->bytes_readonly += cache->key.offset -
1658                                         btrfs_block_group_used(&cache->item);
1659                 cache->ro = 1;
1660         }
1661         spin_unlock(&cache->lock);
1662         spin_unlock(&cache->space_info->lock);
1663 }
1664
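/*
 * reduce an allocation profile to something the current device count
 * can honour: e.g. on a two device fs a DUP|RAID1 request comes back
 * as plain RAID1, and RAID10 is only kept with at least four rw devices
 */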
1665 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1666 {
1667         u64 num_devices = root->fs_info->fs_devices->rw_devices;
1668
1669         if (num_devices == 1)
1670                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1671         if (num_devices < 4)
1672                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1673
1674         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1675             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1676                       BTRFS_BLOCK_GROUP_RAID10))) {
1677                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1678         }
1679
1680         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1681             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1682                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1683         }
1684
1685         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1686             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1687              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1688              (flags & BTRFS_BLOCK_GROUP_DUP)))
1689                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1690         return flags;
1691 }
1692
1693 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
1694 {
1695         struct btrfs_fs_info *info = root->fs_info;
1696         u64 alloc_profile;
1697
1698         if (data) {
1699                 alloc_profile = info->avail_data_alloc_bits &
1700                         info->data_alloc_profile;
1701                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1702         } else if (root == root->fs_info->chunk_root) {
1703                 alloc_profile = info->avail_system_alloc_bits &
1704                         info->system_alloc_profile;
1705                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1706         } else {
1707                 alloc_profile = info->avail_metadata_alloc_bits &
1708                         info->metadata_alloc_profile;
1709                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1710         }
1711
1712         return btrfs_reduce_alloc_profile(root, data);
1713 }
1714
1715 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
1716 {
1717         u64 alloc_target;
1718
1719         alloc_target = btrfs_get_alloc_profile(root, 1);
1720         BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
1721                                                        alloc_target);
1722 }
1723
1724 /*
1725  * for now this just makes sure we have at least 5% of our metadata space
1726  * free: we start allocating chunks at 80% usage and fail hard at 95%.
1727  */
1728 int btrfs_check_metadata_free_space(struct btrfs_root *root)
1729 {
1730         struct btrfs_fs_info *info = root->fs_info;
1731         struct btrfs_space_info *meta_sinfo;
1732         u64 alloc_target, thresh;
1733         int committed = 0, ret;
1734
1735         /* get the space info for where the metadata will live */
1736         alloc_target = btrfs_get_alloc_profile(root, 0);
1737         meta_sinfo = __find_space_info(info, alloc_target);
1738
1739 again:
1740         spin_lock(&meta_sinfo->lock);
1741         if (!meta_sinfo->full)
1742                 thresh = meta_sinfo->total_bytes * 80;
1743         else
1744                 thresh = meta_sinfo->total_bytes * 95;
1745
1746         do_div(thresh, 100);
1747
1748         if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
1749             meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
1750                 struct btrfs_trans_handle *trans;
1751                 if (!meta_sinfo->full) {
1752                         meta_sinfo->force_alloc = 1;
1753                         spin_unlock(&meta_sinfo->lock);
1754
1755                         trans = btrfs_start_transaction(root, 1);
1756                         if (!trans)
1757                                 return -ENOMEM;
1758
1759                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1760                                              2 * 1024 * 1024, alloc_target, 0);
1761                         btrfs_end_transaction(trans, root);
1762                         goto again;
1763                 }
1764                 spin_unlock(&meta_sinfo->lock);
1765
1766                 if (!committed) {
1767                         committed = 1;
1768                         trans = btrfs_join_transaction(root, 1);
1769                         if (!trans)
1770                                 return -ENOMEM;
1771                         ret = btrfs_commit_transaction(trans, root);
1772                         if (ret)
1773                                 return ret;
1774                         goto again;
1775                 }
1776                 return -ENOSPC;
1777         }
1778         spin_unlock(&meta_sinfo->lock);
1779
1780         return 0;
1781 }
1782
1783 /*
1784  * This will check the space that the inode allocates from to make sure we have
1785  * enough space for bytes.
1786  */
1787 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
1788                                 u64 bytes)
1789 {
1790         struct btrfs_space_info *data_sinfo;
1791         int ret = 0, committed = 0;
1792
1793         /* make sure bytes are sectorsize aligned */
1794         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
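        /* e.g. with 4K sectors a 5000 byte request rounds up to 8192 */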
1795
1796         data_sinfo = BTRFS_I(inode)->space_info;
1797 again:
1798         /* make sure we have enough space to handle the data first */
1799         spin_lock(&data_sinfo->lock);
1800         if (data_sinfo->total_bytes - data_sinfo->bytes_used -
1801             data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
1802             data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
1803             data_sinfo->bytes_may_use < bytes) {
1804                 struct btrfs_trans_handle *trans;
1805
1806                 /*
1807                  * if we don't have enough free bytes in this space then we need
1808                  * to alloc a new chunk.
1809                  */
1810                 if (!data_sinfo->full) {
1811                         u64 alloc_target;
1812
1813                         data_sinfo->force_alloc = 1;
1814                         spin_unlock(&data_sinfo->lock);
1815
1816                         alloc_target = btrfs_get_alloc_profile(root, 1);
1817                         trans = btrfs_start_transaction(root, 1);
1818                         if (!trans)
1819                                 return -ENOMEM;
1820
1821                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1822                                              bytes + 2 * 1024 * 1024,
1823                                              alloc_target, 0);
1824                         btrfs_end_transaction(trans, root);
1825                         if (ret)
1826                                 return ret;
1827                         goto again;
1828                 }
1829                 spin_unlock(&data_sinfo->lock);
1830
1831                 /* commit the current transaction and try again */
1832                 if (!committed) {
1833                         committed = 1;
1834                         trans = btrfs_join_transaction(root, 1);
1835                         if (!trans)
1836                                 return -ENOMEM;
1837                         ret = btrfs_commit_transaction(trans, root);
1838                         if (ret)
1839                                 return ret;
1840                         goto again;
1841                 }
1842
1843                 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
1844                        ", %llu bytes_used, %llu bytes_reserved, "
1845                        "%llu bytes_pinned, %llu bytes_readonly, %llu may use, "
1846                        "%llu total\n", bytes, data_sinfo->bytes_delalloc,
1847                        data_sinfo->bytes_used, data_sinfo->bytes_reserved,
1848                        data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
1849                        data_sinfo->bytes_may_use, data_sinfo->total_bytes);
1850                 return -ENOSPC;
1851         }
1852         data_sinfo->bytes_may_use += bytes;
1853         BTRFS_I(inode)->reserved_bytes += bytes;
1854         spin_unlock(&data_sinfo->lock);
1855
1856         return btrfs_check_metadata_free_space(root);
1857 }
1858
1859 /*
1860  * if there was an error for whatever reason after calling
1861  * btrfs_check_data_free_space, call this so we can cleanup the counters.
1862  */
1863 void btrfs_free_reserved_data_space(struct btrfs_root *root,
1864                                     struct inode *inode, u64 bytes)
1865 {
1866         struct btrfs_space_info *data_sinfo;
1867
1868         /* make sure bytes are sectorsize aligned */
1869         bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
1870
1871         data_sinfo = BTRFS_I(inode)->space_info;
1872         spin_lock(&data_sinfo->lock);
1873         data_sinfo->bytes_may_use -= bytes;
1874         BTRFS_I(inode)->reserved_bytes -= bytes;
1875         spin_unlock(&data_sinfo->lock);
1876 }
1877
1878 /* called when we are adding a delalloc extent to the inode's io_tree */
1879 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
1880                                   u64 bytes)
1881 {
1882         struct btrfs_space_info *data_sinfo;
1883
1884         /* get the space info for where this inode will be storing its data */
1885         data_sinfo = BTRFS_I(inode)->space_info;
1886
1887         /* make sure we have enough space to handle the data first */
1888         spin_lock(&data_sinfo->lock);
1889         data_sinfo->bytes_delalloc += bytes;
1890
1891         /*
1892          * we are adding a delalloc extent without calling
1893          * btrfs_check_data_free_space first.  This happens on a weird
1894          * writepage condition, but shouldn't hurt our accounting
1895          */
1896         if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
1897                 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
1898                 BTRFS_I(inode)->reserved_bytes = 0;
1899         } else {
1900                 data_sinfo->bytes_may_use -= bytes;
1901                 BTRFS_I(inode)->reserved_bytes -= bytes;
1902         }
1903
1904         spin_unlock(&data_sinfo->lock);
1905 }
1906
1907 /* called when we are clearing a delalloc extent from the inode's io_tree */
1908 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
1909                               u64 bytes)
1910 {
1911         struct btrfs_space_info *info;
1912
1913         info = BTRFS_I(inode)->space_info;
1914
1915         spin_lock(&info->lock);
1916         info->bytes_delalloc -= bytes;
1917         spin_unlock(&info->lock);
1918 }
1919
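/*
 * make sure a chunk with the given profile exists, allocating one
 * unless the space is already full or still comfortably under the
 * usage threshold; force (or a pending force_alloc) skips the check
 */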
1920 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1921                           struct btrfs_root *extent_root, u64 alloc_bytes,
1922                           u64 flags, int force)
1923 {
1924         struct btrfs_space_info *space_info;
1925         u64 thresh;
1926         int ret = 0;
1927
1928         mutex_lock(&extent_root->fs_info->chunk_mutex);
1929
1930         flags = btrfs_reduce_alloc_profile(extent_root, flags);
1931
1932         space_info = __find_space_info(extent_root->fs_info, flags);
1933         if (!space_info) {
1934                 ret = update_space_info(extent_root->fs_info, flags,
1935                                         0, 0, &space_info);
1936                 BUG_ON(ret);
1937         }
1938         BUG_ON(!space_info);
1939
1940         spin_lock(&space_info->lock);
1941         if (space_info->force_alloc) {
1942                 force = 1;
1943                 space_info->force_alloc = 0;
1944         }
1945         if (space_info->full) {
1946                 spin_unlock(&space_info->lock);
1947                 goto out;
1948         }
1949
1950         thresh = space_info->total_bytes - space_info->bytes_readonly;
1951         thresh = div_factor(thresh, 6);
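        /*
         * div_factor(x, 6) scales by 6/10: unless forced, skip the
         * allocation while used + pinned + reserved plus this request
         * stays under roughly 60% of the usable bytes here
         */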
1952         if (!force &&
1953            (space_info->bytes_used + space_info->bytes_pinned +
1954             space_info->bytes_reserved + alloc_bytes) < thresh) {
1955                 spin_unlock(&space_info->lock);
1956                 goto out;
1957         }
1958         spin_unlock(&space_info->lock);
1959
1960         ret = btrfs_alloc_chunk(trans, extent_root, flags);
1961         if (ret)
1962                 space_info->full = 1;
1963 out:
1964         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1965         return ret;
1966 }
1967
1968 static int update_block_group(struct btrfs_trans_handle *trans,
1969                               struct btrfs_root *root,
1970                               u64 bytenr, u64 num_bytes, int alloc,
1971                               int mark_free)
1972 {
1973         struct btrfs_block_group_cache *cache;
1974         struct btrfs_fs_info *info = root->fs_info;
1975         u64 total = num_bytes;
1976         u64 old_val;
1977         u64 byte_in_group;
1978
1979         while (total) {
1980                 cache = btrfs_lookup_block_group(info, bytenr);
1981                 if (!cache)
1982                         return -1;
1983                 byte_in_group = bytenr - cache->key.objectid;
1984                 WARN_ON(byte_in_group > cache->key.offset);
1985
1986                 spin_lock(&cache->space_info->lock);
1987                 spin_lock(&cache->lock);
1988                 cache->dirty = 1;
1989                 old_val = btrfs_block_group_used(&cache->item);
1990                 num_bytes = min(total, cache->key.offset - byte_in_group);
1991                 if (alloc) {
1992                         old_val += num_bytes;
1993                         cache->space_info->bytes_used += num_bytes;
1994                         if (cache->ro)
1995                                 cache->space_info->bytes_readonly -= num_bytes;
1996                         btrfs_set_block_group_used(&cache->item, old_val);
1997                         spin_unlock(&cache->lock);
1998                         spin_unlock(&cache->space_info->lock);
1999                 } else {
2000                         old_val -= num_bytes;
2001                         cache->space_info->bytes_used -= num_bytes;
2002                         if (cache->ro)
2003                                 cache->space_info->bytes_readonly += num_bytes;
2004                         btrfs_set_block_group_used(&cache->item, old_val);
2005                         spin_unlock(&cache->lock);
2006                         spin_unlock(&cache->space_info->lock);
2007                         if (mark_free) {
2008                                 int ret;
2009
2010                                 ret = btrfs_discard_extent(root, bytenr,
2011                                                            num_bytes);
2012                                 WARN_ON(ret);
2013
2014                                 ret = btrfs_add_free_space(cache, bytenr,
2015                                                            num_bytes);
2016                                 WARN_ON(ret);
2017                         }
2018                 }
2019                 put_block_group(cache);
2020                 total -= num_bytes;
2021                 bytenr += num_bytes;
2022         }
2023         return 0;
2024 }
2025
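/*
 * return the logical start of the first block group at or after
 * search_start, or 0 if no block group exists yet
 */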
2026 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
2027 {
2028         struct btrfs_block_group_cache *cache;
2029         u64 bytenr;
2030
2031         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
2032         if (!cache)
2033                 return 0;
2034
2035         bytenr = cache->key.objectid;
2036         put_block_group(cache);
2037
2038         return bytenr;
2039 }
2040
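/*
 * pin or unpin [bytenr, bytenr + num) in the fs-wide pinned_extents
 * tree and keep the per group and per space_info counters in step;
 * unpinning a cached group also hands the range back to its free
 * space cache
 */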
2041 int btrfs_update_pinned_extents(struct btrfs_root *root,
2042                                 u64 bytenr, u64 num, int pin)
2043 {
2044         u64 len;
2045         struct btrfs_block_group_cache *cache;
2046         struct btrfs_fs_info *fs_info = root->fs_info;
2047
2048         if (pin) {
2049                 set_extent_dirty(&fs_info->pinned_extents,
2050                                 bytenr, bytenr + num - 1, GFP_NOFS);
2051         } else {
2052                 clear_extent_dirty(&fs_info->pinned_extents,
2053                                 bytenr, bytenr + num - 1, GFP_NOFS);
2054         }
2055
2056         while (num > 0) {
2057                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2058                 BUG_ON(!cache);
2059                 len = min(num, cache->key.offset -
2060                           (bytenr - cache->key.objectid));
2061                 if (pin) {
2062                         spin_lock(&cache->space_info->lock);
2063                         spin_lock(&cache->lock);
2064                         cache->pinned += len;
2065                         cache->space_info->bytes_pinned += len;
2066                         spin_unlock(&cache->lock);
2067                         spin_unlock(&cache->space_info->lock);
2068                         fs_info->total_pinned += len;
2069                 } else {
2070                         spin_lock(&cache->space_info->lock);
2071                         spin_lock(&cache->lock);
2072                         cache->pinned -= len;
2073                         cache->space_info->bytes_pinned -= len;
2074                         spin_unlock(&cache->lock);
2075                         spin_unlock(&cache->space_info->lock);
2076                         fs_info->total_pinned -= len;
2077                         if (cache->cached)
2078                                 btrfs_add_free_space(cache, bytenr, len);
2079                 }
2080                 put_block_group(cache);
2081                 bytenr += len;
2082                 num -= len;
2083         }
2084         return 0;
2085 }
2086
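/*
 * move [bytenr, bytenr + num) in or out of the reserved counters of
 * every block group the range overlaps, and of their space_info
 */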
2087 static int update_reserved_extents(struct btrfs_root *root,
2088                                    u64 bytenr, u64 num, int reserve)
2089 {
2090         u64 len;
2091         struct btrfs_block_group_cache *cache;
2092         struct btrfs_fs_info *fs_info = root->fs_info;
2093
2094         while (num > 0) {
2095                 cache = btrfs_lookup_block_group(fs_info, bytenr);
2096                 BUG_ON(!cache);
2097                 len = min(num, cache->key.offset -
2098                           (bytenr - cache->key.objectid));
2099
2100                 spin_lock(&cache->space_info->lock);
2101                 spin_lock(&cache->lock);
2102                 if (reserve) {
2103                         cache->reserved += len;
2104                         cache->space_info->bytes_reserved += len;
2105                 } else {
2106                         cache->reserved -= len;
2107                         cache->space_info->bytes_reserved -= len;
2108                 }
2109                 spin_unlock(&cache->lock);
2110                 spin_unlock(&cache->space_info->lock);
2111                 put_block_group(cache);
2112                 bytenr += len;
2113                 num -= len;
2114         }
2115         return 0;
2116 }
2117
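/*
 * mirror every currently pinned range into the caller's tree; the
 * commit path appears to use this copy so it can safely unpin once
 * the transaction is on disk
 */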
2118 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
2119 {
2120         u64 last = 0;
2121         u64 start;
2122         u64 end;
2123         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
2124         int ret;
2125
2126         while (1) {
2127                 ret = find_first_extent_bit(pinned_extents, last,
2128                                             &start, &end, EXTENT_DIRTY);
2129                 if (ret)
2130                         break;
2131                 set_extent_dirty(copy, start, end, GFP_NOFS);
2132                 last = end + 1;
2133         }
2134         return 0;
2135 }
2136
2137 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
2138                                struct btrfs_root *root,
2139                                struct extent_io_tree *unpin)
2140 {
2141         u64 start;
2142         u64 end;
2143         int ret;
2144
2145         while (1) {
2146                 ret = find_first_extent_bit(unpin, 0, &start, &end,
2147                                             EXTENT_DIRTY);
2148                 if (ret)
2149                         break;
2150
2151                 ret = btrfs_discard_extent(root, start, end + 1 - start);
2152
2153                 /* unlocks the pinned mutex */
2154                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
2155                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
2156
2157                 cond_resched();
2158         }
2159         return 0;
2160 }
2161
2162 static int pin_down_bytes(struct btrfs_trans_handle *trans,
2163                           struct btrfs_root *root,
2164                           struct btrfs_path *path,
2165                           u64 bytenr, u64 num_bytes, int is_data,
2166                           struct extent_buffer **must_clean)
2167 {
2169         struct extent_buffer *buf;
2170
2171         if (is_data)
2172                 goto pinit;
2173
2174         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
2175         if (!buf)
2176                 goto pinit;
2177
2178         /* we can reuse a block if it hasn't been written
2179          * and it is from this transaction.  We can't
2180          * reuse anything from the tree log root because
2181          * it has tiny sub-transactions.
2182          */
2183         if (btrfs_buffer_uptodate(buf, 0) &&
2184             btrfs_try_tree_lock(buf)) {
2185                 u64 header_owner = btrfs_header_owner(buf);
2186                 u64 header_transid = btrfs_header_generation(buf);
2187                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
2188                     header_owner != BTRFS_TREE_RELOC_OBJECTID &&
2189                     header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID &&
2190                     header_transid == trans->transid &&
2191                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
2192                         *must_clean = buf;
2193                         return 1;
2194                 }
2195                 btrfs_tree_unlock(buf);
2196         }
2197         free_extent_buffer(buf);
2198 pinit:
2199         btrfs_set_path_blocking(path);
2200         /* unlocks the pinned mutex */
2201         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2202
2204         return 0;
2205 }
2206
2207 /*
2208  * remove an extent from the root, returns 0 on success
2209  */
2210 static int __free_extent(struct btrfs_trans_handle *trans,
2211                          struct btrfs_root *root,
2212                          u64 bytenr, u64 num_bytes, u64 parent,
2213                          u64 root_objectid, u64 ref_generation,
2214                          u64 owner_objectid, int pin, int mark_free,
2215                          int refs_to_drop)
2216 {
2217         struct btrfs_path *path;
2218         struct btrfs_key key;
2219         struct btrfs_fs_info *info = root->fs_info;
2220         struct btrfs_root *extent_root = info->extent_root;
2221         struct extent_buffer *leaf;
2222         int ret;
2223         int extent_slot = 0;
2224         int found_extent = 0;
2225         int num_to_del = 1;
2226         struct btrfs_extent_item *ei;
2227         u32 refs;
2228
2229         key.objectid = bytenr;
2230         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
2231         key.offset = num_bytes;
2232         path = btrfs_alloc_path();
2233         if (!path)
2234                 return -ENOMEM;
2235
2236         path->reada = 1;
2237         path->leave_spinning = 1;
2238         ret = lookup_extent_backref(trans, extent_root, path,
2239                                     bytenr, parent, root_objectid,
2240                                     ref_generation, owner_objectid, 1);
2241         if (ret == 0) {
2242                 struct btrfs_key found_key;
2243                 extent_slot = path->slots[0];
2244                 while (extent_slot > 0) {
2245                         extent_slot--;
2246                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2247                                               extent_slot);
2248                         if (found_key.objectid != bytenr)
2249                                 break;
2250                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
2251                             found_key.offset == num_bytes) {
2252                                 found_extent = 1;
2253                                 break;
2254                         }
2255                         if (path->slots[0] - extent_slot > 5)
2256                                 break;
2257                 }
2258                 if (!found_extent) {
2259                         ret = remove_extent_backref(trans, extent_root, path,
2260                                                     refs_to_drop);
2261                         BUG_ON(ret);
2262                         btrfs_release_path(extent_root, path);
2263                         path->leave_spinning = 1;
2264                         ret = btrfs_search_slot(trans, extent_root,
2265                                                 &key, path, -1, 1);
2266                         if (ret) {
2267                                 printk(KERN_ERR "umm, got %d back from search"
2268                                        ", was looking for %llu\n", ret,
2269                                        (unsigned long long)bytenr);
2270                                 btrfs_print_leaf(extent_root, path->nodes[0]);
2271                         }
2272                         BUG_ON(ret);
2273                         extent_slot = path->slots[0];
2274                 }
2275         } else {
2276                 btrfs_print_leaf(extent_root, path->nodes[0]);
2277                 WARN_ON(1);
2278                 printk(KERN_ERR "btrfs unable to find ref byte nr %llu "
2279                        "parent %llu root %llu gen %llu owner %llu\n",
2280                        (unsigned long long)bytenr,
2281                        (unsigned long long)parent,
2282                        (unsigned long long)root_objectid,
2283                        (unsigned long long)ref_generation,
2284                        (unsigned long long)owner_objectid);
2285         }
2286
2287         leaf = path->nodes[0];
2288         ei = btrfs_item_ptr(leaf, extent_slot,
2289                             struct btrfs_extent_item);
2290         refs = btrfs_extent_refs(leaf, ei);
2291
2292         /*
2293          * we're not allowed to delete the extent item if there
2294          * are other delayed ref updates pending
2295          */
2296
2297         BUG_ON(refs < refs_to_drop);
2298         refs -= refs_to_drop;
2299         btrfs_set_extent_refs(leaf, ei, refs);
2300         btrfs_mark_buffer_dirty(leaf);
2301
2302         if (refs == 0 && found_extent &&
2303             path->slots[0] == extent_slot + 1) {
2304                 struct btrfs_extent_ref *ref;
2305                 ref = btrfs_item_ptr(leaf, path->slots[0],
2306                                      struct btrfs_extent_ref);
2307                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop);
2308                 /* if the back ref and the extent are next to each other
2309                  * they get deleted below in one shot
2310                  */
2311                 path->slots[0] = extent_slot;
2312                 num_to_del = 2;
2313         } else if (found_extent) {
2314                 /* otherwise delete the extent back ref */
2315                 ret = remove_extent_backref(trans, extent_root, path,
2316                                             refs_to_drop);
2317                 BUG_ON(ret);
2318                 /* if refs are 0, we need to setup the path for deletion */
2319                 if (refs == 0) {
2320                         btrfs_release_path(extent_root, path);
2321                         path->leave_spinning = 1;
2322                         ret = btrfs_search_slot(trans, extent_root, &key, path,
2323                                                 -1, 1);
2324                         BUG_ON(ret);
2325                 }
2326         }
2327
2328         if (refs == 0) {
2329                 u64 super_used;
2330                 u64 root_used;
2331                 struct extent_buffer *must_clean = NULL;
2332
2333                 if (pin) {
2334                         ret = pin_down_bytes(trans, root, path,
2335                                 bytenr, num_bytes,
2336                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID,
2337                                 &must_clean);
2338                         if (ret > 0)
2339                                 mark_free = 1;
2340                         BUG_ON(ret < 0);
2341                 }
2342
2343                 /* block accounting for super block */
2344                 spin_lock(&info->delalloc_lock);
2345                 super_used = btrfs_super_bytes_used(&info->super_copy);
2346                 btrfs_set_super_bytes_used(&info->super_copy,
2347                                            super_used - num_bytes);
2348
2349                 /* block accounting for root item */
2350                 root_used = btrfs_root_used(&root->root_item);
2351                 btrfs_set_root_used(&root->root_item,
2352                                            root_used - num_bytes);
2353                 spin_unlock(&info->delalloc_lock);
2354
2355                 /*
2356                  * it is going to be very rare for someone to be waiting
2357                  * on the block we're freeing.  del_items might need to
2358                  * schedule, so rather than get fancy, just force it
2359                  * to blocking here
2360                  */
2361                 if (must_clean)
2362                         btrfs_set_lock_blocking(must_clean);
2363
2364                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
2365                                       num_to_del);
2366                 BUG_ON(ret);
2367                 btrfs_release_path(extent_root, path);
2368
2369                 if (must_clean) {
2370                         clean_tree_block(NULL, root, must_clean);
2371                         btrfs_tree_unlock(must_clean);
2372                         free_extent_buffer(must_clean);
2373                 }
2374
2375                 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2376                         ret = btrfs_del_csums(trans, root, bytenr, num_bytes);
2377                         BUG_ON(ret);
2378                 } else {
2379                         invalidate_mapping_pages(info->btree_inode->i_mapping,
2380                              bytenr >> PAGE_CACHE_SHIFT,
2381                              (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT);
2382                 }
2383
2384                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
2385                                          mark_free);
2386                 BUG_ON(ret);
2387         }
2388         btrfs_free_path(path);
2389         return ret;
2390 }
2391
2392 /*
2393  * remove an extent from the root, returns 0 on success
2394  */
2395 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2396                                         struct btrfs_root *root,
2397                                         u64 bytenr, u64 num_bytes, u64 parent,
2398                                         u64 root_objectid, u64 ref_generation,
2399                                         u64 owner_objectid, int pin,
2400                                         int refs_to_drop)
2401 {
2402         WARN_ON(num_bytes < root->sectorsize);
2403
2404         /*
2405          * metadata blocks are always pinned; data is pinned when its
2406          * ref generation is older than the running transaction
2407          */
2408         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID ||
2409             ref_generation != trans->transid)
2410                 pin = 1;
2414
2415         return __free_extent(trans, root, bytenr, num_bytes, parent,
2416                             root_objectid, ref_generation,
2417                             owner_objectid, pin, pin == 0, refs_to_drop);
2418 }
2419
2420 /*
2421  * when we free an extent, it is possible (and likely) that we free the last
2422  * delayed ref for that extent as well.  This searches the delayed ref tree for
2423  * a given extent, and if there are no other delayed refs to be processed, it
2424  * removes it from the tree.
2425  */
2426 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
2427                                       struct btrfs_root *root, u64 bytenr)
2428 {
2429         struct btrfs_delayed_ref_head *head;
2430         struct btrfs_delayed_ref_root *delayed_refs;
2431         struct btrfs_delayed_ref_node *ref;
2432         struct rb_node *node;
2433         int ret;
2434
2435         delayed_refs = &trans->transaction->delayed_refs;
2436         spin_lock(&delayed_refs->lock);
2437         head = btrfs_find_delayed_ref_head(trans, bytenr);
2438         if (!head)
2439                 goto out;
2440
2441         node = rb_prev(&head->node.rb_node);
2442         if (!node)
2443                 goto out;
2444
2445         ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
2446
2447         /* there are still entries for this ref, we can't drop it */
2448         if (ref->bytenr == bytenr)
2449                 goto out;
2450
2451         /*
2452          * waiting for the lock here would deadlock.  If someone else has it
2453          * locked they are already in the process of dropping it anyway
2454          */
2455         if (!mutex_trylock(&head->mutex))
2456                 goto out;
2457
2458         /*
2459          * at this point we have a head with no other entries.  Go
2460          * ahead and process it.
2461          */
2462         head->node.in_tree = 0;
2463         rb_erase(&head->node.rb_node, &delayed_refs->root);
2464
2465         delayed_refs->num_entries--;
2466
2467         /*
2468          * we don't take a ref on the node because we're removing it from the
2469          * tree, so we just steal the ref the tree was holding.
2470          */
2471         delayed_refs->num_heads--;
2472         if (list_empty(&head->cluster))
2473                 delayed_refs->num_heads_ready--;
2474
2475         list_del_init(&head->cluster);
2476         spin_unlock(&delayed_refs->lock);
2477
2478         ret = run_one_delayed_ref(trans, root->fs_info->tree_root,
2479                                   &head->node, head->must_insert_reserved);
2480         BUG_ON(ret);
2481         btrfs_put_delayed_ref(&head->node);
2482         return 0;
2483 out:
2484         spin_unlock(&delayed_refs->lock);
2485         return 0;
2486 }
2487
2488 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2489                       struct btrfs_root *root,
2490                       u64 bytenr, u64 num_bytes, u64 parent,
2491                       u64 root_objectid, u64 ref_generation,
2492                       u64 owner_objectid, int pin)
2493 {
2494         int ret;
2495
2496         /*
2497          * tree log blocks never actually go into the extent allocation
2498          * tree, just update pinning info and exit early.
2499          *
2500          * data extents referenced by the tree log do need to have
2501          * their reference counts bumped.
2502          */
2503         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID &&
2504             owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2505                 /* unlocks the pinned mutex */
2506                 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
2507                 update_reserved_extents(root, bytenr, num_bytes, 0);
2508                 ret = 0;
2509         } else {
2510                 ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent,
2511                                        root_objectid, ref_generation,
2512                                        owner_objectid,
2513                                        BTRFS_DROP_DELAYED_REF, 1);
2514                 BUG_ON(ret);
2515                 ret = check_ref_cleanup(trans, root, bytenr);
2516                 BUG_ON(ret);
2517         }
2518         return ret;
2519 }
2520
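/*
 * round val up to the next stripe boundary: with a 64K stripesize the
 * mask is 0xffff, so e.g. 65537 aligns to 131072 while 65536 is
 * already aligned and comes back unchanged
 */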
2521 static u64 stripe_align(struct btrfs_root *root, u64 val)
2522 {
2523         u64 mask = ((u64)root->stripesize - 1);
2524         u64 ret = (val + mask) & ~mask;
2525         return ret;
2526 }
2527
2528 /*
2529  * walks the btree of allocated extents and finds a hole of a given size.
2530  * The key ins is changed to record the hole:
2531  * ins->objectid == block start
2532  * ins->flags == BTRFS_EXTENT_ITEM_KEY
2533  * ins->offset == number of blocks
2534  * Any available blocks before search_start are skipped.
2535  */
2536 static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2537                                      struct btrfs_root *orig_root,
2538                                      u64 num_bytes, u64 empty_size,
2539                                      u64 search_start, u64 search_end,
2540                                      u64 hint_byte, struct btrfs_key *ins,
2541                                      u64 exclude_start, u64 exclude_nr,
2542                                      int data)
2543 {
2544         int ret = 0;
2545         struct btrfs_root *root = orig_root->fs_info->extent_root;
2546         u64 *last_ptr = NULL;
2547         struct btrfs_block_group_cache *block_group = NULL;
2548         int empty_cluster = 2 * 1024 * 1024;
2549         int allowed_chunk_alloc = 0;
2550         int using_hint = 0;
2551         struct btrfs_space_info *space_info;
2552
2553         WARN_ON(num_bytes < root->sectorsize);
2554         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2555         ins->objectid = 0;
2556         ins->offset = 0;
2557
2558         space_info = __find_space_info(root->fs_info, data);
2559
2560         if (orig_root->ref_cows || empty_size)
2561                 allowed_chunk_alloc = 1;
2562
2563         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2564                 last_ptr = &root->fs_info->last_alloc;
2565                 if (!btrfs_test_opt(root, SSD))
2566                         empty_cluster = 64 * 1024;
2567         }
2568
2569         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2570                 last_ptr = &root->fs_info->last_data_alloc;
2571
2572         if (last_ptr) {
2573                 if (*last_ptr)
2574                         hint_byte = *last_ptr;
2575                 else
2576                         empty_size += empty_cluster;
2577         } else {
2578                 empty_cluster = 0;
2579         }
2580         search_start = max(search_start, first_logical_byte(root, 0));
2581         search_start = max(search_start, hint_byte);
2582
2583         if (search_start == hint_byte) {
2584                 using_hint = 1;
2585                 block_group = btrfs_lookup_block_group(root->fs_info,
2586                                                        search_start);
2587                 if (block_group && block_group_bits(block_group, data)) {
2588                         down_read(&space_info->groups_sem);
2589                         goto have_block_group;
2590                 } else if (block_group) {
2591                         put_block_group(block_group);
2592                 }
2593
2594                 empty_size += empty_cluster;
2595                 using_hint = 0;
2596         }
2597
2598 search:
2599         down_read(&space_info->groups_sem);
2600         list_for_each_entry(block_group, &space_info->block_groups, list) {
2601                 u64 offset;
2602
2603                 atomic_inc(&block_group->count);
2604                 search_start = block_group->key.objectid;
2605
2606 have_block_group:
2607                 if (unlikely(!block_group->cached)) {
2608                         mutex_lock(&block_group->cache_mutex);
2609                         ret = cache_block_group(root, block_group);
2610                         mutex_unlock(&block_group->cache_mutex);
2611                         if (ret) {
2612                                 put_block_group(block_group);
2613                                 break;
2614                         }
2615                 }
2616
2617                 if (unlikely(block_group->ro))
2618                         goto loop;
2619
2620                 offset = btrfs_find_space_for_alloc(block_group, search_start,
2621                                                     num_bytes, empty_size);
2622                 if (!offset)
2623                         goto loop;
2624
2625                 search_start = stripe_align(root, offset);
2626
2627                 /* move on to the next group */
2628                 if (search_start + num_bytes >= search_end) {
2629                         btrfs_add_free_space(block_group, offset, num_bytes);
2630                         goto loop;
2631                 }
2632
2633                 /* move on to the next group */
2634                 if (search_start + num_bytes >
2635                     block_group->key.objectid + block_group->key.offset) {
2636                         btrfs_add_free_space(block_group, offset, num_bytes);
2637                         goto loop;
2638                 }
2639
2640                 if (using_hint && search_start > hint_byte) {
2641                         btrfs_add_free_space(block_group, offset, num_bytes);
2642                         goto loop;
2643                 }
2644
2645                 if (exclude_nr > 0 &&
2646                     (search_start + num_bytes > exclude_start &&
2647                      search_start < exclude_start + exclude_nr)) {
2648                         search_start = exclude_start + exclude_nr;
2649
2650                         btrfs_add_free_space(block_group, offset, num_bytes);
2651                         /*
2652                          * if search_start is still in this block group
2653                          * then we just re-search this block group
2654                          */
2655                         if (search_start >= block_group->key.objectid &&
2656                             search_start < (block_group->key.objectid +
2657                                             block_group->key.offset))
2658                                 goto have_block_group;
2659                         goto loop;
2660                 }
2661
2662                 ins->objectid = search_start;
2663                 ins->offset = num_bytes;
2664
2665                 if (offset < search_start)
2666                         btrfs_add_free_space(block_group, offset,
2667                                              search_start - offset);
2668                 BUG_ON(offset > search_start);
2669
2670                 /* we are all good, let's return */
2671                 break;
2672 loop:
2673                 put_block_group(block_group);
2674                 if (using_hint) {
2675                         empty_size += empty_cluster;
2676                         using_hint = 0;
2677                         up_read(&space_info->groups_sem);
2678                         goto search;
2679                 }
2680         }
2681         up_read(&space_info->groups_sem);
2682
2683         if (!ins->objectid && (empty_size || allowed_chunk_alloc)) {
2684                 int try_again = empty_size;
2685
2686                 empty_size = 0;
2687
2688                 if (allowed_chunk_alloc) {
2689                         ret = do_chunk_alloc(trans, root, num_bytes +
2690                                              2 * 1024 * 1024, data, 1);
2691                         if (!ret)
2692                                 try_again = 1;
2693                         allowed_chunk_alloc = 0;
2694                 } else {
2695                         space_info->force_alloc = 1;
2696                 }
2697
2698                 if (try_again)
2699                         goto search;
2700                 ret = -ENOSPC;
2701         } else if (!ins->objectid) {
2702                 ret = -ENOSPC;
2703         }
2704
2705         /* we found what we needed */
2706         if (ins->objectid) {
2707                 if (!(data & BTRFS_BLOCK_GROUP_DATA))
2708                         trans->block_group = block_group->key.objectid;
2709
2710                 if (last_ptr)
2711                         *last_ptr = ins->objectid + ins->offset;
2712                 put_block_group(block_group);
2713                 ret = 0;
2714         }
2715
2716         return ret;
2717 }
2718
2719 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2720 {
2721         struct btrfs_block_group_cache *cache;
2722
2723         printk(KERN_INFO "space_info has %llu free, is %sfull\n",
2724                (unsigned long long)(info->total_bytes - info->bytes_used -
2725                                     info->bytes_pinned - info->bytes_reserved),
2726                (info->full) ? "" : "not ");
2727         printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu,"
2728                " may_use=%llu, used=%llu\n", info->total_bytes,
2729                info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use,
2730                info->bytes_used);
2731
2732         down_read(&info->groups_sem);
2733         list_for_each_entry(cache, &info->block_groups, list) {
2734                 spin_lock(&cache->lock);
2735                 printk(KERN_INFO "block group %llu has %llu bytes, %llu used "
2736                        "%llu pinned %llu reserved\n",
2737                        (unsigned long long)cache->key.objectid,
2738                        (unsigned long long)cache->key.offset,
2739                        (unsigned long long)btrfs_block_group_used(&cache->item),
2740                        (unsigned long long)cache->pinned,
2741                        (unsigned long long)cache->reserved);
2742                 btrfs_dump_free_space(cache, bytes);
2743                 spin_unlock(&cache->lock);
2744         }
2745         up_read(&info->groups_sem);
2746 }
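
/*
 * the printks above produce dmesg output roughly like this (numbers
 * invented for illustration; free = total - used - pinned - reserved):
 *
 *   space_info has 1048576 free, is not full
 *   space_info total=8388608, pinned=0, delalloc=0, may_use=0, used=7340032
 *   block group 12582912 has 8388608 bytes, 7340032 used 0 pinned 0 reserved
 *
 * followed by the free space dump for each block group
 */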
2747
2748 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2749                                   struct btrfs_root *root,
2750                                   u64 num_bytes, u64 min_alloc_size,
2751                                   u64 empty_size, u64 hint_byte,
2752                                   u64 search_end, struct btrfs_key *ins,
2753                                   u64 data)
2754 {
2755         int ret;
2756         u64 search_start = 0;
2757         struct btrfs_fs_info *info = root->fs_info;
2758
2759         data = btrfs_get_alloc_profile(root, data);
2760 again:
2761         /*
2762          * the only place that sets empty_size is btrfs_realloc_node, which
2763          * is not called recursively on allocations
2764          */
2765         if (empty_size || root->ref_cows) {
2766                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2767                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2768                                      2 * 1024 * 1024,
2769                                      BTRFS_BLOCK_GROUP_METADATA |
2770                                      (info->metadata_alloc_profile &
2771                                       info->avail_metadata_alloc_bits), 0);
2772                 }
2773                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2774                                      num_bytes + 2 * 1024 * 1024, data, 0);
2775         }
2776
2777         WARN_ON(num_bytes < root->sectorsize);
2778         ret = find_free_extent(trans, root, num_bytes, empty_size,
2779                                search_start, search_end, hint_byte, ins,
2780                                trans->alloc_exclude_start,
2781                                trans->alloc_exclude_nr, data);
2782
2783         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2784                 num_bytes = num_bytes >> 1;
2785                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2786                 num_bytes = max(num_bytes, min_alloc_size);
2787                 do_chunk_alloc(trans, root->fs_info->extent_root,
2788                                num_bytes, data, 1);
2789                 goto again;
2790         }
2791         if (ret) {
2792                 struct btrfs_space_info *sinfo;
2793
2794                 sinfo = __find_space_info(root->fs_info, data);
2795                 printk(KERN_ERR "btrfs allocation failed flags %llu, "
2796                        "wanted %llu\n", (unsigned long long)data,
2797                        (unsigned long long)num_bytes);
2798                 dump_space_info(sinfo, num_bytes);
2799                 BUG();
2800         }
2801
2802         return ret;
2803 }
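
/*
 * the -ENOSPC retry above halves the request, rounds down to a sector
 * boundary and clamps at min_alloc_size.  With a 4K sectorsize,
 * num_bytes = 1M and min_alloc_size = 64K, for example, the sequence
 * of attempts is 1M, 512K, 256K, 128K, 64K (each retry also forcing a
 * chunk allocation first); if the 64K attempt still fails we fall
 * through to dump_space_info() and BUG().
 */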
2804
2805 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2806 {
2807         struct btrfs_block_group_cache *cache;
2808         int ret = 0;
2809
2810         cache = btrfs_lookup_block_group(root->fs_info, start);
2811         if (!cache) {
2812                 printk(KERN_ERR "Unable to find block group for %llu\n",
2813                        (unsigned long long)start);
2814                 return -ENOSPC;
2815         }
2816
2817         ret = btrfs_discard_extent(root, start, len);
2818
2819         btrfs_add_free_space(cache, start, len);
2820         put_block_group(cache);
2821         update_reserved_extents(root, start, len, 0);
2822
2823         return ret;
2824 }
2825
2826 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2827                                   struct btrfs_root *root,
2828                                   u64 num_bytes, u64 min_alloc_size,
2829                                   u64 empty_size, u64 hint_byte,
2830                                   u64 search_end, struct btrfs_key *ins,
2831                                   u64 data)
2832 {
2833         int ret;
2834         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2835                                      empty_size, hint_byte, search_end, ins,
2836                                      data);
2837         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2838         return ret;
2839 }
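
/*
 * rough lifecycle sketch (illustrative only, not a real caller): space
 * is reserved first and later either turned into a real extent item or
 * handed back.  The final argument of 1 asks for a data block group:
 *
 *	struct btrfs_key ins;
 *	ret = btrfs_reserve_extent(trans, root, len, root->sectorsize,
 *				   0, 0, (u64)-1, &ins, 1);
 *	if (!ret) {
 *		... write into [ins.objectid, ins.objectid + ins.offset) ...
 *		btrfs_alloc_reserved_extent(trans, root, 0, root_objectid,
 *					    trans->transid, owner, &ins);
 *		... or, on error, give the space back:
 *		btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 *	}
 */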
2840
2841 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2842                                          struct btrfs_root *root, u64 parent,
2843                                          u64 root_objectid, u64 ref_generation,
2844                                          u64 owner, struct btrfs_key *ins,
2845                                          int ref_mod)
2846 {
2847         int ret;
2848         u64 super_used;
2849         u64 root_used;
2850         u64 num_bytes = ins->offset;
2851         u32 sizes[2];
2852         struct btrfs_fs_info *info = root->fs_info;
2853         struct btrfs_root *extent_root = info->extent_root;
2854         struct btrfs_extent_item *extent_item;
2855         struct btrfs_extent_ref *ref;
2856         struct btrfs_path *path;
2857         struct btrfs_key keys[2];
2858
2859         if (parent == 0)
2860                 parent = ins->objectid;
2861
2862         /* block accounting for super block */
2863         spin_lock(&info->delalloc_lock);
2864         super_used = btrfs_super_bytes_used(&info->super_copy);
2865         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2866
2867         /* block accounting for root item */
2868         root_used = btrfs_root_used(&root->root_item);
2869         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2870         spin_unlock(&info->delalloc_lock);
2871
2872         memcpy(&keys[0], ins, sizeof(*ins));
2873         keys[1].objectid = ins->objectid;
2874         keys[1].type = BTRFS_EXTENT_REF_KEY;
2875         keys[1].offset = parent;
2876         sizes[0] = sizeof(*extent_item);
2877         sizes[1] = sizeof(*ref);
2878
2879         path = btrfs_alloc_path();
2880         BUG_ON(!path);
2881
2882         path->leave_spinning = 1;
2883         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2884                                        sizes, 2);
2885         BUG_ON(ret);
2886
2887         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2888                                      struct btrfs_extent_item);
2889         btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod);
2890         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2891                              struct btrfs_extent_ref);
2892
2893         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2894         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2895         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2896         btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod);
2897
2898         btrfs_mark_buffer_dirty(path->nodes[0]);
2899
2900         trans->alloc_exclude_start = 0;
2901         trans->alloc_exclude_nr = 0;
2902         btrfs_free_path(path);
2903
2904         if (ret)
2905                 goto out;
2906
2907         ret = update_block_group(trans, root, ins->objectid,
2908                                  ins->offset, 1, 0);
2909         if (ret) {
2910                 printk(KERN_ERR "btrfs update block group failed for %llu "
2911                        "%llu\n", (unsigned long long)ins->objectid,
2912                        (unsigned long long)ins->offset);
2913                 BUG();
2914         }
2915 out:
2916         return ret;
2917 }
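
/*
 * the two-item insert above leaves the extent tree leaf laid out as
 * (keys shown objectid/type/offset):
 *
 *   slot n:   bytenr EXTENT_ITEM_KEY num_bytes -> struct btrfs_extent_item
 *   slot n+1: bytenr EXTENT_REF_KEY  parent    -> struct btrfs_extent_ref
 *
 * the backref immediately follows its extent item, which is why the
 * code above can read both through path->slots[0] and
 * path->slots[0] + 1 without searching again.
 */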
2918
2919 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2920                                 struct btrfs_root *root, u64 parent,
2921                                 u64 root_objectid, u64 ref_generation,
2922                                 u64 owner, struct btrfs_key *ins)
2923 {
2924         int ret;
2925
2926         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2927                 return 0;
2928
2929         ret = btrfs_add_delayed_ref(trans, ins->objectid,
2930                                     ins->offset, parent, root_objectid,
2931                                     ref_generation, owner,
2932                                     BTRFS_ADD_DELAYED_EXTENT, 0);
2933         BUG_ON(ret);
2934         return ret;
2935 }
2936
2937 /*
2938  * this is used by the tree logging recovery code.  It records that
2939  * an extent has been allocated and makes sure to clear the free
2940  * space cache bits as well
2941  */
2942 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2943                                 struct btrfs_root *root, u64 parent,
2944                                 u64 root_objectid, u64 ref_generation,
2945                                 u64 owner, struct btrfs_key *ins)
2946 {
2947         int ret;
2948         struct btrfs_block_group_cache *block_group;
2949
2950         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
        BUG_ON(!block_group);
2951         mutex_lock(&block_group->cache_mutex);
2952         cache_block_group(root, block_group);
2953         mutex_unlock(&block_group->cache_mutex);
2954
2955         ret = btrfs_remove_free_space(block_group, ins->objectid,
2956                                       ins->offset);
2957         BUG_ON(ret);
2958         put_block_group(block_group);
2959         ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid,
2960                                             ref_generation, owner, ins, 1);
2961         return ret;
2962 }
2963
2964 /*
2965  * finds a free extent and does all the dirty work required for
2966  * allocation.  The key for the allocated extent is returned through
2967  * ins (use btrfs_alloc_free_block below to also get a tree buffer).
2968  *
2969  * returns 0 if everything worked, non-zero otherwise.
2970  */
2971 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2972                        struct btrfs_root *root,
2973                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2974                        u64 root_objectid, u64 ref_generation,
2975                        u64 owner_objectid, u64 empty_size, u64 hint_byte,
2976                        u64 search_end, struct btrfs_key *ins, u64 data)
2977 {
2978         int ret;
2979         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2980                                      min_alloc_size, empty_size, hint_byte,
2981                                      search_end, ins, data);
2982         BUG_ON(ret);
2983         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2984                 ret = btrfs_add_delayed_ref(trans, ins->objectid,
2985                                             ins->offset, parent, root_objectid,
2986                                             ref_generation, owner_objectid,
2987                                             BTRFS_ADD_DELAYED_EXTENT, 0);
2988                 BUG_ON(ret);
2989         }
2990         update_reserved_extents(root, ins->objectid, ins->offset, 1);
2991         return ret;
2992 }
2993
2994 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2995                                             struct btrfs_root *root,
2996                                             u64 bytenr, u32 blocksize,
2997                                             int level)
2998 {
2999         struct extent_buffer *buf;
3000
3001         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
3002         if (!buf)
3003                 return ERR_PTR(-ENOMEM);
3004         btrfs_set_header_generation(buf, trans->transid);
3005         btrfs_set_buffer_lockdep_class(buf, level);
3006         btrfs_tree_lock(buf);
3007         clean_tree_block(trans, root, buf);
3008
3009         btrfs_set_lock_blocking(buf);
3010         btrfs_set_buffer_uptodate(buf);
3011
3012         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
3013                 set_extent_dirty(&root->dirty_log_pages, buf->start,
3014                          buf->start + buf->len - 1, GFP_NOFS);
3015         } else {
3016                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
3017                          buf->start + buf->len - 1, GFP_NOFS);
3018         }
3019         trans->blocks_used++;
3020         /* this returns a buffer locked for blocking */
3021         return buf;
3022 }
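
/*
 * a typical caller pattern for the helper above, roughly: the buffer
 * comes back locked and set blocking, with its range already recorded
 * as dirty in the transaction (or in dirty_log_pages for the log tree):
 *
 *	buf = btrfs_init_new_buffer(trans, root, bytenr, blocksize, level);
 *	if (IS_ERR(buf))
 *		return PTR_ERR(buf);
 *	... fill in the new block ...
 *	btrfs_mark_buffer_dirty(buf);
 *	btrfs_tree_unlock(buf);
 *	free_extent_buffer(buf);
 */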
3023
3024 /*
3025  * helper function to allocate a block for a given tree
3026  * returns the tree buffer or an ERR_PTR on failure.
3027  */
3028 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
3029                                              struct btrfs_root *root,
3030                                              u32 blocksize, u64 parent,
3031                                              u64 root_objectid,
3032                                              u64 ref_generation,
3033                                              int level,
3034                                              u64 hint,
3035                                              u64 empty_size)
3036 {
3037         struct btrfs_key ins;
3038         int ret;
3039         struct extent_buffer *buf;
3040
3041         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
3042                                  root_objectid, ref_generation, level,
3043                                  empty_size, hint, (u64)-1, &ins, 0);
3044         if (ret) {
3045                 BUG_ON(ret > 0);
3046                 return ERR_PTR(ret);
3047         }
3048
3049         buf = btrfs_init_new_buffer(trans, root, ins.objectid,
3050                                     blocksize, level);
3051         return buf;
3052 }
3053
3054 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
3055                         struct btrfs_root *root, struct extent_buffer *leaf)
3056 {
3057         u64 leaf_owner;
3058         u64 leaf_generation;
3059         struct refsort *sorted;
3060         struct btrfs_key key;
3061         struct btrfs_file_extent_item *fi;
3062         int i;
3063         int nritems;
3064         int ret;
3065         int refi = 0;
3066         int slot;
3067
3068         BUG_ON(!btrfs_is_leaf(leaf));
3069         nritems = btrfs_header_nritems(leaf);
3070         leaf_owner = btrfs_header_owner(leaf);
3071         leaf_generation = btrfs_header_generation(leaf);
3072
3073         sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
        BUG_ON(!sorted);
3074         /* we do this loop twice.  The first time we build a list
3075          * of the extents we have a reference on, then we sort the list
3076          * by bytenr.  The second time around we actually do the
3077          * extent freeing.
3078          */
3079         for (i = 0; i < nritems; i++) {
3080                 u64 disk_bytenr;
3081                 cond_resched();
3082
3083                 btrfs_item_key_to_cpu(leaf, &key, i);
3084
3085                 /* only extents have references, skip everything else */
3086                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3087                         continue;
3088
3089                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
3090
3091                 /* inline extents live in the btree, they don't have refs */
3092                 if (btrfs_file_extent_type(leaf, fi) ==
3093                     BTRFS_FILE_EXTENT_INLINE)
3094                         continue;
3095
3096                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
3097
3098                 /* holes don't have refs */
3099                 if (disk_bytenr == 0)
3100                         continue;
3101
3102                 sorted[refi].bytenr = disk_bytenr;
3103                 sorted[refi].slot = i;
3104                 refi++;
3105         }
3106
3107         if (refi == 0)
3108                 goto out;
3109
3110         sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
3111
3112         for (i = 0; i < refi; i++) {
3113                 u64 disk_bytenr;
3114
3115                 disk_bytenr = sorted[i].bytenr;
3116                 slot = sorted[i].slot;
3117
3118                 cond_resched();
3119
3120                 btrfs_item_key_to_cpu(leaf, &key, slot);
3121                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
3122                         continue;
3123
3124                 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
3125
3126                 ret = btrfs_free_extent(trans, root, disk_bytenr,
3127                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
3128                                 leaf->start, leaf_owner, leaf_generation,
3129                                 key.objectid, 0);
3130                 BUG_ON(ret);
3131
3132                 atomic_inc(&root->fs_info->throttle_gen);
3133                 wake_up(&root->fs_info->transaction_throttle);
3134                 cond_resched();
3135         }
3136 out:
3137         kfree(sorted);
3138         return 0;
3139 }
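
/*
 * the refsort records used above (and in cache_drop_leaf_ref below)
 * are just a bytenr/slot pair, something like:
 *
 *	struct refsort {
 *		u64 bytenr;
 *		u32 slot;
 *	};
 *
 * sorting them by bytenr makes the btrfs_free_extent calls walk the
 * extent allocation tree in ascending disk order, keeping the COW
 * traffic on that tree clustered instead of random.
 */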
3140
3141 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
3142                                         struct btrfs_root *root,
3143                                         struct btrfs_leaf_ref *ref)
3144 {
3145         int i;
3146         int ret;
3147         struct btrfs_extent_info *info;
3148         struct refsort *sorted;
3149
3150         if (ref->nritems == 0)
3151                 return 0;
3152
3153         sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS);
        BUG_ON(!sorted);
3154         for (i = 0; i < ref->nritems; i++) {
3155                 sorted[i].bytenr = ref->extents[i].bytenr;
3156                 sorted[i].slot = i;
3157         }
3158         sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL);
3159
3160         /*
3161          * free the extents in bytenr order; the sort above keeps our
3162          * updates to the extent allocation tree clustered together.
3163          */
3164         for (i = 0; i < ref->nritems; i++) {
3165                 info = ref->extents + sorted[i].slot;
3166                 ret = btrfs_free_extent(trans, root, info->bytenr,
3167                                           info->num_bytes, ref->bytenr,
3168                                           ref->owner, ref->generation,
3169                                           info->objectid, 0);
3170
3171                 atomic_inc(&root->fs_info->throttle_gen);
3172                 wake_up(&root->fs_info->transaction_throttle);
3173                 cond_resched();
3174
3175                 BUG_ON(ret);
3177         }
3178
3179         kfree(sorted);
3180         return 0;
3181 }
3182
3183 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans,
3184                                      struct btrfs_root *root, u64 start,
3185                                      u64 len, u32 *refs)
3186 {
3187         int ret;
3188
3189         ret = btrfs_lookup_extent_ref(trans, root, start, len, refs);
3190         BUG_ON(ret);
3191
3192 #if 0 /* some debugging code in case we see problems here */
3193         /* if the refs count is one, it won't get increased again.  But
3194          * if the ref count is > 1, someone may be decreasing it at
3195          * the same time we are.
3196          */
3197         if (*refs != 1) {
3198                 struct extent_buffer *eb = NULL;
3199                 eb = btrfs_find_create_tree_block(root, start, len);
3200                 if (eb)
3201                         btrfs_tree_lock(eb);
3202
3203                 mutex_lock(&root->fs_info->alloc_mutex);
3204                 ret = lookup_extent_ref(NULL, root, start, len, refs);
3205                 BUG_ON(ret);
3206                 mutex_unlock(&root->fs_info->alloc_mutex);
3207
3208                 if (eb) {
3209                         btrfs_tree_unlock(eb);
3210                         free_extent_buffer(eb);
3211                 }
3212                 if (*refs == 1) {
3213                         printk(KERN_ERR "btrfs block %llu went down to one "
3214                                "during drop_snap\n", (unsigned long long)start);
3215                 }
3216
3217         }
3218 #endif
3219
3220         cond_resched();
3221         return ret;
3222 }
3223
3224 /*
3225  * this is used while deleting old snapshots, and it drops the refs
3226  * on a whole subtree starting from a level 1 node.
3227  *
3228  * The idea is to sort all the leaf pointers, and then drop the
3229  * ref on all the leaves in order.  Most of the time the leaves
3230  * will have ref cache entries, so no leaf IOs will be required to
3231  * find the extents they have references on.
3232  *
3233  * For each leaf, any references it has are also dropped in order.
3234  *
3235  * This ends up dropping the references in something close to optimal
3236  * order for reading and modifying the extent allocation tree.
3237  */
3238 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans,
3239                                         struct btrfs_root *root,
3240                                         struct btrfs_path *path)
3241 {
3242         u64 bytenr;
3243         u64 root_owner;
3244         u64 root_gen;
3245         struct extent_buffer *eb = path->nodes[1];
3246         struct extent_buffer *leaf;
3247         struct btrfs_leaf_ref *ref;
3248         struct refsort *sorted = NULL;
3249         int nritems = btrfs_header_nritems(eb);
3250         int ret;
3251         int i;
3252         int refi = 0;
3253         int slot = path->slots[1];
3254         u32 blocksize = btrfs_level_size(root, 0);
3255         u32 refs;
3256
3257         if (nritems == 0)
3258                 goto out;
3259
3260         root_owner = btrfs_header_owner(eb);
3261         root_gen = btrfs_header_generation(eb);
3262         sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS);
        BUG_ON(!sorted);
3263
3264         /*
3265          * step one, sort all the leaf pointers so we don't scribble
3266          * randomly into the extent allocation tree
3267          */
3268         for (i = slot; i < nritems; i++) {
3269                 sorted[refi].bytenr = btrfs_node_blockptr(eb, i);
3270                 sorted[refi].slot = i;
3271                 refi++;
3272         }
3273
3274         /*
3275          * nritems won't be zero, but if we're picking up drop_snapshot
3276          * after a crash, slot might be > 0, so double check things
3277          * just in case.
3278          */
3279         if (refi == 0)
3280                 goto out;
3281
3282         sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL);
3283
3284         /*
3285          * the first loop frees everything the leaves point to
3286          */
3287         for (i = 0; i < refi; i++) {
3288                 u64 ptr_gen;
3289
3290                 bytenr = sorted[i].bytenr;
3291
3292                 /*
3293                  * check the reference count on this leaf.  If it is > 1
3294                  * we just decrement it below and don't update any
3295                  * of the refs the leaf points to.
3296                  */
3297                 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3298                                                 blocksize, &refs);
3299                 BUG_ON(ret);
3300                 if (refs != 1)
3301                         continue;
3302
3303                 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot);
3304
3305                 /*
3306                  * the leaf only had one reference, which means the
3307                  * only thing pointing to this leaf is the snapshot
3308                  * we're deleting.  It isn't possible for the reference
3309                  * count to increase again later
3310                  *
3311                  * The reference cache is checked for the leaf,
3312                  * and if found we'll be able to drop any refs held by
3313                  * the leaf without needing to read it in.
3314                  */
3315                 ref = btrfs_lookup_leaf_ref(root, bytenr);
3316                 if (ref && ref->generation != ptr_gen) {
3317                         btrfs_free_leaf_ref(root, ref);
3318                         ref = NULL;
3319                 }
3320                 if (ref) {
3321                         ret = cache_drop_leaf_ref(trans, root, ref);
3322                         BUG_ON(ret);
3323                         btrfs_remove_leaf_ref(root, ref);
3324                         btrfs_free_leaf_ref(root, ref);
3325                 } else {
3326                         /*
3327                          * the leaf wasn't in the reference cache, so
3328                          * we have to read it.
3329                          */
3330                         leaf = read_tree_block(root, bytenr, blocksize,
3331                                                ptr_gen);
3332                         ret = btrfs_drop_leaf_ref(trans, root, leaf);
3333                         BUG_ON(ret);
3334                         free_extent_buffer(leaf);
3335                 }
3336                 atomic_inc(&root->fs_info->throttle_gen);
3337                 wake_up(&root->fs_info->transaction_throttle);
3338                 cond_resched();
3339         }
3340
3341         /*
3342          * run through the loop again to free the refs on the leaves.
3343          * This is faster than doing it in the loop above because
3344          * the leaves are likely to be clustered together.  We end up
3345          * working in nice chunks on the extent allocation tree.
3346          */
3347         for (i = 0; i < refi; i++) {
3348                 bytenr = sorted[i].bytenr;
3349                 ret = btrfs_free_extent(trans, root, bytenr,
3350                                         blocksize, eb->start,
3351                                         root_owner, root_gen, 0, 1);
3352                 BUG_ON(ret);
3353
3354                 atomic_inc(&root->fs_info->throttle_gen);
3355                 wake_up(&root->fs_info->transaction_throttle);
3356                 cond_resched();
3357         }
3358 out:
3359         kfree(sorted);
3360
3361         /*
3362          * update the path to show we've processed the entire level 1
3363          * node.  This will get saved into the root's drop_snapshot_progress
3364          * field so these drops are not repeated again if this transaction
3365          * commits.
3366          */
3367         path->slots[1] = nritems;
3368         return 0;
3369 }
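
/*
 * concretely: if the level 1 node points to leaves at bytenrs 900M,
 * 100M and 500M, the sort above makes both passes visit 100M, 500M,
 * 900M, first dropping what each leaf references and then freeing the
 * leaves themselves, so each pass sweeps the extent allocation tree
 * forward instead of seeking around it.
 */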
3370
3371 /*
3372  * helper function for drop_snapshot, this walks down the tree dropping ref
3373  * counts as it goes.
3374  */
3375 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
3376                                    struct btrfs_root *root,
3377                                    struct btrfs_path *path, int *level)
3378 {
3379         u64 root_owner;
3380         u64 root_gen;
3381         u64 bytenr;
3382         u64 ptr_gen;
3383         struct extent_buffer *next;
3384         struct extent_buffer *cur;
3385         struct extent_buffer *parent;
3386         u32 blocksize;
3387         int ret;
3388         u32 refs;
3389
3390         WARN_ON(*level < 0);
3391         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3392         ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start,
3393                                 path->nodes[*level]->len, &refs);
3394         BUG_ON(ret);
3395         if (refs > 1)
3396                 goto out;
3397
3398         /*
3399          * walk down to the last node level and free all the leaves
3400          */
3401         while (*level >= 0) {
3402                 WARN_ON(*level < 0);
3403                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
3404                 cur = path->nodes[*level];
3405
3406                 WARN_ON(btrfs_header_level(cur) != *level);
3408
3409                 if (path->slots[*level] >=
3410                     btrfs_header_nritems(cur))
3411                         break;
3412
3413                 /* the new code goes down to level 1 and does all the
3414                  * leaves pointed to by that node in bulk.  So, this check
3415                  * for level 0 will always be false.
3416                  *
3417                  * But, the disk format allows the drop_snapshot_progress
3418                  * field in the root to leave things in a state where
3419                  * a leaf will need cleaning up here.  If someone crashes
3420                  * with the old code and then boots with the new code,
3421                  * we might find a leaf here.
3422                  */
3423                 if (*level == 0) {
3424                         ret = btrfs_drop_leaf_ref(trans, root, cur);
3425                         BUG_ON(ret);
3426                         break;
3427                 }
3428
3429                 /*
3430                  * once we get to level one, process the whole node
3431                  * at once, including everything below it.
3432                  */
3433                 if (*level == 1) {
3434                         ret = drop_level_one_refs(trans, root, path);
3435                         BUG_ON(ret);
3436                         break;
3437                 }
3438
3439                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3440                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3441                 blocksize = btrfs_level_size(root, *level - 1);
3442
3443                 ret = drop_snap_lookup_refcount(trans, root, bytenr,
3444                                                 blocksize, &refs);
3445                 BUG_ON(ret);
3446
3447                 /*
3448                  * if there is more than one reference, we don't need
3449                  * to read that node to drop any references it has.  We
3450                  * just drop the ref we hold on that node and move on to the
3451                  * next slot in this level.
3452                  */
3453                 if (refs != 1) {
3454                         parent = path->nodes[*level];
3455                         root_owner = btrfs_header_owner(parent);
3456                         root_gen = btrfs_header_generation(parent);
3457                         path->slots[*level]++;
3458
3459                         ret = btrfs_free_extent(trans, root, bytenr,
3460                                                 blocksize, parent->start,
3461                                                 root_owner, root_gen,
3462                                                 *level - 1, 1);
3463                         BUG_ON(ret);
3464
3465                         atomic_inc(&root->fs_info->throttle_gen);
3466                         wake_up(&root->fs_info->transaction_throttle);
3467                         cond_resched();
3468
3469                         continue;
3470                 }
3471
3472                 /*
3473                  * we need to keep freeing things in the next level down.
3474                  * read the block and loop around to process it
3475                  */
3476                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3477                 WARN_ON(*level <= 0);
3478                 if (path->nodes[*level-1])
3479                         free_extent_buffer(path->nodes[*level-1]);
3480                 path->nodes[*level-1] = next;
3481                 *level = btrfs_header_level(next);
3482                 path->slots[*level] = 0;
3483                 cond_resched();
3484         }
3485 out:
3486         WARN_ON(*level < 0);
3487         WARN_ON(*level >= BTRFS_MAX_LEVEL);
3488
3489         if (path->nodes[*level] == root->node) {
3490                 parent = path->nodes[*level];
3491                 bytenr = path->nodes[*level]->start;
3492         } else {
3493                 parent = path->nodes[*level + 1];
3494                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
3495         }
3496
3497         blocksize = btrfs_level_size(root, *level);
3498         root_owner = btrfs_header_owner(parent);
3499         root_gen = btrfs_header_generation(parent);
3500
3501         /*
3502          * cleanup and free the reference on the last node
3503          * we processed
3504          */
3505         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3506                                   parent->start, root_owner, root_gen,
3507                                   *level, 1);
3508         free_extent_buffer(path->nodes[*level]);
3509         path->nodes[*level] = NULL;
3510
3511         *level += 1;
3512         BUG_ON(ret);
3513
3514         cond_resched();
3515         return 0;
3516 }
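
/*
 * note that walk_down_tree always returns 0; its real results are the
 * updated path and *level.  On return one tree block has been freed
 * and *level points at its parent, so btrfs_drop_snapshot below simply
 * alternates walk_down_tree and walk_up_tree until walk_up_tree says
 * the whole tree has been processed.
 */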
3517
3518 /*
3519  * helper function for drop_subtree, this function is similar to
3520  * walk_down_tree. The main difference is that it checks reference
3521  * counts while tree blocks are locked.
3522  */
3523 static noinline int walk_down_subtree(struct btrfs_trans_handle *trans,
3524                                       struct btrfs_root *root,
3525                                       struct btrfs_path *path, int *level)
3526 {
3527         struct extent_buffer *next;
3528         struct extent_buffer *cur;
3529         struct extent_buffer *parent;
3530         u64 bytenr;
3531         u64 ptr_gen;
3532         u32 blocksize;
3533         u32 refs;
3534         int ret;
3535
3536         cur = path->nodes[*level];
3537         ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len,
3538                                       &refs);
3539         BUG_ON(ret);
3540         if (refs > 1)
3541                 goto out;
3542
3543         while (*level >= 0) {
3544                 cur = path->nodes[*level];
3545                 if (*level == 0) {
3546                         ret = btrfs_drop_leaf_ref(trans, root, cur);
3547                         BUG_ON(ret);
3548                         clean_tree_block(trans, root, cur);
3549                         break;
3550                 }
3551                 if (path->slots[*level] >= btrfs_header_nritems(cur)) {
3552                         clean_tree_block(trans, root, cur);
3553                         break;
3554                 }
3555
3556                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
3557                 blocksize = btrfs_level_size(root, *level - 1);
3558                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
3559
3560                 next = read_tree_block(root, bytenr, blocksize, ptr_gen);
3561                 btrfs_tree_lock(next);
3562                 btrfs_set_lock_blocking(next);
3563
3564                 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize,
3565                                               &refs);
3566                 BUG_ON(ret);
3567                 if (refs > 1) {
3568                         parent = path->nodes[*level];
3569                         ret = btrfs_free_extent(trans, root, bytenr,
3570                                         blocksize, parent->start,
3571                                         btrfs_header_owner(parent),
3572                                         btrfs_header_generation(parent),
3573                                         *level - 1, 1);
3574                         BUG_ON(ret);
3575                         path->slots[*level]++;
3576                         btrfs_tree_unlock(next);
3577                         free_extent_buffer(next);
3578                         continue;
3579                 }
3580
3581                 *level = btrfs_header_level(next);
3582                 path->nodes[*level] = next;
3583                 path->slots[*level] = 0;
3584                 path->locks[*level] = 1;
3585                 cond_resched();
3586         }
3587 out:
3588         parent = path->nodes[*level + 1];
3589         bytenr = path->nodes[*level]->start;
3590         blocksize = path->nodes[*level]->len;
3591
3592         ret = btrfs_free_extent(trans, root, bytenr, blocksize,
3593                         parent->start, btrfs_header_owner(parent),
3594                         btrfs_header_generation(parent), *level, 1);
3595         BUG_ON(ret);
3596
3597         if (path->locks[*level]) {
3598                 btrfs_tree_unlock(path->nodes[*level]);
3599                 path->locks[*level] = 0;
3600         }
3601         free_extent_buffer(path->nodes[*level]);
3602         path->nodes[*level] = NULL;
3603         *level += 1;
3604         cond_resched();
3605         return 0;
3606 }
3607
3608 /*
3609  * helper for dropping snapshots.  This walks back up the tree in the path
3610  * to find the first node higher up where we haven't yet gone through
3611  * all the slots
3612  */
3613 static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
3614                                  struct btrfs_root *root,
3615                                  struct btrfs_path *path,
3616                                  int *level, int max_level)
3617 {
3618         u64 root_owner;
3619         u64 root_gen;
3620         struct btrfs_root_item *root_item = &root->root_item;
3621         int i;
3622         int slot;
3623         int ret;
3624
3625         for (i = *level; i < max_level && path->nodes[i]; i++) {
3626                 slot = path->slots[i];
3627                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3628                         struct extent_buffer *node;
3629                         struct btrfs_disk_key disk_key;
3630
3631                         /*
3632                          * there is more work to do in this level.
3633                          * Update the drop_progress marker to reflect
3634                          * the work we've done so far, and then bump
3635                          * the slot number
3636                          */
3637                         node = path->nodes[i];
3638                         path->slots[i]++;
3639                         *level = i;
3640                         WARN_ON(*level == 0);
3641                         btrfs_node_key(node, &disk_key, path->slots[i]);
3642                         memcpy(&root_item->drop_progress,
3643                                &disk_key, sizeof(disk_key));
3644                         root_item->drop_level = i;
3645                         return 0;
3646                 } else {
3647                         struct extent_buffer *parent;
3648
3649                         /*
3650                          * this whole node is done, free our reference
3651                          * on it and go up one level
3652                          */
3653                         if (path->nodes[*level] == root->node)
3654                                 parent = path->nodes[*level];
3655                         else
3656                                 parent = path->nodes[*level + 1];
3657
3658                         root_owner = btrfs_header_owner(parent);
3659                         root_gen = btrfs_header_generation(parent);
3660
3661                         clean_tree_block(trans, root, path->nodes[*level]);
3662                         ret = btrfs_free_extent(trans, root,
3663                                                 path->nodes[*level]->start,
3664                                                 path->nodes[*level]->len,
3665                                                 parent->start, root_owner,
3666                                                 root_gen, *level, 1);
3667                         BUG_ON(ret);
3668                         if (path->locks[*level]) {
3669                                 btrfs_tree_unlock(path->nodes[*level]);
3670                                 path->locks[*level] = 0;
3671                         }
3672                         free_extent_buffer(path->nodes[*level]);
3673                         path->nodes[*level] = NULL;
3674                         *level = i + 1;
3675                 }
3676         }
3677         return 1;
3678 }
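
/*
 * return semantics for walk_up_tree: 0 means a node with unvisited
 * slots was found (drop_progress has been updated and *level points
 * at that node, ready for another walk down); 1 means every node up
 * to max_level was finished and freed, so the walk is complete.
 */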
3679
3680 /*
3681  * drop the reference count on the tree rooted at 'root'.  This traverses
3682  * the tree freeing any blocks that have a ref count of zero after being
3683  * decremented.
3684  */
3685 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3686                         *root)
3687 {
3688         int ret = 0;
3689         int wret;
3690         int level;
3691         struct btrfs_path *path;
3692         int i;
3693         int orig_level;
3694         int update_count;
3695         struct btrfs_root_item *root_item = &root->root_item;
3696
3697         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3698         path = btrfs_alloc_path();
3699         BUG_ON(!path);
3700
3701         level = btrfs_header_level(root->node);
3702         orig_level = level;
3703         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3704                 path->nodes[level] = root->node;
3705                 extent_buffer_get(root->node);
3706                 path->slots[level] = 0;
3707         } else {
3708                 struct btrfs_key key;
3709                 struct btrfs_disk_key found_key;
3710                 struct extent_buffer *node;
3711
3712                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3713                 level = root_item->drop_level;
3714                 path->lowest_level = level;
3715                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3716                 if (wret < 0) {
3717                         ret = wret;
3718                         goto out;
3719                 }
3720                 node = path->nodes[level];
3721                 btrfs_node_key(node, &found_key, path->slots[level]);
3722                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3723                                sizeof(found_key)));
3724                 /*
3725                  * unlock our path, this is safe because only this
3726                  * function is allowed to delete this snapshot
3727                  */
3728                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3729                         if (path->nodes[i] && path->locks[i]) {
3730                                 path->locks[i] = 0;
3731                                 btrfs_tree_unlock(path->nodes[i]);
3732                         }
3733                 }
3734         }
3735         while (1) {
3736                 unsigned long update;
3737                 wret = walk_down_tree(trans, root, path, &level);
3738                 if (wret > 0)
3739                         break;
3740                 if (wret < 0)
3741                         ret = wret;
3742
3743                 wret = walk_up_tree(trans, root, path, &level,
3744                                     BTRFS_MAX_LEVEL);
3745                 if (wret > 0)
3746                         break;
3747                 if (wret < 0)
3748                         ret = wret;
3749                 if (trans->transaction->in_commit ||
3750                     trans->transaction->delayed_refs.flushing) {
3751                         ret = -EAGAIN;
3752                         break;
3753                 }
3754                 atomic_inc(&root->fs_info->throttle_gen);
3755                 wake_up(&root->fs_info->transaction_throttle);
3756                 for (update_count = 0; update_count < 16; update_count++) {
3757                         update = trans->delayed_ref_updates;
3758                         trans->delayed_ref_updates = 0;
3759                         if (update)
3760                                 btrfs_run_delayed_refs(trans, root, update);
3761                         else
3762                                 break;
3763                 }
3764         }
3765         for (i = 0; i <= orig_level; i++) {
3766                 if (path->nodes[i]) {
3767                         free_extent_buffer(path->nodes[i]);
3768                         path->nodes[i] = NULL;
3769                 }
3770         }
3771 out:
3772         btrfs_free_path(path);
3773         return ret;
3774 }
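
/*
 * because drop_progress and drop_level are persisted in the root item,
 * btrfs_drop_snapshot can bail out with -EAGAIN when a commit is
 * pending and simply be called again later: the search at the top
 * re-seats the path at the saved key and the walk resumes where it
 * left off.
 */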
3775
3776 int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
3777                         struct btrfs_root *root,
3778                         struct extent_buffer *node,
3779                         struct extent_buffer *parent)
3780 {
3781         struct btrfs_path *path;
3782         int level;
3783         int parent_level;
3784         int ret = 0;
3785         int wret;
3786
3787         path = btrfs_alloc_path();
3788         BUG_ON(!path);
3789
3790         btrfs_assert_tree_locked(parent);
3791         parent_level = btrfs_header_level(parent);
3792         extent_buffer_get(parent);
3793         path->nodes[parent_level] = parent;
3794         path->slots[parent_level] = btrfs_header_nritems(parent);
3795
3796         btrfs_assert_tree_locked(node);
3797         level = btrfs_header_level(node);
3798         extent_buffer_get(node);
3799         path->nodes[level] = node;
3800         path->slots[level] = 0;
3801
3802         while (1) {
3803                 wret = walk_down_subtree(trans, root, path, &level);
3804                 if (wret < 0)
3805                         ret = wret;
3806                 if (wret != 0)
3807                         break;
3808
3809                 wret = walk_up_tree(trans, root, path, &level, parent_level);
3810                 if (wret < 0)
3811                         ret = wret;
3812                 if (wret != 0)
3813                         break;
3814         }
3815
3816         btrfs_free_path(path);
3817         return ret;
3818 }
3819
3820 static unsigned long calc_ra(unsigned long start, unsigned long last,
3821                              unsigned long nr)
3822 {
3823         return min(last, start + nr - 1);
3824 }
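
/*
 * calc_ra just clips a readahead window to the end of the range, e.g.
 * calc_ra(10, 100, 32) returns 41 (pages 10..41 inclusive), while near
 * the end calc_ra(90, 100, 32) returns 100.
 */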
3825
3826 static noinline int relocate_inode_pages(struct inode *inode, u64 start,
3827                                          u64 len)
3828 {
3829         u64 page_start;
3830         u64 page_end;
3831         unsigned long first_index;
3832         unsigned long last_index;
3833         unsigned long i;
3834         struct page *page;
3835         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3836         struct file_ra_state *ra;
3837         struct btrfs_ordered_extent *ordered;
3838         unsigned int total_read = 0;
3839         unsigned int total_dirty = 0;
3840         int ret = 0;
3841
3842         ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
                return -ENOMEM;
3843
3844         mutex_lock(&inode->i_mutex);
3845         first_index = start >> PAGE_CACHE_SHIFT;
3846         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3847
3848         /* make sure the dirty trick played by the caller works */
3849         ret = invalidate_inode_pages2_range(inode->i_mapping,
3850                                             first_index, last_index);
3851         if (ret)
3852                 goto out_unlock;
3853
3854         file_ra_state_init(ra, inode->i_mapping);
3855
3856         for (i = first_index; i <= last_index; i++) {
3857                 if (total_read % ra->ra_pages == 0) {
3858                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3859                                        calc_ra(i, last_index, ra->ra_pages));
3860                 }
3861                 total_read++;
3862 again:
3863                 BUG_ON(((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode));
3865                 page = grab_cache_page(inode->i_mapping, i);
3866                 if (!page) {
3867                         ret = -ENOMEM;
3868                         goto out_unlock;
3869                 }
3870                 if (!PageUptodate(page)) {
3871                         btrfs_readpage(NULL, page);
3872                         lock_page(page);
3873                         if (!PageUptodate(page)) {
3874                                 unlock_page(page);
3875                                 page_cache_release(page);
3876                                 ret = -EIO;
3877                                 goto out_unlock;
3878                         }
3879                 }
3880                 wait_on_page_writeback(page);
3881
3882                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3883                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3884                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3885
3886                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3887                 if (ordered) {
3888                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3889                         unlock_page(page);
3890                         page_cache_release(page);
3891                         btrfs_start_ordered_extent(inode, ordered, 1);
3892                         btrfs_put_ordered_extent(ordered);
3893                         goto again;
3894                 }
3895                 set_page_extent_mapped(page);
3896
3897                 if (i == first_index)
3898                         set_extent_bits(io_tree, page_start, page_end,
3899                                         EXTENT_BOUNDARY, GFP_NOFS);
3900                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3901
3902                 set_page_dirty(page);
3903                 total_dirty++;
3904
3905                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3906                 unlock_page(page);
3907                 page_cache_release(page);
3908         }
3909
3910 out_unlock:
3911         kfree(ra);
3912         mutex_unlock(&inode->i_mutex);
3913         balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
3914         return ret;
3915 }
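
/*
 * the "dirty trick" referred to above: relocation copies an extent by
 * reading the old data through btrfs_readpage (steered by the pinned
 * extent map set up in relocate_data_extent below) and then marking
 * every page dirty as delalloc, so ordinary writeback allocates new
 * space and writes the data out again at its new location.
 */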
3916
3917 static noinline int relocate_data_extent(struct inode *reloc_inode,
3918                                          struct btrfs_key *extent_key,
3919                                          u64 offset)
3920 {
3921         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
3922         struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
3923         struct extent_map *em;
3924         u64 start = extent_key->objectid - offset;
3925         u64 end = start + extent_key->offset - 1;
3926
3927         em = alloc_extent_map(GFP_NOFS);
3928         BUG_ON(!em || IS_ERR(em));
3929
3930         em->start = start;
3931         em->len = extent_key->offset;
3932         em->block_len = extent_key->offset;
3933         em->block_start = extent_key->objectid;
3934         em->bdev = root->fs_info->fs_devices->latest_bdev;
3935         set_bit(EXTENT_FLAG_PINNED, &em->flags);
3936
3937         /* setup extent map to cheat btrfs_readpage */
3938         lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3939         while (1) {
3940                 int ret;
3941                 spin_lock(&em_tree->lock);
3942                 ret = add_extent_mapping(em_tree, em);
3943                 spin_unlock(&em_tree->lock);
3944                 if (ret != -EEXIST) {
3945                         free_extent_map(em);
3946                         break;
3947                 }
3948                 btrfs_drop_extent_cache(reloc_inode, start, end, 0);
3949         }
3950         unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
3951
3952         return relocate_inode_pages(reloc_inode, start, extent_key->offset);
3953 }
3954
3955 struct btrfs_ref_path {
3956         u64 extent_start;
3957         u64 nodes[BTRFS_MAX_LEVEL];
3958         u64 root_objectid;
3959         u64 root_generation;
3960         u64 owner_objectid;
3961         u32 num_refs;
3962         int lowest_level;
3963         int current_level;
3964         int shared_level;
3965
3966         struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
3967         u64 new_nodes[BTRFS_MAX_LEVEL];
3968 };
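
/*
 * a btrfs_ref_path describes, roughly, one chain of backrefs from an
 * extent up to a tree root: nodes[level] is the bytenr of the block
 * holding the reference at each level, root_objectid/root_generation
 * identify the root the chain ends in, and lowest_level/current_level
 * track how far __next_ref_path below has walked.  num_refs is the
 * reference count seen at the lowest level; node_keys and new_nodes
 * are filled in later by the relocation code.
 */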
3969
3970 struct disk_extent {
3971         u64 ram_bytes;
3972         u64 disk_bytenr;
3973         u64 disk_num_bytes;
3974         u64 offset;
3975         u64 num_bytes;
3976         u8 compression;
3977         u8 encryption;
3978         u16 other_encoding;
3979 };
3980
3981 static int is_cowonly_root(u64 root_objectid)
3982 {
3983         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
3984             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
3985             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
3986             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
3987             root_objectid == BTRFS_TREE_LOG_OBJECTID ||
3988             root_objectid == BTRFS_CSUM_TREE_OBJECTID)
3989                 return 1;
3990         return 0;
3991 }
3992
3993 static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
3994                                     struct btrfs_root *extent_root,
3995                                     struct btrfs_ref_path *ref_path,
3996                                     int first_time)
3997 {
3998         struct extent_buffer *leaf;
3999         struct btrfs_path *path;
4000         struct btrfs_extent_ref *ref;
4001         struct btrfs_key key;
4002         struct btrfs_key found_key;
4003         u64 bytenr;
4004         u32 nritems;
4005         int level;
4006         int ret = 1;
4007
4008         path = btrfs_alloc_path();
4009         if (!path)
4010                 return -ENOMEM;
4011
4012         if (first_time) {
4013                 ref_path->lowest_level = -1;
4014                 ref_path->current_level = -1;
4015                 ref_path->shared_level = -1;
4016                 goto walk_up;
4017         }
4018 walk_down:
4019         level = ref_path->current_level - 1;
4020         while (level >= -1) {
4021                 u64 parent;
4022                 if (level < ref_path->lowest_level)
4023                         break;
4024
4025                 if (level >= 0)
4026                         bytenr = ref_path->nodes[level];
4027                 else
4028                         bytenr = ref_path->extent_start;
4029                 BUG_ON(bytenr == 0);
4030
4031                 parent = ref_path->nodes[level + 1];
4032                 ref_path->nodes[level + 1] = 0;
4033                 ref_path->current_level = level;
4034                 BUG_ON(parent == 0);
4035
4036                 key.objectid = bytenr;
4037                 key.offset = parent + 1;
4038                 key.type = BTRFS_EXTENT_REF_KEY;
4039
4040                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4041                 if (ret < 0)
4042                         goto out;
4043                 BUG_ON(ret == 0);
4044
4045                 leaf = path->nodes[0];
4046                 nritems = btrfs_header_nritems(leaf);
4047                 if (path->slots[0] >= nritems) {
4048                         ret = btrfs_next_leaf(extent_root, path);
4049                         if (ret < 0)
4050                                 goto out;
4051                         if (ret > 0)
4052                                 goto next;
4053                         leaf = path->nodes[0];
4054                 }
4055
4056                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4057                 if (found_key.objectid == bytenr &&
4058                     found_key.type == BTRFS_EXTENT_REF_KEY) {
4059                         if (level < ref_path->shared_level)
4060                                 ref_path->shared_level = level;
4061                         goto found;
4062                 }
4063 next:
4064                 level--;
4065                 btrfs_release_path(extent_root, path);
4066                 cond_resched();
4067         }
4068         /* reached lowest level */
4069         ret = 1;
4070         goto out;
4071 walk_up:
4072         level = ref_path->current_level;
4073         while (level < BTRFS_MAX_LEVEL - 1) {
4074                 u64 ref_objectid;
4075
4076                 if (level >= 0)
4077                         bytenr = ref_path->nodes[level];
4078                 else
4079                         bytenr = ref_path->extent_start;
4080
4081                 BUG_ON(bytenr == 0);
4082
4083                 key.objectid = bytenr;
4084                 key.offset = 0;
4085                 key.type = BTRFS_EXTENT_REF_KEY;
4086
4087                 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
4088                 if (ret < 0)
4089                         goto out;
4090
4091                 leaf = path->nodes[0];
4092                 nritems = btrfs_header_nritems(leaf);
4093                 if (path->slots[0] >= nritems) {
4094                         ret = btrfs_next_leaf(extent_root, path);
4095                         if (ret < 0)
4096                                 goto out;
4097                         if (ret > 0) {
4098                                 /* the extent was freed by someone */
4099                                 if (ref_path->lowest_level == level)
4100                                         goto out;
4101                                 btrfs_release_path(extent_root, path);
4102                                 goto walk_down;
4103                         }
4104                         leaf = path->nodes[0];
4105                 }
4106
4107                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4108                 if (found_key.objectid != bytenr ||
4109                                 found_key.type != BTRFS_EXTENT_REF_KEY) {
4110                         /* the extent was freed by someone */
4111                         if (ref_path->lowest_level == level) {
4112                                 ret = 1;
4113                                 goto out;
4114                         }
4115                         btrfs_release_path(extent_root, path);
4116                         goto walk_down;
4117                 }
4118 found:
4119                 ref = btrfs_item_ptr(leaf, path->slots[0],
4120                                 struct btrfs_extent_ref);
4121                 ref_objectid = btrfs_ref_objectid(leaf, ref);
4122                 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
4123                         if (first_time) {
4124                                 level = (int)ref_objectid;
4125                                 BUG_ON(level >= BTRFS_MAX_LEVEL);
4126                                 ref_path->lowest_level = level;
4127                                 ref_path->current_level = level;
4128                                 ref_path->nodes[level] = bytenr;
4129                         } else {
4130                                 WARN_ON(ref_objectid != level);
4131                         }
4132                 } else {
4133                         WARN_ON(level != -1);
4134                 }
4135                 first_time = 0;
4136
4137                 if (ref_path->lowest_level == level) {
4138                         ref_path->owner_objectid = ref_objectid;
4139                         ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
4140                 }
4141
4142                 /*
4143                  * the block is a tree root, or the block isn't in a
4144                  * reference counted tree.
4145                  */
4146                 if (found_key.objectid == found_key.offset ||
4147                     is_cowonly_root(btrfs_ref_root(leaf, ref))) {
4148                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4149                         ref_path->root_generation =
4150                                 btrfs_ref_generation(leaf, ref);
4151                         if (level < 0) {
4152                                 /* special reference from the tree log */
4153                                 ref_path->nodes[0] = found_key.offset;
4154                                 ref_path->current_level = 0;
4155                         }
4156                         ret = 0;
4157                         goto out;
4158                 }
4159
4160                 level++;
4161                 BUG_ON(ref_path->nodes[level] != 0);
4162                 ref_path->nodes[level] = found_key.offset;
4163                 ref_path->current_level = level;
4164
4165                 /*
4166                  * the reference was created in the running transaction,
4167                  * no need to continue walking up.
4168                  */
4169                 if (btrfs_ref_generation(leaf, ref) == trans->transid) {
4170                         ref_path->root_objectid = btrfs_ref_root(leaf, ref);
4171                         ref_path->root_generation =
4172                                 btrfs_ref_generation(leaf, ref);
4173                         ret = 0;
4174                         goto out;
4175                 }
4176
4177                 btrfs_release_path(extent_root, path);
4178                 cond_resched();
4179         }
4180         /* reached max tree level, but no tree root found. */
4181         BUG();
4182 out:
4183         btrfs_free_path(path);
4184         return ret;
4185 }
4186
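/*
 * btrfs_first_ref_path and btrfs_next_ref_path are thin wrappers around
 * __next_ref_path: the first call resets the ref_path and finds the first
 * reference path for the extent, subsequent calls step to the next one.
 */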
4187 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
4188                                 struct btrfs_root *extent_root,
4189                                 struct btrfs_ref_path *ref_path,
4190                                 u64 extent_start)
4191 {
4192         memset(ref_path, 0, sizeof(*ref_path));
4193         ref_path->extent_start = extent_start;
4194
4195         return __next_ref_path(trans, extent_root, ref_path, 1);
4196 }
4197
4198 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
4199                                struct btrfs_root *extent_root,
4200                                struct btrfs_ref_path *ref_path)
4201 {
4202         return __next_ref_path(trans, extent_root, ref_path, 0);
4203 }
4204
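/*
 * look up the new file extents in the relocation inode that cover the
 * data of 'extent_key'.  The resulting disk_extent array is returned via
 * 'extents'.  With 'no_fragment' set, the caller requires a single
 * replacement extent, and 1 is returned if the data is now split across
 * several extents.
 */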
4205 static noinline int get_new_locations(struct inode *reloc_inode,
4206                                       struct btrfs_key *extent_key,
4207                                       u64 offset, int no_fragment,
4208                                       struct disk_extent **extents,
4209                                       int *nr_extents)
4210 {
4211         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
4212         struct btrfs_path *path;
4213         struct btrfs_file_extent_item *fi;
4214         struct extent_buffer *leaf;
4215         struct disk_extent *exts = *extents;
4216         struct btrfs_key found_key;
4217         u64 cur_pos;
4218         u64 last_byte;
4219         u32 nritems;
4220         int nr = 0;
4221         int max = *nr_extents;
4222         int ret;
4223
4224         WARN_ON(!no_fragment && *extents);
4225         if (!exts) {
4226                 max = 1;
4227                 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
4228                 if (!exts)
4229                         return -ENOMEM;
4230         }
4231
4232         path = btrfs_alloc_path();
4233         BUG_ON(!path);
4234
4235         cur_pos = extent_key->objectid - offset;
4236         last_byte = extent_key->objectid + extent_key->offset;
4237         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
4238                                        cur_pos, 0);
4239         if (ret < 0)
4240                 goto out;
4241         if (ret > 0) {
4242                 ret = -ENOENT;
4243                 goto out;
4244         }
4245
4246         while (1) {
4247                 leaf = path->nodes[0];
4248                 nritems = btrfs_header_nritems(leaf);
4249                 if (path->slots[0] >= nritems) {
4250                         ret = btrfs_next_leaf(root, path);
4251                         if (ret < 0)
4252                                 goto out;
4253                         if (ret > 0)
4254                                 break;
4255                         leaf = path->nodes[0];
4256                 }
4257
4258                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4259                 if (found_key.offset != cur_pos ||
4260                     found_key.type != BTRFS_EXTENT_DATA_KEY ||
4261                     found_key.objectid != reloc_inode->i_ino)
4262                         break;
4263
4264                 fi = btrfs_item_ptr(leaf, path->slots[0],
4265                                     struct btrfs_file_extent_item);
4266                 if (btrfs_file_extent_type(leaf, fi) !=
4267                     BTRFS_FILE_EXTENT_REG ||
4268                     btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4269                         break;
4270
4271                 if (nr == max) {
4272                         struct disk_extent *old = exts;
4273                         max *= 2;
4274                         exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
                        if (!exts) {
                                /* keep the old array so the out path frees it */
                                exts = old;
                                ret = -ENOMEM;
                                goto out;
                        }
4275                         memcpy(exts, old, sizeof(*exts) * nr);
4276                         if (old != *extents)
4277                                 kfree(old);
4278                 }
4279
4280                 exts[nr].disk_bytenr =
4281                         btrfs_file_extent_disk_bytenr(leaf, fi);
4282                 exts[nr].disk_num_bytes =
4283                         btrfs_file_extent_disk_num_bytes(leaf, fi);
4284                 exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
4285                 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4286                 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
4287                 exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
4288                 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
4289                 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
4290                                                                            fi);
4291                 BUG_ON(exts[nr].offset > 0);
4292                 BUG_ON(exts[nr].compression || exts[nr].encryption);
4293                 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);
4294
4295                 cur_pos += exts[nr].num_bytes;
4296                 nr++;
4297
4298                 if (cur_pos + offset >= last_byte)
4299                         break;
4300
4301                 if (no_fragment) {
4302                         ret = 1;
4303                         goto out;
4304                 }
4305                 path->slots[0]++;
4306         }
4307
4308         BUG_ON(cur_pos + offset > last_byte);
4309         if (cur_pos + offset < last_byte) {
4310                 ret = -ENOENT;
4311                 goto out;
4312         }
4313         ret = 0;
4314 out:
4315         btrfs_free_path(path);
4316         if (ret) {
4317                 if (exts != *extents)
4318                         kfree(exts);
4319         } else {
4320                 *extents = exts;
4321                 *nr_extents = nr;
4322         }
4323         return ret;
4324 }
4325
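/*
 * walk the file extent items reachable through one reference path and
 * switch pointers to 'extent_key' over to the new extents in the
 * relocation inode.  The affected file range is locked while a pointer
 * is rewritten, and the reference to the old extent is dropped after a
 * reference to the new one has been added.
 */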
4326 static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
4327                                         struct btrfs_root *root,
4328                                         struct btrfs_path *path,
4329                                         struct btrfs_key *extent_key,
4330                                         struct btrfs_key *leaf_key,
4331                                         struct btrfs_ref_path *ref_path,
4332                                         struct disk_extent *new_extents,
4333                                         int nr_extents)
4334 {
4335         struct extent_buffer *leaf;
4336         struct btrfs_file_extent_item *fi;
4337         struct inode *inode = NULL;
4338         struct btrfs_key key;
4339         u64 lock_start = 0;
4340         u64 lock_end = 0;
4341         u64 num_bytes;
4342         u64 ext_offset;
4343         u64 search_end = (u64)-1;
4344         u32 nritems;
4345         int nr_scanned = 0;
4346         int extent_locked = 0;
4347         int extent_type;
4348         int ret;
4349
4350         memcpy(&key, leaf_key, sizeof(key));
4351         if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4352                 if (key.objectid < ref_path->owner_objectid ||
4353                     (key.objectid == ref_path->owner_objectid &&
4354                      key.type < BTRFS_EXTENT_DATA_KEY)) {
4355                         key.objectid = ref_path->owner_objectid;
4356                         key.type = BTRFS_EXTENT_DATA_KEY;
4357                         key.offset = 0;
4358                 }
4359         }
4360
4361         while (1) {
4362                 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
4363                 if (ret < 0)
4364                         goto out;
4365
4366                 leaf = path->nodes[0];
4367                 nritems = btrfs_header_nritems(leaf);
4368 next:
4369                 if (extent_locked && ret > 0) {
4370                         /*
4371                          * the file extent item was modified by someone
4372                          * before the extent got locked.
4373                          */
4374                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4375                                       lock_end, GFP_NOFS);
4376                         extent_locked = 0;
4377                 }
4378
4379                 if (path->slots[0] >= nritems) {
4380                         if (++nr_scanned > 2)
4381                                 break;
4382
4383                         BUG_ON(extent_locked);
4384                         ret = btrfs_next_leaf(root, path);
4385                         if (ret < 0)
4386                                 goto out;
4387                         if (ret > 0)
4388                                 break;
4389                         leaf = path->nodes[0];
4390                         nritems = btrfs_header_nritems(leaf);
4391                 }
4392
4393                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4394
4395                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
4396                         if ((key.objectid > ref_path->owner_objectid) ||
4397                             (key.objectid == ref_path->owner_objectid &&
4398                              key.type > BTRFS_EXTENT_DATA_KEY) ||
4399                             key.offset >= search_end)
4400                                 break;
4401                 }
4402
4403                 if (inode && key.objectid != inode->i_ino) {
4404                         BUG_ON(extent_locked);
4405                         btrfs_release_path(root, path);
4406                         mutex_unlock(&inode->i_mutex);
4407                         iput(inode);
4408                         inode = NULL;
4409                         continue;
4410                 }
4411
4412                 if (key.type != BTRFS_EXTENT_DATA_KEY) {
4413                         path->slots[0]++;
4414                         ret = 1;
4415                         goto next;
4416                 }
4417                 fi = btrfs_item_ptr(leaf, path->slots[0],
4418                                     struct btrfs_file_extent_item);
4419                 extent_type = btrfs_file_extent_type(leaf, fi);
4420                 if ((extent_type != BTRFS_FILE_EXTENT_REG &&
4421                      extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
4422                     (btrfs_file_extent_disk_bytenr(leaf, fi) !=
4423                      extent_key->objectid)) {
4424                         path->slots[0]++;
4425                         ret = 1;
4426                         goto next;
4427                 }
4428
4429                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4430                 ext_offset = btrfs_file_extent_offset(leaf, fi);
4431
4432                 if (search_end == (u64)-1) {
4433                         search_end = key.offset - ext_offset +
4434                                 btrfs_file_extent_ram_bytes(leaf, fi);
4435                 }
4436
4437                 if (!extent_locked) {
4438                         lock_start = key.offset;
4439                         lock_end = lock_start + num_bytes - 1;
4440                 } else {
4441                         if (lock_start > key.offset ||
4442                             lock_end + 1 < key.offset + num_bytes) {
4443                                 unlock_extent(&BTRFS_I(inode)->io_tree,
4444                                               lock_start, lock_end, GFP_NOFS);
4445                                 extent_locked = 0;
4446                         }
4447                 }
4448
4449                 if (!inode) {
4450                         btrfs_release_path(root, path);
4451
4452                         inode = btrfs_iget_locked(root->fs_info->sb,
4453                                                   key.objectid, root);
                        if (!inode) {
                                ret = -ENOMEM;
                                goto out;
                        }
4454                         if (inode->i_state & I_NEW) {
4455                                 BTRFS_I(inode)->root = root;
4456                                 BTRFS_I(inode)->location.objectid =
4457                                         key.objectid;
4458                                 BTRFS_I(inode)->location.type =
4459                                         BTRFS_INODE_ITEM_KEY;
4460                                 BTRFS_I(inode)->location.offset = 0;
4461                                 btrfs_read_locked_inode(inode);
4462                                 unlock_new_inode(inode);
4463                         }
4464                         /*
4465                          * some code paths call btrfs_commit_transaction while
4466                          * holding the i_mutex, so we can't use mutex_lock
4467                          * here.
4468                          */
4469                         if (is_bad_inode(inode) ||
4470                             !mutex_trylock(&inode->i_mutex)) {
4471                                 iput(inode);
4472                                 inode = NULL;
4473                                 key.offset = (u64)-1;
4474                                 goto skip;
4475                         }
4476                 }
4477
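                /*
                 * take the extent lock with the path released, then loop
                 * back to re-search; the checks above catch the case where
                 * the file extent item changed before the lock was taken.
                 */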
4478                 if (!extent_locked) {
4479                         struct btrfs_ordered_extent *ordered;
4480
4481                         btrfs_release_path(root, path);
4482
4483                         lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4484                                     lock_end, GFP_NOFS);
4485                         ordered = btrfs_lookup_first_ordered_extent(inode,
4486                                                                     lock_end);
4487                         if (ordered &&
4488                             ordered->file_offset <= lock_end &&
4489                             ordered->file_offset + ordered->len > lock_start) {
4490                                 unlock_extent(&BTRFS_I(inode)->io_tree,
4491                                               lock_start, lock_end, GFP_NOFS);
4492                                 btrfs_start_ordered_extent(inode, ordered, 1);
4493                                 btrfs_put_ordered_extent(ordered);
4494                                 key.offset += num_bytes;
4495                                 goto skip;
4496                         }
4497                         if (ordered)
4498                                 btrfs_put_ordered_extent(ordered);
4499
4500                         extent_locked = 1;
4501                         continue;
4502                 }
4503
4504                 if (nr_extents == 1) {
4505                         /* update extent pointer in place */
4506                         btrfs_set_file_extent_disk_bytenr(leaf, fi,
4507                                                 new_extents[0].disk_bytenr);
4508                         btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4509                                                 new_extents[0].disk_num_bytes);
4510                         btrfs_mark_buffer_dirty(leaf);
4511
4512                         btrfs_drop_extent_cache(inode, key.offset,
4513                                                 key.offset + num_bytes - 1, 0);
4514
4515                         ret = btrfs_inc_extent_ref(trans, root,
4516                                                 new_extents[0].disk_bytenr,
4517                                                 new_extents[0].disk_num_bytes,
4518                                                 leaf->start,
4519                                                 root->root_key.objectid,
4520                                                 trans->transid,
4521                                                 key.objectid);
4522                         BUG_ON(ret);
4523
4524                         ret = btrfs_free_extent(trans, root,
4525                                                 extent_key->objectid,
4526                                                 extent_key->offset,
4527                                                 leaf->start,
4528                                                 btrfs_header_owner(leaf),
4529                                                 btrfs_header_generation(leaf),
4530                                                 key.objectid, 0);
4531                         BUG_ON(ret);
4532
4533                         btrfs_release_path(root, path);
4534                         key.offset += num_bytes;
4535                 } else {
4536                         BUG_ON(1);
4537 #if 0
4538                         u64 alloc_hint;
4539                         u64 extent_len;
4540                         int i;
4541                         /*
4542                          * drop the old extent pointer first, then insert the
4543                          * new pointers one by one
4544                          */
4545                         btrfs_release_path(root, path);
4546                         ret = btrfs_drop_extents(trans, root, inode, key.offset,
4547                                                  key.offset + num_bytes,
4548                                                  key.offset, &alloc_hint);
4549                         BUG_ON(ret);
4550
4551                         for (i = 0; i < nr_extents; i++) {
4552                                 if (ext_offset >= new_extents[i].num_bytes) {
4553                                         ext_offset -= new_extents[i].num_bytes;
4554                                         continue;
4555                                 }
4556                                 extent_len = min(new_extents[i].num_bytes -
4557                                                  ext_offset, num_bytes);
4558
4559                                 ret = btrfs_insert_empty_item(trans, root,
4560                                                               path, &key,
4561                                                               sizeof(*fi));
4562                                 BUG_ON(ret);
4563
4564                                 leaf = path->nodes[0];
4565                                 fi = btrfs_item_ptr(leaf, path->slots[0],
4566                                                 struct btrfs_file_extent_item);
4567                                 btrfs_set_file_extent_generation(leaf, fi,
4568                                                         trans->transid);
4569                                 btrfs_set_file_extent_type(leaf, fi,
4570                                                         BTRFS_FILE_EXTENT_REG);
4571                                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4572                                                 new_extents[i].disk_bytenr);
4573                                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4574                                                 new_extents[i].disk_num_bytes);
4575                                 btrfs_set_file_extent_ram_bytes(leaf, fi,
4576                                                 new_extents[i].ram_bytes);
4577
4578                                 btrfs_set_file_extent_compression(leaf, fi,
4579                                                 new_extents[i].compression);
4580                                 btrfs_set_file_extent_encryption(leaf, fi,
4581                                                 new_extents[i].encryption);
4582                                 btrfs_set_file_extent_other_encoding(leaf, fi,
4583                                                 new_extents[i].other_encoding);
4584
4585                                 btrfs_set_file_extent_num_bytes(leaf, fi,
4586                                                         extent_len);
4587                                 ext_offset += new_extents[i].offset;
4588                                 btrfs_set_file_extent_offset(leaf, fi,
4589                                                         ext_offset);
4590                                 btrfs_mark_buffer_dirty(leaf);
4591
4592                                 btrfs_drop_extent_cache(inode, key.offset,
4593                                                 key.offset + extent_len - 1, 0);
4594
4595                                 ret = btrfs_inc_extent_ref(trans, root,
4596                                                 new_extents[i].disk_bytenr,
4597                                                 new_extents[i].disk_num_bytes,
4598                                                 leaf->start,
4599                                                 root->root_key.objectid,
4600                                                 trans->transid, key.objectid);
4601                                 BUG_ON(ret);
4602                                 btrfs_release_path(root, path);
4603
4604                                 inode_add_bytes(inode, extent_len);
4605
4606                                 ext_offset = 0;
4607                                 num_bytes -= extent_len;
4608                                 key.offset += extent_len;
4609
4610                                 if (num_bytes == 0)
4611                                         break;
4612                         }
4613                         BUG_ON(i >= nr_extents);
4614 #endif
4615                 }
4616
4617                 if (extent_locked) {
4618                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4619                                       lock_end, GFP_NOFS);
4620                         extent_locked = 0;
4621                 }
4622 skip:
4623                 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
4624                     key.offset >= search_end)
4625                         break;
4626
4627                 cond_resched();
4628         }
4629         ret = 0;
4630 out:
4631         btrfs_release_path(root, path);
4632         if (inode) {
4633                 mutex_unlock(&inode->i_mutex);
4634                 if (extent_locked) {
4635                         unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
4636                                       lock_end, GFP_NOFS);
4637                 }
4638                 iput(inode);
4639         }
4640         return ret;
4641 }
4642
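/*
 * when a leaf in the reloc tree is COWed, duplicate the leaf ref cache
 * entry of the original block for the new block so the cached extent
 * back reference information stays valid.
 */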
4643 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
4644                                struct btrfs_root *root,
4645                                struct extent_buffer *buf, u64 orig_start)
4646 {
4647         int level;
4648         int ret;
4649
4650         BUG_ON(btrfs_header_generation(buf) != trans->transid);
4651         BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
4652
4653         level = btrfs_header_level(buf);
4654         if (level == 0) {
4655                 struct btrfs_leaf_ref *ref;
4656                 struct btrfs_leaf_ref *orig_ref;
4657
4658                 orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
4659                 if (!orig_ref)
4660                         return -ENOENT;
4661
4662                 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
4663                 if (!ref) {
4664                         btrfs_free_leaf_ref(root, orig_ref);
4665                         return -ENOMEM;
4666                 }
4667
4668                 ref->nritems = orig_ref->nritems;
4669                 memcpy(ref->extents, orig_ref->extents,
4670                         sizeof(ref->extents[0]) * ref->nritems);
4671
4672                 btrfs_free_leaf_ref(root, orig_ref);
4673
4674                 ref->root_gen = trans->transid;
4675                 ref->bytenr = buf->start;
4676                 ref->owner = btrfs_header_owner(buf);
4677                 ref->generation = btrfs_header_generation(buf);
4678
4679                 ret = btrfs_add_leaf_ref(root, ref, 0);
4680                 WARN_ON(ret);
4681                 btrfs_free_leaf_ref(root, ref);
4682         }
4683         return 0;
4684 }
4685
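/*
 * drop the cached extent mappings of every inode in 'target_root' that
 * has file extent items in 'leaf', so stale mappings aren't used after
 * the underlying extents have been relocated.
 */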
4686 static noinline int invalidate_extent_cache(struct btrfs_root *root,
4687                                         struct extent_buffer *leaf,
4688                                         struct btrfs_block_group_cache *group,
4689                                         struct btrfs_root *target_root)
4690 {
4691         struct btrfs_key key;
4692         struct inode *inode = NULL;
4693         struct btrfs_file_extent_item *fi;
4694         u64 num_bytes;
4695         u64 skip_objectid = 0;
4696         u32 nritems;
4697         u32 i;
4698
4699         nritems = btrfs_header_nritems(leaf);
4700         for (i = 0; i < nritems; i++) {
4701                 btrfs_item_key_to_cpu(leaf, &key, i);
4702                 if (key.objectid == skip_objectid ||
4703                     key.type != BTRFS_EXTENT_DATA_KEY)
4704                         continue;
4705                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4706                 if (btrfs_file_extent_type(leaf, fi) ==
4707                     BTRFS_FILE_EXTENT_INLINE)
4708                         continue;
4709                 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
4710                         continue;
4711                 if (!inode || inode->i_ino != key.objectid) {
4712                         iput(inode);
4713                         inode = btrfs_ilookup(target_root->fs_info->sb,
4714                                               key.objectid, target_root, 1);
4715                 }
4716                 if (!inode) {
4717                         skip_objectid = key.objectid;
4718                         continue;
4719                 }
4720                 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
4721
4722                 lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4723                             key.offset + num_bytes - 1, GFP_NOFS);
4724                 btrfs_drop_extent_cache(inode, key.offset,
4725                                         key.offset + num_bytes - 1, 1);
4726                 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
4727                               key.offset + num_bytes - 1, GFP_NOFS);
4728                 cond_resched();
4729         }
4730         iput(inode);
4731         return 0;
4732 }
4733
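/*
 * rewrite the file extent pointers in 'leaf' that fall inside the block
 * group being relocated so they point at the new copies in the
 * relocation inode, updating the cached leaf ref to match.
 */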
4734 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
4735                                         struct btrfs_root *root,
4736                                         struct extent_buffer *leaf,
4737                                         struct btrfs_block_group_cache *group,
4738                                         struct inode *reloc_inode)
4739 {
4740         struct btrfs_key key;
4741         struct btrfs_key extent_key;
4742         struct btrfs_file_extent_item *fi;
4743         struct btrfs_leaf_ref *ref;
4744         struct disk_extent *new_extent;
4745         u64 bytenr;
4746         u64 num_bytes;
4747         u32 nritems;
4748         u32 i;
4749         int ext_index;
4750         int nr_extent;
4751         int ret;
4752
4753         new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
4754         BUG_ON(!new_extent);
4755
4756         ref = btrfs_lookup_leaf_ref(root, leaf->start);
4757         BUG_ON(!ref);
4758
4759         ext_index = -1;
4760         nritems = btrfs_header_nritems(leaf);
4761         for (i = 0; i < nritems; i++) {
4762                 btrfs_item_key_to_cpu(leaf, &key, i);
4763                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
4764                         continue;
4765                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
4766                 if (btrfs_file_extent_type(leaf, fi) ==
4767                     BTRFS_FILE_EXTENT_INLINE)
4768                         continue;
4769                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
4770                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
4771                 if (bytenr == 0)
4772                         continue;
4773
4774                 ext_index++;
4775                 if (bytenr >= group->key.objectid + group->key.offset ||
4776                     bytenr + num_bytes <= group->key.objectid)
4777                         continue;
4778
4779                 extent_key.objectid = bytenr;
4780                 extent_key.offset = num_bytes;
4781                 extent_key.type = BTRFS_EXTENT_ITEM_KEY;
4782                 nr_extent = 1;
4783                 ret = get_new_locations(reloc_inode, &extent_key,
4784                                         group->key.objectid, 1,
4785                                         &new_extent, &nr_extent);
4786                 if (ret > 0)
4787                         continue;
4788                 BUG_ON(ret < 0);
4789
4790                 BUG_ON(ref->extents[ext_index].bytenr != bytenr);
4791                 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
4792                 ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
4793                 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;
4794
4795                 btrfs_set_file_extent_disk_bytenr(leaf, fi,
4796                                                 new_extent->disk_bytenr);
4797                 btrfs_set_file_extent_disk_num_bytes(leaf, fi,
4798                                                 new_extent->disk_num_bytes);
4799                 btrfs_mark_buffer_dirty(leaf);
4800
4801                 ret = btrfs_inc_extent_ref(trans, root,
4802                                         new_extent->disk_bytenr,
4803                                         new_extent->disk_num_bytes,
4804                                         leaf->start,
4805                                         root->root_key.objectid,
4806                                         trans->transid, key.objectid);
4807                 BUG_ON(ret);
4808
4809                 ret = btrfs_free_extent(trans, root,
4810                                         bytenr, num_bytes, leaf->start,
4811                                         btrfs_header_owner(leaf),
4812                                         btrfs_header_generation(leaf),
4813                                         key.objectid, 0);
4814                 BUG_ON(ret);
4815                 cond_resched();
4816         }
4817         kfree(new_extent);
4818         BUG_ON(ext_index + 1 != ref->nritems);
4819         btrfs_free_leaf_ref(root, ref);
4820         return 0;
4821 }
4822
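/*
 * detach the reloc tree from its subvol root and queue it on the
 * dead_reloc_roots list.  The root item is brought up to date so the
 * tree can be dropped later by btrfs_drop_dead_reloc_roots.
 */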
4823 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
4824                           struct btrfs_root *root)
4825 {
4826         struct btrfs_root *reloc_root;
4827         int ret;
4828
4829         if (root->reloc_root) {
4830                 reloc_root = root->reloc_root;
4831                 root->reloc_root = NULL;
4832                 list_add(&reloc_root->dead_list,
4833                          &root->fs_info->dead_reloc_roots);
4834
4835                 btrfs_set_root_bytenr(&reloc_root->root_item,
4836                                       reloc_root->node->start);
4837                 btrfs_set_root_level(&reloc_root->root_item,
4838                                      btrfs_header_level(reloc_root->node));
4839                 memset(&reloc_root->root_item.drop_progress, 0,
4840                         sizeof(struct btrfs_disk_key));
4841                 reloc_root->root_item.drop_level = 0;
4842
4843                 ret = btrfs_update_root(trans, root->fs_info->tree_root,
4844                                         &reloc_root->root_key,
4845                                         &reloc_root->root_item);
4846                 BUG_ON(ret);
4847         }
4848         return 0;
4849 }
4850
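/*
 * drop every reloc tree queued on the dead_reloc_roots list, restarting
 * the transaction whenever btrfs_drop_snapshot returns -EAGAIN, then
 * delete each tree's root item from the tree root.
 */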
4851 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
4852 {
4853         struct btrfs_trans_handle *trans;
4854         struct btrfs_root *reloc_root;
4855         struct btrfs_root *prev_root = NULL;
4856         struct list_head dead_roots;
4857         int ret;
4858         unsigned long nr;
4859
4860         INIT_LIST_HEAD(&dead_roots);
4861         list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);
4862
4863         while (!list_empty(&dead_roots)) {
4864                 reloc_root = list_entry(dead_roots.prev,
4865                                         struct btrfs_root, dead_list);
4866                 list_del_init(&reloc_root->dead_list);
4867
4868                 BUG_ON(reloc_root->commit_root != NULL);
4869                 while (1) {
4870                         trans = btrfs_join_transaction(root, 1);
4871                         BUG_ON(!trans);
4872
4873                         mutex_lock(&root->fs_info->drop_mutex);
4874                         ret = btrfs_drop_snapshot(trans, reloc_root);
4875                         if (ret != -EAGAIN)
4876                                 break;
4877                         mutex_unlock(&root->fs_info->drop_mutex);
4878
4879                         nr = trans->blocks_used;
4880                         ret = btrfs_end_transaction(trans, root);
4881                         BUG_ON(ret);
4882                         btrfs_btree_balance_dirty(root, nr);
4883                 }
4884
4885                 free_extent_buffer(reloc_root->node);
4886
4887                 ret = btrfs_del_root(trans, root->fs_info->tree_root,
4888                                      &reloc_root->root_key);
4889                 BUG_ON(ret);
4890                 mutex_unlock(&root->fs_info->drop_mutex);
4891
4892                 nr = trans->blocks_used;
4893                 ret = btrfs_end_transaction(trans, root);
4894                 BUG_ON(ret);
4895                 btrfs_btree_balance_dirty(root, nr);
4896
4897                 kfree(prev_root);
4898                 prev_root = reloc_root;
4899         }
4900         if (prev_root) {
4901                 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
4902                 kfree(prev_root);
4903         }
4904         return 0;
4905 }
4906
4907 int btrfs_add_dead_reloc_root(struct btrfs_root *root)
4908 {
4909         list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
4910         return 0;
4911 }
4912
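/*
 * clean up reloc trees left over from an interrupted relocation: any
 * dead reloc roots that are found get disposed of via a transaction
 * commit, then orphan cleanup runs on the data relocation tree.
 */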
4913 int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
4914 {
4915         struct btrfs_root *reloc_root;
4916         struct btrfs_trans_handle *trans;
4917         struct btrfs_key location;
4918         int found;
4919         int ret;
4920
4921         mutex_lock(&root->fs_info->tree_reloc_mutex);
4922         ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
4923         BUG_ON(ret);
4924         found = !list_empty(&root->fs_info->dead_reloc_roots);
4925         mutex_unlock(&root->fs_info->tree_reloc_mutex);
4926
4927         if (found) {
4928                 trans = btrfs_start_transaction(root, 1);
4929                 BUG_ON(!trans);
4930                 ret = btrfs_commit_transaction(trans, root);
4931                 BUG_ON(ret);
4932         }
4933
4934         location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
4935         location.offset = (u64)-1;
4936         location.type = BTRFS_ROOT_ITEM_KEY;
4937
4938         reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
4939         BUG_ON(!reloc_root);
4940         btrfs_orphan_cleanup(reloc_root);
4941         return 0;
4942 }
4943
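/*
 * create the reloc tree for a subvol if it doesn't exist yet: snapshot
 * the subvol's commit root under BTRFS_TREE_RELOC_OBJECTID and insert
 * the new root item into the tree root.
 */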
4944 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
4945                                     struct btrfs_root *root)
4946 {
4947         struct btrfs_root *reloc_root;
4948         struct extent_buffer *eb;
4949         struct btrfs_root_item *root_item;
4950         struct btrfs_key root_key;
4951         int ret;
4952
4953         BUG_ON(!root->ref_cows);
4954         if (root->reloc_root)
4955                 return 0;
4956
4957         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
4958         BUG_ON(!root_item);
4959
4960         ret = btrfs_copy_root(trans, root, root->commit_root,
4961                               &eb, BTRFS_TREE_RELOC_OBJECTID);
4962         BUG_ON(ret);
4963
4964         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
4965         root_key.offset = root->root_key.objectid;
4966         root_key.type = BTRFS_ROOT_ITEM_KEY;
4967
4968         memcpy(root_item, &root->root_item, sizeof(*root_item));
4969         btrfs_set_root_refs(root_item, 0);
4970         btrfs_set_root_bytenr(root_item, eb->start);
4971         btrfs_set_root_level(root_item, btrfs_header_level(eb));
4972         btrfs_set_root_generation(root_item, trans->transid);
4973
4974         btrfs_tree_unlock(eb);
4975         free_extent_buffer(eb);
4976
4977         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
4978                                 &root_key, root_item);
4979         BUG_ON(ret);
4980         kfree(root_item);
4981
4982         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
4983                                                  &root_key);
4984         BUG_ON(!reloc_root);
4985         reloc_root->last_trans = trans->transid;
4986         reloc_root->commit_root = NULL;
4987         reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;
4988
4989         root->reloc_root = reloc_root;
4990         return 0;
4991 }
4992
4993 /*
4994  * Core function of space balance.
4995  *
4996  * The idea is to use reloc trees to relocate tree blocks in reference
4997  * counted roots. There is one reloc tree for each subvol, and all
4998  * reloc trees share the same root key objectid. Reloc trees are snapshots
4999  * of the latest committed roots of subvols (root->commit_root).
5000  *
5001  * To relocate a tree block referenced by a subvol, there are two steps:
5002  * COW the block through the subvol's reloc tree, then update the block
5003  * pointer in the subvol to point to the new block. Since all reloc trees
5004  * share the same root key objectid, special handling for tree blocks
5005  * owned by them is easy. Once a tree block has been COWed in one reloc
5006  * tree, we can use the resulting new block directly when the same block is
5007  * required to COW again through other reloc trees. In this way, relocated
5008  * tree blocks are shared between reloc trees, so they are also shared
5009  * between subvols.
5010  */
5011 static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
5012                                       struct btrfs_root *root,
5013                                       struct btrfs_path *path,
5014                                       struct btrfs_key *first_key,
5015                                       struct btrfs_ref_path *ref_path,
5016                                       struct btrfs_block_group_cache *group,
5017                                       struct inode *reloc_inode)
5018 {
5019         struct btrfs_root *reloc_root;
5020         struct extent_buffer *eb = NULL;
5021         struct btrfs_key *keys;
5022         u64 *nodes;
5023         int level;
5024         int shared_level;
5025         int lowest_level = 0;
5026         int ret;
5027
5028         if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
5029                 lowest_level = ref_path->owner_objectid;
5030
5031         if (!root->ref_cows) {
5032                 path->lowest_level = lowest_level;
5033                 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
5034                 BUG_ON(ret < 0);
5035                 path->lowest_level = 0;
5036                 btrfs_release_path(root, path);
5037                 return 0;
5038         }
5039
5040         mutex_lock(&root->fs_info->tree_reloc_mutex);
5041         ret = init_reloc_tree(trans, root);
5042         BUG_ON(ret);
5043         reloc_root = root->reloc_root;
5044
5045         shared_level = ref_path->shared_level;
5046         ref_path->shared_level = BTRFS_MAX_LEVEL - 1;
5047
5048         keys = ref_path->node_keys;
5049         nodes = ref_path->new_nodes;
5050         memset(&keys[shared_level + 1], 0,
5051                sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
5052         memset(&nodes[shared_level + 1], 0,
5053                sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));
5054
5055         if (nodes[lowest_level] == 0) {
5056                 path->lowest_level = lowest_level;
5057                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5058                                         0, 1);
5059                 BUG_ON(ret);
5060                 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
5061                         eb = path->nodes[level];
5062                         if (!eb || eb == reloc_root->node)
5063                                 break;
5064                         nodes[level] = eb->start;
5065                         if (level == 0)
5066                                 btrfs_item_key_to_cpu(eb, &keys[level], 0);
5067                         else
5068                                 btrfs_node_key_to_cpu(eb, &keys[level], 0);
5069                 }
5070                 if (nodes[0] &&
5071                     ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5072                         eb = path->nodes[0];
5073                         ret = replace_extents_in_leaf(trans, reloc_root, eb,
5074                                                       group, reloc_inode);
5075                         BUG_ON(ret);
5076                 }
5077                 btrfs_release_path(reloc_root, path);
5078         } else {
5079                 ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
5080                                        lowest_level);
5081                 BUG_ON(ret);
5082         }
5083
5084         /*
5085          * replace tree blocks in the fs tree with tree blocks in
5086          * the reloc tree.
5087          */
5088         ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
5089         BUG_ON(ret < 0);
5090
5091         if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5092                 ret = btrfs_search_slot(trans, reloc_root, first_key, path,
5093                                         0, 0);
5094                 BUG_ON(ret);
5095                 extent_buffer_get(path->nodes[0]);
5096                 eb = path->nodes[0];
5097                 btrfs_release_path(reloc_root, path);
5098                 ret = invalidate_extent_cache(reloc_root, eb, group, root);
5099                 BUG_ON(ret);
5100                 free_extent_buffer(eb);
5101         }
5102
5103         mutex_unlock(&root->fs_info->tree_reloc_mutex);
5104         path->lowest_level = 0;
5105         return 0;
5106 }
5107
5108 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
5109                                         struct btrfs_root *root,
5110                                         struct btrfs_path *path,
5111                                         struct btrfs_key *first_key,
5112                                         struct btrfs_ref_path *ref_path)
5113 {
5114         int ret;
5115
5116         ret = relocate_one_path(trans, root, path, first_key,
5117                                 ref_path, NULL, NULL);
5118         BUG_ON(ret);
5119
5120         return 0;
5121 }
5122
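/*
 * an extent item at objectid zero can't be relocated; just delete it
 * from the extent tree.
 */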
5123 static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
5124                                     struct btrfs_root *extent_root,
5125                                     struct btrfs_path *path,
5126                                     struct btrfs_key *extent_key)
5127 {
5128         int ret;
5129
5130         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
5131         if (ret)
5132                 goto out;
5133         ret = btrfs_del_item(trans, extent_root, path);
5134 out:
5135         btrfs_release_path(extent_root, path);
5136         return ret;
5137 }
5138
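/*
 * read the root that a reference path starts from.  COW-only roots are
 * keyed with offset 0, everything else with offset (u64)-1 to find the
 * latest version of the root.
 */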
5139 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
5140                                                 struct btrfs_ref_path *ref_path)
5141 {
5142         struct btrfs_key root_key;
5143
5144         root_key.objectid = ref_path->root_objectid;
5145         root_key.type = BTRFS_ROOT_ITEM_KEY;
5146         if (is_cowonly_root(ref_path->root_objectid))
5147                 root_key.offset = 0;
5148         else
5149                 root_key.offset = (u64)-1;
5150
5151         return btrfs_read_fs_root_no_name(fs_info, &root_key);
5152 }
5153
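/*
 * relocate all references to the extent at 'extent_key'.  Data extent
 * references are switched to the copy in the relocation inode: pass 0
 * copies the data, pass 1 updates the references in place, and later
 * passes fall back to rewriting the file extent items.  Tree block
 * references are handled by relocating the path through the reloc tree.
 */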
5154 static noinline int relocate_one_extent(struct btrfs_root *extent_root,
5155                                         struct btrfs_path *path,
5156                                         struct btrfs_key *extent_key,
5157                                         struct btrfs_block_group_cache *group,
5158                                         struct inode *reloc_inode, int pass)
5159 {
5160         struct btrfs_trans_handle *trans;
5161         struct btrfs_root *found_root;
5162         struct btrfs_ref_path *ref_path = NULL;
5163         struct disk_extent *new_extents = NULL;
5164         int nr_extents = 0;
5165         int loops;
5166         int ret;
5167         int level;
5168         struct btrfs_key first_key;
5169         u64 prev_block = 0;
5170
5171
5172         trans = btrfs_start_transaction(extent_root, 1);
5173         BUG_ON(!trans);
5174
5175         if (extent_key->objectid == 0) {
5176                 ret = del_extent_zero(trans, extent_root, path, extent_key);
5177                 goto out;
5178         }
5179
5180         ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
5181         if (!ref_path) {
5182                 ret = -ENOMEM;
5183                 goto out;
5184         }
5185
5186         for (loops = 0; ; loops++) {
5187                 if (loops == 0) {
5188                         ret = btrfs_first_ref_path(trans, extent_root, ref_path,
5189                                                    extent_key->objectid);
5190                 } else {
5191                         ret = btrfs_next_ref_path(trans, extent_root, ref_path);
5192                 }
5193                 if (ret < 0)
5194                         goto out;
5195                 if (ret > 0)
5196                         break;
5197
5198                 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
5199                     ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
5200                         continue;
5201
5202                 found_root = read_ref_root(extent_root->fs_info, ref_path);
5203                 BUG_ON(!found_root);
5204                 /*
5205                  * for reference counted trees, only process reference paths
5206                  * rooted at the latest committed root.
5207                  */
5208                 if (found_root->ref_cows &&
5209                     ref_path->root_generation != found_root->root_key.offset)
5210                         continue;
5211
5212                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5213                         if (pass == 0) {
5214                                 /*
5215                                  * copy data extents to new locations
5216                                  */
5217                                 u64 group_start = group->key.objectid;
5218                                 ret = relocate_data_extent(reloc_inode,
5219                                                            extent_key,
5220                                                            group_start);
5221                                 if (ret < 0)
5222                                         goto out;
5223                                 break;
5224                         }
5225                         level = 0;
5226                 } else {
5227                         level = ref_path->owner_objectid;
5228                 }
5229
5230                 if (prev_block != ref_path->nodes[level]) {
5231                         struct extent_buffer *eb;
5232                         u64 block_start = ref_path->nodes[level];
5233                         u64 block_size = btrfs_level_size(found_root, level);
5234
5235                         eb = read_tree_block(found_root, block_start,
5236                                              block_size, 0);
                        BUG_ON(!eb);
5237                         btrfs_tree_lock(eb);
5238                         BUG_ON(level != btrfs_header_level(eb));
5239
5240                         if (level == 0)
5241                                 btrfs_item_key_to_cpu(eb, &first_key, 0);
5242                         else
5243                                 btrfs_node_key_to_cpu(eb, &first_key, 0);
5244
5245                         btrfs_tree_unlock(eb);
5246                         free_extent_buffer(eb);
5247                         prev_block = block_start;
5248                 }
5249
5250                 mutex_lock(&extent_root->fs_info->trans_mutex);
5251                 btrfs_record_root_in_trans(found_root);
5252                 mutex_unlock(&extent_root->fs_info->trans_mutex);
5253                 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
5254                         /*
5255                          * try to update data extent references while
5256                          * keeping metadata shared between snapshots.
5257                          */
5258                         if (pass == 1) {
5259                                 ret = relocate_one_path(trans, found_root,
5260                                                 path, &first_key, ref_path,
5261                                                 group, reloc_inode);
5262                                 if (ret < 0)
5263                                         goto out;
5264                                 continue;
5265                         }
5266                         /*
5267                          * use the fallback method to process the remaining
5268                          * references.
5269                          */
5270                         if (!new_extents) {
5271                                 u64 group_start = group->key.objectid;
5272                                 new_extents = kmalloc(sizeof(*new_extents),
5273                                                       GFP_NOFS);
                                if (!new_extents) {
                                        ret = -ENOMEM;
                                        goto out;
                                }
5274                                 nr_extents = 1;
5275                                 ret = get_new_locations(reloc_inode,
5276                                                         extent_key,
5277                                                         group_start, 1,
5278                                                         &new_extents,
5279                                                         &nr_extents);
5280                                 if (ret)
5281                                         goto out;
5282                         }
5283                         ret = replace_one_extent(trans, found_root,
5284                                                 path, extent_key,
5285                                                 &first_key, ref_path,
5286                                                 new_extents, nr_extents);
5287                 } else {
5288                         ret = relocate_tree_block(trans, found_root, path,
5289                                                   &first_key, ref_path);
5290                 }
5291                 if (ret < 0)
5292                         goto out;
5293         }
5294         ret = 0;
5295 out:
5296         btrfs_end_transaction(trans, extent_root);
5297         kfree(new_extents);
5298         kfree(ref_path);
5299         return ret;
5300 }
5301
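/*
 * pick the raid profile for a block group after a restripe: with a
 * single rw device, raid0 becomes a single device chunk and raid1/raid10
 * become dup; with multiple devices, dup becomes raid1 and single
 * device chunks become raid0.
 */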
5302 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
5303 {
5304         u64 num_devices;
5305         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
5306                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
5307
5308         num_devices = root->fs_info->fs_devices->rw_devices;
5309         if (num_devices == 1) {
5310                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5311                 stripped = flags & ~stripped;
5312
5313                 /* turn raid0 into single device chunks */
5314                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
5315                         return stripped;
5316
5317                 /* turn mirroring into duplication */
5318                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
5319                              BTRFS_BLOCK_GROUP_RAID10))
5320                         return stripped | BTRFS_BLOCK_GROUP_DUP;
5321                 return flags;
5322         } else {
5323                 /* they already have raid on here, just return */
5324                 if (flags & stripped)
5325                         return flags;
5326
5327                 stripped |= BTRFS_BLOCK_GROUP_DUP;
5328                 stripped = flags & ~stripped;
5329
5330                 /* switch duplicated blocks with raid1 */
5331                 if (flags & BTRFS_BLOCK_GROUP_DUP)
5332                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
5333
5334                 /* turn single device chunks into raid0 */
5335                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
5336         }
5337         return flags;
5338 }
5339
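/*
 * make sure there is somewhere to relocate the contents of a block
 * group that is being emptied: if the group still has bytes in use,
 * allocate a new chunk with the updated raid profile.
 */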
5340 static int __alloc_chunk_for_shrink(struct btrfs_root *root,
5341                      struct btrfs_block_group_cache *shrink_block_group,
5342                      int force)
5343 {
5344         struct btrfs_trans_handle *trans;
5345         u64 new_alloc_flags;
5346         u64 calc;
5347
5348         spin_lock(&shrink_block_group->lock);
5349         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
5350                 spin_unlock(&shrink_block_group->lock);
5351
5352                 trans = btrfs_start_transaction(root, 1);
5353                 spin_lock(&shrink_block_group->lock);
5354
5355                 new_alloc_flags = update_block_group_flags(root,
5356                                                    shrink_block_group->flags);
5357                 if (new_alloc_flags != shrink_block_group->flags) {
5358                         calc =
5359                              btrfs_block_group_used(&shrink_block_group->item);
5360                 } else {
5361                         calc = shrink_block_group->key.offset;
5362                 }
5363                 spin_unlock(&shrink_block_group->lock);
5364
5365                 do_chunk_alloc(trans, root->fs_info->extent_root,
5366                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
5367
5368                 btrfs_end_transaction(trans, root);
5369         } else
5370                 spin_unlock(&shrink_block_group->lock);
5371         return 0;
5372 }
5373
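/*
 * Insert a bare inode item for the relocation inode: a zero-linked regular
 * file covering @size bytes, flagged NOCOMPRESS (presumably so relocated
 * data is not transformed on its way through the page cache).
 */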
5374 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
5375                                  struct btrfs_root *root,
5376                                  u64 objectid, u64 size)
5377 {
5378         struct btrfs_path *path;
5379         struct btrfs_inode_item *item;
5380         struct extent_buffer *leaf;
5381         int ret;
5382
5383         path = btrfs_alloc_path();
5384         if (!path)
5385                 return -ENOMEM;
5386
5387         path->leave_spinning = 1;
5388         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
5389         if (ret)
5390                 goto out;
5391
5392         leaf = path->nodes[0];
5393         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
5394         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
5395         btrfs_set_inode_generation(leaf, item, 1);
5396         btrfs_set_inode_size(leaf, item, size);
5397         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
5398         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
5399         btrfs_mark_buffer_dirty(leaf);
5400         btrfs_release_path(root, path);
5401 out:
5402         btrfs_free_path(path);
5403         return ret;
5404 }
5405
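/*
 * Create the scratch inode used to relocate a block group's data.  It lives
 * in the data reloc tree, is preallocated to the size of the group, and is
 * put on the orphan list so it is removed automatically if we crash before
 * relocation completes.
 */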
5406 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
5407                                         struct btrfs_block_group_cache *group)
5408 {
5409         struct inode *inode = NULL;
5410         struct btrfs_trans_handle *trans;
5411         struct btrfs_root *root;
5412         struct btrfs_key root_key;
5413         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
5414         int err = 0;
5415
5416         root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
5417         root_key.type = BTRFS_ROOT_ITEM_KEY;
5418         root_key.offset = (u64)-1;
5419         root = btrfs_read_fs_root_no_name(fs_info, &root_key);
5420         if (IS_ERR(root))
5421                 return ERR_CAST(root);
5422
5423         trans = btrfs_start_transaction(root, 1);
5424         BUG_ON(!trans);
5425
5426         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
5427         if (err)
5428                 goto out;
5429
5430         err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
5431         BUG_ON(err);
5432
5433         err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
5434                                        group->key.offset, 0, group->key.offset,
5435                                        0, 0, 0);
5436         BUG_ON(err);
5437
5438         inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
        BUG_ON(!inode);
5439         if (inode->i_state & I_NEW) {
5440                 BTRFS_I(inode)->root = root;
5441                 BTRFS_I(inode)->location.objectid = objectid;
5442                 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
5443                 BTRFS_I(inode)->location.offset = 0;
5444                 btrfs_read_locked_inode(inode);
5445                 unlock_new_inode(inode);
5446                 BUG_ON(is_bad_inode(inode));
5447         } else {
5448                 BUG_ON(1);
5449         }
5450         BTRFS_I(inode)->index_cnt = group->key.objectid;
5451
5452         err = btrfs_orphan_add(trans, inode);
5453 out:
5454         btrfs_end_transaction(trans, root);
5455         if (err) {
5456                 if (inode)
5457                         iput(inode);
5458                 inode = ERR_PTR(err);
5459         }
5460         return inode;
5461 }
5462
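/*
 * Clone the checksums that covered an extent at its old location so they
 * follow the data to its new one.  index_cnt stores the block group start,
 * so file_pos + index_cnt recovers the extent's original disk bytenr; every
 * per-sector sum is then rebased onto the new ordered extent's start.
 */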
5463 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
5464 {
5466         struct btrfs_ordered_sum *sums;
5467         struct btrfs_sector_sum *sector_sum;
5468         struct btrfs_ordered_extent *ordered;
5469         struct btrfs_root *root = BTRFS_I(inode)->root;
5470         struct list_head list;
5471         size_t offset;
5472         int ret;
5473         u64 disk_bytenr;
5474
5475         INIT_LIST_HEAD(&list);
5476
5477         ordered = btrfs_lookup_ordered_extent(inode, file_pos);
5478         BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
5479
5480         disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
5481         ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
5482                                        disk_bytenr + len - 1, &list);
        BUG_ON(ret);
5483
5484         while (!list_empty(&list)) {
5485                 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
5486                 list_del_init(&sums->list);
5487
5488                 sector_sum = sums->sums;
5489                 sums->bytenr = ordered->start;
5490
5491                 offset = 0;
5492                 while (offset < sums->len) {
5493                         sector_sum->bytenr += ordered->start - disk_bytenr;
5494                         sector_sum++;
5495                         offset += root->sectorsize;
5496                 }
5497
5498                 btrfs_add_ordered_sum(inode, ordered, sums);
5499         }
5500         btrfs_put_ordered_extent(ordered);
5501         return 0;
5502 }
5503
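/*
 * Empty a block group.  The group is forced read-only, delalloc is flushed,
 * and then every extent item inside the group's range gets relocated.
 * Extents that relocate_one_extent() could not move yet count as skipped
 * and are retried on later passes; if several passes make no progress the
 * reloc inode is recycled and we start over.
 */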
5504 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
5505 {
5506         struct btrfs_trans_handle *trans;
5507         struct btrfs_path *path;
5508         struct btrfs_fs_info *info = root->fs_info;
5509         struct extent_buffer *leaf;
5510         struct inode *reloc_inode;
5511         struct btrfs_block_group_cache *block_group;
5512         struct btrfs_key key;
5513         u64 skipped;
5514         u64 cur_byte;
5515         u64 total_found;
5516         u32 nritems;
5517         int ret;
5518         int progress;
5519         int pass = 0;
5520
5521         root = root->fs_info->extent_root;
5522
5523         block_group = btrfs_lookup_block_group(info, group_start);
5524         BUG_ON(!block_group);
5525
5526         printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
5527                (unsigned long long)block_group->key.objectid,
5528                (unsigned long long)block_group->flags);
5529
5530         path = btrfs_alloc_path();
5531         BUG_ON(!path);
5532
5533         reloc_inode = create_reloc_inode(info, block_group);
5534         BUG_ON(IS_ERR(reloc_inode));
5535
5536         __alloc_chunk_for_shrink(root, block_group, 1);
5537         set_block_group_readonly(block_group);
5538
5539         btrfs_start_delalloc_inodes(info->tree_root);
5540         btrfs_wait_ordered_extents(info->tree_root, 0);
5541 again:
5542         skipped = 0;
5543         total_found = 0;
5544         progress = 0;
5545         key.objectid = block_group->key.objectid;
5546         key.offset = 0;
5547         key.type = 0;
5548         cur_byte = key.objectid;
5549
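        /*
         * Quiesce the tree before scanning: commit an empty transaction,
         * let the cleaner drop old snapshots and cached leaf refs, then
         * commit once more so their delayed work is fully processed.
         */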
5550         trans = btrfs_start_transaction(info->tree_root, 1);
5551         btrfs_commit_transaction(trans, info->tree_root);
5552
5553         mutex_lock(&root->fs_info->cleaner_mutex);
5554         btrfs_clean_old_snapshots(info->tree_root);
5555         btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
5556         mutex_unlock(&root->fs_info->cleaner_mutex);
5557
5558         trans = btrfs_start_transaction(info->tree_root, 1);
5559         btrfs_commit_transaction(trans, info->tree_root);
5560
5561         while (1) {
5562                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
5563                 if (ret < 0)
5564                         goto out;
5565 next:
5566                 leaf = path->nodes[0];
5567                 nritems = btrfs_header_nritems(leaf);
5568                 if (path->slots[0] >= nritems) {
5569                         ret = btrfs_next_leaf(root, path);
5570                         if (ret < 0)
5571                                 goto out;
5572                         if (ret == 1) {
5573                                 ret = 0;
5574                                 break;
5575                         }
5576                         leaf = path->nodes[0];
5577                         nritems = btrfs_header_nritems(leaf);
5578                 }
5579
5580                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
5581
5582                 if (key.objectid >= block_group->key.objectid +
5583                     block_group->key.offset)
5584                         break;
5585
5586                 if (progress && need_resched()) {
5587                         btrfs_release_path(root, path);
5588                         cond_resched();
5589                         progress = 0;
5590                         continue;
5591                 }
5592                 progress = 1;
5593
5594                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
5595                     key.objectid + key.offset <= cur_byte) {
5596                         path->slots[0]++;
5597                         goto next;
5598                 }
5599
5600                 total_found++;
5601                 cur_byte = key.objectid + key.offset;
5602                 btrfs_release_path(root, path);
5603
5604                 __alloc_chunk_for_shrink(root, block_group, 0);
5605                 ret = relocate_one_extent(root, path, &key, block_group,
5606                                           reloc_inode, pass);
5607                 BUG_ON(ret < 0);
5608                 if (ret > 0)
5609                         skipped++;
5610
5611                 key.objectid = cur_byte;
5612                 key.type = 0;
5613                 key.offset = 0;
5614         }
5615
5616         btrfs_release_path(root, path);
5617
5618         if (pass == 0) {
5619                 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
5620                 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
5621         }
5622
5623         if (total_found > 0) {
5624                 printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
5625                        (unsigned long long)total_found, pass);
5626                 pass++;
5627                 if (total_found == skipped && pass > 2) {
5628                         iput(reloc_inode);
5629                         reloc_inode = create_reloc_inode(info, block_group);
                        BUG_ON(IS_ERR(reloc_inode));
5630                         pass = 0;
5631                 }
5632                 goto again;
5633         }
5634
5635         /* drop our ref; the orphan item added earlier deletes the inode */
5636         iput(reloc_inode);
5637
5638         /* unpin extents in this range */
5639         trans = btrfs_start_transaction(info->tree_root, 1);
5640         btrfs_commit_transaction(trans, info->tree_root);
5641
5642         spin_lock(&block_group->lock);
5643         WARN_ON(block_group->pinned > 0);
5644         WARN_ON(block_group->reserved > 0);
5645         WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
5646         spin_unlock(&block_group->lock);
5647         put_block_group(block_group);
5648         ret = 0;
5649 out:
5650         btrfs_free_path(path);
5651         return ret;
5652 }
5653
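/*
 * Position @path at the first BLOCK_GROUP_ITEM whose objectid is >=
 * key->objectid.  Returns 0 when one is found, a positive value when the
 * extent tree holds no more block group items, or a negative errno.
 */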
5654 static int find_first_block_group(struct btrfs_root *root,
5655                 struct btrfs_path *path, struct btrfs_key *key)
5656 {
5657         int ret = 0;
5658         struct btrfs_key found_key;
5659         struct extent_buffer *leaf;
5660         int slot;
5661
5662         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
5663         if (ret < 0)
5664                 goto out;
5665
5666         while (1) {
5667                 slot = path->slots[0];
5668                 leaf = path->nodes[0];
5669                 if (slot >= btrfs_header_nritems(leaf)) {
5670                         ret = btrfs_next_leaf(root, path);
5671                         if (ret == 0)
5672                                 continue;
5673                         if (ret < 0)
5674                                 goto out;
5675                         break;
5676                 }
5677                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
5678
5679                 if (found_key.objectid >= key->objectid &&
5680                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
5681                         ret = 0;
5682                         goto out;
5683                 }
5684                 path->slots[0]++;
5685         }
5687 out:
5688         return ret;
5689 }
5690
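/*
 * Unmount-time teardown of the in-memory block group cache: unhook every
 * group from the rb tree and its space_info, drop its free space cache,
 * then free the space_info structs themselves.
 */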
5691 int btrfs_free_block_groups(struct btrfs_fs_info *info)
5692 {
5693         struct btrfs_block_group_cache *block_group;
5694         struct btrfs_space_info *space_info;
5695         struct rb_node *n;
5696
5697         spin_lock(&info->block_group_cache_lock);
5698         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
5699                 block_group = rb_entry(n, struct btrfs_block_group_cache,
5700                                        cache_node);
5701                 rb_erase(&block_group->cache_node,
5702                          &info->block_group_cache_tree);
5703                 spin_unlock(&info->block_group_cache_lock);
5704
5705                 btrfs_remove_free_space_cache(block_group);
5706                 down_write(&block_group->space_info->groups_sem);
5707                 list_del(&block_group->list);
5708                 up_write(&block_group->space_info->groups_sem);
5709
5710                 WARN_ON(atomic_read(&block_group->count) != 1);
5711                 kfree(block_group);
5712
5713                 spin_lock(&info->block_group_cache_lock);
5714         }
5715         spin_unlock(&info->block_group_cache_lock);
5716
5717         /* now that all the block groups are freed, go through and
5718          * free all the space_info structs.  This is only called during
5719          * the final stages of unmount, and so we know nobody is
5720          * using them.  We call synchronize_rcu() once before we start,
5721          * just to be on the safe side.
5722          */
5723         synchronize_rcu();
5724
5725         while (!list_empty(&info->space_info)) {
5726                 space_info = list_entry(info->space_info.next,
5727                                         struct btrfs_space_info,
5728                                         list);
5729
5730                 list_del(&space_info->list);
5731                 kfree(space_info);
5732         }
5733         return 0;
5734 }
5735
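/*
 * Mount-time scan: walk every BLOCK_GROUP_ITEM in the extent tree and
 * build the in-memory cache entry for it, accounting the group against the
 * matching space_info and marking it read-only if its chunk is.
 */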
5736 int btrfs_read_block_groups(struct btrfs_root *root)
5737 {
5738         struct btrfs_path *path;
5739         int ret;
5740         struct btrfs_block_group_cache *cache;
5741         struct btrfs_fs_info *info = root->fs_info;
5742         struct btrfs_space_info *space_info;
5743         struct btrfs_key key;
5744         struct btrfs_key found_key;
5745         struct extent_buffer *leaf;
5746
5747         root = info->extent_root;
5748         key.objectid = 0;
5749         key.offset = 0;
5750         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
5751         path = btrfs_alloc_path();
5752         if (!path)
5753                 return -ENOMEM;
5754
5755         while (1) {
5756                 ret = find_first_block_group(root, path, &key);
5757                 if (ret > 0) {
5758                         ret = 0;
5759                         goto error;
5760                 }
5761                 if (ret != 0)
5762                         goto error;
5763
5764                 leaf = path->nodes[0];
5765                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
5766                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
5767                 if (!cache) {
5768                         ret = -ENOMEM;
5769                         goto error;
5770                 }
5771
5772                 atomic_set(&cache->count, 1);
5773                 spin_lock_init(&cache->lock);
5774                 spin_lock_init(&cache->tree_lock);
5775                 mutex_init(&cache->cache_mutex);
5776                 INIT_LIST_HEAD(&cache->list);
5777                 read_extent_buffer(leaf, &cache->item,
5778                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
5779                                    sizeof(cache->item));
5780                 memcpy(&cache->key, &found_key, sizeof(found_key));
5781
5782                 key.objectid = found_key.objectid + found_key.offset;
5783                 btrfs_release_path(root, path);
5784                 cache->flags = btrfs_block_group_flags(&cache->item);
5785
5786                 ret = update_space_info(info, cache->flags, found_key.offset,
5787                                         btrfs_block_group_used(&cache->item),
5788                                         &space_info);
5789                 BUG_ON(ret);
5790                 cache->space_info = space_info;
5791                 down_write(&space_info->groups_sem);
5792                 list_add_tail(&cache->list, &space_info->block_groups);
5793                 up_write(&space_info->groups_sem);
5794
5795                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
5796                 BUG_ON(ret);
5797
5798                 set_avail_alloc_bits(root->fs_info, cache->flags);
5799                 if (btrfs_chunk_readonly(root, cache->key.objectid))
5800                         set_block_group_readonly(cache);
5801         }
5802         ret = 0;
5803 error:
5804         btrfs_free_path(path);
5805         return ret;
5806 }
5807
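/*
 * Create the block group for a freshly allocated chunk: build the cache
 * entry, account it in the right space_info, and persist the matching
 * BLOCK_GROUP_ITEM in the extent tree.
 */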
5808 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
5809                            struct btrfs_root *root, u64 bytes_used,
5810                            u64 type, u64 chunk_objectid, u64 chunk_offset,
5811                            u64 size)
5812 {
5813         int ret;
5814         struct btrfs_root *extent_root;
5815         struct btrfs_block_group_cache *cache;
5816
5817         extent_root = root->fs_info->extent_root;
5818
5819         root->fs_info->last_trans_log_full_commit = trans->transid;
5820
5821         cache = kzalloc(sizeof(*cache), GFP_NOFS);
5822         if (!cache)
5823                 return -ENOMEM;
5824
5825         cache->key.objectid = chunk_offset;
5826         cache->key.offset = size;
5827         cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
5828         atomic_set(&cache->count, 1);
5829         spin_lock_init(&cache->lock);
5830         spin_lock_init(&cache->tree_lock);
5831         mutex_init(&cache->cache_mutex);
5832         INIT_LIST_HEAD(&cache->list);
5833
5834         btrfs_set_block_group_used(&cache->item, bytes_used);
5835         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
5836         cache->flags = type;
5837         btrfs_set_block_group_flags(&cache->item, type);
5838
5839         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
5840                                 &cache->space_info);
5841         BUG_ON(ret);
5842         down_write(&cache->space_info->groups_sem);
5843         list_add_tail(&cache->list, &cache->space_info->block_groups);
5844         up_write(&cache->space_info->groups_sem);
5845
5846         ret = btrfs_add_block_group_cache(root->fs_info, cache);
5847         BUG_ON(ret);
5848
5849         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
5850                                 sizeof(cache->item));
5851         BUG_ON(ret);
5852
5853         set_avail_alloc_bits(extent_root->fs_info, type);
5854
5855         return 0;
5856 }
5857
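/*
 * Remove an empty, read-only block group: unhook the cache entry, adjust
 * the space_info accounting, and delete the BLOCK_GROUP_ITEM from the
 * extent tree.
 */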
5858 int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
5859                              struct btrfs_root *root, u64 group_start)
5860 {
5861         struct btrfs_path *path;
5862         struct btrfs_block_group_cache *block_group;
5863         struct btrfs_key key;
5864         int ret;
5865
5866         root = root->fs_info->extent_root;
5867
5868         block_group = btrfs_lookup_block_group(root->fs_info, group_start);
5869         BUG_ON(!block_group);
5870         BUG_ON(!block_group->ro);
5871
5872         memcpy(&key, &block_group->key, sizeof(key));
5873
5874         path = btrfs_alloc_path();
5875         BUG_ON(!path);
5876
5877         spin_lock(&root->fs_info->block_group_cache_lock);
5878         rb_erase(&block_group->cache_node,
5879                  &root->fs_info->block_group_cache_tree);
5880         spin_unlock(&root->fs_info->block_group_cache_lock);
5881         btrfs_remove_free_space_cache(block_group);
5882         down_write(&block_group->space_info->groups_sem);
5883         list_del(&block_group->list);
5884         up_write(&block_group->space_info->groups_sem);
5885
5886         spin_lock(&block_group->space_info->lock);
5887         block_group->space_info->total_bytes -= block_group->key.offset;
5888         block_group->space_info->bytes_readonly -= block_group->key.offset;
        block_group->space_info->full = 0;
5889         spin_unlock(&block_group->space_info->lock);
5891
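        /*
         * Drop both references: one from the lookup above, one that was
         * held by the block group cache tree we just erased this group
         * from.
         */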
5892         put_block_group(block_group);
5893         put_block_group(block_group);
5894
5895         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
5896         if (ret > 0)
5897                 ret = -EIO;
5898         if (ret < 0)
5899                 goto out;
5900
5901         ret = btrfs_del_item(trans, root, path);
5902 out:
5903         btrfs_free_path(path);
5904         return ret;
5905 }