39adb68a653f7a6fe982a7ff5401097c1ffe8b20
[pandora-kernel.git] / fs / btrfs / relocation.c
1 /*
2  * Copyright (C) 2009 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/pagemap.h>
21 #include <linux/writeback.h>
22 #include <linux/blkdev.h>
23 #include <linux/rbtree.h>
24 #include <linux/slab.h>
25 #include "ctree.h"
26 #include "disk-io.h"
27 #include "transaction.h"
28 #include "volumes.h"
29 #include "locking.h"
30 #include "btrfs_inode.h"
31 #include "async-thread.h"
32
33 /*
34  * backref_node, mapping_node and tree_block start with this
35  */
36 struct tree_entry {
37         struct rb_node rb_node;
38         u64 bytenr;
39 };
40
41 /*
42  * present a tree block in the backref cache
43  */
44 struct backref_node {
45         struct rb_node rb_node;
46         u64 bytenr;
47
48         u64 new_bytenr;
49         /* objectid of tree block owner, can be not uptodate */
50         u64 owner;
51         /* link to pending, changed or detached list */
52         struct list_head list;
53         /* list of upper level blocks reference this block */
54         struct list_head upper;
55         /* list of child blocks in the cache */
56         struct list_head lower;
57         /* NULL if this node is not tree root */
58         struct btrfs_root *root;
59         /* extent buffer got by COW the block */
60         struct extent_buffer *eb;
61         /* level of tree block */
62         unsigned int level:8;
63         /* is the block in non-reference counted tree */
64         unsigned int cowonly:1;
65         /* 1 if no child node in the cache */
66         unsigned int lowest:1;
67         /* is the extent buffer locked */
68         unsigned int locked:1;
69         /* has the block been processed */
70         unsigned int processed:1;
71         /* have backrefs of this block been checked */
72         unsigned int checked:1;
73         /*
74          * 1 if corresponding block has been cowed but some upper
75          * level block pointers may not point to the new location
76          */
77         unsigned int pending:1;
78         /*
79          * 1 if the backref node isn't connected to any other
80          * backref node.
81          */
82         unsigned int detached:1;
83 };
84
85 /*
86  * present a block pointer in the backref cache
87  */
88 struct backref_edge {
89         struct list_head list[2];
90         struct backref_node *node[2];
91 };
92
93 #define LOWER   0
94 #define UPPER   1
95
96 struct backref_cache {
97         /* red black tree of all backref nodes in the cache */
98         struct rb_root rb_root;
99         /* for passing backref nodes to btrfs_reloc_cow_block */
100         struct backref_node *path[BTRFS_MAX_LEVEL];
101         /*
102          * list of blocks that have been cowed but some block
103          * pointers in upper level blocks may not reflect the
104          * new location
105          */
106         struct list_head pending[BTRFS_MAX_LEVEL];
107         /* list of backref nodes with no child node */
108         struct list_head leaves;
109         /* list of blocks that have been cowed in current transaction */
110         struct list_head changed;
111         /* list of detached backref node. */
112         struct list_head detached;
113
114         u64 last_trans;
115
116         int nr_nodes;
117         int nr_edges;
118 };
119
120 /*
121  * map address of tree root to tree
122  */
123 struct mapping_node {
124         struct rb_node rb_node;
125         u64 bytenr;
126         void *data;
127 };
128
129 struct mapping_tree {
130         struct rb_root rb_root;
131         spinlock_t lock;
132 };
133
134 /*
135  * present a tree block to process
136  */
137 struct tree_block {
138         struct rb_node rb_node;
139         u64 bytenr;
140         struct btrfs_key key;
141         unsigned int level:8;
142         unsigned int key_ready:1;
143 };
144
145 #define MAX_EXTENTS 128
146
147 struct file_extent_cluster {
148         u64 start;
149         u64 end;
150         u64 boundary[MAX_EXTENTS];
151         unsigned int nr;
152 };
153
154 struct reloc_control {
155         /* block group to relocate */
156         struct btrfs_block_group_cache *block_group;
157         /* extent tree */
158         struct btrfs_root *extent_root;
159         /* inode for moving data */
160         struct inode *data_inode;
161
162         struct btrfs_block_rsv *block_rsv;
163
164         struct backref_cache backref_cache;
165
166         struct file_extent_cluster cluster;
167         /* tree blocks have been processed */
168         struct extent_io_tree processed_blocks;
169         /* map start of tree root to corresponding reloc tree */
170         struct mapping_tree reloc_root_tree;
171         /* list of reloc trees */
172         struct list_head reloc_roots;
173         /* size of metadata reservation for merging reloc trees */
174         u64 merging_rsv_size;
175         /* size of relocated tree nodes */
176         u64 nodes_relocated;
177
178         u64 search_start;
179         u64 extents_found;
180
181         unsigned int stage:8;
182         unsigned int create_reloc_tree:1;
183         unsigned int merge_reloc_tree:1;
184         unsigned int found_file_extent:1;
185         unsigned int commit_transaction:1;
186 };
187
188 /* stages of data relocation */
189 #define MOVE_DATA_EXTENTS       0
190 #define UPDATE_DATA_PTRS        1
191
192 static void remove_backref_node(struct backref_cache *cache,
193                                 struct backref_node *node);
194 static void __mark_block_processed(struct reloc_control *rc,
195                                    struct backref_node *node);
196
197 static void mapping_tree_init(struct mapping_tree *tree)
198 {
199         tree->rb_root = RB_ROOT;
200         spin_lock_init(&tree->lock);
201 }
202
203 static void backref_cache_init(struct backref_cache *cache)
204 {
205         int i;
206         cache->rb_root = RB_ROOT;
207         for (i = 0; i < BTRFS_MAX_LEVEL; i++)
208                 INIT_LIST_HEAD(&cache->pending[i]);
209         INIT_LIST_HEAD(&cache->changed);
210         INIT_LIST_HEAD(&cache->detached);
211         INIT_LIST_HEAD(&cache->leaves);
212 }
213
214 static void backref_cache_cleanup(struct backref_cache *cache)
215 {
216         struct backref_node *node;
217         int i;
218
219         while (!list_empty(&cache->detached)) {
220                 node = list_entry(cache->detached.next,
221                                   struct backref_node, list);
222                 remove_backref_node(cache, node);
223         }
224
225         while (!list_empty(&cache->leaves)) {
226                 node = list_entry(cache->leaves.next,
227                                   struct backref_node, lower);
228                 remove_backref_node(cache, node);
229         }
230
231         cache->last_trans = 0;
232
233         for (i = 0; i < BTRFS_MAX_LEVEL; i++)
234                 BUG_ON(!list_empty(&cache->pending[i]));
235         BUG_ON(!list_empty(&cache->changed));
236         BUG_ON(!list_empty(&cache->detached));
237         BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
238         BUG_ON(cache->nr_nodes);
239         BUG_ON(cache->nr_edges);
240 }
241
242 static struct backref_node *alloc_backref_node(struct backref_cache *cache)
243 {
244         struct backref_node *node;
245
246         node = kzalloc(sizeof(*node), GFP_NOFS);
247         if (node) {
248                 INIT_LIST_HEAD(&node->list);
249                 INIT_LIST_HEAD(&node->upper);
250                 INIT_LIST_HEAD(&node->lower);
251                 RB_CLEAR_NODE(&node->rb_node);
252                 cache->nr_nodes++;
253         }
254         return node;
255 }
256
257 static void free_backref_node(struct backref_cache *cache,
258                               struct backref_node *node)
259 {
260         if (node) {
261                 cache->nr_nodes--;
262                 kfree(node);
263         }
264 }
265
266 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
267 {
268         struct backref_edge *edge;
269
270         edge = kzalloc(sizeof(*edge), GFP_NOFS);
271         if (edge)
272                 cache->nr_edges++;
273         return edge;
274 }
275
276 static void free_backref_edge(struct backref_cache *cache,
277                               struct backref_edge *edge)
278 {
279         if (edge) {
280                 cache->nr_edges--;
281                 kfree(edge);
282         }
283 }
284
285 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
286                                    struct rb_node *node)
287 {
288         struct rb_node **p = &root->rb_node;
289         struct rb_node *parent = NULL;
290         struct tree_entry *entry;
291
292         while (*p) {
293                 parent = *p;
294                 entry = rb_entry(parent, struct tree_entry, rb_node);
295
296                 if (bytenr < entry->bytenr)
297                         p = &(*p)->rb_left;
298                 else if (bytenr > entry->bytenr)
299                         p = &(*p)->rb_right;
300                 else
301                         return parent;
302         }
303
304         rb_link_node(node, parent, p);
305         rb_insert_color(node, root);
306         return NULL;
307 }
308
309 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
310 {
311         struct rb_node *n = root->rb_node;
312         struct tree_entry *entry;
313
314         while (n) {
315                 entry = rb_entry(n, struct tree_entry, rb_node);
316
317                 if (bytenr < entry->bytenr)
318                         n = n->rb_left;
319                 else if (bytenr > entry->bytenr)
320                         n = n->rb_right;
321                 else
322                         return n;
323         }
324         return NULL;
325 }
326
327 /*
328  * walk up backref nodes until reach node presents tree root
329  */
330 static struct backref_node *walk_up_backref(struct backref_node *node,
331                                             struct backref_edge *edges[],
332                                             int *index)
333 {
334         struct backref_edge *edge;
335         int idx = *index;
336
337         while (!list_empty(&node->upper)) {
338                 edge = list_entry(node->upper.next,
339                                   struct backref_edge, list[LOWER]);
340                 edges[idx++] = edge;
341                 node = edge->node[UPPER];
342         }
343         BUG_ON(node->detached);
344         *index = idx;
345         return node;
346 }
347
348 /*
349  * walk down backref nodes to find start of next reference path
350  */
351 static struct backref_node *walk_down_backref(struct backref_edge *edges[],
352                                               int *index)
353 {
354         struct backref_edge *edge;
355         struct backref_node *lower;
356         int idx = *index;
357
358         while (idx > 0) {
359                 edge = edges[idx - 1];
360                 lower = edge->node[LOWER];
361                 if (list_is_last(&edge->list[LOWER], &lower->upper)) {
362                         idx--;
363                         continue;
364                 }
365                 edge = list_entry(edge->list[LOWER].next,
366                                   struct backref_edge, list[LOWER]);
367                 edges[idx - 1] = edge;
368                 *index = idx;
369                 return edge->node[UPPER];
370         }
371         *index = 0;
372         return NULL;
373 }
374
375 static void unlock_node_buffer(struct backref_node *node)
376 {
377         if (node->locked) {
378                 btrfs_tree_unlock(node->eb);
379                 node->locked = 0;
380         }
381 }
382
383 static void drop_node_buffer(struct backref_node *node)
384 {
385         if (node->eb) {
386                 unlock_node_buffer(node);
387                 free_extent_buffer(node->eb);
388                 node->eb = NULL;
389         }
390 }
391
392 static void drop_backref_node(struct backref_cache *tree,
393                               struct backref_node *node)
394 {
395         BUG_ON(!list_empty(&node->upper));
396
397         drop_node_buffer(node);
398         list_del(&node->list);
399         list_del(&node->lower);
400         if (!RB_EMPTY_NODE(&node->rb_node))
401                 rb_erase(&node->rb_node, &tree->rb_root);
402         free_backref_node(tree, node);
403 }
404
405 /*
406  * remove a backref node from the backref cache
407  */
408 static void remove_backref_node(struct backref_cache *cache,
409                                 struct backref_node *node)
410 {
411         struct backref_node *upper;
412         struct backref_edge *edge;
413
414         if (!node)
415                 return;
416
417         BUG_ON(!node->lowest && !node->detached);
418         while (!list_empty(&node->upper)) {
419                 edge = list_entry(node->upper.next, struct backref_edge,
420                                   list[LOWER]);
421                 upper = edge->node[UPPER];
422                 list_del(&edge->list[LOWER]);
423                 list_del(&edge->list[UPPER]);
424                 free_backref_edge(cache, edge);
425
426                 if (RB_EMPTY_NODE(&upper->rb_node)) {
427                         BUG_ON(!list_empty(&node->upper));
428                         drop_backref_node(cache, node);
429                         node = upper;
430                         node->lowest = 1;
431                         continue;
432                 }
433                 /*
434                  * add the node to leaf node list if no other
435                  * child block cached.
436                  */
437                 if (list_empty(&upper->lower)) {
438                         list_add_tail(&upper->lower, &cache->leaves);
439                         upper->lowest = 1;
440                 }
441         }
442
443         drop_backref_node(cache, node);
444 }
445
446 static void update_backref_node(struct backref_cache *cache,
447                                 struct backref_node *node, u64 bytenr)
448 {
449         struct rb_node *rb_node;
450         rb_erase(&node->rb_node, &cache->rb_root);
451         node->bytenr = bytenr;
452         rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
453         BUG_ON(rb_node);
454 }
455
456 /*
457  * update backref cache after a transaction commit
458  */
459 static int update_backref_cache(struct btrfs_trans_handle *trans,
460                                 struct backref_cache *cache)
461 {
462         struct backref_node *node;
463         int level = 0;
464
465         if (cache->last_trans == 0) {
466                 cache->last_trans = trans->transid;
467                 return 0;
468         }
469
470         if (cache->last_trans == trans->transid)
471                 return 0;
472
473         /*
474          * detached nodes are used to avoid unnecessary backref
475          * lookup. transaction commit changes the extent tree.
476          * so the detached nodes are no longer useful.
477          */
478         while (!list_empty(&cache->detached)) {
479                 node = list_entry(cache->detached.next,
480                                   struct backref_node, list);
481                 remove_backref_node(cache, node);
482         }
483
484         while (!list_empty(&cache->changed)) {
485                 node = list_entry(cache->changed.next,
486                                   struct backref_node, list);
487                 list_del_init(&node->list);
488                 BUG_ON(node->pending);
489                 update_backref_node(cache, node, node->new_bytenr);
490         }
491
492         /*
493          * some nodes can be left in the pending list if there were
494          * errors during processing the pending nodes.
495          */
496         for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
497                 list_for_each_entry(node, &cache->pending[level], list) {
498                         BUG_ON(!node->pending);
499                         if (node->bytenr == node->new_bytenr)
500                                 continue;
501                         update_backref_node(cache, node, node->new_bytenr);
502                 }
503         }
504
505         cache->last_trans = 0;
506         return 1;
507 }
508
509 static int should_ignore_root(struct btrfs_root *root)
510 {
511         struct btrfs_root *reloc_root;
512
513         if (!root->ref_cows)
514                 return 0;
515
516         reloc_root = root->reloc_root;
517         if (!reloc_root)
518                 return 0;
519
520         if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
521             root->fs_info->running_transaction->transid - 1)
522                 return 0;
523         /*
524          * if there is reloc tree and it was created in previous
525          * transaction backref lookup can find the reloc tree,
526          * so backref node for the fs tree root is useless for
527          * relocation.
528          */
529         return 1;
530 }
531
532 /*
533  * find reloc tree by address of tree root
534  */
535 static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
536                                           u64 bytenr)
537 {
538         struct rb_node *rb_node;
539         struct mapping_node *node;
540         struct btrfs_root *root = NULL;
541
542         spin_lock(&rc->reloc_root_tree.lock);
543         rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
544         if (rb_node) {
545                 node = rb_entry(rb_node, struct mapping_node, rb_node);
546                 root = (struct btrfs_root *)node->data;
547         }
548         spin_unlock(&rc->reloc_root_tree.lock);
549         return root;
550 }
551
552 static int is_cowonly_root(u64 root_objectid)
553 {
554         if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
555             root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
556             root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
557             root_objectid == BTRFS_DEV_TREE_OBJECTID ||
558             root_objectid == BTRFS_TREE_LOG_OBJECTID ||
559             root_objectid == BTRFS_CSUM_TREE_OBJECTID)
560                 return 1;
561         return 0;
562 }
563
564 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
565                                         u64 root_objectid)
566 {
567         struct btrfs_key key;
568
569         key.objectid = root_objectid;
570         key.type = BTRFS_ROOT_ITEM_KEY;
571         if (is_cowonly_root(root_objectid))
572                 key.offset = 0;
573         else
574                 key.offset = (u64)-1;
575
576         return btrfs_read_fs_root_no_name(fs_info, &key);
577 }
578
579 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
580 static noinline_for_stack
581 struct btrfs_root *find_tree_root(struct reloc_control *rc,
582                                   struct extent_buffer *leaf,
583                                   struct btrfs_extent_ref_v0 *ref0)
584 {
585         struct btrfs_root *root;
586         u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
587         u64 generation = btrfs_ref_generation_v0(leaf, ref0);
588
589         BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);
590
591         root = read_fs_root(rc->extent_root->fs_info, root_objectid);
592         BUG_ON(IS_ERR(root));
593
594         if (root->ref_cows &&
595             generation != btrfs_root_generation(&root->root_item))
596                 return NULL;
597
598         return root;
599 }
600 #endif
601
602 static noinline_for_stack
603 int find_inline_backref(struct extent_buffer *leaf, int slot,
604                         unsigned long *ptr, unsigned long *end)
605 {
606         struct btrfs_extent_item *ei;
607         struct btrfs_tree_block_info *bi;
608         u32 item_size;
609
610         item_size = btrfs_item_size_nr(leaf, slot);
611 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
612         if (item_size < sizeof(*ei)) {
613                 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
614                 return 1;
615         }
616 #endif
617         ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
618         WARN_ON(!(btrfs_extent_flags(leaf, ei) &
619                   BTRFS_EXTENT_FLAG_TREE_BLOCK));
620
621         if (item_size <= sizeof(*ei) + sizeof(*bi)) {
622                 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
623                 return 1;
624         }
625
626         bi = (struct btrfs_tree_block_info *)(ei + 1);
627         *ptr = (unsigned long)(bi + 1);
628         *end = (unsigned long)ei + item_size;
629         return 0;
630 }
631
632 /*
633  * build backref tree for a given tree block. root of the backref tree
634  * corresponds the tree block, leaves of the backref tree correspond
635  * roots of b-trees that reference the tree block.
636  *
637  * the basic idea of this function is check backrefs of a given block
638  * to find upper level blocks that refernece the block, and then check
639  * bakcrefs of these upper level blocks recursively. the recursion stop
640  * when tree root is reached or backrefs for the block is cached.
641  *
642  * NOTE: if we find backrefs for a block are cached, we know backrefs
643  * for all upper level blocks that directly/indirectly reference the
644  * block are also cached.
645  */
646 static noinline_for_stack
647 struct backref_node *build_backref_tree(struct reloc_control *rc,
648                                         struct btrfs_key *node_key,
649                                         int level, u64 bytenr)
650 {
651         struct backref_cache *cache = &rc->backref_cache;
652         struct btrfs_path *path1;
653         struct btrfs_path *path2;
654         struct extent_buffer *eb;
655         struct btrfs_root *root;
656         struct backref_node *cur;
657         struct backref_node *upper;
658         struct backref_node *lower;
659         struct backref_node *node = NULL;
660         struct backref_node *exist = NULL;
661         struct backref_edge *edge;
662         struct rb_node *rb_node;
663         struct btrfs_key key;
664         unsigned long end;
665         unsigned long ptr;
666         LIST_HEAD(list);
667         LIST_HEAD(useless);
668         int cowonly;
669         int ret;
670         int err = 0;
671
672         path1 = btrfs_alloc_path();
673         path2 = btrfs_alloc_path();
674         if (!path1 || !path2) {
675                 err = -ENOMEM;
676                 goto out;
677         }
678
679         node = alloc_backref_node(cache);
680         if (!node) {
681                 err = -ENOMEM;
682                 goto out;
683         }
684
685         node->bytenr = bytenr;
686         node->level = level;
687         node->lowest = 1;
688         cur = node;
689 again:
690         end = 0;
691         ptr = 0;
692         key.objectid = cur->bytenr;
693         key.type = BTRFS_EXTENT_ITEM_KEY;
694         key.offset = (u64)-1;
695
696         path1->search_commit_root = 1;
697         path1->skip_locking = 1;
698         ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
699                                 0, 0);
700         if (ret < 0) {
701                 err = ret;
702                 goto out;
703         }
704         BUG_ON(!ret || !path1->slots[0]);
705
706         path1->slots[0]--;
707
708         WARN_ON(cur->checked);
709         if (!list_empty(&cur->upper)) {
710                 /*
711                  * the backref was added previously when processsing
712                  * backref of type BTRFS_TREE_BLOCK_REF_KEY
713                  */
714                 BUG_ON(!list_is_singular(&cur->upper));
715                 edge = list_entry(cur->upper.next, struct backref_edge,
716                                   list[LOWER]);
717                 BUG_ON(!list_empty(&edge->list[UPPER]));
718                 exist = edge->node[UPPER];
719                 /*
720                  * add the upper level block to pending list if we need
721                  * check its backrefs
722                  */
723                 if (!exist->checked)
724                         list_add_tail(&edge->list[UPPER], &list);
725         } else {
726                 exist = NULL;
727         }
728
729         while (1) {
730                 cond_resched();
731                 eb = path1->nodes[0];
732
733                 if (ptr >= end) {
734                         if (path1->slots[0] >= btrfs_header_nritems(eb)) {
735                                 ret = btrfs_next_leaf(rc->extent_root, path1);
736                                 if (ret < 0) {
737                                         err = ret;
738                                         goto out;
739                                 }
740                                 if (ret > 0)
741                                         break;
742                                 eb = path1->nodes[0];
743                         }
744
745                         btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
746                         if (key.objectid != cur->bytenr) {
747                                 WARN_ON(exist);
748                                 break;
749                         }
750
751                         if (key.type == BTRFS_EXTENT_ITEM_KEY) {
752                                 ret = find_inline_backref(eb, path1->slots[0],
753                                                           &ptr, &end);
754                                 if (ret)
755                                         goto next;
756                         }
757                 }
758
759                 if (ptr < end) {
760                         /* update key for inline back ref */
761                         struct btrfs_extent_inline_ref *iref;
762                         iref = (struct btrfs_extent_inline_ref *)ptr;
763                         key.type = btrfs_extent_inline_ref_type(eb, iref);
764                         key.offset = btrfs_extent_inline_ref_offset(eb, iref);
765                         WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
766                                 key.type != BTRFS_SHARED_BLOCK_REF_KEY);
767                 }
768
769                 if (exist &&
770                     ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
771                       exist->owner == key.offset) ||
772                      (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
773                       exist->bytenr == key.offset))) {
774                         exist = NULL;
775                         goto next;
776                 }
777
778 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
779                 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
780                     key.type == BTRFS_EXTENT_REF_V0_KEY) {
781                         if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
782                                 struct btrfs_extent_ref_v0 *ref0;
783                                 ref0 = btrfs_item_ptr(eb, path1->slots[0],
784                                                 struct btrfs_extent_ref_v0);
785                                 if (key.objectid == key.offset) {
786                                         root = find_tree_root(rc, eb, ref0);
787                                         if (root && !should_ignore_root(root))
788                                                 cur->root = root;
789                                         else
790                                                 list_add(&cur->list, &useless);
791                                         break;
792                                 }
793                                 if (is_cowonly_root(btrfs_ref_root_v0(eb,
794                                                                       ref0)))
795                                         cur->cowonly = 1;
796                         }
797 #else
798                 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
799                 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
800 #endif
801                         if (key.objectid == key.offset) {
802                                 /*
803                                  * only root blocks of reloc trees use
804                                  * backref of this type.
805                                  */
806                                 root = find_reloc_root(rc, cur->bytenr);
807                                 BUG_ON(!root);
808                                 cur->root = root;
809                                 break;
810                         }
811
812                         edge = alloc_backref_edge(cache);
813                         if (!edge) {
814                                 err = -ENOMEM;
815                                 goto out;
816                         }
817                         rb_node = tree_search(&cache->rb_root, key.offset);
818                         if (!rb_node) {
819                                 upper = alloc_backref_node(cache);
820                                 if (!upper) {
821                                         free_backref_edge(cache, edge);
822                                         err = -ENOMEM;
823                                         goto out;
824                                 }
825                                 upper->bytenr = key.offset;
826                                 upper->level = cur->level + 1;
827                                 /*
828                                  *  backrefs for the upper level block isn't
829                                  *  cached, add the block to pending list
830                                  */
831                                 list_add_tail(&edge->list[UPPER], &list);
832                         } else {
833                                 upper = rb_entry(rb_node, struct backref_node,
834                                                  rb_node);
835                                 BUG_ON(!upper->checked);
836                                 INIT_LIST_HEAD(&edge->list[UPPER]);
837                         }
838                         list_add_tail(&edge->list[LOWER], &cur->upper);
839                         edge->node[LOWER] = cur;
840                         edge->node[UPPER] = upper;
841
842                         goto next;
843                 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
844                         goto next;
845                 }
846
847                 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */
848                 root = read_fs_root(rc->extent_root->fs_info, key.offset);
849                 if (IS_ERR(root)) {
850                         err = PTR_ERR(root);
851                         goto out;
852                 }
853
854                 if (!root->ref_cows)
855                         cur->cowonly = 1;
856
857                 if (btrfs_root_level(&root->root_item) == cur->level) {
858                         /* tree root */
859                         BUG_ON(btrfs_root_bytenr(&root->root_item) !=
860                                cur->bytenr);
861                         if (should_ignore_root(root))
862                                 list_add(&cur->list, &useless);
863                         else
864                                 cur->root = root;
865                         break;
866                 }
867
868                 level = cur->level + 1;
869
870                 /*
871                  * searching the tree to find upper level blocks
872                  * reference the block.
873                  */
874                 path2->search_commit_root = 1;
875                 path2->skip_locking = 1;
876                 path2->lowest_level = level;
877                 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
878                 path2->lowest_level = 0;
879                 if (ret < 0) {
880                         err = ret;
881                         goto out;
882                 }
883                 if (ret > 0 && path2->slots[level] > 0)
884                         path2->slots[level]--;
885
886                 eb = path2->nodes[level];
887                 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
888                         cur->bytenr);
889
890                 lower = cur;
891                 for (; level < BTRFS_MAX_LEVEL; level++) {
892                         if (!path2->nodes[level]) {
893                                 BUG_ON(btrfs_root_bytenr(&root->root_item) !=
894                                        lower->bytenr);
895                                 if (should_ignore_root(root))
896                                         list_add(&lower->list, &useless);
897                                 else
898                                         lower->root = root;
899                                 break;
900                         }
901
902                         edge = alloc_backref_edge(cache);
903                         if (!edge) {
904                                 err = -ENOMEM;
905                                 goto out;
906                         }
907
908                         eb = path2->nodes[level];
909                         rb_node = tree_search(&cache->rb_root, eb->start);
910                         if (!rb_node) {
911                                 upper = alloc_backref_node(cache);
912                                 if (!upper) {
913                                         free_backref_edge(cache, edge);
914                                         err = -ENOMEM;
915                                         goto out;
916                                 }
917                                 upper->bytenr = eb->start;
918                                 upper->owner = btrfs_header_owner(eb);
919                                 upper->level = lower->level + 1;
920                                 if (!root->ref_cows)
921                                         upper->cowonly = 1;
922
923                                 /*
924                                  * if we know the block isn't shared
925                                  * we can void checking its backrefs.
926                                  */
927                                 if (btrfs_block_can_be_shared(root, eb))
928                                         upper->checked = 0;
929                                 else
930                                         upper->checked = 1;
931
932                                 /*
933                                  * add the block to pending list if we
934                                  * need check its backrefs. only block
935                                  * at 'cur->level + 1' is added to the
936                                  * tail of pending list. this guarantees
937                                  * we check backrefs from lower level
938                                  * blocks to upper level blocks.
939                                  */
940                                 if (!upper->checked &&
941                                     level == cur->level + 1) {
942                                         list_add_tail(&edge->list[UPPER],
943                                                       &list);
944                                 } else
945                                         INIT_LIST_HEAD(&edge->list[UPPER]);
946                         } else {
947                                 upper = rb_entry(rb_node, struct backref_node,
948                                                  rb_node);
949                                 BUG_ON(!upper->checked);
950                                 INIT_LIST_HEAD(&edge->list[UPPER]);
951                                 if (!upper->owner)
952                                         upper->owner = btrfs_header_owner(eb);
953                         }
954                         list_add_tail(&edge->list[LOWER], &lower->upper);
955                         edge->node[LOWER] = lower;
956                         edge->node[UPPER] = upper;
957
958                         if (rb_node)
959                                 break;
960                         lower = upper;
961                         upper = NULL;
962                 }
963                 btrfs_release_path(root, path2);
964 next:
965                 if (ptr < end) {
966                         ptr += btrfs_extent_inline_ref_size(key.type);
967                         if (ptr >= end) {
968                                 WARN_ON(ptr > end);
969                                 ptr = 0;
970                                 end = 0;
971                         }
972                 }
973                 if (ptr >= end)
974                         path1->slots[0]++;
975         }
976         btrfs_release_path(rc->extent_root, path1);
977
978         cur->checked = 1;
979         WARN_ON(exist);
980
981         /* the pending list isn't empty, take the first block to process */
982         if (!list_empty(&list)) {
983                 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
984                 list_del_init(&edge->list[UPPER]);
985                 cur = edge->node[UPPER];
986                 goto again;
987         }
988
989         /*
990          * everything goes well, connect backref nodes and insert backref nodes
991          * into the cache.
992          */
993         BUG_ON(!node->checked);
994         cowonly = node->cowonly;
995         if (!cowonly) {
996                 rb_node = tree_insert(&cache->rb_root, node->bytenr,
997                                       &node->rb_node);
998                 BUG_ON(rb_node);
999                 list_add_tail(&node->lower, &cache->leaves);
1000         }
1001
1002         list_for_each_entry(edge, &node->upper, list[LOWER])
1003                 list_add_tail(&edge->list[UPPER], &list);
1004
1005         while (!list_empty(&list)) {
1006                 edge = list_entry(list.next, struct backref_edge, list[UPPER]);
1007                 list_del_init(&edge->list[UPPER]);
1008                 upper = edge->node[UPPER];
1009                 if (upper->detached) {
1010                         list_del(&edge->list[LOWER]);
1011                         lower = edge->node[LOWER];
1012                         free_backref_edge(cache, edge);
1013                         if (list_empty(&lower->upper))
1014                                 list_add(&lower->list, &useless);
1015                         continue;
1016                 }
1017
1018                 if (!RB_EMPTY_NODE(&upper->rb_node)) {
1019                         if (upper->lowest) {
1020                                 list_del_init(&upper->lower);
1021                                 upper->lowest = 0;
1022                         }
1023
1024                         list_add_tail(&edge->list[UPPER], &upper->lower);
1025                         continue;
1026                 }
1027
1028                 BUG_ON(!upper->checked);
1029                 BUG_ON(cowonly != upper->cowonly);
1030                 if (!cowonly) {
1031                         rb_node = tree_insert(&cache->rb_root, upper->bytenr,
1032                                               &upper->rb_node);
1033                         BUG_ON(rb_node);
1034                 }
1035
1036                 list_add_tail(&edge->list[UPPER], &upper->lower);
1037
1038                 list_for_each_entry(edge, &upper->upper, list[LOWER])
1039                         list_add_tail(&edge->list[UPPER], &list);
1040         }
1041         /*
1042          * process useless backref nodes. backref nodes for tree leaves
1043          * are deleted from the cache. backref nodes for upper level
1044          * tree blocks are left in the cache to avoid unnecessary backref
1045          * lookup.
1046          */
1047         while (!list_empty(&useless)) {
1048                 upper = list_entry(useless.next, struct backref_node, list);
1049                 list_del_init(&upper->list);
1050                 BUG_ON(!list_empty(&upper->upper));
1051                 if (upper == node)
1052                         node = NULL;
1053                 if (upper->lowest) {
1054                         list_del_init(&upper->lower);
1055                         upper->lowest = 0;
1056                 }
1057                 while (!list_empty(&upper->lower)) {
1058                         edge = list_entry(upper->lower.next,
1059                                           struct backref_edge, list[UPPER]);
1060                         list_del(&edge->list[UPPER]);
1061                         list_del(&edge->list[LOWER]);
1062                         lower = edge->node[LOWER];
1063                         free_backref_edge(cache, edge);
1064
1065                         if (list_empty(&lower->upper))
1066                                 list_add(&lower->list, &useless);
1067                 }
1068                 __mark_block_processed(rc, upper);
1069                 if (upper->level > 0) {
1070                         list_add(&upper->list, &cache->detached);
1071                         upper->detached = 1;
1072                 } else {
1073                         rb_erase(&upper->rb_node, &cache->rb_root);
1074                         free_backref_node(cache, upper);
1075                 }
1076         }
1077 out:
1078         btrfs_free_path(path1);
1079         btrfs_free_path(path2);
1080         if (err) {
1081                 while (!list_empty(&useless)) {
1082                         lower = list_entry(useless.next,
1083                                            struct backref_node, upper);
1084                         list_del_init(&lower->upper);
1085                 }
1086                 upper = node;
1087                 INIT_LIST_HEAD(&list);
1088                 while (upper) {
1089                         if (RB_EMPTY_NODE(&upper->rb_node)) {
1090                                 list_splice_tail(&upper->upper, &list);
1091                                 free_backref_node(cache, upper);
1092                         }
1093
1094                         if (list_empty(&list))
1095                                 break;
1096
1097                         edge = list_entry(list.next, struct backref_edge,
1098                                           list[LOWER]);
1099                         list_del(&edge->list[LOWER]);
1100                         upper = edge->node[UPPER];
1101                         free_backref_edge(cache, edge);
1102                 }
1103                 return ERR_PTR(err);
1104         }
1105         BUG_ON(node && node->detached);
1106         return node;
1107 }
1108
1109 /*
1110  * helper to add backref node for the newly created snapshot.
1111  * the backref node is created by cloning backref node that
1112  * corresponds to root of source tree
1113  */
1114 static int clone_backref_node(struct btrfs_trans_handle *trans,
1115                               struct reloc_control *rc,
1116                               struct btrfs_root *src,
1117                               struct btrfs_root *dest)
1118 {
1119         struct btrfs_root *reloc_root = src->reloc_root;
1120         struct backref_cache *cache = &rc->backref_cache;
1121         struct backref_node *node = NULL;
1122         struct backref_node *new_node;
1123         struct backref_edge *edge;
1124         struct backref_edge *new_edge;
1125         struct rb_node *rb_node;
1126
1127         if (cache->last_trans > 0)
1128                 update_backref_cache(trans, cache);
1129
1130         rb_node = tree_search(&cache->rb_root, src->commit_root->start);
1131         if (rb_node) {
1132                 node = rb_entry(rb_node, struct backref_node, rb_node);
1133                 if (node->detached)
1134                         node = NULL;
1135                 else
1136                         BUG_ON(node->new_bytenr != reloc_root->node->start);
1137         }
1138
1139         if (!node) {
1140                 rb_node = tree_search(&cache->rb_root,
1141                                       reloc_root->commit_root->start);
1142                 if (rb_node) {
1143                         node = rb_entry(rb_node, struct backref_node,
1144                                         rb_node);
1145                         BUG_ON(node->detached);
1146                 }
1147         }
1148
1149         if (!node)
1150                 return 0;
1151
1152         new_node = alloc_backref_node(cache);
1153         if (!new_node)
1154                 return -ENOMEM;
1155
1156         new_node->bytenr = dest->node->start;
1157         new_node->level = node->level;
1158         new_node->lowest = node->lowest;
1159         new_node->root = dest;
1160
1161         if (!node->lowest) {
1162                 list_for_each_entry(edge, &node->lower, list[UPPER]) {
1163                         new_edge = alloc_backref_edge(cache);
1164                         if (!new_edge)
1165                                 goto fail;
1166
1167                         new_edge->node[UPPER] = new_node;
1168                         new_edge->node[LOWER] = edge->node[LOWER];
1169                         list_add_tail(&new_edge->list[UPPER],
1170                                       &new_node->lower);
1171                 }
1172         }
1173
1174         rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
1175                               &new_node->rb_node);
1176         BUG_ON(rb_node);
1177
1178         if (!new_node->lowest) {
1179                 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
1180                         list_add_tail(&new_edge->list[LOWER],
1181                                       &new_edge->node[LOWER]->upper);
1182                 }
1183         }
1184         return 0;
1185 fail:
1186         while (!list_empty(&new_node->lower)) {
1187                 new_edge = list_entry(new_node->lower.next,
1188                                       struct backref_edge, list[UPPER]);
1189                 list_del(&new_edge->list[UPPER]);
1190                 free_backref_edge(cache, new_edge);
1191         }
1192         free_backref_node(cache, new_node);
1193         return -ENOMEM;
1194 }
1195
1196 /*
1197  * helper to add 'address of tree root -> reloc tree' mapping
1198  */
1199 static int __add_reloc_root(struct btrfs_root *root)
1200 {
1201         struct rb_node *rb_node;
1202         struct mapping_node *node;
1203         struct reloc_control *rc = root->fs_info->reloc_ctl;
1204
1205         node = kmalloc(sizeof(*node), GFP_NOFS);
1206         BUG_ON(!node);
1207
1208         node->bytenr = root->node->start;
1209         node->data = root;
1210
1211         spin_lock(&rc->reloc_root_tree.lock);
1212         rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1213                               node->bytenr, &node->rb_node);
1214         spin_unlock(&rc->reloc_root_tree.lock);
1215         BUG_ON(rb_node);
1216
1217         list_add_tail(&root->root_list, &rc->reloc_roots);
1218         return 0;
1219 }
1220
1221 /*
1222  * helper to update/delete the 'address of tree root -> reloc tree'
1223  * mapping
1224  */
1225 static int __update_reloc_root(struct btrfs_root *root, int del)
1226 {
1227         struct rb_node *rb_node;
1228         struct mapping_node *node = NULL;
1229         struct reloc_control *rc = root->fs_info->reloc_ctl;
1230
1231         spin_lock(&rc->reloc_root_tree.lock);
1232         rb_node = tree_search(&rc->reloc_root_tree.rb_root,
1233                               root->commit_root->start);
1234         if (rb_node) {
1235                 node = rb_entry(rb_node, struct mapping_node, rb_node);
1236                 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
1237         }
1238         spin_unlock(&rc->reloc_root_tree.lock);
1239
1240         BUG_ON((struct btrfs_root *)node->data != root);
1241
1242         if (!del) {
1243                 spin_lock(&rc->reloc_root_tree.lock);
1244                 node->bytenr = root->node->start;
1245                 rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
1246                                       node->bytenr, &node->rb_node);
1247                 spin_unlock(&rc->reloc_root_tree.lock);
1248                 BUG_ON(rb_node);
1249         } else {
1250                 list_del_init(&root->root_list);
1251                 kfree(node);
1252         }
1253         return 0;
1254 }
1255
1256 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
1257                                         struct btrfs_root *root, u64 objectid)
1258 {
1259         struct btrfs_root *reloc_root;
1260         struct extent_buffer *eb;
1261         struct btrfs_root_item *root_item;
1262         struct btrfs_key root_key;
1263         int ret;
1264
1265         root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
1266         BUG_ON(!root_item);
1267
1268         root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
1269         root_key.type = BTRFS_ROOT_ITEM_KEY;
1270         root_key.offset = objectid;
1271
1272         if (root->root_key.objectid == objectid) {
1273                 /* called by btrfs_init_reloc_root */
1274                 ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
1275                                       BTRFS_TREE_RELOC_OBJECTID);
1276                 BUG_ON(ret);
1277
1278                 btrfs_set_root_last_snapshot(&root->root_item,
1279                                              trans->transid - 1);
1280         } else {
1281                 /*
1282                  * called by btrfs_reloc_post_snapshot_hook.
1283                  * the source tree is a reloc tree, all tree blocks
1284                  * modified after it was created have RELOC flag
1285                  * set in their headers. so it's OK to not update
1286                  * the 'last_snapshot'.
1287                  */
1288                 ret = btrfs_copy_root(trans, root, root->node, &eb,
1289                                       BTRFS_TREE_RELOC_OBJECTID);
1290                 BUG_ON(ret);
1291         }
1292
1293         memcpy(root_item, &root->root_item, sizeof(*root_item));
1294         btrfs_set_root_bytenr(root_item, eb->start);
1295         btrfs_set_root_level(root_item, btrfs_header_level(eb));
1296         btrfs_set_root_generation(root_item, trans->transid);
1297
1298         if (root->root_key.objectid == objectid) {
1299                 btrfs_set_root_refs(root_item, 0);
1300                 memset(&root_item->drop_progress, 0,
1301                        sizeof(struct btrfs_disk_key));
1302                 root_item->drop_level = 0;
1303         }
1304
1305         btrfs_tree_unlock(eb);
1306         free_extent_buffer(eb);
1307
1308         ret = btrfs_insert_root(trans, root->fs_info->tree_root,
1309                                 &root_key, root_item);
1310         BUG_ON(ret);
1311         kfree(root_item);
1312
1313         reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
1314                                                  &root_key);
1315         BUG_ON(IS_ERR(reloc_root));
1316         reloc_root->last_trans = trans->transid;
1317         return reloc_root;
1318 }
1319
1320 /*
1321  * create reloc tree for a given fs tree. reloc tree is just a
1322  * snapshot of the fs tree with special root objectid.
1323  */
1324 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
1325                           struct btrfs_root *root)
1326 {
1327         struct btrfs_root *reloc_root;
1328         struct reloc_control *rc = root->fs_info->reloc_ctl;
1329         int clear_rsv = 0;
1330
1331         if (root->reloc_root) {
1332                 reloc_root = root->reloc_root;
1333                 reloc_root->last_trans = trans->transid;
1334                 return 0;
1335         }
1336
1337         if (!rc || !rc->create_reloc_tree ||
1338             root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1339                 return 0;
1340
1341         if (!trans->block_rsv) {
1342                 trans->block_rsv = rc->block_rsv;
1343                 clear_rsv = 1;
1344         }
1345         reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
1346         if (clear_rsv)
1347                 trans->block_rsv = NULL;
1348
1349         __add_reloc_root(reloc_root);
1350         root->reloc_root = reloc_root;
1351         return 0;
1352 }
1353
1354 /*
1355  * update root item of reloc tree
1356  */
1357 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
1358                             struct btrfs_root *root)
1359 {
1360         struct btrfs_root *reloc_root;
1361         struct btrfs_root_item *root_item;
1362         int del = 0;
1363         int ret;
1364
1365         if (!root->reloc_root)
1366                 return 0;
1367
1368         reloc_root = root->reloc_root;
1369         root_item = &reloc_root->root_item;
1370
1371         if (root->fs_info->reloc_ctl->merge_reloc_tree &&
1372             btrfs_root_refs(root_item) == 0) {
1373                 root->reloc_root = NULL;
1374                 del = 1;
1375         }
1376
1377         __update_reloc_root(reloc_root, del);
1378
1379         if (reloc_root->commit_root != reloc_root->node) {
1380                 btrfs_set_root_node(root_item, reloc_root->node);
1381                 free_extent_buffer(reloc_root->commit_root);
1382                 reloc_root->commit_root = btrfs_root_node(reloc_root);
1383         }
1384
1385         ret = btrfs_update_root(trans, root->fs_info->tree_root,
1386                                 &reloc_root->root_key, root_item);
1387         BUG_ON(ret);
1388         return 0;
1389 }
1390
1391 /*
1392  * helper to find first cached inode with inode number >= objectid
1393  * in a subvolume
1394  */
1395 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
1396 {
1397         struct rb_node *node;
1398         struct rb_node *prev;
1399         struct btrfs_inode *entry;
1400         struct inode *inode;
1401
1402         spin_lock(&root->inode_lock);
1403 again:
1404         node = root->inode_tree.rb_node;
1405         prev = NULL;
1406         while (node) {
1407                 prev = node;
1408                 entry = rb_entry(node, struct btrfs_inode, rb_node);
1409
1410                 if (objectid < entry->vfs_inode.i_ino)
1411                         node = node->rb_left;
1412                 else if (objectid > entry->vfs_inode.i_ino)
1413                         node = node->rb_right;
1414                 else
1415                         break;
1416         }
1417         if (!node) {
1418                 while (prev) {
1419                         entry = rb_entry(prev, struct btrfs_inode, rb_node);
1420                         if (objectid <= entry->vfs_inode.i_ino) {
1421                                 node = prev;
1422                                 break;
1423                         }
1424                         prev = rb_next(prev);
1425                 }
1426         }
1427         while (node) {
1428                 entry = rb_entry(node, struct btrfs_inode, rb_node);
1429                 inode = igrab(&entry->vfs_inode);
1430                 if (inode) {
1431                         spin_unlock(&root->inode_lock);
1432                         return inode;
1433                 }
1434
1435                 objectid = entry->vfs_inode.i_ino + 1;
1436                 if (cond_resched_lock(&root->inode_lock))
1437                         goto again;
1438
1439                 node = rb_next(node);
1440         }
1441         spin_unlock(&root->inode_lock);
1442         return NULL;
1443 }
1444
1445 static int in_block_group(u64 bytenr,
1446                           struct btrfs_block_group_cache *block_group)
1447 {
1448         if (bytenr >= block_group->key.objectid &&
1449             bytenr < block_group->key.objectid + block_group->key.offset)
1450                 return 1;
1451         return 0;
1452 }
1453
1454 /*
1455  * get new location of data
1456  */
1457 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1458                             u64 bytenr, u64 num_bytes)
1459 {
1460         struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
1461         struct btrfs_path *path;
1462         struct btrfs_file_extent_item *fi;
1463         struct extent_buffer *leaf;
1464         int ret;
1465
1466         path = btrfs_alloc_path();
1467         if (!path)
1468                 return -ENOMEM;
1469
1470         bytenr -= BTRFS_I(reloc_inode)->index_cnt;
1471         ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
1472                                        bytenr, 0);
1473         if (ret < 0)
1474                 goto out;
1475         if (ret > 0) {
1476                 ret = -ENOENT;
1477                 goto out;
1478         }
1479
1480         leaf = path->nodes[0];
1481         fi = btrfs_item_ptr(leaf, path->slots[0],
1482                             struct btrfs_file_extent_item);
1483
1484         BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
1485                btrfs_file_extent_compression(leaf, fi) ||
1486                btrfs_file_extent_encryption(leaf, fi) ||
1487                btrfs_file_extent_other_encoding(leaf, fi));
1488
1489         if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1490                 ret = 1;
1491                 goto out;
1492         }
1493
1494         *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1495         ret = 0;
1496 out:
1497         btrfs_free_path(path);
1498         return ret;
1499 }
1500
1501 /*
1502  * update file extent items in the tree leaf to point to
1503  * the new locations.
1504  */
1505 static noinline_for_stack
1506 int replace_file_extents(struct btrfs_trans_handle *trans,
1507                          struct reloc_control *rc,
1508                          struct btrfs_root *root,
1509                          struct extent_buffer *leaf)
1510 {
1511         struct btrfs_key key;
1512         struct btrfs_file_extent_item *fi;
1513         struct inode *inode = NULL;
1514         u64 parent;
1515         u64 bytenr;
1516         u64 new_bytenr = 0;
1517         u64 num_bytes;
1518         u64 end;
1519         u32 nritems;
1520         u32 i;
1521         int ret;
1522         int first = 1;
1523         int dirty = 0;
1524
1525         if (rc->stage != UPDATE_DATA_PTRS)
1526                 return 0;
1527
1528         /* reloc trees always use full backref */
1529         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
1530                 parent = leaf->start;
1531         else
1532                 parent = 0;
1533
1534         nritems = btrfs_header_nritems(leaf);
1535         for (i = 0; i < nritems; i++) {
1536                 cond_resched();
1537                 btrfs_item_key_to_cpu(leaf, &key, i);
1538                 if (key.type != BTRFS_EXTENT_DATA_KEY)
1539                         continue;
1540                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1541                 if (btrfs_file_extent_type(leaf, fi) ==
1542                     BTRFS_FILE_EXTENT_INLINE)
1543                         continue;
1544                 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1545                 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
1546                 if (bytenr == 0)
1547                         continue;
1548                 if (!in_block_group(bytenr, rc->block_group))
1549                         continue;
1550
1551                 /*
1552                  * if we are modifying block in fs tree, wait for readpage
1553                  * to complete and drop the extent cache
1554                  */
1555                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
1556                         if (first) {
1557                                 inode = find_next_inode(root, key.objectid);
1558                                 first = 0;
1559                         } else if (inode && inode->i_ino < key.objectid) {
1560                                 btrfs_add_delayed_iput(inode);
1561                                 inode = find_next_inode(root, key.objectid);
1562                         }
1563                         if (inode && inode->i_ino == key.objectid) {
1564                                 end = key.offset +
1565                                       btrfs_file_extent_num_bytes(leaf, fi);
1566                                 WARN_ON(!IS_ALIGNED(key.offset,
1567                                                     root->sectorsize));
1568                                 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1569                                 end--;
1570                                 ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
1571                                                       key.offset, end,
1572                                                       GFP_NOFS);
1573                                 if (!ret)
1574                                         continue;
1575
1576                                 btrfs_drop_extent_cache(inode, key.offset, end,
1577                                                         1);
1578                                 unlock_extent(&BTRFS_I(inode)->io_tree,
1579                                               key.offset, end, GFP_NOFS);
1580                         }
1581                 }
1582
1583                 ret = get_new_location(rc->data_inode, &new_bytenr,
1584                                        bytenr, num_bytes);
1585                 if (ret > 0) {
1586                         WARN_ON(1);
1587                         continue;
1588                 }
1589                 BUG_ON(ret < 0);
1590
1591                 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1592                 dirty = 1;
1593
1594                 key.offset -= btrfs_file_extent_offset(leaf, fi);
1595                 ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
1596                                            num_bytes, parent,
1597                                            btrfs_header_owner(leaf),
1598                                            key.objectid, key.offset);
1599                 BUG_ON(ret);
1600
1601                 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1602                                         parent, btrfs_header_owner(leaf),
1603                                         key.objectid, key.offset);
1604                 BUG_ON(ret);
1605         }
1606         if (dirty)
1607                 btrfs_mark_buffer_dirty(leaf);
1608         if (inode)
1609                 btrfs_add_delayed_iput(inode);
1610         return 0;
1611 }
1612
1613 static noinline_for_stack
1614 int memcmp_node_keys(struct extent_buffer *eb, int slot,
1615                      struct btrfs_path *path, int level)
1616 {
1617         struct btrfs_disk_key key1;
1618         struct btrfs_disk_key key2;
1619         btrfs_node_key(eb, &key1, slot);
1620         btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
1621         return memcmp(&key1, &key2, sizeof(key1));
1622 }
1623
1624 /*
1625  * try to replace tree blocks in fs tree with the new blocks
1626  * in reloc tree. tree blocks haven't been modified since the
1627  * reloc tree was create can be replaced.
1628  *
1629  * if a block was replaced, level of the block + 1 is returned.
1630  * if no block got replaced, 0 is returned. if there are other
1631  * errors, a negative error number is returned.
1632  */
1633 static noinline_for_stack
1634 int replace_path(struct btrfs_trans_handle *trans,
1635                  struct btrfs_root *dest, struct btrfs_root *src,
1636                  struct btrfs_path *path, struct btrfs_key *next_key,
1637                  int lowest_level, int max_level)
1638 {
1639         struct extent_buffer *eb;
1640         struct extent_buffer *parent;
1641         struct btrfs_key key;
1642         u64 old_bytenr;
1643         u64 new_bytenr;
1644         u64 old_ptr_gen;
1645         u64 new_ptr_gen;
1646         u64 last_snapshot;
1647         u32 blocksize;
1648         int cow = 0;
1649         int level;
1650         int ret;
1651         int slot;
1652
1653         BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
1654         BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
1655
1656         last_snapshot = btrfs_root_last_snapshot(&src->root_item);
1657 again:
1658         slot = path->slots[lowest_level];
1659         btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);
1660
1661         eb = btrfs_lock_root_node(dest);
1662         btrfs_set_lock_blocking(eb);
1663         level = btrfs_header_level(eb);
1664
1665         if (level < lowest_level) {
1666                 btrfs_tree_unlock(eb);
1667                 free_extent_buffer(eb);
1668                 return 0;
1669         }
1670
1671         if (cow) {
1672                 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
1673                 BUG_ON(ret);
1674         }
1675         btrfs_set_lock_blocking(eb);
1676
1677         if (next_key) {
1678                 next_key->objectid = (u64)-1;
1679                 next_key->type = (u8)-1;
1680                 next_key->offset = (u64)-1;
1681         }
1682
1683         parent = eb;
1684         while (1) {
1685                 level = btrfs_header_level(parent);
1686                 BUG_ON(level < lowest_level);
1687
1688                 ret = btrfs_bin_search(parent, &key, level, &slot);
1689                 if (ret && slot > 0)
1690                         slot--;
1691
1692                 if (next_key && slot + 1 < btrfs_header_nritems(parent))
1693                         btrfs_node_key_to_cpu(parent, next_key, slot + 1);
1694
1695                 old_bytenr = btrfs_node_blockptr(parent, slot);
1696                 blocksize = btrfs_level_size(dest, level - 1);
1697                 old_ptr_gen = btrfs_node_ptr_generation(parent, slot);
1698
1699                 if (level <= max_level) {
1700                         eb = path->nodes[level];
1701                         new_bytenr = btrfs_node_blockptr(eb,
1702                                                         path->slots[level]);
1703                         new_ptr_gen = btrfs_node_ptr_generation(eb,
1704                                                         path->slots[level]);
1705                 } else {
1706                         new_bytenr = 0;
1707                         new_ptr_gen = 0;
1708                 }
1709
1710                 if (new_bytenr > 0 && new_bytenr == old_bytenr) {
1711                         WARN_ON(1);
1712                         ret = level;
1713                         break;
1714                 }
1715
1716                 if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
1717                     memcmp_node_keys(parent, slot, path, level)) {
1718                         if (level <= lowest_level) {
1719                                 ret = 0;
1720                                 break;
1721                         }
1722
1723                         eb = read_tree_block(dest, old_bytenr, blocksize,
1724                                              old_ptr_gen);
1725                         btrfs_tree_lock(eb);
1726                         if (cow) {
1727                                 ret = btrfs_cow_block(trans, dest, eb, parent,
1728                                                       slot, &eb);
1729                                 BUG_ON(ret);
1730                         }
1731                         btrfs_set_lock_blocking(eb);
1732
1733                         btrfs_tree_unlock(parent);
1734                         free_extent_buffer(parent);
1735
1736                         parent = eb;
1737                         continue;
1738                 }
1739
1740                 if (!cow) {
1741                         btrfs_tree_unlock(parent);
1742                         free_extent_buffer(parent);
1743                         cow = 1;
1744                         goto again;
1745                 }
1746
1747                 btrfs_node_key_to_cpu(path->nodes[level], &key,
1748                                       path->slots[level]);
1749                 btrfs_release_path(src, path);
1750
1751                 path->lowest_level = level;
1752                 ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
1753                 path->lowest_level = 0;
1754                 BUG_ON(ret);
1755
1756                 /*
1757                  * swap blocks in fs tree and reloc tree.
1758                  */
1759                 btrfs_set_node_blockptr(parent, slot, new_bytenr);
1760                 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
1761                 btrfs_mark_buffer_dirty(parent);
1762
1763                 btrfs_set_node_blockptr(path->nodes[level],
1764                                         path->slots[level], old_bytenr);
1765                 btrfs_set_node_ptr_generation(path->nodes[level],
1766                                               path->slots[level], old_ptr_gen);
1767                 btrfs_mark_buffer_dirty(path->nodes[level]);
1768
1769                 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
1770                                         path->nodes[level]->start,
1771                                         src->root_key.objectid, level - 1, 0);
1772                 BUG_ON(ret);
1773                 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
1774                                         0, dest->root_key.objectid, level - 1,
1775                                         0);
1776                 BUG_ON(ret);
1777
1778                 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
1779                                         path->nodes[level]->start,
1780                                         src->root_key.objectid, level - 1, 0);
1781                 BUG_ON(ret);
1782
1783                 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
1784                                         0, dest->root_key.objectid, level - 1,
1785                                         0);
1786                 BUG_ON(ret);
1787
1788                 btrfs_unlock_up_safe(path, 0);
1789
1790                 ret = level;
1791                 break;
1792         }
1793         btrfs_tree_unlock(parent);
1794         free_extent_buffer(parent);
1795         return ret;
1796 }
1797
1798 /*
1799  * helper to find next relocated block in reloc tree
1800  */
1801 static noinline_for_stack
1802 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1803                        int *level)
1804 {
1805         struct extent_buffer *eb;
1806         int i;
1807         u64 last_snapshot;
1808         u32 nritems;
1809
1810         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1811
1812         for (i = 0; i < *level; i++) {
1813                 free_extent_buffer(path->nodes[i]);
1814                 path->nodes[i] = NULL;
1815         }
1816
1817         for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
1818                 eb = path->nodes[i];
1819                 nritems = btrfs_header_nritems(eb);
1820                 while (path->slots[i] + 1 < nritems) {
1821                         path->slots[i]++;
1822                         if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
1823                             last_snapshot)
1824                                 continue;
1825
1826                         *level = i;
1827                         return 0;
1828                 }
1829                 free_extent_buffer(path->nodes[i]);
1830                 path->nodes[i] = NULL;
1831         }
1832         return 1;
1833 }
1834
1835 /*
1836  * walk down reloc tree to find relocated block of lowest level
1837  */
1838 static noinline_for_stack
1839 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
1840                          int *level)
1841 {
1842         struct extent_buffer *eb = NULL;
1843         int i;
1844         u64 bytenr;
1845         u64 ptr_gen = 0;
1846         u64 last_snapshot;
1847         u32 blocksize;
1848         u32 nritems;
1849
1850         last_snapshot = btrfs_root_last_snapshot(&root->root_item);
1851
1852         for (i = *level; i > 0; i--) {
1853                 eb = path->nodes[i];
1854                 nritems = btrfs_header_nritems(eb);
1855                 while (path->slots[i] < nritems) {
1856                         ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
1857                         if (ptr_gen > last_snapshot)
1858                                 break;
1859                         path->slots[i]++;
1860                 }
1861                 if (path->slots[i] >= nritems) {
1862                         if (i == *level)
1863                                 break;
1864                         *level = i + 1;
1865                         return 0;
1866                 }
1867                 if (i == 1) {
1868                         *level = i;
1869                         return 0;
1870                 }
1871
1872                 bytenr = btrfs_node_blockptr(eb, path->slots[i]);
1873                 blocksize = btrfs_level_size(root, i - 1);
1874                 eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
1875                 BUG_ON(btrfs_header_level(eb) != i - 1);
1876                 path->nodes[i - 1] = eb;
1877                 path->slots[i - 1] = 0;
1878         }
1879         return 1;
1880 }
1881
1882 /*
1883  * invalidate extent cache for file extents whose key in range of
1884  * [min_key, max_key)
1885  */
1886 static int invalidate_extent_cache(struct btrfs_root *root,
1887                                    struct btrfs_key *min_key,
1888                                    struct btrfs_key *max_key)
1889 {
1890         struct inode *inode = NULL;
1891         u64 objectid;
1892         u64 start, end;
1893
1894         objectid = min_key->objectid;
1895         while (1) {
1896                 cond_resched();
1897                 iput(inode);
1898
1899                 if (objectid > max_key->objectid)
1900                         break;
1901
1902                 inode = find_next_inode(root, objectid);
1903                 if (!inode)
1904                         break;
1905
1906                 if (inode->i_ino > max_key->objectid) {
1907                         iput(inode);
1908                         break;
1909                 }
1910
1911                 objectid = inode->i_ino + 1;
1912                 if (!S_ISREG(inode->i_mode))
1913                         continue;
1914
1915                 if (unlikely(min_key->objectid == inode->i_ino)) {
1916                         if (min_key->type > BTRFS_EXTENT_DATA_KEY)
1917                                 continue;
1918                         if (min_key->type < BTRFS_EXTENT_DATA_KEY)
1919                                 start = 0;
1920                         else {
1921                                 start = min_key->offset;
1922                                 WARN_ON(!IS_ALIGNED(start, root->sectorsize));
1923                         }
1924                 } else {
1925                         start = 0;
1926                 }
1927
1928                 if (unlikely(max_key->objectid == inode->i_ino)) {
1929                         if (max_key->type < BTRFS_EXTENT_DATA_KEY)
1930                                 continue;
1931                         if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
1932                                 end = (u64)-1;
1933                         } else {
1934                                 if (max_key->offset == 0)
1935                                         continue;
1936                                 end = max_key->offset;
1937                                 WARN_ON(!IS_ALIGNED(end, root->sectorsize));
1938                                 end--;
1939                         }
1940                 } else {
1941                         end = (u64)-1;
1942                 }
1943
1944                 /* the lock_extent waits for readpage to complete */
1945                 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1946                 btrfs_drop_extent_cache(inode, start, end, 1);
1947                 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
1948         }
1949         return 0;
1950 }
1951
1952 static int find_next_key(struct btrfs_path *path, int level,
1953                          struct btrfs_key *key)
1954
1955 {
1956         while (level < BTRFS_MAX_LEVEL) {
1957                 if (!path->nodes[level])
1958                         break;
1959                 if (path->slots[level] + 1 <
1960                     btrfs_header_nritems(path->nodes[level])) {
1961                         btrfs_node_key_to_cpu(path->nodes[level], key,
1962                                               path->slots[level] + 1);
1963                         return 0;
1964                 }
1965                 level++;
1966         }
1967         return 1;
1968 }
1969
1970 /*
1971  * merge the relocated tree blocks in reloc tree with corresponding
1972  * fs tree.
1973  */
1974 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
1975                                                struct btrfs_root *root)
1976 {
1977         LIST_HEAD(inode_list);
1978         struct btrfs_key key;
1979         struct btrfs_key next_key;
1980         struct btrfs_trans_handle *trans;
1981         struct btrfs_root *reloc_root;
1982         struct btrfs_root_item *root_item;
1983         struct btrfs_path *path;
1984         struct extent_buffer *leaf;
1985         unsigned long nr;
1986         int level;
1987         int max_level;
1988         int replaced = 0;
1989         int ret;
1990         int err = 0;
1991         u32 min_reserved;
1992
1993         path = btrfs_alloc_path();
1994         if (!path)
1995                 return -ENOMEM;
1996
1997         reloc_root = root->reloc_root;
1998         root_item = &reloc_root->root_item;
1999
2000         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2001                 level = btrfs_root_level(root_item);
2002                 extent_buffer_get(reloc_root->node);
2003                 path->nodes[level] = reloc_root->node;
2004                 path->slots[level] = 0;
2005         } else {
2006                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2007
2008                 level = root_item->drop_level;
2009                 BUG_ON(level == 0);
2010                 path->lowest_level = level;
2011                 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
2012                 path->lowest_level = 0;
2013                 if (ret < 0) {
2014                         btrfs_free_path(path);
2015                         return ret;
2016                 }
2017
2018                 btrfs_node_key_to_cpu(path->nodes[level], &next_key,
2019                                       path->slots[level]);
2020                 WARN_ON(memcmp(&key, &next_key, sizeof(key)));
2021
2022                 btrfs_unlock_up_safe(path, 0);
2023         }
2024
2025         min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2026         memset(&next_key, 0, sizeof(next_key));
2027
2028         while (1) {
2029                 trans = btrfs_start_transaction(root, 0);
2030                 trans->block_rsv = rc->block_rsv;
2031
2032                 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv,
2033                                             min_reserved, 0);
2034                 if (ret) {
2035                         BUG_ON(ret != -EAGAIN);
2036                         ret = btrfs_commit_transaction(trans, root);
2037                         BUG_ON(ret);
2038                         continue;
2039                 }
2040
2041                 replaced = 0;
2042                 max_level = level;
2043
2044                 ret = walk_down_reloc_tree(reloc_root, path, &level);
2045                 if (ret < 0) {
2046                         err = ret;
2047                         goto out;
2048                 }
2049                 if (ret > 0)
2050                         break;
2051
2052                 if (!find_next_key(path, level, &key) &&
2053                     btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
2054                         ret = 0;
2055                 } else {
2056                         ret = replace_path(trans, root, reloc_root, path,
2057                                            &next_key, level, max_level);
2058                 }
2059                 if (ret < 0) {
2060                         err = ret;
2061                         goto out;
2062                 }
2063
2064                 if (ret > 0) {
2065                         level = ret;
2066                         btrfs_node_key_to_cpu(path->nodes[level], &key,
2067                                               path->slots[level]);
2068                         replaced = 1;
2069                 }
2070
2071                 ret = walk_up_reloc_tree(reloc_root, path, &level);
2072                 if (ret > 0)
2073                         break;
2074
2075                 BUG_ON(level == 0);
2076                 /*
2077                  * save the merging progress in the drop_progress.
2078                  * this is OK since root refs == 1 in this case.
2079                  */
2080                 btrfs_node_key(path->nodes[level], &root_item->drop_progress,
2081                                path->slots[level]);
2082                 root_item->drop_level = level;
2083
2084                 nr = trans->blocks_used;
2085                 btrfs_end_transaction_throttle(trans, root);
2086
2087                 btrfs_btree_balance_dirty(root, nr);
2088
2089                 if (replaced && rc->stage == UPDATE_DATA_PTRS)
2090                         invalidate_extent_cache(root, &key, &next_key);
2091         }
2092
2093         /*
2094          * handle the case only one block in the fs tree need to be
2095          * relocated and the block is tree root.
2096          */
2097         leaf = btrfs_lock_root_node(root);
2098         ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
2099         btrfs_tree_unlock(leaf);
2100         free_extent_buffer(leaf);
2101         if (ret < 0)
2102                 err = ret;
2103 out:
2104         btrfs_free_path(path);
2105
2106         if (err == 0) {
2107                 memset(&root_item->drop_progress, 0,
2108                        sizeof(root_item->drop_progress));
2109                 root_item->drop_level = 0;
2110                 btrfs_set_root_refs(root_item, 0);
2111                 btrfs_update_reloc_root(trans, root);
2112         }
2113
2114         nr = trans->blocks_used;
2115         btrfs_end_transaction_throttle(trans, root);
2116
2117         btrfs_btree_balance_dirty(root, nr);
2118
2119         if (replaced && rc->stage == UPDATE_DATA_PTRS)
2120                 invalidate_extent_cache(root, &key, &next_key);
2121
2122         return err;
2123 }
2124
2125 static noinline_for_stack
2126 int prepare_to_merge(struct reloc_control *rc, int err)
2127 {
2128         struct btrfs_root *root = rc->extent_root;
2129         struct btrfs_root *reloc_root;
2130         struct btrfs_trans_handle *trans;
2131         LIST_HEAD(reloc_roots);
2132         u64 num_bytes = 0;
2133         int ret;
2134
2135         mutex_lock(&root->fs_info->trans_mutex);
2136         rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
2137         rc->merging_rsv_size += rc->nodes_relocated * 2;
2138         mutex_unlock(&root->fs_info->trans_mutex);
2139 again:
2140         if (!err) {
2141                 num_bytes = rc->merging_rsv_size;
2142                 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
2143                                           num_bytes);
2144                 if (ret)
2145                         err = ret;
2146         }
2147
2148         trans = btrfs_join_transaction(rc->extent_root, 1);
2149
2150         if (!err) {
2151                 if (num_bytes != rc->merging_rsv_size) {
2152                         btrfs_end_transaction(trans, rc->extent_root);
2153                         btrfs_block_rsv_release(rc->extent_root,
2154                                                 rc->block_rsv, num_bytes);
2155                         goto again;
2156                 }
2157         }
2158
2159         rc->merge_reloc_tree = 1;
2160
2161         while (!list_empty(&rc->reloc_roots)) {
2162                 reloc_root = list_entry(rc->reloc_roots.next,
2163                                         struct btrfs_root, root_list);
2164                 list_del_init(&reloc_root->root_list);
2165
2166                 root = read_fs_root(reloc_root->fs_info,
2167                                     reloc_root->root_key.offset);
2168                 BUG_ON(IS_ERR(root));
2169                 BUG_ON(root->reloc_root != reloc_root);
2170
2171                 /*
2172                  * set reference count to 1, so btrfs_recover_relocation
2173                  * knows it should resumes merging
2174                  */
2175                 if (!err)
2176                         btrfs_set_root_refs(&reloc_root->root_item, 1);
2177                 btrfs_update_reloc_root(trans, root);
2178
2179                 list_add(&reloc_root->root_list, &reloc_roots);
2180         }
2181
2182         list_splice(&reloc_roots, &rc->reloc_roots);
2183
2184         if (!err)
2185                 btrfs_commit_transaction(trans, rc->extent_root);
2186         else
2187                 btrfs_end_transaction(trans, rc->extent_root);
2188         return err;
2189 }
2190
2191 static noinline_for_stack
2192 int merge_reloc_roots(struct reloc_control *rc)
2193 {
2194         struct btrfs_root *root;
2195         struct btrfs_root *reloc_root;
2196         LIST_HEAD(reloc_roots);
2197         int found = 0;
2198         int ret;
2199 again:
2200         root = rc->extent_root;
2201         mutex_lock(&root->fs_info->trans_mutex);
2202         list_splice_init(&rc->reloc_roots, &reloc_roots);
2203         mutex_unlock(&root->fs_info->trans_mutex);
2204
2205         while (!list_empty(&reloc_roots)) {
2206                 found = 1;
2207                 reloc_root = list_entry(reloc_roots.next,
2208                                         struct btrfs_root, root_list);
2209
2210                 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
2211                         root = read_fs_root(reloc_root->fs_info,
2212                                             reloc_root->root_key.offset);
2213                         BUG_ON(IS_ERR(root));
2214                         BUG_ON(root->reloc_root != reloc_root);
2215
2216                         ret = merge_reloc_root(rc, root);
2217                         BUG_ON(ret);
2218                 } else {
2219                         list_del_init(&reloc_root->root_list);
2220                 }
2221                 btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0);
2222         }
2223
2224         if (found) {
2225                 found = 0;
2226                 goto again;
2227         }
2228         BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
2229         return 0;
2230 }
2231
2232 static void free_block_list(struct rb_root *blocks)
2233 {
2234         struct tree_block *block;
2235         struct rb_node *rb_node;
2236         while ((rb_node = rb_first(blocks))) {
2237                 block = rb_entry(rb_node, struct tree_block, rb_node);
2238                 rb_erase(rb_node, blocks);
2239                 kfree(block);
2240         }
2241 }
2242
2243 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
2244                                       struct btrfs_root *reloc_root)
2245 {
2246         struct btrfs_root *root;
2247
2248         if (reloc_root->last_trans == trans->transid)
2249                 return 0;
2250
2251         root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
2252         BUG_ON(IS_ERR(root));
2253         BUG_ON(root->reloc_root != reloc_root);
2254
2255         return btrfs_record_root_in_trans(trans, root);
2256 }
2257
2258 static noinline_for_stack
2259 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
2260                                      struct reloc_control *rc,
2261                                      struct backref_node *node,
2262                                      struct backref_edge *edges[], int *nr)
2263 {
2264         struct backref_node *next;
2265         struct btrfs_root *root;
2266         int index = 0;
2267
2268         next = node;
2269         while (1) {
2270                 cond_resched();
2271                 next = walk_up_backref(next, edges, &index);
2272                 root = next->root;
2273                 BUG_ON(!root);
2274                 BUG_ON(!root->ref_cows);
2275
2276                 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
2277                         record_reloc_root_in_trans(trans, root);
2278                         break;
2279                 }
2280
2281                 btrfs_record_root_in_trans(trans, root);
2282                 root = root->reloc_root;
2283
2284                 if (next->new_bytenr != root->node->start) {
2285                         BUG_ON(next->new_bytenr);
2286                         BUG_ON(!list_empty(&next->list));
2287                         next->new_bytenr = root->node->start;
2288                         next->root = root;
2289                         list_add_tail(&next->list,
2290                                       &rc->backref_cache.changed);
2291                         __mark_block_processed(rc, next);
2292                         break;
2293                 }
2294
2295                 WARN_ON(1);
2296                 root = NULL;
2297                 next = walk_down_backref(edges, &index);
2298                 if (!next || next->level <= node->level)
2299                         break;
2300         }
2301         if (!root)
2302                 return NULL;
2303
2304         *nr = index;
2305         next = node;
2306         /* setup backref node path for btrfs_reloc_cow_block */
2307         while (1) {
2308                 rc->backref_cache.path[next->level] = next;
2309                 if (--index < 0)
2310                         break;
2311                 next = edges[index]->node[UPPER];
2312         }
2313         return root;
2314 }
2315
2316 /*
2317  * select a tree root for relocation. return NULL if the block
2318  * is reference counted. we should use do_relocation() in this
2319  * case. return a tree root pointer if the block isn't reference
2320  * counted. return -ENOENT if the block is root of reloc tree.
2321  */
2322 static noinline_for_stack
2323 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
2324                                    struct backref_node *node)
2325 {
2326         struct backref_node *next;
2327         struct btrfs_root *root;
2328         struct btrfs_root *fs_root = NULL;
2329         struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2330         int index = 0;
2331
2332         next = node;
2333         while (1) {
2334                 cond_resched();
2335                 next = walk_up_backref(next, edges, &index);
2336                 root = next->root;
2337                 BUG_ON(!root);
2338
2339                 /* no other choice for non-refernce counted tree */
2340                 if (!root->ref_cows)
2341                         return root;
2342
2343                 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)
2344                         fs_root = root;
2345
2346                 if (next != node)
2347                         return NULL;
2348
2349                 next = walk_down_backref(edges, &index);
2350                 if (!next || next->level <= node->level)
2351                         break;
2352         }
2353
2354         if (!fs_root)
2355                 return ERR_PTR(-ENOENT);
2356         return fs_root;
2357 }
2358
2359 static noinline_for_stack
2360 u64 calcu_metadata_size(struct reloc_control *rc,
2361                         struct backref_node *node, int reserve)
2362 {
2363         struct backref_node *next = node;
2364         struct backref_edge *edge;
2365         struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2366         u64 num_bytes = 0;
2367         int index = 0;
2368
2369         BUG_ON(reserve && node->processed);
2370
2371         while (next) {
2372                 cond_resched();
2373                 while (1) {
2374                         if (next->processed && (reserve || next != node))
2375                                 break;
2376
2377                         num_bytes += btrfs_level_size(rc->extent_root,
2378                                                       next->level);
2379
2380                         if (list_empty(&next->upper))
2381                                 break;
2382
2383                         edge = list_entry(next->upper.next,
2384                                           struct backref_edge, list[LOWER]);
2385                         edges[index++] = edge;
2386                         next = edge->node[UPPER];
2387                 }
2388                 next = walk_down_backref(edges, &index);
2389         }
2390         return num_bytes;
2391 }
2392
2393 static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2394                                   struct reloc_control *rc,
2395                                   struct backref_node *node)
2396 {
2397         struct btrfs_root *root = rc->extent_root;
2398         u64 num_bytes;
2399         int ret;
2400
2401         num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2402
2403         trans->block_rsv = rc->block_rsv;
2404         ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
2405         if (ret) {
2406                 if (ret == -EAGAIN)
2407                         rc->commit_transaction = 1;
2408                 return ret;
2409         }
2410
2411         return 0;
2412 }
2413
2414 static void release_metadata_space(struct reloc_control *rc,
2415                                    struct backref_node *node)
2416 {
2417         u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2;
2418         btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes);
2419 }
2420
2421 /*
2422  * relocate a block tree, and then update pointers in upper level
2423  * blocks that reference the block to point to the new location.
2424  *
2425  * if called by link_to_upper, the block has already been relocated.
2426  * in that case this function just updates pointers.
2427  */
2428 static int do_relocation(struct btrfs_trans_handle *trans,
2429                          struct reloc_control *rc,
2430                          struct backref_node *node,
2431                          struct btrfs_key *key,
2432                          struct btrfs_path *path, int lowest)
2433 {
2434         struct backref_node *upper;
2435         struct backref_edge *edge;
2436         struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2437         struct btrfs_root *root;
2438         struct extent_buffer *eb;
2439         u32 blocksize;
2440         u64 bytenr;
2441         u64 generation;
2442         int nr;
2443         int slot;
2444         int ret;
2445         int err = 0;
2446
2447         BUG_ON(lowest && node->eb);
2448
2449         path->lowest_level = node->level + 1;
2450         rc->backref_cache.path[node->level] = node;
2451         list_for_each_entry(edge, &node->upper, list[LOWER]) {
2452                 cond_resched();
2453
2454                 upper = edge->node[UPPER];
2455                 root = select_reloc_root(trans, rc, upper, edges, &nr);
2456                 BUG_ON(!root);
2457
2458                 if (upper->eb && !upper->locked) {
2459                         if (!lowest) {
2460                                 ret = btrfs_bin_search(upper->eb, key,
2461                                                        upper->level, &slot);
2462                                 BUG_ON(ret);
2463                                 bytenr = btrfs_node_blockptr(upper->eb, slot);
2464                                 if (node->eb->start == bytenr)
2465                                         goto next;
2466                         }
2467                         drop_node_buffer(upper);
2468                 }
2469
2470                 if (!upper->eb) {
2471                         ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2472                         if (ret < 0) {
2473                                 err = ret;
2474                                 break;
2475                         }
2476                         BUG_ON(ret > 0);
2477
2478                         if (!upper->eb) {
2479                                 upper->eb = path->nodes[upper->level];
2480                                 path->nodes[upper->level] = NULL;
2481                         } else {
2482                                 BUG_ON(upper->eb != path->nodes[upper->level]);
2483                         }
2484
2485                         upper->locked = 1;
2486                         path->locks[upper->level] = 0;
2487
2488                         slot = path->slots[upper->level];
2489                         btrfs_release_path(NULL, path);
2490                 } else {
2491                         ret = btrfs_bin_search(upper->eb, key, upper->level,
2492                                                &slot);
2493                         BUG_ON(ret);
2494                 }
2495
2496                 bytenr = btrfs_node_blockptr(upper->eb, slot);
2497                 if (lowest) {
2498                         BUG_ON(bytenr != node->bytenr);
2499                 } else {
2500                         if (node->eb->start == bytenr)
2501                                 goto next;
2502                 }
2503
2504                 blocksize = btrfs_level_size(root, node->level);
2505                 generation = btrfs_node_ptr_generation(upper->eb, slot);
2506                 eb = read_tree_block(root, bytenr, blocksize, generation);
2507                 btrfs_tree_lock(eb);
2508                 btrfs_set_lock_blocking(eb);
2509
2510                 if (!node->eb) {
2511                         ret = btrfs_cow_block(trans, root, eb, upper->eb,
2512                                               slot, &eb);
2513                         btrfs_tree_unlock(eb);
2514                         free_extent_buffer(eb);
2515                         if (ret < 0) {
2516                                 err = ret;
2517                                 goto next;
2518                         }
2519                         BUG_ON(node->eb != eb);
2520                 } else {
2521                         btrfs_set_node_blockptr(upper->eb, slot,
2522                                                 node->eb->start);
2523                         btrfs_set_node_ptr_generation(upper->eb, slot,
2524                                                       trans->transid);
2525                         btrfs_mark_buffer_dirty(upper->eb);
2526
2527                         ret = btrfs_inc_extent_ref(trans, root,
2528                                                 node->eb->start, blocksize,
2529                                                 upper->eb->start,
2530                                                 btrfs_header_owner(upper->eb),
2531                                                 node->level, 0);
2532                         BUG_ON(ret);
2533
2534                         ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
2535                         BUG_ON(ret);
2536                 }
2537 next:
2538                 if (!upper->pending)
2539                         drop_node_buffer(upper);
2540                 else
2541                         unlock_node_buffer(upper);
2542                 if (err)
2543                         break;
2544         }
2545
2546         if (!err && node->pending) {
2547                 drop_node_buffer(node);
2548                 list_move_tail(&node->list, &rc->backref_cache.changed);
2549                 node->pending = 0;
2550         }
2551
2552         path->lowest_level = 0;
2553         BUG_ON(err == -ENOSPC);
2554         return err;
2555 }
2556
2557 static int link_to_upper(struct btrfs_trans_handle *trans,
2558                          struct reloc_control *rc,
2559                          struct backref_node *node,
2560                          struct btrfs_path *path)
2561 {
2562         struct btrfs_key key;
2563
2564         btrfs_node_key_to_cpu(node->eb, &key, 0);
2565         return do_relocation(trans, rc, node, &key, path, 0);
2566 }
2567
2568 static int finish_pending_nodes(struct btrfs_trans_handle *trans,
2569                                 struct reloc_control *rc,
2570                                 struct btrfs_path *path, int err)
2571 {
2572         LIST_HEAD(list);
2573         struct backref_cache *cache = &rc->backref_cache;
2574         struct backref_node *node;
2575         int level;
2576         int ret;
2577
2578         for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2579                 while (!list_empty(&cache->pending[level])) {
2580                         node = list_entry(cache->pending[level].next,
2581                                           struct backref_node, list);
2582                         list_move_tail(&node->list, &list);
2583                         BUG_ON(!node->pending);
2584
2585                         if (!err) {
2586                                 ret = link_to_upper(trans, rc, node, path);
2587                                 if (ret < 0)
2588                                         err = ret;
2589                         }
2590                 }
2591                 list_splice_init(&list, &cache->pending[level]);
2592         }
2593         return err;
2594 }
2595
2596 static void mark_block_processed(struct reloc_control *rc,
2597                                  u64 bytenr, u32 blocksize)
2598 {
2599         set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1,
2600                         EXTENT_DIRTY, GFP_NOFS);
2601 }
2602
2603 static void __mark_block_processed(struct reloc_control *rc,
2604                                    struct backref_node *node)
2605 {
2606         u32 blocksize;
2607         if (node->level == 0 ||
2608             in_block_group(node->bytenr, rc->block_group)) {
2609                 blocksize = btrfs_level_size(rc->extent_root, node->level);
2610                 mark_block_processed(rc, node->bytenr, blocksize);
2611         }
2612         node->processed = 1;
2613 }
2614
2615 /*
2616  * mark a block and all blocks directly/indirectly reference the block
2617  * as processed.
2618  */
2619 static void update_processed_blocks(struct reloc_control *rc,
2620                                     struct backref_node *node)
2621 {
2622         struct backref_node *next = node;
2623         struct backref_edge *edge;
2624         struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
2625         int index = 0;
2626
2627         while (next) {
2628                 cond_resched();
2629                 while (1) {
2630                         if (next->processed)
2631                                 break;
2632
2633                         __mark_block_processed(rc, next);
2634
2635                         if (list_empty(&next->upper))
2636                                 break;
2637
2638                         edge = list_entry(next->upper.next,
2639                                           struct backref_edge, list[LOWER]);
2640                         edges[index++] = edge;
2641                         next = edge->node[UPPER];
2642                 }
2643                 next = walk_down_backref(edges, &index);
2644         }
2645 }
2646
2647 static int tree_block_processed(u64 bytenr, u32 blocksize,
2648                                 struct reloc_control *rc)
2649 {
2650         if (test_range_bit(&rc->processed_blocks, bytenr,
2651                            bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL))
2652                 return 1;
2653         return 0;
2654 }
2655
2656 static int get_tree_block_key(struct reloc_control *rc,
2657                               struct tree_block *block)
2658 {
2659         struct extent_buffer *eb;
2660
2661         BUG_ON(block->key_ready);
2662         eb = read_tree_block(rc->extent_root, block->bytenr,
2663                              block->key.objectid, block->key.offset);
2664         WARN_ON(btrfs_header_level(eb) != block->level);
2665         if (block->level == 0)
2666                 btrfs_item_key_to_cpu(eb, &block->key, 0);
2667         else
2668                 btrfs_node_key_to_cpu(eb, &block->key, 0);
2669         free_extent_buffer(eb);
2670         block->key_ready = 1;
2671         return 0;
2672 }
2673
2674 static int reada_tree_block(struct reloc_control *rc,
2675                             struct tree_block *block)
2676 {
2677         BUG_ON(block->key_ready);
2678         readahead_tree_block(rc->extent_root, block->bytenr,
2679                              block->key.objectid, block->key.offset);
2680         return 0;
2681 }
2682
2683 /*
2684  * helper function to relocate a tree block
2685  */
2686 static int relocate_tree_block(struct btrfs_trans_handle *trans,
2687                                 struct reloc_control *rc,
2688                                 struct backref_node *node,
2689                                 struct btrfs_key *key,
2690                                 struct btrfs_path *path)
2691 {
2692         struct btrfs_root *root;
2693         int release = 0;
2694         int ret = 0;
2695
2696         if (!node)
2697                 return 0;
2698
2699         BUG_ON(node->processed);
2700         root = select_one_root(trans, node);
2701         if (root == ERR_PTR(-ENOENT)) {
2702                 update_processed_blocks(rc, node);
2703                 goto out;
2704         }
2705
2706         if (!root || root->ref_cows) {
2707                 ret = reserve_metadata_space(trans, rc, node);
2708                 if (ret)
2709                         goto out;
2710                 release = 1;
2711         }
2712
2713         if (root) {
2714                 if (root->ref_cows) {
2715                         BUG_ON(node->new_bytenr);
2716                         BUG_ON(!list_empty(&node->list));
2717                         btrfs_record_root_in_trans(trans, root);
2718                         root = root->reloc_root;
2719                         node->new_bytenr = root->node->start;
2720                         node->root = root;
2721                         list_add_tail(&node->list, &rc->backref_cache.changed);
2722                 } else {
2723                         path->lowest_level = node->level;
2724                         ret = btrfs_search_slot(trans, root, key, path, 0, 1);
2725                         btrfs_release_path(root, path);
2726                         if (ret > 0)
2727                                 ret = 0;
2728                 }
2729                 if (!ret)
2730                         update_processed_blocks(rc, node);
2731         } else {
2732                 ret = do_relocation(trans, rc, node, key, path, 1);
2733         }
2734 out:
2735         if (ret || node->level == 0 || node->cowonly) {
2736                 if (release)
2737                         release_metadata_space(rc, node);
2738                 remove_backref_node(&rc->backref_cache, node);
2739         }
2740         return ret;
2741 }
2742
2743 /*
2744  * relocate a list of blocks
2745  */
2746 static noinline_for_stack
2747 int relocate_tree_blocks(struct btrfs_trans_handle *trans,
2748                          struct reloc_control *rc, struct rb_root *blocks)
2749 {
2750         struct backref_node *node;
2751         struct btrfs_path *path;
2752         struct tree_block *block;
2753         struct rb_node *rb_node;
2754         int ret;
2755         int err = 0;
2756
2757         path = btrfs_alloc_path();
2758         if (!path)
2759                 return -ENOMEM;
2760
2761         rb_node = rb_first(blocks);
2762         while (rb_node) {
2763                 block = rb_entry(rb_node, struct tree_block, rb_node);
2764                 if (!block->key_ready)
2765                         reada_tree_block(rc, block);
2766                 rb_node = rb_next(rb_node);
2767         }
2768
2769         rb_node = rb_first(blocks);
2770         while (rb_node) {
2771                 block = rb_entry(rb_node, struct tree_block, rb_node);
2772                 if (!block->key_ready)
2773                         get_tree_block_key(rc, block);
2774                 rb_node = rb_next(rb_node);
2775         }
2776
2777         rb_node = rb_first(blocks);
2778         while (rb_node) {
2779                 block = rb_entry(rb_node, struct tree_block, rb_node);
2780
2781                 node = build_backref_tree(rc, &block->key,
2782                                           block->level, block->bytenr);
2783                 if (IS_ERR(node)) {
2784                         err = PTR_ERR(node);
2785                         goto out;
2786                 }
2787
2788                 ret = relocate_tree_block(trans, rc, node, &block->key,
2789                                           path);
2790                 if (ret < 0) {
2791                         if (ret != -EAGAIN || rb_node == rb_first(blocks))
2792                                 err = ret;
2793                         goto out;
2794                 }
2795                 rb_node = rb_next(rb_node);
2796         }
2797 out:
2798         free_block_list(blocks);
2799         err = finish_pending_nodes(trans, rc, path, err);
2800
2801         btrfs_free_path(path);
2802         return err;
2803 }
2804
2805 static noinline_for_stack
2806 int prealloc_file_extent_cluster(struct inode *inode,
2807                                  struct file_extent_cluster *cluster)
2808 {
2809         u64 alloc_hint = 0;
2810         u64 start;
2811         u64 end;
2812         u64 offset = BTRFS_I(inode)->index_cnt;
2813         u64 num_bytes;
2814         int nr = 0;
2815         int ret = 0;
2816
2817         BUG_ON(cluster->start != cluster->boundary[0]);
2818         mutex_lock(&inode->i_mutex);
2819
2820         ret = btrfs_check_data_free_space(inode, cluster->end +
2821                                           1 - cluster->start);
2822         if (ret)
2823                 goto out;
2824
2825         while (nr < cluster->nr) {
2826                 start = cluster->boundary[nr] - offset;
2827                 if (nr + 1 < cluster->nr)
2828                         end = cluster->boundary[nr + 1] - 1 - offset;
2829                 else
2830                         end = cluster->end - offset;
2831
2832                 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2833                 num_bytes = end + 1 - start;
2834                 ret = btrfs_prealloc_file_range(inode, 0, start,
2835                                                 num_bytes, num_bytes,
2836                                                 end + 1, &alloc_hint);
2837                 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2838                 if (ret)
2839                         break;
2840                 nr++;
2841         }
2842         btrfs_free_reserved_data_space(inode, cluster->end +
2843                                        1 - cluster->start);
2844 out:
2845         mutex_unlock(&inode->i_mutex);
2846         return ret;
2847 }
2848
2849 static noinline_for_stack
2850 int setup_extent_mapping(struct inode *inode, u64 start, u64 end,
2851                          u64 block_start)
2852 {
2853         struct btrfs_root *root = BTRFS_I(inode)->root;
2854         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2855         struct extent_map *em;
2856         int ret = 0;
2857
2858         em = alloc_extent_map(GFP_NOFS);
2859         if (!em)
2860                 return -ENOMEM;
2861
2862         em->start = start;
2863         em->len = end + 1 - start;
2864         em->block_len = em->len;
2865         em->block_start = block_start;
2866         em->bdev = root->fs_info->fs_devices->latest_bdev;
2867         set_bit(EXTENT_FLAG_PINNED, &em->flags);
2868
2869         lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2870         while (1) {
2871                 write_lock(&em_tree->lock);
2872                 ret = add_extent_mapping(em_tree, em);
2873                 write_unlock(&em_tree->lock);
2874                 if (ret != -EEXIST) {
2875                         free_extent_map(em);
2876                         break;
2877                 }
2878                 btrfs_drop_extent_cache(inode, start, end, 0);
2879         }
2880         unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
2881         return ret;
2882 }
2883
2884 static int relocate_file_extent_cluster(struct inode *inode,
2885                                         struct file_extent_cluster *cluster)
2886 {
2887         u64 page_start;
2888         u64 page_end;
2889         u64 offset = BTRFS_I(inode)->index_cnt;
2890         unsigned long index;
2891         unsigned long last_index;
2892         struct page *page;
2893         struct file_ra_state *ra;
2894         int nr = 0;
2895         int ret = 0;
2896
2897         if (!cluster->nr)
2898                 return 0;
2899
2900         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2901         if (!ra)
2902                 return -ENOMEM;
2903
2904         ret = prealloc_file_extent_cluster(inode, cluster);
2905         if (ret)
2906                 goto out;
2907
2908         file_ra_state_init(ra, inode->i_mapping);
2909
2910         ret = setup_extent_mapping(inode, cluster->start - offset,
2911                                    cluster->end - offset, cluster->start);
2912         if (ret)
2913                 goto out;
2914
2915         index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
2916         last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
2917         while (index <= last_index) {
2918                 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
2919                 if (ret)
2920                         goto out;
2921
2922                 page = find_lock_page(inode->i_mapping, index);
2923                 if (!page) {
2924                         page_cache_sync_readahead(inode->i_mapping,
2925                                                   ra, NULL, index,
2926                                                   last_index + 1 - index);
2927                         page = grab_cache_page(inode->i_mapping, index);
2928                         if (!page) {
2929                                 btrfs_delalloc_release_metadata(inode,
2930                                                         PAGE_CACHE_SIZE);
2931                                 ret = -ENOMEM;
2932                                 goto out;
2933                         }
2934                 }
2935
2936                 if (PageReadahead(page)) {
2937                         page_cache_async_readahead(inode->i_mapping,
2938                                                    ra, NULL, page, index,
2939                                                    last_index + 1 - index);
2940                 }
2941
2942                 if (!PageUptodate(page)) {
2943                         btrfs_readpage(NULL, page);
2944                         lock_page(page);
2945                         if (!PageUptodate(page)) {
2946                                 unlock_page(page);
2947                                 page_cache_release(page);
2948                                 btrfs_delalloc_release_metadata(inode,
2949                                                         PAGE_CACHE_SIZE);
2950                                 ret = -EIO;
2951                                 goto out;
2952                         }
2953                 }
2954
2955                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2956                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2957
2958                 lock_extent(&BTRFS_I(inode)->io_tree,
2959                             page_start, page_end, GFP_NOFS);
2960
2961                 set_page_extent_mapped(page);
2962
2963                 if (nr < cluster->nr &&
2964                     page_start + offset == cluster->boundary[nr]) {
2965                         set_extent_bits(&BTRFS_I(inode)->io_tree,
2966                                         page_start, page_end,
2967                                         EXTENT_BOUNDARY, GFP_NOFS);
2968                         nr++;
2969                 }
2970
2971                 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2972                 set_page_dirty(page);
2973
2974                 unlock_extent(&BTRFS_I(inode)->io_tree,
2975                               page_start, page_end, GFP_NOFS);
2976                 unlock_page(page);
2977                 page_cache_release(page);
2978
2979                 index++;
2980                 balance_dirty_pages_ratelimited(inode->i_mapping);
2981                 btrfs_throttle(BTRFS_I(inode)->root);
2982         }
2983         WARN_ON(nr != cluster->nr);
2984 out:
2985         kfree(ra);
2986         return ret;
2987 }
2988
2989 static noinline_for_stack
2990 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key,
2991                          struct file_extent_cluster *cluster)
2992 {
2993         int ret;
2994
2995         if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
2996                 ret = relocate_file_extent_cluster(inode, cluster);
2997                 if (ret)
2998                         return ret;
2999                 cluster->nr = 0;
3000         }
3001
3002         if (!cluster->nr)
3003                 cluster->start = extent_key->objectid;
3004         else
3005                 BUG_ON(cluster->nr >= MAX_EXTENTS);
3006         cluster->end = extent_key->objectid + extent_key->offset - 1;
3007         cluster->boundary[cluster->nr] = extent_key->objectid;
3008         cluster->nr++;
3009
3010         if (cluster->nr >= MAX_EXTENTS) {
3011                 ret = relocate_file_extent_cluster(inode, cluster);
3012                 if (ret)
3013                         return ret;
3014                 cluster->nr = 0;
3015         }
3016         return 0;
3017 }
3018
3019 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3020 static int get_ref_objectid_v0(struct reloc_control *rc,
3021                                struct btrfs_path *path,
3022                                struct btrfs_key *extent_key,
3023                                u64 *ref_objectid, int *path_change)
3024 {
3025         struct btrfs_key key;
3026         struct extent_buffer *leaf;
3027         struct btrfs_extent_ref_v0 *ref0;
3028         int ret;
3029         int slot;
3030
3031         leaf = path->nodes[0];
3032         slot = path->slots[0];
3033         while (1) {
3034                 if (slot >= btrfs_header_nritems(leaf)) {
3035                         ret = btrfs_next_leaf(rc->extent_root, path);
3036                         if (ret < 0)
3037                                 return ret;
3038                         BUG_ON(ret > 0);
3039                         leaf = path->nodes[0];
3040                         slot = path->slots[0];
3041                         if (path_change)
3042                                 *path_change = 1;
3043                 }
3044                 btrfs_item_key_to_cpu(leaf, &key, slot);
3045                 if (key.objectid != extent_key->objectid)
3046                         return -ENOENT;
3047
3048                 if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
3049                         slot++;
3050                         continue;
3051                 }
3052                 ref0 = btrfs_item_ptr(leaf, slot,
3053                                 struct btrfs_extent_ref_v0);
3054                 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
3055                 break;
3056         }
3057         return 0;
3058 }
3059 #endif
3060
3061 /*
3062  * helper to add a tree block to the list.
3063  * the major work is getting the generation and level of the block
3064  */
3065 static int add_tree_block(struct reloc_control *rc,
3066                           struct btrfs_key *extent_key,
3067                           struct btrfs_path *path,
3068                           struct rb_root *blocks)
3069 {
3070         struct extent_buffer *eb;
3071         struct btrfs_extent_item *ei;
3072         struct btrfs_tree_block_info *bi;
3073         struct tree_block *block;
3074         struct rb_node *rb_node;
3075         u32 item_size;
3076         int level = -1;
3077         int generation;
3078
3079         eb =  path->nodes[0];
3080         item_size = btrfs_item_size_nr(eb, path->slots[0]);
3081
3082         if (item_size >= sizeof(*ei) + sizeof(*bi)) {
3083                 ei = btrfs_item_ptr(eb, path->slots[0],
3084                                 struct btrfs_extent_item);
3085                 bi = (struct btrfs_tree_block_info *)(ei + 1);
3086                 generation = btrfs_extent_generation(eb, ei);
3087                 level = btrfs_tree_block_level(eb, bi);
3088         } else {
3089 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3090                 u64 ref_owner;
3091                 int ret;
3092
3093                 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3094                 ret = get_ref_objectid_v0(rc, path, extent_key,
3095                                           &ref_owner, NULL);
3096                 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3097                 level = (int)ref_owner;
3098                 /* FIXME: get real generation */
3099                 generation = 0;
3100 #else
3101                 BUG();
3102 #endif
3103         }
3104
3105         btrfs_release_path(rc->extent_root, path);
3106
3107         BUG_ON(level == -1);
3108
3109         block = kmalloc(sizeof(*block), GFP_NOFS);
3110         if (!block)
3111                 return -ENOMEM;
3112
3113         block->bytenr = extent_key->objectid;
3114         block->key.objectid = extent_key->offset;
3115         block->key.offset = generation;
3116         block->level = level;
3117         block->key_ready = 0;
3118
3119         rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
3120         BUG_ON(rb_node);
3121
3122         return 0;
3123 }
3124
3125 /*
3126  * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY
3127  */
3128 static int __add_tree_block(struct reloc_control *rc,
3129                             u64 bytenr, u32 blocksize,
3130                             struct rb_root *blocks)
3131 {
3132         struct btrfs_path *path;
3133         struct btrfs_key key;
3134         int ret;
3135
3136         if (tree_block_processed(bytenr, blocksize, rc))
3137                 return 0;
3138
3139         if (tree_search(blocks, bytenr))
3140                 return 0;
3141
3142         path = btrfs_alloc_path();
3143         if (!path)
3144                 return -ENOMEM;
3145
3146         key.objectid = bytenr;
3147         key.type = BTRFS_EXTENT_ITEM_KEY;
3148         key.offset = blocksize;
3149
3150         path->search_commit_root = 1;
3151         path->skip_locking = 1;
3152         ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
3153         if (ret < 0)
3154                 goto out;
3155         BUG_ON(ret);
3156
3157         btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
3158         ret = add_tree_block(rc, &key, path, blocks);
3159 out:
3160         btrfs_free_path(path);
3161         return ret;
3162 }
3163
3164 /*
3165  * helper to check if the block use full backrefs for pointers in it
3166  */
3167 static int block_use_full_backref(struct reloc_control *rc,
3168                                   struct extent_buffer *eb)
3169 {
3170         u64 flags;
3171         int ret;
3172
3173         if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
3174             btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
3175                 return 1;
3176
3177         ret = btrfs_lookup_extent_info(NULL, rc->extent_root,
3178                                        eb->start, eb->len, NULL, &flags);
3179         BUG_ON(ret);
3180
3181         if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
3182                 ret = 1;
3183         else
3184                 ret = 0;
3185         return ret;
3186 }
3187
3188 /*
3189  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3190  * this function scans fs tree to find blocks reference the data extent
3191  */
3192 static int find_data_references(struct reloc_control *rc,
3193                                 struct btrfs_key *extent_key,
3194                                 struct extent_buffer *leaf,
3195                                 struct btrfs_extent_data_ref *ref,
3196                                 struct rb_root *blocks)
3197 {
3198         struct btrfs_path *path;
3199         struct tree_block *block;
3200         struct btrfs_root *root;
3201         struct btrfs_file_extent_item *fi;
3202         struct rb_node *rb_node;
3203         struct btrfs_key key;
3204         u64 ref_root;
3205         u64 ref_objectid;
3206         u64 ref_offset;
3207         u32 ref_count;
3208         u32 nritems;
3209         int err = 0;
3210         int added = 0;
3211         int counted;
3212         int ret;
3213
3214         path = btrfs_alloc_path();
3215         if (!path)
3216                 return -ENOMEM;
3217
3218         ref_root = btrfs_extent_data_ref_root(leaf, ref);
3219         ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3220         ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3221         ref_count = btrfs_extent_data_ref_count(leaf, ref);
3222
3223         root = read_fs_root(rc->extent_root->fs_info, ref_root);
3224         if (IS_ERR(root)) {
3225                 err = PTR_ERR(root);
3226                 goto out;
3227         }
3228
3229         key.objectid = ref_objectid;
3230         key.offset = ref_offset;
3231         key.type = BTRFS_EXTENT_DATA_KEY;
3232
3233         path->search_commit_root = 1;
3234         path->skip_locking = 1;
3235         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3236         if (ret < 0) {
3237                 err = ret;
3238                 goto out;
3239         }
3240
3241         leaf = path->nodes[0];
3242         nritems = btrfs_header_nritems(leaf);
3243         /*
3244          * the references in tree blocks that use full backrefs
3245          * are not counted in
3246          */
3247         if (block_use_full_backref(rc, leaf))
3248                 counted = 0;
3249         else
3250                 counted = 1;
3251         rb_node = tree_search(blocks, leaf->start);
3252         if (rb_node) {
3253                 if (counted)
3254                         added = 1;
3255                 else
3256                         path->slots[0] = nritems;
3257         }
3258
3259         while (ref_count > 0) {
3260                 while (path->slots[0] >= nritems) {
3261                         ret = btrfs_next_leaf(root, path);
3262                         if (ret < 0) {
3263                                 err = ret;
3264                                 goto out;
3265                         }
3266                         if (ret > 0) {
3267                                 WARN_ON(1);
3268                                 goto out;
3269                         }
3270
3271                         leaf = path->nodes[0];
3272                         nritems = btrfs_header_nritems(leaf);
3273                         added = 0;
3274
3275                         if (block_use_full_backref(rc, leaf))
3276                                 counted = 0;
3277                         else
3278                                 counted = 1;
3279                         rb_node = tree_search(blocks, leaf->start);
3280                         if (rb_node) {
3281                                 if (counted)
3282                                         added = 1;
3283                                 else
3284                                         path->slots[0] = nritems;
3285                         }
3286                 }
3287
3288                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3289                 if (key.objectid != ref_objectid ||
3290                     key.type != BTRFS_EXTENT_DATA_KEY) {
3291                         WARN_ON(1);
3292                         break;
3293                 }
3294
3295                 fi = btrfs_item_ptr(leaf, path->slots[0],
3296                                     struct btrfs_file_extent_item);
3297
3298                 if (btrfs_file_extent_type(leaf, fi) ==
3299                     BTRFS_FILE_EXTENT_INLINE)
3300                         goto next;
3301
3302                 if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
3303                     extent_key->objectid)
3304                         goto next;
3305
3306                 key.offset -= btrfs_file_extent_offset(leaf, fi);
3307                 if (key.offset != ref_offset)
3308                         goto next;
3309
3310                 if (counted)
3311                         ref_count--;
3312                 if (added)
3313                         goto next;
3314
3315                 if (!tree_block_processed(leaf->start, leaf->len, rc)) {
3316                         block = kmalloc(sizeof(*block), GFP_NOFS);
3317                         if (!block) {
3318                                 err = -ENOMEM;
3319                                 break;
3320                         }
3321                         block->bytenr = leaf->start;
3322                         btrfs_item_key_to_cpu(leaf, &block->key, 0);
3323                         block->level = 0;
3324                         block->key_ready = 1;
3325                         rb_node = tree_insert(blocks, block->bytenr,
3326                                               &block->rb_node);
3327                         BUG_ON(rb_node);
3328                 }
3329                 if (counted)
3330                         added = 1;
3331                 else
3332                         path->slots[0] = nritems;
3333 next:
3334                 path->slots[0]++;
3335
3336         }
3337 out:
3338         btrfs_free_path(path);
3339         return err;
3340 }
3341
3342 /*
3343  * hepler to find all tree blocks that reference a given data extent
3344  */
3345 static noinline_for_stack
3346 int add_data_references(struct reloc_control *rc,
3347                         struct btrfs_key *extent_key,
3348                         struct btrfs_path *path,
3349                         struct rb_root *blocks)
3350 {
3351         struct btrfs_key key;
3352         struct extent_buffer *eb;
3353         struct btrfs_extent_data_ref *dref;
3354         struct btrfs_extent_inline_ref *iref;
3355         unsigned long ptr;
3356         unsigned long end;
3357         u32 blocksize = btrfs_level_size(rc->extent_root, 0);
3358         int ret;
3359         int err = 0;
3360
3361         eb = path->nodes[0];
3362         ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
3363         end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
3364 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3365         if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
3366                 ptr = end;
3367         else
3368 #endif
3369                 ptr += sizeof(struct btrfs_extent_item);
3370
3371         while (ptr < end) {
3372                 iref = (struct btrfs_extent_inline_ref *)ptr;
3373                 key.type = btrfs_extent_inline_ref_type(eb, iref);
3374                 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3375                         key.offset = btrfs_extent_inline_ref_offset(eb, iref);
3376                         ret = __add_tree_block(rc, key.offset, blocksize,
3377                                                blocks);
3378                 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3379                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
3380                         ret = find_data_references(rc, extent_key,
3381                                                    eb, dref, blocks);
3382                 } else {
3383                         BUG();
3384                 }
3385                 ptr += btrfs_extent_inline_ref_size(key.type);
3386         }
3387         WARN_ON(ptr > end);
3388
3389         while (1) {
3390                 cond_resched();
3391                 eb = path->nodes[0];
3392                 if (path->slots[0] >= btrfs_header_nritems(eb)) {
3393                         ret = btrfs_next_leaf(rc->extent_root, path);
3394                         if (ret < 0) {
3395                                 err = ret;
3396                                 break;
3397                         }
3398                         if (ret > 0)
3399                                 break;
3400                         eb = path->nodes[0];
3401                 }
3402
3403                 btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
3404                 if (key.objectid != extent_key->objectid)
3405                         break;
3406
3407 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3408                 if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
3409                     key.type == BTRFS_EXTENT_REF_V0_KEY) {
3410 #else
3411                 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
3412                 if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
3413 #endif
3414                         ret = __add_tree_block(rc, key.offset, blocksize,
3415                                                blocks);
3416                 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
3417                         dref = btrfs_item_ptr(eb, path->slots[0],
3418                                               struct btrfs_extent_data_ref);
3419                         ret = find_data_references(rc, extent_key,
3420                                                    eb, dref, blocks);
3421                 } else {
3422                         ret = 0;
3423                 }
3424                 if (ret) {
3425                         err = ret;
3426                         break;
3427                 }
3428                 path->slots[0]++;
3429         }
3430         btrfs_release_path(rc->extent_root, path);
3431         if (err)
3432                 free_block_list(blocks);
3433         return err;
3434 }
3435
3436 /*
3437  * hepler to find next unprocessed extent
3438  */
3439 static noinline_for_stack
3440 int find_next_extent(struct btrfs_trans_handle *trans,
3441                      struct reloc_control *rc, struct btrfs_path *path,
3442                      struct btrfs_key *extent_key)
3443 {
3444         struct btrfs_key key;
3445         struct extent_buffer *leaf;
3446         u64 start, end, last;
3447         int ret;
3448
3449         last = rc->block_group->key.objectid + rc->block_group->key.offset;
3450         while (1) {
3451                 cond_resched();
3452                 if (rc->search_start >= last) {
3453                         ret = 1;
3454                         break;
3455                 }
3456
3457                 key.objectid = rc->search_start;
3458                 key.type = BTRFS_EXTENT_ITEM_KEY;
3459                 key.offset = 0;
3460
3461                 path->search_commit_root = 1;
3462                 path->skip_locking = 1;
3463                 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
3464                                         0, 0);
3465                 if (ret < 0)
3466                         break;
3467 next:
3468                 leaf = path->nodes[0];
3469                 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
3470                         ret = btrfs_next_leaf(rc->extent_root, path);
3471                         if (ret != 0)
3472                                 break;
3473                         leaf = path->nodes[0];
3474                 }
3475
3476                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3477                 if (key.objectid >= last) {
3478                         ret = 1;
3479                         break;
3480                 }
3481
3482                 if (key.type != BTRFS_EXTENT_ITEM_KEY ||
3483                     key.objectid + key.offset <= rc->search_start) {
3484                         path->slots[0]++;
3485                         goto next;
3486                 }
3487
3488                 ret = find_first_extent_bit(&rc->processed_blocks,
3489                                             key.objectid, &start, &end,
3490                                             EXTENT_DIRTY);
3491
3492                 if (ret == 0 && start <= key.objectid) {
3493                         btrfs_release_path(rc->extent_root, path);
3494                         rc->search_start = end + 1;
3495                 } else {
3496                         rc->search_start = key.objectid + key.offset;
3497                         memcpy(extent_key, &key, sizeof(key));
3498                         return 0;
3499                 }
3500         }
3501         btrfs_release_path(rc->extent_root, path);
3502         return ret;
3503 }
3504
3505 static void set_reloc_control(struct reloc_control *rc)
3506 {
3507         struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3508         mutex_lock(&fs_info->trans_mutex);
3509         fs_info->reloc_ctl = rc;
3510         mutex_unlock(&fs_info->trans_mutex);
3511 }
3512
3513 static void unset_reloc_control(struct reloc_control *rc)
3514 {
3515         struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
3516         mutex_lock(&fs_info->trans_mutex);
3517         fs_info->reloc_ctl = NULL;
3518         mutex_unlock(&fs_info->trans_mutex);
3519 }
3520
3521 static int check_extent_flags(u64 flags)
3522 {
3523         if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3524             (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3525                 return 1;
3526         if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
3527             !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
3528                 return 1;
3529         if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
3530             (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
3531                 return 1;
3532         return 0;
3533 }
3534
3535 static noinline_for_stack
3536 int prepare_to_relocate(struct reloc_control *rc)
3537 {
3538         struct btrfs_trans_handle *trans;
3539         int ret;
3540
3541         rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root);
3542         if (!rc->block_rsv)
3543                 return -ENOMEM;
3544
3545         /*
3546          * reserve some space for creating reloc trees.
3547          * btrfs_init_reloc_root will use them when there
3548          * is no reservation in transaction handle.
3549          */
3550         ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
3551                                   rc->extent_root->nodesize * 256);
3552         if (ret)
3553                 return ret;
3554
3555         rc->block_rsv->refill_used = 1;
3556         btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv);
3557
3558         memset(&rc->cluster, 0, sizeof(rc->cluster));
3559         rc->search_start = rc->block_group->key.objectid;
3560         rc->extents_found = 0;
3561         rc->nodes_relocated = 0;
3562         rc->merging_rsv_size = 0;
3563
3564         rc->create_reloc_tree = 1;
3565         set_reloc_control(rc);
3566
3567         trans = btrfs_join_transaction(rc->extent_root, 1);
3568         btrfs_commit_transaction(trans, rc->extent_root);
3569         return 0;
3570 }
3571
3572 static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
3573 {
3574         struct rb_root blocks = RB_ROOT;
3575         struct btrfs_key key;
3576         struct btrfs_trans_handle *trans = NULL;
3577         struct btrfs_path *path;
3578         struct btrfs_extent_item *ei;
3579         unsigned long nr;
3580         u64 flags;
3581         u32 item_size;
3582         int ret;
3583         int err = 0;
3584
3585         path = btrfs_alloc_path();
3586         if (!path)
3587                 return -ENOMEM;
3588
3589         ret = prepare_to_relocate(rc);
3590         if (ret) {
3591                 err = ret;
3592                 goto out_free;
3593         }
3594
3595         while (1) {
3596                 trans = btrfs_start_transaction(rc->extent_root, 0);
3597
3598                 if (update_backref_cache(trans, &rc->backref_cache)) {
3599                         btrfs_end_transaction(trans, rc->extent_root);
3600                         continue;
3601                 }
3602
3603                 ret = find_next_extent(trans, rc, path, &key);
3604                 if (ret < 0)
3605                         err = ret;
3606                 if (ret != 0)
3607                         break;
3608
3609                 rc->extents_found++;
3610
3611                 ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
3612                                     struct btrfs_extent_item);
3613                 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]);
3614                 if (item_size >= sizeof(*ei)) {
3615                         flags = btrfs_extent_flags(path->nodes[0], ei);
3616                         ret = check_extent_flags(flags);
3617                         BUG_ON(ret);
3618
3619                 } else {
3620 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
3621                         u64 ref_owner;
3622                         int path_change = 0;
3623
3624                         BUG_ON(item_size !=
3625                                sizeof(struct btrfs_extent_item_v0));
3626                         ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
3627                                                   &path_change);
3628                         if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
3629                                 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
3630                         else
3631                                 flags = BTRFS_EXTENT_FLAG_DATA;
3632
3633                         if (path_change) {
3634                                 btrfs_release_path(rc->extent_root, path);
3635
3636                                 path->search_commit_root = 1;
3637                                 path->skip_locking = 1;
3638                                 ret = btrfs_search_slot(NULL, rc->extent_root,
3639                                                         &key, path, 0, 0);
3640                                 if (ret < 0) {
3641                                         err = ret;
3642                                         break;
3643                                 }
3644                                 BUG_ON(ret > 0);
3645                         }
3646 #else
3647                         BUG();
3648 #endif
3649                 }
3650
3651                 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
3652                         ret = add_tree_block(rc, &key, path, &blocks);
3653                 } else if (rc->stage == UPDATE_DATA_PTRS &&
3654                            (flags & BTRFS_EXTENT_FLAG_DATA)) {
3655                         ret = add_data_references(rc, &key, path, &blocks);
3656                 } else {
3657                         btrfs_release_path(rc->extent_root, path);
3658                         ret = 0;
3659                 }
3660                 if (ret < 0) {
3661                         err = ret;
3662                         break;
3663                 }
3664
3665                 if (!RB_EMPTY_ROOT(&blocks)) {
3666                         ret = relocate_tree_blocks(trans, rc, &blocks);
3667                         if (ret < 0) {
3668                                 if (ret != -EAGAIN) {
3669                                         err = ret;
3670                                         break;
3671                                 }
3672                                 rc->extents_found--;
3673                                 rc->search_start = key.objectid;
3674                         }
3675                 }
3676
3677                 ret = btrfs_block_rsv_check(trans, rc->extent_root,
3678                                             rc->block_rsv, 0, 5);
3679                 if (ret < 0) {
3680                         if (ret != -EAGAIN) {
3681                                 err = ret;
3682                                 WARN_ON(1);
3683                                 break;
3684                         }
3685                         rc->commit_transaction = 1;
3686                 }
3687
3688                 if (rc->commit_transaction) {
3689                         rc->commit_transaction = 0;
3690                         ret = btrfs_commit_transaction(trans, rc->extent_root);
3691                         BUG_ON(ret);
3692                 } else {
3693                         nr = trans->blocks_used;
3694                         btrfs_end_transaction_throttle(trans, rc->extent_root);
3695                         btrfs_btree_balance_dirty(rc->extent_root, nr);
3696                 }
3697                 trans = NULL;
3698
3699                 if (rc->stage == MOVE_DATA_EXTENTS &&
3700                     (flags & BTRFS_EXTENT_FLAG_DATA)) {
3701                         rc->found_file_extent = 1;
3702                         ret = relocate_data_extent(rc->data_inode,
3703                                                    &key, &rc->cluster);
3704                         if (ret < 0) {
3705                                 err = ret;
3706                                 break;
3707                         }
3708                 }
3709         }
3710
3711         btrfs_release_path(rc->extent_root, path);
3712         clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
3713                           GFP_NOFS);
3714
3715         if (trans) {
3716                 nr = trans->blocks_used;
3717                 btrfs_end_transaction_throttle(trans, rc->extent_root);
3718                 btrfs_btree_balance_dirty(rc->extent_root, nr);
3719         }
3720
3721         if (!err) {
3722                 ret = relocate_file_extent_cluster(rc->data_inode,
3723                                                    &rc->cluster);
3724                 if (ret < 0)
3725                         err = ret;
3726         }
3727
3728         rc->create_reloc_tree = 0;
3729         set_reloc_control(rc);
3730
3731         backref_cache_cleanup(&rc->backref_cache);
3732         btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3733
3734         err = prepare_to_merge(rc, err);
3735
3736         merge_reloc_roots(rc);
3737
3738         rc->merge_reloc_tree = 0;
3739         unset_reloc_control(rc);
3740         btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);
3741
3742         /* get rid of pinned extents */
3743         trans = btrfs_join_transaction(rc->extent_root, 1);
3744         btrfs_commit_transaction(trans, rc->extent_root);
3745 out_free:
3746         btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);
3747         btrfs_free_path(path);
3748         return err;
3749 }
3750
3751 static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
3752                                  struct btrfs_root *root, u64 objectid)
3753 {
3754         struct btrfs_path *path;
3755         struct btrfs_inode_item *item;
3756         struct extent_buffer *leaf;
3757         int ret;
3758
3759         path = btrfs_alloc_path();
3760         if (!path)
3761                 return -ENOMEM;
3762
3763         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
3764         if (ret)
3765                 goto out;
3766
3767         leaf = path->nodes[0];
3768         item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
3769         memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
3770         btrfs_set_inode_generation(leaf, item, 1);
3771         btrfs_set_inode_size(leaf, item, 0);
3772         btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
3773         btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |
3774                                           BTRFS_INODE_PREALLOC);
3775         btrfs_mark_buffer_dirty(leaf);
3776         btrfs_release_path(root, path);
3777 out:
3778         btrfs_free_path(path);
3779         return ret;
3780 }
3781
3782 /*
3783  * helper to create inode for data relocation.
3784  * the inode is in data relocation tree and its link count is 0
3785  */
3786 static noinline_for_stack
3787 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3788                                  struct btrfs_block_group_cache *group)
3789 {
3790         struct inode *inode = NULL;
3791         struct btrfs_trans_handle *trans;
3792         struct btrfs_root *root;
3793         struct btrfs_key key;
3794         unsigned long nr;
3795         u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
3796         int err = 0;
3797
3798         root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
3799         if (IS_ERR(root))
3800                 return ERR_CAST(root);
3801
3802         trans = btrfs_start_transaction(root, 6);
3803         if (IS_ERR(trans))
3804                 return ERR_CAST(trans);
3805
3806         err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
3807         if (err)
3808                 goto out;
3809
3810         err = __insert_orphan_inode(trans, root, objectid);
3811         BUG_ON(err);
3812
3813         key.objectid = objectid;
3814         key.type = BTRFS_INODE_ITEM_KEY;
3815         key.offset = 0;
3816         inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3817         BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3818         BTRFS_I(inode)->index_cnt = group->key.objectid;
3819
3820         err = btrfs_orphan_add(trans, inode);
3821 out:
3822         nr = trans->blocks_used;
3823         btrfs_end_transaction(trans, root);
3824         btrfs_btree_balance_dirty(root, nr);
3825         if (err) {
3826                 if (inode)
3827                         iput(inode);
3828                 inode = ERR_PTR(err);
3829         }
3830         return inode;
3831 }
3832
3833 static struct reloc_control *alloc_reloc_control(void)
3834 {
3835         struct reloc_control *rc;
3836
3837         rc = kzalloc(sizeof(*rc), GFP_NOFS);
3838         if (!rc)
3839                 return NULL;
3840
3841         INIT_LIST_HEAD(&rc->reloc_roots);
3842         backref_cache_init(&rc->backref_cache);
3843         mapping_tree_init(&rc->reloc_root_tree);
3844         extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
3845         return rc;
3846 }
3847
3848 /*
3849  * function to relocate all extents in a block group.
3850  */
3851 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3852 {
3853         struct btrfs_fs_info *fs_info = extent_root->fs_info;
3854         struct reloc_control *rc;
3855         int ret;
3856         int rw = 0;
3857         int err = 0;
3858
3859         rc = alloc_reloc_control();
3860         if (!rc)
3861                 return -ENOMEM;
3862
3863         rc->extent_root = extent_root;
3864
3865         rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
3866         BUG_ON(!rc->block_group);
3867
3868         if (!rc->block_group->ro) {
3869                 ret = btrfs_set_block_group_ro(extent_root, rc->block_group);
3870                 if (ret) {
3871                         err = ret;
3872                         goto out;
3873                 }
3874                 rw = 1;
3875         }
3876
3877         rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3878         if (IS_ERR(rc->data_inode)) {
3879                 err = PTR_ERR(rc->data_inode);
3880                 rc->data_inode = NULL;
3881                 goto out;
3882         }
3883
3884         printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
3885                (unsigned long long)rc->block_group->key.objectid,
3886                (unsigned long long)rc->block_group->flags);
3887
3888         btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
3889         btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
3890
3891         while (1) {
3892                 mutex_lock(&fs_info->cleaner_mutex);
3893
3894                 btrfs_clean_old_snapshots(fs_info->tree_root);
3895                 ret = relocate_block_group(rc);
3896
3897                 mutex_unlock(&fs_info->cleaner_mutex);
3898                 if (ret < 0) {
3899                         err = ret;
3900                         goto out;
3901                 }
3902
3903                 if (rc->extents_found == 0)
3904                         break;
3905
3906                 printk(KERN_INFO "btrfs: found %llu extents\n",
3907                         (unsigned long long)rc->extents_found);
3908
3909                 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
3910                         btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
3911                         invalidate_mapping_pages(rc->data_inode->i_mapping,
3912                                                  0, -1);
3913                         rc->stage = UPDATE_DATA_PTRS;
3914                 }
3915         }
3916
3917         filemap_write_and_wait_range(fs_info->btree_inode->i_mapping,
3918                                      rc->block_group->key.objectid,
3919                                      rc->block_group->key.objectid +
3920                                      rc->block_group->key.offset - 1);
3921
3922         WARN_ON(rc->block_group->pinned > 0);
3923         WARN_ON(rc->block_group->reserved > 0);
3924         WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
3925 out:
3926         if (err && rw)
3927                 btrfs_set_block_group_rw(extent_root, rc->block_group);
3928         iput(rc->data_inode);
3929         btrfs_put_block_group(rc->block_group);
3930         kfree(rc);
3931         return err;
3932 }
3933
3934 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
3935 {
3936         struct btrfs_trans_handle *trans;
3937         int ret;
3938
3939         trans = btrfs_start_transaction(root->fs_info->tree_root, 0);
3940
3941         memset(&root->root_item.drop_progress, 0,
3942                 sizeof(root->root_item.drop_progress));
3943         root->root_item.drop_level = 0;
3944         btrfs_set_root_refs(&root->root_item, 0);
3945         ret = btrfs_update_root(trans, root->fs_info->tree_root,
3946                                 &root->root_key, &root->root_item);
3947         BUG_ON(ret);
3948
3949         ret = btrfs_end_transaction(trans, root->fs_info->tree_root);
3950         BUG_ON(ret);
3951         return 0;
3952 }
3953
3954 /*
3955  * recover relocation interrupted by system crash.
3956  *
3957  * this function resumes merging reloc trees with corresponding fs trees.
3958  * this is important for keeping the sharing of tree blocks
3959  */
3960 int btrfs_recover_relocation(struct btrfs_root *root)
3961 {
3962         LIST_HEAD(reloc_roots);
3963         struct btrfs_key key;
3964         struct btrfs_root *fs_root;
3965         struct btrfs_root *reloc_root;
3966         struct btrfs_path *path;
3967         struct extent_buffer *leaf;
3968         struct reloc_control *rc = NULL;
3969         struct btrfs_trans_handle *trans;
3970         int ret;
3971         int err = 0;
3972
3973         path = btrfs_alloc_path();
3974         if (!path)
3975                 return -ENOMEM;
3976
3977         key.objectid = BTRFS_TREE_RELOC_OBJECTID;
3978         key.type = BTRFS_ROOT_ITEM_KEY;
3979         key.offset = (u64)-1;
3980
3981         while (1) {
3982                 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
3983                                         path, 0, 0);
3984                 if (ret < 0) {
3985                         err = ret;
3986                         goto out;
3987                 }
3988                 if (ret > 0) {
3989                         if (path->slots[0] == 0)
3990                                 break;
3991                         path->slots[0]--;
3992                 }
3993                 leaf = path->nodes[0];
3994                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
3995                 btrfs_release_path(root->fs_info->tree_root, path);
3996
3997                 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
3998                     key.type != BTRFS_ROOT_ITEM_KEY)
3999                         break;
4000
4001                 reloc_root = btrfs_read_fs_root_no_radix(root, &key);
4002                 if (IS_ERR(reloc_root)) {
4003                         err = PTR_ERR(reloc_root);
4004                         goto out;
4005                 }
4006
4007                 list_add(&reloc_root->root_list, &reloc_roots);
4008
4009                 if (btrfs_root_refs(&reloc_root->root_item) > 0) {
4010                         fs_root = read_fs_root(root->fs_info,
4011                                                reloc_root->root_key.offset);
4012                         if (IS_ERR(fs_root)) {
4013                                 ret = PTR_ERR(fs_root);
4014                                 if (ret != -ENOENT) {
4015                                         err = ret;
4016                                         goto out;
4017                                 }
4018                                 mark_garbage_root(reloc_root);
4019                         }
4020                 }
4021
4022                 if (key.offset == 0)
4023                         break;
4024
4025                 key.offset--;
4026         }
4027         btrfs_release_path(root->fs_info->tree_root, path);
4028
4029         if (list_empty(&reloc_roots))
4030                 goto out;
4031
4032         rc = alloc_reloc_control();
4033         if (!rc) {
4034                 err = -ENOMEM;
4035                 goto out;
4036         }
4037
4038         rc->extent_root = root->fs_info->extent_root;
4039
4040         set_reloc_control(rc);
4041
4042         trans = btrfs_join_transaction(rc->extent_root, 1);
4043
4044         rc->merge_reloc_tree = 1;
4045
4046         while (!list_empty(&reloc_roots)) {
4047                 reloc_root = list_entry(reloc_roots.next,
4048                                         struct btrfs_root, root_list);
4049                 list_del(&reloc_root->root_list);
4050
4051                 if (btrfs_root_refs(&reloc_root->root_item) == 0) {
4052                         list_add_tail(&reloc_root->root_list,
4053                                       &rc->reloc_roots);
4054                         continue;
4055                 }
4056
4057                 fs_root = read_fs_root(root->fs_info,
4058                                        reloc_root->root_key.offset);
4059                 BUG_ON(IS_ERR(fs_root));
4060
4061                 __add_reloc_root(reloc_root);
4062                 fs_root->reloc_root = reloc_root;
4063         }
4064
4065         btrfs_commit_transaction(trans, rc->extent_root);
4066
4067         merge_reloc_roots(rc);
4068
4069         unset_reloc_control(rc);
4070
4071         trans = btrfs_join_transaction(rc->extent_root, 1);
4072         btrfs_commit_transaction(trans, rc->extent_root);
4073 out:
4074         kfree(rc);
4075         while (!list_empty(&reloc_roots)) {
4076                 reloc_root = list_entry(reloc_roots.next,
4077                                         struct btrfs_root, root_list);
4078                 list_del(&reloc_root->root_list);
4079                 free_extent_buffer(reloc_root->node);
4080                 free_extent_buffer(reloc_root->commit_root);
4081                 kfree(reloc_root);
4082         }
4083         btrfs_free_path(path);
4084
4085         if (err == 0) {
4086                 /* cleanup orphan inode in data relocation tree */
4087                 fs_root = read_fs_root(root->fs_info,
4088                                        BTRFS_DATA_RELOC_TREE_OBJECTID);
4089                 if (IS_ERR(fs_root))
4090                         err = PTR_ERR(fs_root);
4091                 else
4092                         btrfs_orphan_cleanup(fs_root);
4093         }
4094         return err;
4095 }
4096
4097 /*
4098  * helper to add ordered checksum for data relocation.
4099  *
4100  * cloning checksum properly handles the nodatasum extents.
4101  * it also saves CPU time to re-calculate the checksum.
4102  */
4103 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4104 {
4105         struct btrfs_ordered_sum *sums;
4106         struct btrfs_sector_sum *sector_sum;
4107         struct btrfs_ordered_extent *ordered;
4108         struct btrfs_root *root = BTRFS_I(inode)->root;
4109         size_t offset;
4110         int ret;
4111         u64 disk_bytenr;
4112         LIST_HEAD(list);
4113
4114         ordered = btrfs_lookup_ordered_extent(inode, file_pos);
4115         BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
4116
4117         disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
4118         ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
4119                                        disk_bytenr + len - 1, &list);
4120
4121         while (!list_empty(&list)) {
4122                 sums = list_entry(list.next, struct btrfs_ordered_sum, list);
4123                 list_del_init(&sums->list);
4124
4125                 sector_sum = sums->sums;
4126                 sums->bytenr = ordered->start;
4127
4128                 offset = 0;
4129                 while (offset < sums->len) {
4130                         sector_sum->bytenr += ordered->start - disk_bytenr;
4131                         sector_sum++;
4132                         offset += root->sectorsize;
4133                 }
4134
4135                 btrfs_add_ordered_sum(inode, ordered, sums);
4136         }
4137         btrfs_put_ordered_extent(ordered);
4138         return 0;
4139 }
4140
4141 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4142                            struct btrfs_root *root, struct extent_buffer *buf,
4143                            struct extent_buffer *cow)
4144 {
4145         struct reloc_control *rc;
4146         struct backref_node *node;
4147         int first_cow = 0;
4148         int level;
4149         int ret;
4150
4151         rc = root->fs_info->reloc_ctl;
4152         if (!rc)
4153                 return;
4154
4155         BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4156                root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
4157
4158         level = btrfs_header_level(buf);
4159         if (btrfs_header_generation(buf) <=
4160             btrfs_root_last_snapshot(&root->root_item))
4161                 first_cow = 1;
4162
4163         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID &&
4164             rc->create_reloc_tree) {
4165                 WARN_ON(!first_cow && level == 0);
4166
4167                 node = rc->backref_cache.path[level];
4168                 BUG_ON(node->bytenr != buf->start &&
4169                        node->new_bytenr != buf->start);
4170
4171                 drop_node_buffer(node);
4172                 extent_buffer_get(cow);
4173                 node->eb = cow;
4174                 node->new_bytenr = cow->start;
4175
4176                 if (!node->pending) {
4177                         list_move_tail(&node->list,
4178                                        &rc->backref_cache.pending[level]);
4179                         node->pending = 1;
4180                 }
4181
4182                 if (first_cow)
4183                         __mark_block_processed(rc, node);
4184
4185                 if (first_cow && level > 0)
4186                         rc->nodes_relocated += buf->len;
4187         }
4188
4189         if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) {
4190                 ret = replace_file_extents(trans, rc, root, cow);
4191                 BUG_ON(ret);
4192         }
4193 }
4194
4195 /*
4196  * called before creating snapshot. it calculates metadata reservation
4197  * requried for relocating tree blocks in the snapshot
4198  */
4199 void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
4200                               struct btrfs_pending_snapshot *pending,
4201                               u64 *bytes_to_reserve)
4202 {
4203         struct btrfs_root *root;
4204         struct reloc_control *rc;
4205
4206         root = pending->root;
4207         if (!root->reloc_root)
4208                 return;
4209
4210         rc = root->fs_info->reloc_ctl;
4211         if (!rc->merge_reloc_tree)
4212                 return;
4213
4214         root = root->reloc_root;
4215         BUG_ON(btrfs_root_refs(&root->root_item) == 0);
4216         /*
4217          * relocation is in the stage of merging trees. the space
4218          * used by merging a reloc tree is twice the size of
4219          * relocated tree nodes in the worst case. half for cowing
4220          * the reloc tree, half for cowing the fs tree. the space
4221          * used by cowing the reloc tree will be freed after the
4222          * tree is dropped. if we create snapshot, cowing the fs
4223          * tree may use more space than it frees. so we need
4224          * reserve extra space.
4225          */
4226         *bytes_to_reserve += rc->nodes_relocated;
4227 }
4228
4229 /*
4230  * called after snapshot is created. migrate block reservation
4231  * and create reloc root for the newly created snapshot
4232  */
4233 void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
4234                                struct btrfs_pending_snapshot *pending)
4235 {
4236         struct btrfs_root *root = pending->root;
4237         struct btrfs_root *reloc_root;
4238         struct btrfs_root *new_root;
4239         struct reloc_control *rc;
4240         int ret;
4241
4242         if (!root->reloc_root)
4243                 return;
4244
4245         rc = root->fs_info->reloc_ctl;
4246         rc->merging_rsv_size += rc->nodes_relocated;
4247
4248         if (rc->merge_reloc_tree) {
4249                 ret = btrfs_block_rsv_migrate(&pending->block_rsv,
4250                                               rc->block_rsv,
4251                                               rc->nodes_relocated);
4252                 BUG_ON(ret);
4253         }
4254
4255         new_root = pending->snap;
4256         reloc_root = create_reloc_root(trans, root->reloc_root,
4257                                        new_root->root_key.objectid);
4258
4259         __add_reloc_root(reloc_root);
4260         new_root->reloc_root = reloc_root;
4261
4262         if (rc->create_reloc_tree) {
4263                 ret = clone_backref_node(trans, rc, root, reloc_root);
4264                 BUG_ON(ret);
4265         }
4266 }