/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include "hash.h"
#include "crc32c.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "ref-cache.h"

#define PENDING_EXTENT_INSERT 0
#define PENDING_EXTENT_DELETE 1
#define PENDING_BACKREF_UPDATE 2

struct pending_extent_op {
        int type;
        u64 bytenr;
        u64 num_bytes;
        u64 parent;
        u64 orig_parent;
        u64 generation;
        u64 orig_generation;
        int level;
};

static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
                         struct btrfs_block_group_cache *hint,
                         u64 search_start, int data, int owner);

void maybe_lock_mutex(struct btrfs_root *root)
{
        if (root != root->fs_info->extent_root &&
            root != root->fs_info->chunk_root &&
            root != root->fs_info->dev_root) {
                mutex_lock(&root->fs_info->alloc_mutex);
        }
}

void maybe_unlock_mutex(struct btrfs_root *root)
{
        if (root != root->fs_info->extent_root &&
            root != root->fs_info->chunk_root &&
            root != root->fs_info->dev_root) {
                mutex_unlock(&root->fs_info->alloc_mutex);
        }
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
        return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
                                struct btrfs_block_group_cache *block_group)
{
        struct rb_node **p;
        struct rb_node *parent = NULL;
        struct btrfs_block_group_cache *cache;

        spin_lock(&info->block_group_cache_lock);
        p = &info->block_group_cache_tree.rb_node;

        while (*p) {
                parent = *p;
                cache = rb_entry(parent, struct btrfs_block_group_cache,
                                 cache_node);
                if (block_group->key.objectid < cache->key.objectid) {
                        p = &(*p)->rb_left;
                } else if (block_group->key.objectid > cache->key.objectid) {
                        p = &(*p)->rb_right;
                } else {
                        spin_unlock(&info->block_group_cache_lock);
                        return -EEXIST;
                }
        }

        rb_link_node(&block_group->cache_node, parent, p);
        rb_insert_color(&block_group->cache_node,
                        &info->block_group_cache_tree);
        spin_unlock(&info->block_group_cache_lock);

        return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
                              int contains)
{
        struct btrfs_block_group_cache *cache, *ret = NULL;
        struct rb_node *n;
        u64 end, start;

        spin_lock(&info->block_group_cache_lock);
        n = info->block_group_cache_tree.rb_node;

        while (n) {
                cache = rb_entry(n, struct btrfs_block_group_cache,
                                 cache_node);
                end = cache->key.objectid + cache->key.offset - 1;
                start = cache->key.objectid;

                if (bytenr < start) {
                        if (!contains && (!ret || start < ret->key.objectid))
                                ret = cache;
                        n = n->rb_left;
                } else if (bytenr > start) {
                        if (contains && bytenr <= end) {
                                ret = cache;
                                break;
                        }
                        n = n->rb_right;
                } else {
                        ret = cache;
                        break;
                }
        }
        spin_unlock(&info->block_group_cache_lock);

        return ret;
}

/*
 * this is only called by cache_block_group.  Since we could have freed
 * extents, we need to check the pinned_extents for any extents that can't
 * be used yet, because their free space won't be released until the
 * transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_fs_info *info, u64 start, u64 end)
{
        u64 extent_start, extent_end, size;
        int ret;

        while (start < end) {
                ret = find_first_extent_bit(&info->pinned_extents, start,
                                            &extent_start, &extent_end,
                                            EXTENT_DIRTY);
                if (ret)
                        break;

                if (extent_start == start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
                        ret = btrfs_add_free_space(block_group, start, size);
                        BUG_ON(ret);
                        start = extent_end + 1;
                } else {
                        break;
                }
        }

        if (start < end) {
                size = end - start;
                ret = btrfs_add_free_space(block_group, start, size);
                BUG_ON(ret);
        }

        return 0;
}
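
/*
 * Worked example (illustrative, not part of the original code): for the
 * range [0, 100) with a single pinned extent covering [30, 50], the loop
 * above adds [0, 30) as free space and restarts the scan at 51; the final
 * check then adds [51, 100).  The pinned bytes stay unavailable until the
 * transaction commits.
 */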

static int cache_block_group(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group)
{
        struct btrfs_path *path;
        int ret = 0;
        struct btrfs_key key;
        struct extent_buffer *leaf;
        int slot;
        u64 last = 0;
        u64 first_free;
        int found = 0;

        if (!block_group)
                return 0;

        root = root->fs_info->extent_root;

        if (block_group->cached)
                return 0;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 2;
        /*
         * we get into deadlocks with paths held by callers of this function.
         * since the alloc_mutex is protecting things right now, just
         * skip the locking here
         */
        path->skip_locking = 1;
        first_free = max_t(u64, block_group->key.objectid,
                           BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
        key.objectid = block_group->key.objectid;
        key.offset = 0;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto err;
        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid + key.offset > first_free)
                        first_free = key.objectid + key.offset;
        }
        while(1) {
                leaf = path->nodes[0];
                slot = path->slots[0];
                if (slot >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto err;
                        if (ret == 0)
                                continue;
                        else
                                break;
                }
                btrfs_item_key_to_cpu(leaf, &key, slot);
                if (key.objectid < block_group->key.objectid)
                        goto next;

                if (key.objectid >= block_group->key.objectid +
                    block_group->key.offset)
                        break;

                if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
                        if (!found) {
                                last = first_free;
                                found = 1;
                        }

                        add_new_free_space(block_group, root->fs_info, last,
                                           key.objectid);

                        last = key.objectid + key.offset;
                }
next:
                path->slots[0]++;
        }

        if (!found)
                last = first_free;

        add_new_free_space(block_group, root->fs_info, last,
                           block_group->key.objectid +
                           block_group->key.offset);

        block_group->cached = 1;
        ret = 0;
err:
        btrfs_free_path(path);
        return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
                                                       btrfs_fs_info *info,
                                                         u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 0);

        return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
                                                         btrfs_fs_info *info,
                                                         u64 bytenr)
{
        struct btrfs_block_group_cache *cache;

        cache = block_group_cache_tree_search(info, bytenr, 1);

        return cache;
}

static int noinline find_free_space(struct btrfs_root *root,
                                    struct btrfs_block_group_cache **cache_ret,
                                    u64 *start_ret, u64 num, int data)
{
        int ret;
        struct btrfs_block_group_cache *cache = *cache_ret;
        struct btrfs_free_space *info = NULL;
        u64 last;
        u64 total_fs_bytes;
        u64 search_start = *start_ret;

        WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);

        if (!cache)
                goto out;

        last = max(search_start, cache->key.objectid);

again:
        ret = cache_block_group(root, cache);
        if (ret)
                goto out;

        if (cache->ro || !block_group_bits(cache, data))
                goto new_group;

        info = btrfs_find_free_space(cache, last, num);
        if (info) {
                *start_ret = info->offset;
                return 0;
        }

new_group:
        last = cache->key.objectid + cache->key.offset;

        cache = btrfs_lookup_first_block_group(root->fs_info, last);
        if (!cache || cache->key.objectid >= total_fs_bytes)
                goto out;

        *cache_ret = cache;
        goto again;

out:
        return -ENOSPC;
}

static u64 div_factor(u64 num, int factor)
{
        if (factor == 10)
                return num;
        num *= factor;
        do_div(num, 10);
        return num;
}
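
/*
 * Example (illustrative, not part of the original code): div_factor()
 * computes num * factor / 10 using 64-bit division, so
 * div_factor(1024, 9) == 921, roughly 90% of the input.  Callers below
 * use it to test whether a block group is less than factor/10 full.
 */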

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
{
        struct list_head *head = &info->space_info;
        struct list_head *cur;
        struct btrfs_space_info *found;
        list_for_each(cur, head) {
                found = list_entry(cur, struct btrfs_space_info, list);
                if (found->flags == flags)
                        return found;
        }
        return NULL;

}

static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
                         struct btrfs_block_group_cache *hint,
                         u64 search_start, int data, int owner)
{
        struct btrfs_block_group_cache *cache;
        struct btrfs_block_group_cache *found_group = NULL;
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_space_info *sinfo;
        u64 used;
        u64 last = 0;
        u64 free_check;
        int full_search = 0;
        int factor = 10;
        int wrapped = 0;

        if (data & BTRFS_BLOCK_GROUP_METADATA)
                factor = 9;

        if (search_start) {
                struct btrfs_block_group_cache *shint;
                shint = btrfs_lookup_first_block_group(info, search_start);
                if (shint && block_group_bits(shint, data) && !shint->ro) {
                        spin_lock(&shint->lock);
                        used = btrfs_block_group_used(&shint->item);
                        if (used + shint->pinned <
                            div_factor(shint->key.offset, factor)) {
                                spin_unlock(&shint->lock);
                                return shint;
                        }
                        spin_unlock(&shint->lock);
                }
        }
        if (hint && !hint->ro && block_group_bits(hint, data)) {
                spin_lock(&hint->lock);
                used = btrfs_block_group_used(&hint->item);
                if (used + hint->pinned <
                    div_factor(hint->key.offset, factor)) {
                        spin_unlock(&hint->lock);
                        return hint;
                }
                spin_unlock(&hint->lock);
                last = hint->key.objectid + hint->key.offset;
        } else {
                if (hint)
                        last = max(hint->key.objectid, search_start);
                else
                        last = search_start;
        }
        sinfo = __find_space_info(root->fs_info, data);
        if (!sinfo)
                goto found;
again:
        while(1) {
                struct list_head *l;

                cache = NULL;

                spin_lock(&sinfo->lock);
                list_for_each(l, &sinfo->block_groups) {
                        struct btrfs_block_group_cache *entry;
                        entry = list_entry(l, struct btrfs_block_group_cache,
                                           list);
                        if ((entry->key.objectid >= last) &&
                            (!cache || (entry->key.objectid <
                                        cache->key.objectid)))
                                cache = entry;
                }
                spin_unlock(&sinfo->lock);

                if (!cache)
                        break;

                spin_lock(&cache->lock);
                last = cache->key.objectid + cache->key.offset;
                used = btrfs_block_group_used(&cache->item);

                if (!cache->ro && block_group_bits(cache, data)) {
                        free_check = div_factor(cache->key.offset, factor);
                        if (used + cache->pinned < free_check) {
                                found_group = cache;
                                spin_unlock(&cache->lock);
                                goto found;
                        }
                }
                spin_unlock(&cache->lock);
                cond_resched();
        }
        if (!wrapped) {
                last = search_start;
                wrapped = 1;
                goto again;
        }
        if (!full_search && factor < 10) {
                last = search_start;
                full_search = 1;
                factor = 10;
                goto again;
        }
found:
        return found_group;
}

struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
                                                 struct btrfs_block_group_cache
                                                 *hint, u64 search_start,
                                                 int data, int owner)
{

        struct btrfs_block_group_cache *ret;
        ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
        return ret;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_path *path;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        maybe_lock_mutex(root);
        key.objectid = start;
        key.offset = len;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
        maybe_unlock_mutex(root);
        btrfs_free_path(path);
        return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure has fields for:
 *
 * - Objectid of the subvolume root
 * - Generation number of the tree holding the reference
 * - objectid of the file holding the reference
 * - offset in the file corresponding to the key holding the reference
 * - number of references held by the parent node (always 1 for tree blocks)
 *
 * A btree leaf may hold multiple references to a file extent. In most cases,
 * these references are from the same file and the corresponding offsets
 * inside the file are close together. So inode objectid and offset in file
 * are just hints; they indicate where in the btree the references can be
 * found and when we can stop searching.
 *
 * When a file extent is allocated the fields are filled in:
 *     (root_key.objectid, trans->transid, inode objectid, offset in file, 1)
 *
 * When a leaf is cow'd new references are added for every file extent found
 * in the leaf.  It looks similar to the create case, but trans->transid will
 * be different when the block is cow'd.
 *
 *     (root_key.objectid, trans->transid, inode objectid, offset in file,
 *      number of references in the leaf)
 *
 * Because inode objectid and offset in file are just hints, they are not
 * used when backrefs are deleted. When a file extent is removed either
 * during snapshot deletion or file truncation, we find the corresponding
 * back reference and check the following fields:
 *
 *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf))
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 * - Different generations of the same subvolume
 *
 * When a tree block is created, back references are inserted:
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a tree block is cow'd, new back references are added for all the
 * blocks it points to. If the tree block isn't in a reference counted root,
 * the old back references are removed. These new back references are of
 * the form (trans->transid will have increased since creation):
 *
 * (root->root_key.objectid, trans->transid, level, 0, 1)
 *
 * When a backref is deleted, the following fields are checked:
 *
 * if backref was for a tree root:
 *     (btrfs_header_owner(itself), btrfs_header_generation(itself))
 * else
 *     (btrfs_header_owner(parent), btrfs_header_generation(parent))
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent, the key
 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first
 * byte of the parent extent. If an extent is a tree root, the key offset
 * is set to the key objectid.
 */

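/*
 * Illustrative example of the key layout described above (assumed byte
 * offsets, not from the original code): a file extent starting at byte
 * 12582912 referenced from a leaf that starts at byte 4194304 gets a
 * backref item with key (12582912, BTRFS_EXTENT_REF_KEY, 4194304).  If
 * the extent were itself a tree root, the offset would repeat the
 * objectid: (12582912, BTRFS_EXTENT_REF_KEY, 12582912).
 */
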
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path, u64 bytenr,
                                          u64 parent, u64 ref_root,
                                          u64 ref_generation, int del)
{
        struct btrfs_key key;
        struct btrfs_extent_ref *ref;
        struct extent_buffer *leaf;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1);
        if (ret < 0)
                goto out;
        if (ret > 0) {
                ret = -ENOENT;
                goto out;
        }

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        if (btrfs_ref_root(leaf, ref) != ref_root ||
            btrfs_ref_generation(leaf, ref) != ref_generation) {
                ret = -EIO;
                WARN_ON(1);
                goto out;
        }
        ret = 0;
out:
        return ret;
}

static int noinline insert_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path,
                                          u64 bytenr, u64 parent,
                                          u64 ref_root, u64 ref_generation,
                                          u64 owner_objectid, u64 owner_offset)
{
        struct btrfs_key key;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret;

        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_REF_KEY;
        key.offset = parent;

        ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref));
        if (ret == 0) {
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                btrfs_set_ref_root(leaf, ref, ref_root);
                btrfs_set_ref_generation(leaf, ref, ref_generation);
                btrfs_set_ref_objectid(leaf, ref, owner_objectid);
                btrfs_set_ref_offset(leaf, ref, owner_offset);
                btrfs_set_ref_num_refs(leaf, ref, 1);
        } else if (ret == -EEXIST) {
                u64 existing_owner;
                BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID);
                leaf = path->nodes[0];
                ref = btrfs_item_ptr(leaf, path->slots[0],
                                     struct btrfs_extent_ref);
                if (btrfs_ref_root(leaf, ref) != ref_root ||
                    btrfs_ref_generation(leaf, ref) != ref_generation) {
                        ret = -EIO;
                        WARN_ON(1);
                        goto out;
                }

                num_refs = btrfs_ref_num_refs(leaf, ref);
                BUG_ON(num_refs == 0);
                btrfs_set_ref_num_refs(leaf, ref, num_refs + 1);

                existing_owner = btrfs_ref_objectid(leaf, ref);
                if (existing_owner == owner_objectid &&
                    btrfs_ref_offset(leaf, ref) > owner_offset) {
                        btrfs_set_ref_offset(leaf, ref, owner_offset);
                } else if (existing_owner != owner_objectid &&
                           existing_owner != BTRFS_MULTIPLE_OBJECTIDS) {
                        btrfs_set_ref_objectid(leaf, ref,
                                        BTRFS_MULTIPLE_OBJECTIDS);
                        btrfs_set_ref_offset(leaf, ref, 0);
                }
                ret = 0;
        } else {
                goto out;
        }
        btrfs_mark_buffer_dirty(path->nodes[0]);
out:
        btrfs_release_path(root, path);
        return ret;
}
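
/*
 * Behavior note for insert_extent_backref (explanatory, not from the
 * original code): when the backref item already exists, only its refcount
 * is bumped.  Because objectid/offset are just search hints, a second
 * reference from a different inode collapses the hint to
 * BTRFS_MULTIPLE_OBJECTIDS with offset 0, while a lower offset from the
 * same inode simply lowers the stored hint.
 */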

static int noinline remove_extent_backref(struct btrfs_trans_handle *trans,
                                          struct btrfs_root *root,
                                          struct btrfs_path *path)
{
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref;
        u32 num_refs;
        int ret = 0;

        leaf = path->nodes[0];
        ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref);
        num_refs = btrfs_ref_num_refs(leaf, ref);
        BUG_ON(num_refs == 0);
        num_refs -= 1;
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
        } else {
                btrfs_set_ref_num_refs(leaf, ref, num_refs);
                btrfs_mark_buffer_dirty(leaf);
        }
        btrfs_release_path(root, path);
        return ret;
}

static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root, u64 bytenr,
                                     u64 orig_parent, u64 parent,
                                     u64 orig_root, u64 ref_root,
                                     u64 orig_generation, u64 ref_generation,
                                     u64 owner_objectid, u64 owner_offset)
{
        int ret;
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;

        if (root == root->fs_info->extent_root) {
                struct pending_extent_op *extent_op;
                u64 num_bytes;

                BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL);
                num_bytes = btrfs_level_size(root, (int)owner_objectid);
                if (test_range_bit(&root->fs_info->extent_ins, bytenr,
                                bytenr + num_bytes - 1, EXTENT_LOCKED, 0)) {
                        u64 priv;
                        ret = get_state_private(&root->fs_info->extent_ins,
                                                bytenr, &priv);
                        BUG_ON(ret);
                        extent_op = (struct pending_extent_op *)
                                                        (unsigned long)priv;
                        BUG_ON(extent_op->parent != orig_parent);
                        BUG_ON(extent_op->generation != orig_generation);
                        extent_op->parent = parent;
                        extent_op->generation = ref_generation;
                } else {
                        extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
                        BUG_ON(!extent_op);

                        extent_op->type = PENDING_BACKREF_UPDATE;
                        extent_op->bytenr = bytenr;
                        extent_op->num_bytes = num_bytes;
                        extent_op->parent = parent;
                        extent_op->orig_parent = orig_parent;
                        extent_op->generation = ref_generation;
                        extent_op->orig_generation = orig_generation;
                        extent_op->level = (int)owner_objectid;

                        set_extent_bits(&root->fs_info->extent_ins,
                                        bytenr, bytenr + num_bytes - 1,
                                        EXTENT_LOCKED, GFP_NOFS);
                        set_state_private(&root->fs_info->extent_ins,
                                          bytenr, (unsigned long)extent_op);
                }
                return 0;
        }

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        ret = lookup_extent_backref(trans, extent_root, path,
                                    bytenr, orig_parent, orig_root,
                                    orig_generation, 1);
        if (ret)
                goto out;
        ret = remove_extent_backref(trans, extent_root, path);
        if (ret)
                goto out;
        ret = insert_extent_backref(trans, extent_root, path, bytenr,
                                    parent, ref_root, ref_generation,
                                    owner_objectid, owner_offset);
        BUG_ON(ret);
        finish_current_insert(trans, extent_root);
        del_pending_extents(trans, extent_root);
out:
        btrfs_free_path(path);
        return ret;
}
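
/*
 * Note (interpretation, not from the original code): refs on blocks that
 * belong to the extent root itself are not updated inline above, since
 * modifying the extent tree while cowing its own blocks would recurse.
 * Instead the change is parked as a PENDING_BACKREF_UPDATE, keyed by
 * EXTENT_LOCKED bits in fs_info->extent_ins, and applied when the pending
 * inserts and deletes are flushed.
 */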

int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 orig_parent, u64 parent,
                            u64 ref_root, u64 ref_generation,
                            u64 owner_objectid, u64 owner_offset)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;
        maybe_lock_mutex(root);
        ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent,
                                        parent, ref_root, ref_root,
                                        ref_generation, ref_generation,
                                        owner_objectid, owner_offset);
        maybe_unlock_mutex(root);
        return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  struct btrfs_root *root, u64 bytenr,
                                  u64 orig_parent, u64 parent,
                                  u64 orig_root, u64 ref_root,
                                  u64 orig_generation, u64 ref_generation,
                                  u64 owner_objectid, u64 owner_offset)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;
        u32 refs;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->reada = 1;
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
        key.offset = (u64)-1;

        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 1);
        if (ret < 0)
                return ret;
        BUG_ON(ret == 0 || path->slots[0] == 0);

        path->slots[0]--;
        l = path->nodes[0];

        btrfs_item_key_to_cpu(l, &key, path->slots[0]);
        BUG_ON(key.objectid != bytenr);
        BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY);

        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(l, item);
        btrfs_set_extent_refs(l, item, refs + 1);
        btrfs_mark_buffer_dirty(path->nodes[0]);

        btrfs_release_path(root->fs_info->extent_root, path);

        path->reada = 1;
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent,
                                    ref_root, ref_generation,
                                    owner_objectid, owner_offset);
        BUG_ON(ret);
        finish_current_insert(trans, root->fs_info->extent_root);
        del_pending_extents(trans, root->fs_info->extent_root);

        btrfs_free_path(path);
        return 0;
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
                         u64 ref_root, u64 ref_generation,
                         u64 owner_objectid, u64 owner_offset)
{
        int ret;
        if (ref_root == BTRFS_TREE_LOG_OBJECTID &&
            owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
                return 0;
        maybe_lock_mutex(root);
        ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent,
                                     0, ref_root, 0, ref_generation,
                                     owner_objectid, owner_offset);
        maybe_unlock_mutex(root);
        return ret;
}

int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root)
{
        finish_current_insert(trans, root->fs_info->extent_root);
        del_pending_extents(trans, root->fs_info->extent_root);
        return 0;
}

int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 bytenr,
                            u64 num_bytes, u32 *refs)
{
        struct btrfs_path *path;
        int ret;
        struct btrfs_key key;
        struct extent_buffer *l;
        struct btrfs_extent_item *item;

        WARN_ON(num_bytes < root->sectorsize);
        path = btrfs_alloc_path();
        path->reada = 1;
        key.objectid = bytenr;
        key.offset = num_bytes;
        btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0)
                goto out;
        if (ret != 0) {
                btrfs_print_leaf(root, path->nodes[0]);
                printk("failed to find block number %Lu\n", bytenr);
                BUG();
        }
        l = path->nodes[0];
        item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
        *refs = btrfs_extent_refs(l, item);
out:
        btrfs_free_path(path);
        return 0;
}

static int get_reference_status(struct btrfs_root *root, u64 bytenr,
                                u64 parent_gen, u64 ref_objectid,
                                u64 *min_generation, u32 *ref_count)
{
        struct btrfs_root *extent_root = root->fs_info->extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_extent_ref *ref_item;
        struct btrfs_key key;
        struct btrfs_key found_key;
        u64 root_objectid = root->root_key.objectid;
        u64 ref_generation;
        u32 nritems;
        int ret;

        key.objectid = bytenr;
        key.offset = (u64)-1;
        key.type = BTRFS_EXTENT_ITEM_KEY;

        path = btrfs_alloc_path();
        mutex_lock(&root->fs_info->alloc_mutex);
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        BUG_ON(ret == 0);
        if (ret < 0 || path->slots[0] == 0)
                goto out;

        path->slots[0]--;
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

        if (found_key.objectid != bytenr ||
            found_key.type != BTRFS_EXTENT_ITEM_KEY) {
                ret = 1;
                goto out;
        }

        *ref_count = 0;
        *min_generation = (u64)-1;

        while (1) {
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
                                goto out;
                        if (ret == 0)
                                continue;
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid != bytenr)
                        break;

                if (found_key.type != BTRFS_EXTENT_REF_KEY) {
                        path->slots[0]++;
                        continue;
                }

                ref_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_extent_ref);
                ref_generation = btrfs_ref_generation(leaf, ref_item);
                /*
                 * For (parent_gen > 0 && parent_gen > ref_generation):
                 *
                 * we reach here through the oldest root, therefore
                 * all other references from the same snapshot should
                 * have a larger generation.
                 */
                if ((root_objectid != btrfs_ref_root(leaf, ref_item)) ||
                    (parent_gen > 0 && parent_gen > ref_generation) ||
                    (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
                     ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
                        *ref_count = 2;
                        break;
                }

                *ref_count = 1;
                if (*min_generation > ref_generation)
                        *min_generation = ref_generation;

                path->slots[0]++;
        }
        ret = 0;
out:
        mutex_unlock(&root->fs_info->alloc_mutex);
        btrfs_free_path(path);
        return ret;
}

int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct btrfs_key *key, u64 bytenr)
{
        struct btrfs_root *old_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
        struct btrfs_file_extent_item *item;
        u64 ref_generation;
        u64 min_generation;
        u64 extent_start;
        u32 ref_count;
        int level;
        int ret;

        BUG_ON(trans == NULL);
        BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
        ret = get_reference_status(root, bytenr, 0, key->objectid,
                                   &min_generation, &ref_count);
        if (ret)
                return ret;

        if (ref_count != 1)
                return 1;

        old_root = root->dirty_root->root;
        ref_generation = old_root->root_key.offset;

        /* all references are created in the running transaction */
        if (min_generation > ref_generation) {
                ret = 0;
                goto out;
        }

        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
                goto out;
        }

        path->skip_locking = 1;
        /* if no item is found, the extent is referenced by another snapshot */
        ret = btrfs_search_slot(NULL, old_root, key, path, 0, 0);
        if (ret)
                goto out;

        eb = path->nodes[0];
        item = btrfs_item_ptr(eb, path->slots[0],
                              struct btrfs_file_extent_item);
        if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
            btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
                ret = 1;
                goto out;
        }

        for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
                if (level >= 0) {
                        eb = path->nodes[level];
                        if (!eb)
                                continue;
                        extent_start = eb->start;
                } else
                        extent_start = bytenr;

                ret = get_reference_status(root, extent_start, ref_generation,
                                           0, &min_generation, &ref_count);
                if (ret)
                        goto out;

                if (ref_count != 1) {
                        ret = 1;
                        goto out;
                }
                if (level >= 0)
                        ref_generation = btrfs_header_generation(eb);
        }
        ret = 0;
out:
        if (path)
                btrfs_free_path(path);
        return ret;
}
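
/*
 * Summary of the walk above (interpretation, not from the original code):
 * btrfs_cross_ref_exists() returns 0 only when the data extent and every
 * tree block on the path to it in the old root are referenced exactly
 * once by this root, meaning no other snapshot can reach the extent; any
 * extra reference along the way makes it return 1.
 */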
1112
1113 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1114                     struct extent_buffer *buf, u32 nr_extents)
1115 {
1116         u32 nritems;
1117         struct btrfs_key key;
1118         struct btrfs_file_extent_item *fi;
1119         int i;
1120         int level;
1121         int ret = 0;
1122
1123         if (!root->ref_cows)
1124                 return 0;
1125
1126         level = btrfs_header_level(buf);
1127         nritems = btrfs_header_nritems(buf);
1128
1129         if (level == 0) {
1130                 struct btrfs_leaf_ref *ref;
1131                 struct btrfs_extent_info *info;
1132
1133                 ref = btrfs_alloc_leaf_ref(root, nr_extents);
1134                 if (!ref) {
1135                         ret = -ENOMEM;
1136                         goto out;
1137                 }
1138
1139                 ref->root_gen = root->root_key.offset;
1140                 ref->bytenr = buf->start;
1141                 ref->owner = btrfs_header_owner(buf);
1142                 ref->generation = btrfs_header_generation(buf);
1143                 ref->nritems = nr_extents;
1144                 info = ref->extents;
1145
1146                 for (i = 0; nr_extents > 0 && i < nritems; i++) {
1147                         u64 disk_bytenr;
1148                         btrfs_item_key_to_cpu(buf, &key, i);
1149                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1150                                 continue;
1151                         fi = btrfs_item_ptr(buf, i,
1152                                             struct btrfs_file_extent_item);
1153                         if (btrfs_file_extent_type(buf, fi) ==
1154                             BTRFS_FILE_EXTENT_INLINE)
1155                                 continue;
1156                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1157                         if (disk_bytenr == 0)
1158                                 continue;
1159
1160                         info->bytenr = disk_bytenr;
1161                         info->num_bytes =
1162                                 btrfs_file_extent_disk_num_bytes(buf, fi);
1163                         info->objectid = key.objectid;
1164                         info->offset = key.offset;
1165                         info++;
1166                 }
1167
1168                 BUG_ON(!root->ref_tree);
1169                 ret = btrfs_add_leaf_ref(root, ref);
1170                 WARN_ON(ret);
1171                 btrfs_free_leaf_ref(root, ref);
1172         }
1173 out:
1174         return ret;
1175 }
1176
1177 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1178                   struct extent_buffer *orig_buf, struct extent_buffer *buf,
1179                   u32 *nr_extents)
1180 {
1181         u64 bytenr;
1182         u64 ref_root;
1183         u64 orig_root;
1184         u64 ref_generation;
1185         u64 orig_generation;
1186         u32 nritems;
1187         u32 nr_file_extents = 0;
1188         struct btrfs_key key;
1189         struct btrfs_file_extent_item *fi;
1190         int i;
1191         int level;
1192         int ret = 0;
1193         int faili = 0;
1194         int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
1195                             u64, u64, u64, u64, u64, u64, u64, u64, u64);
1196
1197         ref_root = btrfs_header_owner(buf);
1198         ref_generation = btrfs_header_generation(buf);
1199         orig_root = btrfs_header_owner(orig_buf);
1200         orig_generation = btrfs_header_generation(orig_buf);
1201
1202         nritems = btrfs_header_nritems(buf);
1203         level = btrfs_header_level(buf);
1204
1205         if (root->ref_cows) {
1206                 process_func = __btrfs_inc_extent_ref;
1207         } else {
1208                 if (level == 0 &&
1209                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1210                         goto out;
1211                 if (level != 0 &&
1212                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1213                         goto out;
1214                 process_func = __btrfs_update_extent_ref;
1215         }
1216
1217         for (i = 0; i < nritems; i++) {
1218                 cond_resched();
1219                 if (level == 0) {
1220                         btrfs_item_key_to_cpu(buf, &key, i);
1221                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1222                                 continue;
1223                         fi = btrfs_item_ptr(buf, i,
1224                                             struct btrfs_file_extent_item);
1225                         if (btrfs_file_extent_type(buf, fi) ==
1226                             BTRFS_FILE_EXTENT_INLINE)
1227                                 continue;
1228                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1229                         if (bytenr == 0)
1230                                 continue;
1231
1232                         nr_file_extents++;
1233
1234                         maybe_lock_mutex(root);
1235                         ret = process_func(trans, root, bytenr,
1236                                            orig_buf->start, buf->start,
1237                                            orig_root, ref_root,
1238                                            orig_generation, ref_generation,
1239                                            key.objectid, key.offset);
1240                         maybe_unlock_mutex(root);
1241
1242                         if (ret) {
1243                                 faili = i;
1244                                 WARN_ON(1);
1245                                 goto fail;
1246                         }
1247                 } else {
1248                         bytenr = btrfs_node_blockptr(buf, i);
1249                         maybe_lock_mutex(root);
1250                         ret = process_func(trans, root, bytenr,
1251                                            orig_buf->start, buf->start,
1252                                            orig_root, ref_root,
1253                                            orig_generation, ref_generation,
1254                                            level - 1, 0);
1255                         maybe_unlock_mutex(root);
1256                         if (ret) {
1257                                 faili = i;
1258                                 WARN_ON(1);
1259                                 goto fail;
1260                         }
1261                 }
1262         }
1263 out:
1264         if (nr_extents) {
1265                 if (level == 0)
1266                         *nr_extents = nr_file_extents;
1267                 else
1268                         *nr_extents = nritems;
1269         }
1270         return 0;
1271 fail:
1272         WARN_ON(1);
1273         return ret;
1274 }
1275
1276 int btrfs_update_ref(struct btrfs_trans_handle *trans,
1277                      struct btrfs_root *root, struct extent_buffer *orig_buf,
1278                      struct extent_buffer *buf, int start_slot, int nr)
1279
1280 {
1281         u64 bytenr;
1282         u64 ref_root;
1283         u64 orig_root;
1284         u64 ref_generation;
1285         u64 orig_generation;
1286         struct btrfs_key key;
1287         struct btrfs_file_extent_item *fi;
1288         int i;
1289         int ret;
1290         int slot;
1291         int level;
1292
1293         BUG_ON(start_slot < 0);
1294         BUG_ON(start_slot + nr > btrfs_header_nritems(buf));
1295
1296         ref_root = btrfs_header_owner(buf);
1297         ref_generation = btrfs_header_generation(buf);
1298         orig_root = btrfs_header_owner(orig_buf);
1299         orig_generation = btrfs_header_generation(orig_buf);
1300         level = btrfs_header_level(buf);
1301
1302         if (!root->ref_cows) {
1303                 if (level == 0 &&
1304                     root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID)
1305                         return 0;
1306                 if (level != 0 &&
1307                     root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID)
1308                         return 0;
1309         }
1310
1311         for (i = 0, slot = start_slot; i < nr; i++, slot++) {
1312                 cond_resched();
1313                 if (level == 0) {
1314                         btrfs_item_key_to_cpu(buf, &key, slot);
1315                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1316                                 continue;
1317                         fi = btrfs_item_ptr(buf, slot,
1318                                             struct btrfs_file_extent_item);
1319                         if (btrfs_file_extent_type(buf, fi) ==
1320                             BTRFS_FILE_EXTENT_INLINE)
1321                                 continue;
1322                         bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
1323                         if (bytenr == 0)
1324                                 continue;
1325                         maybe_lock_mutex(root);
1326                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1327                                             orig_buf->start, buf->start,
1328                                             orig_root, ref_root,
1329                                             orig_generation, ref_generation,
1330                                             key.objectid, key.offset);
1331                         maybe_unlock_mutex(root);
1332                         if (ret)
1333                                 goto fail;
1334                 } else {
1335                         bytenr = btrfs_node_blockptr(buf, slot);
1336                         maybe_lock_mutex(root);
1337                         ret = __btrfs_update_extent_ref(trans, root, bytenr,
1338                                             orig_buf->start, buf->start,
1339                                             orig_root, ref_root,
1340                                             orig_generation, ref_generation,
1341                                             level - 1, 0);
1342                         maybe_unlock_mutex(root);
1343                         if (ret)
1344                                 goto fail;
1345                 }
1346         }
1347         return 0;
1348 fail:
1349         WARN_ON(1);
1350         return -1;
1351 }
1352
1353 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1354                                  struct btrfs_root *root,
1355                                  struct btrfs_path *path,
1356                                  struct btrfs_block_group_cache *cache)
1357 {
1358         int ret;
1359         int pending_ret;
1360         struct btrfs_root *extent_root = root->fs_info->extent_root;
1361         unsigned long bi;
1362         struct extent_buffer *leaf;
1363
1364         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1365         if (ret < 0)
1366                 goto fail;
1367         BUG_ON(ret);
1368
1369         leaf = path->nodes[0];
1370         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1371         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1372         btrfs_mark_buffer_dirty(leaf);
1373         btrfs_release_path(extent_root, path);
1374 fail:
1375         finish_current_insert(trans, extent_root);
1376         pending_ret = del_pending_extents(trans, extent_root);
1377         if (ret)
1378                 return ret;
1379         if (pending_ret)
1380                 return pending_ret;
1381         return 0;
1382
1383 }
1384
1385 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1386                                    struct btrfs_root *root)
1387 {
1388         struct btrfs_block_group_cache *cache, *entry;
1389         struct rb_node *n;
1390         int err = 0;
1391         int werr = 0;
1392         struct btrfs_path *path;
1393         u64 last = 0;
1394
1395         path = btrfs_alloc_path();
1396         if (!path)
1397                 return -ENOMEM;
1398
1399         mutex_lock(&root->fs_info->alloc_mutex);
1400         while(1) {
1401                 cache = NULL;
1402                 spin_lock(&root->fs_info->block_group_cache_lock);
1403                 for (n = rb_first(&root->fs_info->block_group_cache_tree);
1404                      n; n = rb_next(n)) {
1405                         entry = rb_entry(n, struct btrfs_block_group_cache,
1406                                          cache_node);
1407                         if (entry->dirty) {
1408                                 cache = entry;
1409                                 break;
1410                         }
1411                 }
1412                 spin_unlock(&root->fs_info->block_group_cache_lock);
1413
1414                 if (!cache)
1415                         break;
1416 
1419                 err = write_one_cache_group(trans, root,
1420                                             path, cache);
1421                 /*
1422                  * if we fail to write the cache group, we want
1423                  * to keep it marked dirty in hopes that a later
1424                  * write will work
1425                  */
1426                 if (err) {
1427                         werr = err;
1428                         continue;
1429                 }
1430
1431                 cache->dirty = 0;
1432         }
1433         btrfs_free_path(path);
1434         mutex_unlock(&root->fs_info->alloc_mutex);
1435         return werr;
1436 }
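
/*
 * Illustration of the loop above (block group addresses are made up):
 * with dirty groups at 0, 1G and 2G, each pass takes
 * block_group_cache_lock, rescans from rb_first() and writes the first
 * dirty group it finds, so the tree is never held across a write that
 * may block:
 *
 *   pass 1: group @0   written, ->dirty cleared
 *   pass 2: group @1G  written, ->dirty cleared
 *   pass 3: group @2G  written, ->dirty cleared
 *   pass 4: nothing dirty, loop exits
 *
 * a group whose write fails keeps ->dirty set and is picked up again
 * on the next pass.
 */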
1437
1438 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1439                              u64 total_bytes, u64 bytes_used,
1440                              struct btrfs_space_info **space_info)
1441 {
1442         struct btrfs_space_info *found;
1443
1444         found = __find_space_info(info, flags);
1445         if (found) {
1446                 found->total_bytes += total_bytes;
1447                 found->bytes_used += bytes_used;
1448                 found->full = 0;
1449                 *space_info = found;
1450                 return 0;
1451         }
1452         found = kmalloc(sizeof(*found), GFP_NOFS);
1453         if (!found)
1454                 return -ENOMEM;
1455
1456         list_add(&found->list, &info->space_info);
1457         INIT_LIST_HEAD(&found->block_groups);
1458         spin_lock_init(&found->lock);
1459         found->flags = flags;
1460         found->total_bytes = total_bytes;
1461         found->bytes_used = bytes_used;
1462         found->bytes_pinned = 0;
1463         found->full = 0;
1464         found->force_alloc = 0;
1465         *space_info = found;
1466         return 0;
1467 }
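
/*
 * Example of the accumulate-or-create behaviour above (hypothetical
 * numbers): two calls with the same flags,
 *
 *   update_space_info(info, BTRFS_BLOCK_GROUP_DATA, 1ULL << 30, 0, &si);
 *   update_space_info(info, BTRFS_BLOCK_GROUP_DATA, 1ULL << 30, 0, &si);
 *
 * leave one btrfs_space_info with total_bytes == 2G; only the first
 * call allocates, the second just bumps the totals and clears ->full
 * so allocators will reconsider the space.
 */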
1468
1469 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1470 {
1471         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1472                                    BTRFS_BLOCK_GROUP_RAID1 |
1473                                    BTRFS_BLOCK_GROUP_RAID10 |
1474                                    BTRFS_BLOCK_GROUP_DUP);
1475         if (extra_flags) {
1476                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1477                         fs_info->avail_data_alloc_bits |= extra_flags;
1478                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1479                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1480                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1481                         fs_info->avail_system_alloc_bits |= extra_flags;
1482         }
1483 }
1484
1485 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1486 {
1487         u64 num_devices = root->fs_info->fs_devices->num_devices;
1488
1489         if (num_devices == 1)
1490                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1491         if (num_devices < 4)
1492                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1493
1494         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1495             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1496                       BTRFS_BLOCK_GROUP_RAID10))) {
1497                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1498         }
1499
1500         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1501             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1502                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1503         }
1504
1505         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1506             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1507              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1508              (flags & BTRFS_BLOCK_GROUP_DUP)))
1509                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1510         return flags;
1511 }
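
/*
 * Worked example for the reductions above (hypothetical request): on a
 * single-device filesystem, asking for
 *
 *   BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_RAID1 |
 *   BTRFS_BLOCK_GROUP_DUP
 *
 * first drops RAID1 (num_devices == 1); with RAID1 gone the DUP bit
 * survives the next check, so the caller ends up with DUP metadata.
 * The remaining checks guarantee at most one redundancy profile is
 * left set.
 */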
1512
1513 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1514                           struct btrfs_root *extent_root, u64 alloc_bytes,
1515                           u64 flags, int force)
1516 {
1517         struct btrfs_space_info *space_info;
1518         u64 thresh;
1519         u64 start;
1520         u64 num_bytes;
1521         int ret = 0;
1522
1523         flags = reduce_alloc_profile(extent_root, flags);
1524
1525         space_info = __find_space_info(extent_root->fs_info, flags);
1526         if (!space_info) {
1527                 ret = update_space_info(extent_root->fs_info, flags,
1528                                         0, 0, &space_info);
1529                 BUG_ON(ret);
1530         }
1531         BUG_ON(!space_info);
1532
1533         if (space_info->force_alloc) {
1534                 force = 1;
1535                 space_info->force_alloc = 0;
1536         }
1537         if (space_info->full)
1538                 goto out;
1539
1540         thresh = div_factor(space_info->total_bytes, 6);
1541         if (!force &&
1542            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1543             thresh)
1544                 goto out;
1545
1546         mutex_lock(&extent_root->fs_info->chunk_mutex);
1547         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1548         if (ret == -ENOSPC) {
1549                 printk(KERN_INFO "space info full %Lu\n", flags);
1550                 space_info->full = 1;
1551                 goto out_unlock;
1552         }
1553         BUG_ON(ret);
1554
1555         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1556                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1557         BUG_ON(ret);
1558
1559 out_unlock:
1560         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1561 out:
1562         return ret;
1563 }
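
/*
 * Numeric sketch of the threshold above, assuming div_factor(n, 6)
 * computes n * 6 / 10 as elsewhere in btrfs: with total_bytes == 10G
 * the threshold is 6G, so a non-forced 1M request only allocates a new
 * chunk once bytes_used + bytes_pinned + 1M reaches 6G.  force == 1 or
 * a pending ->force_alloc skips the threshold check entirely.
 */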
1564
1565 static int update_block_group(struct btrfs_trans_handle *trans,
1566                               struct btrfs_root *root,
1567                               u64 bytenr, u64 num_bytes, int alloc,
1568                               int mark_free)
1569 {
1570         struct btrfs_block_group_cache *cache;
1571         struct btrfs_fs_info *info = root->fs_info;
1572         u64 total = num_bytes;
1573         u64 old_val;
1574         u64 byte_in_group;
1575
1576         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1577         while (total) {
1578                 cache = btrfs_lookup_block_group(info, bytenr);
1579                 if (!cache) {
1580                         return -1;
1581                 }
1582                 byte_in_group = bytenr - cache->key.objectid;
1583                 WARN_ON(byte_in_group > cache->key.offset);
1584
1585                 spin_lock(&cache->lock);
1586                 cache->dirty = 1;
1587                 old_val = btrfs_block_group_used(&cache->item);
1588                 num_bytes = min(total, cache->key.offset - byte_in_group);
1589                 if (alloc) {
1590                         old_val += num_bytes;
1591                         cache->space_info->bytes_used += num_bytes;
1592                         btrfs_set_block_group_used(&cache->item, old_val);
1593                         spin_unlock(&cache->lock);
1594                 } else {
1595                         old_val -= num_bytes;
1596                         cache->space_info->bytes_used -= num_bytes;
1597                         btrfs_set_block_group_used(&cache->item, old_val);
1598                         spin_unlock(&cache->lock);
1599                         if (mark_free) {
1600                                 int ret;
1601                                 ret = btrfs_add_free_space(cache, bytenr,
1602                                                            num_bytes);
1603                                 if (ret)
1604                                         return -1;
1605                         }
1606                 }
1607                 total -= num_bytes;
1608                 bytenr += num_bytes;
1609         }
1610         return 0;
1611 }
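
/*
 * Example of the splitting done above (made-up layout): with block
 * groups [0, 1G) and [1G, 2G), accounting 8M starting at 1G - 4M takes
 * two passes:
 *
 *   pass 1: group @0,  num_bytes = min(8M, 1G - (1G - 4M)) = 4M
 *   pass 2: group @1G, num_bytes = min(4M, 1G - 0)         = 4M
 *
 * so each group's used count only moves by the bytes inside it, even
 * for a range that crosses a group boundary.
 */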
1612
1613 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1614 {
1615         struct btrfs_block_group_cache *cache;
1616
1617         cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
1618         if (!cache)
1619                 return 0;
1620
1621         return cache->key.objectid;
1622 }
1623
1624
1625 int btrfs_update_pinned_extents(struct btrfs_root *root,
1626                                 u64 bytenr, u64 num, int pin)
1627 {
1628         u64 len;
1629         struct btrfs_block_group_cache *cache;
1630         struct btrfs_fs_info *fs_info = root->fs_info;
1631
1632         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1633         if (pin) {
1634                 set_extent_dirty(&fs_info->pinned_extents,
1635                                 bytenr, bytenr + num - 1, GFP_NOFS);
1636         } else {
1637                 clear_extent_dirty(&fs_info->pinned_extents,
1638                                 bytenr, bytenr + num - 1, GFP_NOFS);
1639         }
1640         while (num > 0) {
1641                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1642                 if (!cache) {
1643                         u64 first = first_logical_byte(root, bytenr);
1644                         WARN_ON(first < bytenr);
1645                         len = min(first - bytenr, num);
1646                 } else {
1647                         len = min(num, cache->key.offset -
1648                                   (bytenr - cache->key.objectid));
1649                 }
1650                 if (pin) {
1651                         if (cache) {
1652                                 spin_lock(&cache->lock);
1653                                 cache->pinned += len;
1654                                 cache->space_info->bytes_pinned += len;
1655                                 spin_unlock(&cache->lock);
1656                         }
1657                         fs_info->total_pinned += len;
1658                 } else {
1659                         if (cache) {
1660                                 spin_lock(&cache->lock);
1661                                 cache->pinned -= len;
1662                                 cache->space_info->bytes_pinned -= len;
1663                                 spin_unlock(&cache->lock);
1664                         }
1665                         fs_info->total_pinned -= len;
1666                 }
1667                 bytenr += len;
1668                 num -= len;
1669         }
1670         return 0;
1671 }
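
/*
 * The !cache branch above covers holes in the block group layout
 * (hypothetical example): if the first group starts at 1G and we pin
 * [512M, 1.5G), the first pass finds no group for 512M, so
 * first_logical_byte() gives 1G and len = 512M is skipped; only
 * fs_info->total_pinned moves for that stretch, no per-group counters.
 * The second pass then charges the remaining 512M to the group at 1G.
 */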
1672
1673 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1674 {
1675         u64 last = 0;
1676         u64 start;
1677         u64 end;
1678         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1679         int ret;
1680
1681         while (1) {
1682                 ret = find_first_extent_bit(pinned_extents, last,
1683                                             &start, &end, EXTENT_DIRTY);
1684                 if (ret)
1685                         break;
1686                 set_extent_dirty(copy, start, end, GFP_NOFS);
1687                 last = end + 1;
1688         }
1689         return 0;
1690 }
1691
1692 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1693                                struct btrfs_root *root,
1694                                struct extent_io_tree *unpin)
1695 {
1696         u64 start;
1697         u64 end;
1698         int ret;
1699         struct btrfs_block_group_cache *cache;
1700
1701         mutex_lock(&root->fs_info->alloc_mutex);
1702         while (1) {
1703                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1704                                             EXTENT_DIRTY);
1705                 if (ret)
1706                         break;
1707                 btrfs_update_pinned_extents(root, start, end + 1 - start, 0);
1708                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1709                 cache = btrfs_lookup_block_group(root->fs_info, start);
1710                 if (cache && cache->cached)
1711                         btrfs_add_free_space(cache, start, end - start + 1);
1712                 if (need_resched()) {
1713                         mutex_unlock(&root->fs_info->alloc_mutex);
1714                         cond_resched();
1715                         mutex_lock(&root->fs_info->alloc_mutex);
1716                 }
1717         }
1718         mutex_unlock(&root->fs_info->alloc_mutex);
1719         return 0;
1720 }
1721
1722 static int finish_current_insert(struct btrfs_trans_handle *trans,
1723                                  struct btrfs_root *extent_root)
1724 {
1725         u64 start;
1726         u64 end;
1727         u64 priv;
1728         struct btrfs_fs_info *info = extent_root->fs_info;
1729         struct btrfs_path *path;
1730         struct btrfs_extent_ref *ref;
1731         struct pending_extent_op *extent_op;
1732         struct btrfs_key key;
1733         struct btrfs_extent_item extent_item;
1734         int ret;
1735         int err = 0;
1736
1737         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1738         btrfs_set_stack_extent_refs(&extent_item, 1);
1739         path = btrfs_alloc_path();
1740         BUG_ON(!path);
1741         while (1) {
1742                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1743                                             &end, EXTENT_LOCKED);
1744                 if (ret)
1745                         break;
1746
1747                 ret = get_state_private(&info->extent_ins, start, &priv);
1748                 BUG_ON(ret);
1749                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
1750
1751                 if (extent_op->type == PENDING_EXTENT_INSERT) {
1752                         key.objectid = start;
1753                         key.offset = end + 1 - start;
1754                         key.type = BTRFS_EXTENT_ITEM_KEY;
1755                         err = btrfs_insert_item(trans, extent_root, &key,
1756                                         &extent_item, sizeof(extent_item));
1757                         BUG_ON(err);
1758
1759                         clear_extent_bits(&info->extent_ins, start, end,
1760                                           EXTENT_LOCKED, GFP_NOFS);
1761
1762                         err = insert_extent_backref(trans, extent_root, path,
1763                                                 start, extent_op->parent,
1764                                                 extent_root->root_key.objectid,
1765                                                 extent_op->generation,
1766                                                 extent_op->level, 0);
1767                         BUG_ON(err);
1768                 } else if (extent_op->type == PENDING_BACKREF_UPDATE) {
1769                         err = lookup_extent_backref(trans, extent_root, path,
1770                                                 start, extent_op->orig_parent,
1771                                                 extent_root->root_key.objectid,
1772                                                 extent_op->orig_generation, 0);
1773                         BUG_ON(err);
1774
1775                         clear_extent_bits(&info->extent_ins, start, end,
1776                                           EXTENT_LOCKED, GFP_NOFS);
1777
1778                         key.objectid = start;
1779                         key.offset = extent_op->parent;
1780                         key.type = BTRFS_EXTENT_REF_KEY;
1781                         err = btrfs_set_item_key_safe(trans, extent_root, path,
1782                                                       &key);
1783                         BUG_ON(err);
1784                         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
1785                                              struct btrfs_extent_ref);
1786                         btrfs_set_ref_generation(path->nodes[0], ref,
1787                                                  extent_op->generation);
1788                         btrfs_mark_buffer_dirty(path->nodes[0]);
1789                         btrfs_release_path(extent_root, path);
1790                 } else {
1791                         BUG();
1792                 }
1793                 kfree(extent_op);
1794
1795                 if (need_resched()) {
1796                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
1797                         cond_resched();
1798                         mutex_lock(&extent_root->fs_info->alloc_mutex);
1799                 }
1800         }
1801         btrfs_free_path(path);
1802         return 0;
1803 }
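
/*
 * Sketch of how the two op types get here: an allocation made from the
 * extent root itself is queued as PENDING_EXTENT_INSERT with its range
 * locked in extent_ins, and the loop above turns it into a real extent
 * item plus a backref.  A backref rewrite against the extent root
 * (queued by the ref-update paths earlier in this file) arrives as
 * PENDING_BACKREF_UPDATE, and the existing ref's key and generation
 * are rewritten in place via btrfs_set_item_key_safe().
 */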
1804
1805 static int pin_down_bytes(struct btrfs_trans_handle *trans,
1806                           struct btrfs_root *root,
1807                           u64 bytenr, u64 num_bytes, int is_data)
1808 {
1810         struct extent_buffer *buf;
1811
1812         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1813         if (is_data)
1814                 goto pinit;
1815
1816         buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1817         if (!buf)
1818                 goto pinit;
1819
1820         /* we can reuse a block if it hasn't been written
1821          * and it is from this transaction.  We can't
1822          * reuse anything from the tree log root because
1823          * it has tiny sub-transactions.
1824          */
1825         if (btrfs_buffer_uptodate(buf, 0) &&
1826             btrfs_try_tree_lock(buf)) {
1827                 u64 header_owner = btrfs_header_owner(buf);
1828                 u64 header_transid = btrfs_header_generation(buf);
1829                 if (header_owner != BTRFS_TREE_LOG_OBJECTID &&
1830                     header_transid == trans->transid &&
1831                     !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
1832                         clean_tree_block(NULL, root, buf);
1833                         btrfs_tree_unlock(buf);
1834                         free_extent_buffer(buf);
1835                         return 1;
1836                 }
1837                 btrfs_tree_unlock(buf);
1838         }
1839         free_extent_buffer(buf);
1840 pinit:
1841         btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
1842
1844         return 0;
1845 }
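
/*
 * Summary of the fast path above: a block dodges pinning only when all
 * of the following hold --
 *
 *   buffer found and uptodate          (header can be inspected)
 *   tree lock taken without blocking   (nobody else is using it)
 *   owner != BTRFS_TREE_LOG_OBJECTID   (log trees sub-commit early)
 *   generation == trans->transid       (born in this transaction)
 *   WRITTEN flag clear                 (never reached disk)
 *
 * such a block was never visible on disk and can be reused right away;
 * everything else goes through btrfs_update_pinned_extents().
 */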
1846
1847 /*
1848  * remove an extent from the root, returns 0 on success
1849  */
1850 static int __free_extent(struct btrfs_trans_handle *trans,
1851                          struct btrfs_root *root,
1852                          u64 bytenr, u64 num_bytes, u64 parent,
1853                          u64 root_objectid, u64 ref_generation,
1854                          u64 owner_objectid, u64 owner_offset,
1855                          int pin, int mark_free)
1856 {
1857         struct btrfs_path *path;
1858         struct btrfs_key key;
1859         struct btrfs_fs_info *info = root->fs_info;
1860         struct btrfs_root *extent_root = info->extent_root;
1861         struct extent_buffer *leaf;
1862         int ret;
1863         int extent_slot = 0;
1864         int found_extent = 0;
1865         int num_to_del = 1;
1866         struct btrfs_extent_item *ei;
1867         u32 refs;
1868
1869         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1870         key.objectid = bytenr;
1871         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1872         key.offset = num_bytes;
1873         path = btrfs_alloc_path();
1874         if (!path)
1875                 return -ENOMEM;
1876
1877         path->reada = 1;
1878         ret = lookup_extent_backref(trans, extent_root, path, bytenr, parent,
1879                                     root_objectid, ref_generation, 1);
1880         if (ret == 0) {
1881                 struct btrfs_key found_key;
1882                 extent_slot = path->slots[0];
1883                 while (extent_slot > 0) {
1884                         extent_slot--;
1885                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1886                                               extent_slot);
1887                         if (found_key.objectid != bytenr)
1888                                 break;
1889                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1890                             found_key.offset == num_bytes) {
1891                                 found_extent = 1;
1892                                 break;
1893                         }
1894                         if (path->slots[0] - extent_slot > 5)
1895                                 break;
1896                 }
1897                 if (!found_extent) {
1898                         ret = remove_extent_backref(trans, extent_root, path);
1899                         BUG_ON(ret);
1900                         btrfs_release_path(extent_root, path);
1901                         ret = btrfs_search_slot(trans, extent_root,
1902                                                 &key, path, -1, 1);
1903                         BUG_ON(ret);
1904                         extent_slot = path->slots[0];
1905                 }
1906         } else {
1907                 btrfs_print_leaf(extent_root, path->nodes[0]);
1908                 WARN_ON(1);
1909                 printk(KERN_ERR "Unable to find ref byte nr %Lu root %Lu "
1910                        "gen %Lu owner %Lu offset %Lu\n", bytenr,
1911                        root_objectid, ref_generation, owner_objectid,
1912                        owner_offset);
1913         }
1914
1915         leaf = path->nodes[0];
1916         ei = btrfs_item_ptr(leaf, extent_slot,
1917                             struct btrfs_extent_item);
1918         refs = btrfs_extent_refs(leaf, ei);
1919         BUG_ON(refs == 0);
1920         refs -= 1;
1921         btrfs_set_extent_refs(leaf, ei, refs);
1922
1923         btrfs_mark_buffer_dirty(leaf);
1924
1925         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1926                 struct btrfs_extent_ref *ref;
1927                 ref = btrfs_item_ptr(leaf, path->slots[0],
1928                                      struct btrfs_extent_ref);
1929                 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1);
1930                 /* if the back ref and the extent are next to each other
1931                  * they get deleted below in one shot
1932                  */
1933                 path->slots[0] = extent_slot;
1934                 num_to_del = 2;
1935         } else if (found_extent) {
1936                 /* otherwise delete the extent back ref */
1937                 ret = remove_extent_backref(trans, extent_root, path);
1938                 BUG_ON(ret);
1939                 /* if refs are 0, we need to setup the path for deletion */
1940                 if (refs == 0) {
1941                         btrfs_release_path(extent_root, path);
1942                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1943                                                 -1, 1);
1944                         BUG_ON(ret);
1945                 }
1946         }
1947
1948         if (refs == 0) {
1949                 u64 super_used;
1950                 u64 root_used;
1951 #ifdef BIO_RW_DISCARD
1952                 u64 map_length = num_bytes;
1953                 struct btrfs_multi_bio *multi = NULL;
1954 #endif
1955
1956                 if (pin) {
1957                         ret = pin_down_bytes(trans, root, bytenr, num_bytes,
1958                                 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID);
1959                         if (ret > 0)
1960                                 mark_free = 1;
1961                         BUG_ON(ret < 0);
1962                 }
1963
1964                 /* block accounting for super block */
1965                 spin_lock_irq(&info->delalloc_lock);
1966                 super_used = btrfs_super_bytes_used(&info->super_copy);
1967                 btrfs_set_super_bytes_used(&info->super_copy,
1968                                            super_used - num_bytes);
1969                 spin_unlock_irq(&info->delalloc_lock);
1970
1971                 /* block accounting for root item */
1972                 root_used = btrfs_root_used(&root->root_item);
1973                 btrfs_set_root_used(&root->root_item,
1974                                            root_used - num_bytes);
1975                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1976                                       num_to_del);
1977                 BUG_ON(ret);
1978                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1979                                          mark_free);
1980                 BUG_ON(ret);
1981
1982 #ifdef BIO_RW_DISCARD
1983                 /* Tell the block device(s) that the sectors can be discarded */
1984                 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
1985                                       bytenr, &map_length, &multi, 0);
1986                 if (!ret) {
1987                         struct btrfs_bio_stripe *stripe = multi->stripes;
1988                         int i;
1989
1990                         if (map_length > num_bytes)
1991                                 map_length = num_bytes;
1992
1993                         for (i = 0; i < multi->num_stripes; i++, stripe++) {
1994                                 blkdev_issue_discard(stripe->dev->bdev,
1995                                                      stripe->physical >> 9,
1996                                                      map_length >> 9);
1997                         }
1998                         kfree(multi);
1999                 }
2000 #endif
2001         }
2002         btrfs_free_path(path);
2003         finish_current_insert(trans, extent_root);
2004         return ret;
2005 }
2006
2007 /*
2008  * find all the blocks marked as pending in the pending_del tree and
2009  * remove them from the extent allocation tree
2010  */
2011 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
2012                                btrfs_root *extent_root)
2013 {
2014         int ret;
2015         int err = 0;
2016         int mark_free = 0;
2017         u64 start;
2018         u64 end;
2019         u64 priv;
2020         struct extent_io_tree *pending_del;
2021         struct extent_io_tree *extent_ins;
2022         struct pending_extent_op *extent_op;
2023
2024         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2025         extent_ins = &extent_root->fs_info->extent_ins;
2026         pending_del = &extent_root->fs_info->pending_del;
2027
2028         while (1) {
2029                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
2030                                             EXTENT_LOCKED);
2031                 if (ret)
2032                         break;
2033
2034                 ret = get_state_private(pending_del, start, &priv);
2035                 BUG_ON(ret);
2036                 extent_op = (struct pending_extent_op *)(unsigned long)priv;
2037
2038                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
2039                                   GFP_NOFS);
2040
2041                 ret = pin_down_bytes(trans, extent_root, start,
2042                                      end + 1 - start, 0);
2043                 mark_free = ret > 0;
2044                 if (!test_range_bit(extent_ins, start, end,
2045                                     EXTENT_LOCKED, 0)) {
2046 free_extent:
2047                         ret = __free_extent(trans, extent_root,
2048                                             start, end + 1 - start,
2049                                             extent_op->orig_parent,
2050                                             extent_root->root_key.objectid,
2051                                             extent_op->orig_generation,
2052                                             extent_op->level, 0, 0, mark_free);
2053                         kfree(extent_op);
2054                 } else {
2055                         kfree(extent_op);
2056                         ret = get_state_private(extent_ins, start, &priv);
2057                         BUG_ON(ret);
2058                         extent_op = (struct pending_extent_op *)
2059                                                         (unsigned long)priv;
2060
2061                         clear_extent_bits(extent_ins, start, end,
2062                                           EXTENT_LOCKED, GFP_NOFS);
2063
2064                         if (extent_op->type == PENDING_BACKREF_UPDATE)
2065                                 goto free_extent;
2066
2067                         ret = update_block_group(trans, extent_root, start,
2068                                                 end + 1 - start, 0, mark_free);
2069                         BUG_ON(ret);
2070                         kfree(extent_op);
2071                 }
2072                 if (ret)
2073                         err = ret;
2074
2075                 if (need_resched()) {
2076                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2077                         cond_resched();
2078                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2079                 }
2080         }
2081         return err;
2082 }
2083
2084 /*
2085  * remove an extent from the root (possibly deferred), returns 0 on success
2086  */
2087 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
2088                                struct btrfs_root *root,
2089                                u64 bytenr, u64 num_bytes, u64 parent,
2090                                u64 root_objectid, u64 ref_generation,
2091                                u64 owner_objectid, u64 owner_offset, int pin)
2092 {
2093         struct btrfs_root *extent_root = root->fs_info->extent_root;
2094         int pending_ret;
2095         int ret;
2096
2097         WARN_ON(num_bytes < root->sectorsize);
2098         if (root == extent_root) {
2099                 struct pending_extent_op *extent_op;
2100
2101                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2102                 BUG_ON(!extent_op);
2103
2104                 extent_op->type = PENDING_EXTENT_DELETE;
2105                 extent_op->bytenr = bytenr;
2106                 extent_op->num_bytes = num_bytes;
2107                 extent_op->parent = parent;
2108                 extent_op->orig_parent = parent;
2109                 extent_op->generation = ref_generation;
2110                 extent_op->orig_generation = ref_generation;
2111                 extent_op->level = (int)owner_objectid;
2112
2113                 set_extent_bits(&root->fs_info->pending_del,
2114                                 bytenr, bytenr + num_bytes - 1,
2115                                 EXTENT_LOCKED, GFP_NOFS);
2116                 set_state_private(&root->fs_info->pending_del,
2117                                   bytenr, (unsigned long)extent_op);
2118                 return 0;
2119         }
2120         /* if metadata always pin */
2121         if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
2122                 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2123                         struct btrfs_block_group_cache *cache;
2124
2125                         /* btrfs_free_reserved_extent */
2126                         cache = btrfs_lookup_block_group(root->fs_info, bytenr);
2127                         BUG_ON(!cache);
2128                         btrfs_add_free_space(cache, bytenr, num_bytes);
2129                         return 0;
2130                 }
2131                 pin = 1;
2132         }
2133
2134         /* if data pin when any transaction has committed this */
2135         if (ref_generation != trans->transid)
2136                 pin = 1;
2137
2138         ret = __free_extent(trans, root, bytenr, num_bytes, parent,
2139                             root_objectid, ref_generation, owner_objectid,
2140                             owner_offset, pin, pin == 0);
2141
2142         finish_current_insert(trans, root->fs_info->extent_root);
2143         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
2144         return ret ? ret : pending_ret;
2145 }
2146
2147 int btrfs_free_extent(struct btrfs_trans_handle *trans,
2148                       struct btrfs_root *root,
2149                       u64 bytenr, u64 num_bytes, u64 parent,
2150                       u64 root_objectid, u64 ref_generation,
2151                       u64 owner_objectid, u64 owner_offset, int pin)
2152 {
2153         int ret;
2154
2155         maybe_lock_mutex(root);
2156         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent,
2157                                   root_objectid, ref_generation,
2158                                   owner_objectid, owner_offset, pin);
2159         maybe_unlock_mutex(root);
2160         return ret;
2161 }
2162
2163 static u64 stripe_align(struct btrfs_root *root, u64 val)
2164 {
2165         u64 mask = ((u64)root->stripesize - 1);
2166         u64 ret = (val + mask) & ~mask;
2167         return ret;
2168 }
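
/*
 * stripe_align() is a plain round-up; example with a hypothetical 64K
 * (power of two) stripesize:
 *
 *   mask = 0xffff
 *   stripe_align(root, 0x1c123) == (0x1c123 + 0xffff) & ~0xffff
 *                               == 0x20000
 *
 * already-aligned values come back unchanged.
 */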
2169
2170 /*
2171  * walks the btree of allocated extents and finds a hole of a given size.
2172  * The key ins is changed to record the hole:
2173  * ins->objectid == block start
2174  * ins->flags = BTRFS_EXTENT_ITEM_KEY
2175  * ins->offset == number of blocks
2176  * Any available blocks before search_start are skipped.
2177  */
2178 static noinline int find_free_extent(struct btrfs_trans_handle *trans,
2179                                      struct btrfs_root *orig_root,
2180                                      u64 num_bytes, u64 empty_size,
2181                                      u64 search_start, u64 search_end,
2182                                      u64 hint_byte, struct btrfs_key *ins,
2183                                      u64 exclude_start, u64 exclude_nr,
2184                                      int data)
2185 {
2186         int ret;
2187         u64 orig_search_start;
2188         struct btrfs_root *root = orig_root->fs_info->extent_root;
2189         struct btrfs_fs_info *info = root->fs_info;
2190         u64 total_needed = num_bytes;
2191         u64 *last_ptr = NULL;
2192         struct btrfs_block_group_cache *block_group;
2193         int chunk_alloc_done = 0;
2194         int empty_cluster = 2 * 1024 * 1024;
2195         int allowed_chunk_alloc = 0;
2196
2197         WARN_ON(num_bytes < root->sectorsize);
2198         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
2199
2200         if (orig_root->ref_cows || empty_size)
2201                 allowed_chunk_alloc = 1;
2202
2203         if (data & BTRFS_BLOCK_GROUP_METADATA) {
2204                 last_ptr = &root->fs_info->last_alloc;
2205                 empty_cluster = 256 * 1024;
2206         }
2207
2208         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD))
2209                 last_ptr = &root->fs_info->last_data_alloc;
2210
2211         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2212                 last_ptr = &root->fs_info->last_log_alloc;
2213                 if (*last_ptr == 0 && root->fs_info->last_alloc) {
2214                         *last_ptr = root->fs_info->last_alloc + empty_cluster;
2215                 }
2216         }
2217
2218         if (last_ptr) {
2219                 if (*last_ptr)
2220                         hint_byte = *last_ptr;
2221                 else
2222                         empty_size += empty_cluster;
2223         }
2224
2225         search_start = max(search_start, first_logical_byte(root, 0));
2226         orig_search_start = search_start;
2227
2228         if (search_end == (u64)-1)
2229                 search_end = btrfs_super_total_bytes(&info->super_copy);
2230
2231         search_start = max(search_start, hint_byte);
2232         total_needed += empty_size;
2233
2234 new_group:
2235         block_group = btrfs_lookup_block_group(info, search_start);
2236
2237         /*
2238          * OK, this looks a little tricky, but it's really simple.  First, if
2239          * we didn't find a block group, obviously we want to start over.
2240          * Second, if the block group we found does not match the type we
2241          * need, and we have a last_ptr that isn't 0, chances are the last
2242          * allocation we made was at the end of the block group, so let's
2243          * skip looking through the rest of the block groups and start at
2244          * the beginning.  This helps with metadata allocations, since you
2245          * are likely to have a bunch of data block groups to search through
2246          * before you realize you need to start over, so starting over right
2247          * away saves time.
2248          */
2249         if (!block_group || (!block_group_bits(block_group, data) &&
2250                              last_ptr && *last_ptr)) {
2251                 if (search_start != orig_search_start) {
2252                         if (last_ptr && *last_ptr)
2253                                 *last_ptr = 0;
2254                         search_start = orig_search_start;
2255                         goto new_group;
2256                 } else if (!chunk_alloc_done && allowed_chunk_alloc) {
2257                         ret = do_chunk_alloc(trans, root,
2258                                              num_bytes + 2 * 1024 * 1024,
2259                                              data, 1);
2260                         if (ret < 0)
2261                                 goto error;
2266                         BUG_ON(ret);
2267                         chunk_alloc_done = 1;
2268                         search_start = orig_search_start;
2269                         goto new_group;
2270                 } else {
2271                         ret = -ENOSPC;
2272                         goto error;
2273                 }
2274         }
2275
2276         /*
2277          * this is going to search through all of the existing block groups
2278          * it can find; if we don't find anything, we need to see if we can
2279          * allocate what we need.
2280          */
2281         ret = find_free_space(root, &block_group, &search_start,
2282                               total_needed, data);
2283         if (ret == -ENOSPC) {
2284                 /*
2285                  * instead of allocating, start back at the original search
2286                  * start and see if there is anything to be found; if not,
2287                  * then we allocate
2288                  */
2289                 if (search_start != orig_search_start) {
2290                         if (last_ptr && *last_ptr) {
2291                                 *last_ptr = 0;
2292                                 total_needed += empty_cluster;
2293                         }
2294                         search_start = orig_search_start;
2295                         goto new_group;
2296                 }
2297
2298                 /*
2299                  * we've already allocated, we're pretty screwed
2300                  */
2301                 if (chunk_alloc_done) {
2302                         goto error;
2303                 } else if (!allowed_chunk_alloc && block_group &&
2304                            block_group_bits(block_group, data)) {
2305                         block_group->space_info->force_alloc = 1;
2306                         goto error;
2307                 } else if (!allowed_chunk_alloc) {
2308                         goto error;
2309                 }
2310
2311                 ret = do_chunk_alloc(trans, root, num_bytes + 2 * 1024 * 1024,
2312                                      data, 1);
2313                 if (ret < 0)
2314                         goto error;
2315
2316                 BUG_ON(ret);
2317                 chunk_alloc_done = 1;
2318                 if (block_group)
2319                         search_start = block_group->key.objectid +
2320                                 block_group->key.offset;
2321                 else
2322                         search_start = orig_search_start;
2323                 goto new_group;
2324         }
2325
2326         if (ret)
2327                 goto error;
2328
2329         search_start = stripe_align(root, search_start);
2330         ins->objectid = search_start;
2331         ins->offset = num_bytes;
2332
2333         if (ins->objectid + num_bytes >= search_end) {
2334                 search_start = orig_search_start;
2335                 if (chunk_alloc_done) {
2336                         ret = -ENOSPC;
2337                         goto error;
2338                 }
2339                 goto new_group;
2340         }
2341
2342         if (ins->objectid + num_bytes >
2343             block_group->key.objectid + block_group->key.offset) {
2344                 if (search_start == orig_search_start && chunk_alloc_done) {
2345                         ret = -ENOSPC;
2346                         goto error;
2347                 }
2348                 search_start = block_group->key.objectid +
2349                         block_group->key.offset;
2350                 goto new_group;
2351         }
2352
2353         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
2354             ins->objectid < exclude_start + exclude_nr)) {
2355                 search_start = exclude_start + exclude_nr;
2356                 goto new_group;
2357         }
2358
2359         if (!(data & BTRFS_BLOCK_GROUP_DATA))
2360                 trans->block_group = block_group;
2361
2363         if (last_ptr) {
2364                 *last_ptr = ins->objectid + ins->offset;
2365                 if (*last_ptr ==
2366                     btrfs_super_total_bytes(&root->fs_info->super_copy))
2367                         *last_ptr = 0;
2368         }
2369
2370         ret = 0;
2371 error:
2372         return ret;
2373 }
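
/*
 * Besides filling in ins, a successful search maintains the cluster
 * hint: for metadata (and for data on SSD mounts) *last_ptr is moved
 * to ins->objectid + ins->offset, so e.g. a 16K reservation ending at
 * 0x40104000 makes the next allocation try to continue from there.
 * The hint wraps back to 0 at the end of the filesystem.
 */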
2374
2375 static void dump_space_info(struct btrfs_space_info *info, u64 bytes)
2376 {
2377         struct btrfs_block_group_cache *cache;
2378         struct list_head *l;
2379
2380         printk(KERN_INFO "space_info has %Lu free, is %sfull\n",
2381                info->total_bytes - info->bytes_used - info->bytes_pinned,
2382                (info->full) ? "" : "not ");
2383
2384         spin_lock(&info->lock);
2385         list_for_each(l, &info->block_groups) {
2386                 cache = list_entry(l, struct btrfs_block_group_cache, list);
2387                 spin_lock(&cache->lock);
2388                 printk(KERN_INFO "block group %Lu has %Lu bytes, %Lu used "
2389                        "%Lu pinned\n",
2390                        cache->key.objectid, cache->key.offset,
2391                        btrfs_block_group_used(&cache->item), cache->pinned);
2392                 btrfs_dump_free_space(cache, bytes);
2393                 spin_unlock(&cache->lock);
2394         }
2395         spin_unlock(&info->lock);
2396 }

2397 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2398                                   struct btrfs_root *root,
2399                                   u64 num_bytes, u64 min_alloc_size,
2400                                   u64 empty_size, u64 hint_byte,
2401                                   u64 search_end, struct btrfs_key *ins,
2402                                   u64 data)
2403 {
2404         int ret;
2405         u64 search_start = 0;
2406         u64 alloc_profile;
2407         struct btrfs_fs_info *info = root->fs_info;
2408         struct btrfs_block_group_cache *cache;
2409
2410         if (data) {
2411                 alloc_profile = info->avail_data_alloc_bits &
2412                                 info->data_alloc_profile;
2413                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
2414         } else if (root == root->fs_info->chunk_root) {
2415                 alloc_profile = info->avail_system_alloc_bits &
2416                                 info->system_alloc_profile;
2417                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
2418         } else {
2419                 alloc_profile = info->avail_metadata_alloc_bits &
2420                                 info->metadata_alloc_profile;
2421                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
2422         }
2423 again:
2424         data = reduce_alloc_profile(root, data);
2425         /*
2426          * the only place that sets empty_size is btrfs_realloc_node, which
2427          * is not called recursively on allocations
2428          */
2429         if (empty_size || root->ref_cows) {
2430                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
2431                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2432                                      2 * 1024 * 1024,
2433                                      BTRFS_BLOCK_GROUP_METADATA |
2434                                      (info->metadata_alloc_profile &
2435                                       info->avail_metadata_alloc_bits), 0);
2436                 }
2437                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2438                                      num_bytes + 2 * 1024 * 1024, data, 0);
2439         }
2440
2441         WARN_ON(num_bytes < root->sectorsize);
2442         ret = find_free_extent(trans, root, num_bytes, empty_size,
2443                                search_start, search_end, hint_byte, ins,
2444                                trans->alloc_exclude_start,
2445                                trans->alloc_exclude_nr, data);
2446
2447         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
2448                 num_bytes = num_bytes >> 1;
2449                 num_bytes = num_bytes & ~(root->sectorsize - 1);
2450                 num_bytes = max(num_bytes, min_alloc_size);
2451                 do_chunk_alloc(trans, root->fs_info->extent_root,
2452                                num_bytes, data, 1);
2453                 goto again;
2454         }
2455         if (ret) {
2456                 struct btrfs_space_info *sinfo;
2457
2458                 sinfo = __find_space_info(root->fs_info, data);
2459                 printk(KERN_ERR "allocation failed flags %Lu, wanted %Lu\n",
2460                        data, num_bytes);
2461                 dump_space_info(sinfo, num_bytes);
2462                 BUG();
2463         }
2464         cache = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2465         if (!cache) {
2466                 printk(KERN_ERR "Unable to find block group for %Lu\n", ins->objectid);
2467                 return -ENOSPC;
2468         }
2469
2470         ret = btrfs_remove_free_space(cache, ins->objectid, ins->offset);
2471
2472         return ret;
2473 }
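
/*
 * The -ENOSPC retry above halves the request while keeping it sector
 * aligned; worked example with 4K sectors and min_alloc_size == 8K:
 *
 *   attempt 1: num_bytes = 24K                     -> -ENOSPC
 *   attempt 2: 24K >> 1 = 12K                      -> -ENOSPC
 *   attempt 3: 12K >> 1 = 6K, masked down to 4K,
 *              raised to max(4K, 8K) = 8K          -> -ENOSPC
 *
 * at 8K num_bytes is no longer above min_alloc_size, so the failure is
 * final: the space info is dumped and the allocator BUG()s.
 */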
2474
2475 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
2476 {
2477         struct btrfs_block_group_cache *cache;
2478
2479         maybe_lock_mutex(root);
2480         cache = btrfs_lookup_block_group(root->fs_info, start);
2481         if (!cache) {
2482                 printk(KERN_ERR "Unable to find block group for %Lu\n", start);
2483                 maybe_unlock_mutex(root);
2484                 return -ENOSPC;
2485         }
2486         btrfs_add_free_space(cache, start, len);
2487         maybe_unlock_mutex(root);
2488         return 0;
2489 }
2490
2491 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
2492                                   struct btrfs_root *root,
2493                                   u64 num_bytes, u64 min_alloc_size,
2494                                   u64 empty_size, u64 hint_byte,
2495                                   u64 search_end, struct btrfs_key *ins,
2496                                   u64 data)
2497 {
2498         int ret;
2499         maybe_lock_mutex(root);
2500         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
2501                                      empty_size, hint_byte, search_end, ins,
2502                                      data);
2503         maybe_unlock_mutex(root);
2504         return ret;
2505 }
2506
2507 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2508                                          struct btrfs_root *root, u64 parent,
2509                                          u64 root_objectid, u64 ref_generation,
2510                                          u64 owner, u64 owner_offset,
2511                                          struct btrfs_key *ins)
2512 {
2513         int ret;
2514         int pending_ret;
2515         u64 super_used;
2516         u64 root_used;
2517         u64 num_bytes = ins->offset;
2518         u32 sizes[2];
2519         struct btrfs_fs_info *info = root->fs_info;
2520         struct btrfs_root *extent_root = info->extent_root;
2521         struct btrfs_extent_item *extent_item;
2522         struct btrfs_extent_ref *ref;
2523         struct btrfs_path *path;
2524         struct btrfs_key keys[2];
2525
2526         if (parent == 0)
2527                 parent = ins->objectid;
2528
2529         /* block accounting for super block */
2530         spin_lock_irq(&info->delalloc_lock);
2531         super_used = btrfs_super_bytes_used(&info->super_copy);
2532         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
2533         spin_unlock_irq(&info->delalloc_lock);
2534
2535         /* block accounting for root item */
2536         root_used = btrfs_root_used(&root->root_item);
2537         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
2538
2539         if (root == extent_root) {
2540                 struct pending_extent_op *extent_op;
2541
2542                 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
2543                 BUG_ON(!extent_op);
2544
2545                 extent_op->type = PENDING_EXTENT_INSERT;
2546                 extent_op->bytenr = ins->objectid;
2547                 extent_op->num_bytes = ins->offset;
2548                 extent_op->parent = parent;
2549                 extent_op->orig_parent = 0;
2550                 extent_op->generation = ref_generation;
2551                 extent_op->orig_generation = 0;
2552                 extent_op->level = (int)owner;
2553
2554                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
2555                                 ins->objectid + ins->offset - 1,
2556                                 EXTENT_LOCKED, GFP_NOFS);
2557                 set_state_private(&root->fs_info->extent_ins,
2558                                   ins->objectid, (unsigned long)extent_op);
2559                 goto update_block;
2560         }
2561
2562         memcpy(&keys[0], ins, sizeof(*ins));
2563         keys[1].objectid = ins->objectid;
2564         keys[1].type = BTRFS_EXTENT_REF_KEY;
2565         keys[1].offset = parent;
2566         sizes[0] = sizeof(*extent_item);
2567         sizes[1] = sizeof(*ref);
2568
2569         path = btrfs_alloc_path();
2570         BUG_ON(!path);
2571
2572         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2573                                        sizes, 2);
2574         BUG_ON(ret);
2575
2576         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2577                                      struct btrfs_extent_item);
2578         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2579         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2580                              struct btrfs_extent_ref);
2581
2582         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2583         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2584         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2585         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2586         btrfs_set_ref_num_refs(path->nodes[0], ref, 1);
2587
2588         btrfs_mark_buffer_dirty(path->nodes[0]);
2589
2590         trans->alloc_exclude_start = 0;
2591         trans->alloc_exclude_nr = 0;
2592         btrfs_free_path(path);
2593         finish_current_insert(trans, extent_root);
2594         pending_ret = del_pending_extents(trans, extent_root);
2595
2596         if (ret)
2597                 goto out;
2598         if (pending_ret) {
2599                 ret = pending_ret;
2600                 goto out;
2601         }
2602
2603 update_block:
2604         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2605         if (ret) {
2606                 printk(KERN_ERR "update block group failed for %Lu %Lu\n",
2607                        ins->objectid, ins->offset);
2608                 BUG();
2609         }
2610 out:
2611         return ret;
2612 }
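
/*
 * Leaf layout produced by the two-key insert above (schematic):
 *
 *   slot n:     (bytenr, BTRFS_EXTENT_ITEM_KEY, num_bytes)  refs = 1
 *   slot n + 1: (bytenr, BTRFS_EXTENT_REF_KEY,  parent)     root, gen,
 *               owner, owner_offset, num_refs = 1
 *
 * keeping the first backref physically next to its extent item is what
 * lets __free_extent() remove both with a single btrfs_del_items()
 * call once the last reference drops.
 */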
2613
2614 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2615                                 struct btrfs_root *root, u64 parent,
2616                                 u64 root_objectid, u64 ref_generation,
2617                                 u64 owner, u64 owner_offset,
2618                                 struct btrfs_key *ins)
2619 {
2620         int ret;
2621
2622         if (root_objectid == BTRFS_TREE_LOG_OBJECTID)
2623                 return 0;
2624         maybe_lock_mutex(root);
2625         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2626                                             root_objectid, ref_generation,
2627                                             owner, owner_offset, ins);
2628         maybe_unlock_mutex(root);
2629         return ret;
2630 }
2631
2632 /*
2633  * this is used by the tree logging recovery code.  It records that
2634  * an extent has been allocated and makes sure to clear the free
2635  * space cache bits as well
2636  */
2637 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans,
2638                                 struct btrfs_root *root, u64 parent,
2639                                 u64 root_objectid, u64 ref_generation,
2640                                 u64 owner, u64 owner_offset,
2641                                 struct btrfs_key *ins)
2642 {
2643         int ret;
2644         struct btrfs_block_group_cache *block_group;
2645
2646         maybe_lock_mutex(root);
2647         block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
2648         cache_block_group(root, block_group);
2649
2650         ret = btrfs_remove_free_space(block_group, ins->objectid, ins->offset);
2651         BUG_ON(ret);
2652         ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2653                                             root_objectid, ref_generation,
2654                                             owner, owner_offset, ins);
2655         maybe_unlock_mutex(root);
2656         return ret;
2657 }
2658
2659 /*
2660  * finds a free extent and does all the dirty work required for allocation.
2661  * the key for the extent is returned through ins.
2663  *
2664  * returns 0 if everything worked, non-zero otherwise.
2665  */
2666 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2667                        struct btrfs_root *root,
2668                        u64 num_bytes, u64 parent, u64 min_alloc_size,
2669                        u64 root_objectid, u64 ref_generation,
2670                        u64 owner_objectid, u64 owner_offset,
2671                        u64 empty_size, u64 hint_byte,
2672                        u64 search_end, struct btrfs_key *ins, u64 data)
2673 {
2674         int ret;
2675
2676         maybe_lock_mutex(root);
2677
2678         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2679                                      min_alloc_size, empty_size, hint_byte,
2680                                      search_end, ins, data);
2681         BUG_ON(ret);
2682         if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
2683                 ret = __btrfs_alloc_reserved_extent(trans, root, parent,
2684                                         root_objectid, ref_generation,
2685                                         owner_objectid, owner_offset, ins);
2686                 BUG_ON(ret);
2687
2688         }
2689         maybe_unlock_mutex(root);
2690         return ret;
2691 }
2692
2693 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
2694                                             struct btrfs_root *root,
2695                                             u64 bytenr, u32 blocksize)
2696 {
2697         struct extent_buffer *buf;
2698
2699         buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
2700         if (!buf)
2701                 return ERR_PTR(-ENOMEM);
2702         btrfs_set_header_generation(buf, trans->transid);
2703         btrfs_tree_lock(buf);
2704         clean_tree_block(trans, root, buf);
2705         btrfs_set_buffer_uptodate(buf);
2706         if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
2707                 set_extent_dirty(&root->dirty_log_pages, buf->start,
2708                          buf->start + buf->len - 1, GFP_NOFS);
2709         } else {
2710                 set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2711                          buf->start + buf->len - 1, GFP_NOFS);
2712         }
2713         trans->blocks_used++;
2714         return buf;
2715 }
2716
2717 /*
2718  * helper function to allocate a block for a given tree
2719  * returns the tree buffer or an ERR_PTR on failure.
2720  */
2721 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2722                                              struct btrfs_root *root,
2723                                              u32 blocksize, u64 parent,
2724                                              u64 root_objectid,
2725                                              u64 ref_generation,
2726                                              int level,
2727                                              u64 hint,
2728                                              u64 empty_size)
2729 {
2730         struct btrfs_key ins;
2731         int ret;
2732         struct extent_buffer *buf;
2733
2734         ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize,
2735                                  root_objectid, ref_generation, level, 0,
2736                                  empty_size, hint, (u64)-1, &ins, 0);
2737         if (ret) {
2738                 BUG_ON(ret > 0);
2739                 return ERR_PTR(ret);
2740         }
2741
2742         buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize);
2743         return buf;
2744 }
2745
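/*
 * walk all the file extent items in a leaf that is going away with a
 * dead snapshot and drop the ref count on each extent they point to
 */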
2746 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
2747                         struct btrfs_root *root, struct extent_buffer *leaf)
2748 {
2749         u64 leaf_owner;
2750         u64 leaf_generation;
2751         struct btrfs_key key;
2752         struct btrfs_file_extent_item *fi;
2753         int i;
2754         int nritems;
2755         int ret;
2756
2757         BUG_ON(!btrfs_is_leaf(leaf));
2758         nritems = btrfs_header_nritems(leaf);
2759         leaf_owner = btrfs_header_owner(leaf);
2760         leaf_generation = btrfs_header_generation(leaf);
2761
2762         for (i = 0; i < nritems; i++) {
2763                 u64 disk_bytenr;
2764                 cond_resched();
2765
2766                 btrfs_item_key_to_cpu(leaf, &key, i);
2767                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2768                         continue;
2769                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2770                 if (btrfs_file_extent_type(leaf, fi) ==
2771                     BTRFS_FILE_EXTENT_INLINE)
2772                         continue;
2773                 /*
2774                  * FIXME make sure to insert a trans record that
2775                  * repeats the snapshot del on crash
2776                  */
2777                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2778                 if (disk_bytenr == 0)
2779                         continue;
2780
2781                 mutex_lock(&root->fs_info->alloc_mutex);
2782                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2783                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2784                                 leaf->start, leaf_owner, leaf_generation,
2785                                 key.objectid, key.offset, 0);
2786                 mutex_unlock(&root->fs_info->alloc_mutex);
2787                 BUG_ON(ret);
2788
2789                 atomic_inc(&root->fs_info->throttle_gen);
2790                 wake_up(&root->fs_info->transaction_throttle);
2791                 cond_resched();
2792         }
2793         return 0;
2794 }
2795
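/*
 * same as btrfs_drop_leaf_ref, but works from the extents recorded in
 * a cached btrfs_leaf_ref, so the leaf never has to be read from disk
 */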
2796 static int noinline cache_drop_leaf_ref(struct btrfs_trans_handle *trans,
2797                                         struct btrfs_root *root,
2798                                         struct btrfs_leaf_ref *ref)
2799 {
2800         int i;
2801         int ret;
2802         struct btrfs_extent_info *info = ref->extents;
2803
2804         for (i = 0; i < ref->nritems; i++) {
2805                 mutex_lock(&root->fs_info->alloc_mutex);
2806                 ret = __btrfs_free_extent(trans, root, info->bytenr,
2807                                           info->num_bytes, ref->bytenr,
2808                                           ref->owner, ref->generation,
2809                                           info->objectid, info->offset, 0);
2810                 mutex_unlock(&root->fs_info->alloc_mutex);
2811
2812                 atomic_inc(&root->fs_info->throttle_gen);
2813                 wake_up(&root->fs_info->transaction_throttle);
2814                 cond_resched();
2815
2816                 BUG_ON(ret);
2817                 info++;
2818         }
2819
2820         return 0;
2821 }
2822
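/*
 * look up the reference count on an extent while a snapshot is being
 * dropped, rescheduling as we go so other waiters can make progress
 */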
2823 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2824                               u32 *refs)
2825 {
2826         int ret;
2827
2828         ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2829         BUG_ON(ret);
2830
2831 #if 0 /* some debugging code in case we see problems here */
2832         /* if the ref count is one, it won't get increased again.  But
2833          * if the ref count is > 1, someone may be decreasing it at
2834          * the same time we are.
2835          */
2836         if (*refs != 1) {
2837                 struct extent_buffer *eb = NULL;
2838                 eb = btrfs_find_create_tree_block(root, start, len);
2839                 if (eb)
2840                         btrfs_tree_lock(eb);
2841
2842                 mutex_lock(&root->fs_info->alloc_mutex);
2843                 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs);
2844                 BUG_ON(ret);
2845                 mutex_unlock(&root->fs_info->alloc_mutex);
2846
2847                 if (eb) {
2848                         btrfs_tree_unlock(eb);
2849                         free_extent_buffer(eb);
2850                 }
2851                 if (*refs == 1) {
2852                         printk("block %llu went down to one during drop_snap\n",
2853                                (unsigned long long)start);
2854                 }
2855
2856         }
2857 #endif
2858
2859         cond_resched();
2860         return ret;
2861 }
2862
2863 /*
2864  * helper function for drop_snapshot, this walks down the tree dropping ref
2865  * counts as it goes.
2866  */
2867 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2868                                    struct btrfs_root *root,
2869                                    struct btrfs_path *path, int *level)
2870 {
2871         u64 root_owner;
2872         u64 root_gen;
2873         u64 bytenr;
2874         u64 ptr_gen;
2875         struct extent_buffer *next;
2876         struct extent_buffer *cur;
2877         struct extent_buffer *parent;
2878         struct btrfs_leaf_ref *ref;
2879         u32 blocksize;
2880         int ret;
2881         u32 refs;
2882
2883         WARN_ON(*level < 0);
2884         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2885         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2886                                 path->nodes[*level]->len, &refs);
2887         BUG_ON(ret);
2888         if (refs > 1)
2889                 goto out;
2890
2891         /*
2892          * walk down to the last node level and free all the leaves
2893          */
2894         while(*level >= 0) {
2895                 WARN_ON(*level < 0);
2896                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2897                 cur = path->nodes[*level];
2898
2899                 WARN_ON(btrfs_header_level(cur) != *level);
2901
2902                 if (path->slots[*level] >=
2903                     btrfs_header_nritems(cur))
2904                         break;
2905                 if (*level == 0) {
2906                         ret = btrfs_drop_leaf_ref(trans, root, cur);
2907                         BUG_ON(ret);
2908                         break;
2909                 }
2910                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2911                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2912                 blocksize = btrfs_level_size(root, *level - 1);
2913
2914                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2915                 BUG_ON(ret);
2916                 if (refs != 1) {
2917                         parent = path->nodes[*level];
2918                         root_owner = btrfs_header_owner(parent);
2919                         root_gen = btrfs_header_generation(parent);
2920                         path->slots[*level]++;
2921
2922                         mutex_lock(&root->fs_info->alloc_mutex);
2923                         ret = __btrfs_free_extent(trans, root, bytenr,
2924                                                 blocksize, parent->start,
2925                                                 root_owner, root_gen, 0, 0, 1);
2926                         BUG_ON(ret);
2927                         mutex_unlock(&root->fs_info->alloc_mutex);
2928
2929                         atomic_inc(&root->fs_info->throttle_gen);
2930                         wake_up(&root->fs_info->transaction_throttle);
2931                         cond_resched();
2932
2933                         continue;
2934                 }
2935                 /*
2936                  * at this point, we have a single ref, and since the
2937                  * only place referencing this extent is a dead root,
2938                  * the reference count should never go higher, so we
2939                  * don't need to check it again.
2940                  */
2941                 if (*level == 1) {
2942                         ref = btrfs_lookup_leaf_ref(root, bytenr);
2943                         if (ref) {
2944                                 ret = cache_drop_leaf_ref(trans, root, ref);
2945                                 BUG_ON(ret);
2946                                 btrfs_remove_leaf_ref(root, ref);
2947                                 btrfs_free_leaf_ref(root, ref);
2948                                 *level = 0;
2949                                 break;
2950                         }
2951                         if (printk_ratelimit())
2952                                 printk("leaf ref miss for bytenr %llu\n",
2953                                        (unsigned long long)bytenr);
2954                 }
2955                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2956                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2957                         free_extent_buffer(next);
2958
2959                         next = read_tree_block(root, bytenr, blocksize,
2960                                                ptr_gen);
2961                         cond_resched();
2962 #if 0
2963                         /*
2964                          * this is a debugging check and can go away;
2965                          * the ref should never go all the way down to 1
2966                          * at this point
2967                          */
2968                         ret = btrfs_lookup_extent_ref(NULL, root, bytenr,
2969                                                       blocksize, &refs);
2970                         BUG_ON(ret);
2971                         WARN_ON(refs != 1);
2972 #endif
2973                 }
2974                 WARN_ON(*level <= 0);
2975                 if (path->nodes[*level-1])
2976                         free_extent_buffer(path->nodes[*level-1]);
2977                 path->nodes[*level-1] = next;
2978                 *level = btrfs_header_level(next);
2979                 path->slots[*level] = 0;
2980                 cond_resched();
2981         }
2982 out:
2983         WARN_ON(*level < 0);
2984         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2985
2986         if (path->nodes[*level] == root->node) {
2987                 parent = path->nodes[*level];
2988                 bytenr = path->nodes[*level]->start;
2989         } else {
2990                 parent = path->nodes[*level + 1];
2991                 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]);
2992         }
2993
2994         blocksize = btrfs_level_size(root, *level);
2995         root_owner = btrfs_header_owner(parent);
2996         root_gen = btrfs_header_generation(parent);
2997
2998         mutex_lock(&root->fs_info->alloc_mutex);
2999         ret = __btrfs_free_extent(trans, root, bytenr, blocksize,
3000                                   parent->start, root_owner, root_gen,
3001                                   0, 0, 1);
3002         mutex_unlock(&root->fs_info->alloc_mutex);
3003         free_extent_buffer(path->nodes[*level]);
3004         path->nodes[*level] = NULL;
3005         *level += 1;
3006         BUG_ON(ret);
3007
3008         cond_resched();
3009         return 0;
3010 }
3011
3012 /*
3013  * helper for dropping snapshots.  This walks back up the tree in the path
3014  * to find the first node higher up where we haven't yet gone through
3015  * all the slots
3016  */
3017 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
3018                                  struct btrfs_root *root,
3019                                  struct btrfs_path *path, int *level)
3020 {
3021         u64 root_owner;
3022         u64 root_gen;
3023         struct btrfs_root_item *root_item = &root->root_item;
3024         int i;
3025         int slot;
3026         int ret;
3027
3028         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
3029                 slot = path->slots[i];
3030                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
3031                         struct extent_buffer *node;
3032                         struct btrfs_disk_key disk_key;
3033                         node = path->nodes[i];
3034                         path->slots[i]++;
3035                         *level = i;
3036                         WARN_ON(*level == 0);
3037                         btrfs_node_key(node, &disk_key, path->slots[i]);
3038                         memcpy(&root_item->drop_progress,
3039                                &disk_key, sizeof(disk_key));
3040                         root_item->drop_level = i;
3041                         return 0;
3042                 } else {
3043                         struct extent_buffer *parent;
3044                         if (path->nodes[*level] == root->node)
3045                                 parent = path->nodes[*level];
3046                         else
3047                                 parent = path->nodes[*level + 1];
3048
3049                         root_owner = btrfs_header_owner(parent);
3050                         root_gen = btrfs_header_generation(parent);
3051                         ret = btrfs_free_extent(trans, root,
3052                                                 path->nodes[*level]->start,
3053                                                 path->nodes[*level]->len,
3054                                                 parent->start,
3055                                                 root_owner, root_gen, 0, 0, 1);
3056                         BUG_ON(ret);
3057                         free_extent_buffer(path->nodes[*level]);
3058                         path->nodes[*level] = NULL;
3059                         *level = i + 1;
3060                 }
3061         }
3062         return 1;
3063 }
3064
3065 /*
3066  * drop the reference count on the tree rooted at 'root'.  This traverses
3067  * the tree freeing any blocks that have a ref count of zero after being
3068  * decremented.
3069  */
3070 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
3071                         *root)
3072 {
3073         int ret = 0;
3074         int wret;
3075         int level;
3076         struct btrfs_path *path;
3077         int i;
3078         int orig_level;
3079         struct btrfs_root_item *root_item = &root->root_item;
3080
3081         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
3082         path = btrfs_alloc_path();
3083         BUG_ON(!path);
3084
3085         level = btrfs_header_level(root->node);
3086         orig_level = level;
3087         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
3088                 path->nodes[level] = root->node;
3089                 extent_buffer_get(root->node);
3090                 path->slots[level] = 0;
3091         } else {
3092                 struct btrfs_key key;
3093                 struct btrfs_disk_key found_key;
3094                 struct extent_buffer *node;
3095
3096                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
3097                 level = root_item->drop_level;
3098                 path->lowest_level = level;
3099                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3100                 if (wret < 0) {
3101                         ret = wret;
3102                         goto out;
3103                 }
3104                 node = path->nodes[level];
3105                 btrfs_node_key(node, &found_key, path->slots[level]);
3106                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
3107                                sizeof(found_key)));
3108                 /*
3109                  * unlock our path, this is safe because only this
3110                  * function is allowed to delete this snapshot
3111                  */
3112                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
3113                         if (path->nodes[i] && path->locks[i]) {
3114                                 path->locks[i] = 0;
3115                                 btrfs_tree_unlock(path->nodes[i]);
3116                         }
3117                 }
3118         }
3119         while(1) {
3120                 wret = walk_down_tree(trans, root, path, &level);
3121                 if (wret > 0)
3122                         break;
3123                 if (wret < 0)
3124                         ret = wret;
3125
3126                 wret = walk_up_tree(trans, root, path, &level);
3127                 if (wret > 0)
3128                         break;
3129                 if (wret < 0)
3130                         ret = wret;
3131                 if (trans->transaction->in_commit) {
3132                         ret = -EAGAIN;
3133                         break;
3134                 }
3135                 atomic_inc(&root->fs_info->throttle_gen);
3136                 wake_up(&root->fs_info->transaction_throttle);
3137         }
3138         for (i = 0; i <= orig_level; i++) {
3139                 if (path->nodes[i]) {
3140                         free_extent_buffer(path->nodes[i]);
3141                         path->nodes[i] = NULL;
3142                 }
3143         }
3144 out:
3145         btrfs_free_path(path);
3146         return ret;
3147 }
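
/*
 * Illustrative sketch, not part of the original file: a caller holds
 * drop_mutex and retries while -EAGAIN is returned, since a transaction
 * commit can interrupt the walk.  The helper name is an assumption for
 * demonstration only.
 */
#if 0
static void example_drop_dead_root(struct btrfs_root *dead_root)
{
        struct btrfs_root *tree_root = dead_root->fs_info->tree_root;
        struct btrfs_trans_handle *trans;
        int ret;

        mutex_lock(&dead_root->fs_info->drop_mutex);
        do {
                trans = btrfs_start_transaction(tree_root, 1);
                ret = btrfs_drop_snapshot(trans, dead_root);
                btrfs_end_transaction(trans, tree_root);
        } while (ret == -EAGAIN);
        mutex_unlock(&dead_root->fs_info->drop_mutex);
}
#endif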
3148
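/*
 * called at unmount time to tear down the block group cache, dropping
 * each group's free space cache and its space_info list entry
 */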
3149 int btrfs_free_block_groups(struct btrfs_fs_info *info)
3150 {
3151         struct btrfs_block_group_cache *block_group;
3152         struct rb_node *n;
3153
3154         mutex_lock(&info->alloc_mutex);
3155         spin_lock(&info->block_group_cache_lock);
3156         while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
3157                 block_group = rb_entry(n, struct btrfs_block_group_cache,
3158                                        cache_node);
3159
3160                 btrfs_remove_free_space_cache(block_group);
3161                 rb_erase(&block_group->cache_node,
3162                          &info->block_group_cache_tree);
3163                 spin_lock(&block_group->space_info->lock);
3164                 list_del(&block_group->list);
3165                 spin_unlock(&block_group->space_info->lock);
3166                 kfree(block_group);
3167         }
3168         spin_unlock(&info->block_group_cache_lock);
3169         mutex_unlock(&info->alloc_mutex);
3170         return 0;
3171 }
3172
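/*
 * return the last page index of a readahead window of nr pages that
 * starts at start, clamped so it never runs past last
 */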
3173 static unsigned long calc_ra(unsigned long start, unsigned long last,
3174                              unsigned long nr)
3175 {
3176         return min(last, start + nr - 1);
3177 }
3178
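/*
 * read in all the pages covering [start, start + len) and mark them
 * delalloc; when writeback runs, the data is reallocated outside of
 * the block group that is being emptied
 */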
3179 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
3180                                          u64 len)
3181 {
3182         u64 page_start;
3183         u64 page_end;
3184         unsigned long last_index;
3185         unsigned long i;
3186         struct page *page;
3187         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3188         struct file_ra_state *ra;
3189         unsigned long total_read = 0;
3190         unsigned long ra_pages;
3191         struct btrfs_ordered_extent *ordered;
3192         struct btrfs_trans_handle *trans;
3193
3194         ra = kzalloc(sizeof(*ra), GFP_NOFS);
        if (!ra)
                return -ENOMEM;
3195
3196         mutex_lock(&inode->i_mutex);
3197         i = start >> PAGE_CACHE_SHIFT;
3198         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
3199
3200         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
3201
3202         file_ra_state_init(ra, inode->i_mapping);
3203
3204         for (; i <= last_index; i++) {
3205                 if (total_read % ra_pages == 0) {
3206                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
3207                                        calc_ra(i, last_index, ra_pages));
3208                 }
3209                 total_read++;
3210 again:
3211                 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
3212                         goto truncate_racing;
3213                 page = grab_cache_page(inode->i_mapping, i);
3214                 if (!page) {
3215                         goto out_unlock;
3216                 }
3217                 if (!PageUptodate(page)) {
3218                         btrfs_readpage(NULL, page);
3219                         lock_page(page);
3220                         if (!PageUptodate(page)) {
3221                                 unlock_page(page);
3222                                 page_cache_release(page);
3223                                 goto out_unlock;
3224                         }
3225                 }
3226                 wait_on_page_writeback(page);
3227
3228                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
3229                 page_end = page_start + PAGE_CACHE_SIZE - 1;
3230                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
3231
3232                 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3233                 if (ordered) {
3234                         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3235                         unlock_page(page);
3236                         page_cache_release(page);
3237                         btrfs_start_ordered_extent(inode, ordered, 1);
3238                         btrfs_put_ordered_extent(ordered);
3239                         goto again;
3240                 }
3241                 set_page_extent_mapped(page);
3242
3243                 /*
3244                  * make sure page_mkwrite is called for this page if userland
3245                  * wants to change it from mmap
3246                  */
3247                 clear_page_dirty_for_io(page);
3248
3249                 btrfs_set_extent_delalloc(inode, page_start, page_end);
3250                 set_page_dirty(page);
3251
3252                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
3253                 unlock_page(page);
3254                 page_cache_release(page);
3255         }
3256
3257 out_unlock:
3258         /* we have to start the IO in order to get the ordered extents
3259          * instantiated.  This allows the relocation code to wait
3260          * for all the ordered extents to hit the disk.
3261          *
3262          * Otherwise, it would constantly loop over the same extents
3263          * because the old ones don't get deleted until the IO is
3264          * started.
3265          */
3266         btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1,
3267                                WB_SYNC_NONE);
3268         kfree(ra);
3269         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
3270         if (trans) {
3271                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
3272                 mark_inode_dirty(inode);
3273         }
3274         mutex_unlock(&inode->i_mutex);
3275         return 0;
3276
3277 truncate_racing:
3278         vmtruncate(inode, inode->i_size);
3279         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
3280                                            total_read);
3281         goto out_unlock;
3282 }
3283
3284 /*
3285  * The back references tell us which tree holds a ref on a block,
3286  * but it is possible for the tree root field in the reference to
3287  * reflect the original root before a snapshot was made.  In this
3288  * case we should search through all the children of a given root
3289  * to find potential holders of references on a block.
3290  *
3291  * Instead, we do something a little less fancy and just search
3292  * all the roots for a given key/block combination.
3293  */
3294 static int find_root_for_ref(struct btrfs_root *root,
3295                              struct btrfs_path *path,
3296                              struct btrfs_key *key0,
3297                              int level,
3298                              int file_key,
3299                              struct btrfs_root **found_root,
3300                              u64 bytenr)
3301 {
3302         struct btrfs_key root_location;
3303         struct btrfs_root *cur_root = *found_root;
3304         struct btrfs_file_extent_item *file_extent;
3305         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
3306         u64 found_bytenr;
3307         int ret;
3308
3309         root_location.offset = (u64)-1;
3310         root_location.type = BTRFS_ROOT_ITEM_KEY;
3311         path->lowest_level = level;
3312         path->reada = 0;
3313         while(1) {
3314                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
3315                 found_bytenr = 0;
3316                 if (ret == 0 && file_key) {
3317                         struct extent_buffer *leaf = path->nodes[0];
3318                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
3319                                              struct btrfs_file_extent_item);
3320                         if (btrfs_file_extent_type(leaf, file_extent) ==
3321                             BTRFS_FILE_EXTENT_REG) {
3322                                 found_bytenr =
3323                                         btrfs_file_extent_disk_bytenr(leaf,
3324                                                                file_extent);
3325                         }
3326                 } else if (!file_key) {
3327                         if (path->nodes[level])
3328                                 found_bytenr = path->nodes[level]->start;
3329                 }
3330
3331                 btrfs_release_path(cur_root, path);
3332
3333                 if (found_bytenr == bytenr) {
3334                         *found_root = cur_root;
3335                         ret = 0;
3336                         goto out;
3337                 }
3338                 ret = btrfs_search_root(root->fs_info->tree_root,
3339                                         root_search_start, &root_search_start);
3340                 if (ret)
3341                         break;
3342
3343                 root_location.objectid = root_search_start;
3344                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
3345                                                       &root_location);
3346                 if (!cur_root) {
3347                         ret = 1;
3348                         break;
3349                 }
3350         }
3351 out:
3352         path->lowest_level = 0;
3353         return ret;
3354 }
3355
3356 /*
3357  * note, this releases the path
3358  */
3359 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
3360                                   struct btrfs_path *path,
3361                                   struct btrfs_key *extent_key,
3362                                   u64 *last_file_objectid,
3363                                   u64 *last_file_offset,
3364                                   u64 *last_file_root,
3365                                   u64 last_extent)
3366 {
3367         struct inode *inode;
3368         struct btrfs_root *found_root;
3369         struct btrfs_key root_location;
3370         struct btrfs_key found_key;
3371         struct btrfs_extent_ref *ref;
3372         u64 ref_root;
3373         u64 ref_gen;
3374         u64 ref_objectid;
3375         u64 ref_offset;
3376         int ret;
3377         int level;
3378
3379         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
3380
3381         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
3382                              struct btrfs_extent_ref);
3383         ref_root = btrfs_ref_root(path->nodes[0], ref);
3384         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
3385         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
3386         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
3387         btrfs_release_path(extent_root, path);
3388
3389         root_location.objectid = ref_root;
3390         if (ref_gen == 0)
3391                 root_location.offset = 0;
3392         else
3393                 root_location.offset = (u64)-1;
3394         root_location.type = BTRFS_ROOT_ITEM_KEY;
3395
3396         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
3397                                                 &root_location);
3398         BUG_ON(!found_root);
3399         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3400
3401         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
3402                 found_key.objectid = ref_objectid;
3403                 found_key.type = BTRFS_EXTENT_DATA_KEY;
3404                 found_key.offset = ref_offset;
3405                 level = 0;
3406
3407                 if (last_extent == extent_key->objectid &&
3408                     *last_file_objectid == ref_objectid &&
3409                     *last_file_offset == ref_offset &&
3410                     *last_file_root == ref_root)
3411                         goto out;
3412
3413                 ret = find_root_for_ref(extent_root, path, &found_key,
3414                                         level, 1, &found_root,
3415                                         extent_key->objectid);
3416
3417                 if (ret)
3418                         goto out;
3419
3420                 if (last_extent == extent_key->objectid &&
3421                     *last_file_objectid == ref_objectid &&
3422                     *last_file_offset == ref_offset &&
3423                     *last_file_root == ref_root)
3424                         goto out;
3425
3426                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
3427                                           ref_objectid, found_root);
3428                 if (inode->i_state & I_NEW) {
3429                         /* the inode and parent dir are two different roots */
3430                         BTRFS_I(inode)->root = found_root;
3431                         BTRFS_I(inode)->location.objectid = ref_objectid;
3432                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
3433                         BTRFS_I(inode)->location.offset = 0;
3434                         btrfs_read_locked_inode(inode);
3435                         unlock_new_inode(inode);
3437                 }
3438                 /* this can happen if the reference is not against
3439                  * the latest version of the tree root
3440                  */
3441                 if (is_bad_inode(inode))
3442                         goto out;
3443
3444                 *last_file_objectid = inode->i_ino;
3445                 *last_file_root = found_root->root_key.objectid;
3446                 *last_file_offset = ref_offset;
3447
3448                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
3449                 iput(inode);
3450         } else {
3451                 struct btrfs_trans_handle *trans;
3452                 struct extent_buffer *eb;
3453                 int needs_lock = 0;
3454
3455                 eb = read_tree_block(found_root, extent_key->objectid,
3456                                      extent_key->offset, 0);
3457                 btrfs_tree_lock(eb);
3458                 level = btrfs_header_level(eb);
3459
3460                 if (level == 0)
3461                         btrfs_item_key_to_cpu(eb, &found_key, 0);
3462                 else
3463                         btrfs_node_key_to_cpu(eb, &found_key, 0);
3464
3465                 btrfs_tree_unlock(eb);
3466                 free_extent_buffer(eb);
3467
3468                 ret = find_root_for_ref(extent_root, path, &found_key,
3469                                         level, 0, &found_root,
3470                                         extent_key->objectid);
3471
3472                 if (ret)
3473                         goto out;
3474
3475                 /*
3476                  * right here almost anything could happen to our key,
3477                  * but that's ok.  The cow below will either relocate it
3478                  * or someone else will have relocated it.  Either way,
3479                  * it is in a different spot than it was before and
3480                  * we're happy.
3481                  */
3482
3483                 trans = btrfs_start_transaction(found_root, 1);
3484
3485                 if (found_root == extent_root->fs_info->extent_root ||
3486                     found_root == extent_root->fs_info->chunk_root ||
3487                     found_root == extent_root->fs_info->dev_root) {
3488                         needs_lock = 1;
3489                         mutex_lock(&extent_root->fs_info->alloc_mutex);
3490                 }
3491
3492                 path->lowest_level = level;
3493                 path->reada = 2;
3494                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
3495                                         0, 1);
3496                 path->lowest_level = 0;
3497                 btrfs_release_path(found_root, path);
3498
3499                 if (found_root == found_root->fs_info->extent_root)
3500                         btrfs_extent_post_op(trans, found_root);
3501                 if (needs_lock)
3502                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
3503
3504                 btrfs_end_transaction(trans, found_root);
3506         }
3507 out:
3508         mutex_lock(&extent_root->fs_info->alloc_mutex);
3509         return 0;
3510 }
3511
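/*
 * an extent record at bytenr zero is never valid; delete the item
 * instead of trying to relocate it
 */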
3512 static int noinline del_extent_zero(struct btrfs_root *extent_root,
3513                                     struct btrfs_path *path,
3514                                     struct btrfs_key *extent_key)
3515 {
3516         int ret;
3517         struct btrfs_trans_handle *trans;
3518
3519         trans = btrfs_start_transaction(extent_root, 1);
3520         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
3521         if (ret > 0) {
3522                 ret = -EIO;
3523                 goto out;
3524         }
3525         if (ret < 0)
3526                 goto out;
3527         ret = btrfs_del_item(trans, extent_root, path);
3528 out:
3529         btrfs_end_transaction(trans, extent_root);
3530         return ret;
3531 }
3532
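/*
 * relocate every reference to the extent described by extent_key by
 * walking all of its reference items in the extent tree
 */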
3533 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
3534                                         struct btrfs_path *path,
3535                                         struct btrfs_key *extent_key)
3536 {
3537         struct btrfs_key key;
3538         struct btrfs_key found_key;
3539         struct extent_buffer *leaf;
3540         u64 last_file_objectid = 0;
3541         u64 last_file_root = 0;
3542         u64 last_file_offset = (u64)-1;
3543         u64 last_extent = 0;
3544         u32 nritems;
3546         int ret = 0;
3547
3548         if (extent_key->objectid == 0) {
3549                 ret = del_extent_zero(extent_root, path, extent_key);
3550                 goto out;
3551         }
3552         key.objectid = extent_key->objectid;
3553         key.type = BTRFS_EXTENT_REF_KEY;
3554         key.offset = 0;
3555
3556         while(1) {
3557                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
3558
3559                 if (ret < 0)
3560                         goto out;
3561
3562                 ret = 0;
3563                 leaf = path->nodes[0];
3564                 nritems = btrfs_header_nritems(leaf);
3565                 if (path->slots[0] == nritems) {
3566                         ret = btrfs_next_leaf(extent_root, path);
3567                         if (ret > 0) {
3568                                 ret = 0;
3569                                 goto out;
3570                         }
3571                         if (ret < 0)
3572                                 goto out;
3573                         leaf = path->nodes[0];
3574                 }
3575
3576                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3577                 if (found_key.objectid != extent_key->objectid) {
3578                         break;
3579                 }
3580
3581                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
3582                         break;
3583                 }
3584
3585                 key.offset = found_key.offset + 1;
3587
3588                 ret = relocate_one_reference(extent_root, path, extent_key,
3589                                              &last_file_objectid,
3590                                              &last_file_offset,
3591                                              &last_file_root, last_extent);
3592                 if (ret)
3593                         goto out;
3594                 last_extent = extent_key->objectid;
3595         }
3596         ret = 0;
3597 out:
3598         btrfs_release_path(extent_root, path);
3599         return ret;
3600 }
3601
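/*
 * pick the block group profile to use for the chunks that replace the
 * one being shrunk, based on how many devices are left
 */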
3602 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
3603 {
3604         u64 num_devices;
3605         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
3606                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
3607
3608         num_devices = root->fs_info->fs_devices->num_devices;
3609         if (num_devices == 1) {
3610                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3611                 stripped = flags & ~stripped;
3612
3613                 /* turn raid0 into single device chunks */
3614                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3615                         return stripped;
3616
3617                 /* turn mirroring into duplication */
3618                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3619                              BTRFS_BLOCK_GROUP_RAID10))
3620                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3621                 return flags;
3622         } else {
3623                 /* they already had raid on here, just return */
3624                 if (flags & stripped)
3625                         return flags;
3626
3627                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3628                 stripped = flags & ~stripped;
3629
3630                 /* switch duplicated blocks with raid1 */
3631                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3632                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3633
3634                 /* turn single device chunks into raid0 */
3635                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3636         }
3637         return flags;
3638 }
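
/*
 * Illustrative sketch, not part of the original file: the profile
 * conversions performed above, written out as checks.  The helper
 * wrapping them is an assumption for demonstration only.
 */
#if 0
static void example_restripe_target(struct btrfs_root *root, u64 flags)
{
        u64 target = update_block_group_flags(root, flags);

        if (root->fs_info->fs_devices->num_devices == 1) {
                /* mirroring degrades to duplication on a single device */
                if (flags & BTRFS_BLOCK_GROUP_RAID1)
                        WARN_ON(target != BTRFS_BLOCK_GROUP_DUP);
        } else {
                /* duplication is promoted back to raid1 */
                if (flags & BTRFS_BLOCK_GROUP_DUP)
                        WARN_ON(target != BTRFS_BLOCK_GROUP_RAID1);
        }
}
#endif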
3639
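/*
 * make sure there is room for the data being relocated out of
 * shrink_block_group by allocating a new chunk in the target profile
 */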
3640 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3641                      struct btrfs_block_group_cache *shrink_block_group,
3642                      int force)
3643 {
3644         struct btrfs_trans_handle *trans;
3645         u64 new_alloc_flags;
3646         u64 calc;
3647
3648         spin_lock(&shrink_block_group->lock);
3649         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3650                 spin_unlock(&shrink_block_group->lock);
3651                 mutex_unlock(&root->fs_info->alloc_mutex);
3652
3653                 trans = btrfs_start_transaction(root, 1);
3654                 mutex_lock(&root->fs_info->alloc_mutex);
3655                 spin_lock(&shrink_block_group->lock);
3656
3657                 new_alloc_flags = update_block_group_flags(root,
3658                                                    shrink_block_group->flags);
3659                 if (new_alloc_flags != shrink_block_group->flags) {
3660                         calc =
3661                              btrfs_block_group_used(&shrink_block_group->item);
3662                 } else {
3663                         calc = shrink_block_group->key.offset;
3664                 }
3665                 spin_unlock(&shrink_block_group->lock);
3666
3667                 do_chunk_alloc(trans, root->fs_info->extent_root,
3668                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3669
3670                 mutex_unlock(&root->fs_info->alloc_mutex);
3671                 btrfs_end_transaction(trans, root);
3672                 mutex_lock(&root->fs_info->alloc_mutex);
3673         } else
3674                 spin_unlock(&shrink_block_group->lock);
3675         return 0;
3676 }
3677
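/*
 * empty out the block group containing shrink_start by relocating
 * every extent in it, then remove the block group item itself from
 * the extent tree
 */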
3678 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
3679 {
3680         struct btrfs_trans_handle *trans;
3681         struct btrfs_root *tree_root = root->fs_info->tree_root;
3682         struct btrfs_path *path;
3683         u64 cur_byte;
3684         u64 total_found;
3685         u64 shrink_last_byte;
3686         struct btrfs_block_group_cache *shrink_block_group;
3687         struct btrfs_key key;
3688         struct btrfs_key found_key;
3689         struct extent_buffer *leaf;
3690         u32 nritems;
3691         int ret;
3692         int progress;
3693
3694         mutex_lock(&root->fs_info->alloc_mutex);
3695         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3696                                                       shrink_start);
3697         BUG_ON(!shrink_block_group);
3698
3699         shrink_last_byte = shrink_block_group->key.objectid +
3700                 shrink_block_group->key.offset;
3701
3702         shrink_block_group->space_info->total_bytes -=
3703                 shrink_block_group->key.offset;
3704         path = btrfs_alloc_path();
        BUG_ON(!path);
3705         root = root->fs_info->extent_root;
3706         path->reada = 2;
3707
3708         printk("btrfs relocating block group %llu flags %llu\n",
3709                (unsigned long long)shrink_start,
3710                (unsigned long long)shrink_block_group->flags);
3711
3712         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3713
3714 again:
3715
3716         shrink_block_group->ro = 1;
3717
3718         total_found = 0;
3719         progress = 0;
3720         key.objectid = shrink_start;
3721         key.offset = 0;
3722         key.type = 0;
3723         cur_byte = key.objectid;
3724
3725         mutex_unlock(&root->fs_info->alloc_mutex);
3726
3727         btrfs_start_delalloc_inodes(root);
3728         btrfs_wait_ordered_extents(tree_root, 0);
3729
3730         mutex_lock(&root->fs_info->alloc_mutex);
3731
3732         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3733         if (ret < 0)
3734                 goto out;
3735
3736         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3737         if (ret < 0)
3738                 goto out;
3739
3740         if (ret == 0) {
3741                 leaf = path->nodes[0];
3742                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3743                 if (found_key.objectid + found_key.offset > shrink_start &&
3744                     found_key.objectid < shrink_last_byte) {
3745                         cur_byte = found_key.objectid;
3746                         key.objectid = cur_byte;
3747                 }
3748         }
3749         btrfs_release_path(root, path);
3750
3751         while(1) {
3752                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3753                 if (ret < 0)
3754                         goto out;
3755
3756 next:
3757                 leaf = path->nodes[0];
3758                 nritems = btrfs_header_nritems(leaf);
3759                 if (path->slots[0] >= nritems) {
3760                         ret = btrfs_next_leaf(root, path);
3761                         if (ret < 0)
3762                                 goto out;
3763                         if (ret == 1) {
3764                                 ret = 0;
3765                                 break;
3766                         }
3767                         leaf = path->nodes[0];
3768                         nritems = btrfs_header_nritems(leaf);
3769                 }
3770
3771                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3772
3773                 if (found_key.objectid >= shrink_last_byte)
3774                         break;
3775
3776                 if (progress && need_resched()) {
3777                         memcpy(&key, &found_key, sizeof(key));
3778                         cond_resched();
3779                         btrfs_release_path(root, path);
3780                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3781                         progress = 0;
3782                         goto next;
3783                 }
3784                 progress = 1;
3785
3786                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3787                     found_key.objectid + found_key.offset <= cur_byte) {
3788                         memcpy(&key, &found_key, sizeof(key));
3789                         key.offset++;
3790                         path->slots[0]++;
3791                         goto next;
3792                 }
3793
3794                 total_found++;
3795                 cur_byte = found_key.objectid + found_key.offset;
3796                 key.objectid = cur_byte;
3797                 btrfs_release_path(root, path);
3798                 ret = relocate_one_extent(root, path, &found_key);
3799                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3800         }
3801
3802         btrfs_release_path(root, path);
3803
3804         if (total_found > 0) {
3805                 printk("btrfs relocate found %llu last extent was %llu\n",
3806                        (unsigned long long)total_found,
3807                        (unsigned long long)found_key.objectid);
3808                 mutex_unlock(&root->fs_info->alloc_mutex);
3809                 trans = btrfs_start_transaction(tree_root, 1);
3810                 btrfs_commit_transaction(trans, tree_root);
3811
3812                 btrfs_clean_old_snapshots(tree_root);
3813
3814                 btrfs_start_delalloc_inodes(root);
3815                 btrfs_wait_ordered_extents(tree_root, 0);
3816
3817                 trans = btrfs_start_transaction(tree_root, 1);
3818                 btrfs_commit_transaction(trans, tree_root);
3819                 mutex_lock(&root->fs_info->alloc_mutex);
3820                 goto again;
3821         }
3822
3823         /*
3824          * we've freed all the extents, now remove the block
3825          * group item from the tree
3826          */
3827         mutex_unlock(&root->fs_info->alloc_mutex);
3828
3829         trans = btrfs_start_transaction(root, 1);
3830
3831         mutex_lock(&root->fs_info->alloc_mutex);
3832         memcpy(&key, &shrink_block_group->key, sizeof(key));
3833
3834         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3835         if (ret > 0)
3836                 ret = -EIO;
3837         if (ret < 0) {
3838                 btrfs_end_transaction(trans, root);
3839                 goto out;
3840         }
3841
3842         spin_lock(&root->fs_info->block_group_cache_lock);
3843         rb_erase(&shrink_block_group->cache_node,
3844                  &root->fs_info->block_group_cache_tree);
3845         spin_unlock(&root->fs_info->block_group_cache_lock);
3846
3847         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3848                                       key.offset);
3849         if (ret) {
3850                 btrfs_end_transaction(trans, root);
3851                 goto out;
3852         }
3853         /*
3854         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3855         kfree(shrink_block_group);
3856         */
3857
3858         btrfs_del_item(trans, root, path);
3859         btrfs_release_path(root, path);
3860         mutex_unlock(&root->fs_info->alloc_mutex);
3861         btrfs_commit_transaction(trans, root);
3862
3863         mutex_lock(&root->fs_info->alloc_mutex);
3864
3865         /* the code to unpin extents might set a few bits in the free
3866          * space cache for this range again, so remove them one more
3867          * time (XXX: is this second removal really needed?)
3868          */
3869         ret = btrfs_remove_free_space(shrink_block_group, key.objectid,
3870                                       key.offset);
3871 out:
3872         btrfs_free_path(path);
3873         mutex_unlock(&root->fs_info->alloc_mutex);
3874         return ret;
3875 }
3876
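/*
 * find the first block group item in the extent tree with an objectid
 * greater than or equal to the one in key
 */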
3877 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3878                            struct btrfs_key *key)
3879 {
3880         int ret = 0;
3881         struct btrfs_key found_key;
3882         struct extent_buffer *leaf;
3883         int slot;
3884
3885         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3886         if (ret < 0)
3887                 goto out;
3888
3889         while(1) {
3890                 slot = path->slots[0];
3891                 leaf = path->nodes[0];
3892                 if (slot >= btrfs_header_nritems(leaf)) {
3893                         ret = btrfs_next_leaf(root, path);
3894                         if (ret == 0)
3895                                 continue;
3896                         if (ret < 0)
3897                                 goto out;
3898                         break;
3899                 }
3900                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3901
3902                 if (found_key.objectid >= key->objectid &&
3903                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3904                         ret = 0;
3905                         goto out;
3906                 }
3907                 path->slots[0]++;
3908         }
3909         ret = -ENOENT;
3910 out:
3911         return ret;
3912 }
3913
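/*
 * read every block group item out of the extent tree at mount time
 * and build up the in-memory block group cache and the space_info
 * accounting
 */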
3914 int btrfs_read_block_groups(struct btrfs_root *root)
3915 {
3916         struct btrfs_path *path;
3917         int ret;
3918         struct btrfs_block_group_cache *cache;
3919         struct btrfs_fs_info *info = root->fs_info;
3920         struct btrfs_space_info *space_info;
3921         struct btrfs_key key;
3922         struct btrfs_key found_key;
3923         struct extent_buffer *leaf;
3924
3925         root = info->extent_root;
3926         key.objectid = 0;
3927         key.offset = 0;
3928         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3929         path = btrfs_alloc_path();
3930         if (!path)
3931                 return -ENOMEM;
3932
3933         mutex_lock(&root->fs_info->alloc_mutex);
3934         while(1) {
3935                 ret = find_first_block_group(root, path, &key);
3936                 if (ret > 0) {
3937                         ret = 0;
3938                         goto error;
3939                 }
3940                 if (ret != 0)
3941                         goto error;
3942
3943                 leaf = path->nodes[0];
3944                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3945                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3946                 if (!cache) {
3947                         ret = -ENOMEM;
3948                         break;
3949                 }
3950
3951                 spin_lock_init(&cache->lock);
3952                 INIT_LIST_HEAD(&cache->list);
3953                 read_extent_buffer(leaf, &cache->item,
3954                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3955                                    sizeof(cache->item));
3956                 memcpy(&cache->key, &found_key, sizeof(found_key));
3957
3958                 key.objectid = found_key.objectid + found_key.offset;
3959                 btrfs_release_path(root, path);
3960                 cache->flags = btrfs_block_group_flags(&cache->item);
3961
3962                 ret = update_space_info(info, cache->flags, found_key.offset,
3963                                         btrfs_block_group_used(&cache->item),
3964                                         &space_info);
3965                 BUG_ON(ret);
3966                 cache->space_info = space_info;
3967                 spin_lock(&space_info->lock);
3968                 list_add(&cache->list, &space_info->block_groups);
3969                 spin_unlock(&space_info->lock);
3970
3971                 ret = btrfs_add_block_group_cache(root->fs_info, cache);
3972                 BUG_ON(ret);
3973
3974                 if (key.objectid >=
3975                     btrfs_super_total_bytes(&info->super_copy))
3976                         break;
3977         }
3978         ret = 0;
3979 error:
3980         btrfs_free_path(path);
3981         mutex_unlock(&root->fs_info->alloc_mutex);
3982         return ret;
3983 }
3984
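/*
 * create a new block group for a freshly allocated chunk and insert
 * its item into the extent tree.  the caller must hold alloc_mutex
 */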
3985 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3986                            struct btrfs_root *root, u64 bytes_used,
3987                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3988                            u64 size)
3989 {
3990         int ret;
3991         struct btrfs_root *extent_root;
3992         struct btrfs_block_group_cache *cache;
3993
3994         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3995         extent_root = root->fs_info->extent_root;
3996
3997         root->fs_info->last_trans_new_blockgroup = trans->transid;
3998
3999         cache = kzalloc(sizeof(*cache), GFP_NOFS);
4000         if (!cache)
4001                 return -ENOMEM;
4002
4003         cache->key.objectid = chunk_offset;
4004         cache->key.offset = size;
4005         spin_lock_init(&cache->lock);
4006         INIT_LIST_HEAD(&cache->list);
4007         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
4008
4009         btrfs_set_block_group_used(&cache->item, bytes_used);
4010         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
4011         cache->flags = type;
4012         btrfs_set_block_group_flags(&cache->item, type);
4013
4014         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
4015                                 &cache->space_info);
4016         BUG_ON(ret);
4017         spin_lock(&cache->space_info->lock);
4018         list_add(&cache->list, &cache->space_info->block_groups);
4019         spin_unlock(&cache->space_info->lock);
4020
4021         ret = btrfs_add_block_group_cache(root->fs_info, cache);
4022         BUG_ON(ret);
4023
4024         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
4025                                 sizeof(cache->item));
4026         BUG_ON(ret);
4027
4028         finish_current_insert(trans, extent_root);
4029         ret = del_pending_extents(trans, extent_root);
4030         BUG_ON(ret);
4031         set_avail_alloc_bits(extent_root->fs_info, type);
4032
4033         return 0;
4034 }