Btrfs: Dynamic chunk and block group allocation
[pandora-kernel.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include <linux/crc32c.h>
21 #include <linux/pagemap.h>
22 #include "hash.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28
29 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
30 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
31 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
32
33 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
34
35 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
36                                  btrfs_root *extent_root);
37 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
38                                btrfs_root *extent_root);
39 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
40                            struct btrfs_root *root, u64 bytes_used,
41                            u64 type, u64 chunk_tree, u64 chunk_objectid,
42                            u64 size);
43
44
45 static int cache_block_group(struct btrfs_root *root,
46                              struct btrfs_block_group_cache *block_group)
47 {
48         struct btrfs_path *path;
49         int ret;
50         struct btrfs_key key;
51         struct extent_buffer *leaf;
52         struct extent_io_tree *free_space_cache;
53         int slot;
54         u64 last = 0;
55         u64 hole_size;
56         u64 first_free;
57         int found = 0;
58
59         if (!block_group)
60                 return 0;
61
62         root = root->fs_info->extent_root;
63         free_space_cache = &root->fs_info->free_space_cache;
64
65         if (block_group->cached)
66                 return 0;
67
68         path = btrfs_alloc_path();
69         if (!path)
70                 return -ENOMEM;
71
72         path->reada = 2;
73         first_free = block_group->key.objectid;
74         key.objectid = block_group->key.objectid;
75         key.offset = 0;
76         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
77         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
78         if (ret < 0)
79                 return ret;
80         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
81         if (ret < 0)
82                 return ret;
83         if (ret == 0) {
84                 leaf = path->nodes[0];
85                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
86                 if (key.objectid + key.offset > first_free)
87                         first_free = key.objectid + key.offset;
88         }
89         while(1) {
90                 leaf = path->nodes[0];
91                 slot = path->slots[0];
92                 if (slot >= btrfs_header_nritems(leaf)) {
93                         ret = btrfs_next_leaf(root, path);
94                         if (ret < 0)
95                                 goto err;
96                         if (ret == 0) {
97                                 continue;
98                         } else {
99                                 break;
100                         }
101                 }
102                 btrfs_item_key_to_cpu(leaf, &key, slot);
103                 if (key.objectid < block_group->key.objectid) {
104                         goto next;
105                 }
106                 if (key.objectid >= block_group->key.objectid +
107                     block_group->key.offset) {
108                         break;
109                 }
110
111                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
112                         if (!found) {
113                                 last = first_free;
114                                 found = 1;
115                         }
116                         if (key.objectid > last) {
117                                 hole_size = key.objectid - last;
118                                 set_extent_dirty(free_space_cache, last,
119                                                  last + hole_size - 1,
120                                                  GFP_NOFS);
121                         }
122                         last = key.objectid + key.offset;
123                 }
124 next:
125                 path->slots[0]++;
126         }
127
128         if (!found)
129                 last = first_free;
130         if (block_group->key.objectid +
131             block_group->key.offset > last) {
132                 hole_size = block_group->key.objectid +
133                         block_group->key.offset - last;
134                 set_extent_dirty(free_space_cache, last,
135                                  last + hole_size - 1, GFP_NOFS);
136         }
137         block_group->cached = 1;
138 err:
139         btrfs_free_path(path);
140         return 0;
141 }
142
143 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
144                                                          btrfs_fs_info *info,
145                                                          u64 bytenr)
146 {
147         struct extent_io_tree *block_group_cache;
148         struct btrfs_block_group_cache *block_group = NULL;
149         u64 ptr;
150         u64 start;
151         u64 end;
152         int ret;
153
154         block_group_cache = &info->block_group_cache;
155         ret = find_first_extent_bit(block_group_cache,
156                                     bytenr, &start, &end,
157                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
158                                     BLOCK_GROUP_SYSTEM);
159         if (ret) {
160                 return NULL;
161         }
162         ret = get_state_private(block_group_cache, start, &ptr);
163         if (ret)
164                 return NULL;
165
166         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
167         if (block_group->key.objectid <= bytenr && bytenr <
168             block_group->key.objectid + block_group->key.offset)
169                 return block_group;
170         return NULL;
171 }
172
173 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
174 {
175         return (cache->flags & bits);
176 }
177
178 static int noinline find_search_start(struct btrfs_root *root,
179                               struct btrfs_block_group_cache **cache_ret,
180                               u64 *start_ret, int num, int data)
181 {
182         int ret;
183         struct btrfs_block_group_cache *cache = *cache_ret;
184         struct extent_io_tree *free_space_cache;
185         u64 last;
186         u64 start = 0;
187         u64 end = 0;
188         u64 cache_miss = 0;
189         u64 total_fs_bytes;
190         u64 search_start = *start_ret;
191         int wrapped = 0;
192
193         if (!cache)
194                 goto out;
195         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
196         free_space_cache = &root->fs_info->free_space_cache;
197
198 again:
199         ret = cache_block_group(root, cache);
200         if (ret)
201                 goto out;
202
203         last = max(search_start, cache->key.objectid);
204         if (!block_group_bits(cache, data)) {
205                 goto new_group;
206         }
207
208         while(1) {
209                 ret = find_first_extent_bit(&root->fs_info->free_space_cache,
210                                             last, &start, &end, EXTENT_DIRTY);
211                 if (ret) {
212                         if (!cache_miss)
213                                 cache_miss = last;
214                         goto new_group;
215                 }
216
217                 start = max(last, start);
218                 last = end + 1;
219                 if (last - start < num) {
220                         if (last == cache->key.objectid + cache->key.offset)
221                                 cache_miss = start;
222                         continue;
223                 }
224                 if (start + num > cache->key.objectid + cache->key.offset)
225                         goto new_group;
226                 if (start + num  > total_fs_bytes)
227                         goto new_group;
228                 *start_ret = start;
229                 return 0;
230         }
231 out:
232         cache = btrfs_lookup_block_group(root->fs_info, search_start);
233         if (!cache) {
234                 printk("Unable to find block group for %Lu\n", search_start);
235                 WARN_ON(1);
236         }
237         return -ENOSPC;
238
239 new_group:
240         last = cache->key.objectid + cache->key.offset;
241 wrapped:
242         cache = btrfs_lookup_block_group(root->fs_info, last);
243         if (!cache || cache->key.objectid >= total_fs_bytes) {
244 no_cache:
245                 if (!wrapped) {
246                         wrapped = 1;
247                         last = search_start;
248                         goto wrapped;
249                 }
250                 goto out;
251         }
252         if (cache_miss && !cache->cached) {
253                 cache_block_group(root, cache);
254                 last = cache_miss;
255                 cache = btrfs_lookup_block_group(root->fs_info, last);
256         }
257         cache = btrfs_find_block_group(root, cache, last, data, 0);
258         if (!cache)
259                 goto no_cache;
260         *cache_ret = cache;
261         cache_miss = 0;
262         goto again;
263 }
264
265 static u64 div_factor(u64 num, int factor)
266 {
267         if (factor == 10)
268                 return num;
269         num *= factor;
270         do_div(num, 10);
271         return num;
272 }
273
274 static int block_group_state_bits(u64 flags)
275 {
276         int bits = 0;
277         if (flags & BTRFS_BLOCK_GROUP_DATA)
278                 bits |= BLOCK_GROUP_DATA;
279         if (flags & BTRFS_BLOCK_GROUP_METADATA)
280                 bits |= BLOCK_GROUP_METADATA;
281         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
282                 bits |= BLOCK_GROUP_SYSTEM;
283         return bits;
284 }
285
286 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
287                                                  struct btrfs_block_group_cache
288                                                  *hint, u64 search_start,
289                                                  int data, int owner)
290 {
291         struct btrfs_block_group_cache *cache;
292         struct extent_io_tree *block_group_cache;
293         struct btrfs_block_group_cache *found_group = NULL;
294         struct btrfs_fs_info *info = root->fs_info;
295         u64 used;
296         u64 last = 0;
297         u64 hint_last;
298         u64 start;
299         u64 end;
300         u64 free_check;
301         u64 ptr;
302         u64 total_fs_bytes;
303         int bit;
304         int ret;
305         int full_search = 0;
306         int factor = 8;
307
308         block_group_cache = &info->block_group_cache;
309         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
310
311         if (!owner)
312                 factor = 8;
313
314         bit = block_group_state_bits(data);
315
316         if (search_start && search_start < total_fs_bytes) {
317                 struct btrfs_block_group_cache *shint;
318                 shint = btrfs_lookup_block_group(info, search_start);
319                 if (shint && block_group_bits(shint, data)) {
320                         used = btrfs_block_group_used(&shint->item);
321                         if (used + shint->pinned <
322                             div_factor(shint->key.offset, factor)) {
323                                 return shint;
324                         }
325                 }
326         }
327         if (hint && block_group_bits(hint, data) &&
328             hint->key.objectid < total_fs_bytes) {
329                 used = btrfs_block_group_used(&hint->item);
330                 if (used + hint->pinned <
331                     div_factor(hint->key.offset, factor)) {
332                         return hint;
333                 }
334                 last = hint->key.objectid + hint->key.offset;
335                 hint_last = last;
336         } else {
337                 if (hint)
338                         hint_last = max(hint->key.objectid, search_start);
339                 else
340                         hint_last = search_start;
341
342                 if (hint_last >= total_fs_bytes)
343                         hint_last = search_start;
344                 last = hint_last;
345         }
346 again:
347         while(1) {
348                 ret = find_first_extent_bit(block_group_cache, last,
349                                             &start, &end, bit);
350                 if (ret)
351                         break;
352
353                 ret = get_state_private(block_group_cache, start, &ptr);
354                 if (ret)
355                         break;
356
357                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
358                 last = cache->key.objectid + cache->key.offset;
359                 used = btrfs_block_group_used(&cache->item);
360
361                 if (cache->key.objectid > total_fs_bytes)
362                         break;
363
364                 if (full_search)
365                         free_check = cache->key.offset;
366                 else
367                         free_check = div_factor(cache->key.offset, factor);
368
369                 if (used + cache->pinned < free_check) {
370                         found_group = cache;
371                         goto found;
372                 }
373                 if (full_search) {
374                         printk("failed on cache %Lu used %Lu total %Lu\n",
375                                cache->key.objectid, used, cache->key.offset);
376                 }
377                 cond_resched();
378         }
379         if (!full_search) {
380                 last = search_start;
381                 full_search = 1;
382                 goto again;
383         }
384 found:
385         return found_group;
386 }
387
388 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
389                            u64 owner, u64 owner_offset)
390 {
391         u32 high_crc = ~(u32)0;
392         u32 low_crc = ~(u32)0;
393         __le64 lenum;
394
395         lenum = cpu_to_le64(root_objectid);
396         high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
397         lenum = cpu_to_le64(ref_generation);
398         low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
399         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
400                 lenum = cpu_to_le64(owner);
401                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
402                 lenum = cpu_to_le64(owner_offset);
403                 low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
404         }
405         return ((u64)high_crc << 32) | (u64)low_crc;
406 }
407
408 static int match_extent_ref(struct extent_buffer *leaf,
409                             struct btrfs_extent_ref *disk_ref,
410                             struct btrfs_extent_ref *cpu_ref)
411 {
412         int ret;
413         int len;
414
415         if (cpu_ref->objectid)
416                 len = sizeof(*cpu_ref);
417         else
418                 len = 2 * sizeof(u64);
419         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
420                                    len);
421         return ret == 0;
422 }
423
424 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
425                                           struct btrfs_root *root,
426                                           struct btrfs_path *path, u64 bytenr,
427                                           u64 root_objectid,
428                                           u64 ref_generation, u64 owner,
429                                           u64 owner_offset, int del)
430 {
431         u64 hash;
432         struct btrfs_key key;
433         struct btrfs_key found_key;
434         struct btrfs_extent_ref ref;
435         struct extent_buffer *leaf;
436         struct btrfs_extent_ref *disk_ref;
437         int ret;
438         int ret2;
439
440         btrfs_set_stack_ref_root(&ref, root_objectid);
441         btrfs_set_stack_ref_generation(&ref, ref_generation);
442         btrfs_set_stack_ref_objectid(&ref, owner);
443         btrfs_set_stack_ref_offset(&ref, owner_offset);
444
445         hash = hash_extent_ref(root_objectid, ref_generation, owner,
446                                owner_offset);
447         key.offset = hash;
448         key.objectid = bytenr;
449         key.type = BTRFS_EXTENT_REF_KEY;
450
451         while (1) {
452                 ret = btrfs_search_slot(trans, root, &key, path,
453                                         del ? -1 : 0, del);
454                 if (ret < 0)
455                         goto out;
456                 leaf = path->nodes[0];
457                 if (ret != 0) {
458                         u32 nritems = btrfs_header_nritems(leaf);
459                         if (path->slots[0] >= nritems) {
460                                 ret2 = btrfs_next_leaf(root, path);
461                                 if (ret2)
462                                         goto out;
463                                 leaf = path->nodes[0];
464                         }
465                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
466                         if (found_key.objectid != bytenr ||
467                             found_key.type != BTRFS_EXTENT_REF_KEY)
468                                 goto out;
469                         key.offset = found_key.offset;
470                         if (del) {
471                                 btrfs_release_path(root, path);
472                                 continue;
473                         }
474                 }
475                 disk_ref = btrfs_item_ptr(path->nodes[0],
476                                           path->slots[0],
477                                           struct btrfs_extent_ref);
478                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
479                         ret = 0;
480                         goto out;
481                 }
482                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
483                 key.offset = found_key.offset + 1;
484                 btrfs_release_path(root, path);
485         }
486 out:
487         return ret;
488 }
489
490 /*
491  * Back reference rules.  Back refs have three main goals:
492  *
493  * 1) differentiate between all holders of references to an extent so that
494  *    when a reference is dropped we can make sure it was a valid reference
495  *    before freeing the extent.
496  *
497  * 2) Provide enough information to quickly find the holders of an extent
498  *    if we notice a given block is corrupted or bad.
499  *
500  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
501  *    maintenance.  This is actually the same as #2, but with a slightly
502  *    different use case.
503  *
504  * File extents can be referenced by:
505  *
506  * - multiple snapshots, subvolumes, or different generations in one subvol
507  * - different files inside a single subvolume (in theory, not implemented yet)
508  * - different offsets inside a file (bookend extents in file.c)
509  *
510  * The extent ref structure has fields for:
511  *
512  * - Objectid of the subvolume root
513  * - Generation number of the tree holding the reference
514  * - objectid of the file holding the reference
515  * - offset in the file corresponding to the key holding the reference
516  *
517  * When a file extent is allocated the fields are filled in:
518  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
519  *
520  * When a leaf is cow'd new references are added for every file extent found
521  * in the leaf.  It looks the same as the create case, but trans->transid
522  * will be different when the block is cow'd.
523  *
524  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
525  *
526  * When a file extent is removed either during snapshot deletion or file
527  * truncation, the corresponding back reference is found
528  * by searching for:
529  *
530  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
531  *      inode objectid, offset in file)
532  *
533  * Btree extents can be referenced by:
534  *
535  * - Different subvolumes
536  * - Different generations of the same subvolume
537  *
538  * Storing sufficient information for a full reverse mapping of a btree
539  * block would require storing the lowest key of the block in the backref,
540  * and it would require updating that lowest key either before write out or
541  * every time it changed.  Instead, the objectid of the lowest key is stored
542  * along with the level of the tree block.  This provides a hint
543  * about where in the btree the block can be found.  Searches through the
544  * btree only need to look for a pointer to that block, so they stop one
545  * level higher than the level recorded in the backref.
546  *
547  * Some btrees do not do reference counting on their extents.  These
548  * include the extent tree and the tree of tree roots.  Backrefs for these
549  * trees always have a generation of zero.
550  *
551  * When a tree block is created, back references are inserted:
552  *
553  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
554  *
555  * When a tree block is cow'd in a reference counted root,
556  * new back references are added for all the blocks it points to.
557  * These are of the form (trans->transid will have increased since creation):
558  *
559  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
560  *
561  * Because the lowest_key_objectid and the level are just hints
562  * they are not used when backrefs are deleted.  When a backref is deleted:
563  *
564  * if backref was for a tree root:
565  *     root_objectid = root->root_key.objectid
566  * else
567  *     root_objectid = btrfs_header_owner(parent)
568  *
569  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
570  *
571  * Back Reference Key hashing:
572  *
573  * Back references have four fields, each 64 bits long.  Unfortunately,
574  * This is hashed into a single 64 bit number and placed into the key offset.
575  * The key objectid corresponds to the first byte in the extent, and the
576  * key type is set to BTRFS_EXTENT_REF_KEY
577  */
578 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
579                                  struct btrfs_root *root,
580                                  struct btrfs_path *path, u64 bytenr,
581                                  u64 root_objectid, u64 ref_generation,
582                                  u64 owner, u64 owner_offset)
583 {
584         u64 hash;
585         struct btrfs_key key;
586         struct btrfs_extent_ref ref;
587         struct btrfs_extent_ref *disk_ref;
588         int ret;
589
590         btrfs_set_stack_ref_root(&ref, root_objectid);
591         btrfs_set_stack_ref_generation(&ref, ref_generation);
592         btrfs_set_stack_ref_objectid(&ref, owner);
593         btrfs_set_stack_ref_offset(&ref, owner_offset);
594
595         hash = hash_extent_ref(root_objectid, ref_generation, owner,
596                                owner_offset);
597         key.offset = hash;
598         key.objectid = bytenr;
599         key.type = BTRFS_EXTENT_REF_KEY;
600
601         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
602         while (ret == -EEXIST) {
603                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
604                                           struct btrfs_extent_ref);
605                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
606                         goto out;
607                 key.offset++;
608                 btrfs_release_path(root, path);
609                 ret = btrfs_insert_empty_item(trans, root, path, &key,
610                                               sizeof(ref));
611         }
612         if (ret)
613                 goto out;
614         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
615                                   struct btrfs_extent_ref);
616         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
617                             sizeof(ref));
618         btrfs_mark_buffer_dirty(path->nodes[0]);
619 out:
620         btrfs_release_path(root, path);
621         return ret;
622 }
623
624 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
625                                 struct btrfs_root *root,
626                                 u64 bytenr, u64 num_bytes,
627                                 u64 root_objectid, u64 ref_generation,
628                                 u64 owner, u64 owner_offset)
629 {
630         struct btrfs_path *path;
631         int ret;
632         struct btrfs_key key;
633         struct extent_buffer *l;
634         struct btrfs_extent_item *item;
635         u32 refs;
636
637         WARN_ON(num_bytes < root->sectorsize);
638         path = btrfs_alloc_path();
639         if (!path)
640                 return -ENOMEM;
641
642         path->reada = 0;
643         key.objectid = bytenr;
644         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
645         key.offset = num_bytes;
646         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
647                                 0, 1);
648         if (ret < 0)
649                 return ret;
650         if (ret != 0) {
651                 BUG();
652         }
653         BUG_ON(ret != 0);
654         l = path->nodes[0];
655         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
656         refs = btrfs_extent_refs(l, item);
657         btrfs_set_extent_refs(l, item, refs + 1);
658         btrfs_mark_buffer_dirty(path->nodes[0]);
659
660         btrfs_release_path(root->fs_info->extent_root, path);
661
662         path->reada = 0;
663         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
664                                           path, bytenr, root_objectid,
665                                           ref_generation, owner, owner_offset);
666         BUG_ON(ret);
667         finish_current_insert(trans, root->fs_info->extent_root);
668         del_pending_extents(trans, root->fs_info->extent_root);
669
670         btrfs_free_path(path);
671         return 0;
672 }
673
674 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
675                          struct btrfs_root *root)
676 {
677         finish_current_insert(trans, root->fs_info->extent_root);
678         del_pending_extents(trans, root->fs_info->extent_root);
679         return 0;
680 }
681
682 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
683                              struct btrfs_root *root, u64 bytenr,
684                              u64 num_bytes, u32 *refs)
685 {
686         struct btrfs_path *path;
687         int ret;
688         struct btrfs_key key;
689         struct extent_buffer *l;
690         struct btrfs_extent_item *item;
691
692         WARN_ON(num_bytes < root->sectorsize);
693         path = btrfs_alloc_path();
694         path->reada = 0;
695         key.objectid = bytenr;
696         key.offset = num_bytes;
697         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
698         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
699                                 0, 0);
700         if (ret < 0)
701                 goto out;
702         if (ret != 0) {
703                 btrfs_print_leaf(root, path->nodes[0]);
704                 printk("failed to find block number %Lu\n", bytenr);
705                 BUG();
706         }
707         l = path->nodes[0];
708         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
709         *refs = btrfs_extent_refs(l, item);
710 out:
711         btrfs_free_path(path);
712         return 0;
713 }
714
715 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
716                                   struct btrfs_path *count_path,
717                                   u64 first_extent)
718 {
719         struct btrfs_root *extent_root = root->fs_info->extent_root;
720         struct btrfs_path *path;
721         u64 bytenr;
722         u64 found_objectid;
723         u64 root_objectid = root->root_key.objectid;
724         u32 total_count = 0;
725         u32 cur_count;
726         u32 nritems;
727         int ret;
728         struct btrfs_key key;
729         struct btrfs_key found_key;
730         struct extent_buffer *l;
731         struct btrfs_extent_item *item;
732         struct btrfs_extent_ref *ref_item;
733         int level = -1;
734
735         path = btrfs_alloc_path();
736 again:
737         if (level == -1)
738                 bytenr = first_extent;
739         else
740                 bytenr = count_path->nodes[level]->start;
741
742         cur_count = 0;
743         key.objectid = bytenr;
744         key.offset = 0;
745
746         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
747         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
748         if (ret < 0)
749                 goto out;
750         BUG_ON(ret == 0);
751
752         l = path->nodes[0];
753         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
754
755         if (found_key.objectid != bytenr ||
756             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
757                 goto out;
758         }
759
760         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
761         while (1) {
762                 l = path->nodes[0];
763                 nritems = btrfs_header_nritems(l);
764                 if (path->slots[0] >= nritems) {
765                         ret = btrfs_next_leaf(extent_root, path);
766                         if (ret == 0)
767                                 continue;
768                         break;
769                 }
770                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
771                 if (found_key.objectid != bytenr)
772                         break;
773
774                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
775                         path->slots[0]++;
776                         continue;
777                 }
778
779                 cur_count++;
780                 ref_item = btrfs_item_ptr(l, path->slots[0],
781                                           struct btrfs_extent_ref);
782                 found_objectid = btrfs_ref_root(l, ref_item);
783
784                 if (found_objectid != root_objectid) {
785                         total_count = 2;
786                         goto out;
787                 }
788                 total_count = 1;
789                 path->slots[0]++;
790         }
791         if (cur_count == 0) {
792                 total_count = 0;
793                 goto out;
794         }
795         if (level >= 0 && root->node == count_path->nodes[level])
796                 goto out;
797         level++;
798         btrfs_release_path(root, path);
799         goto again;
800
801 out:
802         btrfs_free_path(path);
803         return total_count;
804 }
805 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
806                        struct btrfs_root *root, u64 owner_objectid)
807 {
808         u64 generation;
809         u64 key_objectid;
810         u64 level;
811         u32 nritems;
812         struct btrfs_disk_key disk_key;
813
814         level = btrfs_header_level(root->node);
815         generation = trans->transid;
816         nritems = btrfs_header_nritems(root->node);
817         if (nritems > 0) {
818                 if (level == 0)
819                         btrfs_item_key(root->node, &disk_key, 0);
820                 else
821                         btrfs_node_key(root->node, &disk_key, 0);
822                 key_objectid = btrfs_disk_key_objectid(&disk_key);
823         } else {
824                 key_objectid = 0;
825         }
826         return btrfs_inc_extent_ref(trans, root, root->node->start,
827                                     root->node->len, owner_objectid,
828                                     generation, level, key_objectid);
829 }
830
831 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
832                   struct extent_buffer *buf)
833 {
834         u64 bytenr;
835         u32 nritems;
836         struct btrfs_key key;
837         struct btrfs_file_extent_item *fi;
838         int i;
839         int level;
840         int ret;
841         int faili;
842
843         if (!root->ref_cows)
844                 return 0;
845
846         level = btrfs_header_level(buf);
847         nritems = btrfs_header_nritems(buf);
848         for (i = 0; i < nritems; i++) {
849                 if (level == 0) {
850                         u64 disk_bytenr;
851                         btrfs_item_key_to_cpu(buf, &key, i);
852                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
853                                 continue;
854                         fi = btrfs_item_ptr(buf, i,
855                                             struct btrfs_file_extent_item);
856                         if (btrfs_file_extent_type(buf, fi) ==
857                             BTRFS_FILE_EXTENT_INLINE)
858                                 continue;
859                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
860                         if (disk_bytenr == 0)
861                                 continue;
862                         ret = btrfs_inc_extent_ref(trans, root, disk_bytenr,
863                                     btrfs_file_extent_disk_num_bytes(buf, fi),
864                                     root->root_key.objectid, trans->transid,
865                                     key.objectid, key.offset);
866                         if (ret) {
867                                 faili = i;
868                                 goto fail;
869                         }
870                 } else {
871                         bytenr = btrfs_node_blockptr(buf, i);
872                         btrfs_node_key_to_cpu(buf, &key, i);
873                         ret = btrfs_inc_extent_ref(trans, root, bytenr,
874                                            btrfs_level_size(root, level - 1),
875                                            root->root_key.objectid,
876                                            trans->transid,
877                                            level - 1, key.objectid);
878                         if (ret) {
879                                 faili = i;
880                                 goto fail;
881                         }
882                 }
883         }
884         return 0;
885 fail:
886         WARN_ON(1);
887 #if 0
888         for (i =0; i < faili; i++) {
889                 if (level == 0) {
890                         u64 disk_bytenr;
891                         btrfs_item_key_to_cpu(buf, &key, i);
892                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
893                                 continue;
894                         fi = btrfs_item_ptr(buf, i,
895                                             struct btrfs_file_extent_item);
896                         if (btrfs_file_extent_type(buf, fi) ==
897                             BTRFS_FILE_EXTENT_INLINE)
898                                 continue;
899                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
900                         if (disk_bytenr == 0)
901                                 continue;
902                         err = btrfs_free_extent(trans, root, disk_bytenr,
903                                     btrfs_file_extent_disk_num_bytes(buf,
904                                                                       fi), 0);
905                         BUG_ON(err);
906                 } else {
907                         bytenr = btrfs_node_blockptr(buf, i);
908                         err = btrfs_free_extent(trans, root, bytenr,
909                                         btrfs_level_size(root, level - 1), 0);
910                         BUG_ON(err);
911                 }
912         }
913 #endif
914         return ret;
915 }
916
917 static int write_one_cache_group(struct btrfs_trans_handle *trans,
918                                  struct btrfs_root *root,
919                                  struct btrfs_path *path,
920                                  struct btrfs_block_group_cache *cache)
921 {
922         int ret;
923         int pending_ret;
924         struct btrfs_root *extent_root = root->fs_info->extent_root;
925         unsigned long bi;
926         struct extent_buffer *leaf;
927
928         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
929         if (ret < 0)
930                 goto fail;
931         BUG_ON(ret);
932
933         leaf = path->nodes[0];
934         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
935         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
936         btrfs_mark_buffer_dirty(leaf);
937         btrfs_release_path(extent_root, path);
938 fail:
939         finish_current_insert(trans, extent_root);
940         pending_ret = del_pending_extents(trans, extent_root);
941         if (ret)
942                 return ret;
943         if (pending_ret)
944                 return pending_ret;
945         return 0;
946
947 }
948
949 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
950                                    struct btrfs_root *root)
951 {
952         struct extent_io_tree *block_group_cache;
953         struct btrfs_block_group_cache *cache;
954         int ret;
955         int err = 0;
956         int werr = 0;
957         struct btrfs_path *path;
958         u64 last = 0;
959         u64 start;
960         u64 end;
961         u64 ptr;
962
963         block_group_cache = &root->fs_info->block_group_cache;
964         path = btrfs_alloc_path();
965         if (!path)
966                 return -ENOMEM;
967
968         while(1) {
969                 ret = find_first_extent_bit(block_group_cache, last,
970                                             &start, &end, BLOCK_GROUP_DIRTY);
971                 if (ret)
972                         break;
973
974                 last = end + 1;
975                 ret = get_state_private(block_group_cache, start, &ptr);
976                 if (ret)
977                         break;
978
979                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
980                 err = write_one_cache_group(trans, root,
981                                             path, cache);
982                 /*
983                  * if we fail to write the cache group, we want
984                  * to keep it marked dirty in hopes that a later
985                  * write will work
986                  */
987                 if (err) {
988                         werr = err;
989                         continue;
990                 }
991                 clear_extent_bits(block_group_cache, start, end,
992                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
993         }
994         btrfs_free_path(path);
995         return werr;
996 }
997
998 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
999                                                   u64 flags)
1000 {
1001         struct list_head *head = &info->space_info;
1002         struct list_head *cur;
1003         struct btrfs_space_info *found;
1004         list_for_each(cur, head) {
1005                 found = list_entry(cur, struct btrfs_space_info, list);
1006                 if (found->flags == flags)
1007                         return found;
1008         }
1009         return NULL;
1010
1011 }
1012
1013 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1014                           struct btrfs_root *extent_root, u64 alloc_bytes,
1015                           u64 flags)
1016 {
1017         struct btrfs_space_info *space_info;
1018         u64 thresh;
1019         u64 start;
1020         u64 num_bytes;
1021         int ret;
1022
1023         space_info = __find_space_info(extent_root->fs_info, flags);
1024         BUG_ON(!space_info);
1025
1026         if (space_info->full)
1027                 return 0;
1028
1029         thresh = div_factor(space_info->total_bytes, 7);
1030         if ((space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1031             thresh)
1032                 return 0;
1033
1034         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1035         if (ret == -ENOSPC) {
1036 printk("space info full %Lu\n", flags);
1037                 space_info->full = 1;
1038                 return 0;
1039         }
1040
1041         BUG_ON(ret);
1042
1043         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1044                      extent_root->fs_info->chunk_root->root_key.objectid,
1045                      start, num_bytes);
1046         BUG_ON(ret);
1047         return 0;
1048 }
1049
1050 static int update_block_group(struct btrfs_trans_handle *trans,
1051                               struct btrfs_root *root,
1052                               u64 bytenr, u64 num_bytes, int alloc,
1053                               int mark_free)
1054 {
1055         struct btrfs_block_group_cache *cache;
1056         struct btrfs_fs_info *info = root->fs_info;
1057         u64 total = num_bytes;
1058         u64 old_val;
1059         u64 byte_in_group;
1060         u64 start;
1061         u64 end;
1062
1063         while(total) {
1064                 cache = btrfs_lookup_block_group(info, bytenr);
1065                 if (!cache) {
1066                         return -1;
1067                 }
1068                 byte_in_group = bytenr - cache->key.objectid;
1069                 WARN_ON(byte_in_group > cache->key.offset);
1070                 start = cache->key.objectid;
1071                 end = start + cache->key.offset - 1;
1072                 set_extent_bits(&info->block_group_cache, start, end,
1073                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1074
1075                 old_val = btrfs_block_group_used(&cache->item);
1076                 num_bytes = min(total, cache->key.offset - byte_in_group);
1077                 if (alloc) {
1078                         old_val += num_bytes;
1079                         cache->space_info->bytes_used += num_bytes;
1080                 } else {
1081                         old_val -= num_bytes;
1082                         cache->space_info->bytes_used -= num_bytes;
1083                         if (mark_free) {
1084                                 set_extent_dirty(&info->free_space_cache,
1085                                                  bytenr, bytenr + num_bytes - 1,
1086                                                  GFP_NOFS);
1087                         }
1088                 }
1089                 btrfs_set_block_group_used(&cache->item, old_val);
1090                 total -= num_bytes;
1091                 bytenr += num_bytes;
1092         }
1093         return 0;
1094 }
1095
1096 static int update_pinned_extents(struct btrfs_root *root,
1097                                 u64 bytenr, u64 num, int pin)
1098 {
1099         u64 len;
1100         struct btrfs_block_group_cache *cache;
1101         struct btrfs_fs_info *fs_info = root->fs_info;
1102
1103         if (pin) {
1104                 set_extent_dirty(&fs_info->pinned_extents,
1105                                 bytenr, bytenr + num - 1, GFP_NOFS);
1106         } else {
1107                 clear_extent_dirty(&fs_info->pinned_extents,
1108                                 bytenr, bytenr + num - 1, GFP_NOFS);
1109         }
1110         while (num > 0) {
1111                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1112                 WARN_ON(!cache);
1113                 len = min(num, cache->key.offset -
1114                           (bytenr - cache->key.objectid));
1115                 if (pin) {
1116                         cache->pinned += len;
1117                         cache->space_info->bytes_pinned += len;
1118                         fs_info->total_pinned += len;
1119                 } else {
1120                         cache->pinned -= len;
1121                         cache->space_info->bytes_pinned -= len;
1122                         fs_info->total_pinned -= len;
1123                 }
1124                 bytenr += len;
1125                 num -= len;
1126         }
1127         return 0;
1128 }
1129
1130 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1131 {
1132         u64 last = 0;
1133         u64 start;
1134         u64 end;
1135         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1136         int ret;
1137
1138         while(1) {
1139                 ret = find_first_extent_bit(pinned_extents, last,
1140                                             &start, &end, EXTENT_DIRTY);
1141                 if (ret)
1142                         break;
1143                 set_extent_dirty(copy, start, end, GFP_NOFS);
1144                 last = end + 1;
1145         }
1146         return 0;
1147 }
1148
1149 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1150                                struct btrfs_root *root,
1151                                struct extent_io_tree *unpin)
1152 {
1153         u64 start;
1154         u64 end;
1155         int ret;
1156         struct extent_io_tree *free_space_cache;
1157         free_space_cache = &root->fs_info->free_space_cache;
1158
1159         while(1) {
1160                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1161                                             EXTENT_DIRTY);
1162                 if (ret)
1163                         break;
1164                 update_pinned_extents(root, start, end + 1 - start, 0);
1165                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1166                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1167         }
1168         return 0;
1169 }
1170
1171 static int finish_current_insert(struct btrfs_trans_handle *trans,
1172                                  struct btrfs_root *extent_root)
1173 {
1174         u64 start;
1175         u64 end;
1176         struct btrfs_fs_info *info = extent_root->fs_info;
1177         struct extent_buffer *eb;
1178         struct btrfs_path *path;
1179         struct btrfs_key ins;
1180         struct btrfs_disk_key first;
1181         struct btrfs_extent_item extent_item;
1182         int ret;
1183         int level;
1184         int err = 0;
1185
1186         btrfs_set_stack_extent_refs(&extent_item, 1);
1187         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1188         path = btrfs_alloc_path();
1189
1190         while(1) {
1191                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1192                                             &end, EXTENT_LOCKED);
1193                 if (ret)
1194                         break;
1195
1196                 ins.objectid = start;
1197                 ins.offset = end + 1 - start;
1198                 err = btrfs_insert_item(trans, extent_root, &ins,
1199                                         &extent_item, sizeof(extent_item));
1200                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1201                                   GFP_NOFS);
1202                 eb = read_tree_block(extent_root, ins.objectid, ins.offset);
1203                 level = btrfs_header_level(eb);
1204                 if (level == 0) {
1205                         btrfs_item_key(eb, &first, 0);
1206                 } else {
1207                         btrfs_node_key(eb, &first, 0);
1208                 }
1209                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1210                                           start, extent_root->root_key.objectid,
1211                                           0, level,
1212                                           btrfs_disk_key_objectid(&first));
1213                 BUG_ON(err);
1214                 free_extent_buffer(eb);
1215         }
1216         btrfs_free_path(path);
1217         return 0;
1218 }
1219
1220 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1221                           int pending)
1222 {
1223         int err = 0;
1224         struct extent_buffer *buf;
1225
1226         if (!pending) {
1227                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1228                 if (buf) {
1229                         if (btrfs_buffer_uptodate(buf)) {
1230                                 u64 transid =
1231                                     root->fs_info->running_transaction->transid;
1232                                 u64 header_transid =
1233                                         btrfs_header_generation(buf);
1234                                 if (header_transid == transid) {
1235                                         clean_tree_block(NULL, root, buf);
1236                                         free_extent_buffer(buf);
1237                                         return 1;
1238                                 }
1239                         }
1240                         free_extent_buffer(buf);
1241                 }
1242                 update_pinned_extents(root, bytenr, num_bytes, 1);
1243         } else {
1244                 set_extent_bits(&root->fs_info->pending_del,
1245                                 bytenr, bytenr + num_bytes - 1,
1246                                 EXTENT_LOCKED, GFP_NOFS);
1247         }
1248         BUG_ON(err < 0);
1249         return 0;
1250 }
1251
1252 /*
1253  * remove an extent from the root, returns 0 on success
1254  */
1255 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1256                          *root, u64 bytenr, u64 num_bytes,
1257                          u64 root_objectid, u64 ref_generation,
1258                          u64 owner_objectid, u64 owner_offset, int pin,
1259                          int mark_free)
1260 {
1261         struct btrfs_path *path;
1262         struct btrfs_key key;
1263         struct btrfs_fs_info *info = root->fs_info;
1264         struct btrfs_root *extent_root = info->extent_root;
1265         struct extent_buffer *leaf;
1266         int ret;
1267         int extent_slot = 0;
1268         int found_extent = 0;
1269         int num_to_del = 1;
1270         struct btrfs_extent_item *ei;
1271         u32 refs;
1272
1273         key.objectid = bytenr;
1274         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1275         key.offset = num_bytes;
1276         path = btrfs_alloc_path();
1277         if (!path)
1278                 return -ENOMEM;
1279
1280         path->reada = 0;
1281         ret = lookup_extent_backref(trans, extent_root, path,
1282                                     bytenr, root_objectid,
1283                                     ref_generation,
1284                                     owner_objectid, owner_offset, 1);
1285         if (ret == 0) {
1286                 struct btrfs_key found_key;
1287                 extent_slot = path->slots[0];
1288                 while(extent_slot > 0) {
1289                         extent_slot--;
1290                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1291                                               extent_slot);
1292                         if (found_key.objectid != bytenr)
1293                                 break;
1294                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1295                             found_key.offset == num_bytes) {
1296                                 found_extent = 1;
1297                                 break;
1298                         }
1299                         if (path->slots[0] - extent_slot > 5)
1300                                 break;
1301                 }
1302                 if (!found_extent)
1303                         ret = btrfs_del_item(trans, extent_root, path);
1304         } else {
1305                 btrfs_print_leaf(extent_root, path->nodes[0]);
1306                 WARN_ON(1);
1307                 printk("Unable to find ref byte nr %Lu root %Lu "
1308                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1309                        root_objectid, ref_generation, owner_objectid,
1310                        owner_offset);
1311         }
1312         if (!found_extent) {
1313                 btrfs_release_path(extent_root, path);
1314                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1315                 if (ret < 0)
1316                         return ret;
1317                 BUG_ON(ret);
1318                 extent_slot = path->slots[0];
1319         }
1320
1321         leaf = path->nodes[0];
1322         ei = btrfs_item_ptr(leaf, extent_slot,
1323                             struct btrfs_extent_item);
1324         refs = btrfs_extent_refs(leaf, ei);
1325         BUG_ON(refs == 0);
1326         refs -= 1;
1327         btrfs_set_extent_refs(leaf, ei, refs);
1328
1329         btrfs_mark_buffer_dirty(leaf);
1330
1331         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1332                 /* if the back ref and the extent are next to each other
1333                  * they get deleted below in one shot
1334                  */
1335                 path->slots[0] = extent_slot;
1336                 num_to_del = 2;
1337         } else if (found_extent) {
1338                 /* otherwise delete the extent back ref */
1339                 ret = btrfs_del_item(trans, extent_root, path);
1340                 BUG_ON(ret);
1341                 /* if refs are 0, we need to setup the path for deletion */
1342                 if (refs == 0) {
1343                         btrfs_release_path(extent_root, path);
1344                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1345                                                 -1, 1);
1346                         if (ret < 0)
1347                                 return ret;
1348                         BUG_ON(ret);
1349                 }
1350         }
1351
1352         if (refs == 0) {
1353                 u64 super_used;
1354                 u64 root_used;
1355
1356                 if (pin) {
1357                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1358                         if (ret > 0)
1359                                 mark_free = 1;
1360                         BUG_ON(ret < 0);
1361                 }
1362
1363                 /* block accounting for super block */
1364                 super_used = btrfs_super_bytes_used(&info->super_copy);
1365                 btrfs_set_super_bytes_used(&info->super_copy,
1366                                            super_used - num_bytes);
1367
1368                 /* block accounting for root item */
1369                 root_used = btrfs_root_used(&root->root_item);
1370                 btrfs_set_root_used(&root->root_item,
1371                                            root_used - num_bytes);
1372                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1373                                       num_to_del);
1374                 if (ret) {
1375                         return ret;
1376                 }
1377                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1378                                          mark_free);
1379                 BUG_ON(ret);
1380         }
1381         btrfs_free_path(path);
1382         finish_current_insert(trans, extent_root);
1383         return ret;
1384 }
1385
1386 /*
1387  * find all the blocks marked as pending in the radix tree and remove
1388  * them from the extent map
1389  */
1390 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1391                                btrfs_root *extent_root)
1392 {
1393         int ret;
1394         int err = 0;
1395         u64 start;
1396         u64 end;
1397         struct extent_io_tree *pending_del;
1398         struct extent_io_tree *pinned_extents;
1399
1400         pending_del = &extent_root->fs_info->pending_del;
1401         pinned_extents = &extent_root->fs_info->pinned_extents;
1402
1403         while(1) {
1404                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1405                                             EXTENT_LOCKED);
1406                 if (ret)
1407                         break;
1408                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1409                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1410                                   GFP_NOFS);
1411                 ret = __free_extent(trans, extent_root,
1412                                      start, end + 1 - start,
1413                                      extent_root->root_key.objectid,
1414                                      0, 0, 0, 0, 0);
1415                 if (ret)
1416                         err = ret;
1417         }
1418         return err;
1419 }
1420
1421 /*
1422  * remove an extent from the root, returns 0 on success
1423  */
1424 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1425                       *root, u64 bytenr, u64 num_bytes,
1426                       u64 root_objectid, u64 ref_generation,
1427                       u64 owner_objectid, u64 owner_offset, int pin)
1428 {
1429         struct btrfs_root *extent_root = root->fs_info->extent_root;
1430         int pending_ret;
1431         int ret;
1432
1433         WARN_ON(num_bytes < root->sectorsize);
1434         if (!root->ref_cows)
1435                 ref_generation = 0;
1436
1437         if (root == extent_root) {
1438                 pin_down_bytes(root, bytenr, num_bytes, 1);
1439                 return 0;
1440         }
1441         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1442                             ref_generation, owner_objectid, owner_offset,
1443                             pin, pin == 0);
1444         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1445         return ret ? ret : pending_ret;
1446 }
1447
1448 static u64 stripe_align(struct btrfs_root *root, u64 val)
1449 {
1450         u64 mask = ((u64)root->stripesize - 1);
1451         u64 ret = (val + mask) & ~mask;
1452         return ret;
1453 }
1454
1455 /*
1456  * walks the btree of allocated extents and find a hole of a given size.
1457  * The key ins is changed to record the hole:
1458  * ins->objectid == block start
1459  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1460  * ins->offset == number of blocks
1461  * Any available blocks before search_start are skipped.
1462  */
1463 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1464                                      struct btrfs_root *orig_root,
1465                                      u64 num_bytes, u64 empty_size,
1466                                      u64 search_start, u64 search_end,
1467                                      u64 hint_byte, struct btrfs_key *ins,
1468                                      u64 exclude_start, u64 exclude_nr,
1469                                      int data)
1470 {
1471         int ret;
1472         u64 orig_search_start = search_start;
1473         struct btrfs_root * root = orig_root->fs_info->extent_root;
1474         struct btrfs_fs_info *info = root->fs_info;
1475         u64 total_needed = num_bytes;
1476         struct btrfs_block_group_cache *block_group;
1477         int full_scan = 0;
1478         int wrapped = 0;
1479
1480         WARN_ON(num_bytes < root->sectorsize);
1481         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1482
1483         if (search_end == (u64)-1)
1484                 search_end = btrfs_super_total_bytes(&info->super_copy);
1485
1486         if (hint_byte) {
1487                 block_group = btrfs_lookup_block_group(info, hint_byte);
1488                 if (!block_group)
1489                         hint_byte = search_start;
1490                 block_group = btrfs_find_block_group(root, block_group,
1491                                                      hint_byte, data, 1);
1492         } else {
1493                 block_group = btrfs_find_block_group(root,
1494                                                      trans->block_group,
1495                                                      search_start, data, 1);
1496         }
1497
1498         total_needed += empty_size;
1499
1500 check_failed:
1501         if (!block_group) {
1502                 block_group = btrfs_lookup_block_group(info, search_start);
1503                 if (!block_group)
1504                         block_group = btrfs_lookup_block_group(info,
1505                                                        orig_search_start);
1506         }
1507         ret = find_search_start(root, &block_group, &search_start,
1508                                 total_needed, data);
1509         if (ret)
1510                 goto error;
1511
1512         search_start = stripe_align(root, search_start);
1513         ins->objectid = search_start;
1514         ins->offset = num_bytes;
1515
1516         if (ins->objectid + num_bytes >= search_end)
1517                 goto enospc;
1518
1519         if (ins->objectid + num_bytes >
1520             block_group->key.objectid + block_group->key.offset) {
1521                 search_start = block_group->key.objectid +
1522                         block_group->key.offset;
1523                 goto new_group;
1524         }
1525
1526         if (test_range_bit(&info->extent_ins, ins->objectid,
1527                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1528                 search_start = ins->objectid + num_bytes;
1529                 goto new_group;
1530         }
1531
1532         if (test_range_bit(&info->pinned_extents, ins->objectid,
1533                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1534                 search_start = ins->objectid + num_bytes;
1535                 goto new_group;
1536         }
1537
1538         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1539             ins->objectid < exclude_start + exclude_nr)) {
1540                 search_start = exclude_start + exclude_nr;
1541                 goto new_group;
1542         }
1543
1544         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1545                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1546                 if (block_group)
1547                         trans->block_group = block_group;
1548         }
1549         ins->offset = num_bytes;
1550         return 0;
1551
1552 new_group:
1553         if (search_start + num_bytes >= search_end) {
1554 enospc:
1555                 search_start = orig_search_start;
1556                 if (full_scan) {
1557                         ret = -ENOSPC;
1558                         goto error;
1559                 }
1560                 if (wrapped) {
1561                         if (!full_scan)
1562                                 total_needed -= empty_size;
1563                         full_scan = 1;
1564                 } else
1565                         wrapped = 1;
1566         }
1567         block_group = btrfs_lookup_block_group(info, search_start);
1568         cond_resched();
1569         block_group = btrfs_find_block_group(root, block_group,
1570                                              search_start, data, 0);
1571         goto check_failed;
1572
1573 error:
1574         return ret;
1575 }
1576 /*
1577  * finds a free extent and does all the dirty work required for allocation
1578  * returns the key for the extent through ins, and a tree buffer for
1579  * the first block of the extent through buf.
1580  *
1581  * returns 0 if everything worked, non-zero otherwise.
1582  */
1583 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1584                        struct btrfs_root *root,
1585                        u64 num_bytes, u64 root_objectid, u64 ref_generation,
1586                        u64 owner, u64 owner_offset,
1587                        u64 empty_size, u64 hint_byte,
1588                        u64 search_end, struct btrfs_key *ins, int data)
1589 {
1590         int ret;
1591         int pending_ret;
1592         u64 super_used;
1593         u64 root_used;
1594         u64 search_start = 0;
1595         u64 new_hint;
1596         u32 sizes[2];
1597         struct btrfs_fs_info *info = root->fs_info;
1598         struct btrfs_root *extent_root = info->extent_root;
1599         struct btrfs_extent_item *extent_item;
1600         struct btrfs_extent_ref *ref;
1601         struct btrfs_path *path;
1602         struct btrfs_key keys[2];
1603
1604         if (data) {
1605                 data = BTRFS_BLOCK_GROUP_DATA;
1606         } else if (root == root->fs_info->chunk_root) {
1607                 data = BTRFS_BLOCK_GROUP_SYSTEM;
1608         } else {
1609                 data = BTRFS_BLOCK_GROUP_METADATA;
1610         }
1611
1612         if (root->ref_cows) {
1613                 if (data != BTRFS_BLOCK_GROUP_METADATA) {
1614                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1615                                              num_bytes,
1616                                              BTRFS_BLOCK_GROUP_METADATA);
1617                         BUG_ON(ret);
1618                 }
1619                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1620                                      num_bytes, data);
1621                 BUG_ON(ret);
1622         }
1623
1624         new_hint = max(hint_byte, root->fs_info->alloc_start);
1625         if (new_hint < btrfs_super_total_bytes(&info->super_copy))
1626                 hint_byte = new_hint;
1627
1628         WARN_ON(num_bytes < root->sectorsize);
1629         ret = find_free_extent(trans, root, num_bytes, empty_size,
1630                                search_start, search_end, hint_byte, ins,
1631                                trans->alloc_exclude_start,
1632                                trans->alloc_exclude_nr, data);
1633         BUG_ON(ret);
1634         if (ret)
1635                 return ret;
1636
1637         /* block accounting for super block */
1638         super_used = btrfs_super_bytes_used(&info->super_copy);
1639         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1640
1641         /* block accounting for root item */
1642         root_used = btrfs_root_used(&root->root_item);
1643         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1644
1645         clear_extent_dirty(&root->fs_info->free_space_cache,
1646                            ins->objectid, ins->objectid + ins->offset - 1,
1647                            GFP_NOFS);
1648
1649         if (root == extent_root) {
1650                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1651                                 ins->objectid + ins->offset - 1,
1652                                 EXTENT_LOCKED, GFP_NOFS);
1653                 goto update_block;
1654         }
1655
1656         WARN_ON(trans->alloc_exclude_nr);
1657         trans->alloc_exclude_start = ins->objectid;
1658         trans->alloc_exclude_nr = ins->offset;
1659
1660         memcpy(&keys[0], ins, sizeof(*ins));
1661         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1662                                          owner, owner_offset);
1663         keys[1].objectid = ins->objectid;
1664         keys[1].type = BTRFS_EXTENT_REF_KEY;
1665         sizes[0] = sizeof(*extent_item);
1666         sizes[1] = sizeof(*ref);
1667
1668         path = btrfs_alloc_path();
1669         BUG_ON(!path);
1670
1671         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
1672                                        sizes, 2);
1673
1674         BUG_ON(ret);
1675         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1676                                      struct btrfs_extent_item);
1677         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
1678         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1679                              struct btrfs_extent_ref);
1680
1681         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
1682         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
1683         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
1684         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
1685
1686         btrfs_mark_buffer_dirty(path->nodes[0]);
1687
1688         trans->alloc_exclude_start = 0;
1689         trans->alloc_exclude_nr = 0;
1690         btrfs_free_path(path);
1691         finish_current_insert(trans, extent_root);
1692         pending_ret = del_pending_extents(trans, extent_root);
1693
1694         if (ret) {
1695                 return ret;
1696         }
1697         if (pending_ret) {
1698                 return pending_ret;
1699         }
1700
1701 update_block:
1702         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
1703         if (ret) {
1704                 printk("update block group failed for %Lu %Lu\n",
1705                        ins->objectid, ins->offset);
1706                 BUG();
1707         }
1708         return 0;
1709 }
1710
1711 /*
1712  * helper function to allocate a block for a given tree
1713  * returns the tree buffer or NULL.
1714  */
1715 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1716                                              struct btrfs_root *root,
1717                                              u32 blocksize,
1718                                              u64 root_objectid, u64 hint,
1719                                              u64 empty_size)
1720 {
1721         u64 ref_generation;
1722
1723         if (root->ref_cows)
1724                 ref_generation = trans->transid;
1725         else
1726                 ref_generation = 0;
1727
1728
1729         return __btrfs_alloc_free_block(trans, root, blocksize, root_objectid,
1730                                         ref_generation, 0, 0, hint, empty_size);
1731 }
1732
1733 /*
1734  * helper function to allocate a block for a given tree
1735  * returns the tree buffer or NULL.
1736  */
1737 struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1738                                              struct btrfs_root *root,
1739                                              u32 blocksize,
1740                                              u64 root_objectid,
1741                                              u64 ref_generation,
1742                                              u64 first_objectid,
1743                                              int level,
1744                                              u64 hint,
1745                                              u64 empty_size)
1746 {
1747         struct btrfs_key ins;
1748         int ret;
1749         struct extent_buffer *buf;
1750
1751         ret = btrfs_alloc_extent(trans, root, blocksize,
1752                                  root_objectid, ref_generation,
1753                                  level, first_objectid, empty_size, hint,
1754                                  (u64)-1, &ins, 0);
1755         if (ret) {
1756                 BUG_ON(ret > 0);
1757                 return ERR_PTR(ret);
1758         }
1759         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
1760         if (!buf) {
1761                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
1762                                   root->root_key.objectid, ref_generation,
1763                                   0, 0, 0);
1764                 return ERR_PTR(-ENOMEM);
1765         }
1766         btrfs_set_header_generation(buf, trans->transid);
1767         clean_tree_block(trans, root, buf);
1768         wait_on_tree_block_writeback(root, buf);
1769         btrfs_set_buffer_uptodate(buf);
1770
1771         if (PageDirty(buf->first_page)) {
1772                 printk("page %lu dirty\n", buf->first_page->index);
1773                 WARN_ON(1);
1774         }
1775
1776         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
1777                          buf->start + buf->len - 1, GFP_NOFS);
1778         set_extent_bits(&BTRFS_I(root->fs_info->btree_inode)->io_tree,
1779                         buf->start, buf->start + buf->len - 1,
1780                         EXTENT_CSUM, GFP_NOFS);
1781         buf->flags |= EXTENT_CSUM;
1782         if (!btrfs_test_opt(root, SSD))
1783                 btrfs_set_buffer_defrag(buf);
1784         trans->blocks_used++;
1785         return buf;
1786 }
1787
1788 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
1789                                   struct btrfs_root *root,
1790                                   struct extent_buffer *leaf)
1791 {
1792         u64 leaf_owner;
1793         u64 leaf_generation;
1794         struct btrfs_key key;
1795         struct btrfs_file_extent_item *fi;
1796         int i;
1797         int nritems;
1798         int ret;
1799
1800         BUG_ON(!btrfs_is_leaf(leaf));
1801         nritems = btrfs_header_nritems(leaf);
1802         leaf_owner = btrfs_header_owner(leaf);
1803         leaf_generation = btrfs_header_generation(leaf);
1804
1805         for (i = 0; i < nritems; i++) {
1806                 u64 disk_bytenr;
1807
1808                 btrfs_item_key_to_cpu(leaf, &key, i);
1809                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
1810                         continue;
1811                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1812                 if (btrfs_file_extent_type(leaf, fi) ==
1813                     BTRFS_FILE_EXTENT_INLINE)
1814                         continue;
1815                 /*
1816                  * FIXME make sure to insert a trans record that
1817                  * repeats the snapshot del on crash
1818                  */
1819                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
1820                 if (disk_bytenr == 0)
1821                         continue;
1822                 ret = btrfs_free_extent(trans, root, disk_bytenr,
1823                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
1824                                 leaf_owner, leaf_generation,
1825                                 key.objectid, key.offset, 0);
1826                 BUG_ON(ret);
1827         }
1828         return 0;
1829 }
1830
1831 static void noinline reada_walk_down(struct btrfs_root *root,
1832                                      struct extent_buffer *node,
1833                                      int slot)
1834 {
1835         u64 bytenr;
1836         u64 last = 0;
1837         u32 nritems;
1838         u32 refs;
1839         u32 blocksize;
1840         int ret;
1841         int i;
1842         int level;
1843         int skipped = 0;
1844
1845         nritems = btrfs_header_nritems(node);
1846         level = btrfs_header_level(node);
1847         if (level)
1848                 return;
1849
1850         for (i = slot; i < nritems && skipped < 32; i++) {
1851                 bytenr = btrfs_node_blockptr(node, i);
1852                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
1853                              (last > bytenr && last - bytenr > 32 * 1024))) {
1854                         skipped++;
1855                         continue;
1856                 }
1857                 blocksize = btrfs_level_size(root, level - 1);
1858                 if (i != slot) {
1859                         ret = lookup_extent_ref(NULL, root, bytenr,
1860                                                 blocksize, &refs);
1861                         BUG_ON(ret);
1862                         if (refs != 1) {
1863                                 skipped++;
1864                                 continue;
1865                         }
1866                 }
1867                 mutex_unlock(&root->fs_info->fs_mutex);
1868                 ret = readahead_tree_block(root, bytenr, blocksize);
1869                 last = bytenr + blocksize;
1870                 cond_resched();
1871                 mutex_lock(&root->fs_info->fs_mutex);
1872                 if (ret)
1873                         break;
1874         }
1875 }
1876
1877 /*
1878  * helper function for drop_snapshot, this walks down the tree dropping ref
1879  * counts as it goes.
1880  */
1881 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
1882                                    struct btrfs_root *root,
1883                                    struct btrfs_path *path, int *level)
1884 {
1885         u64 root_owner;
1886         u64 root_gen;
1887         u64 bytenr;
1888         struct extent_buffer *next;
1889         struct extent_buffer *cur;
1890         struct extent_buffer *parent;
1891         u32 blocksize;
1892         int ret;
1893         u32 refs;
1894
1895         WARN_ON(*level < 0);
1896         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1897         ret = lookup_extent_ref(trans, root,
1898                                 path->nodes[*level]->start,
1899                                 path->nodes[*level]->len, &refs);
1900         BUG_ON(ret);
1901         if (refs > 1)
1902                 goto out;
1903
1904         /*
1905          * walk down to the last node level and free all the leaves
1906          */
1907         while(*level >= 0) {
1908                 WARN_ON(*level < 0);
1909                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1910                 cur = path->nodes[*level];
1911
1912                 if (btrfs_header_level(cur) != *level)
1913                         WARN_ON(1);
1914
1915                 if (path->slots[*level] >=
1916                     btrfs_header_nritems(cur))
1917                         break;
1918                 if (*level == 0) {
1919                         ret = drop_leaf_ref(trans, root, cur);
1920                         BUG_ON(ret);
1921                         break;
1922                 }
1923                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
1924                 blocksize = btrfs_level_size(root, *level - 1);
1925                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
1926                 BUG_ON(ret);
1927                 if (refs != 1) {
1928                         parent = path->nodes[*level];
1929                         root_owner = btrfs_header_owner(parent);
1930                         root_gen = btrfs_header_generation(parent);
1931                         path->slots[*level]++;
1932                         ret = btrfs_free_extent(trans, root, bytenr,
1933                                                 blocksize, root_owner,
1934                                                 root_gen, 0, 0, 1);
1935                         BUG_ON(ret);
1936                         continue;
1937                 }
1938                 next = btrfs_find_tree_block(root, bytenr, blocksize);
1939                 if (!next || !btrfs_buffer_uptodate(next)) {
1940                         free_extent_buffer(next);
1941                         reada_walk_down(root, cur, path->slots[*level]);
1942                         mutex_unlock(&root->fs_info->fs_mutex);
1943                         next = read_tree_block(root, bytenr, blocksize);
1944                         mutex_lock(&root->fs_info->fs_mutex);
1945
1946                         /* we dropped the lock, check one more time */
1947                         ret = lookup_extent_ref(trans, root, bytenr,
1948                                                 blocksize, &refs);
1949                         BUG_ON(ret);
1950                         if (refs != 1) {
1951                                 parent = path->nodes[*level];
1952                                 root_owner = btrfs_header_owner(parent);
1953                                 root_gen = btrfs_header_generation(parent);
1954
1955                                 path->slots[*level]++;
1956                                 free_extent_buffer(next);
1957                                 ret = btrfs_free_extent(trans, root, bytenr,
1958                                                         blocksize,
1959                                                         root_owner,
1960                                                         root_gen, 0, 0, 1);
1961                                 BUG_ON(ret);
1962                                 continue;
1963                         }
1964                 }
1965                 WARN_ON(*level <= 0);
1966                 if (path->nodes[*level-1])
1967                         free_extent_buffer(path->nodes[*level-1]);
1968                 path->nodes[*level-1] = next;
1969                 *level = btrfs_header_level(next);
1970                 path->slots[*level] = 0;
1971         }
1972 out:
1973         WARN_ON(*level < 0);
1974         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1975
1976         if (path->nodes[*level] == root->node) {
1977                 root_owner = root->root_key.objectid;
1978                 parent = path->nodes[*level];
1979         } else {
1980                 parent = path->nodes[*level + 1];
1981                 root_owner = btrfs_header_owner(parent);
1982         }
1983
1984         root_gen = btrfs_header_generation(parent);
1985         ret = btrfs_free_extent(trans, root, path->nodes[*level]->start,
1986                                 path->nodes[*level]->len,
1987                                 root_owner, root_gen, 0, 0, 1);
1988         free_extent_buffer(path->nodes[*level]);
1989         path->nodes[*level] = NULL;
1990         *level += 1;
1991         BUG_ON(ret);
1992         return 0;
1993 }
1994
1995 /*
1996  * helper for dropping snapshots.  This walks back up the tree in the path
1997  * to find the first node higher up where we haven't yet gone through
1998  * all the slots
1999  */
2000 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2001                                  struct btrfs_root *root,
2002                                  struct btrfs_path *path, int *level)
2003 {
2004         u64 root_owner;
2005         u64 root_gen;
2006         struct btrfs_root_item *root_item = &root->root_item;
2007         int i;
2008         int slot;
2009         int ret;
2010
2011         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2012                 slot = path->slots[i];
2013                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2014                         struct extent_buffer *node;
2015                         struct btrfs_disk_key disk_key;
2016                         node = path->nodes[i];
2017                         path->slots[i]++;
2018                         *level = i;
2019                         WARN_ON(*level == 0);
2020                         btrfs_node_key(node, &disk_key, path->slots[i]);
2021                         memcpy(&root_item->drop_progress,
2022                                &disk_key, sizeof(disk_key));
2023                         root_item->drop_level = i;
2024                         return 0;
2025                 } else {
2026                         if (path->nodes[*level] == root->node) {
2027                                 root_owner = root->root_key.objectid;
2028                                 root_gen =
2029                                    btrfs_header_generation(path->nodes[*level]);
2030                         } else {
2031                                 struct extent_buffer *node;
2032                                 node = path->nodes[*level + 1];
2033                                 root_owner = btrfs_header_owner(node);
2034                                 root_gen = btrfs_header_generation(node);
2035                         }
2036                         ret = btrfs_free_extent(trans, root,
2037                                                 path->nodes[*level]->start,
2038                                                 path->nodes[*level]->len,
2039                                                 root_owner, root_gen, 0, 0, 1);
2040                         BUG_ON(ret);
2041                         free_extent_buffer(path->nodes[*level]);
2042                         path->nodes[*level] = NULL;
2043                         *level = i + 1;
2044                 }
2045         }
2046         return 1;
2047 }
2048
2049 /*
2050  * drop the reference count on the tree rooted at 'snap'.  This traverses
2051  * the tree freeing any blocks that have a ref count of zero after being
2052  * decremented.
2053  */
2054 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2055                         *root)
2056 {
2057         int ret = 0;
2058         int wret;
2059         int level;
2060         struct btrfs_path *path;
2061         int i;
2062         int orig_level;
2063         struct btrfs_root_item *root_item = &root->root_item;
2064
2065         path = btrfs_alloc_path();
2066         BUG_ON(!path);
2067
2068         level = btrfs_header_level(root->node);
2069         orig_level = level;
2070         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2071                 path->nodes[level] = root->node;
2072                 extent_buffer_get(root->node);
2073                 path->slots[level] = 0;
2074         } else {
2075                 struct btrfs_key key;
2076                 struct btrfs_disk_key found_key;
2077                 struct extent_buffer *node;
2078
2079                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2080                 level = root_item->drop_level;
2081                 path->lowest_level = level;
2082                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2083                 if (wret < 0) {
2084                         ret = wret;
2085                         goto out;
2086                 }
2087                 node = path->nodes[level];
2088                 btrfs_node_key(node, &found_key, path->slots[level]);
2089                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2090                                sizeof(found_key)));
2091         }
2092         while(1) {
2093                 wret = walk_down_tree(trans, root, path, &level);
2094                 if (wret > 0)
2095                         break;
2096                 if (wret < 0)
2097                         ret = wret;
2098
2099                 wret = walk_up_tree(trans, root, path, &level);
2100                 if (wret > 0)
2101                         break;
2102                 if (wret < 0)
2103                         ret = wret;
2104                 ret = -EAGAIN;
2105                 break;
2106         }
2107         for (i = 0; i <= orig_level; i++) {
2108                 if (path->nodes[i]) {
2109                         free_extent_buffer(path->nodes[i]);
2110                         path->nodes[i] = NULL;
2111                 }
2112         }
2113 out:
2114         btrfs_free_path(path);
2115         return ret;
2116 }
2117
2118 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2119 {
2120         u64 start;
2121         u64 end;
2122         u64 ptr;
2123         int ret;
2124         while(1) {
2125                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2126                                             &start, &end, (unsigned int)-1);
2127                 if (ret)
2128                         break;
2129                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2130                 if (!ret)
2131                         kfree((void *)(unsigned long)ptr);
2132                 clear_extent_bits(&info->block_group_cache, start,
2133                                   end, (unsigned int)-1, GFP_NOFS);
2134         }
2135         while(1) {
2136                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2137                                             &start, &end, EXTENT_DIRTY);
2138                 if (ret)
2139                         break;
2140                 clear_extent_dirty(&info->free_space_cache, start,
2141                                    end, GFP_NOFS);
2142         }
2143         return 0;
2144 }
2145
2146 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2147                                          u64 len)
2148 {
2149         u64 page_start;
2150         u64 page_end;
2151         u64 delalloc_start;
2152         u64 existing_delalloc;
2153         unsigned long last_index;
2154         unsigned long i;
2155         struct page *page;
2156         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2157         struct file_ra_state *ra;
2158
2159         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2160
2161         mutex_lock(&inode->i_mutex);
2162         i = start >> PAGE_CACHE_SHIFT;
2163         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2164
2165         file_ra_state_init(ra, inode->i_mapping);
2166         btrfs_force_ra(inode->i_mapping, ra, NULL, i, last_index);
2167         kfree(ra);
2168
2169         for (; i <= last_index; i++) {
2170                 page = grab_cache_page(inode->i_mapping, i);
2171                 if (!page)
2172                         goto out_unlock;
2173                 if (!PageUptodate(page)) {
2174                         btrfs_readpage(NULL, page);
2175                         lock_page(page);
2176                         if (!PageUptodate(page)) {
2177                                 unlock_page(page);
2178                                 page_cache_release(page);
2179                                 goto out_unlock;
2180                         }
2181                 }
2182                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2183                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2184
2185                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2186
2187                 delalloc_start = page_start;
2188                 existing_delalloc = count_range_bits(io_tree,
2189                                              &delalloc_start, page_end,
2190                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2191
2192                 set_extent_delalloc(io_tree, page_start,
2193                                     page_end, GFP_NOFS);
2194
2195                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2196                 set_page_dirty(page);
2197                 unlock_page(page);
2198                 page_cache_release(page);
2199         }
2200
2201 out_unlock:
2202         mutex_unlock(&inode->i_mutex);
2203         return 0;
2204 }
2205
2206 /*
2207  * note, this releases the path
2208  */
2209 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2210                                   struct btrfs_path *path,
2211                                   struct btrfs_key *extent_key)
2212 {
2213         struct inode *inode;
2214         struct btrfs_root *found_root;
2215         struct btrfs_key *root_location;
2216         struct btrfs_extent_ref *ref;
2217         u64 ref_root;
2218         u64 ref_gen;
2219         u64 ref_objectid;
2220         u64 ref_offset;
2221         int ret;
2222
2223         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2224                              struct btrfs_extent_ref);
2225         ref_root = btrfs_ref_root(path->nodes[0], ref);
2226         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2227         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2228         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2229         btrfs_release_path(extent_root, path);
2230
2231         root_location = kmalloc(sizeof(*root_location), GFP_NOFS);
2232         root_location->objectid = ref_root;
2233         if (ref_gen == 0)
2234                 root_location->offset = 0;
2235         else
2236                 root_location->offset = (u64)-1;
2237         root_location->type = BTRFS_ROOT_ITEM_KEY;
2238
2239         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2240                                                 root_location);
2241         BUG_ON(!found_root);
2242         kfree(root_location);
2243
2244         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2245                 mutex_unlock(&extent_root->fs_info->fs_mutex);
2246                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2247                                           ref_objectid, found_root);
2248                 if (inode->i_state & I_NEW) {
2249                         /* the inode and parent dir are two different roots */
2250                         BTRFS_I(inode)->root = found_root;
2251                         BTRFS_I(inode)->location.objectid = ref_objectid;
2252                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2253                         BTRFS_I(inode)->location.offset = 0;
2254                         btrfs_read_locked_inode(inode);
2255                         unlock_new_inode(inode);
2256
2257                 }
2258                 /* this can happen if the reference is not against
2259                  * the latest version of the tree root
2260                  */
2261                 if (is_bad_inode(inode)) {
2262                         mutex_lock(&extent_root->fs_info->fs_mutex);
2263                         goto out;
2264                 }
2265                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2266                 /* FIXME, data=ordered will help get rid of this */
2267                 filemap_fdatawrite(inode->i_mapping);
2268                 iput(inode);
2269                 mutex_lock(&extent_root->fs_info->fs_mutex);
2270         } else {
2271                 struct btrfs_trans_handle *trans;
2272                 struct btrfs_key found_key;
2273                 struct extent_buffer *eb;
2274                 int level;
2275                 int i;
2276
2277                 trans = btrfs_start_transaction(found_root, 1);
2278                 eb = read_tree_block(found_root, extent_key->objectid,
2279                                      extent_key->offset);
2280                 level = btrfs_header_level(eb);
2281
2282                 if (level == 0)
2283                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2284                 else
2285                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2286
2287                 free_extent_buffer(eb);
2288
2289                 path->lowest_level = level;
2290                 path->reada = 2;
2291                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2292                                         0, 1);
2293                 path->lowest_level = 0;
2294                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2295                         if (!path->nodes[i])
2296                                 break;
2297                         free_extent_buffer(path->nodes[i]);
2298                         path->nodes[i] = NULL;
2299                 }
2300                 btrfs_release_path(found_root, path);
2301                 btrfs_end_transaction(trans, found_root);
2302         }
2303
2304 out:
2305         return 0;
2306 }
2307
2308 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2309                                         struct btrfs_path *path,
2310                                         struct btrfs_key *extent_key)
2311 {
2312         struct btrfs_key key;
2313         struct btrfs_key found_key;
2314         struct extent_buffer *leaf;
2315         u32 nritems;
2316         u32 item_size;
2317         int ret = 0;
2318
2319         key.objectid = extent_key->objectid;
2320         key.type = BTRFS_EXTENT_REF_KEY;
2321         key.offset = 0;
2322
2323         while(1) {
2324                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2325
2326                 if (ret < 0)
2327                         goto out;
2328
2329                 ret = 0;
2330                 leaf = path->nodes[0];
2331                 nritems = btrfs_header_nritems(leaf);
2332                 if (path->slots[0] == nritems)
2333                         goto out;
2334
2335                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2336                 if (found_key.objectid != extent_key->objectid)
2337                         break;
2338
2339                 if (found_key.type != BTRFS_EXTENT_REF_KEY)
2340                         break;
2341
2342                 key.offset = found_key.offset + 1;
2343                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2344
2345                 ret = relocate_one_reference(extent_root, path, extent_key);
2346                 if (ret)
2347                         goto out;
2348         }
2349         ret = 0;
2350 out:
2351         btrfs_release_path(extent_root, path);
2352         return ret;
2353 }
2354
2355 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size)
2356 {
2357         struct btrfs_trans_handle *trans;
2358         struct btrfs_root *tree_root = root->fs_info->tree_root;
2359         struct btrfs_path *path;
2360         u64 cur_byte;
2361         u64 total_found;
2362         struct btrfs_fs_info *info = root->fs_info;
2363         struct extent_io_tree *block_group_cache;
2364         struct btrfs_key key;
2365         struct btrfs_key found_key;
2366         struct extent_buffer *leaf;
2367         u32 nritems;
2368         int ret;
2369         int progress = 0;
2370
2371         btrfs_set_super_total_bytes(&info->super_copy, new_size);
2372         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2373                            GFP_NOFS);
2374         block_group_cache = &info->block_group_cache;
2375         path = btrfs_alloc_path();
2376         root = root->fs_info->extent_root;
2377         path->reada = 2;
2378
2379 again:
2380         total_found = 0;
2381         key.objectid = new_size;
2382         key.offset = 0;
2383         key.type = 0;
2384         cur_byte = key.objectid;
2385
2386         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2387         if (ret < 0)
2388                 goto out;
2389
2390         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2391         if (ret < 0)
2392                 goto out;
2393         if (ret == 0) {
2394                 leaf = path->nodes[0];
2395                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2396                 if (found_key.objectid + found_key.offset > new_size) {
2397                         cur_byte = found_key.objectid;
2398                         key.objectid = cur_byte;
2399                 }
2400         }
2401         btrfs_release_path(root, path);
2402
2403         while(1) {
2404                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2405                 if (ret < 0)
2406                         goto out;
2407
2408                 leaf = path->nodes[0];
2409                 nritems = btrfs_header_nritems(leaf);
2410 next:
2411                 if (path->slots[0] >= nritems) {
2412                         ret = btrfs_next_leaf(root, path);
2413                         if (ret < 0)
2414                                 goto out;
2415                         if (ret == 1) {
2416                                 ret = 0;
2417                                 break;
2418                         }
2419                         leaf = path->nodes[0];
2420                         nritems = btrfs_header_nritems(leaf);
2421                 }
2422
2423                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2424
2425                 if (progress && need_resched()) {
2426                         memcpy(&key, &found_key, sizeof(key));
2427                         mutex_unlock(&root->fs_info->fs_mutex);
2428                         cond_resched();
2429                         mutex_lock(&root->fs_info->fs_mutex);
2430                         btrfs_release_path(root, path);
2431                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
2432                         progress = 0;
2433                         goto next;
2434                 }
2435                 progress = 1;
2436
2437                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
2438                     found_key.objectid + found_key.offset <= cur_byte) {
2439                         path->slots[0]++;
2440                         goto next;
2441                 }
2442
2443                 total_found++;
2444                 cur_byte = found_key.objectid + found_key.offset;
2445                 key.objectid = cur_byte;
2446                 btrfs_release_path(root, path);
2447                 ret = relocate_one_extent(root, path, &found_key);
2448         }
2449
2450         btrfs_release_path(root, path);
2451
2452         if (total_found > 0) {
2453                 trans = btrfs_start_transaction(tree_root, 1);
2454                 btrfs_commit_transaction(trans, tree_root);
2455
2456                 mutex_unlock(&root->fs_info->fs_mutex);
2457                 btrfs_clean_old_snapshots(tree_root);
2458                 mutex_lock(&root->fs_info->fs_mutex);
2459
2460                 trans = btrfs_start_transaction(tree_root, 1);
2461                 btrfs_commit_transaction(trans, tree_root);
2462                 goto again;
2463         }
2464
2465         trans = btrfs_start_transaction(root, 1);
2466         key.objectid = new_size;
2467         key.offset = 0;
2468         key.type = 0;
2469         while(1) {
2470                 u64 ptr;
2471
2472                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2473                 if (ret < 0)
2474                         goto out;
2475
2476                 leaf = path->nodes[0];
2477                 nritems = btrfs_header_nritems(leaf);
2478 bg_next:
2479                 if (path->slots[0] >= nritems) {
2480                         ret = btrfs_next_leaf(root, path);
2481                         if (ret < 0)
2482                                 break;
2483                         if (ret == 1) {
2484                                 ret = 0;
2485                                 break;
2486                         }
2487                         leaf = path->nodes[0];
2488                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2489
2490                         /*
2491                          * btrfs_next_leaf doesn't cow buffers, we have to
2492                          * do the search again
2493                          */
2494                         memcpy(&key, &found_key, sizeof(key));
2495                         btrfs_release_path(root, path);
2496                         goto resched_check;
2497                 }
2498
2499                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2500                 if (btrfs_key_type(&found_key) != BTRFS_BLOCK_GROUP_ITEM_KEY) {
2501                         printk("shrinker found key %Lu %u %Lu\n",
2502                                 found_key.objectid, found_key.type,
2503                                 found_key.offset);
2504                         path->slots[0]++;
2505                         goto bg_next;
2506                 }
2507                 ret = get_state_private(&info->block_group_cache,
2508                                         found_key.objectid, &ptr);
2509                 if (!ret)
2510                         kfree((void *)(unsigned long)ptr);
2511
2512                 clear_extent_bits(&info->block_group_cache, found_key.objectid,
2513                                   found_key.objectid + found_key.offset - 1,
2514                                   (unsigned int)-1, GFP_NOFS);
2515
2516                 key.objectid = found_key.objectid + 1;
2517                 btrfs_del_item(trans, root, path);
2518                 btrfs_release_path(root, path);
2519 resched_check:
2520                 if (need_resched()) {
2521                         mutex_unlock(&root->fs_info->fs_mutex);
2522                         cond_resched();
2523                         mutex_lock(&root->fs_info->fs_mutex);
2524                 }
2525         }
2526         clear_extent_dirty(&info->free_space_cache, new_size, (u64)-1,
2527                            GFP_NOFS);
2528         btrfs_commit_transaction(trans, root);
2529 out:
2530         btrfs_free_path(path);
2531         return ret;
2532 }
2533
2534 int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
2535                            struct btrfs_root *root, u64 new_size)
2536 {
2537         btrfs_set_super_total_bytes(&root->fs_info->super_copy, new_size);
2538         return 0;
2539 }
2540
2541 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
2542                            struct btrfs_key *key)
2543 {
2544         int ret;
2545         struct btrfs_key found_key;
2546         struct extent_buffer *leaf;
2547         int slot;
2548
2549         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
2550         if (ret < 0)
2551                 return ret;
2552         while(1) {
2553                 slot = path->slots[0];
2554                 leaf = path->nodes[0];
2555                 if (slot >= btrfs_header_nritems(leaf)) {
2556                         ret = btrfs_next_leaf(root, path);
2557                         if (ret == 0)
2558                                 continue;
2559                         if (ret < 0)
2560                                 goto error;
2561                         break;
2562                 }
2563                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2564
2565                 if (found_key.objectid >= key->objectid &&
2566                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY)
2567                         return 0;
2568                 path->slots[0]++;
2569         }
2570         ret = -ENOENT;
2571 error:
2572         return ret;
2573 }
2574
2575 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2576                              u64 total_bytes, u64 bytes_used,
2577                              struct btrfs_space_info **space_info)
2578 {
2579         struct btrfs_space_info *found;
2580
2581         found = __find_space_info(info, flags);
2582         if (found) {
2583                 found->total_bytes += total_bytes;
2584                 found->bytes_used += bytes_used;
2585                 WARN_ON(found->total_bytes < found->bytes_used);
2586                 *space_info = found;
2587                 return 0;
2588         }
2589         found = kmalloc(sizeof(*found), GFP_NOFS);
2590         if (!found)
2591                 return -ENOMEM;
2592
2593         list_add(&found->list, &info->space_info);
2594         found->flags = flags;
2595         found->total_bytes = total_bytes;
2596         found->bytes_used = bytes_used;
2597         found->bytes_pinned = 0;
2598         found->full = 0;
2599         *space_info = found;
2600         return 0;
2601 }
2602
2603 int btrfs_read_block_groups(struct btrfs_root *root)
2604 {
2605         struct btrfs_path *path;
2606         int ret;
2607         int bit;
2608         struct btrfs_block_group_cache *cache;
2609         struct btrfs_fs_info *info = root->fs_info;
2610         struct btrfs_space_info *space_info;
2611         struct extent_io_tree *block_group_cache;
2612         struct btrfs_key key;
2613         struct btrfs_key found_key;
2614         struct extent_buffer *leaf;
2615
2616         block_group_cache = &info->block_group_cache;
2617         root = info->extent_root;
2618         key.objectid = 0;
2619         key.offset = 0;
2620         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2621         path = btrfs_alloc_path();
2622         if (!path)
2623                 return -ENOMEM;
2624
2625         while(1) {
2626                 ret = find_first_block_group(root, path, &key);
2627                 if (ret > 0) {
2628                         ret = 0;
2629                         goto error;
2630                 }
2631                 if (ret != 0)
2632                         goto error;
2633
2634                 leaf = path->nodes[0];
2635                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2636                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
2637                 if (!cache) {
2638                         ret = -ENOMEM;
2639                         break;
2640                 }
2641
2642                 read_extent_buffer(leaf, &cache->item,
2643                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
2644                                    sizeof(cache->item));
2645                 memcpy(&cache->key, &found_key, sizeof(found_key));
2646                 cache->cached = 0;
2647                 cache->pinned = 0;
2648
2649                 key.objectid = found_key.objectid + found_key.offset;
2650                 btrfs_release_path(root, path);
2651                 cache->flags = btrfs_block_group_flags(&cache->item);
2652                 bit = 0;
2653                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
2654                         bit = BLOCK_GROUP_DATA;
2655                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
2656                         bit = BLOCK_GROUP_SYSTEM;
2657                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
2658                         bit = BLOCK_GROUP_METADATA;
2659                 }
2660
2661                 ret = update_space_info(info, cache->flags, found_key.offset,
2662                                         btrfs_block_group_used(&cache->item),
2663                                         &space_info);
2664                 BUG_ON(ret);
2665                 cache->space_info = space_info;
2666
2667                 /* use EXTENT_LOCKED to prevent merging */
2668                 set_extent_bits(block_group_cache, found_key.objectid,
2669                                 found_key.objectid + found_key.offset - 1,
2670                                 bit | EXTENT_LOCKED, GFP_NOFS);
2671                 set_state_private(block_group_cache, found_key.objectid,
2672                                   (unsigned long)cache);
2673
2674                 if (key.objectid >=
2675                     btrfs_super_total_bytes(&info->super_copy))
2676                         break;
2677         }
2678         ret = 0;
2679 error:
2680         btrfs_free_path(path);
2681         return ret;
2682 }
2683
2684 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2685                            struct btrfs_root *root, u64 bytes_used,
2686                            u64 type, u64 chunk_tree, u64 chunk_objectid,
2687                            u64 size)
2688 {
2689         int ret;
2690         int bit = 0;
2691         struct btrfs_root *extent_root;
2692         struct btrfs_block_group_cache *cache;
2693         struct extent_io_tree *block_group_cache;
2694
2695         extent_root = root->fs_info->extent_root;
2696         block_group_cache = &root->fs_info->block_group_cache;
2697
2698         cache = kmalloc(sizeof(*cache), GFP_NOFS);
2699         BUG_ON(!cache);
2700         cache->key.objectid = chunk_objectid;
2701         cache->key.offset = size;
2702         cache->cached = 0;
2703         cache->pinned = 0;
2704         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
2705         memset(&cache->item, 0, sizeof(cache->item));
2706         btrfs_set_block_group_used(&cache->item, bytes_used);
2707         btrfs_set_block_group_chunk_tree(&cache->item, chunk_tree);
2708         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
2709         cache->flags = type;
2710         btrfs_set_block_group_flags(&cache->item, type);
2711
2712         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
2713                                 &cache->space_info);
2714         BUG_ON(ret);
2715
2716         if (type & BTRFS_BLOCK_GROUP_DATA) {
2717                 bit = BLOCK_GROUP_DATA;
2718         } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
2719                 bit = BLOCK_GROUP_SYSTEM;
2720         } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
2721                 bit = BLOCK_GROUP_METADATA;
2722         }
2723         set_extent_bits(block_group_cache, chunk_objectid,
2724                         chunk_objectid + size - 1,
2725                         bit | EXTENT_LOCKED, GFP_NOFS);
2726         set_state_private(block_group_cache, chunk_objectid,
2727                           (unsigned long)cache);
2728
2729         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
2730                                 sizeof(cache->item));
2731         BUG_ON(ret);
2732
2733         finish_current_insert(trans, extent_root);
2734         ret = del_pending_extents(trans, extent_root);
2735         BUG_ON(ret);
2736         return 0;
2737 }