Btrfs: Add locking around volume management (device add/remove/balance)
[pandora-kernel.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 static struct btrfs_block_group_cache *
41 __btrfs_find_block_group(struct btrfs_root *root,
42                          struct btrfs_block_group_cache *hint,
43                          u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         /*
92          * we get into deadlocks with paths held by callers of this function.
93          * since the alloc_mutex is protecting things right now, just
94          * skip the locking here
95          */
96         path->skip_locking = 1;
97         first_free = block_group->key.objectid;
98         key.objectid = block_group->key.objectid;
99         key.offset = 0;
100         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
101         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
102         if (ret < 0)
103                 return ret;
104         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
105         if (ret < 0)
106                 return ret;
107         if (ret == 0) {
108                 leaf = path->nodes[0];
109                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
110                 if (key.objectid + key.offset > first_free)
111                         first_free = key.objectid + key.offset;
112         }
113         while(1) {
114                 leaf = path->nodes[0];
115                 slot = path->slots[0];
116                 if (slot >= btrfs_header_nritems(leaf)) {
117                         ret = btrfs_next_leaf(root, path);
118                         if (ret < 0)
119                                 goto err;
120                         if (ret == 0) {
121                                 continue;
122                         } else {
123                                 break;
124                         }
125                 }
126                 btrfs_item_key_to_cpu(leaf, &key, slot);
127                 if (key.objectid < block_group->key.objectid) {
128                         goto next;
129                 }
130                 if (key.objectid >= block_group->key.objectid +
131                     block_group->key.offset) {
132                         break;
133                 }
134
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = first_free;
138                                 found = 1;
139                         }
140                         if (key.objectid > last) {
141                                 hole_size = key.objectid - last;
142                                 set_extent_dirty(free_space_cache, last,
143                                                  last + hole_size - 1,
144                                                  GFP_NOFS);
145                         }
146                         last = key.objectid + key.offset;
147                 }
148 next:
149                 path->slots[0]++;
150         }
151
152         if (!found)
153                 last = first_free;
154         if (block_group->key.objectid +
155             block_group->key.offset > last) {
156                 hole_size = block_group->key.objectid +
157                         block_group->key.offset - last;
158                 set_extent_dirty(free_space_cache, last,
159                                  last + hole_size - 1, GFP_NOFS);
160         }
161         block_group->cached = 1;
162 err:
163         btrfs_free_path(path);
164         return 0;
165 }
166
167 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
168                                                        btrfs_fs_info *info,
169                                                          u64 bytenr)
170 {
171         struct extent_io_tree *block_group_cache;
172         struct btrfs_block_group_cache *block_group = NULL;
173         u64 ptr;
174         u64 start;
175         u64 end;
176         int ret;
177
178         bytenr = max_t(u64, bytenr,
179                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
180         block_group_cache = &info->block_group_cache;
181         ret = find_first_extent_bit(block_group_cache,
182                                     bytenr, &start, &end,
183                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
184                                     BLOCK_GROUP_SYSTEM);
185         if (ret) {
186                 return NULL;
187         }
188         ret = get_state_private(block_group_cache, start, &ptr);
189         if (ret)
190                 return NULL;
191
192         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
193         return block_group;
194 }
195
196 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
197                                                          btrfs_fs_info *info,
198                                                          u64 bytenr)
199 {
200         struct extent_io_tree *block_group_cache;
201         struct btrfs_block_group_cache *block_group = NULL;
202         u64 ptr;
203         u64 start;
204         u64 end;
205         int ret;
206
207         bytenr = max_t(u64, bytenr,
208                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
209         block_group_cache = &info->block_group_cache;
210         ret = find_first_extent_bit(block_group_cache,
211                                     bytenr, &start, &end,
212                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
213                                     BLOCK_GROUP_SYSTEM);
214         if (ret) {
215                 return NULL;
216         }
217         ret = get_state_private(block_group_cache, start, &ptr);
218         if (ret)
219                 return NULL;
220
221         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
222         if (block_group->key.objectid <= bytenr && bytenr <
223             block_group->key.objectid + block_group->key.offset)
224                 return block_group;
225         return NULL;
226 }
227
228 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
229 {
230         return (cache->flags & bits) == bits;
231 }
232
233 static int noinline find_search_start(struct btrfs_root *root,
234                               struct btrfs_block_group_cache **cache_ret,
235                               u64 *start_ret, u64 num, int data)
236 {
237         int ret;
238         struct btrfs_block_group_cache *cache = *cache_ret;
239         struct extent_io_tree *free_space_cache;
240         struct extent_state *state;
241         u64 last;
242         u64 start = 0;
243         u64 cache_miss = 0;
244         u64 total_fs_bytes;
245         u64 search_start = *start_ret;
246         int wrapped = 0;
247
248         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
249         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
250         free_space_cache = &root->fs_info->free_space_cache;
251
252         if (!cache)
253                 goto out;
254
255 again:
256         ret = cache_block_group(root, cache);
257         if (ret) {
258                 goto out;
259         }
260
261         last = max(search_start, cache->key.objectid);
262         if (!block_group_bits(cache, data) || cache->ro)
263                 goto new_group;
264
265         spin_lock_irq(&free_space_cache->lock);
266         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
267         while(1) {
268                 if (!state) {
269                         if (!cache_miss)
270                                 cache_miss = last;
271                         spin_unlock_irq(&free_space_cache->lock);
272                         goto new_group;
273                 }
274
275                 start = max(last, state->start);
276                 last = state->end + 1;
277                 if (last - start < num) {
278                         do {
279                                 state = extent_state_next(state);
280                         } while(state && !(state->state & EXTENT_DIRTY));
281                         continue;
282                 }
283                 spin_unlock_irq(&free_space_cache->lock);
284                 if (cache->ro) {
285                         goto new_group;
286                 }
287                 if (start + num > cache->key.objectid + cache->key.offset)
288                         goto new_group;
289                 if (!block_group_bits(cache, data)) {
290                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
291                 }
292                 *start_ret = start;
293                 return 0;
294         }
295 out:
296         cache = btrfs_lookup_block_group(root->fs_info, search_start);
297         if (!cache) {
298                 printk("Unable to find block group for %Lu\n", search_start);
299                 WARN_ON(1);
300         }
301         return -ENOSPC;
302
303 new_group:
304         last = cache->key.objectid + cache->key.offset;
305 wrapped:
306         cache = btrfs_lookup_first_block_group(root->fs_info, last);
307         if (!cache || cache->key.objectid >= total_fs_bytes) {
308 no_cache:
309                 if (!wrapped) {
310                         wrapped = 1;
311                         last = search_start;
312                         goto wrapped;
313                 }
314                 goto out;
315         }
316         if (cache_miss && !cache->cached) {
317                 cache_block_group(root, cache);
318                 last = cache_miss;
319                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
320         }
321         cache_miss = 0;
322         cache = __btrfs_find_block_group(root, cache, last, data, 0);
323         if (!cache)
324                 goto no_cache;
325         *cache_ret = cache;
326         goto again;
327 }
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static int block_group_state_bits(u64 flags)
339 {
340         int bits = 0;
341         if (flags & BTRFS_BLOCK_GROUP_DATA)
342                 bits |= BLOCK_GROUP_DATA;
343         if (flags & BTRFS_BLOCK_GROUP_METADATA)
344                 bits |= BLOCK_GROUP_METADATA;
345         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
346                 bits |= BLOCK_GROUP_SYSTEM;
347         return bits;
348 }
349
350 static struct btrfs_block_group_cache *
351 __btrfs_find_block_group(struct btrfs_root *root,
352                          struct btrfs_block_group_cache *hint,
353                          u64 search_start, int data, int owner)
354 {
355         struct btrfs_block_group_cache *cache;
356         struct extent_io_tree *block_group_cache;
357         struct btrfs_block_group_cache *found_group = NULL;
358         struct btrfs_fs_info *info = root->fs_info;
359         u64 used;
360         u64 last = 0;
361         u64 start;
362         u64 end;
363         u64 free_check;
364         u64 ptr;
365         int bit;
366         int ret;
367         int full_search = 0;
368         int factor = 10;
369         int wrapped = 0;
370
371         block_group_cache = &info->block_group_cache;
372
373         if (data & BTRFS_BLOCK_GROUP_METADATA)
374                 factor = 9;
375
376         bit = block_group_state_bits(data);
377
378         if (search_start) {
379                 struct btrfs_block_group_cache *shint;
380                 shint = btrfs_lookup_first_block_group(info, search_start);
381                 if (shint && block_group_bits(shint, data) && !shint->ro) {
382                         used = btrfs_block_group_used(&shint->item);
383                         if (used + shint->pinned <
384                             div_factor(shint->key.offset, factor)) {
385                                 return shint;
386                         }
387                 }
388         }
389         if (hint && !hint->ro && block_group_bits(hint, data)) {
390                 used = btrfs_block_group_used(&hint->item);
391                 if (used + hint->pinned <
392                     div_factor(hint->key.offset, factor)) {
393                         return hint;
394                 }
395                 last = hint->key.objectid + hint->key.offset;
396         } else {
397                 if (hint)
398                         last = max(hint->key.objectid, search_start);
399                 else
400                         last = search_start;
401         }
402 again:
403         while(1) {
404                 ret = find_first_extent_bit(block_group_cache, last,
405                                             &start, &end, bit);
406                 if (ret)
407                         break;
408
409                 ret = get_state_private(block_group_cache, start, &ptr);
410                 if (ret) {
411                         last = end + 1;
412                         continue;
413                 }
414
415                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
416                 last = cache->key.objectid + cache->key.offset;
417                 used = btrfs_block_group_used(&cache->item);
418
419                 if (!cache->ro && block_group_bits(cache, data)) {
420                         free_check = div_factor(cache->key.offset, factor);
421                         if (used + cache->pinned < free_check) {
422                                 found_group = cache;
423                                 goto found;
424                         }
425                 }
426                 cond_resched();
427         }
428         if (!wrapped) {
429                 last = search_start;
430                 wrapped = 1;
431                 goto again;
432         }
433         if (!full_search && factor < 10) {
434                 last = search_start;
435                 full_search = 1;
436                 factor = 10;
437                 goto again;
438         }
439 found:
440         return found_group;
441 }
442
443 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
444                                                  struct btrfs_block_group_cache
445                                                  *hint, u64 search_start,
446                                                  int data, int owner)
447 {
448
449         struct btrfs_block_group_cache *ret;
450         mutex_lock(&root->fs_info->alloc_mutex);
451         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
452         mutex_unlock(&root->fs_info->alloc_mutex);
453         return ret;
454 }
455 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
456                            u64 owner, u64 owner_offset)
457 {
458         u32 high_crc = ~(u32)0;
459         u32 low_crc = ~(u32)0;
460         __le64 lenum;
461         lenum = cpu_to_le64(root_objectid);
462         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
463         lenum = cpu_to_le64(ref_generation);
464         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
465         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
466                 lenum = cpu_to_le64(owner);
467                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
468                 lenum = cpu_to_le64(owner_offset);
469                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
470         }
471         return ((u64)high_crc << 32) | (u64)low_crc;
472 }
473
474 static int match_extent_ref(struct extent_buffer *leaf,
475                             struct btrfs_extent_ref *disk_ref,
476                             struct btrfs_extent_ref *cpu_ref)
477 {
478         int ret;
479         int len;
480
481         if (cpu_ref->objectid)
482                 len = sizeof(*cpu_ref);
483         else
484                 len = 2 * sizeof(u64);
485         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
486                                    len);
487         return ret == 0;
488 }
489
490 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
491                                           struct btrfs_root *root,
492                                           struct btrfs_path *path, u64 bytenr,
493                                           u64 root_objectid,
494                                           u64 ref_generation, u64 owner,
495                                           u64 owner_offset, int del)
496 {
497         u64 hash;
498         struct btrfs_key key;
499         struct btrfs_key found_key;
500         struct btrfs_extent_ref ref;
501         struct extent_buffer *leaf;
502         struct btrfs_extent_ref *disk_ref;
503         int ret;
504         int ret2;
505
506         btrfs_set_stack_ref_root(&ref, root_objectid);
507         btrfs_set_stack_ref_generation(&ref, ref_generation);
508         btrfs_set_stack_ref_objectid(&ref, owner);
509         btrfs_set_stack_ref_offset(&ref, owner_offset);
510
511         hash = hash_extent_ref(root_objectid, ref_generation, owner,
512                                owner_offset);
513         key.offset = hash;
514         key.objectid = bytenr;
515         key.type = BTRFS_EXTENT_REF_KEY;
516
517         while (1) {
518                 ret = btrfs_search_slot(trans, root, &key, path,
519                                         del ? -1 : 0, del);
520                 if (ret < 0)
521                         goto out;
522                 leaf = path->nodes[0];
523                 if (ret != 0) {
524                         u32 nritems = btrfs_header_nritems(leaf);
525                         if (path->slots[0] >= nritems) {
526                                 ret2 = btrfs_next_leaf(root, path);
527                                 if (ret2)
528                                         goto out;
529                                 leaf = path->nodes[0];
530                         }
531                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
532                         if (found_key.objectid != bytenr ||
533                             found_key.type != BTRFS_EXTENT_REF_KEY)
534                                 goto out;
535                         key.offset = found_key.offset;
536                         if (del) {
537                                 btrfs_release_path(root, path);
538                                 continue;
539                         }
540                 }
541                 disk_ref = btrfs_item_ptr(path->nodes[0],
542                                           path->slots[0],
543                                           struct btrfs_extent_ref);
544                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
545                         ret = 0;
546                         goto out;
547                 }
548                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
549                 key.offset = found_key.offset + 1;
550                 btrfs_release_path(root, path);
551         }
552 out:
553         return ret;
554 }
555
556 /*
557  * Back reference rules.  Back refs have three main goals:
558  *
559  * 1) differentiate between all holders of references to an extent so that
560  *    when a reference is dropped we can make sure it was a valid reference
561  *    before freeing the extent.
562  *
563  * 2) Provide enough information to quickly find the holders of an extent
564  *    if we notice a given block is corrupted or bad.
565  *
566  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
567  *    maintenance.  This is actually the same as #2, but with a slightly
568  *    different use case.
569  *
570  * File extents can be referenced by:
571  *
572  * - multiple snapshots, subvolumes, or different generations in one subvol
573  * - different files inside a single subvolume (in theory, not implemented yet)
574  * - different offsets inside a file (bookend extents in file.c)
575  *
576  * The extent ref structure has fields for:
577  *
578  * - Objectid of the subvolume root
579  * - Generation number of the tree holding the reference
580  * - objectid of the file holding the reference
581  * - offset in the file corresponding to the key holding the reference
582  *
583  * When a file extent is allocated the fields are filled in:
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a leaf is cow'd new references are added for every file extent found
587  * in the leaf.  It looks the same as the create case, but trans->transid
588  * will be different when the block is cow'd.
589  *
590  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
591  *
592  * When a file extent is removed either during snapshot deletion or file
593  * truncation, the corresponding back reference is found
594  * by searching for:
595  *
596  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
597  *      inode objectid, offset in file)
598  *
599  * Btree extents can be referenced by:
600  *
601  * - Different subvolumes
602  * - Different generations of the same subvolume
603  *
604  * Storing sufficient information for a full reverse mapping of a btree
605  * block would require storing the lowest key of the block in the backref,
606  * and it would require updating that lowest key either before write out or
607  * every time it changed.  Instead, the objectid of the lowest key is stored
608  * along with the level of the tree block.  This provides a hint
609  * about where in the btree the block can be found.  Searches through the
610  * btree only need to look for a pointer to that block, so they stop one
611  * level higher than the level recorded in the backref.
612  *
613  * Some btrees do not do reference counting on their extents.  These
614  * include the extent tree and the tree of tree roots.  Backrefs for these
615  * trees always have a generation of zero.
616  *
617  * When a tree block is created, back references are inserted:
618  *
619  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
620  *
621  * When a tree block is cow'd in a reference counted root,
622  * new back references are added for all the blocks it points to.
623  * These are of the form (trans->transid will have increased since creation):
624  *
625  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
626  *
627  * Because the lowest_key_objectid and the level are just hints
628  * they are not used when backrefs are deleted.  When a backref is deleted:
629  *
630  * if backref was for a tree root:
631  *     root_objectid = root->root_key.objectid
632  * else
633  *     root_objectid = btrfs_header_owner(parent)
634  *
635  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
636  *
637  * Back Reference Key hashing:
638  *
639  * Back references have four fields, each 64 bits long.  Unfortunately,
640  * This is hashed into a single 64 bit number and placed into the key offset.
641  * The key objectid corresponds to the first byte in the extent, and the
642  * key type is set to BTRFS_EXTENT_REF_KEY
643  */
644 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
645                                  struct btrfs_root *root,
646                                  struct btrfs_path *path, u64 bytenr,
647                                  u64 root_objectid, u64 ref_generation,
648                                  u64 owner, u64 owner_offset)
649 {
650         u64 hash;
651         struct btrfs_key key;
652         struct btrfs_extent_ref ref;
653         struct btrfs_extent_ref *disk_ref;
654         int ret;
655
656         btrfs_set_stack_ref_root(&ref, root_objectid);
657         btrfs_set_stack_ref_generation(&ref, ref_generation);
658         btrfs_set_stack_ref_objectid(&ref, owner);
659         btrfs_set_stack_ref_offset(&ref, owner_offset);
660
661         hash = hash_extent_ref(root_objectid, ref_generation, owner,
662                                owner_offset);
663         key.offset = hash;
664         key.objectid = bytenr;
665         key.type = BTRFS_EXTENT_REF_KEY;
666
667         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
668         while (ret == -EEXIST) {
669                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
670                                           struct btrfs_extent_ref);
671                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
672                         goto out;
673                 key.offset++;
674                 btrfs_release_path(root, path);
675                 ret = btrfs_insert_empty_item(trans, root, path, &key,
676                                               sizeof(ref));
677         }
678         if (ret)
679                 goto out;
680         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
681                                   struct btrfs_extent_ref);
682         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
683                             sizeof(ref));
684         btrfs_mark_buffer_dirty(path->nodes[0]);
685 out:
686         btrfs_release_path(root, path);
687         return ret;
688 }
689
690 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
691                                 struct btrfs_root *root,
692                                 u64 bytenr, u64 num_bytes,
693                                 u64 root_objectid, u64 ref_generation,
694                                 u64 owner, u64 owner_offset)
695 {
696         struct btrfs_path *path;
697         int ret;
698         struct btrfs_key key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         u32 refs;
702
703         WARN_ON(num_bytes < root->sectorsize);
704         path = btrfs_alloc_path();
705         if (!path)
706                 return -ENOMEM;
707
708         path->reada = 1;
709         key.objectid = bytenr;
710         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
711         key.offset = num_bytes;
712         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
713                                 0, 1);
714         if (ret < 0)
715                 return ret;
716         if (ret != 0) {
717                 BUG();
718         }
719         BUG_ON(ret != 0);
720         l = path->nodes[0];
721         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
722         refs = btrfs_extent_refs(l, item);
723         btrfs_set_extent_refs(l, item, refs + 1);
724         btrfs_mark_buffer_dirty(path->nodes[0]);
725
726         btrfs_release_path(root->fs_info->extent_root, path);
727
728         path->reada = 1;
729         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
730                                           path, bytenr, root_objectid,
731                                           ref_generation, owner, owner_offset);
732         BUG_ON(ret);
733         finish_current_insert(trans, root->fs_info->extent_root);
734         del_pending_extents(trans, root->fs_info->extent_root);
735
736         btrfs_free_path(path);
737         return 0;
738 }
739
740 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
741                                 struct btrfs_root *root,
742                                 u64 bytenr, u64 num_bytes,
743                                 u64 root_objectid, u64 ref_generation,
744                                 u64 owner, u64 owner_offset)
745 {
746         int ret;
747
748         mutex_lock(&root->fs_info->alloc_mutex);
749         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
750                                      root_objectid, ref_generation,
751                                      owner, owner_offset);
752         mutex_unlock(&root->fs_info->alloc_mutex);
753         return ret;
754 }
755
756 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
757                          struct btrfs_root *root)
758 {
759         finish_current_insert(trans, root->fs_info->extent_root);
760         del_pending_extents(trans, root->fs_info->extent_root);
761         return 0;
762 }
763
764 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
765                              struct btrfs_root *root, u64 bytenr,
766                              u64 num_bytes, u32 *refs)
767 {
768         struct btrfs_path *path;
769         int ret;
770         struct btrfs_key key;
771         struct extent_buffer *l;
772         struct btrfs_extent_item *item;
773
774         WARN_ON(num_bytes < root->sectorsize);
775         path = btrfs_alloc_path();
776         path->reada = 1;
777         key.objectid = bytenr;
778         key.offset = num_bytes;
779         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
780         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
781                                 0, 0);
782         if (ret < 0)
783                 goto out;
784         if (ret != 0) {
785                 btrfs_print_leaf(root, path->nodes[0]);
786                 printk("failed to find block number %Lu\n", bytenr);
787                 BUG();
788         }
789         l = path->nodes[0];
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         *refs = btrfs_extent_refs(l, item);
792 out:
793         btrfs_free_path(path);
794         return 0;
795 }
796
797 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
798                                   struct btrfs_path *count_path,
799                                   u64 expected_owner,
800                                   u64 first_extent)
801 {
802         struct btrfs_root *extent_root = root->fs_info->extent_root;
803         struct btrfs_path *path;
804         u64 bytenr;
805         u64 found_objectid;
806         u64 found_owner;
807         u64 root_objectid = root->root_key.objectid;
808         u32 total_count = 0;
809         u32 extent_refs;
810         u32 cur_count;
811         u32 nritems;
812         int ret;
813         struct btrfs_key key;
814         struct btrfs_key found_key;
815         struct extent_buffer *l;
816         struct btrfs_extent_item *item;
817         struct btrfs_extent_ref *ref_item;
818         int level = -1;
819
820         /* FIXME, needs locking */
821         BUG();
822
823         mutex_lock(&root->fs_info->alloc_mutex);
824         path = btrfs_alloc_path();
825 again:
826         if (level == -1)
827                 bytenr = first_extent;
828         else
829                 bytenr = count_path->nodes[level]->start;
830
831         cur_count = 0;
832         key.objectid = bytenr;
833         key.offset = 0;
834
835         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
836         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
837         if (ret < 0)
838                 goto out;
839         BUG_ON(ret == 0);
840
841         l = path->nodes[0];
842         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
843
844         if (found_key.objectid != bytenr ||
845             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
846                 goto out;
847         }
848
849         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
850         extent_refs = btrfs_extent_refs(l, item);
851         while (1) {
852                 l = path->nodes[0];
853                 nritems = btrfs_header_nritems(l);
854                 if (path->slots[0] >= nritems) {
855                         ret = btrfs_next_leaf(extent_root, path);
856                         if (ret == 0)
857                                 continue;
858                         break;
859                 }
860                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
861                 if (found_key.objectid != bytenr)
862                         break;
863
864                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
865                         path->slots[0]++;
866                         continue;
867                 }
868
869                 cur_count++;
870                 ref_item = btrfs_item_ptr(l, path->slots[0],
871                                           struct btrfs_extent_ref);
872                 found_objectid = btrfs_ref_root(l, ref_item);
873
874                 if (found_objectid != root_objectid) {
875                         total_count = 2;
876                         goto out;
877                 }
878                 if (level == -1) {
879                         found_owner = btrfs_ref_objectid(l, ref_item);
880                         if (found_owner != expected_owner) {
881                                 total_count = 2;
882                                 goto out;
883                         }
884                         /*
885                          * nasty.  we don't count a reference held by
886                          * the running transaction.  This allows nodatacow
887                          * to avoid cow most of the time
888                          */
889                         if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
890                             btrfs_ref_generation(l, ref_item) ==
891                             root->fs_info->generation) {
892                                 extent_refs--;
893                         }
894                 }
895                 total_count = 1;
896                 path->slots[0]++;
897         }
898         /*
899          * if there is more than one reference against a data extent,
900          * we have to assume the other ref is another snapshot
901          */
902         if (level == -1 && extent_refs > 1) {
903                 total_count = 2;
904                 goto out;
905         }
906         if (cur_count == 0) {
907                 total_count = 0;
908                 goto out;
909         }
910         if (level >= 0 && root->node == count_path->nodes[level])
911                 goto out;
912         level++;
913         btrfs_release_path(root, path);
914         goto again;
915
916 out:
917         btrfs_free_path(path);
918         mutex_unlock(&root->fs_info->alloc_mutex);
919         return total_count;
920 }
921
922 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
923                   struct extent_buffer *buf)
924 {
925         u64 bytenr;
926         u32 nritems;
927         struct btrfs_key key;
928         struct btrfs_file_extent_item *fi;
929         int i;
930         int level;
931         int ret;
932         int faili;
933
934         if (!root->ref_cows)
935                 return 0;
936
937         mutex_lock(&root->fs_info->alloc_mutex);
938         level = btrfs_header_level(buf);
939         nritems = btrfs_header_nritems(buf);
940         for (i = 0; i < nritems; i++) {
941                 if (level == 0) {
942                         u64 disk_bytenr;
943                         btrfs_item_key_to_cpu(buf, &key, i);
944                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
945                                 continue;
946                         fi = btrfs_item_ptr(buf, i,
947                                             struct btrfs_file_extent_item);
948                         if (btrfs_file_extent_type(buf, fi) ==
949                             BTRFS_FILE_EXTENT_INLINE)
950                                 continue;
951                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
952                         if (disk_bytenr == 0)
953                                 continue;
954                         ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
955                                     btrfs_file_extent_disk_num_bytes(buf, fi),
956                                     root->root_key.objectid, trans->transid,
957                                     key.objectid, key.offset);
958                         if (ret) {
959                                 faili = i;
960                                 goto fail;
961                         }
962                 } else {
963                         bytenr = btrfs_node_blockptr(buf, i);
964                         btrfs_node_key_to_cpu(buf, &key, i);
965                         ret = __btrfs_inc_extent_ref(trans, root, bytenr,
966                                            btrfs_level_size(root, level - 1),
967                                            root->root_key.objectid,
968                                            trans->transid,
969                                            level - 1, key.objectid);
970                         if (ret) {
971                                 faili = i;
972                                 goto fail;
973                         }
974                 }
975         }
976         mutex_unlock(&root->fs_info->alloc_mutex);
977         return 0;
978 fail:
979         WARN_ON(1);
980 #if 0
981         for (i =0; i < faili; i++) {
982                 if (level == 0) {
983                         u64 disk_bytenr;
984                         btrfs_item_key_to_cpu(buf, &key, i);
985                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
986                                 continue;
987                         fi = btrfs_item_ptr(buf, i,
988                                             struct btrfs_file_extent_item);
989                         if (btrfs_file_extent_type(buf, fi) ==
990                             BTRFS_FILE_EXTENT_INLINE)
991                                 continue;
992                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
993                         if (disk_bytenr == 0)
994                                 continue;
995                         err = btrfs_free_extent(trans, root, disk_bytenr,
996                                     btrfs_file_extent_disk_num_bytes(buf,
997                                                                       fi), 0);
998                         BUG_ON(err);
999                 } else {
1000                         bytenr = btrfs_node_blockptr(buf, i);
1001                         err = btrfs_free_extent(trans, root, bytenr,
1002                                         btrfs_level_size(root, level - 1), 0);
1003                         BUG_ON(err);
1004                 }
1005         }
1006 #endif
1007         mutex_unlock(&root->fs_info->alloc_mutex);
1008         return ret;
1009 }
1010
1011 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1012                                  struct btrfs_root *root,
1013                                  struct btrfs_path *path,
1014                                  struct btrfs_block_group_cache *cache)
1015 {
1016         int ret;
1017         int pending_ret;
1018         struct btrfs_root *extent_root = root->fs_info->extent_root;
1019         unsigned long bi;
1020         struct extent_buffer *leaf;
1021
1022         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1023         if (ret < 0)
1024                 goto fail;
1025         BUG_ON(ret);
1026
1027         leaf = path->nodes[0];
1028         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1029         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1030         btrfs_mark_buffer_dirty(leaf);
1031         btrfs_release_path(extent_root, path);
1032 fail:
1033         finish_current_insert(trans, extent_root);
1034         pending_ret = del_pending_extents(trans, extent_root);
1035         if (ret)
1036                 return ret;
1037         if (pending_ret)
1038                 return pending_ret;
1039         return 0;
1040
1041 }
1042
1043 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1044                                    struct btrfs_root *root)
1045 {
1046         struct extent_io_tree *block_group_cache;
1047         struct btrfs_block_group_cache *cache;
1048         int ret;
1049         int err = 0;
1050         int werr = 0;
1051         struct btrfs_path *path;
1052         u64 last = 0;
1053         u64 start;
1054         u64 end;
1055         u64 ptr;
1056
1057         block_group_cache = &root->fs_info->block_group_cache;
1058         path = btrfs_alloc_path();
1059         if (!path)
1060                 return -ENOMEM;
1061
1062         mutex_lock(&root->fs_info->alloc_mutex);
1063         while(1) {
1064                 ret = find_first_extent_bit(block_group_cache, last,
1065                                             &start, &end, BLOCK_GROUP_DIRTY);
1066                 if (ret)
1067                         break;
1068
1069                 last = end + 1;
1070                 ret = get_state_private(block_group_cache, start, &ptr);
1071                 if (ret)
1072                         break;
1073                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1074                 err = write_one_cache_group(trans, root,
1075                                             path, cache);
1076                 /*
1077                  * if we fail to write the cache group, we want
1078                  * to keep it marked dirty in hopes that a later
1079                  * write will work
1080                  */
1081                 if (err) {
1082                         werr = err;
1083                         continue;
1084                 }
1085                 clear_extent_bits(block_group_cache, start, end,
1086                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1087         }
1088         btrfs_free_path(path);
1089         mutex_unlock(&root->fs_info->alloc_mutex);
1090         return werr;
1091 }
1092
1093 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1094                                                   u64 flags)
1095 {
1096         struct list_head *head = &info->space_info;
1097         struct list_head *cur;
1098         struct btrfs_space_info *found;
1099         list_for_each(cur, head) {
1100                 found = list_entry(cur, struct btrfs_space_info, list);
1101                 if (found->flags == flags)
1102                         return found;
1103         }
1104         return NULL;
1105
1106 }
1107
1108 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1109                              u64 total_bytes, u64 bytes_used,
1110                              struct btrfs_space_info **space_info)
1111 {
1112         struct btrfs_space_info *found;
1113
1114         found = __find_space_info(info, flags);
1115         if (found) {
1116                 found->total_bytes += total_bytes;
1117                 found->bytes_used += bytes_used;
1118                 found->full = 0;
1119                 WARN_ON(found->total_bytes < found->bytes_used);
1120                 *space_info = found;
1121                 return 0;
1122         }
1123         found = kmalloc(sizeof(*found), GFP_NOFS);
1124         if (!found)
1125                 return -ENOMEM;
1126
1127         list_add(&found->list, &info->space_info);
1128         found->flags = flags;
1129         found->total_bytes = total_bytes;
1130         found->bytes_used = bytes_used;
1131         found->bytes_pinned = 0;
1132         found->full = 0;
1133         found->force_alloc = 0;
1134         *space_info = found;
1135         return 0;
1136 }
1137
1138 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1139 {
1140         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1141                                    BTRFS_BLOCK_GROUP_RAID1 |
1142                                    BTRFS_BLOCK_GROUP_RAID10 |
1143                                    BTRFS_BLOCK_GROUP_DUP);
1144         if (extra_flags) {
1145                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1146                         fs_info->avail_data_alloc_bits |= extra_flags;
1147                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1148                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1149                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1150                         fs_info->avail_system_alloc_bits |= extra_flags;
1151         }
1152 }
1153
1154 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1155 {
1156         u64 num_devices = root->fs_info->fs_devices->num_devices;
1157
1158         if (num_devices == 1)
1159                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1160         if (num_devices < 4)
1161                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1162
1163         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1164             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1165                       BTRFS_BLOCK_GROUP_RAID10))) {
1166                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1167         }
1168
1169         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1170             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1171                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1172         }
1173
1174         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1175             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1176              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1177              (flags & BTRFS_BLOCK_GROUP_DUP)))
1178                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1179         return flags;
1180 }
1181
1182 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1183                           struct btrfs_root *extent_root, u64 alloc_bytes,
1184                           u64 flags, int force)
1185 {
1186         struct btrfs_space_info *space_info;
1187         u64 thresh;
1188         u64 start;
1189         u64 num_bytes;
1190         int ret;
1191
1192         flags = reduce_alloc_profile(extent_root, flags);
1193
1194         space_info = __find_space_info(extent_root->fs_info, flags);
1195         if (!space_info) {
1196                 ret = update_space_info(extent_root->fs_info, flags,
1197                                         0, 0, &space_info);
1198                 BUG_ON(ret);
1199         }
1200         BUG_ON(!space_info);
1201
1202         if (space_info->force_alloc) {
1203                 force = 1;
1204                 space_info->force_alloc = 0;
1205         }
1206         if (space_info->full)
1207                 goto out;
1208
1209         thresh = div_factor(space_info->total_bytes, 6);
1210         if (!force &&
1211            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1212             thresh)
1213                 goto out;
1214
1215         mutex_lock(&extent_root->fs_info->chunk_mutex);
1216         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1217         if (ret == -ENOSPC) {
1218 printk("space info full %Lu\n", flags);
1219                 space_info->full = 1;
1220                 goto out_unlock;
1221         }
1222         BUG_ON(ret);
1223
1224         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1225                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1226         BUG_ON(ret);
1227 out_unlock:
1228         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1229 out:
1230         return 0;
1231 }
1232
1233 static int update_block_group(struct btrfs_trans_handle *trans,
1234                               struct btrfs_root *root,
1235                               u64 bytenr, u64 num_bytes, int alloc,
1236                               int mark_free)
1237 {
1238         struct btrfs_block_group_cache *cache;
1239         struct btrfs_fs_info *info = root->fs_info;
1240         u64 total = num_bytes;
1241         u64 old_val;
1242         u64 byte_in_group;
1243         u64 start;
1244         u64 end;
1245
1246         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1247         while(total) {
1248                 cache = btrfs_lookup_block_group(info, bytenr);
1249                 if (!cache) {
1250                         return -1;
1251                 }
1252                 byte_in_group = bytenr - cache->key.objectid;
1253                 WARN_ON(byte_in_group > cache->key.offset);
1254                 start = cache->key.objectid;
1255                 end = start + cache->key.offset - 1;
1256                 set_extent_bits(&info->block_group_cache, start, end,
1257                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1258
1259                 old_val = btrfs_block_group_used(&cache->item);
1260                 num_bytes = min(total, cache->key.offset - byte_in_group);
1261                 if (alloc) {
1262                         old_val += num_bytes;
1263                         cache->space_info->bytes_used += num_bytes;
1264                 } else {
1265                         old_val -= num_bytes;
1266                         cache->space_info->bytes_used -= num_bytes;
1267                         if (mark_free) {
1268                                 set_extent_dirty(&info->free_space_cache,
1269                                                  bytenr, bytenr + num_bytes - 1,
1270                                                  GFP_NOFS);
1271                         }
1272                 }
1273                 btrfs_set_block_group_used(&cache->item, old_val);
1274                 total -= num_bytes;
1275                 bytenr += num_bytes;
1276         }
1277         return 0;
1278 }
1279
1280 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1281 {
1282         u64 start;
1283         u64 end;
1284         int ret;
1285         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1286                                     search_start, &start, &end,
1287                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1288                                     BLOCK_GROUP_SYSTEM);
1289         if (ret)
1290                 return 0;
1291         return start;
1292 }
1293
1294
1295 static int update_pinned_extents(struct btrfs_root *root,
1296                                 u64 bytenr, u64 num, int pin)
1297 {
1298         u64 len;
1299         struct btrfs_block_group_cache *cache;
1300         struct btrfs_fs_info *fs_info = root->fs_info;
1301
1302         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1303         if (pin) {
1304                 set_extent_dirty(&fs_info->pinned_extents,
1305                                 bytenr, bytenr + num - 1, GFP_NOFS);
1306         } else {
1307                 clear_extent_dirty(&fs_info->pinned_extents,
1308                                 bytenr, bytenr + num - 1, GFP_NOFS);
1309         }
1310         while (num > 0) {
1311                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1312                 if (!cache) {
1313                         u64 first = first_logical_byte(root, bytenr);
1314                         WARN_ON(first < bytenr);
1315                         len = min(first - bytenr, num);
1316                 } else {
1317                         len = min(num, cache->key.offset -
1318                                   (bytenr - cache->key.objectid));
1319                 }
1320                 if (pin) {
1321                         if (cache) {
1322                                 cache->pinned += len;
1323                                 cache->space_info->bytes_pinned += len;
1324                         }
1325                         fs_info->total_pinned += len;
1326                 } else {
1327                         if (cache) {
1328                                 cache->pinned -= len;
1329                                 cache->space_info->bytes_pinned -= len;
1330                         }
1331                         fs_info->total_pinned -= len;
1332                 }
1333                 bytenr += len;
1334                 num -= len;
1335         }
1336         return 0;
1337 }
1338
1339 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1340 {
1341         u64 last = 0;
1342         u64 start;
1343         u64 end;
1344         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1345         int ret;
1346
1347         while(1) {
1348                 ret = find_first_extent_bit(pinned_extents, last,
1349                                             &start, &end, EXTENT_DIRTY);
1350                 if (ret)
1351                         break;
1352                 set_extent_dirty(copy, start, end, GFP_NOFS);
1353                 last = end + 1;
1354         }
1355         return 0;
1356 }
1357
1358 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1359                                struct btrfs_root *root,
1360                                struct extent_io_tree *unpin)
1361 {
1362         u64 start;
1363         u64 end;
1364         int ret;
1365         struct extent_io_tree *free_space_cache;
1366         free_space_cache = &root->fs_info->free_space_cache;
1367
1368         mutex_lock(&root->fs_info->alloc_mutex);
1369         while(1) {
1370                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1371                                             EXTENT_DIRTY);
1372                 if (ret)
1373                         break;
1374                 update_pinned_extents(root, start, end + 1 - start, 0);
1375                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1376                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1377         }
1378         mutex_unlock(&root->fs_info->alloc_mutex);
1379         return 0;
1380 }
1381
1382 static int finish_current_insert(struct btrfs_trans_handle *trans,
1383                                  struct btrfs_root *extent_root)
1384 {
1385         u64 start;
1386         u64 end;
1387         struct btrfs_fs_info *info = extent_root->fs_info;
1388         struct extent_buffer *eb;
1389         struct btrfs_path *path;
1390         struct btrfs_key ins;
1391         struct btrfs_disk_key first;
1392         struct btrfs_extent_item extent_item;
1393         int ret;
1394         int level;
1395         int err = 0;
1396
1397         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1398         btrfs_set_stack_extent_refs(&extent_item, 1);
1399         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1400         path = btrfs_alloc_path();
1401
1402         while(1) {
1403                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1404                                             &end, EXTENT_LOCKED);
1405                 if (ret)
1406                         break;
1407
1408                 ins.objectid = start;
1409                 ins.offset = end + 1 - start;
1410                 err = btrfs_insert_item(trans, extent_root, &ins,
1411                                         &extent_item, sizeof(extent_item));
1412                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1413                                   GFP_NOFS);
1414                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1415                                      trans->transid);
1416                 btrfs_tree_lock(eb);
1417                 level = btrfs_header_level(eb);
1418                 if (level == 0) {
1419                         btrfs_item_key(eb, &first, 0);
1420                 } else {
1421                         btrfs_node_key(eb, &first, 0);
1422                 }
1423                 btrfs_tree_unlock(eb);
1424                 free_extent_buffer(eb);
1425                 /*
1426                  * the first key is just a hint, so the race we've created
1427                  * against reading it is fine
1428                  */
1429                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1430                                           start, extent_root->root_key.objectid,
1431                                           0, level,
1432                                           btrfs_disk_key_objectid(&first));
1433                 BUG_ON(err);
1434         }
1435         btrfs_free_path(path);
1436         return 0;
1437 }
1438
1439 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1440                           int pending)
1441 {
1442         int err = 0;
1443
1444         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1445         if (!pending) {
1446                 struct extent_buffer *buf;
1447                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1448                 if (buf) {
1449                         if (!btrfs_try_tree_lock(buf) &&
1450                             btrfs_buffer_uptodate(buf, 0)) {
1451                                 u64 transid =
1452                                     root->fs_info->running_transaction->transid;
1453                                 u64 header_transid =
1454                                         btrfs_header_generation(buf);
1455                                 if (header_transid == transid &&
1456                                     !btrfs_header_flag(buf,
1457                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1458                                         clean_tree_block(NULL, root, buf);
1459                                         btrfs_tree_unlock(buf);
1460                                         free_extent_buffer(buf);
1461                                         return 1;
1462                                 }
1463                                 btrfs_tree_unlock(buf);
1464                         }
1465                         free_extent_buffer(buf);
1466                 }
1467                 update_pinned_extents(root, bytenr, num_bytes, 1);
1468         } else {
1469                 set_extent_bits(&root->fs_info->pending_del,
1470                                 bytenr, bytenr + num_bytes - 1,
1471                                 EXTENT_LOCKED, GFP_NOFS);
1472         }
1473         BUG_ON(err < 0);
1474         return 0;
1475 }
1476
1477 /*
1478  * remove an extent from the root, returns 0 on success
1479  */
1480 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1481                          *root, u64 bytenr, u64 num_bytes,
1482                          u64 root_objectid, u64 ref_generation,
1483                          u64 owner_objectid, u64 owner_offset, int pin,
1484                          int mark_free)
1485 {
1486         struct btrfs_path *path;
1487         struct btrfs_key key;
1488         struct btrfs_fs_info *info = root->fs_info;
1489         struct btrfs_root *extent_root = info->extent_root;
1490         struct extent_buffer *leaf;
1491         int ret;
1492         int extent_slot = 0;
1493         int found_extent = 0;
1494         int num_to_del = 1;
1495         struct btrfs_extent_item *ei;
1496         u32 refs;
1497
1498         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1499         key.objectid = bytenr;
1500         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1501         key.offset = num_bytes;
1502         path = btrfs_alloc_path();
1503         if (!path)
1504                 return -ENOMEM;
1505
1506         path->reada = 1;
1507         ret = lookup_extent_backref(trans, extent_root, path,
1508                                     bytenr, root_objectid,
1509                                     ref_generation,
1510                                     owner_objectid, owner_offset, 1);
1511         if (ret == 0) {
1512                 struct btrfs_key found_key;
1513                 extent_slot = path->slots[0];
1514                 while(extent_slot > 0) {
1515                         extent_slot--;
1516                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1517                                               extent_slot);
1518                         if (found_key.objectid != bytenr)
1519                                 break;
1520                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1521                             found_key.offset == num_bytes) {
1522                                 found_extent = 1;
1523                                 break;
1524                         }
1525                         if (path->slots[0] - extent_slot > 5)
1526                                 break;
1527                 }
1528                 if (!found_extent)
1529                         ret = btrfs_del_item(trans, extent_root, path);
1530         } else {
1531                 btrfs_print_leaf(extent_root, path->nodes[0]);
1532                 WARN_ON(1);
1533                 printk("Unable to find ref byte nr %Lu root %Lu "
1534                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1535                        root_objectid, ref_generation, owner_objectid,
1536                        owner_offset);
1537         }
1538         if (!found_extent) {
1539                 btrfs_release_path(extent_root, path);
1540                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1541                 if (ret < 0)
1542                         return ret;
1543                 BUG_ON(ret);
1544                 extent_slot = path->slots[0];
1545         }
1546
1547         leaf = path->nodes[0];
1548         ei = btrfs_item_ptr(leaf, extent_slot,
1549                             struct btrfs_extent_item);
1550         refs = btrfs_extent_refs(leaf, ei);
1551         BUG_ON(refs == 0);
1552         refs -= 1;
1553         btrfs_set_extent_refs(leaf, ei, refs);
1554
1555         btrfs_mark_buffer_dirty(leaf);
1556
1557         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1558                 /* if the back ref and the extent are next to each other
1559                  * they get deleted below in one shot
1560                  */
1561                 path->slots[0] = extent_slot;
1562                 num_to_del = 2;
1563         } else if (found_extent) {
1564                 /* otherwise delete the extent back ref */
1565                 ret = btrfs_del_item(trans, extent_root, path);
1566                 BUG_ON(ret);
1567                 /* if refs are 0, we need to setup the path for deletion */
1568                 if (refs == 0) {
1569                         btrfs_release_path(extent_root, path);
1570                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1571                                                 -1, 1);
1572                         if (ret < 0)
1573                                 return ret;
1574                         BUG_ON(ret);
1575                 }
1576         }
1577
1578         if (refs == 0) {
1579                 u64 super_used;
1580                 u64 root_used;
1581
1582                 if (pin) {
1583                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1584                         if (ret > 0)
1585                                 mark_free = 1;
1586                         BUG_ON(ret < 0);
1587                 }
1588
1589                 /* block accounting for super block */
1590                 spin_lock_irq(&info->delalloc_lock);
1591                 super_used = btrfs_super_bytes_used(&info->super_copy);
1592                 btrfs_set_super_bytes_used(&info->super_copy,
1593                                            super_used - num_bytes);
1594                 spin_unlock_irq(&info->delalloc_lock);
1595
1596                 /* block accounting for root item */
1597                 root_used = btrfs_root_used(&root->root_item);
1598                 btrfs_set_root_used(&root->root_item,
1599                                            root_used - num_bytes);
1600                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1601                                       num_to_del);
1602                 if (ret) {
1603                         return ret;
1604                 }
1605                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1606                                          mark_free);
1607                 BUG_ON(ret);
1608         }
1609         btrfs_free_path(path);
1610         finish_current_insert(trans, extent_root);
1611         return ret;
1612 }
1613
1614 /*
1615  * find all the blocks marked as pending in the radix tree and remove
1616  * them from the extent map
1617  */
1618 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1619                                btrfs_root *extent_root)
1620 {
1621         int ret;
1622         int err = 0;
1623         u64 start;
1624         u64 end;
1625         struct extent_io_tree *pending_del;
1626         struct extent_io_tree *pinned_extents;
1627
1628         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1629         pending_del = &extent_root->fs_info->pending_del;
1630         pinned_extents = &extent_root->fs_info->pinned_extents;
1631
1632         while(1) {
1633                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1634                                             EXTENT_LOCKED);
1635                 if (ret)
1636                         break;
1637                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1638                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1639                                   GFP_NOFS);
1640                 ret = __free_extent(trans, extent_root,
1641                                      start, end + 1 - start,
1642                                      extent_root->root_key.objectid,
1643                                      0, 0, 0, 0, 0);
1644                 if (ret)
1645                         err = ret;
1646         }
1647         return err;
1648 }
1649
1650 /*
1651  * remove an extent from the root, returns 0 on success
1652  */
1653 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1654                                struct btrfs_root *root, u64 bytenr,
1655                                u64 num_bytes, u64 root_objectid,
1656                                u64 ref_generation, u64 owner_objectid,
1657                                u64 owner_offset, int pin)
1658 {
1659         struct btrfs_root *extent_root = root->fs_info->extent_root;
1660         int pending_ret;
1661         int ret;
1662
1663         WARN_ON(num_bytes < root->sectorsize);
1664         if (!root->ref_cows)
1665                 ref_generation = 0;
1666
1667         if (root == extent_root) {
1668                 pin_down_bytes(root, bytenr, num_bytes, 1);
1669                 return 0;
1670         }
1671         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1672                             ref_generation, owner_objectid, owner_offset,
1673                             pin, pin == 0);
1674         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1675         return ret ? ret : pending_ret;
1676 }
1677
1678 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1679                       struct btrfs_root *root, u64 bytenr,
1680                       u64 num_bytes, u64 root_objectid,
1681                       u64 ref_generation, u64 owner_objectid,
1682                       u64 owner_offset, int pin)
1683 {
1684         int ret;
1685
1686         maybe_lock_mutex(root);
1687         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1688                                   root_objectid, ref_generation,
1689                                   owner_objectid, owner_offset, pin);
1690         maybe_unlock_mutex(root);
1691         return ret;
1692 }
1693
1694 static u64 stripe_align(struct btrfs_root *root, u64 val)
1695 {
1696         u64 mask = ((u64)root->stripesize - 1);
1697         u64 ret = (val + mask) & ~mask;
1698         return ret;
1699 }
1700
1701 /*
1702  * walks the btree of allocated extents and find a hole of a given size.
1703  * The key ins is changed to record the hole:
1704  * ins->objectid == block start
1705  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1706  * ins->offset == number of blocks
1707  * Any available blocks before search_start are skipped.
1708  */
1709 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1710                                      struct btrfs_root *orig_root,
1711                                      u64 num_bytes, u64 empty_size,
1712                                      u64 search_start, u64 search_end,
1713                                      u64 hint_byte, struct btrfs_key *ins,
1714                                      u64 exclude_start, u64 exclude_nr,
1715                                      int data)
1716 {
1717         int ret;
1718         u64 orig_search_start;
1719         struct btrfs_root * root = orig_root->fs_info->extent_root;
1720         struct btrfs_fs_info *info = root->fs_info;
1721         u64 total_needed = num_bytes;
1722         u64 *last_ptr = NULL;
1723         struct btrfs_block_group_cache *block_group;
1724         int full_scan = 0;
1725         int wrapped = 0;
1726         int chunk_alloc_done = 0;
1727         int empty_cluster = 2 * 1024 * 1024;
1728         int allowed_chunk_alloc = 0;
1729
1730         WARN_ON(num_bytes < root->sectorsize);
1731         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1732
1733         if (orig_root->ref_cows || empty_size)
1734                 allowed_chunk_alloc = 1;
1735
1736         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1737                 last_ptr = &root->fs_info->last_alloc;
1738                 empty_cluster = 256 * 1024;
1739         }
1740
1741         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1742                 last_ptr = &root->fs_info->last_data_alloc;
1743         }
1744
1745         if (last_ptr) {
1746                 if (*last_ptr)
1747                         hint_byte = *last_ptr;
1748                 else {
1749                         empty_size += empty_cluster;
1750                 }
1751         }
1752
1753         search_start = max(search_start, first_logical_byte(root, 0));
1754         orig_search_start = search_start;
1755
1756         if (search_end == (u64)-1)
1757                 search_end = btrfs_super_total_bytes(&info->super_copy);
1758
1759         if (hint_byte) {
1760                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1761                 if (!block_group)
1762                         hint_byte = search_start;
1763                 block_group = __btrfs_find_block_group(root, block_group,
1764                                                      hint_byte, data, 1);
1765                 if (last_ptr && *last_ptr == 0 && block_group)
1766                         hint_byte = block_group->key.objectid;
1767         } else {
1768                 block_group = __btrfs_find_block_group(root,
1769                                                      trans->block_group,
1770                                                      search_start, data, 1);
1771         }
1772         search_start = max(search_start, hint_byte);
1773
1774         total_needed += empty_size;
1775
1776 check_failed:
1777         if (!block_group) {
1778                 block_group = btrfs_lookup_first_block_group(info,
1779                                                              search_start);
1780                 if (!block_group)
1781                         block_group = btrfs_lookup_first_block_group(info,
1782                                                        orig_search_start);
1783         }
1784         if (full_scan && !chunk_alloc_done) {
1785                 if (allowed_chunk_alloc) {
1786                         do_chunk_alloc(trans, root,
1787                                      num_bytes + 2 * 1024 * 1024, data, 1);
1788                         allowed_chunk_alloc = 0;
1789                 } else if (block_group && block_group_bits(block_group, data)) {
1790                         block_group->space_info->force_alloc = 1;
1791                 }
1792                 chunk_alloc_done = 1;
1793         }
1794         ret = find_search_start(root, &block_group, &search_start,
1795                                 total_needed, data);
1796         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1797                 *last_ptr = 0;
1798                 block_group = btrfs_lookup_first_block_group(info,
1799                                                              orig_search_start);
1800                 search_start = orig_search_start;
1801                 ret = find_search_start(root, &block_group, &search_start,
1802                                         total_needed, data);
1803         }
1804         if (ret == -ENOSPC)
1805                 goto enospc;
1806         if (ret)
1807                 goto error;
1808
1809         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1810                 *last_ptr = 0;
1811                 if (!empty_size) {
1812                         empty_size += empty_cluster;
1813                         total_needed += empty_size;
1814                 }
1815                 block_group = btrfs_lookup_first_block_group(info,
1816                                                        orig_search_start);
1817                 search_start = orig_search_start;
1818                 ret = find_search_start(root, &block_group,
1819                                         &search_start, total_needed, data);
1820                 if (ret == -ENOSPC)
1821                         goto enospc;
1822                 if (ret)
1823                         goto error;
1824         }
1825
1826         search_start = stripe_align(root, search_start);
1827         ins->objectid = search_start;
1828         ins->offset = num_bytes;
1829
1830         if (ins->objectid + num_bytes >= search_end)
1831                 goto enospc;
1832
1833         if (ins->objectid + num_bytes >
1834             block_group->key.objectid + block_group->key.offset) {
1835                 search_start = block_group->key.objectid +
1836                         block_group->key.offset;
1837                 goto new_group;
1838         }
1839
1840         if (test_range_bit(&info->extent_ins, ins->objectid,
1841                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1842                 search_start = ins->objectid + num_bytes;
1843                 goto new_group;
1844         }
1845
1846         if (test_range_bit(&info->pinned_extents, ins->objectid,
1847                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1848                 search_start = ins->objectid + num_bytes;
1849                 goto new_group;
1850         }
1851
1852         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1853             ins->objectid < exclude_start + exclude_nr)) {
1854                 search_start = exclude_start + exclude_nr;
1855                 goto new_group;
1856         }
1857
1858         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1859                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1860                 if (block_group)
1861                         trans->block_group = block_group;
1862         }
1863         ins->offset = num_bytes;
1864         if (last_ptr) {
1865                 *last_ptr = ins->objectid + ins->offset;
1866                 if (*last_ptr ==
1867                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1868                         *last_ptr = 0;
1869                 }
1870         }
1871         return 0;
1872
1873 new_group:
1874         if (search_start + num_bytes >= search_end) {
1875 enospc:
1876                 search_start = orig_search_start;
1877                 if (full_scan) {
1878                         ret = -ENOSPC;
1879                         goto error;
1880                 }
1881                 if (wrapped) {
1882                         if (!full_scan)
1883                                 total_needed -= empty_size;
1884                         full_scan = 1;
1885                 } else
1886                         wrapped = 1;
1887         }
1888         block_group = btrfs_lookup_first_block_group(info, search_start);
1889         cond_resched();
1890         block_group = __btrfs_find_block_group(root, block_group,
1891                                              search_start, data, 0);
1892         goto check_failed;
1893
1894 error:
1895         return ret;
1896 }
1897
1898 /*
1899  * finds a free extent and does all the dirty work required for allocation
1900  * returns the key for the extent through ins, and a tree buffer for
1901  * the first block of the extent through buf.
1902  *
1903  * returns 0 if everything worked, non-zero otherwise.
1904  */
1905 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1906                        struct btrfs_root *root,
1907                        u64 num_bytes, u64 min_alloc_size,
1908                        u64 root_objectid, u64 ref_generation,
1909                        u64 owner, u64 owner_offset,
1910                        u64 empty_size, u64 hint_byte,
1911                        u64 search_end, struct btrfs_key *ins, u64 data)
1912 {
1913         int ret;
1914         int pending_ret;
1915         u64 super_used;
1916         u64 root_used;
1917         u64 search_start = 0;
1918         u64 alloc_profile;
1919         u32 sizes[2];
1920         struct btrfs_fs_info *info = root->fs_info;
1921         struct btrfs_root *extent_root = info->extent_root;
1922         struct btrfs_extent_item *extent_item;
1923         struct btrfs_extent_ref *ref;
1924         struct btrfs_path *path;
1925         struct btrfs_key keys[2];
1926
1927         maybe_lock_mutex(root);
1928
1929         if (data) {
1930                 alloc_profile = info->avail_data_alloc_bits &
1931                                 info->data_alloc_profile;
1932                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1933         } else if (root == root->fs_info->chunk_root) {
1934                 alloc_profile = info->avail_system_alloc_bits &
1935                                 info->system_alloc_profile;
1936                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1937         } else {
1938                 alloc_profile = info->avail_metadata_alloc_bits &
1939                                 info->metadata_alloc_profile;
1940                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1941         }
1942 again:
1943         data = reduce_alloc_profile(root, data);
1944         /*
1945          * the only place that sets empty_size is btrfs_realloc_node, which
1946          * is not called recursively on allocations
1947          */
1948         if (empty_size || root->ref_cows) {
1949                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1950                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1951                                      2 * 1024 * 1024,
1952                                      BTRFS_BLOCK_GROUP_METADATA |
1953                                      (info->metadata_alloc_profile &
1954                                       info->avail_metadata_alloc_bits), 0);
1955                         BUG_ON(ret);
1956                 }
1957                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1958                                      num_bytes + 2 * 1024 * 1024, data, 0);
1959                 BUG_ON(ret);
1960         }
1961
1962         WARN_ON(num_bytes < root->sectorsize);
1963         ret = find_free_extent(trans, root, num_bytes, empty_size,
1964                                search_start, search_end, hint_byte, ins,
1965                                trans->alloc_exclude_start,
1966                                trans->alloc_exclude_nr, data);
1967
1968         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1969                 num_bytes = num_bytes >> 1;
1970                 num_bytes = max(num_bytes, min_alloc_size);
1971                 do_chunk_alloc(trans, root->fs_info->extent_root,
1972                                num_bytes, data, 1);
1973                 goto again;
1974         }
1975         if (ret) {
1976                 printk("allocation failed flags %Lu\n", data);
1977         }
1978         if (ret) {
1979                 BUG();
1980                 goto out;
1981         }
1982
1983         /* block accounting for super block */
1984         spin_lock_irq(&info->delalloc_lock);
1985         super_used = btrfs_super_bytes_used(&info->super_copy);
1986         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1987         spin_unlock_irq(&info->delalloc_lock);
1988
1989         /* block accounting for root item */
1990         root_used = btrfs_root_used(&root->root_item);
1991         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1992
1993         clear_extent_dirty(&root->fs_info->free_space_cache,
1994                            ins->objectid, ins->objectid + ins->offset - 1,
1995                            GFP_NOFS);
1996
1997         if (root == extent_root) {
1998                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1999                                 ins->objectid + ins->offset - 1,
2000                                 EXTENT_LOCKED, GFP_NOFS);
2001                 goto update_block;
2002         }
2003
2004         WARN_ON(trans->alloc_exclude_nr);
2005         trans->alloc_exclude_start = ins->objectid;
2006         trans->alloc_exclude_nr = ins->offset;
2007
2008         memcpy(&keys[0], ins, sizeof(*ins));
2009         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
2010                                          owner, owner_offset);
2011         keys[1].objectid = ins->objectid;
2012         keys[1].type = BTRFS_EXTENT_REF_KEY;
2013         sizes[0] = sizeof(*extent_item);
2014         sizes[1] = sizeof(*ref);
2015
2016         path = btrfs_alloc_path();
2017         BUG_ON(!path);
2018
2019         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2020                                        sizes, 2);
2021
2022         BUG_ON(ret);
2023         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2024                                      struct btrfs_extent_item);
2025         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2026         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2027                              struct btrfs_extent_ref);
2028
2029         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2030         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2031         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2032         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2033
2034         btrfs_mark_buffer_dirty(path->nodes[0]);
2035
2036         trans->alloc_exclude_start = 0;
2037         trans->alloc_exclude_nr = 0;
2038         btrfs_free_path(path);
2039         finish_current_insert(trans, extent_root);
2040         pending_ret = del_pending_extents(trans, extent_root);
2041
2042         if (ret)
2043                 goto out;
2044         if (pending_ret) {
2045                 ret = pending_ret;
2046                 goto out;
2047         }
2048
2049 update_block:
2050         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2051         if (ret) {
2052                 printk("update block group failed for %Lu %Lu\n",
2053                        ins->objectid, ins->offset);
2054                 BUG();
2055         }
2056 out:
2057         maybe_unlock_mutex(root);
2058         return ret;
2059 }
2060 /*
2061  * helper function to allocate a block for a given tree
2062  * returns the tree buffer or NULL.
2063  */
2064 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2065                                              struct btrfs_root *root,
2066                                              u32 blocksize,
2067                                              u64 root_objectid,
2068                                              u64 ref_generation,
2069                                              u64 first_objectid,
2070                                              int level,
2071                                              u64 hint,
2072                                              u64 empty_size)
2073 {
2074         struct btrfs_key ins;
2075         int ret;
2076         struct extent_buffer *buf;
2077
2078         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
2079                                  root_objectid, ref_generation,
2080                                  level, first_objectid, empty_size, hint,
2081                                  (u64)-1, &ins, 0);
2082         if (ret) {
2083                 BUG_ON(ret > 0);
2084                 return ERR_PTR(ret);
2085         }
2086         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2087         if (!buf) {
2088                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2089                                   root->root_key.objectid, ref_generation,
2090                                   0, 0, 0);
2091                 return ERR_PTR(-ENOMEM);
2092         }
2093         btrfs_set_header_generation(buf, trans->transid);
2094         btrfs_tree_lock(buf);
2095         clean_tree_block(trans, root, buf);
2096         btrfs_set_buffer_uptodate(buf);
2097
2098         if (PageDirty(buf->first_page)) {
2099                 printk("page %lu dirty\n", buf->first_page->index);
2100                 WARN_ON(1);
2101         }
2102
2103         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2104                          buf->start + buf->len - 1, GFP_NOFS);
2105         trans->blocks_used++;
2106         return buf;
2107 }
2108
2109 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2110                                   struct btrfs_root *root,
2111                                   struct extent_buffer *leaf)
2112 {
2113         u64 leaf_owner;
2114         u64 leaf_generation;
2115         struct btrfs_key key;
2116         struct btrfs_file_extent_item *fi;
2117         int i;
2118         int nritems;
2119         int ret;
2120
2121         BUG_ON(!btrfs_is_leaf(leaf));
2122         nritems = btrfs_header_nritems(leaf);
2123         leaf_owner = btrfs_header_owner(leaf);
2124         leaf_generation = btrfs_header_generation(leaf);
2125
2126         for (i = 0; i < nritems; i++) {
2127                 u64 disk_bytenr;
2128
2129                 btrfs_item_key_to_cpu(leaf, &key, i);
2130                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2131                         continue;
2132                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2133                 if (btrfs_file_extent_type(leaf, fi) ==
2134                     BTRFS_FILE_EXTENT_INLINE)
2135                         continue;
2136                 /*
2137                  * FIXME make sure to insert a trans record that
2138                  * repeats the snapshot del on crash
2139                  */
2140                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2141                 if (disk_bytenr == 0)
2142                         continue;
2143                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2144                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2145                                 leaf_owner, leaf_generation,
2146                                 key.objectid, key.offset, 0);
2147                 BUG_ON(ret);
2148         }
2149         return 0;
2150 }
2151
2152 static void noinline reada_walk_down(struct btrfs_root *root,
2153                                      struct extent_buffer *node,
2154                                      int slot)
2155 {
2156         u64 bytenr;
2157         u64 last = 0;
2158         u32 nritems;
2159         u32 refs;
2160         u32 blocksize;
2161         int ret;
2162         int i;
2163         int level;
2164         int skipped = 0;
2165
2166         nritems = btrfs_header_nritems(node);
2167         level = btrfs_header_level(node);
2168         if (level)
2169                 return;
2170
2171         for (i = slot; i < nritems && skipped < 32; i++) {
2172                 bytenr = btrfs_node_blockptr(node, i);
2173                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2174                              (last > bytenr && last - bytenr > 32 * 1024))) {
2175                         skipped++;
2176                         continue;
2177                 }
2178                 blocksize = btrfs_level_size(root, level - 1);
2179                 if (i != slot) {
2180                         ret = lookup_extent_ref(NULL, root, bytenr,
2181                                                 blocksize, &refs);
2182                         BUG_ON(ret);
2183                         if (refs != 1) {
2184                                 skipped++;
2185                                 continue;
2186                         }
2187                 }
2188                 ret = readahead_tree_block(root, bytenr, blocksize,
2189                                            btrfs_node_ptr_generation(node, i));
2190                 last = bytenr + blocksize;
2191                 cond_resched();
2192                 if (ret)
2193                         break;
2194         }
2195 }
2196
2197 /*
2198  * we want to avoid as much random IO as we can with the alloc mutex
2199  * held, so drop the lock and do the lookup, then do it again with the
2200  * lock held.
2201  */
2202 int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
2203                               u32 *refs)
2204 {
2205         mutex_unlock(&root->fs_info->alloc_mutex);
2206         lookup_extent_ref(NULL, root, start, len, refs);
2207         cond_resched();
2208         mutex_lock(&root->fs_info->alloc_mutex);
2209         return lookup_extent_ref(NULL, root, start, len, refs);
2210 }
2211
2212 /*
2213  * helper function for drop_snapshot, this walks down the tree dropping ref
2214  * counts as it goes.
2215  */
2216 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2217                                    struct btrfs_root *root,
2218                                    struct btrfs_path *path, int *level)
2219 {
2220         u64 root_owner;
2221         u64 root_gen;
2222         u64 bytenr;
2223         u64 ptr_gen;
2224         struct extent_buffer *next;
2225         struct extent_buffer *cur;
2226         struct extent_buffer *parent;
2227         u32 blocksize;
2228         int ret;
2229         u32 refs;
2230
2231         mutex_lock(&root->fs_info->alloc_mutex);
2232
2233         WARN_ON(*level < 0);
2234         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2235         ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
2236                                 path->nodes[*level]->len, &refs);
2237         BUG_ON(ret);
2238         if (refs > 1)
2239                 goto out;
2240
2241         /*
2242          * walk down to the last node level and free all the leaves
2243          */
2244         while(*level >= 0) {
2245                 WARN_ON(*level < 0);
2246                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2247                 cur = path->nodes[*level];
2248
2249                 if (btrfs_header_level(cur) != *level)
2250                         WARN_ON(1);
2251
2252                 if (path->slots[*level] >=
2253                     btrfs_header_nritems(cur))
2254                         break;
2255                 if (*level == 0) {
2256                         ret = drop_leaf_ref(trans, root, cur);
2257                         BUG_ON(ret);
2258                         break;
2259                 }
2260                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2261                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2262                 blocksize = btrfs_level_size(root, *level - 1);
2263
2264                 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
2265                 BUG_ON(ret);
2266                 if (refs != 1) {
2267                         parent = path->nodes[*level];
2268                         root_owner = btrfs_header_owner(parent);
2269                         root_gen = btrfs_header_generation(parent);
2270                         path->slots[*level]++;
2271                         ret = __btrfs_free_extent(trans, root, bytenr,
2272                                                 blocksize, root_owner,
2273                                                 root_gen, 0, 0, 1);
2274                         BUG_ON(ret);
2275                         continue;
2276                 }
2277                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2278                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2279                         free_extent_buffer(next);
2280                         mutex_unlock(&root->fs_info->alloc_mutex);
2281
2282                         if (path->slots[*level] == 0)
2283                                 reada_walk_down(root, cur, path->slots[*level]);
2284
2285                         next = read_tree_block(root, bytenr, blocksize,
2286                                                ptr_gen);
2287                         cond_resched();
2288                         mutex_lock(&root->fs_info->alloc_mutex);
2289
2290                         /* we've dropped the lock, double check */
2291                         ret = drop_snap_lookup_refcount(root, bytenr,
2292                                                 blocksize, &refs);
2293                         BUG_ON(ret);
2294                         if (refs != 1) {
2295                                 parent = path->nodes[*level];
2296                                 root_owner = btrfs_header_owner(parent);
2297                                 root_gen = btrfs_header_generation(parent);
2298
2299                                 path->slots[*level]++;
2300                                 free_extent_buffer(next);
2301                                 ret = __btrfs_free_extent(trans, root, bytenr,
2302                                                         blocksize,
2303                                                         root_owner,
2304                                                         root_gen, 0, 0, 1);
2305                                 BUG_ON(ret);
2306                                 continue;
2307                         }
2308                 }
2309                 WARN_ON(*level <= 0);
2310                 if (path->nodes[*level-1])
2311                         free_extent_buffer(path->nodes[*level-1]);
2312                 path->nodes[*level-1] = next;
2313                 *level = btrfs_header_level(next);
2314                 path->slots[*level] = 0;
2315         }
2316 out:
2317         WARN_ON(*level < 0);
2318         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2319
2320         if (path->nodes[*level] == root->node) {
2321                 root_owner = root->root_key.objectid;
2322                 parent = path->nodes[*level];
2323         } else {
2324                 parent = path->nodes[*level + 1];
2325                 root_owner = btrfs_header_owner(parent);
2326         }
2327
2328         root_gen = btrfs_header_generation(parent);
2329         ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2330                                 path->nodes[*level]->len,
2331                                 root_owner, root_gen, 0, 0, 1);
2332         free_extent_buffer(path->nodes[*level]);
2333         path->nodes[*level] = NULL;
2334         *level += 1;
2335         BUG_ON(ret);
2336         mutex_unlock(&root->fs_info->alloc_mutex);
2337         cond_resched();
2338         return 0;
2339 }
2340
2341 /*
2342  * helper for dropping snapshots.  This walks back up the tree in the path
2343  * to find the first node higher up where we haven't yet gone through
2344  * all the slots
2345  */
2346 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2347                                  struct btrfs_root *root,
2348                                  struct btrfs_path *path, int *level)
2349 {
2350         u64 root_owner;
2351         u64 root_gen;
2352         struct btrfs_root_item *root_item = &root->root_item;
2353         int i;
2354         int slot;
2355         int ret;
2356
2357         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2358                 slot = path->slots[i];
2359                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2360                         struct extent_buffer *node;
2361                         struct btrfs_disk_key disk_key;
2362                         node = path->nodes[i];
2363                         path->slots[i]++;
2364                         *level = i;
2365                         WARN_ON(*level == 0);
2366                         btrfs_node_key(node, &disk_key, path->slots[i]);
2367                         memcpy(&root_item->drop_progress,
2368                                &disk_key, sizeof(disk_key));
2369                         root_item->drop_level = i;
2370                         return 0;
2371                 } else {
2372                         if (path->nodes[*level] == root->node) {
2373                                 root_owner = root->root_key.objectid;
2374                                 root_gen =
2375                                    btrfs_header_generation(path->nodes[*level]);
2376                         } else {
2377                                 struct extent_buffer *node;
2378                                 node = path->nodes[*level + 1];
2379                                 root_owner = btrfs_header_owner(node);
2380                                 root_gen = btrfs_header_generation(node);
2381                         }
2382                         ret = btrfs_free_extent(trans, root,
2383                                                 path->nodes[*level]->start,
2384                                                 path->nodes[*level]->len,
2385                                                 root_owner, root_gen, 0, 0, 1);
2386                         BUG_ON(ret);
2387                         free_extent_buffer(path->nodes[*level]);
2388                         path->nodes[*level] = NULL;
2389                         *level = i + 1;
2390                 }
2391         }
2392         return 1;
2393 }
2394
2395 /*
2396  * drop the reference count on the tree rooted at 'snap'.  This traverses
2397  * the tree freeing any blocks that have a ref count of zero after being
2398  * decremented.
2399  */
2400 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2401                         *root)
2402 {
2403         int ret = 0;
2404         int wret;
2405         int level;
2406         struct btrfs_path *path;
2407         int i;
2408         int orig_level;
2409         struct btrfs_root_item *root_item = &root->root_item;
2410
2411         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
2412         path = btrfs_alloc_path();
2413         BUG_ON(!path);
2414
2415         level = btrfs_header_level(root->node);
2416         orig_level = level;
2417         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2418                 path->nodes[level] = root->node;
2419                 extent_buffer_get(root->node);
2420                 path->slots[level] = 0;
2421         } else {
2422                 struct btrfs_key key;
2423                 struct btrfs_disk_key found_key;
2424                 struct extent_buffer *node;
2425
2426                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2427                 level = root_item->drop_level;
2428                 path->lowest_level = level;
2429                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2430                 if (wret < 0) {
2431                         ret = wret;
2432                         goto out;
2433                 }
2434                 node = path->nodes[level];
2435                 btrfs_node_key(node, &found_key, path->slots[level]);
2436                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2437                                sizeof(found_key)));
2438                 /*
2439                  * unlock our path, this is safe because only this
2440                  * function is allowed to delete this snapshot
2441                  */
2442                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2443                         if (path->nodes[i] && path->locks[i]) {
2444                                 path->locks[i] = 0;
2445                                 btrfs_tree_unlock(path->nodes[i]);
2446                         }
2447                 }
2448         }
2449         while(1) {
2450                 wret = walk_down_tree(trans, root, path, &level);
2451                 if (wret > 0)
2452                         break;
2453                 if (wret < 0)
2454                         ret = wret;
2455
2456                 wret = walk_up_tree(trans, root, path, &level);
2457                 if (wret > 0)
2458                         break;
2459                 if (wret < 0)
2460                         ret = wret;
2461                 if (trans->transaction->in_commit) {
2462                         ret = -EAGAIN;
2463                         break;
2464                 }
2465         }
2466         for (i = 0; i <= orig_level; i++) {
2467                 if (path->nodes[i]) {
2468                         free_extent_buffer(path->nodes[i]);
2469                         path->nodes[i] = NULL;
2470                 }
2471         }
2472 out:
2473         btrfs_free_path(path);
2474         return ret;
2475 }
2476
2477 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2478 {
2479         u64 start;
2480         u64 end;
2481         u64 ptr;
2482         int ret;
2483
2484         mutex_lock(&info->alloc_mutex);
2485         while(1) {
2486                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2487                                             &start, &end, (unsigned int)-1);
2488                 if (ret)
2489                         break;
2490                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2491                 if (!ret)
2492                         kfree((void *)(unsigned long)ptr);
2493                 clear_extent_bits(&info->block_group_cache, start,
2494                                   end, (unsigned int)-1, GFP_NOFS);
2495         }
2496         while(1) {
2497                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2498                                             &start, &end, EXTENT_DIRTY);
2499                 if (ret)
2500                         break;
2501                 clear_extent_dirty(&info->free_space_cache, start,
2502                                    end, GFP_NOFS);
2503         }
2504         mutex_unlock(&info->alloc_mutex);
2505         return 0;
2506 }
2507
2508 static unsigned long calc_ra(unsigned long start, unsigned long last,
2509                              unsigned long nr)
2510 {
2511         return min(last, start + nr - 1);
2512 }
2513
2514 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2515                                          u64 len)
2516 {
2517         u64 page_start;
2518         u64 page_end;
2519         unsigned long last_index;
2520         unsigned long i;
2521         struct page *page;
2522         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2523         struct file_ra_state *ra;
2524         unsigned long total_read = 0;
2525         unsigned long ra_pages;
2526         struct btrfs_trans_handle *trans;
2527
2528         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2529
2530         mutex_lock(&inode->i_mutex);
2531         i = start >> PAGE_CACHE_SHIFT;
2532         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2533
2534         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2535
2536         file_ra_state_init(ra, inode->i_mapping);
2537
2538         for (; i <= last_index; i++) {
2539                 if (total_read % ra_pages == 0) {
2540                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2541                                        calc_ra(i, last_index, ra_pages));
2542                 }
2543                 total_read++;
2544                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2545                         goto truncate_racing;
2546
2547                 page = grab_cache_page(inode->i_mapping, i);
2548                 if (!page) {
2549                         goto out_unlock;
2550                 }
2551                 if (!PageUptodate(page)) {
2552                         btrfs_readpage(NULL, page);
2553                         lock_page(page);
2554                         if (!PageUptodate(page)) {
2555                                 unlock_page(page);
2556                                 page_cache_release(page);
2557                                 goto out_unlock;
2558                         }
2559                 }
2560 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2561                 ClearPageDirty(page);
2562 #else
2563                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2564 #endif
2565                 wait_on_page_writeback(page);
2566                 set_page_extent_mapped(page);
2567                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2568                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2569
2570                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2571
2572                 set_extent_delalloc(io_tree, page_start,
2573                                     page_end, GFP_NOFS);
2574                 set_page_dirty(page);
2575
2576                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2577                 unlock_page(page);
2578                 page_cache_release(page);
2579         }
2580         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2581                                            total_read);
2582
2583 out_unlock:
2584         kfree(ra);
2585         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2586         if (trans) {
2587                 btrfs_add_ordered_inode(inode);
2588                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2589                 mark_inode_dirty(inode);
2590         }
2591         mutex_unlock(&inode->i_mutex);
2592         return 0;
2593
2594 truncate_racing:
2595         vmtruncate(inode, inode->i_size);
2596         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2597                                            total_read);
2598         goto out_unlock;
2599 }
2600
2601 /*
2602  * The back references tell us which tree holds a ref on a block,
2603  * but it is possible for the tree root field in the reference to
2604  * reflect the original root before a snapshot was made.  In this
2605  * case we should search through all the children of a given root
2606  * to find potential holders of references on a block.
2607  *
2608  * Instead, we do something a little less fancy and just search
2609  * all the roots for a given key/block combination.
2610  */
2611 static int find_root_for_ref(struct btrfs_root *root,
2612                              struct btrfs_path *path,
2613                              struct btrfs_key *key0,
2614                              int level,
2615                              int file_key,
2616                              struct btrfs_root **found_root,
2617                              u64 bytenr)
2618 {
2619         struct btrfs_key root_location;
2620         struct btrfs_root *cur_root = *found_root;
2621         struct btrfs_file_extent_item *file_extent;
2622         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2623         u64 found_bytenr;
2624         int ret;
2625
2626         root_location.offset = (u64)-1;
2627         root_location.type = BTRFS_ROOT_ITEM_KEY;
2628         path->lowest_level = level;
2629         path->reada = 0;
2630         while(1) {
2631                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2632                 found_bytenr = 0;
2633                 if (ret == 0 && file_key) {
2634                         struct extent_buffer *leaf = path->nodes[0];
2635                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2636                                              struct btrfs_file_extent_item);
2637                         if (btrfs_file_extent_type(leaf, file_extent) ==
2638                             BTRFS_FILE_EXTENT_REG) {
2639                                 found_bytenr =
2640                                         btrfs_file_extent_disk_bytenr(leaf,
2641                                                                file_extent);
2642                        }
2643                 } else if (!file_key) {
2644                         if (path->nodes[level])
2645                                 found_bytenr = path->nodes[level]->start;
2646                 }
2647
2648                 btrfs_release_path(cur_root, path);
2649
2650                 if (found_bytenr == bytenr) {
2651                         *found_root = cur_root;
2652                         ret = 0;
2653                         goto out;
2654                 }
2655                 ret = btrfs_search_root(root->fs_info->tree_root,
2656                                         root_search_start, &root_search_start);
2657                 if (ret)
2658                         break;
2659
2660                 root_location.objectid = root_search_start;
2661                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2662                                                       &root_location);
2663                 if (!cur_root) {
2664                         ret = 1;
2665                         break;
2666                 }
2667         }
2668 out:
2669         path->lowest_level = 0;
2670         return ret;
2671 }
2672
2673 /*
2674  * note, this releases the path
2675  */
2676 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2677                                   struct btrfs_path *path,
2678                                   struct btrfs_key *extent_key,
2679                                   u64 *last_file_objectid,
2680                                   u64 *last_file_offset,
2681                                   u64 *last_file_root,
2682                                   u64 last_extent)
2683 {
2684         struct inode *inode;
2685         struct btrfs_root *found_root;
2686         struct btrfs_key root_location;
2687         struct btrfs_key found_key;
2688         struct btrfs_extent_ref *ref;
2689         u64 ref_root;
2690         u64 ref_gen;
2691         u64 ref_objectid;
2692         u64 ref_offset;
2693         int ret;
2694         int level;
2695
2696         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
2697
2698         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2699                              struct btrfs_extent_ref);
2700         ref_root = btrfs_ref_root(path->nodes[0], ref);
2701         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2702         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2703         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2704         btrfs_release_path(extent_root, path);
2705
2706         root_location.objectid = ref_root;
2707         if (ref_gen == 0)
2708                 root_location.offset = 0;
2709         else
2710                 root_location.offset = (u64)-1;
2711         root_location.type = BTRFS_ROOT_ITEM_KEY;
2712
2713         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2714                                                 &root_location);
2715         BUG_ON(!found_root);
2716         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2717
2718         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2719                 found_key.objectid = ref_objectid;
2720                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2721                 found_key.offset = ref_offset;
2722                 level = 0;
2723
2724                 if (last_extent == extent_key->objectid &&
2725                     *last_file_objectid == ref_objectid &&
2726                     *last_file_offset == ref_offset &&
2727                     *last_file_root == ref_root)
2728                         goto out;
2729
2730                 ret = find_root_for_ref(extent_root, path, &found_key,
2731                                         level, 1, &found_root,
2732                                         extent_key->objectid);
2733
2734                 if (ret)
2735                         goto out;
2736
2737                 if (last_extent == extent_key->objectid &&
2738                     *last_file_objectid == ref_objectid &&
2739                     *last_file_offset == ref_offset &&
2740                     *last_file_root == ref_root)
2741                         goto out;
2742
2743                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2744                                           ref_objectid, found_root);
2745                 if (inode->i_state & I_NEW) {
2746                         /* the inode and parent dir are two different roots */
2747                         BTRFS_I(inode)->root = found_root;
2748                         BTRFS_I(inode)->location.objectid = ref_objectid;
2749                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2750                         BTRFS_I(inode)->location.offset = 0;
2751                         btrfs_read_locked_inode(inode);
2752                         unlock_new_inode(inode);
2753
2754                 }
2755                 /* this can happen if the reference is not against
2756                  * the latest version of the tree root
2757                  */
2758                 if (is_bad_inode(inode))
2759                         goto out;
2760
2761                 *last_file_objectid = inode->i_ino;
2762                 *last_file_root = found_root->root_key.objectid;
2763                 *last_file_offset = ref_offset;
2764
2765                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2766                 iput(inode);
2767         } else {
2768                 struct btrfs_trans_handle *trans;
2769                 struct extent_buffer *eb;
2770                 int needs_lock = 0;
2771
2772                 eb = read_tree_block(found_root, extent_key->objectid,
2773                                      extent_key->offset, 0);
2774                 btrfs_tree_lock(eb);
2775                 level = btrfs_header_level(eb);
2776
2777                 if (level == 0)
2778                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2779                 else
2780                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2781
2782                 btrfs_tree_unlock(eb);
2783                 free_extent_buffer(eb);
2784
2785                 ret = find_root_for_ref(extent_root, path, &found_key,
2786                                         level, 0, &found_root,
2787                                         extent_key->objectid);
2788
2789                 if (ret)
2790                         goto out;
2791
2792                 /*
2793                  * right here almost anything could happen to our key,
2794                  * but that's ok.  The cow below will either relocate it
2795                  * or someone else will have relocated it.  Either way,
2796                  * it is in a different spot than it was before and
2797                  * we're happy.
2798                  */
2799
2800                 trans = btrfs_start_transaction(found_root, 1);
2801
2802                 if (found_root == extent_root->fs_info->extent_root ||
2803                     found_root == extent_root->fs_info->chunk_root ||
2804                     found_root == extent_root->fs_info->dev_root) {
2805                         needs_lock = 1;
2806                         mutex_lock(&extent_root->fs_info->alloc_mutex);
2807                 }
2808
2809                 path->lowest_level = level;
2810                 path->reada = 2;
2811                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2812                                         0, 1);
2813                 path->lowest_level = 0;
2814                 btrfs_release_path(found_root, path);
2815
2816                 if (found_root == found_root->fs_info->extent_root)
2817                         btrfs_extent_post_op(trans, found_root);
2818                 if (needs_lock)
2819                         mutex_unlock(&extent_root->fs_info->alloc_mutex);
2820
2821                 btrfs_end_transaction(trans, found_root);
2822
2823         }
2824 out:
2825         mutex_lock(&extent_root->fs_info->alloc_mutex);
2826         return 0;
2827 }
2828
2829 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2830                                     struct btrfs_path *path,
2831                                     struct btrfs_key *extent_key)
2832 {
2833         int ret;
2834         struct btrfs_trans_handle *trans;
2835
2836         trans = btrfs_start_transaction(extent_root, 1);
2837         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2838         if (ret > 0) {
2839                 ret = -EIO;
2840                 goto out;
2841         }
2842         if (ret < 0)
2843                 goto out;
2844         ret = btrfs_del_item(trans, extent_root, path);
2845 out:
2846         btrfs_end_transaction(trans, extent_root);
2847         return ret;
2848 }
2849
2850 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2851                                         struct btrfs_path *path,
2852                                         struct btrfs_key *extent_key)
2853 {
2854         struct btrfs_key key;
2855         struct btrfs_key found_key;
2856         struct extent_buffer *leaf;
2857         u64 last_file_objectid = 0;
2858         u64 last_file_root = 0;
2859         u64 last_file_offset = (u64)-1;
2860         u64 last_extent = 0;
2861         u32 nritems;
2862         u32 item_size;
2863         int ret = 0;
2864
2865         if (extent_key->objectid == 0) {
2866                 ret = del_extent_zero(extent_root, path, extent_key);
2867                 goto out;
2868         }
2869         key.objectid = extent_key->objectid;
2870         key.type = BTRFS_EXTENT_REF_KEY;
2871         key.offset = 0;
2872
2873         while(1) {
2874                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2875
2876                 if (ret < 0)
2877                         goto out;
2878
2879                 ret = 0;
2880                 leaf = path->nodes[0];
2881                 nritems = btrfs_header_nritems(leaf);
2882                 if (path->slots[0] == nritems) {
2883                         ret = btrfs_next_leaf(extent_root, path);
2884                         if (ret > 0) {
2885                                 ret = 0;
2886                                 goto out;
2887                         }
2888                         if (ret < 0)
2889                                 goto out;
2890                         leaf = path->nodes[0];
2891                 }
2892
2893                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2894                 if (found_key.objectid != extent_key->objectid) {
2895                         break;
2896                 }
2897
2898                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2899                         break;
2900                 }
2901
2902                 key.offset = found_key.offset + 1;
2903                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2904
2905                 ret = relocate_one_reference(extent_root, path, extent_key,
2906                                              &last_file_objectid,
2907                                              &last_file_offset,
2908                                              &last_file_root, last_extent);
2909                 if (ret)
2910                         goto out;
2911                 last_extent = extent_key->objectid;
2912         }
2913         ret = 0;
2914 out:
2915         btrfs_release_path(extent_root, path);
2916         return ret;
2917 }
2918
2919 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2920 {
2921         u64 num_devices;
2922         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2923                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2924
2925         num_devices = root->fs_info->fs_devices->num_devices;
2926         if (num_devices == 1) {
2927                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2928                 stripped = flags & ~stripped;
2929
2930                 /* turn raid0 into single device chunks */
2931                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2932                         return stripped;
2933
2934                 /* turn mirroring into duplication */
2935                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2936                              BTRFS_BLOCK_GROUP_RAID10))
2937                         return stripped | BTRFS_BLOCK_GROUP_DUP;
2938                 return flags;
2939         } else {
2940                 /* they already had raid on here, just return */
2941                 if (flags & stripped)
2942                         return flags;
2943
2944                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2945                 stripped = flags & ~stripped;
2946
2947                 /* switch duplicated blocks with raid1 */
2948                 if (flags & BTRFS_BLOCK_GROUP_DUP)
2949                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
2950
2951                 /* turn single device chunks into raid0 */
2952                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
2953         }
2954         return flags;
2955 }
2956
2957 int __alloc_chunk_for_shrink(struct btrfs_root *root,
2958                      struct btrfs_block_group_cache *shrink_block_group,
2959                      int force)
2960 {
2961         struct btrfs_trans_handle *trans;
2962         u64 new_alloc_flags;
2963         u64 calc;
2964
2965         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
2966
2967                 mutex_unlock(&root->fs_info->alloc_mutex);
2968                 trans = btrfs_start_transaction(root, 1);
2969                 mutex_lock(&root->fs_info->alloc_mutex);
2970
2971                 new_alloc_flags = update_block_group_flags(root,
2972                                                    shrink_block_group->flags);
2973                 if (new_alloc_flags != shrink_block_group->flags) {
2974                         calc =
2975                              btrfs_block_group_used(&shrink_block_group->item);
2976                 } else {
2977                         calc = shrink_block_group->key.offset;
2978                 }
2979                 do_chunk_alloc(trans, root->fs_info->extent_root,
2980                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
2981
2982                 mutex_unlock(&root->fs_info->alloc_mutex);
2983                 btrfs_end_transaction(trans, root);
2984                 mutex_lock(&root->fs_info->alloc_mutex);
2985         }
2986         return 0;
2987 }
2988
2989 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2990 {
2991         struct btrfs_trans_handle *trans;
2992         struct btrfs_root *tree_root = root->fs_info->tree_root;
2993         struct btrfs_path *path;
2994         u64 cur_byte;
2995         u64 total_found;
2996         u64 shrink_last_byte;
2997         struct btrfs_block_group_cache *shrink_block_group;
2998         struct btrfs_fs_info *info = root->fs_info;
2999         struct btrfs_key key;
3000         struct btrfs_key found_key;
3001         struct extent_buffer *leaf;
3002         u32 nritems;
3003         int ret;
3004         int progress;
3005
3006         mutex_lock(&root->fs_info->alloc_mutex);
3007         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
3008                                                       shrink_start);
3009         BUG_ON(!shrink_block_group);
3010
3011         shrink_last_byte = shrink_block_group->key.objectid +
3012                 shrink_block_group->key.offset;
3013
3014         shrink_block_group->space_info->total_bytes -=
3015                 shrink_block_group->key.offset;
3016         path = btrfs_alloc_path();
3017         root = root->fs_info->extent_root;
3018         path->reada = 2;
3019
3020         printk("btrfs relocating block group %llu flags %llu\n",
3021                (unsigned long long)shrink_start,
3022                (unsigned long long)shrink_block_group->flags);
3023
3024         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
3025
3026 again:
3027
3028         shrink_block_group->ro = 1;
3029
3030         total_found = 0;
3031         progress = 0;
3032         key.objectid = shrink_start;
3033         key.offset = 0;
3034         key.type = 0;
3035         cur_byte = key.objectid;
3036
3037         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3038         if (ret < 0)
3039                 goto out;
3040
3041         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
3042         if (ret < 0)
3043                 goto out;
3044
3045         if (ret == 0) {
3046                 leaf = path->nodes[0];
3047                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3048                 if (found_key.objectid + found_key.offset > shrink_start &&
3049                     found_key.objectid < shrink_last_byte) {
3050                         cur_byte = found_key.objectid;
3051                         key.objectid = cur_byte;
3052                 }
3053         }
3054         btrfs_release_path(root, path);
3055
3056         while(1) {
3057                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3058                 if (ret < 0)
3059                         goto out;
3060
3061 next:
3062                 leaf = path->nodes[0];
3063                 nritems = btrfs_header_nritems(leaf);
3064                 if (path->slots[0] >= nritems) {
3065                         ret = btrfs_next_leaf(root, path);
3066                         if (ret < 0)
3067                                 goto out;
3068                         if (ret == 1) {
3069                                 ret = 0;
3070                                 break;
3071                         }
3072                         leaf = path->nodes[0];
3073                         nritems = btrfs_header_nritems(leaf);
3074                 }
3075
3076                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3077
3078                 if (found_key.objectid >= shrink_last_byte)
3079                         break;
3080
3081                 if (progress && need_resched()) {
3082                         memcpy(&key, &found_key, sizeof(key));
3083                         cond_resched();
3084                         btrfs_release_path(root, path);
3085                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3086                         progress = 0;
3087                         goto next;
3088                 }
3089                 progress = 1;
3090
3091                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3092                     found_key.objectid + found_key.offset <= cur_byte) {
3093                         memcpy(&key, &found_key, sizeof(key));
3094                         key.offset++;
3095                         path->slots[0]++;
3096                         goto next;
3097                 }
3098
3099                 total_found++;
3100                 cur_byte = found_key.objectid + found_key.offset;
3101                 key.objectid = cur_byte;
3102                 btrfs_release_path(root, path);
3103                 ret = relocate_one_extent(root, path, &found_key);
3104                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3105         }
3106
3107         btrfs_release_path(root, path);
3108
3109         if (total_found > 0) {
3110                 printk("btrfs relocate found %llu last extent was %llu\n",
3111                        (unsigned long long)total_found,
3112                        (unsigned long long)found_key.objectid);
3113                 mutex_unlock(&root->fs_info->alloc_mutex);
3114                 trans = btrfs_start_transaction(tree_root, 1);
3115                 btrfs_commit_transaction(trans, tree_root);
3116
3117                 btrfs_clean_old_snapshots(tree_root);
3118
3119                 trans = btrfs_start_transaction(tree_root, 1);
3120                 btrfs_commit_transaction(trans, tree_root);
3121                 mutex_lock(&root->fs_info->alloc_mutex);
3122                 goto again;
3123         }
3124
3125         /*
3126          * we've freed all the extents, now remove the block
3127          * group item from the tree
3128          */
3129         mutex_unlock(&root->fs_info->alloc_mutex);
3130
3131         trans = btrfs_start_transaction(root, 1);
3132         mutex_lock(&root->fs_info->alloc_mutex);
3133         memcpy(&key, &shrink_block_group->key, sizeof(key));
3134
3135         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3136         if (ret > 0)
3137                 ret = -EIO;
3138         if (ret < 0)
3139                 goto out;
3140
3141         clear_extent_bits(&info->block_group_cache, key.objectid,
3142                           key.objectid + key.offset - 1,
3143                           (unsigned int)-1, GFP_NOFS);
3144
3145
3146         clear_extent_bits(&info->free_space_cache,
3147                            key.objectid, key.objectid + key.offset - 1,
3148                            (unsigned int)-1, GFP_NOFS);
3149
3150         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3151         kfree(shrink_block_group);
3152
3153         btrfs_del_item(trans, root, path);
3154         btrfs_release_path(root, path);
3155         mutex_unlock(&root->fs_info->alloc_mutex);
3156         btrfs_commit_transaction(trans, root);
3157
3158         mutex_lock(&root->fs_info->alloc_mutex);
3159
3160         /* the code to unpin extents might set a few bits in the free
3161          * space cache for this range again
3162          */
3163         clear_extent_bits(&info->free_space_cache,
3164                            key.objectid, key.objectid + key.offset - 1,
3165                            (unsigned int)-1, GFP_NOFS);
3166 out:
3167         btrfs_free_path(path);
3168         mutex_unlock(&root->fs_info->alloc_mutex);
3169         return ret;
3170 }
3171
3172 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3173                            struct btrfs_key *key)
3174 {
3175         int ret = 0;
3176         struct btrfs_key found_key;
3177         struct extent_buffer *leaf;
3178         int slot;
3179
3180         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3181         if (ret < 0)
3182                 goto out;
3183
3184         while(1) {
3185                 slot = path->slots[0];
3186                 leaf = path->nodes[0];
3187                 if (slot >= btrfs_header_nritems(leaf)) {
3188                         ret = btrfs_next_leaf(root, path);
3189                         if (ret == 0)
3190                                 continue;
3191                         if (ret < 0)
3192                                 goto out;
3193                         break;
3194                 }
3195                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3196
3197                 if (found_key.objectid >= key->objectid &&
3198                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3199                         ret = 0;
3200                         goto out;
3201                 }
3202                 path->slots[0]++;
3203         }
3204         ret = -ENOENT;
3205 out:
3206         return ret;
3207 }
3208
3209 int btrfs_read_block_groups(struct btrfs_root *root)
3210 {
3211         struct btrfs_path *path;
3212         int ret;
3213         int bit;
3214         struct btrfs_block_group_cache *cache;
3215         struct btrfs_fs_info *info = root->fs_info;
3216         struct btrfs_space_info *space_info;
3217         struct extent_io_tree *block_group_cache;
3218         struct btrfs_key key;
3219         struct btrfs_key found_key;
3220         struct extent_buffer *leaf;
3221
3222         block_group_cache = &info->block_group_cache;
3223         root = info->extent_root;
3224         key.objectid = 0;
3225         key.offset = 0;
3226         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3227         path = btrfs_alloc_path();
3228         if (!path)
3229                 return -ENOMEM;
3230
3231         mutex_lock(&root->fs_info->alloc_mutex);
3232         while(1) {
3233                 ret = find_first_block_group(root, path, &key);
3234                 if (ret > 0) {
3235                         ret = 0;
3236                         goto error;
3237                 }
3238                 if (ret != 0)
3239                         goto error;
3240
3241                 leaf = path->nodes[0];
3242                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3243                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3244                 if (!cache) {
3245                         ret = -ENOMEM;
3246                         break;
3247                 }
3248
3249                 read_extent_buffer(leaf, &cache->item,
3250                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3251                                    sizeof(cache->item));
3252                 memcpy(&cache->key, &found_key, sizeof(found_key));
3253
3254                 key.objectid = found_key.objectid + found_key.offset;
3255                 btrfs_release_path(root, path);
3256                 cache->flags = btrfs_block_group_flags(&cache->item);
3257                 bit = 0;
3258                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3259                         bit = BLOCK_GROUP_DATA;
3260                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3261                         bit = BLOCK_GROUP_SYSTEM;
3262                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3263                         bit = BLOCK_GROUP_METADATA;
3264                 }
3265                 set_avail_alloc_bits(info, cache->flags);
3266
3267                 ret = update_space_info(info, cache->flags, found_key.offset,
3268                                         btrfs_block_group_used(&cache->item),
3269                                         &space_info);
3270                 BUG_ON(ret);
3271                 cache->space_info = space_info;
3272
3273                 /* use EXTENT_LOCKED to prevent merging */
3274                 set_extent_bits(block_group_cache, found_key.objectid,
3275                                 found_key.objectid + found_key.offset - 1,
3276                                 bit | EXTENT_LOCKED, GFP_NOFS);
3277                 set_state_private(block_group_cache, found_key.objectid,
3278                                   (unsigned long)cache);
3279
3280                 if (key.objectid >=
3281                     btrfs_super_total_bytes(&info->super_copy))
3282                         break;
3283         }
3284         ret = 0;
3285 error:
3286         btrfs_free_path(path);
3287         mutex_unlock(&root->fs_info->alloc_mutex);
3288         return ret;
3289 }
3290
3291 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3292                            struct btrfs_root *root, u64 bytes_used,
3293                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3294                            u64 size)
3295 {
3296         int ret;
3297         int bit = 0;
3298         struct btrfs_root *extent_root;
3299         struct btrfs_block_group_cache *cache;
3300         struct extent_io_tree *block_group_cache;
3301
3302         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3303         extent_root = root->fs_info->extent_root;
3304         block_group_cache = &root->fs_info->block_group_cache;
3305
3306         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3307         BUG_ON(!cache);
3308         cache->key.objectid = chunk_offset;
3309         cache->key.offset = size;
3310         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3311
3312         btrfs_set_block_group_used(&cache->item, bytes_used);
3313         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3314         cache->flags = type;
3315         btrfs_set_block_group_flags(&cache->item, type);
3316
3317         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3318                                 &cache->space_info);
3319         BUG_ON(ret);
3320
3321         bit = block_group_state_bits(type);
3322         set_extent_bits(block_group_cache, chunk_offset,
3323                         chunk_offset + size - 1,
3324                         bit | EXTENT_LOCKED, GFP_NOFS);
3325
3326         set_state_private(block_group_cache, chunk_offset,
3327                           (unsigned long)cache);
3328         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3329                                 sizeof(cache->item));
3330         BUG_ON(ret);
3331
3332         finish_current_insert(trans, extent_root);
3333         ret = del_pending_extents(trans, extent_root);
3334         BUG_ON(ret);
3335         set_avail_alloc_bits(extent_root->fs_info, type);
3336
3337         return 0;
3338 }