Btrfs: Data ordered fixes
[pandora-kernel.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
/*
 * The block group cache borrows extent_io state bits to tag each
 * cached group's type (data/metadata/system) and dirty status.
 */
#define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
#define BLOCK_GROUP_METADATA EXTENT_UPTODATE
#define BLOCK_GROUP_SYSTEM   EXTENT_NEW

#define BLOCK_GROUP_DIRTY EXTENT_DIRTY

/* forward declarations for the extent tree maintenance helpers */
static int finish_current_insert(struct btrfs_trans_handle *trans, struct
                                 btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
                               btrfs_root *extent_root);
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
                         struct btrfs_block_group_cache *hint,
                         u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         /*
92          * we get into deadlocks with paths held by callers of this function.
93          * since the alloc_mutex is protecting things right now, just
94          * skip the locking here
95          */
96         path->skip_locking = 1;
97         first_free = block_group->key.objectid;
98         key.objectid = block_group->key.objectid;
99         key.offset = 0;
100         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
101         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
102         if (ret < 0)
103                 return ret;
104         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
105         if (ret < 0)
106                 return ret;
107         if (ret == 0) {
108                 leaf = path->nodes[0];
109                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
110                 if (key.objectid + key.offset > first_free)
111                         first_free = key.objectid + key.offset;
112         }
113         while(1) {
114                 leaf = path->nodes[0];
115                 slot = path->slots[0];
116                 if (slot >= btrfs_header_nritems(leaf)) {
117                         ret = btrfs_next_leaf(root, path);
118                         if (ret < 0)
119                                 goto err;
120                         if (ret == 0) {
121                                 continue;
122                         } else {
123                                 break;
124                         }
125                 }
126                 btrfs_item_key_to_cpu(leaf, &key, slot);
127                 if (key.objectid < block_group->key.objectid) {
128                         goto next;
129                 }
130                 if (key.objectid >= block_group->key.objectid +
131                     block_group->key.offset) {
132                         break;
133                 }
134
135                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
136                         if (!found) {
137                                 last = first_free;
138                                 found = 1;
139                         }
140                         if (key.objectid > last) {
141                                 hole_size = key.objectid - last;
142                                 set_extent_dirty(free_space_cache, last,
143                                                  last + hole_size - 1,
144                                                  GFP_NOFS);
145                         }
146                         last = key.objectid + key.offset;
147                 }
148 next:
149                 path->slots[0]++;
150         }
151
152         if (!found)
153                 last = first_free;
154         if (block_group->key.objectid +
155             block_group->key.offset > last) {
156                 hole_size = block_group->key.objectid +
157                         block_group->key.offset - last;
158                 set_extent_dirty(free_space_cache, last,
159                                  last + hole_size - 1, GFP_NOFS);
160         }
161         block_group->cached = 1;
162 err:
163         btrfs_free_path(path);
164         return 0;
165 }
166
167 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
168                                                        btrfs_fs_info *info,
169                                                          u64 bytenr)
170 {
171         struct extent_io_tree *block_group_cache;
172         struct btrfs_block_group_cache *block_group = NULL;
173         u64 ptr;
174         u64 start;
175         u64 end;
176         int ret;
177
178         bytenr = max_t(u64, bytenr,
179                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
180         block_group_cache = &info->block_group_cache;
181         ret = find_first_extent_bit(block_group_cache,
182                                     bytenr, &start, &end,
183                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
184                                     BLOCK_GROUP_SYSTEM);
185         if (ret) {
186                 return NULL;
187         }
188         ret = get_state_private(block_group_cache, start, &ptr);
189         if (ret)
190                 return NULL;
191
192         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
193         return block_group;
194 }
195
196 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
197                                                          btrfs_fs_info *info,
198                                                          u64 bytenr)
199 {
200         struct extent_io_tree *block_group_cache;
201         struct btrfs_block_group_cache *block_group = NULL;
202         u64 ptr;
203         u64 start;
204         u64 end;
205         int ret;
206
207         bytenr = max_t(u64, bytenr,
208                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
209         block_group_cache = &info->block_group_cache;
210         ret = find_first_extent_bit(block_group_cache,
211                                     bytenr, &start, &end,
212                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
213                                     BLOCK_GROUP_SYSTEM);
214         if (ret) {
215                 return NULL;
216         }
217         ret = get_state_private(block_group_cache, start, &ptr);
218         if (ret)
219                 return NULL;
220
221         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
222         if (block_group->key.objectid <= bytenr && bytenr <
223             block_group->key.objectid + block_group->key.offset)
224                 return block_group;
225         return NULL;
226 }
227
228 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
229 {
230         return (cache->flags & bits) == bits;
231 }
232
/*
 * Find a free range of at least @num bytes suitable for allocation type
 * @data, starting at *start_ret inside *cache_ret.  On a miss the search
 * moves forward through later block groups and wraps back once to the
 * original search start.  On success *start_ret (and possibly *cache_ret)
 * are updated and 0 is returned; -ENOSPC means no hole was found anywhere.
 * Caller must hold fs_info->alloc_mutex.
 */
static int noinline find_search_start(struct btrfs_root *root,
			      struct btrfs_block_group_cache **cache_ret,
			      u64 *start_ret, u64 num, int data)
{
	int ret;
	struct btrfs_block_group_cache *cache = *cache_ret;
	struct extent_io_tree *free_space_cache;
	struct extent_state *state;
	u64 last;
	u64 start = 0;
	u64 cache_miss = 0;
	u64 total_fs_bytes;
	u64 search_start = *start_ret;
	int wrapped = 0;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
	free_space_cache = &root->fs_info->free_space_cache;

	if (!cache)
		goto out;

again:
	/* make sure this group's free space is in the cache tree */
	ret = cache_block_group(root, cache);
	if (ret) {
		goto out;
	}

	last = max(search_start, cache->key.objectid);
	/* wrong-type or read-only groups cannot satisfy the request */
	if (!block_group_bits(cache, data) || cache->ro)
		goto new_group;

	spin_lock_irq(&free_space_cache->lock);
	state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
	while(1) {
		if (!state) {
			/* note where the cache ran dry before moving on */
			if (!cache_miss)
				cache_miss = last;
			spin_unlock_irq(&free_space_cache->lock);
			goto new_group;
		}

		start = max(last, state->start);
		last = state->end + 1;
		if (last - start < num) {
			/* hole too small, walk to the next dirty range */
			do {
				state = extent_state_next(state);
			} while(state && !(state->state & EXTENT_DIRTY));
			continue;
		}
		spin_unlock_irq(&free_space_cache->lock);
		/* recheck ro: it can flip while we hold the spinlock */
		if (cache->ro) {
			goto new_group;
		}
		/* hole must fit entirely within the block group */
		if (start + num > cache->key.objectid + cache->key.offset)
			goto new_group;
		if (!block_group_bits(cache, data)) {
			printk("block group bits don't match %Lu %d\n", cache->flags, data);
		}
		*start_ret = start;
		return 0;
	}
out:
	cache = btrfs_lookup_block_group(root->fs_info, search_start);
	if (!cache) {
		printk("Unable to find block group for %Lu\n", search_start);
		WARN_ON(1);
	}
	return -ENOSPC;

new_group:
	last = cache->key.objectid + cache->key.offset;
wrapped:
	cache = btrfs_lookup_first_block_group(root->fs_info, last);
	if (!cache || cache->key.objectid >= total_fs_bytes) {
no_cache:
		/* ran off the end of the fs: wrap back once, then give up */
		if (!wrapped) {
			wrapped = 1;
			last = search_start;
			goto wrapped;
		}
		goto out;
	}
	if (cache_miss && !cache->cached) {
		/* an uncached group may still have space at cache_miss */
		cache_block_group(root, cache);
		last = cache_miss;
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
	}
	cache_miss = 0;
	cache = __btrfs_find_block_group(root, cache, last, data, 0);
	if (!cache)
		goto no_cache;
	*cache_ret = cache;
	goto again;
}
328
329 static u64 div_factor(u64 num, int factor)
330 {
331         if (factor == 10)
332                 return num;
333         num *= factor;
334         do_div(num, 10);
335         return num;
336 }
337
338 static int block_group_state_bits(u64 flags)
339 {
340         int bits = 0;
341         if (flags & BTRFS_BLOCK_GROUP_DATA)
342                 bits |= BLOCK_GROUP_DATA;
343         if (flags & BTRFS_BLOCK_GROUP_METADATA)
344                 bits |= BLOCK_GROUP_METADATA;
345         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
346                 bits |= BLOCK_GROUP_SYSTEM;
347         return bits;
348 }
349
/*
 * Pick a block group likely to have room for an allocation of type
 * @data: first try the group containing @search_start, then @hint,
 * then scan all groups linearly.  A group qualifies when its used +
 * pinned bytes sit below a fill-factor threshold; the scan wraps once
 * and finally retries with the threshold relaxed to 100% (factor 10)
 * before giving up.  Returns the chosen group or NULL.  Caller must
 * hold fs_info->alloc_mutex.
 */
static struct btrfs_block_group_cache *
__btrfs_find_block_group(struct btrfs_root *root,
			 struct btrfs_block_group_cache *hint,
			 u64 search_start, int data, int owner)
{
	struct btrfs_block_group_cache *cache;
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *found_group = NULL;
	struct btrfs_fs_info *info = root->fs_info;
	u64 used;
	u64 last = 0;
	u64 start;
	u64 end;
	u64 free_check;
	u64 ptr;
	int bit;
	int ret;
	int full_search = 0;
	int factor = 10;
	int wrapped = 0;

	block_group_cache = &info->block_group_cache;

	/* consider metadata groups "full" a bit earlier (90%) */
	if (data & BTRFS_BLOCK_GROUP_METADATA)
		factor = 9;

	bit = block_group_state_bits(data);

	if (search_start) {
		/* fast path: the group around search_start still has room */
		struct btrfs_block_group_cache *shint;
		shint = btrfs_lookup_first_block_group(info, search_start);
		if (shint && block_group_bits(shint, data) && !shint->ro) {
			used = btrfs_block_group_used(&shint->item);
			if (used + shint->pinned <
			    div_factor(shint->key.offset, factor)) {
				return shint;
			}
		}
	}
	if (hint && !hint->ro && block_group_bits(hint, data)) {
		used = btrfs_block_group_used(&hint->item);
		if (used + hint->pinned <
		    div_factor(hint->key.offset, factor)) {
			return hint;
		}
		/* hint is too full: continue the scan right after it */
		last = hint->key.objectid + hint->key.offset;
	} else {
		if (hint)
			last = max(hint->key.objectid, search_start);
		else
			last = search_start;
	}
again:
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, bit);
		if (ret)
			break;

		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret) {
			/* no group pointer recorded here; skip the range */
			last = end + 1;
			continue;
		}

		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if (!cache->ro && block_group_bits(cache, data)) {
			free_check = div_factor(cache->key.offset, factor);
			if (used + cache->pinned < free_check) {
				found_group = cache;
				goto found;
			}
		}
		cond_resched();
	}
	/* the first pass may have started mid-list: wrap to search_start */
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	/* still nothing: rescan accepting fuller groups */
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return found_group;
}
442
443 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
444                                                  struct btrfs_block_group_cache
445                                                  *hint, u64 search_start,
446                                                  int data, int owner)
447 {
448
449         struct btrfs_block_group_cache *ret;
450         mutex_lock(&root->fs_info->alloc_mutex);
451         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
452         mutex_unlock(&root->fs_info->alloc_mutex);
453         return ret;
454 }
455 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
456                            u64 owner, u64 owner_offset)
457 {
458         u32 high_crc = ~(u32)0;
459         u32 low_crc = ~(u32)0;
460         __le64 lenum;
461         lenum = cpu_to_le64(root_objectid);
462         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
463         lenum = cpu_to_le64(ref_generation);
464         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
465         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
466                 lenum = cpu_to_le64(owner);
467                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
468                 lenum = cpu_to_le64(owner_offset);
469                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
470         }
471         return ((u64)high_crc << 32) | (u64)low_crc;
472 }
473
474 static int match_extent_ref(struct extent_buffer *leaf,
475                             struct btrfs_extent_ref *disk_ref,
476                             struct btrfs_extent_ref *cpu_ref)
477 {
478         int ret;
479         int len;
480
481         if (cpu_ref->objectid)
482                 len = sizeof(*cpu_ref);
483         else
484                 len = 2 * sizeof(u64);
485         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
486                                    len);
487         return ret == 0;
488 }
489
/*
 * Find the backref item for (@bytenr, @root_objectid, @ref_generation,
 * @owner, @owner_offset).  Backref keys store a 64-bit hash of those
 * fields in the key offset, so hash collisions are resolved by walking
 * forward through items with the same objectid/type until one actually
 * matches.  With @del set, the slot is looked up with intent to delete
 * (cow, ins_len -1).  Returns 0 with @path on the item, or nonzero if
 * it was not found.
 */
static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, u64 bytenr,
					  u64 root_objectid,
					  u64 ref_generation, u64 owner,
					  u64 owner_offset, int del)
{
	u64 hash;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_extent_ref ref;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref *disk_ref;
	int ret;
	int ret2;

	/* stack template to compare against on-disk candidates */
	btrfs_set_stack_ref_root(&ref, root_objectid);
	btrfs_set_stack_ref_generation(&ref, ref_generation);
	btrfs_set_stack_ref_objectid(&ref, owner);
	btrfs_set_stack_ref_offset(&ref, owner_offset);

	hash = hash_extent_ref(root_objectid, ref_generation, owner,
			       owner_offset);
	key.offset = hash;
	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_REF_KEY;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path,
					del ? -1 : 0, del);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		if (ret != 0) {
			/* no exact key: step onto the next existing item */
			u32 nritems = btrfs_header_nritems(leaf);
			if (path->slots[0] >= nritems) {
				ret2 = btrfs_next_leaf(root, path);
				if (ret2)
					goto out;
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
			if (found_key.objectid != bytenr ||
			    found_key.type != BTRFS_EXTENT_REF_KEY)
				goto out;
			key.offset = found_key.offset;
			if (del) {
				/* re-search so the delete intent lands on
				 * the exact key */
				btrfs_release_path(root, path);
				continue;
			}
		}
		disk_ref = btrfs_item_ptr(path->nodes[0],
					  path->slots[0],
					  struct btrfs_extent_ref);
		if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
			ret = 0;
			goto out;
		}
		/* hash collision: advance past this item and try again */
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		key.offset = found_key.offset + 1;
		btrfs_release_path(root, path);
	}
out:
	return ret;
}
555
556 /*
557  * Back reference rules.  Back refs have three main goals:
558  *
559  * 1) differentiate between all holders of references to an extent so that
560  *    when a reference is dropped we can make sure it was a valid reference
561  *    before freeing the extent.
562  *
563  * 2) Provide enough information to quickly find the holders of an extent
564  *    if we notice a given block is corrupted or bad.
565  *
566  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
567  *    maintenance.  This is actually the same as #2, but with a slightly
568  *    different use case.
569  *
570  * File extents can be referenced by:
571  *
572  * - multiple snapshots, subvolumes, or different generations in one subvol
573  * - different files inside a single subvolume (in theory, not implemented yet)
574  * - different offsets inside a file (bookend extents in file.c)
575  *
576  * The extent ref structure has fields for:
577  *
578  * - Objectid of the subvolume root
579  * - Generation number of the tree holding the reference
580  * - objectid of the file holding the reference
581  * - offset in the file corresponding to the key holding the reference
582  *
583  * When a file extent is allocated the fields are filled in:
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a leaf is cow'd new references are added for every file extent found
587  * in the leaf.  It looks the same as the create case, but trans->transid
588  * will be different when the block is cow'd.
589  *
590  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
591  *
592  * When a file extent is removed either during snapshot deletion or file
593  * truncation, the corresponding back reference is found
594  * by searching for:
595  *
596  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
597  *      inode objectid, offset in file)
598  *
599  * Btree extents can be referenced by:
600  *
601  * - Different subvolumes
602  * - Different generations of the same subvolume
603  *
604  * Storing sufficient information for a full reverse mapping of a btree
605  * block would require storing the lowest key of the block in the backref,
606  * and it would require updating that lowest key either before write out or
607  * every time it changed.  Instead, the objectid of the lowest key is stored
608  * along with the level of the tree block.  This provides a hint
609  * about where in the btree the block can be found.  Searches through the
610  * btree only need to look for a pointer to that block, so they stop one
611  * level higher than the level recorded in the backref.
612  *
613  * Some btrees do not do reference counting on their extents.  These
614  * include the extent tree and the tree of tree roots.  Backrefs for these
615  * trees always have a generation of zero.
616  *
617  * When a tree block is created, back references are inserted:
618  *
619  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
620  *
621  * When a tree block is cow'd in a reference counted root,
622  * new back references are added for all the blocks it points to.
623  * These are of the form (trans->transid will have increased since creation):
624  *
625  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
626  *
627  * Because the lowest_key_objectid and the level are just hints
628  * they are not used when backrefs are deleted.  When a backref is deleted:
629  *
630  * if backref was for a tree root:
631  *     root_objectid = root->root_key.objectid
632  * else
633  *     root_objectid = btrfs_header_owner(parent)
634  *
635  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
636  *
637  * Back Reference Key hashing:
638  *
639  * Back references have four fields, each 64 bits long.  Unfortunately,
640  * This is hashed into a single 64 bit number and placed into the key offset.
641  * The key objectid corresponds to the first byte in the extent, and the
642  * key type is set to BTRFS_EXTENT_REF_KEY
643  */
644 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
645                                  struct btrfs_root *root,
646                                  struct btrfs_path *path, u64 bytenr,
647                                  u64 root_objectid, u64 ref_generation,
648                                  u64 owner, u64 owner_offset)
649 {
650         u64 hash;
651         struct btrfs_key key;
652         struct btrfs_extent_ref ref;
653         struct btrfs_extent_ref *disk_ref;
654         int ret;
655
656         btrfs_set_stack_ref_root(&ref, root_objectid);
657         btrfs_set_stack_ref_generation(&ref, ref_generation);
658         btrfs_set_stack_ref_objectid(&ref, owner);
659         btrfs_set_stack_ref_offset(&ref, owner_offset);
660
661         hash = hash_extent_ref(root_objectid, ref_generation, owner,
662                                owner_offset);
663         key.offset = hash;
664         key.objectid = bytenr;
665         key.type = BTRFS_EXTENT_REF_KEY;
666
667         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
668         while (ret == -EEXIST) {
669                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
670                                           struct btrfs_extent_ref);
671                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
672                         goto out;
673                 key.offset++;
674                 btrfs_release_path(root, path);
675                 ret = btrfs_insert_empty_item(trans, root, path, &key,
676                                               sizeof(ref));
677         }
678         if (ret)
679                 goto out;
680         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
681                                   struct btrfs_extent_ref);
682         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
683                             sizeof(ref));
684         btrfs_mark_buffer_dirty(path->nodes[0]);
685 out:
686         btrfs_release_path(root, path);
687         return ret;
688 }
689
690 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
691                                 struct btrfs_root *root,
692                                 u64 bytenr, u64 num_bytes,
693                                 u64 root_objectid, u64 ref_generation,
694                                 u64 owner, u64 owner_offset)
695 {
696         struct btrfs_path *path;
697         int ret;
698         struct btrfs_key key;
699         struct extent_buffer *l;
700         struct btrfs_extent_item *item;
701         u32 refs;
702
703         WARN_ON(num_bytes < root->sectorsize);
704         path = btrfs_alloc_path();
705         if (!path)
706                 return -ENOMEM;
707
708         path->reada = 1;
709         key.objectid = bytenr;
710         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
711         key.offset = num_bytes;
712         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
713                                 0, 1);
714         if (ret < 0)
715                 return ret;
716         if (ret != 0) {
717                 BUG();
718         }
719         BUG_ON(ret != 0);
720         l = path->nodes[0];
721         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
722         refs = btrfs_extent_refs(l, item);
723         btrfs_set_extent_refs(l, item, refs + 1);
724         btrfs_mark_buffer_dirty(path->nodes[0]);
725
726         btrfs_release_path(root->fs_info->extent_root, path);
727
728         path->reada = 1;
729         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
730                                           path, bytenr, root_objectid,
731                                           ref_generation, owner, owner_offset);
732         BUG_ON(ret);
733         finish_current_insert(trans, root->fs_info->extent_root);
734         del_pending_extents(trans, root->fs_info->extent_root);
735
736         btrfs_free_path(path);
737         return 0;
738 }
739
740 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
741                                 struct btrfs_root *root,
742                                 u64 bytenr, u64 num_bytes,
743                                 u64 root_objectid, u64 ref_generation,
744                                 u64 owner, u64 owner_offset)
745 {
746         int ret;
747
748         mutex_lock(&root->fs_info->alloc_mutex);
749         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
750                                      root_objectid, ref_generation,
751                                      owner, owner_offset);
752         mutex_unlock(&root->fs_info->alloc_mutex);
753         return ret;
754 }
755
756 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
757                          struct btrfs_root *root)
758 {
759         finish_current_insert(trans, root->fs_info->extent_root);
760         del_pending_extents(trans, root->fs_info->extent_root);
761         return 0;
762 }
763
764 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
765                              struct btrfs_root *root, u64 bytenr,
766                              u64 num_bytes, u32 *refs)
767 {
768         struct btrfs_path *path;
769         int ret;
770         struct btrfs_key key;
771         struct extent_buffer *l;
772         struct btrfs_extent_item *item;
773
774         WARN_ON(num_bytes < root->sectorsize);
775         path = btrfs_alloc_path();
776         path->reada = 1;
777         key.objectid = bytenr;
778         key.offset = num_bytes;
779         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
780         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
781                                 0, 0);
782         if (ret < 0)
783                 goto out;
784         if (ret != 0) {
785                 btrfs_print_leaf(root, path->nodes[0]);
786                 printk("failed to find block number %Lu\n", bytenr);
787                 BUG();
788         }
789         l = path->nodes[0];
790         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
791         *refs = btrfs_extent_refs(l, item);
792 out:
793         btrfs_free_path(path);
794         return 0;
795 }
796
/*
 * Walk from the data extent at first_extent up through the tree blocks in
 * count_path, counting how many distinct roots reference each extent.
 *
 * Returns: 0 when no references were found, 1 when only this root (and,
 * for data extents, only the expected owner) holds references, 2 as soon
 * as a foreign root, unexpected owner, or extra data-extent reference is
 * seen (i.e. "shared, assume snapshot").
 *
 * NOTE(review): the unconditional BUG() below disables this function
 * pending locking fixes (see FIXME); nothing should currently call it.
 */
u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
				  struct btrfs_path *count_path,
				  u64 expected_owner,
				  u64 first_extent)
{
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	struct btrfs_path *path;
	u64 bytenr;
	u64 found_objectid;
	u64 found_owner;
	u64 root_objectid = root->root_key.objectid;
	u32 total_count = 0;
	u32 extent_refs;
	u32 cur_count;
	u32 nritems;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *l;
	struct btrfs_extent_item *item;
	struct btrfs_extent_ref *ref_item;
	int level = -1;	/* -1 means "looking at the data extent itself" */

	/* FIXME, needs locking */
	BUG();

	mutex_lock(&root->fs_info->alloc_mutex);
	path = btrfs_alloc_path();
again:
	/* level -1 examines the data extent, levels >= 0 walk up count_path */
	if (level == -1)
		bytenr = first_extent;
	else
		bytenr = count_path->nodes[level]->start;

	cur_count = 0;
	key.objectid = bytenr;
	key.offset = 0;

	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* offset 0 never matches a real extent item, search must land after */
	BUG_ON(ret == 0);

	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);

	if (found_key.objectid != bytenr ||
	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
		goto out;
	}

	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
	extent_refs = btrfs_extent_refs(l, item);
	/* scan all backref items that follow the extent item */
	while (1) {
		l = path->nodes[0];
		nritems = btrfs_header_nritems(l);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret == 0)
				continue;
			break;
		}
		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr)
			break;

		if (found_key.type != BTRFS_EXTENT_REF_KEY) {
			path->slots[0]++;
			continue;
		}

		cur_count++;
		ref_item = btrfs_item_ptr(l, path->slots[0],
					  struct btrfs_extent_ref);
		found_objectid = btrfs_ref_root(l, ref_item);

		/* a reference from any other root means shared */
		if (found_objectid != root_objectid) {
			total_count = 2;
			goto out;
		}
		if (level == -1) {
			found_owner = btrfs_ref_objectid(l, ref_item);
			if (found_owner != expected_owner) {
				total_count = 2;
				goto out;
			}
			/*
			 * nasty.  we don't count a reference held by
			 * the running transaction.  This allows nodatacow
			 * to avoid cow most of the time
			 */
			if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
			    btrfs_ref_generation(l, ref_item) ==
			    root->fs_info->generation) {
				extent_refs--;
			}
		}
		total_count = 1;
		path->slots[0]++;
	}
	/*
	 * if there is more than one reference against a data extent,
	 * we have to assume the other ref is another snapshot
	 */
	if (level == -1 && extent_refs > 1) {
		total_count = 2;
		goto out;
	}
	if (cur_count == 0) {
		total_count = 0;
		goto out;
	}
	/* stop once we have checked the root block of this path */
	if (level >= 0 && root->node == count_path->nodes[level])
		goto out;
	level++;
	btrfs_release_path(root, path);
	goto again;

out:
	btrfs_free_path(path);
	mutex_unlock(&root->fs_info->alloc_mutex);
	return total_count;
}
921
/*
 * Add one reference, on behalf of this root and the running transaction,
 * to every extent pointed to from 'buf': disk extents referenced by file
 * extent items when buf is a leaf (level 0), child tree blocks otherwise.
 *
 * Does nothing for roots with ref_cows == 0.  Returns 0 on success or
 * the first __btrfs_inc_extent_ref error (refs already taken are left
 * in place; see the disabled rollback below).
 */
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		  struct extent_buffer *buf)
{
	u64 bytenr;
	u32 nritems;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int level;
	int ret;
	int faili;	/* index of the item whose ref update failed */

	if (!root->ref_cows)
		return 0;

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	for (i = 0; i < nritems; i++) {
		if (level == 0) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			/* inline extents live in the leaf, no disk extent */
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			/* bytenr 0 marks a hole, nothing allocated */
			if (disk_bytenr == 0)
				continue;

			mutex_lock(&root->fs_info->alloc_mutex);
			ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf, fi),
				    root->root_key.objectid, trans->transid,
				    key.objectid, key.offset);
			mutex_unlock(&root->fs_info->alloc_mutex);
			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			btrfs_node_key_to_cpu(buf, &key, i);

			mutex_lock(&root->fs_info->alloc_mutex);
			ret = __btrfs_inc_extent_ref(trans, root, bytenr,
					   btrfs_level_size(root, level - 1),
					   root->root_key.objectid,
					   trans->transid,
					   level - 1, key.objectid);
			mutex_unlock(&root->fs_info->alloc_mutex);
			if (ret) {
				faili = i;
				WARN_ON(1);
				goto fail;
			}
		}
	}
	return 0;
fail:
	WARN_ON(1);
	/*
	 * NOTE(review): the rollback below is compiled out; it also
	 * references an undeclared 'err' and would not build as written.
	 * On failure the references taken so far are left in place.
	 */
#if 0
	for (i =0; i < faili; i++) {
		if (level == 0) {
			u64 disk_bytenr;
			btrfs_item_key_to_cpu(buf, &key, i);
			if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
				continue;
			fi = btrfs_item_ptr(buf, i,
					    struct btrfs_file_extent_item);
			if (btrfs_file_extent_type(buf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE)
				continue;
			disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
			if (disk_bytenr == 0)
				continue;
			err = btrfs_free_extent(trans, root, disk_bytenr,
				    btrfs_file_extent_disk_num_bytes(buf,
								      fi), 0);
			BUG_ON(err);
		} else {
			bytenr = btrfs_node_blockptr(buf, i);
			err = btrfs_free_extent(trans, root, bytenr,
					btrfs_level_size(root, level - 1), 0);
			BUG_ON(err);
		}
	}
#endif
	return ret;
}
1015
/*
 * Write the in-memory block group item for 'cache' back into the extent
 * tree at cache->key.
 *
 * Even on a failed search we fall through to finish_current_insert() and
 * del_pending_extents(): updating a block group item can itself queue
 * extent tree work that must be flushed.  Returns 0 on success, the
 * search error, or the pending-delete error.
 */
static int write_one_cache_group(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_block_group_cache *cache)
{
	int ret;
	int pending_ret;
	struct btrfs_root *extent_root = root->fs_info->extent_root;
	unsigned long bi;
	struct extent_buffer *leaf;

	ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
	if (ret < 0)
		goto fail;
	/* the block group item must already exist; ret > 0 means it is gone */
	BUG_ON(ret);

	leaf = path->nodes[0];
	bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
	write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(extent_root, path);
fail:
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);
	if (ret)
		return ret;
	if (pending_ret)
		return pending_ret;
	return 0;

}
1047
/*
 * Write every block group item marked BLOCK_GROUP_DIRTY in the in-memory
 * block group cache back to the extent tree.
 *
 * A block group that fails to write stays marked dirty so a later commit
 * can retry it; the first error is remembered and returned, but the loop
 * keeps going over the remaining dirty groups.  Runs under alloc_mutex.
 */
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	struct extent_io_tree *block_group_cache;
	struct btrfs_block_group_cache *cache;
	int ret;
	int err = 0;
	int werr = 0;	/* first write error seen, returned to caller */
	struct btrfs_path *path;
	u64 last = 0;
	u64 start;
	u64 end;
	u64 ptr;

	block_group_cache = &root->fs_info->block_group_cache;
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	mutex_lock(&root->fs_info->alloc_mutex);
	while(1) {
		ret = find_first_extent_bit(block_group_cache, last,
					    &start, &end, BLOCK_GROUP_DIRTY);
		if (ret)
			break;

		last = end + 1;
		/* the state private holds a pointer to the cache struct */
		ret = get_state_private(block_group_cache, start, &ptr);
		if (ret)
			break;
		cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
		err = write_one_cache_group(trans, root,
					    path, cache);
		/*
		 * if we fail to write the cache group, we want
		 * to keep it marked dirty in hopes that a later
		 * write will work
		 */
		if (err) {
			werr = err;
			continue;
		}
		clear_extent_bits(block_group_cache, start, end,
				  BLOCK_GROUP_DIRTY, GFP_NOFS);
	}
	btrfs_free_path(path);
	mutex_unlock(&root->fs_info->alloc_mutex);
	return werr;
}
1097
1098 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1099                                                   u64 flags)
1100 {
1101         struct list_head *head = &info->space_info;
1102         struct list_head *cur;
1103         struct btrfs_space_info *found;
1104         list_for_each(cur, head) {
1105                 found = list_entry(cur, struct btrfs_space_info, list);
1106                 if (found->flags == flags)
1107                         return found;
1108         }
1109         return NULL;
1110
1111 }
1112
1113 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1114                              u64 total_bytes, u64 bytes_used,
1115                              struct btrfs_space_info **space_info)
1116 {
1117         struct btrfs_space_info *found;
1118
1119         found = __find_space_info(info, flags);
1120         if (found) {
1121                 found->total_bytes += total_bytes;
1122                 found->bytes_used += bytes_used;
1123                 found->full = 0;
1124                 WARN_ON(found->total_bytes < found->bytes_used);
1125                 *space_info = found;
1126                 return 0;
1127         }
1128         found = kmalloc(sizeof(*found), GFP_NOFS);
1129         if (!found)
1130                 return -ENOMEM;
1131
1132         list_add(&found->list, &info->space_info);
1133         found->flags = flags;
1134         found->total_bytes = total_bytes;
1135         found->bytes_used = bytes_used;
1136         found->bytes_pinned = 0;
1137         found->full = 0;
1138         found->force_alloc = 0;
1139         *space_info = found;
1140         return 0;
1141 }
1142
1143 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1144 {
1145         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1146                                    BTRFS_BLOCK_GROUP_RAID1 |
1147                                    BTRFS_BLOCK_GROUP_RAID10 |
1148                                    BTRFS_BLOCK_GROUP_DUP);
1149         if (extra_flags) {
1150                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1151                         fs_info->avail_data_alloc_bits |= extra_flags;
1152                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1153                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1154                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1155                         fs_info->avail_system_alloc_bits |= extra_flags;
1156         }
1157 }
1158
1159 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1160 {
1161         u64 num_devices = root->fs_info->fs_devices->num_devices;
1162
1163         if (num_devices == 1)
1164                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1165         if (num_devices < 4)
1166                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1167
1168         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1169             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1170                       BTRFS_BLOCK_GROUP_RAID10))) {
1171                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1172         }
1173
1174         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1175             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1176                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1177         }
1178
1179         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1180             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1181              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1182              (flags & BTRFS_BLOCK_GROUP_DUP)))
1183                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1184         return flags;
1185 }
1186
/*
 * Allocate a new chunk (and block group) for the profile in 'flags' if
 * the matching space_info is getting full, or unconditionally when
 * 'force' (or the space_info's force_alloc) is set.
 *
 * Takes chunk_mutex for the actual chunk allocation; caller holds
 * alloc_mutex.  Always returns 0 -- allocation failures either mark the
 * space_info full (ENOSPC) or BUG().
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	u64 start;
	u64 num_bytes;
	int ret;

	flags = reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		/* first allocation with this profile: create the record */
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	if (space_info->force_alloc) {
		force = 1;
		space_info->force_alloc = 0;
	}
	if (space_info->full)
		goto out;

	/* only allocate once usage would cross ~60% of the total */
	thresh = div_factor(space_info->total_bytes, 6);
	if (!force &&
	   (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
	    thresh)
		goto out;

	mutex_lock(&extent_root->fs_info->chunk_mutex);
	ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
	if (ret == -ENOSPC) {
printk("space info full %Lu\n", flags);
		space_info->full = 1;
		goto out_unlock;
	}
	BUG_ON(ret);

	ret = btrfs_make_block_group(trans, extent_root, 0, flags,
		     BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
	BUG_ON(ret);
out_unlock:
	mutex_unlock(&extent_root->fs_info->chunk_mutex);
out:
	return 0;
}
1237
/*
 * Adjust the used-byte accounting for the range [bytenr, bytenr+num_bytes)
 * across every block group it touches: add when 'alloc' is set, subtract
 * otherwise.  Touched block groups are marked BLOCK_GROUP_DIRTY so they
 * get written out at commit; freed ranges are optionally put back in the
 * free space cache when 'mark_free' is set.
 *
 * Caller must hold alloc_mutex.  Returns 0, or -1 if no block group
 * covers bytenr.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;
	u64 start;
	u64 end;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	while(total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache) {
			return -1;
		}
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);
		start = cache->key.objectid;
		end = start + cache->key.offset - 1;
		/* remember to write this block group item at commit time */
		set_extent_bits(&info->block_group_cache, start, end,
				BLOCK_GROUP_DIRTY, GFP_NOFS);

		old_val = btrfs_block_group_used(&cache->item);
		/* clamp to the part of the range inside this block group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (mark_free) {
				set_extent_dirty(&info->free_space_cache,
						 bytenr, bytenr + num_bytes - 1,
						 GFP_NOFS);
			}
		}
		btrfs_set_block_group_used(&cache->item, old_val);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}
1284
1285 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1286 {
1287         u64 start;
1288         u64 end;
1289         int ret;
1290         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1291                                     search_start, &start, &end,
1292                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1293                                     BLOCK_GROUP_SYSTEM);
1294         if (ret)
1295                 return 0;
1296         return start;
1297 }
1298
1299
/*
 * Pin or unpin the byte range [bytenr, bytenr+num): mark/clear it in the
 * pinned_extents tree and adjust the pinned counters of every block group
 * it spans (plus the space_info and fs-wide totals).
 *
 * Ranges with no covering block group still update total_pinned; the
 * chunk length is then bounded by the next block group start.  Caller
 * must hold alloc_mutex.  Always returns 0.
 */
static int update_pinned_extents(struct btrfs_root *root,
				u64 bytenr, u64 num, int pin)
{
	u64 len;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *fs_info = root->fs_info;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	if (pin) {
		set_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	} else {
		clear_extent_dirty(&fs_info->pinned_extents,
				bytenr, bytenr + num - 1, GFP_NOFS);
	}
	while (num > 0) {
		cache = btrfs_lookup_block_group(fs_info, bytenr);
		if (!cache) {
			/* gap with no block group: skip to the next one */
			u64 first = first_logical_byte(root, bytenr);
			WARN_ON(first < bytenr);
			len = min(first - bytenr, num);
		} else {
			len = min(num, cache->key.offset -
				  (bytenr - cache->key.objectid));
		}
		if (pin) {
			if (cache) {
				cache->pinned += len;
				cache->space_info->bytes_pinned += len;
			}
			fs_info->total_pinned += len;
		} else {
			if (cache) {
				cache->pinned -= len;
				cache->space_info->bytes_pinned -= len;
			}
			fs_info->total_pinned -= len;
		}
		bytenr += len;
		num -= len;
	}
	return 0;
}
1343
1344 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1345 {
1346         u64 last = 0;
1347         u64 start;
1348         u64 end;
1349         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1350         int ret;
1351
1352         while(1) {
1353                 ret = find_first_extent_bit(pinned_extents, last,
1354                                             &start, &end, EXTENT_DIRTY);
1355                 if (ret)
1356                         break;
1357                 set_extent_dirty(copy, start, end, GFP_NOFS);
1358                 last = end + 1;
1359         }
1360         return 0;
1361 }
1362
1363 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1364                                struct btrfs_root *root,
1365                                struct extent_io_tree *unpin)
1366 {
1367         u64 start;
1368         u64 end;
1369         int ret;
1370         struct extent_io_tree *free_space_cache;
1371         free_space_cache = &root->fs_info->free_space_cache;
1372
1373         mutex_lock(&root->fs_info->alloc_mutex);
1374         while(1) {
1375                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1376                                             EXTENT_DIRTY);
1377                 if (ret)
1378                         break;
1379                 update_pinned_extents(root, start, end + 1 - start, 0);
1380                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1381                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1382         }
1383         mutex_unlock(&root->fs_info->alloc_mutex);
1384         return 0;
1385 }
1386
/*
 * Flush the deferred tree-block allocations recorded in the extent_ins
 * tree: for each locked range, insert its extent item (refs == 1) plus
 * a backref owned by the extent root, using the block's first key as a
 * hint.  Caller must hold alloc_mutex.  Always returns 0.
 */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root)
{
	u64 start;
	u64 end;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct extent_buffer *eb;
	struct btrfs_path *path;
	struct btrfs_key ins;
	struct btrfs_disk_key first;
	struct btrfs_extent_item extent_item;
	int ret;
	int level;
	int err = 0;

	WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
	btrfs_set_stack_extent_refs(&extent_item, 1);
	btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
	path = btrfs_alloc_path();

	while(1) {
		ret = find_first_extent_bit(&info->extent_ins, 0, &start,
					    &end, EXTENT_LOCKED);
		if (ret)
			break;

		/* extent key: (bytenr, EXTENT_ITEM, num_bytes) */
		ins.objectid = start;
		ins.offset = end + 1 - start;
		err = btrfs_insert_item(trans, extent_root, &ins,
					&extent_item, sizeof(extent_item));
		clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
				  GFP_NOFS);
		eb = read_tree_block(extent_root, ins.objectid, ins.offset,
				     trans->transid);
		btrfs_tree_lock(eb);
		level = btrfs_header_level(eb);
		if (level == 0) {
			btrfs_item_key(eb, &first, 0);
		} else {
			btrfs_node_key(eb, &first, 0);
		}
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
		/*
		 * the first key is just a hint, so the race we've created
		 * against reading it is fine
		 */
		err = btrfs_insert_extent_backref(trans, extent_root, path,
					  start, extent_root->root_key.objectid,
					  0, level,
					  btrfs_disk_key_objectid(&first));
		BUG_ON(err);
	}
	btrfs_free_path(path);
	return 0;
}
1443
/*
 * Take the range [bytenr, bytenr+num_bytes) out of circulation.
 *
 * When 'pending' is zero the range is pinned immediately -- unless it is
 * a tree block created and never written in the running transaction, in
 * which case it is simply cleaned and the function returns 1 so the
 * caller can free the space right away.  When 'pending' is set the range
 * is only queued in the pending_del tree for later processing.
 *
 * Caller must hold alloc_mutex.  Returns 1 for the clean-block fast
 * path, 0 otherwise.
 */
static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
			  int pending)
{
	int err = 0;

	WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
	if (!pending) {
		struct extent_buffer *buf;
		buf = btrfs_find_tree_block(root, bytenr, num_bytes);
		if (buf) {
			/* try-lock only: never block while holding alloc_mutex */
			if (!btrfs_try_tree_lock(buf) &&
			    btrfs_buffer_uptodate(buf, 0)) {
				u64 transid =
				    root->fs_info->running_transaction->transid;
				u64 header_transid =
					btrfs_header_generation(buf);
				/*
				 * block born in this transaction and never
				 * written: nothing on disk references it,
				 * so it need not be pinned at all
				 */
				if (header_transid == transid &&
				    !btrfs_header_flag(buf,
					       BTRFS_HEADER_FLAG_WRITTEN)) {
					clean_tree_block(NULL, root, buf);
					btrfs_tree_unlock(buf);
					free_extent_buffer(buf);
					return 1;
				}
				btrfs_tree_unlock(buf);
			}
			free_extent_buffer(buf);
		}
		update_pinned_extents(root, bytenr, num_bytes, 1);
	} else {
		set_extent_bits(&root->fs_info->pending_del,
				bytenr, bytenr + num_bytes - 1,
				EXTENT_LOCKED, GFP_NOFS);
	}
	BUG_ON(err < 0);
	return 0;
}
1481
1482 /*
1483  * remove an extent from the root, returns 0 on success
1484  */
1485 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1486                          *root, u64 bytenr, u64 num_bytes,
1487                          u64 root_objectid, u64 ref_generation,
1488                          u64 owner_objectid, u64 owner_offset, int pin,
1489                          int mark_free)
1490 {
1491         struct btrfs_path *path;
1492         struct btrfs_key key;
1493         struct btrfs_fs_info *info = root->fs_info;
1494         struct btrfs_root *extent_root = info->extent_root;
1495         struct extent_buffer *leaf;
1496         int ret;
1497         int extent_slot = 0;
1498         int found_extent = 0;
1499         int num_to_del = 1;
1500         struct btrfs_extent_item *ei;
1501         u32 refs;
1502
1503         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
1504         key.objectid = bytenr;
1505         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1506         key.offset = num_bytes;
1507         path = btrfs_alloc_path();
1508         if (!path)
1509                 return -ENOMEM;
1510
1511         path->reada = 1;
1512         ret = lookup_extent_backref(trans, extent_root, path,
1513                                     bytenr, root_objectid,
1514                                     ref_generation,
1515                                     owner_objectid, owner_offset, 1);
1516         if (ret == 0) {
1517                 struct btrfs_key found_key;
1518                 extent_slot = path->slots[0];
1519                 while(extent_slot > 0) {
1520                         extent_slot--;
1521                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1522                                               extent_slot);
1523                         if (found_key.objectid != bytenr)
1524                                 break;
1525                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1526                             found_key.offset == num_bytes) {
1527                                 found_extent = 1;
1528                                 break;
1529                         }
1530                         if (path->slots[0] - extent_slot > 5)
1531                                 break;
1532                 }
1533                 if (!found_extent)
1534                         ret = btrfs_del_item(trans, extent_root, path);
1535         } else {
1536                 btrfs_print_leaf(extent_root, path->nodes[0]);
1537                 WARN_ON(1);
1538                 printk("Unable to find ref byte nr %Lu root %Lu "
1539                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1540                        root_objectid, ref_generation, owner_objectid,
1541                        owner_offset);
1542         }
1543         if (!found_extent) {
1544                 btrfs_release_path(extent_root, path);
1545                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1546                 if (ret < 0)
1547                         return ret;
1548                 BUG_ON(ret);
1549                 extent_slot = path->slots[0];
1550         }
1551
1552         leaf = path->nodes[0];
1553         ei = btrfs_item_ptr(leaf, extent_slot,
1554                             struct btrfs_extent_item);
1555         refs = btrfs_extent_refs(leaf, ei);
1556         BUG_ON(refs == 0);
1557         refs -= 1;
1558         btrfs_set_extent_refs(leaf, ei, refs);
1559
1560         btrfs_mark_buffer_dirty(leaf);
1561
1562         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1563                 /* if the back ref and the extent are next to each other
1564                  * they get deleted below in one shot
1565                  */
1566                 path->slots[0] = extent_slot;
1567                 num_to_del = 2;
1568         } else if (found_extent) {
1569                 /* otherwise delete the extent back ref */
1570                 ret = btrfs_del_item(trans, extent_root, path);
1571                 BUG_ON(ret);
1572                 /* if refs are 0, we need to setup the path for deletion */
1573                 if (refs == 0) {
1574                         btrfs_release_path(extent_root, path);
1575                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1576                                                 -1, 1);
1577                         if (ret < 0)
1578                                 return ret;
1579                         BUG_ON(ret);
1580                 }
1581         }
1582
1583         if (refs == 0) {
1584                 u64 super_used;
1585                 u64 root_used;
1586
1587                 if (pin) {
1588                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1589                         if (ret > 0)
1590                                 mark_free = 1;
1591                         BUG_ON(ret < 0);
1592                 }
1593
1594                 /* block accounting for super block */
1595                 spin_lock_irq(&info->delalloc_lock);
1596                 super_used = btrfs_super_bytes_used(&info->super_copy);
1597                 btrfs_set_super_bytes_used(&info->super_copy,
1598                                            super_used - num_bytes);
1599                 spin_unlock_irq(&info->delalloc_lock);
1600
1601                 /* block accounting for root item */
1602                 root_used = btrfs_root_used(&root->root_item);
1603                 btrfs_set_root_used(&root->root_item,
1604                                            root_used - num_bytes);
1605                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1606                                       num_to_del);
1607                 if (ret) {
1608                         return ret;
1609                 }
1610                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1611                                          mark_free);
1612                 BUG_ON(ret);
1613         }
1614         btrfs_free_path(path);
1615         finish_current_insert(trans, extent_root);
1616         return ret;
1617 }
1618
/*
 * find all the blocks marked as pending in the pending_del extent io
 * tree, pin them and remove the underlying extents from the extent tree
 */
1623 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1624                                btrfs_root *extent_root)
1625 {
1626         int ret;
1627         int err = 0;
1628         u64 start;
1629         u64 end;
1630         struct extent_io_tree *pending_del;
1631         struct extent_io_tree *pinned_extents;
1632
1633         WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));
1634         pending_del = &extent_root->fs_info->pending_del;
1635         pinned_extents = &extent_root->fs_info->pinned_extents;
1636
1637         while(1) {
1638                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1639                                             EXTENT_LOCKED);
1640                 if (ret)
1641                         break;
1642                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1643                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1644                                   GFP_NOFS);
1645                 ret = __free_extent(trans, extent_root,
1646                                      start, end + 1 - start,
1647                                      extent_root->root_key.objectid,
1648                                      0, 0, 0, 0, 0);
1649                 if (ret)
1650                         err = ret;
1651         }
1652         return err;
1653 }
1654
1655 /*
1656  * remove an extent from the root, returns 0 on success
1657  */
1658 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1659                                struct btrfs_root *root, u64 bytenr,
1660                                u64 num_bytes, u64 root_objectid,
1661                                u64 ref_generation, u64 owner_objectid,
1662                                u64 owner_offset, int pin)
1663 {
1664         struct btrfs_root *extent_root = root->fs_info->extent_root;
1665         int pending_ret;
1666         int ret;
1667
1668         WARN_ON(num_bytes < root->sectorsize);
1669         if (!root->ref_cows)
1670                 ref_generation = 0;
1671
1672         if (root == extent_root) {
1673                 pin_down_bytes(root, bytenr, num_bytes, 1);
1674                 return 0;
1675         }
1676         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1677                             ref_generation, owner_objectid, owner_offset,
1678                             pin, pin == 0);
1679
1680         finish_current_insert(trans, root->fs_info->extent_root);
1681         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1682         return ret ? ret : pending_ret;
1683 }
1684
1685 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1686                       struct btrfs_root *root, u64 bytenr,
1687                       u64 num_bytes, u64 root_objectid,
1688                       u64 ref_generation, u64 owner_objectid,
1689                       u64 owner_offset, int pin)
1690 {
1691         int ret;
1692
1693         maybe_lock_mutex(root);
1694         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1695                                   root_objectid, ref_generation,
1696                                   owner_objectid, owner_offset, pin);
1697         maybe_unlock_mutex(root);
1698         return ret;
1699 }
1700
1701 static u64 stripe_align(struct btrfs_root *root, u64 val)
1702 {
1703         u64 mask = ((u64)root->stripesize - 1);
1704         u64 ret = (val + mask) & ~mask;
1705         return ret;
1706 }
1707
/*
 * walks the btree of allocated extents and finds a hole of a given size.
 * The key ins is changed to record the hole:
 * ins->objectid == start of the hole
 * ins->flags == BTRFS_EXTENT_ITEM_KEY
 * ins->offset == number of bytes in the hole
 * Any available blocks before search_start are skipped.
 */
static int noinline find_free_extent(struct btrfs_trans_handle *trans,
				     struct btrfs_root *orig_root,
				     u64 num_bytes, u64 empty_size,
				     u64 search_start, u64 search_end,
				     u64 hint_byte, struct btrfs_key *ins,
				     u64 exclude_start, u64 exclude_nr,
				     int data)
{
	int ret;
	u64 orig_search_start;
	struct btrfs_root * root = orig_root->fs_info->extent_root;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total_needed = num_bytes;
	u64 *last_ptr = NULL;		/* cluster hint: end of the last alloc */
	struct btrfs_block_group_cache *block_group;
	int full_scan = 0;		/* second wrap: scanning everything */
	int wrapped = 0;		/* wrapped back to orig_search_start once */
	int chunk_alloc_done = 0;
	int empty_cluster = 2 * 1024 * 1024;
	int allowed_chunk_alloc = 0;

	WARN_ON(num_bytes < root->sectorsize);
	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);

	/* only cowing roots (or callers passing empty_size) may grow chunks */
	if (orig_root->ref_cows || empty_size)
		allowed_chunk_alloc = 1;

	/* cluster metadata allocations around the last metadata alloc */
	if (data & BTRFS_BLOCK_GROUP_METADATA) {
		last_ptr = &root->fs_info->last_alloc;
		empty_cluster = 256 * 1024;
	}

	/* with -o ssd, cluster data allocations as well */
	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
		last_ptr = &root->fs_info->last_data_alloc;
	}

	if (last_ptr) {
		if (*last_ptr)
			hint_byte = *last_ptr;
		else {
			/* no cluster yet: reserve room to start a fresh one */
			empty_size += empty_cluster;
		}
	}

	search_start = max(search_start, first_logical_byte(root, 0));
	orig_search_start = search_start;

	if (search_end == (u64)-1)
		search_end = btrfs_super_total_bytes(&info->super_copy);

	/* pick a starting block group from the hint or the transaction */
	if (hint_byte) {
		block_group = btrfs_lookup_first_block_group(info, hint_byte);
		if (!block_group)
			hint_byte = search_start;
		block_group = __btrfs_find_block_group(root, block_group,
						     hint_byte, data, 1);
		if (last_ptr && *last_ptr == 0 && block_group)
			hint_byte = block_group->key.objectid;
	} else {
		block_group = __btrfs_find_block_group(root,
						     trans->block_group,
						     search_start, data, 1);
	}
	search_start = max(search_start, hint_byte);

	total_needed += empty_size;

check_failed:
	if (!block_group) {
		block_group = btrfs_lookup_first_block_group(info,
							     search_start);
		if (!block_group)
			block_group = btrfs_lookup_first_block_group(info,
						       orig_search_start);
	}
	/* a full scan found nothing: allocate a new chunk, or force one */
	if (full_scan && !chunk_alloc_done) {
		if (allowed_chunk_alloc) {
			do_chunk_alloc(trans, root,
				     num_bytes + 2 * 1024 * 1024, data, 1);
			allowed_chunk_alloc = 0;
		} else if (block_group && block_group_bits(block_group, data)) {
			block_group->space_info->force_alloc = 1;
		}
		chunk_alloc_done = 1;
	}
	ret = find_search_start(root, &block_group, &search_start,
				total_needed, data);
	/* cluster hint failed: drop it and retry from the beginning */
	if (ret == -ENOSPC && last_ptr && *last_ptr) {
		*last_ptr = 0;
		block_group = btrfs_lookup_first_block_group(info,
							     orig_search_start);
		search_start = orig_search_start;
		ret = find_search_start(root, &block_group, &search_start,
					total_needed, data);
	}
	if (ret == -ENOSPC)
		goto enospc;
	if (ret)
		goto error;

	/* landed outside the cluster: restart with room for a new one */
	if (last_ptr && *last_ptr && search_start != *last_ptr) {
		*last_ptr = 0;
		if (!empty_size) {
			empty_size += empty_cluster;
			total_needed += empty_size;
		}
		block_group = btrfs_lookup_first_block_group(info,
						       orig_search_start);
		search_start = orig_search_start;
		ret = find_search_start(root, &block_group,
					&search_start, total_needed, data);
		if (ret == -ENOSPC)
			goto enospc;
		if (ret)
			goto error;
	}

	search_start = stripe_align(root, search_start);
	ins->objectid = search_start;
	ins->offset = num_bytes;

	if (ins->objectid + num_bytes >= search_end)
		goto enospc;

	/* hole runs past the end of this block group: move to the next */
	if (ins->objectid + num_bytes >
	    block_group->key.objectid + block_group->key.offset) {
		search_start = block_group->key.objectid +
			block_group->key.offset;
		goto new_group;
	}

	/* skip ranges with an extent insert still pending */
	if (test_range_bit(&info->extent_ins, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}

	/* skip ranges pinned and not yet safe to reuse */
	if (test_range_bit(&info->pinned_extents, ins->objectid,
			   ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
		search_start = ins->objectid + num_bytes;
		goto new_group;
	}

	/* honor the caller's excluded range */
	if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
	    ins->objectid < exclude_start + exclude_nr)) {
		search_start = exclude_start + exclude_nr;
		goto new_group;
	}

	/* remember the winning non-data block group on the transaction */
	if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
		block_group = btrfs_lookup_block_group(info, ins->objectid);
		if (block_group)
			trans->block_group = block_group;
	}
	ins->offset = num_bytes;
	if (last_ptr) {
		/* advance the cluster hint; wrap it at end of the space */
		*last_ptr = ins->objectid + ins->offset;
		if (*last_ptr ==
		    btrfs_super_total_bytes(&root->fs_info->super_copy)) {
			*last_ptr = 0;
		}
	}
	return 0;

new_group:
	if (search_start + num_bytes >= search_end) {
enospc:
		search_start = orig_search_start;
		/* first wrap retries, second wrap scans fully, then give up */
		if (full_scan) {
			ret = -ENOSPC;
			goto error;
		}
		if (wrapped) {
			if (!full_scan)
				total_needed -= empty_size;
			full_scan = 1;
		} else
			wrapped = 1;
	}
	block_group = btrfs_lookup_first_block_group(info, search_start);
	cond_resched();
	block_group = __btrfs_find_block_group(root, block_group,
					     search_start, data, 0);
	goto check_failed;

error:
	return ret;
}
1904
/*
 * reserve a free extent of num_bytes (shrinking by halves toward
 * min_alloc_size on -ENOSPC) and record it in ins.  The allocation
 * profile flags are derived from the data/system/metadata availability
 * bits.  On success the range is removed from the free space cache.
 * BUGs if no space can be found at min_alloc_size.
 */
static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 num_bytes, u64 min_alloc_size,
				  u64 empty_size, u64 hint_byte,
				  u64 search_end, struct btrfs_key *ins,
				  u64 data)
{
	int ret;
	u64 search_start = 0;
	u64 alloc_profile;
	struct btrfs_fs_info *info = root->fs_info;

	/* turn the caller's data flag into full block group type flags */
	if (data) {
		alloc_profile = info->avail_data_alloc_bits &
				info->data_alloc_profile;
		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
	} else if (root == root->fs_info->chunk_root) {
		alloc_profile = info->avail_system_alloc_bits &
				info->system_alloc_profile;
		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
	} else {
		alloc_profile = info->avail_metadata_alloc_bits &
				info->metadata_alloc_profile;
		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
	}
again:
	data = reduce_alloc_profile(root, data);
	/*
	 * the only place that sets empty_size is btrfs_realloc_node, which
	 * is not called recursively on allocations
	 */
	if (empty_size || root->ref_cows) {
		/* pre-grow metadata space when allocating data chunks */
		if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     2 * 1024 * 1024,
				     BTRFS_BLOCK_GROUP_METADATA |
				     (info->metadata_alloc_profile &
				      info->avail_metadata_alloc_bits), 0);
			BUG_ON(ret);
		}
		ret = do_chunk_alloc(trans, root->fs_info->extent_root,
				     num_bytes + 2 * 1024 * 1024, data, 0);
		BUG_ON(ret);
	}

	WARN_ON(num_bytes < root->sectorsize);
	ret = find_free_extent(trans, root, num_bytes, empty_size,
			       search_start, search_end, hint_byte, ins,
			       trans->alloc_exclude_start,
			       trans->alloc_exclude_nr, data);

	/*
	 * out of space: halve the request (but not below min_alloc_size),
	 * force a chunk allocation and retry
	 */
	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
		num_bytes = num_bytes >> 1;
		num_bytes = max(num_bytes, min_alloc_size);
		do_chunk_alloc(trans, root->fs_info->extent_root,
			       num_bytes, data, 1);
		goto again;
	}
	if (ret) {
		printk("allocation failed flags %Lu\n", data);
		BUG();
	}
	/* the range is in use now: drop it from the free space cache */
	clear_extent_dirty(&root->fs_info->free_space_cache,
			   ins->objectid, ins->objectid + ins->offset - 1,
			   GFP_NOFS);
	return 0;
}
1972
1973 int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
1974                                   struct btrfs_root *root,
1975                                   u64 num_bytes, u64 min_alloc_size,
1976                                   u64 empty_size, u64 hint_byte,
1977                                   u64 search_end, struct btrfs_key *ins,
1978                                   u64 data)
1979 {
1980         int ret;
1981         maybe_lock_mutex(root);
1982         ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
1983                                      empty_size, hint_byte, search_end, ins,
1984                                      data);
1985         maybe_unlock_mutex(root);
1986         return ret;
1987 }
1988
/*
 * insert the extent item and its back reference for a range previously
 * reserved by __btrfs_reserve_extent, and update the super block, root
 * item and block group byte accounting.
 */
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 u64 root_objectid, u64 ref_generation,
					 u64 owner, u64 owner_offset,
					 struct btrfs_key *ins)
{
	int ret;
	int pending_ret;
	u64 super_used;
	u64 root_used;
	u64 num_bytes = ins->offset;
	u32 sizes[2];
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_ref *ref;
	struct btrfs_path *path;
	struct btrfs_key keys[2];

	/* block accounting for super block */
	spin_lock_irq(&info->delalloc_lock);
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
	spin_unlock_irq(&info->delalloc_lock);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);

	/*
	 * inserts into the extent tree itself are deferred: queue the
	 * range in extent_ins for finish_current_insert to handle
	 */
	if (root == extent_root) {
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_LOCKED, GFP_NOFS);
		goto update_block;
	}

	/* insert the extent item and its back reference in one batch */
	memcpy(&keys[0], ins, sizeof(*ins));
	keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
					 owner, owner_offset);
	keys[1].objectid = ins->objectid;
	keys[1].type = BTRFS_EXTENT_REF_KEY;
	sizes[0] = sizeof(*extent_item);
	sizes[1] = sizeof(*ref);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
				       sizes, 2);

	BUG_ON(ret);
	extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
				     struct btrfs_extent_item);
	btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
			     struct btrfs_extent_ref);

	btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
	btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
	btrfs_set_ref_objectid(path->nodes[0], ref, owner);
	btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	/* the reserved range is on disk now; clear the exclusion window */
	trans->alloc_exclude_start = 0;
	trans->alloc_exclude_nr = 0;
	btrfs_free_path(path);
	finish_current_insert(trans, extent_root);
	pending_ret = del_pending_extents(trans, extent_root);

	if (ret)
		goto out;
	if (pending_ret) {
		ret = pending_ret;
		goto out;
	}

update_block:
	ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
	if (ret) {
		printk("update block group failed for %Lu %Lu\n",
		       ins->objectid, ins->offset);
		BUG();
	}
out:
	return ret;
}
2076
2077 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
2078                                 struct btrfs_root *root,
2079                                 u64 root_objectid, u64 ref_generation,
2080                                 u64 owner, u64 owner_offset,
2081                                 struct btrfs_key *ins)
2082 {
2083         int ret;
2084         maybe_lock_mutex(root);
2085         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2086                                             ref_generation, owner,
2087                                             owner_offset, ins);
2088         maybe_unlock_mutex(root);
2089         return ret;
2090 }
/*
 * finds a free extent and does all the dirty work required for allocation
 * returns the key for the allocated extent through ins.
 *
 * returns 0 if everything worked, non-zero otherwise.
 */
2098 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
2099                        struct btrfs_root *root,
2100                        u64 num_bytes, u64 min_alloc_size,
2101                        u64 root_objectid, u64 ref_generation,
2102                        u64 owner, u64 owner_offset,
2103                        u64 empty_size, u64 hint_byte,
2104                        u64 search_end, struct btrfs_key *ins, u64 data)
2105 {
2106         int ret;
2107
2108         maybe_lock_mutex(root);
2109
2110         ret = __btrfs_reserve_extent(trans, root, num_bytes,
2111                                      min_alloc_size, empty_size, hint_byte,
2112                                      search_end, ins, data);
2113         BUG_ON(ret);
2114         ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid,
2115                                             ref_generation, owner,
2116                                             owner_offset, ins);
2117         BUG_ON(ret);
2118
2119         maybe_unlock_mutex(root);
2120         return ret;
2121 }
/*
 * helper function to allocate a block for a given tree
 * returns the tree buffer or an ERR_PTR on failure.
 */
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
					     struct btrfs_root *root,
					     u32 blocksize,
					     u64 root_objectid,
					     u64 ref_generation,
					     u64 first_objectid,
					     int level,
					     u64 hint,
					     u64 empty_size)
{
	struct btrfs_key ins;
	int ret;
	struct extent_buffer *buf;

	ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
				 root_objectid, ref_generation,
				 level, first_objectid, empty_size, hint,
				 (u64)-1, &ins, 0);
	if (ret) {
		BUG_ON(ret > 0);
		return ERR_PTR(ret);
	}
	buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
	if (!buf) {
		/* could not set up a buffer: undo the extent allocation */
		btrfs_free_extent(trans, root, ins.objectid, blocksize,
				  root->root_key.objectid, ref_generation,
				  0, 0, 0);
		return ERR_PTR(-ENOMEM);
	}
	btrfs_set_header_generation(buf, trans->transid);
	/* the buffer is returned with the tree lock held; caller unlocks */
	btrfs_tree_lock(buf);
	clean_tree_block(trans, root, buf);
	btrfs_set_buffer_uptodate(buf);

	if (PageDirty(buf->first_page)) {
		printk("page %lu dirty\n", buf->first_page->index);
		WARN_ON(1);
	}

	/* make sure the new block is written out with this transaction */
	set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
			 buf->start + buf->len - 1, GFP_NOFS);
	trans->blocks_used++;
	return buf;
}
2170
/*
 * drop the references held by one leaf's file extent items, for use
 * when the leaf itself is being freed.  Called with the alloc_mutex
 * held; the mutex is dropped while scanning the leaf and retaken
 * around each __btrfs_free_extent call, and is held again on return.
 */
static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct extent_buffer *leaf)
{
	u64 leaf_owner;
	u64 leaf_generation;
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	int i;
	int nritems;
	int ret;

	BUG_ON(!btrfs_is_leaf(leaf));
	nritems = btrfs_header_nritems(leaf);
	leaf_owner = btrfs_header_owner(leaf);
	leaf_generation = btrfs_header_generation(leaf);

	mutex_unlock(&root->fs_info->alloc_mutex);

	for (i = 0; i < nritems; i++) {
		u64 disk_bytenr;

		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		/* inline extents live inside the leaf: nothing to free */
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		/*
		 * FIXME make sure to insert a trans record that
		 * repeats the snapshot del on crash
		 */
		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		/* a zero disk_bytenr is skipped (presumably a hole) */
		if (disk_bytenr == 0)
			continue;

		mutex_lock(&root->fs_info->alloc_mutex);
		ret = __btrfs_free_extent(trans, root, disk_bytenr,
				btrfs_file_extent_disk_num_bytes(leaf, fi),
				leaf_owner, leaf_generation,
				key.objectid, key.offset, 0);
		mutex_unlock(&root->fs_info->alloc_mutex);
		BUG_ON(ret);
	}

	mutex_lock(&root->fs_info->alloc_mutex);
	return 0;
}
2220
/*
 * best-effort readahead of up to 32 blocks pointed to from 'node',
 * starting at 'slot'.  Blocks far (> 32k) from the previous one and
 * blocks with more than one reference are skipped; bails out after 32
 * skips or on a readahead error.
 */
static void noinline reada_walk_down(struct btrfs_root *root,
				     struct extent_buffer *node,
				     int slot)
{
	u64 bytenr;
	u64 last = 0;
	u32 nritems;
	u32 refs;
	u32 blocksize;
	int ret;
	int i;
	int level;
	int skipped = 0;

	nritems = btrfs_header_nritems(node);
	level = btrfs_header_level(node);
	if (level)
		return;

	/*
	 * NOTE(review): level is always 0 past the check above, so
	 * btrfs_level_size(root, level - 1) is called with -1 and the
	 * block pointers are read from a leaf; verify the intended
	 * level test (level != 1 would match the blocksize computation).
	 */
	for (i = slot; i < nritems && skipped < 32; i++) {
		bytenr = btrfs_node_blockptr(node, i);
		/* skip blocks more than 32k away from the last one queued */
		if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
			     (last > bytenr && last - bytenr > 32 * 1024))) {
			skipped++;
			continue;
		}
		blocksize = btrfs_level_size(root, level - 1);
		if (i != slot) {
			ret = lookup_extent_ref(NULL, root, bytenr,
						blocksize, &refs);
			BUG_ON(ret);
			/* shared blocks are skipped */
			if (refs != 1) {
				skipped++;
				continue;
			}
		}
		ret = readahead_tree_block(root, bytenr, blocksize,
					   btrfs_node_ptr_generation(node, i));
		last = bytenr + blocksize;
		cond_resched();
		if (ret)
			break;
	}
}
2265
2266 /*
2267  * we want to avoid as much random IO as we can with the alloc mutex
2268  * held, so drop the lock and do the lookup, then do it again with the
2269  * lock held.
2270  */
int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, u64 len,
			      u32 *refs)
{
	/* first lookup is done unlocked just to warm things up; its
	 * result (and return value) is intentionally discarded */
	mutex_unlock(&root->fs_info->alloc_mutex);
	lookup_extent_ref(NULL, root, start, len, refs);
	cond_resched();
	mutex_lock(&root->fs_info->alloc_mutex);
	/* authoritative lookup, repeated under the alloc mutex */
	return lookup_extent_ref(NULL, root, start, len, refs);
}
2280
2281 /*
2282  * helper function for drop_snapshot, this walks down the tree dropping ref
2283  * counts as it goes.
2284  */
static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_path *path, int *level)
{
        u64 root_owner;
        u64 root_gen;
        u64 bytenr;
        u64 ptr_gen;
        struct extent_buffer *next;
        struct extent_buffer *cur;
        struct extent_buffer *parent;
        u32 blocksize;
        int ret;
        u32 refs;

        mutex_lock(&root->fs_info->alloc_mutex);

        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
        /* if the block we start from is shared (refs > 1), the whole
         * subtree is still referenced by someone else; just drop the
         * single ref at the bottom of this function and stop */
        ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start,
                                path->nodes[*level]->len, &refs);
        BUG_ON(ret);
        if (refs > 1)
                goto out;

        /*
         * walk down to the last node level and free all the leaves
         */
        while(*level >= 0) {
                WARN_ON(*level < 0);
                WARN_ON(*level >= BTRFS_MAX_LEVEL);
                cur = path->nodes[*level];

                if (btrfs_header_level(cur) != *level)
                        WARN_ON(1);

                /* every slot in this node processed; pop back up so
                 * walk_up_tree can advance to the next slot */
                if (path->slots[*level] >=
                    btrfs_header_nritems(cur))
                        break;
                if (*level == 0) {
                        /* leaf: drop the refs held on file extents */
                        ret = drop_leaf_ref(trans, root, cur);
                        BUG_ON(ret);
                        break;
                }
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
                blocksize = btrfs_level_size(root, *level - 1);

                ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs);
                BUG_ON(ret);
                /* shared child: drop our ref on it and move on without
                 * descending into it */
                if (refs != 1) {
                        parent = path->nodes[*level];
                        root_owner = btrfs_header_owner(parent);
                        root_gen = btrfs_header_generation(parent);
                        path->slots[*level]++;
                        ret = __btrfs_free_extent(trans, root, bytenr,
                                                blocksize, root_owner,
                                                root_gen, 0, 0, 1);
                        BUG_ON(ret);
                        continue;
                }
                next = btrfs_find_tree_block(root, bytenr, blocksize);
                if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
                        free_extent_buffer(next);
                        /* the child must be read from disk; drop the
                         * alloc mutex across the IO so allocations can
                         * make progress meanwhile */
                        mutex_unlock(&root->fs_info->alloc_mutex);

                        if (path->slots[*level] == 0)
                                reada_walk_down(root, cur, path->slots[*level]);

                        next = read_tree_block(root, bytenr, blocksize,
                                               ptr_gen);
                        cond_resched();
                        mutex_lock(&root->fs_info->alloc_mutex);

                        /* we've dropped the lock, double check */
                        ret = lookup_extent_ref(NULL, root, bytenr, blocksize,
                                                &refs);
                        BUG_ON(ret);
                        if (refs != 1) {
                                /* became shared while the lock was
                                 * dropped; treat as the shared case */
                                parent = path->nodes[*level];
                                root_owner = btrfs_header_owner(parent);
                                root_gen = btrfs_header_generation(parent);

                                path->slots[*level]++;
                                free_extent_buffer(next);
                                ret = __btrfs_free_extent(trans, root, bytenr,
                                                        blocksize,
                                                        root_owner,
                                                        root_gen, 0, 0, 1);
                                BUG_ON(ret);
                                continue;
                        }
                }
                WARN_ON(*level <= 0);
                /* descend into the exclusively owned child */
                if (path->nodes[*level-1])
                        free_extent_buffer(path->nodes[*level-1]);
                path->nodes[*level-1] = next;
                *level = btrfs_header_level(next);
                path->slots[*level] = 0;
        }
out:
        WARN_ON(*level < 0);
        WARN_ON(*level >= BTRFS_MAX_LEVEL);

        /* free the block the walk stopped on and pop up one level */
        if (path->nodes[*level] == root->node) {
                root_owner = root->root_key.objectid;
                parent = path->nodes[*level];
        } else {
                parent = path->nodes[*level + 1];
                root_owner = btrfs_header_owner(parent);
        }

        root_gen = btrfs_header_generation(parent);
        ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
                                path->nodes[*level]->len,
                                root_owner, root_gen, 0, 0, 1);
        free_extent_buffer(path->nodes[*level]);
        path->nodes[*level] = NULL;
        *level += 1;
        BUG_ON(ret);
        mutex_unlock(&root->fs_info->alloc_mutex);
        cond_resched();
        return 0;
}
2409
2410 /*
2411  * helper for dropping snapshots.  This walks back up the tree in the path
2412  * to find the first node higher up where we haven't yet gone through
2413  * all the slots
2414  */
2415 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2416                                  struct btrfs_root *root,
2417                                  struct btrfs_path *path, int *level)
2418 {
2419         u64 root_owner;
2420         u64 root_gen;
2421         struct btrfs_root_item *root_item = &root->root_item;
2422         int i;
2423         int slot;
2424         int ret;
2425
2426         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2427                 slot = path->slots[i];
2428                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2429                         struct extent_buffer *node;
2430                         struct btrfs_disk_key disk_key;
2431                         node = path->nodes[i];
2432                         path->slots[i]++;
2433                         *level = i;
2434                         WARN_ON(*level == 0);
2435                         btrfs_node_key(node, &disk_key, path->slots[i]);
2436                         memcpy(&root_item->drop_progress,
2437                                &disk_key, sizeof(disk_key));
2438                         root_item->drop_level = i;
2439                         return 0;
2440                 } else {
2441                         if (path->nodes[*level] == root->node) {
2442                                 root_owner = root->root_key.objectid;
2443                                 root_gen =
2444                                    btrfs_header_generation(path->nodes[*level]);
2445                         } else {
2446                                 struct extent_buffer *node;
2447                                 node = path->nodes[*level + 1];
2448                                 root_owner = btrfs_header_owner(node);
2449                                 root_gen = btrfs_header_generation(node);
2450                         }
2451                         ret = btrfs_free_extent(trans, root,
2452                                                 path->nodes[*level]->start,
2453                                                 path->nodes[*level]->len,
2454                                                 root_owner, root_gen, 0, 0, 1);
2455                         BUG_ON(ret);
2456                         free_extent_buffer(path->nodes[*level]);
2457                         path->nodes[*level] = NULL;
2458                         *level = i + 1;
2459                 }
2460         }
2461         return 1;
2462 }
2463
2464 /*
2465  * drop the reference count on the tree rooted at 'snap'.  This traverses
2466  * the tree freeing any blocks that have a ref count of zero after being
2467  * decremented.
2468  */
int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                        *root)
{
        int ret = 0;
        int wret;
        int level;
        struct btrfs_path *path;
        int i;
        int orig_level;
        struct btrfs_root_item *root_item = &root->root_item;

        WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
        path = btrfs_alloc_path();
        BUG_ON(!path);

        level = btrfs_header_level(root->node);
        orig_level = level;
        /* a zeroed drop_progress means this is a brand-new drop, start
         * at the tree root.  Otherwise resume from the key/level a
         * previous (-EAGAIN interrupted) pass recorded. */
        if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
                path->nodes[level] = root->node;
                extent_buffer_get(root->node);
                path->slots[level] = 0;
        } else {
                struct btrfs_key key;
                struct btrfs_disk_key found_key;
                struct extent_buffer *node;

                btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
                level = root_item->drop_level;
                path->lowest_level = level;
                wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (wret < 0) {
                        ret = wret;
                        goto out;
                }
                /* sanity check that we resumed at the saved position */
                node = path->nodes[level];
                btrfs_node_key(node, &found_key, path->slots[level]);
                WARN_ON(memcmp(&found_key, &root_item->drop_progress,
                               sizeof(found_key)));
                /*
                 * unlock our path, this is safe because only this
                 * function is allowed to delete this snapshot
                 */
                for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
                        if (path->nodes[i] && path->locks[i]) {
                                path->locks[i] = 0;
                                btrfs_tree_unlock(path->nodes[i]);
                        }
                }
        }
        /* alternate walking down (freeing blocks) and walking up
         * (finding the next unprocessed slot) until done */
        while(1) {
                wret = walk_down_tree(trans, root, path, &level);
                if (wret > 0)
                        break;
                if (wret < 0)
                        ret = wret;

                wret = walk_up_tree(trans, root, path, &level);
                if (wret > 0)
                        break;
                if (wret < 0)
                        ret = wret;
                /* don't hold up a transaction commit; the caller can
                 * restart the drop from the saved progress */
                if (trans->transaction->in_commit) {
                        ret = -EAGAIN;
                        break;
                }
        }
        /* release whatever the walk left pinned in the path */
        for (i = 0; i <= orig_level; i++) {
                if (path->nodes[i]) {
                        free_extent_buffer(path->nodes[i]);
                        path->nodes[i] = NULL;
                }
        }
out:
        btrfs_free_path(path);
        return ret;
}
2545
2546 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2547 {
2548         u64 start;
2549         u64 end;
2550         u64 ptr;
2551         int ret;
2552
2553         mutex_lock(&info->alloc_mutex);
2554         while(1) {
2555                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2556                                             &start, &end, (unsigned int)-1);
2557                 if (ret)
2558                         break;
2559                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2560                 if (!ret)
2561                         kfree((void *)(unsigned long)ptr);
2562                 clear_extent_bits(&info->block_group_cache, start,
2563                                   end, (unsigned int)-1, GFP_NOFS);
2564         }
2565         while(1) {
2566                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2567                                             &start, &end, EXTENT_DIRTY);
2568                 if (ret)
2569                         break;
2570                 clear_extent_dirty(&info->free_space_cache, start,
2571                                    end, GFP_NOFS);
2572         }
2573         mutex_unlock(&info->alloc_mutex);
2574         return 0;
2575 }
2576
/* end of the readahead window starting at 'start': at most 'nr' pages,
 * but never beyond 'last' */
static unsigned long calc_ra(unsigned long start, unsigned long last,
                             unsigned long nr)
{
        unsigned long want = start + nr - 1;

        return want < last ? want : last;
}
2582
2583 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2584                                          u64 len)
2585 {
2586         u64 page_start;
2587         u64 page_end;
2588         unsigned long last_index;
2589         unsigned long i;
2590         struct page *page;
2591         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2592         struct file_ra_state *ra;
2593         unsigned long total_read = 0;
2594         unsigned long ra_pages;
2595         struct btrfs_trans_handle *trans;
2596
2597         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2598
2599         mutex_lock(&inode->i_mutex);
2600         i = start >> PAGE_CACHE_SHIFT;
2601         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2602
2603         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2604
2605         file_ra_state_init(ra, inode->i_mapping);
2606
2607         for (; i <= last_index; i++) {
2608                 if (total_read % ra_pages == 0) {
2609                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2610                                        calc_ra(i, last_index, ra_pages));
2611                 }
2612                 total_read++;
2613                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2614                         goto truncate_racing;
2615
2616                 page = grab_cache_page(inode->i_mapping, i);
2617                 if (!page) {
2618                         goto out_unlock;
2619                 }
2620                 if (!PageUptodate(page)) {
2621                         btrfs_readpage(NULL, page);
2622                         lock_page(page);
2623                         if (!PageUptodate(page)) {
2624                                 unlock_page(page);
2625                                 page_cache_release(page);
2626                                 goto out_unlock;
2627                         }
2628                 }
2629 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2630                 ClearPageDirty(page);
2631 #else
2632                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2633 #endif
2634                 wait_on_page_writeback(page);
2635                 set_page_extent_mapped(page);
2636                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2637                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2638
2639                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2640
2641                 set_extent_delalloc(io_tree, page_start,
2642                                     page_end, GFP_NOFS);
2643                 set_page_dirty(page);
2644
2645                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2646                 unlock_page(page);
2647                 page_cache_release(page);
2648         }
2649         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2650                                            total_read);
2651
2652 out_unlock:
2653         kfree(ra);
2654         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2655         if (trans) {
2656                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2657                 mark_inode_dirty(inode);
2658         }
2659         mutex_unlock(&inode->i_mutex);
2660         return 0;
2661
2662 truncate_racing:
2663         vmtruncate(inode, inode->i_size);
2664         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2665                                            total_read);
2666         goto out_unlock;
2667 }
2668
2669 /*
2670  * The back references tell us which tree holds a ref on a block,
2671  * but it is possible for the tree root field in the reference to
2672  * reflect the original root before a snapshot was made.  In this
2673  * case we should search through all the children of a given root
2674  * to find potential holders of references on a block.
2675  *
2676  * Instead, we do something a little less fancy and just search
2677  * all the roots for a given key/block combination.
2678  */
static int find_root_for_ref(struct btrfs_root *root,
                             struct btrfs_path *path,
                             struct btrfs_key *key0,
                             int level,
                             int file_key,
                             struct btrfs_root **found_root,
                             u64 bytenr)
{
        struct btrfs_key root_location;
        struct btrfs_root *cur_root = *found_root;
        struct btrfs_file_extent_item *file_extent;
        u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
        u64 found_bytenr;
        int ret;

        root_location.offset = (u64)-1;
        root_location.type = BTRFS_ROOT_ITEM_KEY;
        path->lowest_level = level;
        path->reada = 0;
        /* try the hinted root first, then every other fs root in turn,
         * until one of them resolves key0 to the block at 'bytenr' */
        while(1) {
                ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
                found_bytenr = 0;
                if (ret == 0 && file_key) {
                        /* file data search: compare the disk bytenr the
                         * found file extent item points at */
                        struct extent_buffer *leaf = path->nodes[0];
                        file_extent = btrfs_item_ptr(leaf, path->slots[0],
                                             struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(leaf, file_extent) ==
                            BTRFS_FILE_EXTENT_REG) {
                                found_bytenr =
                                        btrfs_file_extent_disk_bytenr(leaf,
                                                               file_extent);
                        }
                } else if (!file_key) {
                        /* tree block search: compare the node the path
                         * landed on at 'level' */
                        if (path->nodes[level])
                                found_bytenr = path->nodes[level]->start;
                }

                btrfs_release_path(cur_root, path);

                if (found_bytenr == bytenr) {
                        *found_root = cur_root;
                        ret = 0;
                        goto out;
                }
                /* no match; move on to the next root item */
                ret = btrfs_search_root(root->fs_info->tree_root,
                                        root_search_start, &root_search_start);
                if (ret)
                        break;

                root_location.objectid = root_search_start;
                cur_root = btrfs_read_fs_root_no_name(root->fs_info,
                                                      &root_location);
                if (!cur_root) {
                        ret = 1;
                        break;
                }
        }
out:
        path->lowest_level = 0;
        return ret;
}
2740
2741 /*
2742  * note, this releases the path
2743  */
static int noinline relocate_one_reference(struct btrfs_root *extent_root,
                                  struct btrfs_path *path,
                                  struct btrfs_key *extent_key,
                                  u64 *last_file_objectid,
                                  u64 *last_file_offset,
                                  u64 *last_file_root,
                                  u64 last_extent)
{
        struct inode *inode;
        struct btrfs_root *found_root;
        struct btrfs_key root_location;
        struct btrfs_key found_key;
        struct btrfs_extent_ref *ref;
        u64 ref_root;
        u64 ref_gen;
        u64 ref_objectid;
        u64 ref_offset;
        int ret;
        int level;

        WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex));

        /* pull the owner info out of the backref item before the path
         * is released */
        ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
                             struct btrfs_extent_ref);
        ref_root = btrfs_ref_root(path->nodes[0], ref);
        ref_gen = btrfs_ref_generation(path->nodes[0], ref);
        ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
        ref_offset = btrfs_ref_offset(path->nodes[0], ref);
        btrfs_release_path(extent_root, path);

        root_location.objectid = ref_root;
        if (ref_gen == 0)
                root_location.offset = 0;
        else
                root_location.offset = (u64)-1;
        root_location.type = BTRFS_ROOT_ITEM_KEY;

        found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
                                                &root_location);
        BUG_ON(!found_root);
        /* the work below does IO and starts transactions; drop the
         * alloc mutex here, the out: label retakes it for the caller */
        mutex_unlock(&extent_root->fs_info->alloc_mutex);

        if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
                /* the ref is against file data: redirty the pages so
                 * the next writeback allocates them somewhere else */
                found_key.objectid = ref_objectid;
                found_key.type = BTRFS_EXTENT_DATA_KEY;
                found_key.offset = ref_offset;
                level = 0;

                /* skip if this is the same file range we just did */
                if (last_extent == extent_key->objectid &&
                    *last_file_objectid == ref_objectid &&
                    *last_file_offset == ref_offset &&
                    *last_file_root == ref_root)
                        goto out;

                ret = find_root_for_ref(extent_root, path, &found_key,
                                        level, 1, &found_root,
                                        extent_key->objectid);

                if (ret)
                        goto out;

                /* NOTE(review): same condition rechecked after the root
                 * search — presumably guards against another pass having
                 * handled this range while the path was dropped; confirm */
                if (last_extent == extent_key->objectid &&
                    *last_file_objectid == ref_objectid &&
                    *last_file_offset == ref_offset &&
                    *last_file_root == ref_root)
                        goto out;

                inode = btrfs_iget_locked(extent_root->fs_info->sb,
                                          ref_objectid, found_root);
                if (inode->i_state & I_NEW) {
                        /* the inode and parent dir are two different roots */
                        BTRFS_I(inode)->root = found_root;
                        BTRFS_I(inode)->location.objectid = ref_objectid;
                        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
                        BTRFS_I(inode)->location.offset = 0;
                        btrfs_read_locked_inode(inode);
                        unlock_new_inode(inode);

                }
                /* this can happen if the reference is not against
                 * the latest version of the tree root
                 */
                if (is_bad_inode(inode))
                        goto out;

                *last_file_objectid = inode->i_ino;
                *last_file_root = found_root->root_key.objectid;
                *last_file_offset = ref_offset;

                relocate_inode_pages(inode, ref_offset, extent_key->offset);
                iput(inode);
        } else {
                struct btrfs_trans_handle *trans;
                struct extent_buffer *eb;
                int needs_lock = 0;

                /* the ref is against a tree block: find its first key,
                 * then cow it in place so it moves */
                eb = read_tree_block(found_root, extent_key->objectid,
                                     extent_key->offset, 0);
                btrfs_tree_lock(eb);
                level = btrfs_header_level(eb);

                if (level == 0)
                        btrfs_item_key_to_cpu(eb, &found_key, 0);
                else
                        btrfs_node_key_to_cpu(eb, &found_key, 0);

                btrfs_tree_unlock(eb);
                free_extent_buffer(eb);

                ret = find_root_for_ref(extent_root, path, &found_key,
                                        level, 0, &found_root,
                                        extent_key->objectid);

                if (ret)
                        goto out;

                /*
                 * right here almost anything could happen to our key,
                 * but that's ok.  The cow below will either relocate it
                 * or someone else will have relocated it.  Either way,
                 * it is in a different spot than it was before and
                 * we're happy.
                 */

                trans = btrfs_start_transaction(found_root, 1);

                /* cow on the meta trees requires the alloc mutex */
                if (found_root == extent_root->fs_info->extent_root ||
                    found_root == extent_root->fs_info->chunk_root ||
                    found_root == extent_root->fs_info->dev_root) {
                        needs_lock = 1;
                        mutex_lock(&extent_root->fs_info->alloc_mutex);
                }

                path->lowest_level = level;
                path->reada = 2;
                /* the search with cow=1 performs the actual relocation */
                ret = btrfs_search_slot(trans, found_root, &found_key, path,
                                        0, 1);
                path->lowest_level = 0;
                btrfs_release_path(found_root, path);

                if (found_root == found_root->fs_info->extent_root)
                        btrfs_extent_post_op(trans, found_root);
                if (needs_lock)
                        mutex_unlock(&extent_root->fs_info->alloc_mutex);

                btrfs_end_transaction(trans, found_root);

        }
out:
        /* re-take the mutex the caller expects to still hold */
        mutex_lock(&extent_root->fs_info->alloc_mutex);
        return 0;
}
2896
2897 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2898                                     struct btrfs_path *path,
2899                                     struct btrfs_key *extent_key)
2900 {
2901         int ret;
2902         struct btrfs_trans_handle *trans;
2903
2904         trans = btrfs_start_transaction(extent_root, 1);
2905         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2906         if (ret > 0) {
2907                 ret = -EIO;
2908                 goto out;
2909         }
2910         if (ret < 0)
2911                 goto out;
2912         ret = btrfs_del_item(trans, extent_root, path);
2913 out:
2914         btrfs_end_transaction(trans, extent_root);
2915         return ret;
2916 }
2917
2918 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2919                                         struct btrfs_path *path,
2920                                         struct btrfs_key *extent_key)
2921 {
2922         struct btrfs_key key;
2923         struct btrfs_key found_key;
2924         struct extent_buffer *leaf;
2925         u64 last_file_objectid = 0;
2926         u64 last_file_root = 0;
2927         u64 last_file_offset = (u64)-1;
2928         u64 last_extent = 0;
2929         u32 nritems;
2930         u32 item_size;
2931         int ret = 0;
2932
2933         if (extent_key->objectid == 0) {
2934                 ret = del_extent_zero(extent_root, path, extent_key);
2935                 goto out;
2936         }
2937         key.objectid = extent_key->objectid;
2938         key.type = BTRFS_EXTENT_REF_KEY;
2939         key.offset = 0;
2940
2941         while(1) {
2942                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2943
2944                 if (ret < 0)
2945                         goto out;
2946
2947                 ret = 0;
2948                 leaf = path->nodes[0];
2949                 nritems = btrfs_header_nritems(leaf);
2950                 if (path->slots[0] == nritems) {
2951                         ret = btrfs_next_leaf(extent_root, path);
2952                         if (ret > 0) {
2953                                 ret = 0;
2954                                 goto out;
2955                         }
2956                         if (ret < 0)
2957                                 goto out;
2958                         leaf = path->nodes[0];
2959                 }
2960
2961                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2962                 if (found_key.objectid != extent_key->objectid) {
2963                         break;
2964                 }
2965
2966                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2967                         break;
2968                 }
2969
2970                 key.offset = found_key.offset + 1;
2971                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2972
2973                 ret = relocate_one_reference(extent_root, path, extent_key,
2974                                              &last_file_objectid,
2975                                              &last_file_offset,
2976                                              &last_file_root, last_extent);
2977                 if (ret)
2978                         goto out;
2979                 last_extent = extent_key->objectid;
2980         }
2981         ret = 0;
2982 out:
2983         btrfs_release_path(extent_root, path);
2984         return ret;
2985 }
2986
2987 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2988 {
2989         u64 num_devices;
2990         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2991                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2992
2993         num_devices = root->fs_info->fs_devices->num_devices;
2994         if (num_devices == 1) {
2995                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2996                 stripped = flags & ~stripped;
2997
2998                 /* turn raid0 into single device chunks */
2999                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
3000                         return stripped;
3001
3002                 /* turn mirroring into duplication */
3003                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
3004                              BTRFS_BLOCK_GROUP_RAID10))
3005                         return stripped | BTRFS_BLOCK_GROUP_DUP;
3006                 return flags;
3007         } else {
3008                 /* they already had raid on here, just return */
3009                 if (flags & stripped)
3010                         return flags;
3011
3012                 stripped |= BTRFS_BLOCK_GROUP_DUP;
3013                 stripped = flags & ~stripped;
3014
3015                 /* switch duplicated blocks with raid1 */
3016                 if (flags & BTRFS_BLOCK_GROUP_DUP)
3017                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
3018
3019                 /* turn single device chunks into raid0 */
3020                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
3021         }
3022         return flags;
3023 }
3024
3025 int __alloc_chunk_for_shrink(struct btrfs_root *root,
3026                      struct btrfs_block_group_cache *shrink_block_group,
3027                      int force)
3028 {
3029         struct btrfs_trans_handle *trans;
3030         u64 new_alloc_flags;
3031         u64 calc;
3032
3033         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
3034
3035                 mutex_unlock(&root->fs_info->alloc_mutex);
3036                 trans = btrfs_start_transaction(root, 1);
3037                 mutex_lock(&root->fs_info->alloc_mutex);
3038
3039                 new_alloc_flags = update_block_group_flags(root,
3040                                                    shrink_block_group->flags);
3041                 if (new_alloc_flags != shrink_block_group->flags) {
3042                         calc =
3043                              btrfs_block_group_used(&shrink_block_group->item);
3044                 } else {
3045                         calc = shrink_block_group->key.offset;
3046                 }
3047                 do_chunk_alloc(trans, root->fs_info->extent_root,
3048                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
3049
3050                 mutex_unlock(&root->fs_info->alloc_mutex);
3051                 btrfs_end_transaction(trans, root);
3052                 mutex_lock(&root->fs_info->alloc_mutex);
3053         }
3054         return 0;
3055 }
3056
/*
 * Relocate every extent that lives inside the block group starting at
 * shrink_start, then delete the (now empty) block group item from the
 * extent tree and tear down its in-memory cache state.
 *
 * Returns 0 on success or a negative error from the tree operations.
 * Called and returns with alloc_mutex released (it is taken and
 * dropped repeatedly inside).
 */
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
{
        struct btrfs_trans_handle *trans;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_path *path;
        u64 cur_byte;
        u64 total_found;
        u64 shrink_last_byte;
        struct btrfs_block_group_cache *shrink_block_group;
        struct btrfs_fs_info *info = root->fs_info;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct extent_buffer *leaf;
        u32 nritems;
        int ret;
        int progress;

        mutex_lock(&root->fs_info->alloc_mutex);
        shrink_block_group = btrfs_lookup_block_group(root->fs_info,
                                                      shrink_start);
        BUG_ON(!shrink_block_group);

        /* first byte past the end of the block group being removed */
        shrink_last_byte = shrink_block_group->key.objectid +
                shrink_block_group->key.offset;

        /* this space is going away; stop counting it as available */
        shrink_block_group->space_info->total_bytes -=
                shrink_block_group->key.offset;
        /* NOTE(review): btrfs_alloc_path() can return NULL; no check here */
        path = btrfs_alloc_path();
        /* all searches below are against the extent tree */
        root = root->fs_info->extent_root;
        path->reada = 2;

        printk("btrfs relocating block group %llu flags %llu\n",
               (unsigned long long)shrink_start,
               (unsigned long long)shrink_block_group->flags);

        /* pre-allocate a chunk to receive the relocated data */
        __alloc_chunk_for_shrink(root, shrink_block_group, 1);

again:

        /* read-only so no new allocations land in this group */
        shrink_block_group->ro = 1;

        total_found = 0;
        progress = 0;
        key.objectid = shrink_start;
        key.offset = 0;
        key.type = 0;
        cur_byte = key.objectid;

        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;

        /*
         * an extent item may start before shrink_start but overlap it;
         * back up one extent item and check for that case
         */
        ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
        if (ret < 0)
                goto out;

        if (ret == 0) {
                leaf = path->nodes[0];
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid + found_key.offset > shrink_start &&
                    found_key.objectid < shrink_last_byte) {
                        /* it overlaps: start the scan at this extent */
                        cur_byte = found_key.objectid;
                        key.objectid = cur_byte;
                }
        }
        btrfs_release_path(root, path);

        /* walk every extent item inside the block group */
        while(1) {
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;

next:
                leaf = path->nodes[0];
                nritems = btrfs_header_nritems(leaf);
                if (path->slots[0] >= nritems) {
                        ret = btrfs_next_leaf(root, path);
                        if (ret < 0)
                                goto out;
                        if (ret == 1) {
                                /* end of tree */
                                ret = 0;
                                break;
                        }
                        leaf = path->nodes[0];
                        nritems = btrfs_header_nritems(leaf);
                }

                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

                if (found_key.objectid >= shrink_last_byte)
                        break;

                /*
                 * drop the path (and the blocked lock it pins) before
                 * rescheduling, then re-search from where we left off
                 */
                if (progress && need_resched()) {
                        memcpy(&key, &found_key, sizeof(key));
                        cond_resched();
                        btrfs_release_path(root, path);
                        btrfs_search_slot(NULL, root, &key, path, 0, 0);
                        progress = 0;
                        goto next;
                }
                progress = 1;

                /* skip non-extent items and extents already processed */
                if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
                    found_key.objectid + found_key.offset <= cur_byte) {
                        memcpy(&key, &found_key, sizeof(key));
                        key.offset++;
                        path->slots[0]++;
                        goto next;
                }

                total_found++;
                cur_byte = found_key.objectid + found_key.offset;
                key.objectid = cur_byte;
                btrfs_release_path(root, path);
                ret = relocate_one_extent(root, path, &found_key);
                /* top up the destination chunk as we fill it */
                __alloc_chunk_for_shrink(root, shrink_block_group, 0);
        }

        btrfs_release_path(root, path);

        if (total_found > 0) {
                printk("btrfs relocate found %llu last extent was %llu\n",
                       (unsigned long long)total_found,
                       (unsigned long long)found_key.objectid);
                /*
                 * commit twice so the relocated extents are fully on
                 * disk and their old copies unpinned, then rescan --
                 * relocation may have left new references behind
                 */
                mutex_unlock(&root->fs_info->alloc_mutex);
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);

                btrfs_clean_old_snapshots(tree_root);

                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
                mutex_lock(&root->fs_info->alloc_mutex);
                goto again;
        }

        /*
         * we've freed all the extents, now remove the block
         * group item from the tree
         */
        mutex_unlock(&root->fs_info->alloc_mutex);

        trans = btrfs_start_transaction(root, 1);
        mutex_lock(&root->fs_info->alloc_mutex);
        memcpy(&key, &shrink_block_group->key, sizeof(key));

        ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
        /* the item must exist; not finding it is corruption */
        if (ret > 0)
                ret = -EIO;
        /* NOTE(review): this error path leaks trans (never ended) */
        if (ret < 0)
                goto out;

        /* drop the cached block group state for this byte range */
        clear_extent_bits(&info->block_group_cache, key.objectid,
                          key.objectid + key.offset - 1,
                          (unsigned int)-1, GFP_NOFS);


        clear_extent_bits(&info->free_space_cache,
                           key.objectid, key.objectid + key.offset - 1,
                           (unsigned int)-1, GFP_NOFS);

        /* poison before free to catch stale users */
        memset(shrink_block_group, 0, sizeof(*shrink_block_group));
        kfree(shrink_block_group);

        btrfs_del_item(trans, root, path);
        btrfs_release_path(root, path);
        mutex_unlock(&root->fs_info->alloc_mutex);
        btrfs_commit_transaction(trans, root);

        mutex_lock(&root->fs_info->alloc_mutex);

        /* the code to unpin extents might set a few bits in the free
         * space cache for this range again
         */
        clear_extent_bits(&info->free_space_cache,
                           key.objectid, key.objectid + key.offset - 1,
                           (unsigned int)-1, GFP_NOFS);
out:
        btrfs_free_path(path);
        mutex_unlock(&root->fs_info->alloc_mutex);
        return ret;
}
3239
3240 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3241                            struct btrfs_key *key)
3242 {
3243         int ret = 0;
3244         struct btrfs_key found_key;
3245         struct extent_buffer *leaf;
3246         int slot;
3247
3248         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3249         if (ret < 0)
3250                 goto out;
3251
3252         while(1) {
3253                 slot = path->slots[0];
3254                 leaf = path->nodes[0];
3255                 if (slot >= btrfs_header_nritems(leaf)) {
3256                         ret = btrfs_next_leaf(root, path);
3257                         if (ret == 0)
3258                                 continue;
3259                         if (ret < 0)
3260                                 goto out;
3261                         break;
3262                 }
3263                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3264
3265                 if (found_key.objectid >= key->objectid &&
3266                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3267                         ret = 0;
3268                         goto out;
3269                 }
3270                 path->slots[0]++;
3271         }
3272         ret = -ENOENT;
3273 out:
3274         return ret;
3275 }
3276
3277 int btrfs_read_block_groups(struct btrfs_root *root)
3278 {
3279         struct btrfs_path *path;
3280         int ret;
3281         int bit;
3282         struct btrfs_block_group_cache *cache;
3283         struct btrfs_fs_info *info = root->fs_info;
3284         struct btrfs_space_info *space_info;
3285         struct extent_io_tree *block_group_cache;
3286         struct btrfs_key key;
3287         struct btrfs_key found_key;
3288         struct extent_buffer *leaf;
3289
3290         block_group_cache = &info->block_group_cache;
3291         root = info->extent_root;
3292         key.objectid = 0;
3293         key.offset = 0;
3294         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3295         path = btrfs_alloc_path();
3296         if (!path)
3297                 return -ENOMEM;
3298
3299         mutex_lock(&root->fs_info->alloc_mutex);
3300         while(1) {
3301                 ret = find_first_block_group(root, path, &key);
3302                 if (ret > 0) {
3303                         ret = 0;
3304                         goto error;
3305                 }
3306                 if (ret != 0)
3307                         goto error;
3308
3309                 leaf = path->nodes[0];
3310                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3311                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3312                 if (!cache) {
3313                         ret = -ENOMEM;
3314                         break;
3315                 }
3316
3317                 read_extent_buffer(leaf, &cache->item,
3318                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3319                                    sizeof(cache->item));
3320                 memcpy(&cache->key, &found_key, sizeof(found_key));
3321
3322                 key.objectid = found_key.objectid + found_key.offset;
3323                 btrfs_release_path(root, path);
3324                 cache->flags = btrfs_block_group_flags(&cache->item);
3325                 bit = 0;
3326                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3327                         bit = BLOCK_GROUP_DATA;
3328                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3329                         bit = BLOCK_GROUP_SYSTEM;
3330                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3331                         bit = BLOCK_GROUP_METADATA;
3332                 }
3333                 set_avail_alloc_bits(info, cache->flags);
3334
3335                 ret = update_space_info(info, cache->flags, found_key.offset,
3336                                         btrfs_block_group_used(&cache->item),
3337                                         &space_info);
3338                 BUG_ON(ret);
3339                 cache->space_info = space_info;
3340
3341                 /* use EXTENT_LOCKED to prevent merging */
3342                 set_extent_bits(block_group_cache, found_key.objectid,
3343                                 found_key.objectid + found_key.offset - 1,
3344                                 bit | EXTENT_LOCKED, GFP_NOFS);
3345                 set_state_private(block_group_cache, found_key.objectid,
3346                                   (unsigned long)cache);
3347
3348                 /* hack for now */
3349                 if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3350                         cache_block_group(root->fs_info->extent_root,
3351                                           cache);
3352                 }
3353                 if (key.objectid >=
3354                     btrfs_super_total_bytes(&info->super_copy))
3355                         break;
3356         }
3357         ret = 0;
3358 error:
3359         btrfs_free_path(path);
3360         mutex_unlock(&root->fs_info->alloc_mutex);
3361         return ret;
3362 }
3363
3364 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3365                            struct btrfs_root *root, u64 bytes_used,
3366                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3367                            u64 size)
3368 {
3369         int ret;
3370         int bit = 0;
3371         struct btrfs_root *extent_root;
3372         struct btrfs_block_group_cache *cache;
3373         struct extent_io_tree *block_group_cache;
3374
3375         WARN_ON(!mutex_is_locked(&root->fs_info->alloc_mutex));
3376         extent_root = root->fs_info->extent_root;
3377         block_group_cache = &root->fs_info->block_group_cache;
3378
3379         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3380         BUG_ON(!cache);
3381         cache->key.objectid = chunk_offset;
3382         cache->key.offset = size;
3383         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3384
3385         btrfs_set_block_group_used(&cache->item, bytes_used);
3386         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3387         cache->flags = type;
3388         btrfs_set_block_group_flags(&cache->item, type);
3389
3390         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3391                                 &cache->space_info);
3392         BUG_ON(ret);
3393
3394         bit = block_group_state_bits(type);
3395         set_extent_bits(block_group_cache, chunk_offset,
3396                         chunk_offset + size - 1,
3397                         bit | EXTENT_LOCKED, GFP_NOFS);
3398
3399         set_state_private(block_group_cache, chunk_offset,
3400                           (unsigned long)cache);
3401         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3402                                 sizeof(cache->item));
3403         BUG_ON(ret);
3404
3405         finish_current_insert(trans, extent_root);
3406         ret = del_pending_extents(trans, extent_root);
3407         BUG_ON(ret);
3408         set_avail_alloc_bits(extent_root->fs_info, type);
3409
3410         return 0;
3411 }