Btrfs: Replace the big fs_mutex with a collection of other locks
[pandora-kernel.git] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18 #include <linux/sched.h>
19 #include <linux/pagemap.h>
20 #include <linux/writeback.h>
21 #include "hash.h"
22 #include "crc32c.h"
23 #include "ctree.h"
24 #include "disk-io.h"
25 #include "print-tree.h"
26 #include "transaction.h"
27 #include "volumes.h"
28 #include "locking.h"
29
30 #define BLOCK_GROUP_DATA     EXTENT_WRITEBACK
31 #define BLOCK_GROUP_METADATA EXTENT_UPTODATE
32 #define BLOCK_GROUP_SYSTEM   EXTENT_NEW
33
34 #define BLOCK_GROUP_DIRTY EXTENT_DIRTY
35
36 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
37                                  btrfs_root *extent_root);
38 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
39                                btrfs_root *extent_root);
40 static struct btrfs_block_group_cache *
41 __btrfs_find_block_group(struct btrfs_root *root,
42                          struct btrfs_block_group_cache *hint,
43                          u64 search_start, int data, int owner);
44
45 void maybe_lock_mutex(struct btrfs_root *root)
46 {
47         if (root != root->fs_info->extent_root &&
48             root != root->fs_info->chunk_root &&
49             root != root->fs_info->dev_root) {
50                 mutex_lock(&root->fs_info->alloc_mutex);
51         }
52 }
53
54 void maybe_unlock_mutex(struct btrfs_root *root)
55 {
56         if (root != root->fs_info->extent_root &&
57             root != root->fs_info->chunk_root &&
58             root != root->fs_info->dev_root) {
59                 mutex_unlock(&root->fs_info->alloc_mutex);
60         }
61 }
62
63 static int cache_block_group(struct btrfs_root *root,
64                              struct btrfs_block_group_cache *block_group)
65 {
66         struct btrfs_path *path;
67         int ret;
68         struct btrfs_key key;
69         struct extent_buffer *leaf;
70         struct extent_io_tree *free_space_cache;
71         int slot;
72         u64 last = 0;
73         u64 hole_size;
74         u64 first_free;
75         int found = 0;
76
77         if (!block_group)
78                 return 0;
79
80         root = root->fs_info->extent_root;
81         free_space_cache = &root->fs_info->free_space_cache;
82
83         if (block_group->cached)
84                 return 0;
85
86         path = btrfs_alloc_path();
87         if (!path)
88                 return -ENOMEM;
89
90         path->reada = 2;
91         path->skip_locking = 1;
92         first_free = block_group->key.objectid;
93         key.objectid = block_group->key.objectid;
94         key.offset = 0;
95         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
96         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
97         if (ret < 0)
98                 return ret;
99         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
100         if (ret < 0)
101                 return ret;
102         if (ret == 0) {
103                 leaf = path->nodes[0];
104                 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
105                 if (key.objectid + key.offset > first_free)
106                         first_free = key.objectid + key.offset;
107         }
108         while(1) {
109                 leaf = path->nodes[0];
110                 slot = path->slots[0];
111                 if (slot >= btrfs_header_nritems(leaf)) {
112                         ret = btrfs_next_leaf(root, path);
113                         if (ret < 0)
114                                 goto err;
115                         if (ret == 0) {
116                                 continue;
117                         } else {
118                                 break;
119                         }
120                 }
121                 btrfs_item_key_to_cpu(leaf, &key, slot);
122                 if (key.objectid < block_group->key.objectid) {
123                         goto next;
124                 }
125                 if (key.objectid >= block_group->key.objectid +
126                     block_group->key.offset) {
127                         break;
128                 }
129
130                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
131                         if (!found) {
132                                 last = first_free;
133                                 found = 1;
134                         }
135                         if (key.objectid > last) {
136                                 hole_size = key.objectid - last;
137                                 set_extent_dirty(free_space_cache, last,
138                                                  last + hole_size - 1,
139                                                  GFP_NOFS);
140                         }
141                         last = key.objectid + key.offset;
142                 }
143 next:
144                 path->slots[0]++;
145         }
146
147         if (!found)
148                 last = first_free;
149         if (block_group->key.objectid +
150             block_group->key.offset > last) {
151                 hole_size = block_group->key.objectid +
152                         block_group->key.offset - last;
153                 set_extent_dirty(free_space_cache, last,
154                                  last + hole_size - 1, GFP_NOFS);
155         }
156         block_group->cached = 1;
157 err:
158         btrfs_free_path(path);
159         return 0;
160 }
161
162 struct btrfs_block_group_cache *btrfs_lookup_first_block_group(struct
163                                                        btrfs_fs_info *info,
164                                                          u64 bytenr)
165 {
166         struct extent_io_tree *block_group_cache;
167         struct btrfs_block_group_cache *block_group = NULL;
168         u64 ptr;
169         u64 start;
170         u64 end;
171         int ret;
172
173         bytenr = max_t(u64, bytenr,
174                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
175         block_group_cache = &info->block_group_cache;
176         ret = find_first_extent_bit(block_group_cache,
177                                     bytenr, &start, &end,
178                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
179                                     BLOCK_GROUP_SYSTEM);
180         if (ret) {
181                 return NULL;
182         }
183         ret = get_state_private(block_group_cache, start, &ptr);
184         if (ret)
185                 return NULL;
186
187         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
188         return block_group;
189 }
190
191 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
192                                                          btrfs_fs_info *info,
193                                                          u64 bytenr)
194 {
195         struct extent_io_tree *block_group_cache;
196         struct btrfs_block_group_cache *block_group = NULL;
197         u64 ptr;
198         u64 start;
199         u64 end;
200         int ret;
201
202         bytenr = max_t(u64, bytenr,
203                        BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
204         block_group_cache = &info->block_group_cache;
205         ret = find_first_extent_bit(block_group_cache,
206                                     bytenr, &start, &end,
207                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
208                                     BLOCK_GROUP_SYSTEM);
209         if (ret) {
210                 return NULL;
211         }
212         ret = get_state_private(block_group_cache, start, &ptr);
213         if (ret)
214                 return NULL;
215
216         block_group = (struct btrfs_block_group_cache *)(unsigned long)ptr;
217         if (block_group->key.objectid <= bytenr && bytenr <
218             block_group->key.objectid + block_group->key.offset)
219                 return block_group;
220         return NULL;
221 }
222
223 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
224 {
225         return (cache->flags & bits) == bits;
226 }
227
228 static int noinline find_search_start(struct btrfs_root *root,
229                               struct btrfs_block_group_cache **cache_ret,
230                               u64 *start_ret, u64 num, int data)
231 {
232         int ret;
233         struct btrfs_block_group_cache *cache = *cache_ret;
234         struct extent_io_tree *free_space_cache;
235         struct extent_state *state;
236         u64 last;
237         u64 start = 0;
238         u64 cache_miss = 0;
239         u64 total_fs_bytes;
240         u64 search_start = *start_ret;
241         int wrapped = 0;
242
243         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
244         free_space_cache = &root->fs_info->free_space_cache;
245
246         if (!cache)
247                 goto out;
248
249 again:
250         ret = cache_block_group(root, cache);
251         if (ret) {
252                 goto out;
253         }
254
255         last = max(search_start, cache->key.objectid);
256         if (!block_group_bits(cache, data) || cache->ro)
257                 goto new_group;
258
259         spin_lock_irq(&free_space_cache->lock);
260         state = find_first_extent_bit_state(free_space_cache, last, EXTENT_DIRTY);
261         while(1) {
262                 if (!state) {
263                         if (!cache_miss)
264                                 cache_miss = last;
265                         spin_unlock_irq(&free_space_cache->lock);
266                         goto new_group;
267                 }
268
269                 start = max(last, state->start);
270                 last = state->end + 1;
271                 if (last - start < num) {
272                         do {
273                                 state = extent_state_next(state);
274                         } while(state && !(state->state & EXTENT_DIRTY));
275                         continue;
276                 }
277                 spin_unlock_irq(&free_space_cache->lock);
278                 if (cache->ro) {
279                         goto new_group;
280                 }
281                 if (start + num > cache->key.objectid + cache->key.offset)
282                         goto new_group;
283                 if (!block_group_bits(cache, data)) {
284                         printk("block group bits don't match %Lu %d\n", cache->flags, data);
285                 }
286                 *start_ret = start;
287                 return 0;
288         }
289 out:
290         cache = btrfs_lookup_block_group(root->fs_info, search_start);
291         if (!cache) {
292                 printk("Unable to find block group for %Lu\n", search_start);
293                 WARN_ON(1);
294         }
295         return -ENOSPC;
296
297 new_group:
298         last = cache->key.objectid + cache->key.offset;
299 wrapped:
300         cache = btrfs_lookup_first_block_group(root->fs_info, last);
301         if (!cache || cache->key.objectid >= total_fs_bytes) {
302 no_cache:
303                 if (!wrapped) {
304                         wrapped = 1;
305                         last = search_start;
306                         goto wrapped;
307                 }
308                 goto out;
309         }
310         if (cache_miss && !cache->cached) {
311                 cache_block_group(root, cache);
312                 last = cache_miss;
313                 cache = btrfs_lookup_first_block_group(root->fs_info, last);
314         }
315         cache_miss = 0;
316         cache = __btrfs_find_block_group(root, cache, last, data, 0);
317         if (!cache)
318                 goto no_cache;
319         *cache_ret = cache;
320         goto again;
321 }
322
323 static u64 div_factor(u64 num, int factor)
324 {
325         if (factor == 10)
326                 return num;
327         num *= factor;
328         do_div(num, 10);
329         return num;
330 }
331
332 static int block_group_state_bits(u64 flags)
333 {
334         int bits = 0;
335         if (flags & BTRFS_BLOCK_GROUP_DATA)
336                 bits |= BLOCK_GROUP_DATA;
337         if (flags & BTRFS_BLOCK_GROUP_METADATA)
338                 bits |= BLOCK_GROUP_METADATA;
339         if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
340                 bits |= BLOCK_GROUP_SYSTEM;
341         return bits;
342 }
343
344 static struct btrfs_block_group_cache *
345 __btrfs_find_block_group(struct btrfs_root *root,
346                          struct btrfs_block_group_cache *hint,
347                          u64 search_start, int data, int owner)
348 {
349         struct btrfs_block_group_cache *cache;
350         struct extent_io_tree *block_group_cache;
351         struct btrfs_block_group_cache *found_group = NULL;
352         struct btrfs_fs_info *info = root->fs_info;
353         u64 used;
354         u64 last = 0;
355         u64 start;
356         u64 end;
357         u64 free_check;
358         u64 ptr;
359         int bit;
360         int ret;
361         int full_search = 0;
362         int factor = 10;
363         int wrapped = 0;
364
365         block_group_cache = &info->block_group_cache;
366
367         if (data & BTRFS_BLOCK_GROUP_METADATA)
368                 factor = 9;
369
370         bit = block_group_state_bits(data);
371
372         if (search_start) {
373                 struct btrfs_block_group_cache *shint;
374                 shint = btrfs_lookup_first_block_group(info, search_start);
375                 if (shint && block_group_bits(shint, data) && !shint->ro) {
376                         used = btrfs_block_group_used(&shint->item);
377                         if (used + shint->pinned <
378                             div_factor(shint->key.offset, factor)) {
379                                 return shint;
380                         }
381                 }
382         }
383         if (hint && !hint->ro && block_group_bits(hint, data)) {
384                 used = btrfs_block_group_used(&hint->item);
385                 if (used + hint->pinned <
386                     div_factor(hint->key.offset, factor)) {
387                         return hint;
388                 }
389                 last = hint->key.objectid + hint->key.offset;
390         } else {
391                 if (hint)
392                         last = max(hint->key.objectid, search_start);
393                 else
394                         last = search_start;
395         }
396 again:
397         while(1) {
398                 ret = find_first_extent_bit(block_group_cache, last,
399                                             &start, &end, bit);
400                 if (ret)
401                         break;
402
403                 ret = get_state_private(block_group_cache, start, &ptr);
404                 if (ret) {
405                         last = end + 1;
406                         continue;
407                 }
408
409                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
410                 last = cache->key.objectid + cache->key.offset;
411                 used = btrfs_block_group_used(&cache->item);
412
413                 if (!cache->ro && block_group_bits(cache, data)) {
414                         free_check = div_factor(cache->key.offset, factor);
415                         if (used + cache->pinned < free_check) {
416                                 found_group = cache;
417                                 goto found;
418                         }
419                 }
420                 cond_resched();
421         }
422         if (!wrapped) {
423                 last = search_start;
424                 wrapped = 1;
425                 goto again;
426         }
427         if (!full_search && factor < 10) {
428                 last = search_start;
429                 full_search = 1;
430                 factor = 10;
431                 goto again;
432         }
433 found:
434         return found_group;
435 }
436
437 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
438                                                  struct btrfs_block_group_cache
439                                                  *hint, u64 search_start,
440                                                  int data, int owner)
441 {
442
443         struct btrfs_block_group_cache *ret;
444         mutex_lock(&root->fs_info->alloc_mutex);
445         ret = __btrfs_find_block_group(root, hint, search_start, data, owner);
446         mutex_unlock(&root->fs_info->alloc_mutex);
447         return ret;
448 }
449 static u64 hash_extent_ref(u64 root_objectid, u64 ref_generation,
450                            u64 owner, u64 owner_offset)
451 {
452         u32 high_crc = ~(u32)0;
453         u32 low_crc = ~(u32)0;
454         __le64 lenum;
455         lenum = cpu_to_le64(root_objectid);
456         high_crc = btrfs_crc32c(high_crc, &lenum, sizeof(lenum));
457         lenum = cpu_to_le64(ref_generation);
458         low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
459         if (owner >= BTRFS_FIRST_FREE_OBJECTID) {
460                 lenum = cpu_to_le64(owner);
461                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
462                 lenum = cpu_to_le64(owner_offset);
463                 low_crc = btrfs_crc32c(low_crc, &lenum, sizeof(lenum));
464         }
465         return ((u64)high_crc << 32) | (u64)low_crc;
466 }
467
468 static int match_extent_ref(struct extent_buffer *leaf,
469                             struct btrfs_extent_ref *disk_ref,
470                             struct btrfs_extent_ref *cpu_ref)
471 {
472         int ret;
473         int len;
474
475         if (cpu_ref->objectid)
476                 len = sizeof(*cpu_ref);
477         else
478                 len = 2 * sizeof(u64);
479         ret = memcmp_extent_buffer(leaf, cpu_ref, (unsigned long)disk_ref,
480                                    len);
481         return ret == 0;
482 }
483
484 static int noinline lookup_extent_backref(struct btrfs_trans_handle *trans,
485                                           struct btrfs_root *root,
486                                           struct btrfs_path *path, u64 bytenr,
487                                           u64 root_objectid,
488                                           u64 ref_generation, u64 owner,
489                                           u64 owner_offset, int del)
490 {
491         u64 hash;
492         struct btrfs_key key;
493         struct btrfs_key found_key;
494         struct btrfs_extent_ref ref;
495         struct extent_buffer *leaf;
496         struct btrfs_extent_ref *disk_ref;
497         int ret;
498         int ret2;
499
500         btrfs_set_stack_ref_root(&ref, root_objectid);
501         btrfs_set_stack_ref_generation(&ref, ref_generation);
502         btrfs_set_stack_ref_objectid(&ref, owner);
503         btrfs_set_stack_ref_offset(&ref, owner_offset);
504
505         hash = hash_extent_ref(root_objectid, ref_generation, owner,
506                                owner_offset);
507         key.offset = hash;
508         key.objectid = bytenr;
509         key.type = BTRFS_EXTENT_REF_KEY;
510
511         while (1) {
512                 ret = btrfs_search_slot(trans, root, &key, path,
513                                         del ? -1 : 0, del);
514                 if (ret < 0)
515                         goto out;
516                 leaf = path->nodes[0];
517                 if (ret != 0) {
518                         u32 nritems = btrfs_header_nritems(leaf);
519                         if (path->slots[0] >= nritems) {
520                                 ret2 = btrfs_next_leaf(root, path);
521                                 if (ret2)
522                                         goto out;
523                                 leaf = path->nodes[0];
524                         }
525                         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
526                         if (found_key.objectid != bytenr ||
527                             found_key.type != BTRFS_EXTENT_REF_KEY)
528                                 goto out;
529                         key.offset = found_key.offset;
530                         if (del) {
531                                 btrfs_release_path(root, path);
532                                 continue;
533                         }
534                 }
535                 disk_ref = btrfs_item_ptr(path->nodes[0],
536                                           path->slots[0],
537                                           struct btrfs_extent_ref);
538                 if (match_extent_ref(path->nodes[0], disk_ref, &ref)) {
539                         ret = 0;
540                         goto out;
541                 }
542                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
543                 key.offset = found_key.offset + 1;
544                 btrfs_release_path(root, path);
545         }
546 out:
547         return ret;
548 }
549
550 /*
551  * Back reference rules.  Back refs have three main goals:
552  *
553  * 1) differentiate between all holders of references to an extent so that
554  *    when a reference is dropped we can make sure it was a valid reference
555  *    before freeing the extent.
556  *
557  * 2) Provide enough information to quickly find the holders of an extent
558  *    if we notice a given block is corrupted or bad.
559  *
560  * 3) Make it easy to migrate blocks for FS shrinking or storage pool
561  *    maintenance.  This is actually the same as #2, but with a slightly
562  *    different use case.
563  *
564  * File extents can be referenced by:
565  *
566  * - multiple snapshots, subvolumes, or different generations in one subvol
567  * - different files inside a single subvolume (in theory, not implemented yet)
568  * - different offsets inside a file (bookend extents in file.c)
569  *
570  * The extent ref structure has fields for:
571  *
572  * - Objectid of the subvolume root
573  * - Generation number of the tree holding the reference
574  * - objectid of the file holding the reference
575  * - offset in the file corresponding to the key holding the reference
576  *
577  * When a file extent is allocated the fields are filled in:
578  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
579  *
580  * When a leaf is cow'd new references are added for every file extent found
581  * in the leaf.  It looks the same as the create case, but trans->transid
582  * will be different when the block is cow'd.
583  *
584  *     (root_key.objectid, trans->transid, inode objectid, offset in file)
585  *
586  * When a file extent is removed either during snapshot deletion or file
587  * truncation, the corresponding back reference is found
588  * by searching for:
589  *
590  *     (btrfs_header_owner(leaf), btrfs_header_generation(leaf),
591  *      inode objectid, offset in file)
592  *
593  * Btree extents can be referenced by:
594  *
595  * - Different subvolumes
596  * - Different generations of the same subvolume
597  *
598  * Storing sufficient information for a full reverse mapping of a btree
599  * block would require storing the lowest key of the block in the backref,
600  * and it would require updating that lowest key either before write out or
601  * every time it changed.  Instead, the objectid of the lowest key is stored
602  * along with the level of the tree block.  This provides a hint
603  * about where in the btree the block can be found.  Searches through the
604  * btree only need to look for a pointer to that block, so they stop one
605  * level higher than the level recorded in the backref.
606  *
607  * Some btrees do not do reference counting on their extents.  These
608  * include the extent tree and the tree of tree roots.  Backrefs for these
609  * trees always have a generation of zero.
610  *
611  * When a tree block is created, back references are inserted:
612  *
613  * (root->root_key.objectid, trans->transid or zero, level, lowest_key_objectid)
614  *
615  * When a tree block is cow'd in a reference counted root,
616  * new back references are added for all the blocks it points to.
617  * These are of the form (trans->transid will have increased since creation):
618  *
619  * (root->root_key.objectid, trans->transid, level, lowest_key_objectid)
620  *
621  * Because the lowest_key_objectid and the level are just hints
622  * they are not used when backrefs are deleted.  When a backref is deleted:
623  *
624  * if backref was for a tree root:
625  *     root_objectid = root->root_key.objectid
626  * else
627  *     root_objectid = btrfs_header_owner(parent)
628  *
629  * (root_objectid, btrfs_header_generation(parent) or zero, 0, 0)
630  *
631  * Back Reference Key hashing:
632  *
633  * Back references have four fields, each 64 bits long.  Unfortunately,
634  * This is hashed into a single 64 bit number and placed into the key offset.
635  * The key objectid corresponds to the first byte in the extent, and the
636  * key type is set to BTRFS_EXTENT_REF_KEY
637  */
638 int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
639                                  struct btrfs_root *root,
640                                  struct btrfs_path *path, u64 bytenr,
641                                  u64 root_objectid, u64 ref_generation,
642                                  u64 owner, u64 owner_offset)
643 {
644         u64 hash;
645         struct btrfs_key key;
646         struct btrfs_extent_ref ref;
647         struct btrfs_extent_ref *disk_ref;
648         int ret;
649
650         btrfs_set_stack_ref_root(&ref, root_objectid);
651         btrfs_set_stack_ref_generation(&ref, ref_generation);
652         btrfs_set_stack_ref_objectid(&ref, owner);
653         btrfs_set_stack_ref_offset(&ref, owner_offset);
654
655         hash = hash_extent_ref(root_objectid, ref_generation, owner,
656                                owner_offset);
657         key.offset = hash;
658         key.objectid = bytenr;
659         key.type = BTRFS_EXTENT_REF_KEY;
660
661         ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(ref));
662         while (ret == -EEXIST) {
663                 disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
664                                           struct btrfs_extent_ref);
665                 if (match_extent_ref(path->nodes[0], disk_ref, &ref))
666                         goto out;
667                 key.offset++;
668                 btrfs_release_path(root, path);
669                 ret = btrfs_insert_empty_item(trans, root, path, &key,
670                                               sizeof(ref));
671         }
672         if (ret)
673                 goto out;
674         disk_ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
675                                   struct btrfs_extent_ref);
676         write_extent_buffer(path->nodes[0], &ref, (unsigned long)disk_ref,
677                             sizeof(ref));
678         btrfs_mark_buffer_dirty(path->nodes[0]);
679 out:
680         btrfs_release_path(root, path);
681         return ret;
682 }
683
684 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
685                                 struct btrfs_root *root,
686                                 u64 bytenr, u64 num_bytes,
687                                 u64 root_objectid, u64 ref_generation,
688                                 u64 owner, u64 owner_offset)
689 {
690         struct btrfs_path *path;
691         int ret;
692         struct btrfs_key key;
693         struct extent_buffer *l;
694         struct btrfs_extent_item *item;
695         u32 refs;
696
697         WARN_ON(num_bytes < root->sectorsize);
698         path = btrfs_alloc_path();
699         if (!path)
700                 return -ENOMEM;
701
702         path->reada = 1;
703         key.objectid = bytenr;
704         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
705         key.offset = num_bytes;
706         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
707                                 0, 1);
708         if (ret < 0)
709                 return ret;
710         if (ret != 0) {
711                 BUG();
712         }
713         BUG_ON(ret != 0);
714         l = path->nodes[0];
715         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
716         refs = btrfs_extent_refs(l, item);
717         btrfs_set_extent_refs(l, item, refs + 1);
718         btrfs_mark_buffer_dirty(path->nodes[0]);
719
720         btrfs_release_path(root->fs_info->extent_root, path);
721
722         path->reada = 1;
723         ret = btrfs_insert_extent_backref(trans, root->fs_info->extent_root,
724                                           path, bytenr, root_objectid,
725                                           ref_generation, owner, owner_offset);
726         BUG_ON(ret);
727         finish_current_insert(trans, root->fs_info->extent_root);
728         del_pending_extents(trans, root->fs_info->extent_root);
729
730         btrfs_free_path(path);
731         return 0;
732 }
733
734 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
735                                 struct btrfs_root *root,
736                                 u64 bytenr, u64 num_bytes,
737                                 u64 root_objectid, u64 ref_generation,
738                                 u64 owner, u64 owner_offset)
739 {
740         int ret;
741
742         mutex_lock(&root->fs_info->alloc_mutex);
743         ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes,
744                                      root_objectid, ref_generation,
745                                      owner, owner_offset);
746         mutex_unlock(&root->fs_info->alloc_mutex);
747         return ret;
748 }
749
750 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
751                          struct btrfs_root *root)
752 {
753         finish_current_insert(trans, root->fs_info->extent_root);
754         del_pending_extents(trans, root->fs_info->extent_root);
755         return 0;
756 }
757
758 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
759                              struct btrfs_root *root, u64 bytenr,
760                              u64 num_bytes, u32 *refs)
761 {
762         struct btrfs_path *path;
763         int ret;
764         struct btrfs_key key;
765         struct extent_buffer *l;
766         struct btrfs_extent_item *item;
767
768         WARN_ON(num_bytes < root->sectorsize);
769         path = btrfs_alloc_path();
770         path->reada = 1;
771         key.objectid = bytenr;
772         key.offset = num_bytes;
773         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
774         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
775                                 0, 0);
776         if (ret < 0)
777                 goto out;
778         if (ret != 0) {
779                 btrfs_print_leaf(root, path->nodes[0]);
780                 printk("failed to find block number %Lu\n", bytenr);
781                 BUG();
782         }
783         l = path->nodes[0];
784         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
785         *refs = btrfs_extent_refs(l, item);
786 out:
787         btrfs_free_path(path);
788         return 0;
789 }
790
791 u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
792                                   struct btrfs_path *count_path,
793                                   u64 expected_owner,
794                                   u64 first_extent)
795 {
796         struct btrfs_root *extent_root = root->fs_info->extent_root;
797         struct btrfs_path *path;
798         u64 bytenr;
799         u64 found_objectid;
800         u64 found_owner;
801         u64 root_objectid = root->root_key.objectid;
802         u32 total_count = 0;
803         u32 extent_refs;
804         u32 cur_count;
805         u32 nritems;
806         int ret;
807         struct btrfs_key key;
808         struct btrfs_key found_key;
809         struct extent_buffer *l;
810         struct btrfs_extent_item *item;
811         struct btrfs_extent_ref *ref_item;
812         int level = -1;
813
814         /* FIXME, needs locking */
815         BUG();
816
817         mutex_lock(&root->fs_info->alloc_mutex);
818         path = btrfs_alloc_path();
819 again:
820         if (level == -1)
821                 bytenr = first_extent;
822         else
823                 bytenr = count_path->nodes[level]->start;
824
825         cur_count = 0;
826         key.objectid = bytenr;
827         key.offset = 0;
828
829         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
830         ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
831         if (ret < 0)
832                 goto out;
833         BUG_ON(ret == 0);
834
835         l = path->nodes[0];
836         btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
837
838         if (found_key.objectid != bytenr ||
839             found_key.type != BTRFS_EXTENT_ITEM_KEY) {
840                 goto out;
841         }
842
843         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
844         extent_refs = btrfs_extent_refs(l, item);
845         while (1) {
846                 l = path->nodes[0];
847                 nritems = btrfs_header_nritems(l);
848                 if (path->slots[0] >= nritems) {
849                         ret = btrfs_next_leaf(extent_root, path);
850                         if (ret == 0)
851                                 continue;
852                         break;
853                 }
854                 btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
855                 if (found_key.objectid != bytenr)
856                         break;
857
858                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
859                         path->slots[0]++;
860                         continue;
861                 }
862
863                 cur_count++;
864                 ref_item = btrfs_item_ptr(l, path->slots[0],
865                                           struct btrfs_extent_ref);
866                 found_objectid = btrfs_ref_root(l, ref_item);
867
868                 if (found_objectid != root_objectid) {
869                         total_count = 2;
870                         goto out;
871                 }
872                 if (level == -1) {
873                         found_owner = btrfs_ref_objectid(l, ref_item);
874                         if (found_owner != expected_owner) {
875                                 total_count = 2;
876                                 goto out;
877                         }
878                         /*
879                          * nasty.  we don't count a reference held by
880                          * the running transaction.  This allows nodatacow
881                          * to avoid cow most of the time
882                          */
883                         if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
884                             btrfs_ref_generation(l, ref_item) ==
885                             root->fs_info->generation) {
886                                 extent_refs--;
887                         }
888                 }
889                 total_count = 1;
890                 path->slots[0]++;
891         }
892         /*
893          * if there is more than one reference against a data extent,
894          * we have to assume the other ref is another snapshot
895          */
896         if (level == -1 && extent_refs > 1) {
897                 total_count = 2;
898                 goto out;
899         }
900         if (cur_count == 0) {
901                 total_count = 0;
902                 goto out;
903         }
904         if (level >= 0 && root->node == count_path->nodes[level])
905                 goto out;
906         level++;
907         btrfs_release_path(root, path);
908         goto again;
909
910 out:
911         btrfs_free_path(path);
912         mutex_unlock(&root->fs_info->alloc_mutex);
913         return total_count;
914 }
915
916 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
917                   struct extent_buffer *buf)
918 {
919         u64 bytenr;
920         u32 nritems;
921         struct btrfs_key key;
922         struct btrfs_file_extent_item *fi;
923         int i;
924         int level;
925         int ret;
926         int faili;
927
928         if (!root->ref_cows)
929                 return 0;
930
931         mutex_lock(&root->fs_info->alloc_mutex);
932         level = btrfs_header_level(buf);
933         nritems = btrfs_header_nritems(buf);
934         for (i = 0; i < nritems; i++) {
935                 if (level == 0) {
936                         u64 disk_bytenr;
937                         btrfs_item_key_to_cpu(buf, &key, i);
938                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
939                                 continue;
940                         fi = btrfs_item_ptr(buf, i,
941                                             struct btrfs_file_extent_item);
942                         if (btrfs_file_extent_type(buf, fi) ==
943                             BTRFS_FILE_EXTENT_INLINE)
944                                 continue;
945                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
946                         if (disk_bytenr == 0)
947                                 continue;
948                         ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
949                                     btrfs_file_extent_disk_num_bytes(buf, fi),
950                                     root->root_key.objectid, trans->transid,
951                                     key.objectid, key.offset);
952                         if (ret) {
953                                 faili = i;
954                                 goto fail;
955                         }
956                 } else {
957                         bytenr = btrfs_node_blockptr(buf, i);
958                         btrfs_node_key_to_cpu(buf, &key, i);
959                         ret = __btrfs_inc_extent_ref(trans, root, bytenr,
960                                            btrfs_level_size(root, level - 1),
961                                            root->root_key.objectid,
962                                            trans->transid,
963                                            level - 1, key.objectid);
964                         if (ret) {
965                                 faili = i;
966                                 goto fail;
967                         }
968                 }
969         }
970         mutex_unlock(&root->fs_info->alloc_mutex);
971         return 0;
972 fail:
973         WARN_ON(1);
974 #if 0
975         for (i =0; i < faili; i++) {
976                 if (level == 0) {
977                         u64 disk_bytenr;
978                         btrfs_item_key_to_cpu(buf, &key, i);
979                         if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
980                                 continue;
981                         fi = btrfs_item_ptr(buf, i,
982                                             struct btrfs_file_extent_item);
983                         if (btrfs_file_extent_type(buf, fi) ==
984                             BTRFS_FILE_EXTENT_INLINE)
985                                 continue;
986                         disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
987                         if (disk_bytenr == 0)
988                                 continue;
989                         err = btrfs_free_extent(trans, root, disk_bytenr,
990                                     btrfs_file_extent_disk_num_bytes(buf,
991                                                                       fi), 0);
992                         BUG_ON(err);
993                 } else {
994                         bytenr = btrfs_node_blockptr(buf, i);
995                         err = btrfs_free_extent(trans, root, bytenr,
996                                         btrfs_level_size(root, level - 1), 0);
997                         BUG_ON(err);
998                 }
999         }
1000 #endif
1001         mutex_unlock(&root->fs_info->alloc_mutex);
1002         return ret;
1003 }
1004
1005 static int write_one_cache_group(struct btrfs_trans_handle *trans,
1006                                  struct btrfs_root *root,
1007                                  struct btrfs_path *path,
1008                                  struct btrfs_block_group_cache *cache)
1009 {
1010         int ret;
1011         int pending_ret;
1012         struct btrfs_root *extent_root = root->fs_info->extent_root;
1013         unsigned long bi;
1014         struct extent_buffer *leaf;
1015
1016         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
1017         if (ret < 0)
1018                 goto fail;
1019         BUG_ON(ret);
1020
1021         leaf = path->nodes[0];
1022         bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
1023         write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item));
1024         btrfs_mark_buffer_dirty(leaf);
1025         btrfs_release_path(extent_root, path);
1026 fail:
1027         finish_current_insert(trans, extent_root);
1028         pending_ret = del_pending_extents(trans, extent_root);
1029         if (ret)
1030                 return ret;
1031         if (pending_ret)
1032                 return pending_ret;
1033         return 0;
1034
1035 }
1036
1037 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
1038                                    struct btrfs_root *root)
1039 {
1040         struct extent_io_tree *block_group_cache;
1041         struct btrfs_block_group_cache *cache;
1042         int ret;
1043         int err = 0;
1044         int werr = 0;
1045         struct btrfs_path *path;
1046         u64 last = 0;
1047         u64 start;
1048         u64 end;
1049         u64 ptr;
1050
1051         block_group_cache = &root->fs_info->block_group_cache;
1052         path = btrfs_alloc_path();
1053         if (!path)
1054                 return -ENOMEM;
1055
1056         mutex_lock(&root->fs_info->alloc_mutex);
1057         while(1) {
1058                 ret = find_first_extent_bit(block_group_cache, last,
1059                                             &start, &end, BLOCK_GROUP_DIRTY);
1060                 if (ret)
1061                         break;
1062
1063                 last = end + 1;
1064                 ret = get_state_private(block_group_cache, start, &ptr);
1065                 if (ret)
1066                         break;
1067                 cache = (struct btrfs_block_group_cache *)(unsigned long)ptr;
1068                 err = write_one_cache_group(trans, root,
1069                                             path, cache);
1070                 /*
1071                  * if we fail to write the cache group, we want
1072                  * to keep it marked dirty in hopes that a later
1073                  * write will work
1074                  */
1075                 if (err) {
1076                         werr = err;
1077                         continue;
1078                 }
1079                 clear_extent_bits(block_group_cache, start, end,
1080                                   BLOCK_GROUP_DIRTY, GFP_NOFS);
1081         }
1082         btrfs_free_path(path);
1083         mutex_unlock(&root->fs_info->alloc_mutex);
1084         return werr;
1085 }
1086
1087 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
1088                                                   u64 flags)
1089 {
1090         struct list_head *head = &info->space_info;
1091         struct list_head *cur;
1092         struct btrfs_space_info *found;
1093         list_for_each(cur, head) {
1094                 found = list_entry(cur, struct btrfs_space_info, list);
1095                 if (found->flags == flags)
1096                         return found;
1097         }
1098         return NULL;
1099
1100 }
1101
1102 static int update_space_info(struct btrfs_fs_info *info, u64 flags,
1103                              u64 total_bytes, u64 bytes_used,
1104                              struct btrfs_space_info **space_info)
1105 {
1106         struct btrfs_space_info *found;
1107
1108         found = __find_space_info(info, flags);
1109         if (found) {
1110                 found->total_bytes += total_bytes;
1111                 found->bytes_used += bytes_used;
1112                 found->full = 0;
1113                 WARN_ON(found->total_bytes < found->bytes_used);
1114                 *space_info = found;
1115                 return 0;
1116         }
1117         found = kmalloc(sizeof(*found), GFP_NOFS);
1118         if (!found)
1119                 return -ENOMEM;
1120
1121         list_add(&found->list, &info->space_info);
1122         found->flags = flags;
1123         found->total_bytes = total_bytes;
1124         found->bytes_used = bytes_used;
1125         found->bytes_pinned = 0;
1126         found->full = 0;
1127         found->force_alloc = 0;
1128         *space_info = found;
1129         return 0;
1130 }
1131
1132 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
1133 {
1134         u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
1135                                    BTRFS_BLOCK_GROUP_RAID1 |
1136                                    BTRFS_BLOCK_GROUP_RAID10 |
1137                                    BTRFS_BLOCK_GROUP_DUP);
1138         if (extra_flags) {
1139                 if (flags & BTRFS_BLOCK_GROUP_DATA)
1140                         fs_info->avail_data_alloc_bits |= extra_flags;
1141                 if (flags & BTRFS_BLOCK_GROUP_METADATA)
1142                         fs_info->avail_metadata_alloc_bits |= extra_flags;
1143                 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
1144                         fs_info->avail_system_alloc_bits |= extra_flags;
1145         }
1146 }
1147
1148 static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
1149 {
1150         u64 num_devices = root->fs_info->fs_devices->num_devices;
1151
1152         if (num_devices == 1)
1153                 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
1154         if (num_devices < 4)
1155                 flags &= ~BTRFS_BLOCK_GROUP_RAID10;
1156
1157         if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
1158             (flags & (BTRFS_BLOCK_GROUP_RAID1 |
1159                       BTRFS_BLOCK_GROUP_RAID10))) {
1160                 flags &= ~BTRFS_BLOCK_GROUP_DUP;
1161         }
1162
1163         if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
1164             (flags & BTRFS_BLOCK_GROUP_RAID10)) {
1165                 flags &= ~BTRFS_BLOCK_GROUP_RAID1;
1166         }
1167
1168         if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
1169             ((flags & BTRFS_BLOCK_GROUP_RAID1) |
1170              (flags & BTRFS_BLOCK_GROUP_RAID10) |
1171              (flags & BTRFS_BLOCK_GROUP_DUP)))
1172                 flags &= ~BTRFS_BLOCK_GROUP_RAID0;
1173         return flags;
1174 }
1175
1176 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
1177                           struct btrfs_root *extent_root, u64 alloc_bytes,
1178                           u64 flags, int force)
1179 {
1180         struct btrfs_space_info *space_info;
1181         u64 thresh;
1182         u64 start;
1183         u64 num_bytes;
1184         int ret;
1185
1186         flags = reduce_alloc_profile(extent_root, flags);
1187
1188         space_info = __find_space_info(extent_root->fs_info, flags);
1189         if (!space_info) {
1190                 ret = update_space_info(extent_root->fs_info, flags,
1191                                         0, 0, &space_info);
1192                 BUG_ON(ret);
1193         }
1194         BUG_ON(!space_info);
1195
1196         if (space_info->force_alloc) {
1197                 force = 1;
1198                 space_info->force_alloc = 0;
1199         }
1200         if (space_info->full)
1201                 goto out;
1202
1203         thresh = div_factor(space_info->total_bytes, 6);
1204         if (!force &&
1205            (space_info->bytes_used + space_info->bytes_pinned + alloc_bytes) <
1206             thresh)
1207                 goto out;
1208
1209         mutex_lock(&extent_root->fs_info->chunk_mutex);
1210         ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
1211         if (ret == -ENOSPC) {
1212 printk("space info full %Lu\n", flags);
1213                 space_info->full = 1;
1214                 goto out;
1215         }
1216         BUG_ON(ret);
1217
1218         ret = btrfs_make_block_group(trans, extent_root, 0, flags,
1219                      BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
1220         BUG_ON(ret);
1221         mutex_unlock(&extent_root->fs_info->chunk_mutex);
1222 out:
1223         return 0;
1224 }
1225
1226 static int update_block_group(struct btrfs_trans_handle *trans,
1227                               struct btrfs_root *root,
1228                               u64 bytenr, u64 num_bytes, int alloc,
1229                               int mark_free)
1230 {
1231         struct btrfs_block_group_cache *cache;
1232         struct btrfs_fs_info *info = root->fs_info;
1233         u64 total = num_bytes;
1234         u64 old_val;
1235         u64 byte_in_group;
1236         u64 start;
1237         u64 end;
1238
1239         while(total) {
1240                 cache = btrfs_lookup_block_group(info, bytenr);
1241                 if (!cache) {
1242                         return -1;
1243                 }
1244                 byte_in_group = bytenr - cache->key.objectid;
1245                 WARN_ON(byte_in_group > cache->key.offset);
1246                 start = cache->key.objectid;
1247                 end = start + cache->key.offset - 1;
1248                 set_extent_bits(&info->block_group_cache, start, end,
1249                                 BLOCK_GROUP_DIRTY, GFP_NOFS);
1250
1251                 old_val = btrfs_block_group_used(&cache->item);
1252                 num_bytes = min(total, cache->key.offset - byte_in_group);
1253                 if (alloc) {
1254                         old_val += num_bytes;
1255                         cache->space_info->bytes_used += num_bytes;
1256                 } else {
1257                         old_val -= num_bytes;
1258                         cache->space_info->bytes_used -= num_bytes;
1259                         if (mark_free) {
1260                                 set_extent_dirty(&info->free_space_cache,
1261                                                  bytenr, bytenr + num_bytes - 1,
1262                                                  GFP_NOFS);
1263                         }
1264                 }
1265                 btrfs_set_block_group_used(&cache->item, old_val);
1266                 total -= num_bytes;
1267                 bytenr += num_bytes;
1268         }
1269         return 0;
1270 }
1271
1272 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
1273 {
1274         u64 start;
1275         u64 end;
1276         int ret;
1277         ret = find_first_extent_bit(&root->fs_info->block_group_cache,
1278                                     search_start, &start, &end,
1279                                     BLOCK_GROUP_DATA | BLOCK_GROUP_METADATA |
1280                                     BLOCK_GROUP_SYSTEM);
1281         if (ret)
1282                 return 0;
1283         return start;
1284 }
1285
1286
1287 static int update_pinned_extents(struct btrfs_root *root,
1288                                 u64 bytenr, u64 num, int pin)
1289 {
1290         u64 len;
1291         struct btrfs_block_group_cache *cache;
1292         struct btrfs_fs_info *fs_info = root->fs_info;
1293
1294         if (pin) {
1295                 set_extent_dirty(&fs_info->pinned_extents,
1296                                 bytenr, bytenr + num - 1, GFP_NOFS);
1297         } else {
1298                 clear_extent_dirty(&fs_info->pinned_extents,
1299                                 bytenr, bytenr + num - 1, GFP_NOFS);
1300         }
1301         while (num > 0) {
1302                 cache = btrfs_lookup_block_group(fs_info, bytenr);
1303                 if (!cache) {
1304                         u64 first = first_logical_byte(root, bytenr);
1305                         WARN_ON(first < bytenr);
1306                         len = min(first - bytenr, num);
1307                 } else {
1308                         len = min(num, cache->key.offset -
1309                                   (bytenr - cache->key.objectid));
1310                 }
1311                 if (pin) {
1312                         if (cache) {
1313                                 cache->pinned += len;
1314                                 cache->space_info->bytes_pinned += len;
1315                         }
1316                         fs_info->total_pinned += len;
1317                 } else {
1318                         if (cache) {
1319                                 cache->pinned -= len;
1320                                 cache->space_info->bytes_pinned -= len;
1321                         }
1322                         fs_info->total_pinned -= len;
1323                 }
1324                 bytenr += len;
1325                 num -= len;
1326         }
1327         return 0;
1328 }
1329
1330 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
1331 {
1332         u64 last = 0;
1333         u64 start;
1334         u64 end;
1335         struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents;
1336         int ret;
1337
1338         while(1) {
1339                 ret = find_first_extent_bit(pinned_extents, last,
1340                                             &start, &end, EXTENT_DIRTY);
1341                 if (ret)
1342                         break;
1343                 set_extent_dirty(copy, start, end, GFP_NOFS);
1344                 last = end + 1;
1345         }
1346         return 0;
1347 }
1348
1349 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
1350                                struct btrfs_root *root,
1351                                struct extent_io_tree *unpin)
1352 {
1353         u64 start;
1354         u64 end;
1355         int ret;
1356         struct extent_io_tree *free_space_cache;
1357         free_space_cache = &root->fs_info->free_space_cache;
1358
1359         mutex_lock(&root->fs_info->alloc_mutex);
1360         while(1) {
1361                 ret = find_first_extent_bit(unpin, 0, &start, &end,
1362                                             EXTENT_DIRTY);
1363                 if (ret)
1364                         break;
1365                 update_pinned_extents(root, start, end + 1 - start, 0);
1366                 clear_extent_dirty(unpin, start, end, GFP_NOFS);
1367                 set_extent_dirty(free_space_cache, start, end, GFP_NOFS);
1368         }
1369         mutex_unlock(&root->fs_info->alloc_mutex);
1370         return 0;
1371 }
1372
1373 static int finish_current_insert(struct btrfs_trans_handle *trans,
1374                                  struct btrfs_root *extent_root)
1375 {
1376         u64 start;
1377         u64 end;
1378         struct btrfs_fs_info *info = extent_root->fs_info;
1379         struct extent_buffer *eb;
1380         struct btrfs_path *path;
1381         struct btrfs_key ins;
1382         struct btrfs_disk_key first;
1383         struct btrfs_extent_item extent_item;
1384         int ret;
1385         int level;
1386         int err = 0;
1387
1388         btrfs_set_stack_extent_refs(&extent_item, 1);
1389         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
1390         path = btrfs_alloc_path();
1391
1392         while(1) {
1393                 ret = find_first_extent_bit(&info->extent_ins, 0, &start,
1394                                             &end, EXTENT_LOCKED);
1395                 if (ret)
1396                         break;
1397
1398                 ins.objectid = start;
1399                 ins.offset = end + 1 - start;
1400                 err = btrfs_insert_item(trans, extent_root, &ins,
1401                                         &extent_item, sizeof(extent_item));
1402                 clear_extent_bits(&info->extent_ins, start, end, EXTENT_LOCKED,
1403                                   GFP_NOFS);
1404                 eb = read_tree_block(extent_root, ins.objectid, ins.offset,
1405                                      trans->transid);
1406                 btrfs_tree_lock(eb);
1407                 level = btrfs_header_level(eb);
1408                 if (level == 0) {
1409                         btrfs_item_key(eb, &first, 0);
1410                 } else {
1411                         btrfs_node_key(eb, &first, 0);
1412                 }
1413                 btrfs_tree_unlock(eb);
1414                 free_extent_buffer(eb);
1415                 /*
1416                  * the first key is just a hint, so the race we've created
1417                  * against reading it is fine
1418                  */
1419                 err = btrfs_insert_extent_backref(trans, extent_root, path,
1420                                           start, extent_root->root_key.objectid,
1421                                           0, level,
1422                                           btrfs_disk_key_objectid(&first));
1423                 BUG_ON(err);
1424         }
1425         btrfs_free_path(path);
1426         return 0;
1427 }
1428
1429 static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
1430                           int pending)
1431 {
1432         int err = 0;
1433
1434         if (!pending) {
1435 #if 0
1436                 struct extent_buffer *buf;
1437                 buf = btrfs_find_tree_block(root, bytenr, num_bytes);
1438                 if (buf) {
1439                         if (!btrfs_try_tree_lock(buf) &&
1440                             btrfs_buffer_uptodate(buf, 0)) {
1441                                 u64 transid =
1442                                     root->fs_info->running_transaction->transid;
1443                                 u64 header_transid =
1444                                         btrfs_header_generation(buf);
1445                                 if (header_transid == transid &&
1446                                     !btrfs_header_flag(buf,
1447                                                BTRFS_HEADER_FLAG_WRITTEN)) {
1448                                         clean_tree_block(NULL, root, buf);
1449                                         btrfs_tree_unlock(buf);
1450                                         free_extent_buffer(buf);
1451                                         return 1;
1452                                 }
1453                                 btrfs_tree_unlock(buf);
1454                         }
1455                         free_extent_buffer(buf);
1456                 }
1457 #endif
1458                 update_pinned_extents(root, bytenr, num_bytes, 1);
1459         } else {
1460                 set_extent_bits(&root->fs_info->pending_del,
1461                                 bytenr, bytenr + num_bytes - 1,
1462                                 EXTENT_LOCKED, GFP_NOFS);
1463         }
1464         BUG_ON(err < 0);
1465         return 0;
1466 }
1467
1468 /*
1469  * remove an extent from the root, returns 0 on success
1470  */
1471 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
1472                          *root, u64 bytenr, u64 num_bytes,
1473                          u64 root_objectid, u64 ref_generation,
1474                          u64 owner_objectid, u64 owner_offset, int pin,
1475                          int mark_free)
1476 {
1477         struct btrfs_path *path;
1478         struct btrfs_key key;
1479         struct btrfs_fs_info *info = root->fs_info;
1480         struct btrfs_root *extent_root = info->extent_root;
1481         struct extent_buffer *leaf;
1482         int ret;
1483         int extent_slot = 0;
1484         int found_extent = 0;
1485         int num_to_del = 1;
1486         struct btrfs_extent_item *ei;
1487         u32 refs;
1488
1489         key.objectid = bytenr;
1490         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
1491         key.offset = num_bytes;
1492         path = btrfs_alloc_path();
1493         if (!path)
1494                 return -ENOMEM;
1495
1496         path->reada = 1;
1497         ret = lookup_extent_backref(trans, extent_root, path,
1498                                     bytenr, root_objectid,
1499                                     ref_generation,
1500                                     owner_objectid, owner_offset, 1);
1501         if (ret == 0) {
1502                 struct btrfs_key found_key;
1503                 extent_slot = path->slots[0];
1504                 while(extent_slot > 0) {
1505                         extent_slot--;
1506                         btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1507                                               extent_slot);
1508                         if (found_key.objectid != bytenr)
1509                                 break;
1510                         if (found_key.type == BTRFS_EXTENT_ITEM_KEY &&
1511                             found_key.offset == num_bytes) {
1512                                 found_extent = 1;
1513                                 break;
1514                         }
1515                         if (path->slots[0] - extent_slot > 5)
1516                                 break;
1517                 }
1518                 if (!found_extent)
1519                         ret = btrfs_del_item(trans, extent_root, path);
1520         } else {
1521                 btrfs_print_leaf(extent_root, path->nodes[0]);
1522                 WARN_ON(1);
1523                 printk("Unable to find ref byte nr %Lu root %Lu "
1524                        " gen %Lu owner %Lu offset %Lu\n", bytenr,
1525                        root_objectid, ref_generation, owner_objectid,
1526                        owner_offset);
1527         }
1528         if (!found_extent) {
1529                 btrfs_release_path(extent_root, path);
1530                 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
1531                 if (ret < 0)
1532                         return ret;
1533                 BUG_ON(ret);
1534                 extent_slot = path->slots[0];
1535         }
1536
1537         leaf = path->nodes[0];
1538         ei = btrfs_item_ptr(leaf, extent_slot,
1539                             struct btrfs_extent_item);
1540         refs = btrfs_extent_refs(leaf, ei);
1541         BUG_ON(refs == 0);
1542         refs -= 1;
1543         btrfs_set_extent_refs(leaf, ei, refs);
1544
1545         btrfs_mark_buffer_dirty(leaf);
1546
1547         if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) {
1548                 /* if the back ref and the extent are next to each other
1549                  * they get deleted below in one shot
1550                  */
1551                 path->slots[0] = extent_slot;
1552                 num_to_del = 2;
1553         } else if (found_extent) {
1554                 /* otherwise delete the extent back ref */
1555                 ret = btrfs_del_item(trans, extent_root, path);
1556                 BUG_ON(ret);
1557                 /* if refs are 0, we need to setup the path for deletion */
1558                 if (refs == 0) {
1559                         btrfs_release_path(extent_root, path);
1560                         ret = btrfs_search_slot(trans, extent_root, &key, path,
1561                                                 -1, 1);
1562                         if (ret < 0)
1563                                 return ret;
1564                         BUG_ON(ret);
1565                 }
1566         }
1567
1568         if (refs == 0) {
1569                 u64 super_used;
1570                 u64 root_used;
1571
1572                 if (pin) {
1573                         ret = pin_down_bytes(root, bytenr, num_bytes, 0);
1574                         if (ret > 0)
1575                                 mark_free = 1;
1576                         BUG_ON(ret < 0);
1577                 }
1578
1579                 /* block accounting for super block */
1580                 spin_lock_irq(&info->delalloc_lock);
1581                 super_used = btrfs_super_bytes_used(&info->super_copy);
1582                 btrfs_set_super_bytes_used(&info->super_copy,
1583                                            super_used - num_bytes);
1584                 spin_unlock_irq(&info->delalloc_lock);
1585
1586                 /* block accounting for root item */
1587                 root_used = btrfs_root_used(&root->root_item);
1588                 btrfs_set_root_used(&root->root_item,
1589                                            root_used - num_bytes);
1590                 ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
1591                                       num_to_del);
1592                 if (ret) {
1593                         return ret;
1594                 }
1595                 ret = update_block_group(trans, root, bytenr, num_bytes, 0,
1596                                          mark_free);
1597                 BUG_ON(ret);
1598         }
1599         btrfs_free_path(path);
1600         finish_current_insert(trans, extent_root);
1601         return ret;
1602 }
1603
1604 /*
1605  * find all the blocks marked as pending in the radix tree and remove
1606  * them from the extent map
1607  */
1608 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
1609                                btrfs_root *extent_root)
1610 {
1611         int ret;
1612         int err = 0;
1613         u64 start;
1614         u64 end;
1615         struct extent_io_tree *pending_del;
1616         struct extent_io_tree *pinned_extents;
1617
1618         pending_del = &extent_root->fs_info->pending_del;
1619         pinned_extents = &extent_root->fs_info->pinned_extents;
1620
1621         while(1) {
1622                 ret = find_first_extent_bit(pending_del, 0, &start, &end,
1623                                             EXTENT_LOCKED);
1624                 if (ret)
1625                         break;
1626                 update_pinned_extents(extent_root, start, end + 1 - start, 1);
1627                 clear_extent_bits(pending_del, start, end, EXTENT_LOCKED,
1628                                   GFP_NOFS);
1629                 ret = __free_extent(trans, extent_root,
1630                                      start, end + 1 - start,
1631                                      extent_root->root_key.objectid,
1632                                      0, 0, 0, 0, 0);
1633                 if (ret)
1634                         err = ret;
1635         }
1636         return err;
1637 }
1638
1639 /*
1640  * remove an extent from the root, returns 0 on success
1641  */
1642 static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
1643                                struct btrfs_root *root, u64 bytenr,
1644                                u64 num_bytes, u64 root_objectid,
1645                                u64 ref_generation, u64 owner_objectid,
1646                                u64 owner_offset, int pin)
1647 {
1648         struct btrfs_root *extent_root = root->fs_info->extent_root;
1649         int pending_ret;
1650         int ret;
1651
1652         WARN_ON(num_bytes < root->sectorsize);
1653         if (!root->ref_cows)
1654                 ref_generation = 0;
1655
1656         if (root == extent_root) {
1657                 pin_down_bytes(root, bytenr, num_bytes, 1);
1658                 return 0;
1659         }
1660         ret = __free_extent(trans, root, bytenr, num_bytes, root_objectid,
1661                             ref_generation, owner_objectid, owner_offset,
1662                             pin, pin == 0);
1663         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
1664         return ret ? ret : pending_ret;
1665 }
1666
1667 int btrfs_free_extent(struct btrfs_trans_handle *trans,
1668                       struct btrfs_root *root, u64 bytenr,
1669                       u64 num_bytes, u64 root_objectid,
1670                       u64 ref_generation, u64 owner_objectid,
1671                       u64 owner_offset, int pin)
1672 {
1673         int ret;
1674
1675         maybe_lock_mutex(root);
1676         ret = __btrfs_free_extent(trans, root, bytenr, num_bytes,
1677                                   root_objectid, ref_generation,
1678                                   owner_objectid, owner_offset, pin);
1679         maybe_unlock_mutex(root);
1680         return ret;
1681 }
1682
1683 static u64 stripe_align(struct btrfs_root *root, u64 val)
1684 {
1685         u64 mask = ((u64)root->stripesize - 1);
1686         u64 ret = (val + mask) & ~mask;
1687         return ret;
1688 }
1689
1690 /*
1691  * walks the btree of allocated extents and find a hole of a given size.
1692  * The key ins is changed to record the hole:
1693  * ins->objectid == block start
1694  * ins->flags = BTRFS_EXTENT_ITEM_KEY
1695  * ins->offset == number of blocks
1696  * Any available blocks before search_start are skipped.
1697  */
1698 static int noinline find_free_extent(struct btrfs_trans_handle *trans,
1699                                      struct btrfs_root *orig_root,
1700                                      u64 num_bytes, u64 empty_size,
1701                                      u64 search_start, u64 search_end,
1702                                      u64 hint_byte, struct btrfs_key *ins,
1703                                      u64 exclude_start, u64 exclude_nr,
1704                                      int data)
1705 {
1706         int ret;
1707         u64 orig_search_start;
1708         struct btrfs_root * root = orig_root->fs_info->extent_root;
1709         struct btrfs_fs_info *info = root->fs_info;
1710         u64 total_needed = num_bytes;
1711         u64 *last_ptr = NULL;
1712         struct btrfs_block_group_cache *block_group;
1713         int full_scan = 0;
1714         int wrapped = 0;
1715         int chunk_alloc_done = 0;
1716         int empty_cluster = 2 * 1024 * 1024;
1717         int allowed_chunk_alloc = 0;
1718
1719         WARN_ON(num_bytes < root->sectorsize);
1720         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
1721
1722         if (orig_root->ref_cows || empty_size)
1723                 allowed_chunk_alloc = 1;
1724
1725         if (data & BTRFS_BLOCK_GROUP_METADATA) {
1726                 last_ptr = &root->fs_info->last_alloc;
1727                 empty_cluster = 256 * 1024;
1728         }
1729
1730         if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
1731                 last_ptr = &root->fs_info->last_data_alloc;
1732         }
1733
1734         if (last_ptr) {
1735                 if (*last_ptr)
1736                         hint_byte = *last_ptr;
1737                 else {
1738                         empty_size += empty_cluster;
1739                 }
1740         }
1741
1742         search_start = max(search_start, first_logical_byte(root, 0));
1743         orig_search_start = search_start;
1744
1745         if (search_end == (u64)-1)
1746                 search_end = btrfs_super_total_bytes(&info->super_copy);
1747
1748         if (hint_byte) {
1749                 block_group = btrfs_lookup_first_block_group(info, hint_byte);
1750                 if (!block_group)
1751                         hint_byte = search_start;
1752                 block_group = __btrfs_find_block_group(root, block_group,
1753                                                      hint_byte, data, 1);
1754                 if (last_ptr && *last_ptr == 0 && block_group)
1755                         hint_byte = block_group->key.objectid;
1756         } else {
1757                 block_group = __btrfs_find_block_group(root,
1758                                                      trans->block_group,
1759                                                      search_start, data, 1);
1760         }
1761         search_start = max(search_start, hint_byte);
1762
1763         total_needed += empty_size;
1764
1765 check_failed:
1766         if (!block_group) {
1767                 block_group = btrfs_lookup_first_block_group(info,
1768                                                              search_start);
1769                 if (!block_group)
1770                         block_group = btrfs_lookup_first_block_group(info,
1771                                                        orig_search_start);
1772         }
1773         if (full_scan && !chunk_alloc_done) {
1774                 if (allowed_chunk_alloc) {
1775                         do_chunk_alloc(trans, root,
1776                                      num_bytes + 2 * 1024 * 1024, data, 1);
1777                         allowed_chunk_alloc = 0;
1778                 } else if (block_group && block_group_bits(block_group, data)) {
1779                         block_group->space_info->force_alloc = 1;
1780                 }
1781                 chunk_alloc_done = 1;
1782         }
1783         ret = find_search_start(root, &block_group, &search_start,
1784                                 total_needed, data);
1785         if (ret == -ENOSPC && last_ptr && *last_ptr) {
1786                 *last_ptr = 0;
1787                 block_group = btrfs_lookup_first_block_group(info,
1788                                                              orig_search_start);
1789                 search_start = orig_search_start;
1790                 ret = find_search_start(root, &block_group, &search_start,
1791                                         total_needed, data);
1792         }
1793         if (ret == -ENOSPC)
1794                 goto enospc;
1795         if (ret)
1796                 goto error;
1797
1798         if (last_ptr && *last_ptr && search_start != *last_ptr) {
1799                 *last_ptr = 0;
1800                 if (!empty_size) {
1801                         empty_size += empty_cluster;
1802                         total_needed += empty_size;
1803                 }
1804                 block_group = btrfs_lookup_first_block_group(info,
1805                                                        orig_search_start);
1806                 search_start = orig_search_start;
1807                 ret = find_search_start(root, &block_group,
1808                                         &search_start, total_needed, data);
1809                 if (ret == -ENOSPC)
1810                         goto enospc;
1811                 if (ret)
1812                         goto error;
1813         }
1814
1815         search_start = stripe_align(root, search_start);
1816         ins->objectid = search_start;
1817         ins->offset = num_bytes;
1818
1819         if (ins->objectid + num_bytes >= search_end)
1820                 goto enospc;
1821
1822         if (ins->objectid + num_bytes >
1823             block_group->key.objectid + block_group->key.offset) {
1824                 search_start = block_group->key.objectid +
1825                         block_group->key.offset;
1826                 goto new_group;
1827         }
1828
1829         if (test_range_bit(&info->extent_ins, ins->objectid,
1830                            ins->objectid + num_bytes -1, EXTENT_LOCKED, 0)) {
1831                 search_start = ins->objectid + num_bytes;
1832                 goto new_group;
1833         }
1834
1835         if (test_range_bit(&info->pinned_extents, ins->objectid,
1836                            ins->objectid + num_bytes -1, EXTENT_DIRTY, 0)) {
1837                 search_start = ins->objectid + num_bytes;
1838                 goto new_group;
1839         }
1840
1841         if (exclude_nr > 0 && (ins->objectid + num_bytes > exclude_start &&
1842             ins->objectid < exclude_start + exclude_nr)) {
1843                 search_start = exclude_start + exclude_nr;
1844                 goto new_group;
1845         }
1846
1847         if (!(data & BTRFS_BLOCK_GROUP_DATA)) {
1848                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1849                 if (block_group)
1850                         trans->block_group = block_group;
1851         }
1852         ins->offset = num_bytes;
1853         if (last_ptr) {
1854                 *last_ptr = ins->objectid + ins->offset;
1855                 if (*last_ptr ==
1856                     btrfs_super_total_bytes(&root->fs_info->super_copy)) {
1857                         *last_ptr = 0;
1858                 }
1859         }
1860         return 0;
1861
1862 new_group:
1863         if (search_start + num_bytes >= search_end) {
1864 enospc:
1865                 search_start = orig_search_start;
1866                 if (full_scan) {
1867                         ret = -ENOSPC;
1868                         goto error;
1869                 }
1870                 if (wrapped) {
1871                         if (!full_scan)
1872                                 total_needed -= empty_size;
1873                         full_scan = 1;
1874                 } else
1875                         wrapped = 1;
1876         }
1877         block_group = btrfs_lookup_first_block_group(info, search_start);
1878         cond_resched();
1879         block_group = __btrfs_find_block_group(root, block_group,
1880                                              search_start, data, 0);
1881         goto check_failed;
1882
1883 error:
1884         return ret;
1885 }
1886
1887 /*
1888  * finds a free extent and does all the dirty work required for allocation
1889  * returns the key for the extent through ins, and a tree buffer for
1890  * the first block of the extent through buf.
1891  *
1892  * returns 0 if everything worked, non-zero otherwise.
1893  */
1894 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1895                        struct btrfs_root *root,
1896                        u64 num_bytes, u64 min_alloc_size,
1897                        u64 root_objectid, u64 ref_generation,
1898                        u64 owner, u64 owner_offset,
1899                        u64 empty_size, u64 hint_byte,
1900                        u64 search_end, struct btrfs_key *ins, u64 data)
1901 {
1902         int ret;
1903         int pending_ret;
1904         u64 super_used;
1905         u64 root_used;
1906         u64 search_start = 0;
1907         u64 alloc_profile;
1908         u32 sizes[2];
1909         struct btrfs_fs_info *info = root->fs_info;
1910         struct btrfs_root *extent_root = info->extent_root;
1911         struct btrfs_extent_item *extent_item;
1912         struct btrfs_extent_ref *ref;
1913         struct btrfs_path *path;
1914         struct btrfs_key keys[2];
1915
1916         maybe_lock_mutex(root);
1917
1918         if (data) {
1919                 alloc_profile = info->avail_data_alloc_bits &
1920                                 info->data_alloc_profile;
1921                 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
1922         } else if (root == root->fs_info->chunk_root) {
1923                 alloc_profile = info->avail_system_alloc_bits &
1924                                 info->system_alloc_profile;
1925                 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
1926         } else {
1927                 alloc_profile = info->avail_metadata_alloc_bits &
1928                                 info->metadata_alloc_profile;
1929                 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
1930         }
1931 again:
1932         data = reduce_alloc_profile(root, data);
1933         /*
1934          * the only place that sets empty_size is btrfs_realloc_node, which
1935          * is not called recursively on allocations
1936          */
1937         if (empty_size || root->ref_cows) {
1938                 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
1939                         ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1940                                      2 * 1024 * 1024,
1941                                      BTRFS_BLOCK_GROUP_METADATA |
1942                                      (info->metadata_alloc_profile &
1943                                       info->avail_metadata_alloc_bits), 0);
1944                         BUG_ON(ret);
1945                 }
1946                 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
1947                                      num_bytes + 2 * 1024 * 1024, data, 0);
1948                 BUG_ON(ret);
1949         }
1950
1951         WARN_ON(num_bytes < root->sectorsize);
1952         ret = find_free_extent(trans, root, num_bytes, empty_size,
1953                                search_start, search_end, hint_byte, ins,
1954                                trans->alloc_exclude_start,
1955                                trans->alloc_exclude_nr, data);
1956
1957         if (ret == -ENOSPC && num_bytes > min_alloc_size) {
1958                 num_bytes = num_bytes >> 1;
1959                 num_bytes = max(num_bytes, min_alloc_size);
1960                 do_chunk_alloc(trans, root->fs_info->extent_root,
1961                                num_bytes, data, 1);
1962                 goto again;
1963         }
1964         if (ret) {
1965                 printk("allocation failed flags %Lu\n", data);
1966         }
1967         if (ret) {
1968                 BUG();
1969                 goto out;
1970         }
1971
1972         /* block accounting for super block */
1973         spin_lock_irq(&info->delalloc_lock);
1974         super_used = btrfs_super_bytes_used(&info->super_copy);
1975         btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);
1976         spin_unlock_irq(&info->delalloc_lock);
1977
1978         /* block accounting for root item */
1979         root_used = btrfs_root_used(&root->root_item);
1980         btrfs_set_root_used(&root->root_item, root_used + num_bytes);
1981
1982         clear_extent_dirty(&root->fs_info->free_space_cache,
1983                            ins->objectid, ins->objectid + ins->offset - 1,
1984                            GFP_NOFS);
1985
1986         if (root == extent_root) {
1987                 set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
1988                                 ins->objectid + ins->offset - 1,
1989                                 EXTENT_LOCKED, GFP_NOFS);
1990                 goto update_block;
1991         }
1992
1993         WARN_ON(trans->alloc_exclude_nr);
1994         trans->alloc_exclude_start = ins->objectid;
1995         trans->alloc_exclude_nr = ins->offset;
1996
1997         memcpy(&keys[0], ins, sizeof(*ins));
1998         keys[1].offset = hash_extent_ref(root_objectid, ref_generation,
1999                                          owner, owner_offset);
2000         keys[1].objectid = ins->objectid;
2001         keys[1].type = BTRFS_EXTENT_REF_KEY;
2002         sizes[0] = sizeof(*extent_item);
2003         sizes[1] = sizeof(*ref);
2004
2005         path = btrfs_alloc_path();
2006         BUG_ON(!path);
2007
2008         ret = btrfs_insert_empty_items(trans, extent_root, path, keys,
2009                                        sizes, 2);
2010
2011         BUG_ON(ret);
2012         extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2013                                      struct btrfs_extent_item);
2014         btrfs_set_extent_refs(path->nodes[0], extent_item, 1);
2015         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
2016                              struct btrfs_extent_ref);
2017
2018         btrfs_set_ref_root(path->nodes[0], ref, root_objectid);
2019         btrfs_set_ref_generation(path->nodes[0], ref, ref_generation);
2020         btrfs_set_ref_objectid(path->nodes[0], ref, owner);
2021         btrfs_set_ref_offset(path->nodes[0], ref, owner_offset);
2022
2023         btrfs_mark_buffer_dirty(path->nodes[0]);
2024
2025         trans->alloc_exclude_start = 0;
2026         trans->alloc_exclude_nr = 0;
2027         btrfs_free_path(path);
2028         finish_current_insert(trans, extent_root);
2029         pending_ret = del_pending_extents(trans, extent_root);
2030
2031         if (ret)
2032                 goto out;
2033         if (pending_ret) {
2034                 ret = pending_ret;
2035                 goto out;
2036         }
2037
2038 update_block:
2039         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0);
2040         if (ret) {
2041                 printk("update block group failed for %Lu %Lu\n",
2042                        ins->objectid, ins->offset);
2043                 BUG();
2044         }
2045 out:
2046         maybe_unlock_mutex(root);
2047         return ret;
2048 }
2049 /*
2050  * helper function to allocate a block for a given tree
2051  * returns the tree buffer or NULL.
2052  */
2053 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
2054                                              struct btrfs_root *root,
2055                                              u32 blocksize,
2056                                              u64 root_objectid,
2057                                              u64 ref_generation,
2058                                              u64 first_objectid,
2059                                              int level,
2060                                              u64 hint,
2061                                              u64 empty_size)
2062 {
2063         struct btrfs_key ins;
2064         int ret;
2065         struct extent_buffer *buf;
2066
2067         ret = btrfs_alloc_extent(trans, root, blocksize, blocksize,
2068                                  root_objectid, ref_generation,
2069                                  level, first_objectid, empty_size, hint,
2070                                  (u64)-1, &ins, 0);
2071         if (ret) {
2072                 BUG_ON(ret > 0);
2073                 return ERR_PTR(ret);
2074         }
2075         buf = btrfs_find_create_tree_block(root, ins.objectid, blocksize);
2076         if (!buf) {
2077                 btrfs_free_extent(trans, root, ins.objectid, blocksize,
2078                                   root->root_key.objectid, ref_generation,
2079                                   0, 0, 0);
2080                 return ERR_PTR(-ENOMEM);
2081         }
2082         btrfs_set_header_generation(buf, trans->transid);
2083         btrfs_tree_lock(buf);
2084         clean_tree_block(trans, root, buf);
2085         btrfs_set_buffer_uptodate(buf);
2086
2087         if (PageDirty(buf->first_page)) {
2088                 printk("page %lu dirty\n", buf->first_page->index);
2089                 WARN_ON(1);
2090         }
2091
2092         set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
2093                          buf->start + buf->len - 1, GFP_NOFS);
2094         if (!btrfs_test_opt(root, SSD))
2095                 btrfs_set_buffer_defrag(buf);
2096         trans->blocks_used++;
2097         return buf;
2098 }
2099
2100 static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
2101                                   struct btrfs_root *root,
2102                                   struct extent_buffer *leaf)
2103 {
2104         u64 leaf_owner;
2105         u64 leaf_generation;
2106         struct btrfs_key key;
2107         struct btrfs_file_extent_item *fi;
2108         int i;
2109         int nritems;
2110         int ret;
2111
2112         BUG_ON(!btrfs_is_leaf(leaf));
2113         nritems = btrfs_header_nritems(leaf);
2114         leaf_owner = btrfs_header_owner(leaf);
2115         leaf_generation = btrfs_header_generation(leaf);
2116
2117         for (i = 0; i < nritems; i++) {
2118                 u64 disk_bytenr;
2119
2120                 btrfs_item_key_to_cpu(leaf, &key, i);
2121                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
2122                         continue;
2123                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
2124                 if (btrfs_file_extent_type(leaf, fi) ==
2125                     BTRFS_FILE_EXTENT_INLINE)
2126                         continue;
2127                 /*
2128                  * FIXME make sure to insert a trans record that
2129                  * repeats the snapshot del on crash
2130                  */
2131                 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
2132                 if (disk_bytenr == 0)
2133                         continue;
2134                 ret = __btrfs_free_extent(trans, root, disk_bytenr,
2135                                 btrfs_file_extent_disk_num_bytes(leaf, fi),
2136                                 leaf_owner, leaf_generation,
2137                                 key.objectid, key.offset, 0);
2138                 BUG_ON(ret);
2139         }
2140         return 0;
2141 }
2142
2143 static void noinline reada_walk_down(struct btrfs_root *root,
2144                                      struct extent_buffer *node,
2145                                      int slot)
2146 {
2147         u64 bytenr;
2148         u64 last = 0;
2149         u32 nritems;
2150         u32 refs;
2151         u32 blocksize;
2152         int ret;
2153         int i;
2154         int level;
2155         int skipped = 0;
2156
2157         nritems = btrfs_header_nritems(node);
2158         level = btrfs_header_level(node);
2159         if (level)
2160                 return;
2161
2162         for (i = slot; i < nritems && skipped < 32; i++) {
2163                 bytenr = btrfs_node_blockptr(node, i);
2164                 if (last && ((bytenr > last && bytenr - last > 32 * 1024) ||
2165                              (last > bytenr && last - bytenr > 32 * 1024))) {
2166                         skipped++;
2167                         continue;
2168                 }
2169                 blocksize = btrfs_level_size(root, level - 1);
2170                 if (i != slot) {
2171                         ret = lookup_extent_ref(NULL, root, bytenr,
2172                                                 blocksize, &refs);
2173                         BUG_ON(ret);
2174                         if (refs != 1) {
2175                                 skipped++;
2176                                 continue;
2177                         }
2178                 }
2179                 mutex_unlock(&root->fs_info->alloc_mutex);
2180                 ret = readahead_tree_block(root, bytenr, blocksize,
2181                                            btrfs_node_ptr_generation(node, i));
2182                 last = bytenr + blocksize;
2183                 cond_resched();
2184                 mutex_lock(&root->fs_info->alloc_mutex);
2185                 if (ret)
2186                         break;
2187         }
2188 }
2189
2190 /*
2191  * helper function for drop_snapshot, this walks down the tree dropping ref
2192  * counts as it goes.
2193  */
2194 static int noinline walk_down_tree(struct btrfs_trans_handle *trans,
2195                                    struct btrfs_root *root,
2196                                    struct btrfs_path *path, int *level)
2197 {
2198         u64 root_owner;
2199         u64 root_gen;
2200         u64 bytenr;
2201         u64 ptr_gen;
2202         struct extent_buffer *next;
2203         struct extent_buffer *cur;
2204         struct extent_buffer *parent;
2205         u32 blocksize;
2206         int ret;
2207         u32 refs;
2208
2209         mutex_lock(&root->fs_info->alloc_mutex);
2210
2211         WARN_ON(*level < 0);
2212         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2213         ret = lookup_extent_ref(trans, root,
2214                                 path->nodes[*level]->start,
2215                                 path->nodes[*level]->len, &refs);
2216         BUG_ON(ret);
2217         if (refs > 1)
2218                 goto out;
2219
2220         /*
2221          * walk down to the last node level and free all the leaves
2222          */
2223         while(*level >= 0) {
2224                 WARN_ON(*level < 0);
2225                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
2226                 cur = path->nodes[*level];
2227
2228                 if (btrfs_header_level(cur) != *level)
2229                         WARN_ON(1);
2230
2231                 if (path->slots[*level] >=
2232                     btrfs_header_nritems(cur))
2233                         break;
2234                 if (*level == 0) {
2235                         ret = drop_leaf_ref(trans, root, cur);
2236                         BUG_ON(ret);
2237                         break;
2238                 }
2239                 bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
2240                 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
2241                 blocksize = btrfs_level_size(root, *level - 1);
2242
2243                 ret = lookup_extent_ref(trans, root, bytenr, blocksize, &refs);
2244                 BUG_ON(ret);
2245                 if (refs != 1) {
2246                         parent = path->nodes[*level];
2247                         root_owner = btrfs_header_owner(parent);
2248                         root_gen = btrfs_header_generation(parent);
2249                         path->slots[*level]++;
2250                         ret = __btrfs_free_extent(trans, root, bytenr,
2251                                                 blocksize, root_owner,
2252                                                 root_gen, 0, 0, 1);
2253                         BUG_ON(ret);
2254                         continue;
2255                 }
2256                 next = btrfs_find_tree_block(root, bytenr, blocksize);
2257                 if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
2258                         free_extent_buffer(next);
2259                         reada_walk_down(root, cur, path->slots[*level]);
2260
2261                         mutex_unlock(&root->fs_info->alloc_mutex);
2262                         next = read_tree_block(root, bytenr, blocksize,
2263                                                ptr_gen);
2264                         mutex_lock(&root->fs_info->alloc_mutex);
2265
2266                         /* we've dropped the lock, double check */
2267                         ret = lookup_extent_ref(trans, root, bytenr,
2268                                                 blocksize, &refs);
2269                         BUG_ON(ret);
2270                         if (refs != 1) {
2271                                 parent = path->nodes[*level];
2272                                 root_owner = btrfs_header_owner(parent);
2273                                 root_gen = btrfs_header_generation(parent);
2274
2275                                 path->slots[*level]++;
2276                                 free_extent_buffer(next);
2277                                 ret = __btrfs_free_extent(trans, root, bytenr,
2278                                                         blocksize,
2279                                                         root_owner,
2280                                                         root_gen, 0, 0, 1);
2281                                 BUG_ON(ret);
2282                                 continue;
2283                         }
2284                 }
2285                 WARN_ON(*level <= 0);
2286                 if (path->nodes[*level-1])
2287                         free_extent_buffer(path->nodes[*level-1]);
2288                 path->nodes[*level-1] = next;
2289                 *level = btrfs_header_level(next);
2290                 path->slots[*level] = 0;
2291         }
2292 out:
2293         WARN_ON(*level < 0);
2294         WARN_ON(*level >= BTRFS_MAX_LEVEL);
2295
2296         if (path->nodes[*level] == root->node) {
2297                 root_owner = root->root_key.objectid;
2298                 parent = path->nodes[*level];
2299         } else {
2300                 parent = path->nodes[*level + 1];
2301                 root_owner = btrfs_header_owner(parent);
2302         }
2303
2304         root_gen = btrfs_header_generation(parent);
2305         ret = __btrfs_free_extent(trans, root, path->nodes[*level]->start,
2306                                 path->nodes[*level]->len,
2307                                 root_owner, root_gen, 0, 0, 1);
2308         free_extent_buffer(path->nodes[*level]);
2309         path->nodes[*level] = NULL;
2310         *level += 1;
2311         BUG_ON(ret);
2312         mutex_unlock(&root->fs_info->alloc_mutex);
2313         return 0;
2314 }
2315
2316 /*
2317  * helper for dropping snapshots.  This walks back up the tree in the path
2318  * to find the first node higher up where we haven't yet gone through
2319  * all the slots
2320  */
2321 static int noinline walk_up_tree(struct btrfs_trans_handle *trans,
2322                                  struct btrfs_root *root,
2323                                  struct btrfs_path *path, int *level)
2324 {
2325         u64 root_owner;
2326         u64 root_gen;
2327         struct btrfs_root_item *root_item = &root->root_item;
2328         int i;
2329         int slot;
2330         int ret;
2331
2332         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
2333                 slot = path->slots[i];
2334                 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) {
2335                         struct extent_buffer *node;
2336                         struct btrfs_disk_key disk_key;
2337                         node = path->nodes[i];
2338                         path->slots[i]++;
2339                         *level = i;
2340                         WARN_ON(*level == 0);
2341                         btrfs_node_key(node, &disk_key, path->slots[i]);
2342                         memcpy(&root_item->drop_progress,
2343                                &disk_key, sizeof(disk_key));
2344                         root_item->drop_level = i;
2345                         return 0;
2346                 } else {
2347                         if (path->nodes[*level] == root->node) {
2348                                 root_owner = root->root_key.objectid;
2349                                 root_gen =
2350                                    btrfs_header_generation(path->nodes[*level]);
2351                         } else {
2352                                 struct extent_buffer *node;
2353                                 node = path->nodes[*level + 1];
2354                                 root_owner = btrfs_header_owner(node);
2355                                 root_gen = btrfs_header_generation(node);
2356                         }
2357                         ret = btrfs_free_extent(trans, root,
2358                                                 path->nodes[*level]->start,
2359                                                 path->nodes[*level]->len,
2360                                                 root_owner, root_gen, 0, 0, 1);
2361                         BUG_ON(ret);
2362                         free_extent_buffer(path->nodes[*level]);
2363                         path->nodes[*level] = NULL;
2364                         *level = i + 1;
2365                 }
2366         }
2367         return 1;
2368 }
2369
2370 /*
2371  * drop the reference count on the tree rooted at 'snap'.  This traverses
2372  * the tree freeing any blocks that have a ref count of zero after being
2373  * decremented.
2374  */
2375 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
2376                         *root)
2377 {
2378         int ret = 0;
2379         int wret;
2380         int level;
2381         struct btrfs_path *path;
2382         int i;
2383         int orig_level;
2384         struct btrfs_root_item *root_item = &root->root_item;
2385
2386         WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex));
2387         path = btrfs_alloc_path();
2388         BUG_ON(!path);
2389
2390         level = btrfs_header_level(root->node);
2391         orig_level = level;
2392         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
2393                 path->nodes[level] = root->node;
2394                 extent_buffer_get(root->node);
2395                 path->slots[level] = 0;
2396         } else {
2397                 struct btrfs_key key;
2398                 struct btrfs_disk_key found_key;
2399                 struct extent_buffer *node;
2400
2401                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
2402                 level = root_item->drop_level;
2403                 path->lowest_level = level;
2404                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2405                 if (wret < 0) {
2406                         ret = wret;
2407                         goto out;
2408                 }
2409                 node = path->nodes[level];
2410                 btrfs_node_key(node, &found_key, path->slots[level]);
2411                 WARN_ON(memcmp(&found_key, &root_item->drop_progress,
2412                                sizeof(found_key)));
2413                 for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
2414                         if (path->nodes[i] && path->locks[i]) {
2415                                 path->locks[i] = 0;
2416                                 btrfs_tree_unlock(path->nodes[i]);
2417                         }
2418                 }
2419         }
2420         while(1) {
2421                 wret = walk_down_tree(trans, root, path, &level);
2422                 if (wret > 0)
2423                         break;
2424                 if (wret < 0)
2425                         ret = wret;
2426
2427                 wret = walk_up_tree(trans, root, path, &level);
2428                 if (wret > 0)
2429                         break;
2430                 if (wret < 0)
2431                         ret = wret;
2432                 ret = -EAGAIN;
2433                 break;
2434         }
2435         for (i = 0; i <= orig_level; i++) {
2436                 if (path->nodes[i]) {
2437                         free_extent_buffer(path->nodes[i]);
2438                         path->nodes[i] = NULL;
2439                 }
2440         }
2441 out:
2442         btrfs_free_path(path);
2443         return ret;
2444 }
2445
2446 int btrfs_free_block_groups(struct btrfs_fs_info *info)
2447 {
2448         u64 start;
2449         u64 end;
2450         u64 ptr;
2451         int ret;
2452
2453         mutex_lock(&info->alloc_mutex);
2454         while(1) {
2455                 ret = find_first_extent_bit(&info->block_group_cache, 0,
2456                                             &start, &end, (unsigned int)-1);
2457                 if (ret)
2458                         break;
2459                 ret = get_state_private(&info->block_group_cache, start, &ptr);
2460                 if (!ret)
2461                         kfree((void *)(unsigned long)ptr);
2462                 clear_extent_bits(&info->block_group_cache, start,
2463                                   end, (unsigned int)-1, GFP_NOFS);
2464         }
2465         while(1) {
2466                 ret = find_first_extent_bit(&info->free_space_cache, 0,
2467                                             &start, &end, EXTENT_DIRTY);
2468                 if (ret)
2469                         break;
2470                 clear_extent_dirty(&info->free_space_cache, start,
2471                                    end, GFP_NOFS);
2472         }
2473         mutex_unlock(&info->alloc_mutex);
2474         return 0;
2475 }
2476
2477 static unsigned long calc_ra(unsigned long start, unsigned long last,
2478                              unsigned long nr)
2479 {
2480         return min(last, start + nr - 1);
2481 }
2482
2483 static int noinline relocate_inode_pages(struct inode *inode, u64 start,
2484                                          u64 len)
2485 {
2486         u64 page_start;
2487         u64 page_end;
2488         unsigned long last_index;
2489         unsigned long i;
2490         struct page *page;
2491         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2492         struct file_ra_state *ra;
2493         unsigned long total_read = 0;
2494         unsigned long ra_pages;
2495         struct btrfs_trans_handle *trans;
2496
2497         ra = kzalloc(sizeof(*ra), GFP_NOFS);
2498
2499         mutex_lock(&inode->i_mutex);
2500         i = start >> PAGE_CACHE_SHIFT;
2501         last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;
2502
2503         ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
2504
2505         file_ra_state_init(ra, inode->i_mapping);
2506
2507         for (; i <= last_index; i++) {
2508                 if (total_read % ra_pages == 0) {
2509                         btrfs_force_ra(inode->i_mapping, ra, NULL, i,
2510                                        calc_ra(i, last_index, ra_pages));
2511                 }
2512                 total_read++;
2513                 if (((u64)i << PAGE_CACHE_SHIFT) > inode->i_size)
2514                         goto truncate_racing;
2515
2516                 page = grab_cache_page(inode->i_mapping, i);
2517                 if (!page) {
2518                         goto out_unlock;
2519                 }
2520                 if (!PageUptodate(page)) {
2521                         btrfs_readpage(NULL, page);
2522                         lock_page(page);
2523                         if (!PageUptodate(page)) {
2524                                 unlock_page(page);
2525                                 page_cache_release(page);
2526                                 goto out_unlock;
2527                         }
2528                 }
2529 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2530                 ClearPageDirty(page);
2531 #else
2532                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2533 #endif
2534                 wait_on_page_writeback(page);
2535                 set_page_extent_mapped(page);
2536                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2537                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2538
2539                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2540
2541                 set_extent_delalloc(io_tree, page_start,
2542                                     page_end, GFP_NOFS);
2543                 set_page_dirty(page);
2544
2545                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2546                 unlock_page(page);
2547                 page_cache_release(page);
2548         }
2549         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2550                                            total_read);
2551
2552 out_unlock:
2553         kfree(ra);
2554         trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
2555         if (trans) {
2556                 btrfs_add_ordered_inode(inode);
2557                 btrfs_end_transaction(trans, BTRFS_I(inode)->root);
2558                 mark_inode_dirty(inode);
2559         }
2560         mutex_unlock(&inode->i_mutex);
2561         return 0;
2562
2563 truncate_racing:
2564         vmtruncate(inode, inode->i_size);
2565         balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2566                                            total_read);
2567         goto out_unlock;
2568 }
2569
2570 /*
2571  * The back references tell us which tree holds a ref on a block,
2572  * but it is possible for the tree root field in the reference to
2573  * reflect the original root before a snapshot was made.  In this
2574  * case we should search through all the children of a given root
2575  * to find potential holders of references on a block.
2576  *
2577  * Instead, we do something a little less fancy and just search
2578  * all the roots for a given key/block combination.
2579  */
2580 static int find_root_for_ref(struct btrfs_root *root,
2581                              struct btrfs_path *path,
2582                              struct btrfs_key *key0,
2583                              int level,
2584                              int file_key,
2585                              struct btrfs_root **found_root,
2586                              u64 bytenr)
2587 {
2588         struct btrfs_key root_location;
2589         struct btrfs_root *cur_root = *found_root;
2590         struct btrfs_file_extent_item *file_extent;
2591         u64 root_search_start = BTRFS_FS_TREE_OBJECTID;
2592         u64 found_bytenr;
2593         int ret;
2594         int i;
2595
2596         root_location.offset = (u64)-1;
2597         root_location.type = BTRFS_ROOT_ITEM_KEY;
2598         path->lowest_level = level;
2599         path->reada = 0;
2600         while(1) {
2601                 ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0);
2602                 found_bytenr = 0;
2603                 if (ret == 0 && file_key) {
2604                         struct extent_buffer *leaf = path->nodes[0];
2605                         file_extent = btrfs_item_ptr(leaf, path->slots[0],
2606                                              struct btrfs_file_extent_item);
2607                         if (btrfs_file_extent_type(leaf, file_extent) ==
2608                             BTRFS_FILE_EXTENT_REG) {
2609                                 found_bytenr =
2610                                         btrfs_file_extent_disk_bytenr(leaf,
2611                                                                file_extent);
2612                        }
2613                 } else if (!file_key) {
2614                         if (path->nodes[level])
2615                                 found_bytenr = path->nodes[level]->start;
2616                 }
2617
2618                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2619                         if (!path->nodes[i])
2620                                 break;
2621                         free_extent_buffer(path->nodes[i]);
2622                         path->nodes[i] = NULL;
2623                 }
2624                 btrfs_release_path(cur_root, path);
2625
2626                 if (found_bytenr == bytenr) {
2627                         *found_root = cur_root;
2628                         ret = 0;
2629                         goto out;
2630                 }
2631                 ret = btrfs_search_root(root->fs_info->tree_root,
2632                                         root_search_start, &root_search_start);
2633                 if (ret)
2634                         break;
2635
2636                 root_location.objectid = root_search_start;
2637                 cur_root = btrfs_read_fs_root_no_name(root->fs_info,
2638                                                       &root_location);
2639                 if (!cur_root) {
2640                         ret = 1;
2641                         break;
2642                 }
2643         }
2644 out:
2645         path->lowest_level = 0;
2646         return ret;
2647 }
2648
2649 /*
2650  * note, this releases the path
2651  */
2652 static int noinline relocate_one_reference(struct btrfs_root *extent_root,
2653                                   struct btrfs_path *path,
2654                                   struct btrfs_key *extent_key,
2655                                   u64 *last_file_objectid,
2656                                   u64 *last_file_offset,
2657                                   u64 *last_file_root,
2658                                   u64 last_extent)
2659 {
2660         struct inode *inode;
2661         struct btrfs_root *found_root;
2662         struct btrfs_key root_location;
2663         struct btrfs_key found_key;
2664         struct btrfs_extent_ref *ref;
2665         u64 ref_root;
2666         u64 ref_gen;
2667         u64 ref_objectid;
2668         u64 ref_offset;
2669         int ret;
2670         int level;
2671
2672         ref = btrfs_item_ptr(path->nodes[0], path->slots[0],
2673                              struct btrfs_extent_ref);
2674         ref_root = btrfs_ref_root(path->nodes[0], ref);
2675         ref_gen = btrfs_ref_generation(path->nodes[0], ref);
2676         ref_objectid = btrfs_ref_objectid(path->nodes[0], ref);
2677         ref_offset = btrfs_ref_offset(path->nodes[0], ref);
2678         btrfs_release_path(extent_root, path);
2679
2680         root_location.objectid = ref_root;
2681         if (ref_gen == 0)
2682                 root_location.offset = 0;
2683         else
2684                 root_location.offset = (u64)-1;
2685         root_location.type = BTRFS_ROOT_ITEM_KEY;
2686
2687         found_root = btrfs_read_fs_root_no_name(extent_root->fs_info,
2688                                                 &root_location);
2689         BUG_ON(!found_root);
2690
2691         if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
2692                 found_key.objectid = ref_objectid;
2693                 found_key.type = BTRFS_EXTENT_DATA_KEY;
2694                 found_key.offset = ref_offset;
2695                 level = 0;
2696
2697                 if (last_extent == extent_key->objectid &&
2698                     *last_file_objectid == ref_objectid &&
2699                     *last_file_offset == ref_offset &&
2700                     *last_file_root == ref_root)
2701                         goto out;
2702
2703                 ret = find_root_for_ref(extent_root, path, &found_key,
2704                                         level, 1, &found_root,
2705                                         extent_key->objectid);
2706
2707                 if (ret)
2708                         goto out;
2709
2710                 if (last_extent == extent_key->objectid &&
2711                     *last_file_objectid == ref_objectid &&
2712                     *last_file_offset == ref_offset &&
2713                     *last_file_root == ref_root)
2714                         goto out;
2715
2716                 inode = btrfs_iget_locked(extent_root->fs_info->sb,
2717                                           ref_objectid, found_root);
2718                 if (inode->i_state & I_NEW) {
2719                         /* the inode and parent dir are two different roots */
2720                         BTRFS_I(inode)->root = found_root;
2721                         BTRFS_I(inode)->location.objectid = ref_objectid;
2722                         BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
2723                         BTRFS_I(inode)->location.offset = 0;
2724                         btrfs_read_locked_inode(inode);
2725                         unlock_new_inode(inode);
2726
2727                 }
2728                 /* this can happen if the reference is not against
2729                  * the latest version of the tree root
2730                  */
2731                 if (is_bad_inode(inode)) {
2732                         goto out;
2733                 }
2734                 *last_file_objectid = inode->i_ino;
2735                 *last_file_root = found_root->root_key.objectid;
2736                 *last_file_offset = ref_offset;
2737
2738                 relocate_inode_pages(inode, ref_offset, extent_key->offset);
2739                 iput(inode);
2740         } else {
2741                 struct btrfs_trans_handle *trans;
2742                 struct extent_buffer *eb;
2743                 int i;
2744
2745                 eb = read_tree_block(found_root, extent_key->objectid,
2746                                      extent_key->offset, 0);
2747                 btrfs_tree_lock(eb);
2748                 level = btrfs_header_level(eb);
2749
2750                 if (level == 0)
2751                         btrfs_item_key_to_cpu(eb, &found_key, 0);
2752                 else
2753                         btrfs_node_key_to_cpu(eb, &found_key, 0);
2754
2755                 btrfs_tree_unlock(eb);
2756                 free_extent_buffer(eb);
2757
2758                 ret = find_root_for_ref(extent_root, path, &found_key,
2759                                         level, 0, &found_root,
2760                                         extent_key->objectid);
2761
2762                 if (ret)
2763                         goto out;
2764
2765                 trans = btrfs_start_transaction(found_root, 1);
2766
2767                 path->lowest_level = level;
2768                 path->reada = 2;
2769                 ret = btrfs_search_slot(trans, found_root, &found_key, path,
2770                                         0, 1);
2771                 path->lowest_level = 0;
2772                 for (i = level; i < BTRFS_MAX_LEVEL; i++) {
2773                         if (!path->nodes[i])
2774                                 break;
2775                         free_extent_buffer(path->nodes[i]);
2776                         path->nodes[i] = NULL;
2777                 }
2778                 btrfs_release_path(found_root, path);
2779                 if (found_root == found_root->fs_info->extent_root)
2780                         btrfs_extent_post_op(trans, found_root);
2781                 btrfs_end_transaction(trans, found_root);
2782         }
2783
2784 out:
2785         return 0;
2786 }
2787
2788 static int noinline del_extent_zero(struct btrfs_root *extent_root,
2789                                     struct btrfs_path *path,
2790                                     struct btrfs_key *extent_key)
2791 {
2792         int ret;
2793         struct btrfs_trans_handle *trans;
2794
2795         trans = btrfs_start_transaction(extent_root, 1);
2796         ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
2797         if (ret > 0) {
2798                 ret = -EIO;
2799                 goto out;
2800         }
2801         if (ret < 0)
2802                 goto out;
2803         ret = btrfs_del_item(trans, extent_root, path);
2804 out:
2805         btrfs_end_transaction(trans, extent_root);
2806         return ret;
2807 }
2808
2809 static int noinline relocate_one_extent(struct btrfs_root *extent_root,
2810                                         struct btrfs_path *path,
2811                                         struct btrfs_key *extent_key)
2812 {
2813         struct btrfs_key key;
2814         struct btrfs_key found_key;
2815         struct extent_buffer *leaf;
2816         u64 last_file_objectid = 0;
2817         u64 last_file_root = 0;
2818         u64 last_file_offset = (u64)-1;
2819         u64 last_extent = 0;
2820         u32 nritems;
2821         u32 item_size;
2822         int ret = 0;
2823
2824         if (extent_key->objectid == 0) {
2825                 ret = del_extent_zero(extent_root, path, extent_key);
2826                 goto out;
2827         }
2828         key.objectid = extent_key->objectid;
2829         key.type = BTRFS_EXTENT_REF_KEY;
2830         key.offset = 0;
2831
2832         while(1) {
2833                 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
2834
2835                 if (ret < 0)
2836                         goto out;
2837
2838                 ret = 0;
2839                 leaf = path->nodes[0];
2840                 nritems = btrfs_header_nritems(leaf);
2841                 if (path->slots[0] == nritems) {
2842                         ret = btrfs_next_leaf(extent_root, path);
2843                         if (ret > 0) {
2844                                 ret = 0;
2845                                 goto out;
2846                         }
2847                         if (ret < 0)
2848                                 goto out;
2849                         leaf = path->nodes[0];
2850                 }
2851
2852                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2853                 if (found_key.objectid != extent_key->objectid) {
2854                         break;
2855                 }
2856
2857                 if (found_key.type != BTRFS_EXTENT_REF_KEY) {
2858                         break;
2859                 }
2860
2861                 key.offset = found_key.offset + 1;
2862                 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2863
2864                 ret = relocate_one_reference(extent_root, path, extent_key,
2865                                              &last_file_objectid,
2866                                              &last_file_offset,
2867                                              &last_file_root, last_extent);
2868                 if (ret)
2869                         goto out;
2870                 last_extent = extent_key->objectid;
2871         }
2872         ret = 0;
2873 out:
2874         btrfs_release_path(extent_root, path);
2875         return ret;
2876 }
2877
2878 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
2879 {
2880         u64 num_devices;
2881         u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
2882                 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
2883
2884         num_devices = root->fs_info->fs_devices->num_devices;
2885         if (num_devices == 1) {
2886                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2887                 stripped = flags & ~stripped;
2888
2889                 /* turn raid0 into single device chunks */
2890                 if (flags & BTRFS_BLOCK_GROUP_RAID0)
2891                         return stripped;
2892
2893                 /* turn mirroring into duplication */
2894                 if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
2895                              BTRFS_BLOCK_GROUP_RAID10))
2896                         return stripped | BTRFS_BLOCK_GROUP_DUP;
2897                 return flags;
2898         } else {
2899                 /* they already had raid on here, just return */
2900                 if (flags & stripped)
2901                         return flags;
2902
2903                 stripped |= BTRFS_BLOCK_GROUP_DUP;
2904                 stripped = flags & ~stripped;
2905
2906                 /* switch duplicated blocks with raid1 */
2907                 if (flags & BTRFS_BLOCK_GROUP_DUP)
2908                         return stripped | BTRFS_BLOCK_GROUP_RAID1;
2909
2910                 /* turn single device chunks into raid0 */
2911                 return stripped | BTRFS_BLOCK_GROUP_RAID0;
2912         }
2913         return flags;
2914 }
2915
2916 int __alloc_chunk_for_shrink(struct btrfs_root *root,
2917                      struct btrfs_block_group_cache *shrink_block_group,
2918                      int force)
2919 {
2920         struct btrfs_trans_handle *trans;
2921         u64 new_alloc_flags;
2922         u64 calc;
2923
2924         if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
2925
2926                 trans = btrfs_start_transaction(root, 1);
2927                 new_alloc_flags = update_block_group_flags(root,
2928                                                    shrink_block_group->flags);
2929                 if (new_alloc_flags != shrink_block_group->flags) {
2930                         calc =
2931                              btrfs_block_group_used(&shrink_block_group->item);
2932                 } else {
2933                         calc = shrink_block_group->key.offset;
2934                 }
2935                 do_chunk_alloc(trans, root->fs_info->extent_root,
2936                                calc + 2 * 1024 * 1024, new_alloc_flags, force);
2937                 btrfs_end_transaction(trans, root);
2938         }
2939         return 0;
2940 }
2941
2942 int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
2943 {
2944         struct btrfs_trans_handle *trans;
2945         struct btrfs_root *tree_root = root->fs_info->tree_root;
2946         struct btrfs_path *path;
2947         u64 cur_byte;
2948         u64 total_found;
2949         u64 shrink_last_byte;
2950         struct btrfs_block_group_cache *shrink_block_group;
2951         struct btrfs_fs_info *info = root->fs_info;
2952         struct btrfs_key key;
2953         struct btrfs_key found_key;
2954         struct extent_buffer *leaf;
2955         u32 nritems;
2956         int ret;
2957         int progress;
2958
2959         mutex_lock(&root->fs_info->alloc_mutex);
2960         shrink_block_group = btrfs_lookup_block_group(root->fs_info,
2961                                                       shrink_start);
2962         BUG_ON(!shrink_block_group);
2963
2964         shrink_last_byte = shrink_block_group->key.objectid +
2965                 shrink_block_group->key.offset;
2966
2967         shrink_block_group->space_info->total_bytes -=
2968                 shrink_block_group->key.offset;
2969         path = btrfs_alloc_path();
2970         root = root->fs_info->extent_root;
2971         path->reada = 2;
2972
2973         printk("btrfs relocating block group %llu flags %llu\n",
2974                (unsigned long long)shrink_start,
2975                (unsigned long long)shrink_block_group->flags);
2976
2977         __alloc_chunk_for_shrink(root, shrink_block_group, 1);
2978
2979 again:
2980
2981         shrink_block_group->ro = 1;
2982
2983         total_found = 0;
2984         progress = 0;
2985         key.objectid = shrink_start;
2986         key.offset = 0;
2987         key.type = 0;
2988         cur_byte = key.objectid;
2989
2990         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2991         if (ret < 0)
2992                 goto out;
2993
2994         ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
2995         if (ret < 0)
2996                 goto out;
2997
2998         if (ret == 0) {
2999                 leaf = path->nodes[0];
3000                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3001                 if (found_key.objectid + found_key.offset > shrink_start &&
3002                     found_key.objectid < shrink_last_byte) {
3003                         cur_byte = found_key.objectid;
3004                         key.objectid = cur_byte;
3005                 }
3006         }
3007         btrfs_release_path(root, path);
3008
3009         while(1) {
3010                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3011                 if (ret < 0)
3012                         goto out;
3013
3014                 leaf = path->nodes[0];
3015                 nritems = btrfs_header_nritems(leaf);
3016 next:
3017                 if (path->slots[0] >= nritems) {
3018                         ret = btrfs_next_leaf(root, path);
3019                         if (ret < 0)
3020                                 goto out;
3021                         if (ret == 1) {
3022                                 ret = 0;
3023                                 break;
3024                         }
3025                         leaf = path->nodes[0];
3026                         nritems = btrfs_header_nritems(leaf);
3027                 }
3028
3029                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3030
3031                 if (found_key.objectid >= shrink_last_byte)
3032                         break;
3033
3034                 if (progress && need_resched()) {
3035                         memcpy(&key, &found_key, sizeof(key));
3036                         cond_resched();
3037                         btrfs_release_path(root, path);
3038                         btrfs_search_slot(NULL, root, &key, path, 0, 0);
3039                         progress = 0;
3040                         goto next;
3041                 }
3042                 progress = 1;
3043
3044                 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_ITEM_KEY ||
3045                     found_key.objectid + found_key.offset <= cur_byte) {
3046                         memcpy(&key, &found_key, sizeof(key));
3047                         key.offset++;
3048                         path->slots[0]++;
3049                         goto next;
3050                 }
3051
3052                 total_found++;
3053                 cur_byte = found_key.objectid + found_key.offset;
3054                 key.objectid = cur_byte;
3055                 btrfs_release_path(root, path);
3056                 ret = relocate_one_extent(root, path, &found_key);
3057                 __alloc_chunk_for_shrink(root, shrink_block_group, 0);
3058         }
3059
3060         btrfs_release_path(root, path);
3061
3062         if (total_found > 0) {
3063                 printk("btrfs relocate found %llu last extent was %llu\n",
3064                        (unsigned long long)total_found,
3065                        (unsigned long long)found_key.objectid);
3066                 trans = btrfs_start_transaction(tree_root, 1);
3067                 btrfs_commit_transaction(trans, tree_root);
3068
3069                 btrfs_clean_old_snapshots(tree_root);
3070
3071                 trans = btrfs_start_transaction(tree_root, 1);
3072                 btrfs_commit_transaction(trans, tree_root);
3073                 goto again;
3074         }
3075
3076         /*
3077          * we've freed all the extents, now remove the block
3078          * group item from the tree
3079          */
3080         trans = btrfs_start_transaction(root, 1);
3081         memcpy(&key, &shrink_block_group->key, sizeof(key));
3082
3083         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
3084         if (ret > 0)
3085                 ret = -EIO;
3086         if (ret < 0)
3087                 goto out;
3088
3089         clear_extent_bits(&info->block_group_cache, key.objectid,
3090                           key.objectid + key.offset - 1,
3091                           (unsigned int)-1, GFP_NOFS);
3092
3093
3094         clear_extent_bits(&info->free_space_cache,
3095                            key.objectid, key.objectid + key.offset - 1,
3096                            (unsigned int)-1, GFP_NOFS);
3097
3098         memset(shrink_block_group, 0, sizeof(*shrink_block_group));
3099         kfree(shrink_block_group);
3100
3101         btrfs_del_item(trans, root, path);
3102         btrfs_commit_transaction(trans, root);
3103
3104         /* the code to unpin extents might set a few bits in the free
3105          * space cache for this range again
3106          */
3107         clear_extent_bits(&info->free_space_cache,
3108                            key.objectid, key.objectid + key.offset - 1,
3109                            (unsigned int)-1, GFP_NOFS);
3110 out:
3111         btrfs_free_path(path);
3112         mutex_unlock(&root->fs_info->alloc_mutex);
3113         return ret;
3114 }
3115
3116 int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
3117                            struct btrfs_key *key)
3118 {
3119         int ret = 0;
3120         struct btrfs_key found_key;
3121         struct extent_buffer *leaf;
3122         int slot;
3123
3124         ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
3125         if (ret < 0)
3126                 goto out;
3127
3128         while(1) {
3129                 slot = path->slots[0];
3130                 leaf = path->nodes[0];
3131                 if (slot >= btrfs_header_nritems(leaf)) {
3132                         ret = btrfs_next_leaf(root, path);
3133                         if (ret == 0)
3134                                 continue;
3135                         if (ret < 0)
3136                                 goto out;
3137                         break;
3138                 }
3139                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3140
3141                 if (found_key.objectid >= key->objectid &&
3142                     found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
3143                         ret = 0;
3144                         goto out;
3145                 }
3146                 path->slots[0]++;
3147         }
3148         ret = -ENOENT;
3149 out:
3150         return ret;
3151 }
3152
3153 int btrfs_read_block_groups(struct btrfs_root *root)
3154 {
3155         struct btrfs_path *path;
3156         int ret;
3157         int bit;
3158         struct btrfs_block_group_cache *cache;
3159         struct btrfs_fs_info *info = root->fs_info;
3160         struct btrfs_space_info *space_info;
3161         struct extent_io_tree *block_group_cache;
3162         struct btrfs_key key;
3163         struct btrfs_key found_key;
3164         struct extent_buffer *leaf;
3165
3166         block_group_cache = &info->block_group_cache;
3167         root = info->extent_root;
3168         key.objectid = 0;
3169         key.offset = 0;
3170         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3171         path = btrfs_alloc_path();
3172         if (!path)
3173                 return -ENOMEM;
3174
3175         mutex_lock(&root->fs_info->alloc_mutex);
3176         while(1) {
3177                 ret = find_first_block_group(root, path, &key);
3178                 if (ret > 0) {
3179                         ret = 0;
3180                         goto error;
3181                 }
3182                 if (ret != 0)
3183                         goto error;
3184
3185                 leaf = path->nodes[0];
3186                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
3187                 cache = kzalloc(sizeof(*cache), GFP_NOFS);
3188                 if (!cache) {
3189                         ret = -ENOMEM;
3190                         break;
3191                 }
3192
3193                 read_extent_buffer(leaf, &cache->item,
3194                                    btrfs_item_ptr_offset(leaf, path->slots[0]),
3195                                    sizeof(cache->item));
3196                 memcpy(&cache->key, &found_key, sizeof(found_key));
3197
3198                 key.objectid = found_key.objectid + found_key.offset;
3199                 btrfs_release_path(root, path);
3200                 cache->flags = btrfs_block_group_flags(&cache->item);
3201                 bit = 0;
3202                 if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
3203                         bit = BLOCK_GROUP_DATA;
3204                 } else if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
3205                         bit = BLOCK_GROUP_SYSTEM;
3206                 } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
3207                         bit = BLOCK_GROUP_METADATA;
3208                 }
3209                 set_avail_alloc_bits(info, cache->flags);
3210
3211                 ret = update_space_info(info, cache->flags, found_key.offset,
3212                                         btrfs_block_group_used(&cache->item),
3213                                         &space_info);
3214                 BUG_ON(ret);
3215                 cache->space_info = space_info;
3216
3217                 /* use EXTENT_LOCKED to prevent merging */
3218                 set_extent_bits(block_group_cache, found_key.objectid,
3219                                 found_key.objectid + found_key.offset - 1,
3220                                 bit | EXTENT_LOCKED, GFP_NOFS);
3221                 set_state_private(block_group_cache, found_key.objectid,
3222                                   (unsigned long)cache);
3223
3224                 if (key.objectid >=
3225                     btrfs_super_total_bytes(&info->super_copy))
3226                         break;
3227         }
3228         ret = 0;
3229 error:
3230         btrfs_free_path(path);
3231         mutex_unlock(&root->fs_info->alloc_mutex);
3232         return ret;
3233 }
3234
3235 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
3236                            struct btrfs_root *root, u64 bytes_used,
3237                            u64 type, u64 chunk_objectid, u64 chunk_offset,
3238                            u64 size)
3239 {
3240         int ret;
3241         int bit = 0;
3242         struct btrfs_root *extent_root;
3243         struct btrfs_block_group_cache *cache;
3244         struct extent_io_tree *block_group_cache;
3245
3246         extent_root = root->fs_info->extent_root;
3247         block_group_cache = &root->fs_info->block_group_cache;
3248
3249         cache = kzalloc(sizeof(*cache), GFP_NOFS);
3250         BUG_ON(!cache);
3251         cache->key.objectid = chunk_offset;
3252         cache->key.offset = size;
3253         btrfs_set_key_type(&cache->key, BTRFS_BLOCK_GROUP_ITEM_KEY);
3254
3255         btrfs_set_block_group_used(&cache->item, bytes_used);
3256         btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
3257         cache->flags = type;
3258         btrfs_set_block_group_flags(&cache->item, type);
3259
3260         ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
3261                                 &cache->space_info);
3262         BUG_ON(ret);
3263
3264         bit = block_group_state_bits(type);
3265         set_extent_bits(block_group_cache, chunk_offset,
3266                         chunk_offset + size - 1,
3267                         bit | EXTENT_LOCKED, GFP_NOFS);
3268
3269         set_state_private(block_group_cache, chunk_offset,
3270                           (unsigned long)cache);
3271         ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
3272                                 sizeof(cache->item));
3273         BUG_ON(ret);
3274
3275         finish_current_insert(trans, extent_root);
3276         ret = del_pending_extents(trans, extent_root);
3277         BUG_ON(ret);
3278         set_avail_alloc_bits(extent_root->fs_info, type);
3279
3280         return 0;
3281 }