fs/btrfs/free-space-cache.c
/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

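/*
 * Each bitmap entry covers one page worth of bits.  Assuming 4k pages
 * and a 4k sectorsize, BITS_PER_BITMAP is 32768, so one bitmap tracks
 * 128MB of space.  MAX_CACHE_BYTES_PER_GIG caps how much memory the
 * cache may consume per gigabyte of block group space.
 */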
#define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                                          u64 offset)
{
        BUG_ON(offset < bitmap_start);
        offset -= bitmap_start;
        return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
        return (unsigned long)(div64_u64(bytes, sectorsize));
}

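/* round an offset down to the start of the bitmap that would cover it */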
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
                                   u64 offset)
{
        u64 bitmap_start;
        u64 bytes_per_bitmap;

        bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
        bitmap_start = offset - block_group->key.objectid;
        bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
        bitmap_start *= bytes_per_bitmap;
        bitmap_start += block_group->key.objectid;

        return bitmap_start;
}

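/* insert a free space entry into an rbtree indexed by offset */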
static int tree_insert_offset(struct rb_root *root, u64 offset,
                              struct rb_node *node, int bitmap)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_free_space *info;

        while (*p) {
                parent = *p;
                info = rb_entry(parent, struct btrfs_free_space, offset_index);

                if (offset < info->offset) {
                        p = &(*p)->rb_left;
                } else if (offset > info->offset) {
                        p = &(*p)->rb_right;
                } else {
                        /*
                         * we could have a bitmap entry and an extent entry
                         * share the same offset.  If this is the case, we want
                         * the extent entry to always be found first if we do a
                         * linear search through the tree, since we want to have
                         * the quickest allocation time, and allocating from an
                         * extent is faster than allocating from a bitmap.  So
                         * if we're inserting a bitmap and we find an entry at
                         * this offset, we want to go right, or after this entry
                         * logically.  If we are inserting an extent and we've
                         * found a bitmap, we want to go left, or before
                         * logically.
                         */
                        if (bitmap) {
                                WARN_ON(info->bitmap);
                                p = &(*p)->rb_right;
                        } else {
                                WARN_ON(!info->bitmap);
                                p = &(*p)->rb_left;
                        }
                }
        }

        rb_link_node(node, parent, p);
        rb_insert_color(node, root);

        return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we
 * just want a section that is at least 'bytes' in size and comes at or after
 * the given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
                   u64 offset, int bitmap_only, int fuzzy)
{
        struct rb_node *n = block_group->free_space_offset.rb_node;
        struct btrfs_free_space *entry, *prev = NULL;

        /* find entry that is closest to the 'offset' */
        while (1) {
                if (!n) {
                        entry = NULL;
                        break;
                }

                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                prev = entry;

                if (offset < entry->offset)
                        n = n->rb_left;
                else if (offset > entry->offset)
                        n = n->rb_right;
                else
                        break;
        }

        if (bitmap_only) {
                if (!entry)
                        return NULL;
                if (entry->bitmap)
                        return entry;

                /*
                 * bitmap entry and extent entry may share same offset,
                 * in that case, bitmap entry comes after extent entry.
                 */
                n = rb_next(n);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                if (entry->offset != offset)
                        return NULL;

                WARN_ON(!entry->bitmap);
                return entry;
        } else if (entry) {
                if (entry->bitmap) {
                        /*
                         * if previous extent entry covers the offset,
                         * we should return it instead of the bitmap entry
                         */
                        n = &entry->offset_index;
                        while (1) {
                                n = rb_prev(n);
                                if (!n)
                                        break;
                                prev = rb_entry(n, struct btrfs_free_space,
                                                offset_index);
                                if (!prev->bitmap) {
                                        if (prev->offset + prev->bytes > offset)
                                                entry = prev;
                                        break;
                                }
                        }
                }
                return entry;
        }

        if (!prev)
                return NULL;

        /* find last entry before the 'offset' */
        entry = prev;
        if (entry->offset > offset) {
                n = rb_prev(&entry->offset_index);
                if (n) {
                        entry = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        BUG_ON(entry->offset > offset);
                } else {
                        if (fuzzy)
                                return entry;
                        else
                                return NULL;
                }
        }

        if (entry->bitmap) {
                n = &entry->offset_index;
                while (1) {
                        n = rb_prev(n);
                        if (!n)
                                break;
                        prev = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        if (!prev->bitmap) {
                                if (prev->offset + prev->bytes > offset)
                                        return prev;
                                break;
                        }
                }
                if (entry->offset + BITS_PER_BITMAP *
                    block_group->sectorsize > offset)
                        return entry;
        } else if (entry->offset + entry->bytes > offset)
                return entry;

        if (!fuzzy)
                return NULL;

        while (1) {
                if (entry->bitmap) {
                        if (entry->offset + BITS_PER_BITMAP *
                            block_group->sectorsize > offset)
                                break;
                } else {
                        if (entry->offset + entry->bytes > offset)
                                break;
                }

                n = rb_next(&entry->offset_index);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
        }
        return entry;
}

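/*
 * remove an entry from the free space rbtree and update the block
 * group's counters; callers hold block_group->tree_lock
 */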
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        rb_erase(&info->offset_index, &block_group->free_space_offset);
        block_group->free_extents--;
        block_group->free_space -= info->bytes;
}

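/* add an entry to the free space rbtree and update the counters */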
static int link_free_space(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info)
{
        int ret = 0;

        BUG_ON(!info->bitmap && !info->bytes);
        ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
                                 &info->offset_index, (info->bitmap != NULL));
        if (ret)
                return ret;

        block_group->free_space += info->bytes;
        block_group->free_extents++;
        return ret;
}

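/*
 * Worked example, assuming 4k pages: a 1GB block group gets at most
 * 32k of cache.  With one bitmap in use we budget for two bitmap pages
 * (8k), leaving 24k, which is then capped at max_bytes / 2 = 16k, so
 * extents_thresh ends up as 16k / sizeof(struct btrfs_free_space).
 */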
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
        u64 max_bytes;
        u64 bitmap_bytes;
        u64 extent_bytes;

        /*
         * The goal is to keep the total amount of memory used per 1gb of space
         * at or below 32k, so we need to adjust how much memory we allow to be
         * used by extent based free space tracking
         */
        max_bytes = MAX_CACHE_BYTES_PER_GIG *
                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

        /*
         * we want to account for 1 more bitmap than what we have so we can make
         * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
         * we add more bitmaps.
         */
        bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;

        if (bitmap_bytes >= max_bytes) {
                block_group->extents_thresh = 0;
                return;
        }

        /*
         * we want the extent entry threshold to always be at most 1/2 the max
         * bytes we can have, or whatever is less than that.
         */
        extent_bytes = max_bytes - bitmap_bytes;
        extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));

        block_group->extents_thresh =
                div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
}

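/*
 * clear the bits covering [offset, offset + bytes) in a bitmap entry;
 * that range is no longer tracked as free space
 */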
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info, u64 offset,
                              u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                clear_bit(i, info->bitmap);

        info->bytes -= bytes;
        block_group->free_space -= bytes;
}

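/* mark [offset, offset + bytes) as free space inside a bitmap entry */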
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
                            struct btrfs_free_space *info, u64 offset,
                            u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                set_bit(i, info->bitmap);

        info->bytes += bytes;
        block_group->free_space += bytes;
}

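/*
 * scan a bitmap entry for a run of at least *bytes of free space at or
 * after *offset.  On success *offset and *bytes are updated to the
 * start and size of the run found and 0 is returned; -1 means no
 * suitable run exists in this bitmap.
 */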
static int search_bitmap(struct btrfs_block_group_cache *block_group,
                         struct btrfs_free_space *bitmap_info, u64 *offset,
                         u64 *bytes)
{
        unsigned long found_bits = 0;
        unsigned long bits, i;
        unsigned long next_zero;

        i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
                          max_t(u64, *offset, bitmap_info->offset));
        bits = bytes_to_bits(*bytes, block_group->sectorsize);

        for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(bitmap_info->bitmap,
                                               BITS_PER_BITMAP, i);
                if ((next_zero - i) >= bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (found_bits) {
                *offset = (u64)(i * block_group->sectorsize) +
                        bitmap_info->offset;
                *bytes = (u64)(found_bits) * block_group->sectorsize;
                return 0;
        }

        return -1;
}

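/*
 * find the first entry (extent or bitmap) at or after *offset with at
 * least *bytes of free space; on success *offset and *bytes describe
 * the space that was found
 */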
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
                                                *block_group, u64 *offset,
                                                u64 *bytes, int debug)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        int ret;

        if (!block_group->free_space_offset.rb_node)
                return NULL;

        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, *offset),
                                   0, 1);
        if (!entry)
                return NULL;

        for (node = &entry->offset_index; node; node = rb_next(node)) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                if (entry->bytes < *bytes)
                        continue;

                if (entry->bitmap) {
                        ret = search_bitmap(block_group, entry, offset, bytes);
                        if (!ret)
                                return entry;
                        continue;
                }

                *offset = entry->offset;
                *bytes = entry->bytes;
                return entry;
        }

        return NULL;
}

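/*
 * turn a pre-allocated entry into a new, empty bitmap entry aligned to
 * the bitmap boundary covering 'offset', then retune the thresholds
 */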
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info, u64 offset)
{
        u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
        int max_bitmaps = (int)div64_u64(block_group->key.offset +
                                         bytes_per_bg - 1, bytes_per_bg);
        BUG_ON(block_group->total_bitmaps >= max_bitmaps);

        info->offset = offset_to_bitmap(block_group, offset);
        info->bytes = 0;
        link_free_space(block_group, info);
        block_group->total_bitmaps++;

        recalculate_thresholds(block_group);
}

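/*
 * clear [*offset, *offset + *bytes) out of one or more bitmap entries,
 * walking forward through adjacent bitmaps until all of the bytes are
 * gone.  Returns -EAGAIN when the caller must retry via the extent
 * entries.
 */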
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *bitmap_info,
                              u64 *offset, u64 *bytes)
{
        u64 end;
        u64 search_start, search_bytes;
        int ret;

again:
        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

        /*
         * XXX - this can go away after a few releases.
         *
         * since the only user of btrfs_remove_free_space is the tree logging
         * stuff, and the only way to test that is under crash conditions, we
         * want to have this debug stuff here just in case something's not
         * working.  Search the bitmap for the space we are trying to use to
         * make sure it's actually there.  If it's not there then we need to
         * stop because something has gone wrong.
         */
        search_start = *offset;
        search_bytes = *bytes;
        ret = search_bitmap(block_group, bitmap_info, &search_start,
                            &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);

        if (*offset > bitmap_info->offset && *offset + *bytes > end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset,
                                  end - *offset + 1);
                *bytes -= end - *offset + 1;
                *offset = end + 1;
        } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
                *bytes = 0;
        }

        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
                if (!bitmap_info->bytes) {
                        unlink_free_space(block_group, bitmap_info);
                        kfree(bitmap_info->bitmap);
                        kfree(bitmap_info);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }

                /*
                 * no entry after this bitmap, but we still have bytes to
                 * remove, so something has gone wrong.
                 */
                if (!next)
                        return -EINVAL;

                bitmap_info = rb_entry(next, struct btrfs_free_space,
                                       offset_index);

                /*
                 * if the next entry isn't a bitmap we need to return to let the
                 * extent stuff do its work.
                 */
                if (!bitmap_info->bitmap)
                        return -EAGAIN;

                /*
                 * Ok the next item is a bitmap, but it may not actually hold
                 * the information for the rest of this free space stuff, so
                 * look for it, and if we don't find it return so we can try
                 * everything over again.
                 */
                search_start = *offset;
                search_bytes = *bytes;
                ret = search_bitmap(block_group, bitmap_info, &search_start,
                                    &search_bytes);
                if (ret < 0 || search_start != *offset)
                        return -EAGAIN;

                goto again;
        } else if (!bitmap_info->bytes) {
                unlink_free_space(block_group, bitmap_info);
                kfree(bitmap_info->bitmap);
                kfree(bitmap_info);
                block_group->total_bitmaps--;
                recalculate_thresholds(block_group);
        }

        return 0;
}

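/*
 * try to add the space described by 'info' to a bitmap.  Returns 1 if
 * the space was added, 0 if the caller should fall back to an extent
 * entry, or -ENOMEM; 'info' is consumed unless 0 is returned.
 */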
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        struct btrfs_free_space *bitmap_info;
        int added = 0;
        u64 bytes, offset, end;
        int ret;

        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
        if (block_group->free_extents < block_group->extents_thresh &&
            info->bytes > block_group->sectorsize * 4)
                return 0;

        /*
         * some block groups are so tiny they can't be enveloped by a bitmap, so
         * don't even bother to create a bitmap for this
         */
        if (BITS_PER_BITMAP * block_group->sectorsize >
            block_group->key.offset)
                return 0;

        bytes = info->bytes;
        offset = info->offset;

again:
        bitmap_info = tree_search_offset(block_group,
                                         offset_to_bitmap(block_group, offset),
                                         1, 0);
        if (!bitmap_info) {
                BUG_ON(added);
                goto new_bitmap;
        }

        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize);

        if (offset >= bitmap_info->offset && offset + bytes > end) {
                bitmap_set_bits(block_group, bitmap_info, offset,
                                end - offset);
                bytes -= end - offset;
                offset = end;
                added = 0;
        } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
                bitmap_set_bits(block_group, bitmap_info, offset, bytes);
                bytes = 0;
        } else {
                BUG();
        }

        if (!bytes) {
                ret = 1;
                goto out;
        } else
                goto again;

new_bitmap:
        if (info && info->bitmap) {
                add_new_bitmap(block_group, info, offset);
                added = 1;
                info = NULL;
                goto again;
        } else {
                spin_unlock(&block_group->tree_lock);

                /* no pre-allocated info, allocate a new one */
                if (!info) {
                        info = kzalloc(sizeof(struct btrfs_free_space),
                                       GFP_NOFS);
                        if (!info) {
                                spin_lock(&block_group->tree_lock);
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                /* allocate the bitmap */
                info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                spin_lock(&block_group->tree_lock);
                if (!info->bitmap) {
                        ret = -ENOMEM;
                        goto out;
                }
                goto again;
        }

out:
        if (info) {
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
        }

        return ret;
}

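/*
 * add [offset, offset + bytes) back to the free space cache, merging
 * with adjacent extent entries or spilling into a bitmap once the
 * extent entry threshold has been crossed
 */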
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 offset, u64 bytes)
{
        struct btrfs_free_space *right_info = NULL;
        struct btrfs_free_space *left_info = NULL;
        struct btrfs_free_space *info = NULL;
        int ret = 0;

        info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
        if (!info)
                return -ENOMEM;

        info->offset = offset;
        info->bytes = bytes;

        spin_lock(&block_group->tree_lock);

        /*
         * first we want to see if there is free space adjacent to the range we
         * are adding, if there is remove that struct and add a new one to
         * cover the entire range
         */
        right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
        if (right_info && rb_prev(&right_info->offset_index))
                left_info = rb_entry(rb_prev(&right_info->offset_index),
                                     struct btrfs_free_space, offset_index);
        else
                left_info = tree_search_offset(block_group, offset - 1, 0, 0);

        /*
         * If there was no extent directly to the left or right of this new
         * extent then we know we're going to have to allocate a new extent, so
         * before we do that see if we need to drop this into a bitmap
         */
        if ((!left_info || left_info->bitmap) &&
            (!right_info || right_info->bitmap)) {
                ret = insert_into_bitmap(block_group, info);

                if (ret < 0) {
                        goto out;
                } else if (ret) {
                        ret = 0;
                        goto out;
                }
        }

        if (right_info && !right_info->bitmap) {
                unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
                kfree(right_info);
        }

        if (left_info && !left_info->bitmap &&
            left_info->offset + left_info->bytes == offset) {
                unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
                kfree(left_info);
        }

        ret = link_free_space(block_group, info);
        if (ret)
                kfree(info);
out:
        spin_unlock(&block_group->tree_lock);

        if (ret) {
                printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
                BUG_ON(ret == -EEXIST);
        }

        return ret;
}

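/*
 * remove [offset, offset + bytes) from the free space cache.  The range
 * may span extent and bitmap entries, and tree log replay can even free
 * space out of the middle of an existing extent entry.
 */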
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                            u64 offset, u64 bytes)
{
        struct btrfs_free_space *info;
        struct btrfs_free_space *next_info = NULL;
        int ret = 0;

        spin_lock(&block_group->tree_lock);

again:
        info = tree_search_offset(block_group, offset, 0, 0);
        if (!info) {
                /*
                 * oops, didn't find an extent that matched the space we wanted
                 * to remove, look for a bitmap instead
                 */
                info = tree_search_offset(block_group,
                                          offset_to_bitmap(block_group, offset),
                                          1, 0);
                if (!info) {
                        WARN_ON(1);
                        goto out_lock;
                }
        }

        if (info->bytes < bytes && rb_next(&info->offset_index)) {
                u64 end;
                next_info = rb_entry(rb_next(&info->offset_index),
                                             struct btrfs_free_space,
                                             offset_index);

                if (next_info->bitmap)
                        end = next_info->offset + BITS_PER_BITMAP *
                                block_group->sectorsize - 1;
                else
                        end = next_info->offset + next_info->bytes;

                if (next_info->bytes < bytes ||
                    next_info->offset > offset || offset > end) {
                        printk(KERN_CRIT "Found free space at %llu, size %llu,"
                              " trying to use %llu\n",
                              (unsigned long long)info->offset,
                              (unsigned long long)info->bytes,
                              (unsigned long long)bytes);
                        WARN_ON(1);
                        ret = -EINVAL;
                        goto out_lock;
                }

                info = next_info;
        }

        if (info->bytes == bytes) {
                unlink_free_space(block_group, info);
                if (info->bitmap) {
                        kfree(info->bitmap);
                        block_group->total_bitmaps--;
                }
                kfree(info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset == offset) {
                unlink_free_space(block_group, info);
                info->offset += bytes;
                info->bytes -= bytes;
                link_free_space(block_group, info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset <= offset &&
            info->offset + info->bytes >= offset + bytes) {
                u64 old_start = info->offset;
                /*
                 * we're freeing space in the middle of the info,
                 * this can happen during tree log replay
                 *
                 * first unlink the old info and then
                 * insert it again after the hole we're creating
                 */
                unlink_free_space(block_group, info);
                if (offset + bytes < info->offset + info->bytes) {
                        u64 old_end = info->offset + info->bytes;

                        info->offset = offset + bytes;
                        info->bytes = old_end - info->offset;
                        ret = link_free_space(block_group, info);
                        WARN_ON(ret);
                        if (ret)
                                goto out_lock;
                } else {
                        /* the hole we're creating ends at the end
                         * of the info struct, just free the info
                         */
                        kfree(info);
                }
                spin_unlock(&block_group->tree_lock);

                /* step two, insert a new info struct to cover
                 * anything before the hole
                 */
                ret = btrfs_add_free_space(block_group, old_start,
                                           offset - old_start);
                WARN_ON(ret);
                goto out;
        }

        ret = remove_from_bitmap(block_group, info, &offset, &bytes);
        if (ret == -EAGAIN)
                goto again;
        BUG_ON(ret);
out_lock:
        spin_unlock(&block_group->tree_lock);
out:
        return ret;
}

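/*
 * debugging helper: print every cache entry and count how many are at
 * least 'bytes' in size
 */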
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                           u64 bytes)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int count = 0;

        for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                if (info->bytes >= bytes)
                        count++;
                printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
                       (unsigned long long)info->offset,
                       (unsigned long long)info->bytes,
                       (info->bitmap) ? "yes" : "no");
        }
        printk(KERN_INFO "block group has cluster?: %s\n",
               list_empty(&block_group->cluster_list) ? "no" : "yes");
        printk(KERN_INFO "%d blocks of free space at or bigger than bytes\n",
               count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 ret = 0;

        for (n = rb_first(&block_group->free_space_offset); n;
             n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                ret += info->bytes;
        }

        return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        bool bitmap;

        spin_lock(&cluster->lock);
        if (cluster->block_group != block_group)
                goto out;

        bitmap = cluster->points_to_bitmap;
        cluster->block_group = NULL;
        cluster->window_start = 0;
        list_del_init(&cluster->block_group_list);
        cluster->points_to_bitmap = false;

        if (bitmap)
                goto out;

        node = rb_first(&cluster->root);
        while (node) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
                BUG_ON(entry->bitmap);
                tree_insert_offset(&block_group->free_space_offset,
                                   entry->offset, &entry->offset_index, 0);
        }
        cluster->root = RB_ROOT;

out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
        return 0;
}

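/*
 * tear down the free space cache for a block group: return any clusters
 * to the cache first, then free every remaining entry
 */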
void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *node;
        struct btrfs_free_cluster *cluster;
        struct list_head *head;

        spin_lock(&block_group->tree_lock);
        while ((head = block_group->cluster_list.next) !=
               &block_group->cluster_list) {
                cluster = list_entry(head, struct btrfs_free_cluster,
                                     block_group_list);

                WARN_ON(cluster->block_group != block_group);
                __btrfs_return_cluster_to_free_space(block_group, cluster);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
                info = rb_entry(node, struct btrfs_free_space, offset_index);
                unlink_free_space(block_group, info);
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        spin_unlock(&block_group->tree_lock);
}

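/*
 * find and remove 'bytes' of free space (searching for bytes +
 * empty_size worth of slack).  Returns the start of the allocated
 * range, or 0 if nothing large enough was found.
 */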
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                               u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        u64 bytes_search = bytes + empty_size;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        entry = find_free_space(block_group, &offset, &bytes_search, 0);
        if (!entry)
                goto out;

        ret = offset;
        if (entry->bitmap) {
                bitmap_clear_bits(block_group, entry, offset, bytes);
                if (!entry->bytes) {
                        unlink_free_space(block_group, entry);
                        kfree(entry->bitmap);
                        kfree(entry);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }
        } else {
                unlink_free_space(block_group, entry);
                entry->offset += bytes;
                entry->bytes -= bytes;
                if (!entry->bytes)
                        kfree(entry);
                else
                        link_free_space(block_group, entry);
        }

out:
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group_cache *block_group,
                               struct btrfs_free_cluster *cluster)
{
        int ret;

        /* first, get a safe pointer to the block group */
        spin_lock(&cluster->lock);
        if (!block_group) {
                block_group = cluster->block_group;
                if (!block_group) {
                        spin_unlock(&cluster->lock);
                        return 0;
                }
        } else if (cluster->block_group != block_group) {
                /* someone else has already freed it, don't redo their work */
                spin_unlock(&cluster->lock);
                return 0;
        }
        atomic_inc(&block_group->count);
        spin_unlock(&cluster->lock);

        /* now return any extents the cluster had on it */
        spin_lock(&block_group->tree_lock);
        ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
        spin_unlock(&block_group->tree_lock);

        /* finally drop our ref */
        btrfs_put_block_group(block_group);
        return ret;
}

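/*
 * allocate 'bytes' from a cluster that points at a bitmap entry rather
 * than holding extent entries of its own
 */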
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                                   struct btrfs_free_cluster *cluster,
                                   u64 bytes, u64 min_start)
{
        struct btrfs_free_space *entry;
        int err;
        u64 search_start = cluster->window_start;
        u64 search_bytes = bytes;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        if (!cluster->points_to_bitmap)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        /*
         * search_start is the beginning of the bitmap, but at some point it may
         * be a good idea to point to the actual start of the free area in the
         * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
         * to 1 to make sure we get the bitmap entry
         */
        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, search_start),
                                   1, 0);
        if (!entry || !entry->bitmap)
                goto out;

        search_start = min_start;
        search_bytes = bytes;

        err = search_bitmap(block_group, entry, &search_start,
                            &search_bytes);
        if (err)
                goto out;

        ret = search_start;
        bitmap_clear_bits(block_group, entry, ret, bytes);
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster, u64 bytes,
                             u64 min_start)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        u64 ret = 0;

        if (cluster->points_to_bitmap)
                return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
                                               min_start);

        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        node = rb_first(&cluster->root);
        if (!node)
                goto out;

        entry = rb_entry(node, struct btrfs_free_space, offset_index);

        while (1) {
                if (entry->bytes < bytes || entry->offset < min_start) {
                        struct rb_node *node;

                        node = rb_next(&entry->offset_index);
                        if (!node)
                                break;
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                }
                ret = entry->offset;

                entry->offset += bytes;
                entry->bytes -= bytes;

                if (entry->bytes == 0) {
                        rb_erase(&entry->offset_index, &cluster->root);
                        kfree(entry);
                }
                break;
        }
out:
        spin_unlock(&cluster->lock);

        return ret;
}

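/*
 * scan a bitmap entry for enough free bits to build a cluster of
 * 'bytes' in total, made of runs of at least 'min_bytes'.  On success
 * the cluster is pointed at the bitmap and 0 is returned.
 */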
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                struct btrfs_free_space *entry,
                                struct btrfs_free_cluster *cluster,
                                u64 offset, u64 bytes, u64 min_bytes)
{
        unsigned long next_zero;
        unsigned long i;
        unsigned long search_bits;
        unsigned long total_bits;
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
        bool found = false;

        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
        search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
        total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
        found_bits = 0;
        for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(entry->bitmap,
                                               BITS_PER_BITMAP, i);
                if (next_zero - i >= search_bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (!found_bits)
                return -1;

        if (!found) {
                start = i;
                found = true;
        }

        total_found += found_bits;

        if (cluster->max_size < found_bits * block_group->sectorsize)
                cluster->max_size = found_bits * block_group->sectorsize;

        if (total_found < total_bits) {
                i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
                if (i - start > total_bits * 2) {
                        total_found = 0;
                        cluster->max_size = 0;
                        found = false;
                }
                goto again;
        }

        cluster->window_start = start * block_group->sectorsize +
                entry->offset;
        cluster->points_to_bitmap = true;

        return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        struct btrfs_free_space *next;
        struct btrfs_free_space *last = NULL;
        u64 min_bytes;
        u64 window_start;
        u64 window_free;
        u64 max_extent = 0;
        bool found_bitmap = false;
        int ret;

        /* for metadata, allow allocations with more holes */
        if (btrfs_test_opt(root, SSD_SPREAD)) {
                min_bytes = bytes + empty_size;
        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
                /*
                 * we want to do larger allocations when we are
                 * flushing out the delayed refs, it helps prevent
                 * making more work as we go along.
                 */
                if (trans->transaction->delayed_refs.flushing)
                        min_bytes = max(bytes, (bytes + empty_size) >> 1);
                else
                        min_bytes = max(bytes, (bytes + empty_size) >> 4);
        } else
                min_bytes = max(bytes, (bytes + empty_size) >> 2);

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        /* someone already found a cluster, hooray */
        if (cluster->block_group) {
                ret = 0;
                goto out;
        }
again:
        entry = tree_search_offset(block_group, offset, found_bitmap, 1);
        if (!entry) {
                ret = -ENOSPC;
                goto out;
        }

        /*
         * If found_bitmap is true, we exhausted our search for extent entries,
         * and we just want to search all of the bitmaps that we can find, and
         * ignore any extent entries we find.
         */
        while (entry->bitmap || found_bitmap ||
               (!entry->bitmap && entry->bytes < min_bytes)) {
                struct rb_node *node = rb_next(&entry->offset_index);

                if (entry->bitmap && entry->bytes > bytes + empty_size) {
                        ret = btrfs_bitmap_cluster(block_group, entry, cluster,
                                                   offset, bytes + empty_size,
                                                   min_bytes);
                        if (!ret)
                                goto got_it;
                }

                if (!node) {
                        ret = -ENOSPC;
                        goto out;
                }
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        /*
         * We already searched all the extent entries from the passed in offset
         * to the end and didn't find enough space for the cluster, and we also
         * didn't find any bitmaps that met our criteria, just go ahead and exit
         */
        if (found_bitmap) {
                ret = -ENOSPC;
                goto out;
        }

        cluster->points_to_bitmap = false;
        window_start = entry->offset;
        window_free = entry->bytes;
        last = entry;
        max_extent = entry->bytes;

        while (1) {
                /* our window is just right, let's fill it */
                if (window_free >= bytes + empty_size)
                        break;

                node = rb_next(&last->offset_index);
                if (!node) {
                        if (found_bitmap)
                                goto again;
                        ret = -ENOSPC;
                        goto out;
                }
                next = rb_entry(node, struct btrfs_free_space, offset_index);

                /*
                 * we found a bitmap, so if this search doesn't result in a
                 * cluster, we know to go and search again for the bitmaps and
                 * start looking for space there
                 */
                if (next->bitmap) {
                        if (!found_bitmap)
                                offset = next->offset;
                        found_bitmap = true;
                        last = next;
                        continue;
                }

                /*
                 * we haven't filled the empty size and the window is
                 * very large.  reset and try again
                 */
                if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
                    next->offset - window_start > (bytes + empty_size) * 2) {
                        entry = next;
                        window_start = entry->offset;
                        window_free = entry->bytes;
                        last = entry;
                        max_extent = entry->bytes;
                } else {
                        last = next;
                        window_free += next->bytes;
                        /* track the largest extent in the window */
                        if (next->bytes > max_extent)
                                max_extent = next->bytes;
                }
        }

        cluster->window_start = entry->offset;

        /*
         * now we've found our entries, pull them out of the free space
         * cache and put them into the cluster rbtree
         *
         * The cluster includes an rbtree, but only uses the offset index
         * of each free space cache entry.
         */
        while (1) {
                node = rb_next(&entry->offset_index);
                if (entry->bitmap && node) {
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                } else if (entry->bitmap && !node) {
                        break;
                }

                rb_erase(&entry->offset_index, &block_group->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                BUG_ON(ret);

                if (!node || entry == last)
                        break;

                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        cluster->max_size = max_extent;
got_it:
        ret = 0;
        atomic_inc(&block_group->count);
        list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
        cluster->block_group = block_group;
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
        spin_lock_init(&cluster->lock);
        spin_lock_init(&cluster->refill_lock);
        cluster->root = RB_ROOT;
        cluster->max_size = 0;
        cluster->points_to_bitmap = false;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
}