[pandora-kernel.git] fs/btrfs/free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include "ctree.h"
24 #include "free-space-cache.h"
25 #include "transaction.h"
26 #include "disk-io.h"
27 #include "extent_io.h"
28
29 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
30 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
31
32 static void recalculate_thresholds(struct btrfs_block_group_cache
33                                    *block_group);
34 static int link_free_space(struct btrfs_block_group_cache *block_group,
35                            struct btrfs_free_space *info);
36
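/*
 * Look up the inode that backs the free space cache for this block group.
 * If the block group already holds a reference, reuse it; otherwise find the
 * free space header item, read the inode location out of it and iget the
 * inode.  The inode is cached on the block group (unless the filesystem is
 * closing) and returned with its mapping marked so that page allocations
 * never recurse back into the filesystem.
 */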
37 struct inode *lookup_free_space_inode(struct btrfs_root *root,
38                                       struct btrfs_block_group_cache
39                                       *block_group, struct btrfs_path *path)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         spin_lock(&block_group->lock);
50         if (block_group->inode)
51                 inode = igrab(block_group->inode);
52         spin_unlock(&block_group->lock);
53         if (inode)
54                 return inode;
55
56         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
57         key.offset = block_group->key.objectid;
58         key.type = 0;
59
60         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
61         if (ret < 0)
62                 return ERR_PTR(ret);
63         if (ret > 0) {
64                 btrfs_release_path(root, path);
65                 return ERR_PTR(-ENOENT);
66         }
67
68         leaf = path->nodes[0];
69         header = btrfs_item_ptr(leaf, path->slots[0],
70                                 struct btrfs_free_space_header);
71         btrfs_free_space_key(leaf, header, &disk_key);
72         btrfs_disk_key_to_cpu(&location, &disk_key);
73         btrfs_release_path(root, path);
74
75         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
76         if (!inode)
77                 return ERR_PTR(-ENOENT);
78         if (IS_ERR(inode))
79                 return inode;
80         if (is_bad_inode(inode)) {
81                 iput(inode);
82                 return ERR_PTR(-ENOENT);
83         }
84
85         inode->i_mapping->flags &= ~__GFP_FS;
86
87         spin_lock(&block_group->lock);
88         if (!root->fs_info->closing) {
89                 block_group->inode = igrab(inode);
90                 block_group->iref = 1;
91         }
92         spin_unlock(&block_group->lock);
93
94         return inode;
95 }
96
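/*
 * Create the inode that will hold the free space cache for this block group
 * together with its free space header item.  The inode is a plain 0600
 * regular file flagged NOCOMPRESS, PREALLOC and NODATASUM so cache writes
 * bypass compression and data checksumming.
 */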
97 int create_free_space_inode(struct btrfs_root *root,
98                             struct btrfs_trans_handle *trans,
99                             struct btrfs_block_group_cache *block_group,
100                             struct btrfs_path *path)
101 {
102         struct btrfs_key key;
103         struct btrfs_disk_key disk_key;
104         struct btrfs_free_space_header *header;
105         struct btrfs_inode_item *inode_item;
106         struct extent_buffer *leaf;
107         u64 objectid;
108         int ret;
109
110         ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
111         if (ret < 0)
112                 return ret;
113
114         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
115         if (ret)
116                 return ret;
117
118         leaf = path->nodes[0];
119         inode_item = btrfs_item_ptr(leaf, path->slots[0],
120                                     struct btrfs_inode_item);
121         btrfs_item_key(leaf, &disk_key, path->slots[0]);
122         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
123                              sizeof(*inode_item));
124         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
125         btrfs_set_inode_size(leaf, inode_item, 0);
126         btrfs_set_inode_nbytes(leaf, inode_item, 0);
127         btrfs_set_inode_uid(leaf, inode_item, 0);
128         btrfs_set_inode_gid(leaf, inode_item, 0);
129         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
130         btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
131                               BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
132         btrfs_set_inode_nlink(leaf, inode_item, 1);
133         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
134         btrfs_set_inode_block_group(leaf, inode_item,
135                                     block_group->key.objectid);
136         btrfs_mark_buffer_dirty(leaf);
137         btrfs_release_path(root, path);
138
139         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
140         key.offset = block_group->key.objectid;
141         key.type = 0;
142
143         ret = btrfs_insert_empty_item(trans, root, path, &key,
144                                       sizeof(struct btrfs_free_space_header));
145         if (ret < 0) {
146                 btrfs_release_path(root, path);
147                 return ret;
148         }
149         leaf = path->nodes[0];
150         header = btrfs_item_ptr(leaf, path->slots[0],
151                                 struct btrfs_free_space_header);
152         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
153         btrfs_set_free_space_key(leaf, header, &disk_key);
154         btrfs_mark_buffer_dirty(leaf);
155         btrfs_release_path(root, path);
156
157         return 0;
158 }
159
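/*
 * Throw away the current contents of the free space cache inode before it is
 * rewritten: reset i_size, drop the page cache and delete all of the inode's
 * extent items.  No orphan item is needed because the truncate never spans a
 * transaction commit.
 */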
160 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
161                                     struct btrfs_trans_handle *trans,
162                                     struct btrfs_path *path,
163                                     struct inode *inode)
164 {
165         loff_t oldsize;
166         int ret = 0;
167
168         trans->block_rsv = root->orphan_block_rsv;
169         ret = btrfs_block_rsv_check(trans, root,
170                                     root->orphan_block_rsv,
171                                     0, 5);
172         if (ret)
173                 return ret;
174
175         oldsize = i_size_read(inode);
176         btrfs_i_size_write(inode, 0);
177         truncate_pagecache(inode, oldsize, 0);
178
179         /*
180          * We don't need an orphan item because truncating the free space cache
181          * will never be split across transactions.
182          */
183         ret = btrfs_truncate_inode_items(trans, root, inode,
184                                          0, BTRFS_EXTENT_DATA_KEY);
185         if (ret) {
186                 WARN_ON(1);
187                 return ret;
188         }
189
190         return btrfs_update_inode(trans, root, inode);
191 }
192
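/*
 * Kick off readahead over the whole cache inode so the page-by-page reads in
 * load_free_space_cache() mostly hit pages that are already in memory.
 */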
193 static int readahead_cache(struct inode *inode)
194 {
195         struct file_ra_state *ra;
196         unsigned long last_index;
197
198         ra = kzalloc(sizeof(*ra), GFP_NOFS);
199         if (!ra)
200                 return -ENOMEM;
201
202         file_ra_state_init(ra, inode->i_mapping);
203         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
204
205         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
206
207         kfree(ra);
208
209         return 0;
210 }
211
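/*
 * Read the on-disk free space cache for this block group into the in-memory
 * free space tree.  Page 0 starts with one crc per page followed by the
 * generation; every page's crc and the generation are verified before any
 * entry is trusted.  Extent entries are linked straight into the tree,
 * bitmap entries are queued on a list and their bitmap pages, which follow
 * the entry pages, are copied in afterwards.
 *
 * Returns 1 if the cache was usable and 0 if it needs to be rebuilt; a bad
 * cache also flags the block group BTRFS_DC_CLEAR and is torn down.
 */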
212 int load_free_space_cache(struct btrfs_fs_info *fs_info,
213                           struct btrfs_block_group_cache *block_group)
214 {
215         struct btrfs_root *root = fs_info->tree_root;
216         struct inode *inode;
217         struct btrfs_free_space_header *header;
218         struct extent_buffer *leaf;
219         struct page *page;
220         struct btrfs_path *path;
221         u32 *checksums = NULL, *crc;
222         char *disk_crcs = NULL;
223         struct btrfs_key key;
224         struct list_head bitmaps;
225         u64 num_entries;
226         u64 num_bitmaps;
227         u64 generation;
228         u64 used = btrfs_block_group_used(&block_group->item);
229         u32 cur_crc = ~(u32)0;
230         pgoff_t index = 0;
231         unsigned long first_page_offset;
232         int num_checksums;
233         int ret = 0;
234
235         /*
236          * If we're unmounting then just return, since this does a search on the
237          * normal root and not the commit root and we could deadlock.
238          */
239         smp_mb();
240         if (fs_info->closing)
241                 return 0;
242
243         /*
244          * If this block group has been marked to be cleared for one reason or
245          * another then we can't trust the on disk cache, so just return.
246          */
247         spin_lock(&block_group->lock);
248         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
249                 spin_unlock(&block_group->lock);
250                 return 0;
251         }
252         spin_unlock(&block_group->lock);
253
254         INIT_LIST_HEAD(&bitmaps);
255
256         path = btrfs_alloc_path();
257         if (!path)
258                 return 0;
259
260         inode = lookup_free_space_inode(root, block_group, path);
261         if (IS_ERR(inode)) {
262                 btrfs_free_path(path);
263                 return 0;
264         }
265
266         /* Nothing in the space cache, goodbye */
267         if (!i_size_read(inode)) {
268                 btrfs_free_path(path);
269                 goto out;
270         }
271
272         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
273         key.offset = block_group->key.objectid;
274         key.type = 0;
275
276         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
277         if (ret) {
278                 btrfs_free_path(path);
279                 goto out;
280         }
281
282         leaf = path->nodes[0];
283         header = btrfs_item_ptr(leaf, path->slots[0],
284                                 struct btrfs_free_space_header);
285         num_entries = btrfs_free_space_entries(leaf, header);
286         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
287         generation = btrfs_free_space_generation(leaf, header);
288         btrfs_free_path(path);
289
290         if (BTRFS_I(inode)->generation != generation) {
291                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
292                        " not match free space cache generation (%llu) for "
293                        "block group %llu\n",
294                        (unsigned long long)BTRFS_I(inode)->generation,
295                        (unsigned long long)generation,
296                        (unsigned long long)block_group->key.objectid);
297                 goto free_cache;
298         }
299
300         if (!num_entries)
301                 goto out;
302
303         /* Set up everything for doing checksumming */
304         num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
305         checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
306         if (!checksums)
307                 goto out;
308         first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
309         disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
310         if (!disk_crcs)
311                 goto out;
312
313         ret = readahead_cache(inode);
314         if (ret) {
315                 ret = 0;
316                 goto out;
317         }
318
319         while (1) {
320                 struct btrfs_free_space_entry *entry;
321                 struct btrfs_free_space *e;
322                 void *addr;
323                 unsigned long offset = 0;
324                 unsigned long start_offset = 0;
325                 int need_loop = 0;
326
327                 if (!num_entries && !num_bitmaps)
328                         break;
329
330                 if (index == 0) {
331                         start_offset = first_page_offset;
332                         offset = start_offset;
333                 }
334
335                 page = grab_cache_page(inode->i_mapping, index);
336                 if (!page) {
337                         ret = 0;
338                         goto free_cache;
339                 }
340
341                 if (!PageUptodate(page)) {
342                         btrfs_readpage(NULL, page);
343                         lock_page(page);
344                         if (!PageUptodate(page)) {
345                                 unlock_page(page);
346                                 page_cache_release(page);
347                                 printk(KERN_ERR "btrfs: error reading free "
348                                        "space cache: %llu\n",
349                                        (unsigned long long)
350                                        block_group->key.objectid);
351                                 goto free_cache;
352                         }
353                 }
354                 addr = kmap(page);
355
356                 if (index == 0) {
357                         u64 *gen;
358
359                         memcpy(disk_crcs, addr, first_page_offset);
360                         gen = addr + (sizeof(u32) * num_checksums);
361                         if (*gen != BTRFS_I(inode)->generation) {
362                                 printk(KERN_ERR "btrfs: space cache generation"
363                                        " (%llu) does not match inode (%llu) "
364                                        "for block group %llu\n",
365                                        (unsigned long long)*gen,
366                                        (unsigned long long)
367                                        BTRFS_I(inode)->generation,
368                                        (unsigned long long)
369                                        block_group->key.objectid);
370                                 kunmap(page);
371                                 unlock_page(page);
372                                 page_cache_release(page);
373                                 goto free_cache;
374                         }
375                         crc = (u32 *)disk_crcs;
376                 }
377                 entry = addr + start_offset;
378
379                 /* First let's check our crc before we do anything fun */
380                 cur_crc = ~(u32)0;
381                 cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
382                                           PAGE_CACHE_SIZE - start_offset);
383                 btrfs_csum_final(cur_crc, (char *)&cur_crc);
384                 if (cur_crc != *crc) {
385                         printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
386                                "block group %llu\n", index,
387                                (unsigned long long)block_group->key.objectid);
388                         kunmap(page);
389                         unlock_page(page);
390                         page_cache_release(page);
391                         goto free_cache;
392                 }
393                 crc++;
394
395                 while (1) {
396                         if (!num_entries)
397                                 break;
398
399                         need_loop = 1;
400                         e = kmem_cache_zalloc(btrfs_free_space_cachep,
401                                               GFP_NOFS);
402                         if (!e) {
403                                 kunmap(page);
404                                 unlock_page(page);
405                                 page_cache_release(page);
406                                 goto free_cache;
407                         }
408
409                         e->offset = le64_to_cpu(entry->offset);
410                         e->bytes = le64_to_cpu(entry->bytes);
411                         if (!e->bytes) {
412                                 kunmap(page);
413                                 kmem_cache_free(btrfs_free_space_cachep, e);
414                                 unlock_page(page);
415                                 page_cache_release(page);
416                                 goto free_cache;
417                         }
418
419                         if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
420                                 spin_lock(&block_group->tree_lock);
421                                 ret = link_free_space(block_group, e);
422                                 spin_unlock(&block_group->tree_lock);
423                                 BUG_ON(ret);
424                         } else {
425                                 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
426                                 if (!e->bitmap) {
427                                         kunmap(page);
428                                         kmem_cache_free(
429                                                 btrfs_free_space_cachep, e);
430                                         unlock_page(page);
431                                         page_cache_release(page);
432                                         goto free_cache;
433                                 }
434                                 spin_lock(&block_group->tree_lock);
435                                 ret = link_free_space(block_group, e);
436                                 block_group->total_bitmaps++;
437                                 recalculate_thresholds(block_group);
438                                 spin_unlock(&block_group->tree_lock);
439                                 list_add_tail(&e->list, &bitmaps);
440                         }
441
442                         num_entries--;
443                         offset += sizeof(struct btrfs_free_space_entry);
444                         if (offset + sizeof(struct btrfs_free_space_entry) >=
445                             PAGE_CACHE_SIZE)
446                                 break;
447                         entry++;
448                 }
449
450                 /*
451                  * We read an entry out of this page, so we need to move on
452                  * to the next page.
453                  */
454                 if (need_loop) {
455                         kunmap(page);
456                         goto next;
457                 }
458
459                 /*
460                  * The bitmap pages come after all of the entry pages, in the
461                  * same order that the bitmap entries were added to the cache.
462                  */
463                 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
464                 list_del_init(&e->list);
465                 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
466                 kunmap(page);
467                 num_bitmaps--;
468 next:
469                 unlock_page(page);
470                 page_cache_release(page);
471                 index++;
472         }
473
474         spin_lock(&block_group->tree_lock);
475         if (block_group->free_space != (block_group->key.offset - used -
476                                         block_group->bytes_super)) {
477                 spin_unlock(&block_group->tree_lock);
478                 printk(KERN_ERR "block group %llu has the wrong amount of free "
479                        "space\n", (unsigned long long)block_group->key.objectid);
480                 ret = 0;
481                 goto free_cache;
482         }
483         spin_unlock(&block_group->tree_lock);
484
485         ret = 1;
486 out:
487         kfree(checksums);
488         kfree(disk_crcs);
489         iput(inode);
490         return ret;
491
492 free_cache:
493         /* This cache is bogus, make sure it gets cleared */
494         spin_lock(&block_group->lock);
495         block_group->disk_cache_state = BTRFS_DC_CLEAR;
496         spin_unlock(&block_group->lock);
497         btrfs_remove_free_space_cache(block_group);
498         goto out;
499 }
500
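/*
 * Write the in-memory free space for this block group out to its cache
 * inode in the format load_free_space_cache() expects: page 0 begins with a
 * crc for every page plus the transaction id, extent and bitmap entries
 * follow (pinned extents and cluster entries are folded in so their space
 * is not lost), and each bitmap then gets a page of its own.
 *
 * Returns 1 if the cache was written; any failure returns 0 and marks the
 * block group BTRFS_DC_ERROR so the stale cache is ignored at load time.
 */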
501 int btrfs_write_out_cache(struct btrfs_root *root,
502                           struct btrfs_trans_handle *trans,
503                           struct btrfs_block_group_cache *block_group,
504                           struct btrfs_path *path)
505 {
506         struct btrfs_free_space_header *header;
507         struct extent_buffer *leaf;
508         struct inode *inode;
509         struct rb_node *node;
510         struct list_head *pos, *n;
511         struct page **pages;
512         struct page *page;
513         struct extent_state *cached_state = NULL;
514         struct btrfs_free_cluster *cluster = NULL;
515         struct extent_io_tree *unpin = NULL;
516         struct list_head bitmap_list;
517         struct btrfs_key key;
518         u64 start, end, len;
519         u64 bytes = 0;
520         u32 *crc, *checksums;
521         unsigned long first_page_offset;
522         int index = 0, num_pages = 0;
523         int entries = 0;
524         int bitmaps = 0;
525         int ret = 0;
526         bool next_page = false;
527         bool out_of_space = false;
528
529         root = root->fs_info->tree_root;
530
531         INIT_LIST_HEAD(&bitmap_list);
532
533         spin_lock(&block_group->lock);
534         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
535                 spin_unlock(&block_group->lock);
536                 return 0;
537         }
538         spin_unlock(&block_group->lock);
539
540         inode = lookup_free_space_inode(root, block_group, path);
541         if (IS_ERR(inode))
542                 return 0;
543
544         if (!i_size_read(inode)) {
545                 iput(inode);
546                 return 0;
547         }
548
549         node = rb_first(&block_group->free_space_offset);
550         if (!node) {
551                 iput(inode);
552                 return 0;
553         }
554
555         num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556                 PAGE_CACHE_SHIFT;
557         filemap_write_and_wait(inode->i_mapping);
558         btrfs_wait_ordered_range(inode, inode->i_size &
559                                  ~(root->sectorsize - 1), (u64)-1);
560
561         /* We need a checksum per page. */
562         crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
563         if (!crc) {
564                 iput(inode);
565                 return 0;
566         }
567
568         pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569         if (!pages) {
570                 kfree(crc);
571                 iput(inode);
572                 return 0;
573         }
574
575         /*
576          * Since the first page holds all of our checksums and our generation,
577          * compute the offset at which our entries can start being written.
578          */
579         first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
581         /* Get the cluster for this block_group if it exists */
582         if (!list_empty(&block_group->cluster_list))
583                 cluster = list_entry(block_group->cluster_list.next,
584                                      struct btrfs_free_cluster,
585                                      block_group_list);
586
587         /*
588          * We shouldn't have switched the pinned extents yet so this is the
589          * right one
590          */
591         unpin = root->fs_info->pinned_extents;
592
593         /*
594          * Lock all pages first so we can lock the extent safely.
595          *
596          * NOTE: Because we hold the ref the entire time we're going to write to
597          * the page, find_get_page should never fail, so we don't do a check
598          * after find_get_page at this point.  Just putting this here so people
599          * know and don't freak out.
600          */
601         while (index < num_pages) {
602                 page = grab_cache_page(inode->i_mapping, index);
603                 if (!page) {
604                         int i;
605
606                         for (i = 0; i < num_pages; i++) {
607                                 unlock_page(pages[i]);
608                                 page_cache_release(pages[i]);
609                         }
610                         goto out_free;
611                 }
612                 pages[index] = page;
613                 index++;
614         }
615
616         index = 0;
617         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
618                          0, &cached_state, GFP_NOFS);
619
620         /*
621          * When searching for pinned extents, we need to start at our start
622          * offset.
623          */
624         start = block_group->key.objectid;
625
626         /* Write out the extent entries */
627         do {
628                 struct btrfs_free_space_entry *entry;
629                 void *addr;
630                 unsigned long offset = 0;
631                 unsigned long start_offset = 0;
632
633                 next_page = false;
634
635                 if (index == 0) {
636                         start_offset = first_page_offset;
637                         offset = start_offset;
638                 }
639
640                 if (index >= num_pages) {
641                         out_of_space = true;
642                         break;
643                 }
644
645                 page = pages[index];
646
647                 addr = kmap(page);
648                 entry = addr + start_offset;
649
650                 memset(addr, 0, PAGE_CACHE_SIZE);
651                 while (node && !next_page) {
652                         struct btrfs_free_space *e;
653
654                         e = rb_entry(node, struct btrfs_free_space, offset_index);
655                         entries++;
656
657                         entry->offset = cpu_to_le64(e->offset);
658                         entry->bytes = cpu_to_le64(e->bytes);
659                         if (e->bitmap) {
660                                 entry->type = BTRFS_FREE_SPACE_BITMAP;
661                                 list_add_tail(&e->list, &bitmap_list);
662                                 bitmaps++;
663                         } else {
664                                 entry->type = BTRFS_FREE_SPACE_EXTENT;
665                         }
666                         node = rb_next(node);
667                         if (!node && cluster) {
668                                 node = rb_first(&cluster->root);
669                                 cluster = NULL;
670                         }
671                         offset += sizeof(struct btrfs_free_space_entry);
672                         if (offset + sizeof(struct btrfs_free_space_entry) >=
673                             PAGE_CACHE_SIZE)
674                                 next_page = true;
675                         entry++;
676                 }
677
678                 /*
679                  * We want to add any pinned extents to our free space cache
680                  * so we don't leak the space
681                  */
682                 while (!next_page && (start < block_group->key.objectid +
683                                       block_group->key.offset)) {
684                         ret = find_first_extent_bit(unpin, start, &start, &end,
685                                                     EXTENT_DIRTY);
686                         if (ret) {
687                                 ret = 0;
688                                 break;
689                         }
690
691                         /* This pinned extent is out of our range */
692                         if (start >= block_group->key.objectid +
693                             block_group->key.offset)
694                                 break;
695
696                         len = block_group->key.objectid +
697                                 block_group->key.offset - start;
698                         len = min(len, end + 1 - start);
699
700                         entries++;
701                         entry->offset = cpu_to_le64(start);
702                         entry->bytes = cpu_to_le64(len);
703                         entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705                         start = end + 1;
706                         offset += sizeof(struct btrfs_free_space_entry);
707                         if (offset + sizeof(struct btrfs_free_space_entry) >=
708                             PAGE_CACHE_SIZE)
709                                 next_page = true;
710                         entry++;
711                 }
712                 *crc = ~(u32)0;
713                 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
714                                        PAGE_CACHE_SIZE - start_offset);
715                 kunmap(page);
716
717                 btrfs_csum_final(*crc, (char *)crc);
718                 crc++;
719
720                 bytes += PAGE_CACHE_SIZE;
721
722                 index++;
723         } while (node || next_page);
724
725         /* Write out the bitmaps */
726         list_for_each_safe(pos, n, &bitmap_list) {
727                 void *addr;
728                 struct btrfs_free_space *entry =
729                         list_entry(pos, struct btrfs_free_space, list);
730
731                 if (index >= num_pages) {
732                         out_of_space = true;
733                         break;
734                 }
735                 page = pages[index];
736
737                 addr = kmap(page);
738                 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
739                 *crc = ~(u32)0;
740                 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
741                 kunmap(page);
742                 btrfs_csum_final(*crc, (char *)crc);
743                 crc++;
744                 bytes += PAGE_CACHE_SIZE;
745
746                 list_del_init(&entry->list);
747                 index++;
748         }
749
750         if (out_of_space) {
751                 btrfs_drop_pages(pages, num_pages);
752                 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753                                      i_size_read(inode) - 1, &cached_state,
754                                      GFP_NOFS);
755                 ret = 0;
756                 goto out_free;
757         }
758
759         /* Zero out the rest of the pages just to make sure */
760         while (index < num_pages) {
761                 void *addr;
762
763                 page = pages[index];
764                 addr = kmap(page);
765                 memset(addr, 0, PAGE_CACHE_SIZE);
766                 kunmap(page);
767                 bytes += PAGE_CACHE_SIZE;
768                 index++;
769         }
770
771         /* Write the checksums and trans id to the first page */
772         {
773                 void *addr;
774                 u64 *gen;
775
776                 page = pages[0];
777
778                 addr = kmap(page);
779                 memcpy(addr, checksums, sizeof(u32) * num_pages);
780                 gen = addr + (sizeof(u32) * num_pages);
781                 *gen = trans->transid;
782                 kunmap(page);
783         }
784
785         ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786                                             bytes, &cached_state);
787         btrfs_drop_pages(pages, num_pages);
788         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
789                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
790
791         if (ret) {
792                 ret = 0;
793                 goto out_free;
794         }
795
796         BTRFS_I(inode)->generation = trans->transid;
797
798         filemap_write_and_wait(inode->i_mapping);
799
800         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
801         key.offset = block_group->key.objectid;
802         key.type = 0;
803
804         ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
805         if (ret < 0) {
806                 ret = 0;
807                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
808                                  EXTENT_DIRTY | EXTENT_DELALLOC |
809                                  EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
810                 goto out_free;
811         }
812         leaf = path->nodes[0];
813         if (ret > 0) {
814                 struct btrfs_key found_key;
815                 BUG_ON(!path->slots[0]);
816                 path->slots[0]--;
817                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
818                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
819                     found_key.offset != block_group->key.objectid) {
820                         ret = 0;
821                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
822                                          EXTENT_DIRTY | EXTENT_DELALLOC |
823                                          EXTENT_DO_ACCOUNTING, 0, 0, NULL,
824                                          GFP_NOFS);
825                         btrfs_release_path(root, path);
826                         goto out_free;
827                 }
828         }
829         header = btrfs_item_ptr(leaf, path->slots[0],
830                                 struct btrfs_free_space_header);
831         btrfs_set_free_space_entries(leaf, header, entries);
832         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
833         btrfs_set_free_space_generation(leaf, header, trans->transid);
834         btrfs_mark_buffer_dirty(leaf);
835         btrfs_release_path(root, path);
836
837         ret = 1;
838
839 out_free:
840         if (ret == 0) {
841                 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
842                 spin_lock(&block_group->lock);
843                 block_group->disk_cache_state = BTRFS_DC_ERROR;
844                 spin_unlock(&block_group->lock);
845                 BTRFS_I(inode)->generation = 0;
846         }
847         kfree(checksums);
848         kfree(pages);
849         btrfs_update_inode(trans, root, inode);
850         iput(inode);
851         return ret;
852 }
853
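/*
 * Helpers for converting byte offsets and lengths into bit indexes and
 * counts within a bitmap entry.  Each bit covers one sector.
 */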
854 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
855                                           u64 offset)
856 {
857         BUG_ON(offset < bitmap_start);
858         offset -= bitmap_start;
859         return (unsigned long)(div64_u64(offset, sectorsize));
860 }
861
862 static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
863 {
864         return (unsigned long)(div64_u64(bytes, sectorsize));
865 }
866
867 static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
868                                    u64 offset)
869 {
870         u64 bitmap_start;
871         u64 bytes_per_bitmap;
872
873         bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
874         bitmap_start = offset - block_group->key.objectid;
875         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
876         bitmap_start *= bytes_per_bitmap;
877         bitmap_start += block_group->key.objectid;
878
879         return bitmap_start;
880 }
881
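/*
 * Insert an entry into the offset-sorted free space tree.  A bitmap entry
 * and an extent entry may start at the same offset; the extent entry is kept
 * to the left so a linear walk finds it first.
 */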
882 static int tree_insert_offset(struct rb_root *root, u64 offset,
883                               struct rb_node *node, int bitmap)
884 {
885         struct rb_node **p = &root->rb_node;
886         struct rb_node *parent = NULL;
887         struct btrfs_free_space *info;
888
889         while (*p) {
890                 parent = *p;
891                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
892
893                 if (offset < info->offset) {
894                         p = &(*p)->rb_left;
895                 } else if (offset > info->offset) {
896                         p = &(*p)->rb_right;
897                 } else {
898                         /*
899                          * we could have a bitmap entry and an extent entry
900                          * share the same offset.  If this is the case, we want
901                          * the extent entry to always be found first if we do a
902                          * linear search through the tree, since we want to have
903                          * the quickest allocation time, and allocating from an
904                          * extent is faster than allocating from a bitmap.  So
905                          * if we're inserting a bitmap and we find an entry at
906                          * this offset, we want to go right, or after this entry
907                          * logically.  If we are inserting an extent and we've
908                          * found a bitmap, we want to go left, or before
909                          * logically.
910                          */
911                         if (bitmap) {
912                                 WARN_ON(info->bitmap);
913                                 p = &(*p)->rb_right;
914                         } else {
915                                 WARN_ON(!info->bitmap);
916                                 p = &(*p)->rb_left;
917                         }
918                 }
919         }
920
921         rb_link_node(node, parent, p);
922         rb_insert_color(node, root);
923
924         return 0;
925 }
926
927 /*
928  * searches the tree for the given offset.
929  *
930  * fuzzy - If this is set, then we are trying to make an allocation, and we just
931  * want a section that has at least bytes size and comes at or after the given
932  * offset.
933  */
934 static struct btrfs_free_space *
935 tree_search_offset(struct btrfs_block_group_cache *block_group,
936                    u64 offset, int bitmap_only, int fuzzy)
937 {
938         struct rb_node *n = block_group->free_space_offset.rb_node;
939         struct btrfs_free_space *entry, *prev = NULL;
940
941         /* find entry that is closest to the 'offset' */
942         while (1) {
943                 if (!n) {
944                         entry = NULL;
945                         break;
946                 }
947
948                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
949                 prev = entry;
950
951                 if (offset < entry->offset)
952                         n = n->rb_left;
953                 else if (offset > entry->offset)
954                         n = n->rb_right;
955                 else
956                         break;
957         }
958
959         if (bitmap_only) {
960                 if (!entry)
961                         return NULL;
962                 if (entry->bitmap)
963                         return entry;
964
965                 /*
966                  * A bitmap entry and an extent entry may share the same offset;
967                  * in that case, the bitmap entry comes after the extent entry.
968                  */
969                 n = rb_next(n);
970                 if (!n)
971                         return NULL;
972                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
973                 if (entry->offset != offset)
974                         return NULL;
975
976                 WARN_ON(!entry->bitmap);
977                 return entry;
978         } else if (entry) {
979                 if (entry->bitmap) {
980                         /*
981                          * if the previous extent entry covers the offset,
982                          * we should return it instead of the bitmap entry
983                          */
984                         n = &entry->offset_index;
985                         while (1) {
986                                 n = rb_prev(n);
987                                 if (!n)
988                                         break;
989                                 prev = rb_entry(n, struct btrfs_free_space,
990                                                 offset_index);
991                                 if (!prev->bitmap) {
992                                         if (prev->offset + prev->bytes > offset)
993                                                 entry = prev;
994                                         break;
995                                 }
996                         }
997                 }
998                 return entry;
999         }
1000
1001         if (!prev)
1002                 return NULL;
1003
1004         /* find last entry before the 'offset' */
1005         entry = prev;
1006         if (entry->offset > offset) {
1007                 n = rb_prev(&entry->offset_index);
1008                 if (n) {
1009                         entry = rb_entry(n, struct btrfs_free_space,
1010                                         offset_index);
1011                         BUG_ON(entry->offset > offset);
1012                 } else {
1013                         if (fuzzy)
1014                                 return entry;
1015                         else
1016                                 return NULL;
1017                 }
1018         }
1019
1020         if (entry->bitmap) {
1021                 n = &entry->offset_index;
1022                 while (1) {
1023                         n = rb_prev(n);
1024                         if (!n)
1025                                 break;
1026                         prev = rb_entry(n, struct btrfs_free_space,
1027                                         offset_index);
1028                         if (!prev->bitmap) {
1029                                 if (prev->offset + prev->bytes > offset)
1030                                         return prev;
1031                                 break;
1032                         }
1033                 }
1034                 if (entry->offset + BITS_PER_BITMAP *
1035                     block_group->sectorsize > offset)
1036                         return entry;
1037         } else if (entry->offset + entry->bytes > offset)
1038                 return entry;
1039
1040         if (!fuzzy)
1041                 return NULL;
1042
1043         while (1) {
1044                 if (entry->bitmap) {
1045                         if (entry->offset + BITS_PER_BITMAP *
1046                             block_group->sectorsize > offset)
1047                                 break;
1048                 } else {
1049                         if (entry->offset + entry->bytes > offset)
1050                                 break;
1051                 }
1052
1053                 n = rb_next(&entry->offset_index);
1054                 if (!n)
1055                         return NULL;
1056                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1057         }
1058         return entry;
1059 }
1060
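/*
 * Link and unlink entries in the block group's free space tree while keeping
 * the free_space and free_extents counters in sync.  __unlink_free_space()
 * leaves the free_space counter alone for callers that are about to re-add
 * the bytes they removed.
 */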
1061 static inline void
1062 __unlink_free_space(struct btrfs_block_group_cache *block_group,
1063                     struct btrfs_free_space *info)
1064 {
1065         rb_erase(&info->offset_index, &block_group->free_space_offset);
1066         block_group->free_extents--;
1067 }
1068
1069 static void unlink_free_space(struct btrfs_block_group_cache *block_group,
1070                               struct btrfs_free_space *info)
1071 {
1072         __unlink_free_space(block_group, info);
1073         block_group->free_space -= info->bytes;
1074 }
1075
1076 static int link_free_space(struct btrfs_block_group_cache *block_group,
1077                            struct btrfs_free_space *info)
1078 {
1079         int ret = 0;
1080
1081         BUG_ON(!info->bitmap && !info->bytes);
1082         ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
1083                                  &info->offset_index, (info->bitmap != NULL));
1084         if (ret)
1085                 return ret;
1086
1087         block_group->free_space += info->bytes;
1088         block_group->free_extents++;
1089         return ret;
1090 }
1091
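/*
 * Recompute how many extent entries this block group may hold before small
 * free extents start being folded into bitmaps.  The budget is
 * MAX_CACHE_BYTES_PER_GIG of tracking memory per 1GiB of block group; what
 * the (current + 1) bitmaps do not use is given to extent entries, capped at
 * half the budget.  For example, with 4KiB pages a 1GiB block group that
 * already has one bitmap gets min(32KiB - 8KiB, 32KiB / 2) = 16KiB, which is
 * then divided by sizeof(struct btrfs_free_space) to get the threshold.
 */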
1092 static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
1093 {
1094         u64 max_bytes;
1095         u64 bitmap_bytes;
1096         u64 extent_bytes;
1097         u64 size = block_group->key.offset;
1098
1099         /*
1100          * The goal is to keep the total amount of memory used per 1gb of space
1101          * at or below 32k, so we need to adjust how much memory we allow to be
1102          * used by extent based free space tracking
1103          */
1104         if (size < 1024 * 1024 * 1024)
1105                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1106         else
1107                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1108                         div64_u64(size, 1024 * 1024 * 1024);
1109
1110         /*
1111          * we want to account for 1 more bitmap than what we have so we can make
1112          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1113          * we add more bitmaps.
1114          */
1115         bitmap_bytes = (block_group->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1116
1117         if (bitmap_bytes >= max_bytes) {
1118                 block_group->extents_thresh = 0;
1119                 return;
1120         }
1121
1122         /*
1123          * we want the extent entry threshold to always be at most 1/2 the max
1124          * bytes we can have, or whatever is less than that.
1125          */
1126         extent_bytes = max_bytes - bitmap_bytes;
1127         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1128
1129         block_group->extents_thresh =
1130                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1131 }
1132
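/*
 * Clear or set the bits covering [offset, offset + bytes) in a bitmap entry
 * and adjust the entry's and the block group's byte counters to match.
 */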
1133 static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
1134                               struct btrfs_free_space *info, u64 offset,
1135                               u64 bytes)
1136 {
1137         unsigned long start, end;
1138         unsigned long i;
1139
1140         start = offset_to_bit(info->offset, block_group->sectorsize, offset);
1141         end = start + bytes_to_bits(bytes, block_group->sectorsize);
1142         BUG_ON(end > BITS_PER_BITMAP);
1143
1144         for (i = start; i < end; i++)
1145                 clear_bit(i, info->bitmap);
1146
1147         info->bytes -= bytes;
1148         block_group->free_space -= bytes;
1149 }
1150
1151 static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
1152                             struct btrfs_free_space *info, u64 offset,
1153                             u64 bytes)
1154 {
1155         unsigned long start, end;
1156         unsigned long i;
1157
1158         start = offset_to_bit(info->offset, block_group->sectorsize, offset);
1159         end = start + bytes_to_bits(bytes, block_group->sectorsize);
1160         BUG_ON(end > BITS_PER_BITMAP);
1161
1162         for (i = start; i < end; i++)
1163                 set_bit(i, info->bitmap);
1164
1165         info->bytes += bytes;
1166         block_group->free_space += bytes;
1167 }
1168
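/*
 * Scan bitmap_info for a run of set bits at least *bytes long, starting at
 * or after *offset.  On success *offset and *bytes are updated to describe
 * the run that was found and 0 is returned; -1 means no large enough run
 * exists in this bitmap.
 */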
1169 static int search_bitmap(struct btrfs_block_group_cache *block_group,
1170                          struct btrfs_free_space *bitmap_info, u64 *offset,
1171                          u64 *bytes)
1172 {
1173         unsigned long found_bits = 0;
1174         unsigned long bits, i;
1175         unsigned long next_zero;
1176
1177         i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
1178                           max_t(u64, *offset, bitmap_info->offset));
1179         bits = bytes_to_bits(*bytes, block_group->sectorsize);
1180
1181         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1182              i < BITS_PER_BITMAP;
1183              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1184                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1185                                                BITS_PER_BITMAP, i);
1186                 if ((next_zero - i) >= bits) {
1187                         found_bits = next_zero - i;
1188                         break;
1189                 }
1190                 i = next_zero;
1191         }
1192
1193         if (found_bits) {
1194                 *offset = (u64)(i * block_group->sectorsize) +
1195                         bitmap_info->offset;
1196                 *bytes = (u64)(found_bits) * block_group->sectorsize;
1197                 return 0;
1198         }
1199
1200         return -1;
1201 }
1202
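/*
 * Starting from the bitmap range that would cover *offset, walk the free
 * space tree and return the first entry (extent or bitmap) able to satisfy
 * an allocation of *bytes, updating *offset and *bytes to the space found.
 */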
1203 static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
1204                                                 *block_group, u64 *offset,
1205                                                 u64 *bytes, int debug)
1206 {
1207         struct btrfs_free_space *entry;
1208         struct rb_node *node;
1209         int ret;
1210
1211         if (!block_group->free_space_offset.rb_node)
1212                 return NULL;
1213
1214         entry = tree_search_offset(block_group,
1215                                    offset_to_bitmap(block_group, *offset),
1216                                    0, 1);
1217         if (!entry)
1218                 return NULL;
1219
1220         for (node = &entry->offset_index; node; node = rb_next(node)) {
1221                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1222                 if (entry->bytes < *bytes)
1223                         continue;
1224
1225                 if (entry->bitmap) {
1226                         ret = search_bitmap(block_group, entry, offset, bytes);
1227                         if (!ret)
1228                                 return entry;
1229                         continue;
1230                 }
1231
1232                 *offset = entry->offset;
1233                 *bytes = entry->bytes;
1234                 return entry;
1235         }
1236
1237         return NULL;
1238 }
1239
1240 static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
1241                            struct btrfs_free_space *info, u64 offset)
1242 {
1243         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1244         int max_bitmaps = (int)div64_u64(block_group->key.offset +
1245                                          bytes_per_bg - 1, bytes_per_bg);
1246         BUG_ON(block_group->total_bitmaps >= max_bitmaps);
1247
1248         info->offset = offset_to_bitmap(block_group, offset);
1249         info->bytes = 0;
1250         link_free_space(block_group, info);
1251         block_group->total_bitmaps++;
1252
1253         recalculate_thresholds(block_group);
1254 }
1255
1256 static void free_bitmap(struct btrfs_block_group_cache *block_group,
1257                         struct btrfs_free_space *bitmap_info)
1258 {
1259         unlink_free_space(block_group, bitmap_info);
1260         kfree(bitmap_info->bitmap);
1261         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1262         block_group->total_bitmaps--;
1263         recalculate_thresholds(block_group);
1264 }
1265
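/*
 * Clear [*offset, *offset + *bytes) out of bitmap_info.  If the range runs
 * past the end of this bitmap, clear what fits, advance *offset and *bytes
 * and continue with the next bitmap.  Returns -EAGAIN when the remainder is
 * not covered by a bitmap and must be handled as extent entries, -EINVAL
 * when there is nothing after this bitmap at all.
 */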
1266 static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
1267                               struct btrfs_free_space *bitmap_info,
1268                               u64 *offset, u64 *bytes)
1269 {
1270         u64 end;
1271         u64 search_start, search_bytes;
1272         int ret;
1273
1274 again:
1275         end = bitmap_info->offset +
1276                 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
1277
1278         /*
1279          * XXX - this can go away after a few releases.
1280          *
1281          * Since the only user of btrfs_remove_free_space is the tree logging
1282          * stuff, and the only way to test that is under crash conditions, we
1283          * want to have this debug stuff here just in case something's not
1284          * working.  Search the bitmap for the space we are trying to use to
1285          * make sure it's actually there.  If it's not there then we need to stop
1286          * because something has gone wrong.
1287          */
1288         search_start = *offset;
1289         search_bytes = *bytes;
1290         search_bytes = min(search_bytes, end - search_start + 1);
1291         ret = search_bitmap(block_group, bitmap_info, &search_start,
1292                             &search_bytes);
1293         BUG_ON(ret < 0 || search_start != *offset);
1294
1295         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1296                 bitmap_clear_bits(block_group, bitmap_info, *offset,
1297                                   end - *offset + 1);
1298                 *bytes -= end - *offset + 1;
1299                 *offset = end + 1;
1300         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1301                 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
1302                 *bytes = 0;
1303         }
1304
1305         if (*bytes) {
1306                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1307                 if (!bitmap_info->bytes)
1308                         free_bitmap(block_group, bitmap_info);
1309
1310                 /*
1311                  * no entry after this bitmap, but we still have bytes to
1312                  * remove, so something has gone wrong.
1313                  */
1314                 if (!next)
1315                         return -EINVAL;
1316
1317                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1318                                        offset_index);
1319
1320                 /*
1321                  * if the next entry isn't a bitmap we need to return to let the
1322                  * extent stuff do its work.
1323                  */
1324                 if (!bitmap_info->bitmap)
1325                         return -EAGAIN;
1326
1327                 /*
1328                  * Ok the next item is a bitmap, but it may not actually hold
1329                  * the information for the rest of this free space stuff, so
1330                  * look for it, and if we don't find it return so we can try
1331                  * everything over again.
1332                  */
1333                 search_start = *offset;
1334                 search_bytes = *bytes;
1335                 ret = search_bitmap(block_group, bitmap_info, &search_start,
1336                                     &search_bytes);
1337                 if (ret < 0 || search_start != *offset)
1338                         return -EAGAIN;
1339
1340                 goto again;
1341         } else if (!bitmap_info->bytes)
1342                 free_bitmap(block_group, bitmap_info);
1343
1344         return 0;
1345 }
1346
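/*
 * Try to store the range described by info in a bitmap rather than as an
 * extent entry.  Returns 0 when the caller should keep it as an extent
 * entry (we are under the extents threshold, or the block group is too
 * small for bitmaps), 1 once the range has been absorbed into one or more
 * bitmaps, or a negative errno if a new bitmap could not be allocated.
 * info is consumed unless 0 is returned.
 */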
1347 static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
1348                               struct btrfs_free_space *info)
1349 {
1350         struct btrfs_free_space *bitmap_info;
1351         int added = 0;
1352         u64 bytes, offset, end;
1353         int ret;
1354
1355         /*
1356          * If we are below the extents threshold then we can add this as an
1357          * extent, and don't have to deal with the bitmap
1358          */
1359         if (block_group->free_extents < block_group->extents_thresh) {
1360                 /*
1361                  * If this block group has some small extents we don't want to
1362                  * use up all of our free slots in the cache with them; we want
1363                  * to reserve them for larger extents.  However, if we have plenty
1364                  * of cache left then go ahead and add them, no sense in adding
1365                  * the overhead of a bitmap if we don't have to.
1366                  */
1367                 if (info->bytes <= block_group->sectorsize * 4) {
1368                         if (block_group->free_extents * 2 <=
1369                             block_group->extents_thresh)
1370                                 return 0;
1371                 } else {
1372                         return 0;
1373                 }
1374         }
1375
1376         /*
1377          * some block groups are so tiny they can't be enveloped by a bitmap, so
1378          * don't even bother to create a bitmap for this
1379          */
1380         if (BITS_PER_BITMAP * block_group->sectorsize >
1381             block_group->key.offset)
1382                 return 0;
1383
1384         bytes = info->bytes;
1385         offset = info->offset;
1386
1387 again:
1388         bitmap_info = tree_search_offset(block_group,
1389                                          offset_to_bitmap(block_group, offset),
1390                                          1, 0);
1391         if (!bitmap_info) {
1392                 BUG_ON(added);
1393                 goto new_bitmap;
1394         }
1395
1396         end = bitmap_info->offset +
1397                 (u64)(BITS_PER_BITMAP * block_group->sectorsize);
1398
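        /*
         * If the range spills past the end of this bitmap, set the bits that
         * fit here and loop around to put the remainder in the next bitmap;
         * otherwise the whole range fits in this one.
         */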
1399         if (offset >= bitmap_info->offset && offset + bytes > end) {
1400                 bitmap_set_bits(block_group, bitmap_info, offset,
1401                                 end - offset);
1402                 bytes -= end - offset;
1403                 offset = end;
1404                 added = 0;
1405         } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
1406                 bitmap_set_bits(block_group, bitmap_info, offset, bytes);
1407                 bytes = 0;
1408         } else {
1409                 BUG();
1410         }
1411
1412         if (!bytes) {
1413                 ret = 1;
1414                 goto out;
1415         } else
1416                 goto again;
1417
1418 new_bitmap:
1419         if (info && info->bitmap) {
1420                 add_new_bitmap(block_group, info, offset);
1421                 added = 1;
1422                 info = NULL;
1423                 goto again;
1424         } else {
1425                 spin_unlock(&block_group->tree_lock);
1426
1427                 /* no pre-allocated info, allocate a new one */
1428                 if (!info) {
1429                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1430                                                  GFP_NOFS);
1431                         if (!info) {
1432                                 spin_lock(&block_group->tree_lock);
1433                                 ret = -ENOMEM;
1434                                 goto out;
1435                         }
1436                 }
1437
1438                 /* allocate the bitmap */
1439                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1440                 spin_lock(&block_group->tree_lock);
1441                 if (!info->bitmap) {
1442                         ret = -ENOMEM;
1443                         goto out;
1444                 }
1445                 goto again;
1446         }
1447
1448 out:
1449         if (info) {
1450                 if (info->bitmap)
1451                         kfree(info->bitmap);
1452                 kmem_cache_free(btrfs_free_space_cachep, info);
1453         }
1454
1455         return ret;
1456 }
1457
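/*
 * Try to merge 'info' with the free space extent entries immediately before
 * and after it; any neighbour that merges is unlinked and its bytes folded
 * into 'info'.  Bitmap entries are never merged.  'update_stat' chooses
 * between unlink_free_space() and __unlink_free_space() when removing a
 * neighbour.  Returns true if anything was merged.
 */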
1458 bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
1459                           struct btrfs_free_space *info, bool update_stat)
1460 {
1461         struct btrfs_free_space *left_info;
1462         struct btrfs_free_space *right_info;
1463         bool merged = false;
1464         u64 offset = info->offset;
1465         u64 bytes = info->bytes;
1466
1467         /*
1468          * first we want to see if there is free space adjacent to the range we
1469          * are adding; if there is, remove that entry and fold its space into
1470          * this one so it covers the entire range
1471          */
1472         right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
1473         if (right_info && rb_prev(&right_info->offset_index))
1474                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1475                                      struct btrfs_free_space, offset_index);
1476         else
1477                 left_info = tree_search_offset(block_group, offset - 1, 0, 0);
1478
1479         if (right_info && !right_info->bitmap) {
1480                 if (update_stat)
1481                         unlink_free_space(block_group, right_info);
1482                 else
1483                         __unlink_free_space(block_group, right_info);
1484                 info->bytes += right_info->bytes;
1485                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1486                 merged = true;
1487         }
1488
1489         if (left_info && !left_info->bitmap &&
1490             left_info->offset + left_info->bytes == offset) {
1491                 if (update_stat)
1492                         unlink_free_space(block_group, left_info);
1493                 else
1494                         __unlink_free_space(block_group, left_info);
1495                 info->offset = left_info->offset;
1496                 info->bytes += left_info->bytes;
1497                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1498                 merged = true;
1499         }
1500
1501         return merged;
1502 }
1503
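/*
 * Add the range [offset, offset + bytes) back to the block group's free
 * space cache.  The range is merged with any adjacent extent entries; if
 * nothing merges it may be folded into a bitmap, otherwise it is linked in
 * as a new extent entry.
 */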
1504 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
1505                          u64 offset, u64 bytes)
1506 {
1507         struct btrfs_free_space *info;
1508         int ret = 0;
1509
1510         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1511         if (!info)
1512                 return -ENOMEM;
1513
1514         info->offset = offset;
1515         info->bytes = bytes;
1516
1517         spin_lock(&block_group->tree_lock);
1518
1519         if (try_merge_free_space(block_group, info, true))
1520                 goto link;
1521
1522         /*
1523          * There was no extent directly to the left or right of this new
1524          * extent, so we know we're going to have to allocate a new extent;
1525          * before we do that, see if we need to drop this into a bitmap
1526          */
1527         ret = insert_into_bitmap(block_group, info);
1528         if (ret < 0) {
1529                 goto out;
1530         } else if (ret) {
1531                 ret = 0;
1532                 goto out;
1533         }
1534 link:
1535         ret = link_free_space(block_group, info);
1536         if (ret)
1537                 kmem_cache_free(btrfs_free_space_cachep, info);
1538 out:
1539         spin_unlock(&block_group->tree_lock);
1540
1541         if (ret) {
1542                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1543                 BUG_ON(ret == -EEXIST);
1544         }
1545
1546         return ret;
1547 }
1548
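/*
 * Remove [offset, offset + bytes) from the free space cache.  The range may
 * be covered by a plain extent entry (including a hole punched in the middle
 * of one, as happens during tree log replay) or by one or more bitmaps.
 * Returns 0 on success or -EINVAL if the cached entries don't cover the
 * range being removed.
 */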
1549 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1550                             u64 offset, u64 bytes)
1551 {
1552         struct btrfs_free_space *info;
1553         struct btrfs_free_space *next_info = NULL;
1554         int ret = 0;
1555
1556         spin_lock(&block_group->tree_lock);
1557
1558 again:
1559         info = tree_search_offset(block_group, offset, 0, 0);
1560         if (!info) {
1561                 /*
1562                  * oops didn't find an extent that matched the space we wanted
1563                  * to remove, look for a bitmap instead
1564                  */
1565                 info = tree_search_offset(block_group,
1566                                           offset_to_bitmap(block_group, offset),
1567                                           1, 0);
1568                 if (!info) {
1569                         WARN_ON(1);
1570                         goto out_lock;
1571                 }
1572         }
1573
1574         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1575                 u64 end;
1576                 next_info = rb_entry(rb_next(&info->offset_index),
1577                                              struct btrfs_free_space,
1578                                              offset_index);
1579
1580                 if (next_info->bitmap)
1581                         end = next_info->offset + BITS_PER_BITMAP *
1582                                 block_group->sectorsize - 1;
1583                 else
1584                         end = next_info->offset + next_info->bytes;
1585
1586                 if (next_info->bytes < bytes ||
1587                     next_info->offset > offset || offset > end) {
1588                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1589                               " trying to use %llu\n",
1590                               (unsigned long long)info->offset,
1591                               (unsigned long long)info->bytes,
1592                               (unsigned long long)bytes);
1593                         WARN_ON(1);
1594                         ret = -EINVAL;
1595                         goto out_lock;
1596                 }
1597
1598                 info = next_info;
1599         }
1600
1601         if (info->bytes == bytes) {
1602                 unlink_free_space(block_group, info);
1603                 if (info->bitmap) {
1604                         kfree(info->bitmap);
1605                         block_group->total_bitmaps--;
1606                 }
1607                 kmem_cache_free(btrfs_free_space_cachep, info);
1608                 goto out_lock;
1609         }
1610
1611         if (!info->bitmap && info->offset == offset) {
1612                 unlink_free_space(block_group, info);
1613                 info->offset += bytes;
1614                 info->bytes -= bytes;
1615                 link_free_space(block_group, info);
1616                 goto out_lock;
1617         }
1618
1619         if (!info->bitmap && info->offset <= offset &&
1620             info->offset + info->bytes >= offset + bytes) {
1621                 u64 old_start = info->offset;
1622                 /*
1623                  * we're freeing space in the middle of the info,
1624                  * this can happen during tree log replay
1625                  *
1626                  * first unlink the old info and then
1627                  * insert it again after the hole we're creating
1628                  */
1629                 unlink_free_space(block_group, info);
1630                 if (offset + bytes < info->offset + info->bytes) {
1631                         u64 old_end = info->offset + info->bytes;
1632
1633                         info->offset = offset + bytes;
1634                         info->bytes = old_end - info->offset;
1635                         ret = link_free_space(block_group, info);
1636                         WARN_ON(ret);
1637                         if (ret)
1638                                 goto out_lock;
1639                 } else {
1640                         /* the hole we're creating ends at the end
1641                          * of the info struct, just free the info
1642                          */
1643                         kmem_cache_free(btrfs_free_space_cachep, info);
1644                 }
1645                 spin_unlock(&block_group->tree_lock);
1646
1647                 /* step two, insert a new info struct to cover
1648                  * anything before the hole
1649                  */
1650                 ret = btrfs_add_free_space(block_group, old_start,
1651                                            offset - old_start);
1652                 WARN_ON(ret);
1653                 goto out;
1654         }
1655
1656         ret = remove_from_bitmap(block_group, info, &offset, &bytes);
1657         if (ret == -EAGAIN)
1658                 goto again;
1659         BUG_ON(ret);
1660 out_lock:
1661         spin_unlock(&block_group->tree_lock);
1662 out:
1663         return ret;
1664 }
1665
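/*
 * Debugging helper: print every entry in the block group's free space tree,
 * whether the group has a cluster, and a count of the entries that are at
 * least 'bytes' in size.
 */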
1666 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1667                            u64 bytes)
1668 {
1669         struct btrfs_free_space *info;
1670         struct rb_node *n;
1671         int count = 0;
1672
1673         for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
1674                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1675                 if (info->bytes >= bytes)
1676                         count++;
1677                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1678                        (unsigned long long)info->offset,
1679                        (unsigned long long)info->bytes,
1680                        (info->bitmap) ? "yes" : "no");
1681         }
1682         printk(KERN_INFO "block group has cluster?: %s\n",
1683                list_empty(&block_group->cluster_list) ? "no" : "yes");
1684         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
1685                count, (unsigned long long)bytes);
1686 }
1687
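/*
 * Walk the free space tree and return the total number of free bytes
 * currently cached for this block group.
 */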
1688 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
1689 {
1690         struct btrfs_free_space *info;
1691         struct rb_node *n;
1692         u64 ret = 0;
1693
1694         for (n = rb_first(&block_group->free_space_offset); n;
1695              n = rb_next(n)) {
1696                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1697                 ret += info->bytes;
1698         }
1699
1700         return ret;
1701 }
1702
1703 /*
1704  * for a given cluster, put all of its extents back into the free
1705  * space cache.  If the block group passed doesn't match the block group
1706  * pointed to by the cluster, someone else raced in and freed the
1707  * cluster already.  In that case, we just return without changing anything
1708  */
1709 static int
1710 __btrfs_return_cluster_to_free_space(
1711                              struct btrfs_block_group_cache *block_group,
1712                              struct btrfs_free_cluster *cluster)
1713 {
1714         struct btrfs_free_space *entry;
1715         struct rb_node *node;
1716
1717         spin_lock(&cluster->lock);
1718         if (cluster->block_group != block_group)
1719                 goto out;
1720
1721         cluster->block_group = NULL;
1722         cluster->window_start = 0;
1723         list_del_init(&cluster->block_group_list);
1724
1725         node = rb_first(&cluster->root);
1726         while (node) {
1727                 bool bitmap;
1728
1729                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1730                 node = rb_next(&entry->offset_index);
1731                 rb_erase(&entry->offset_index, &cluster->root);
1732
1733                 bitmap = (entry->bitmap != NULL);
1734                 if (!bitmap)
1735                         try_merge_free_space(block_group, entry, false);
1736                 tree_insert_offset(&block_group->free_space_offset,
1737                                    entry->offset, &entry->offset_index, bitmap);
1738         }
1739         cluster->root = RB_ROOT;
1740
1741 out:
1742         spin_unlock(&cluster->lock);
1743         btrfs_put_block_group(block_group);
1744         return 0;
1745 }
1746
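/*
 * Drop everything the free space cache is tracking for this block group:
 * return any clusters to the block group first, then free every extent and
 * bitmap entry, rescheduling as needed while we walk the tree.
 */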
1747 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
1748 {
1749         struct btrfs_free_space *info;
1750         struct rb_node *node;
1751         struct btrfs_free_cluster *cluster;
1752         struct list_head *head;
1753
1754         spin_lock(&block_group->tree_lock);
1755         while ((head = block_group->cluster_list.next) !=
1756                &block_group->cluster_list) {
1757                 cluster = list_entry(head, struct btrfs_free_cluster,
1758                                      block_group_list);
1759
1760                 WARN_ON(cluster->block_group != block_group);
1761                 __btrfs_return_cluster_to_free_space(block_group, cluster);
1762                 if (need_resched()) {
1763                         spin_unlock(&block_group->tree_lock);
1764                         cond_resched();
1765                         spin_lock(&block_group->tree_lock);
1766                 }
1767         }
1768
1769         while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
1770                 info = rb_entry(node, struct btrfs_free_space, offset_index);
1771                 if (!info->bitmap) {
1772                         unlink_free_space(block_group, info);
1773                         kmem_cache_free(btrfs_free_space_cachep, info);
1774                 } else {
1775                         free_bitmap(block_group, info);
1776                 }
1777
1778                 if (need_resched()) {
1779                         spin_unlock(&block_group->tree_lock);
1780                         cond_resched();
1781                         spin_lock(&block_group->tree_lock);
1782                 }
1783         }
1784
1785         spin_unlock(&block_group->tree_lock);
1786 }
1787
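/*
 * Look for at least 'bytes' + 'empty_size' of free space starting at or
 * after 'offset'.  If found, carve 'bytes' out of the cache and return the
 * start of the allocation, otherwise return 0.
 */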
1788 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
1789                                u64 offset, u64 bytes, u64 empty_size)
1790 {
1791         struct btrfs_free_space *entry = NULL;
1792         u64 bytes_search = bytes + empty_size;
1793         u64 ret = 0;
1794
1795         spin_lock(&block_group->tree_lock);
1796         entry = find_free_space(block_group, &offset, &bytes_search, 0);
1797         if (!entry)
1798                 goto out;
1799
1800         ret = offset;
1801         if (entry->bitmap) {
1802                 bitmap_clear_bits(block_group, entry, offset, bytes);
1803                 if (!entry->bytes)
1804                         free_bitmap(block_group, entry);
1805         } else {
1806                 unlink_free_space(block_group, entry);
1807                 entry->offset += bytes;
1808                 entry->bytes -= bytes;
1809                 if (!entry->bytes)
1810                         kmem_cache_free(btrfs_free_space_cachep, entry);
1811                 else
1812                         link_free_space(block_group, entry);
1813         }
1814
1815 out:
1816         spin_unlock(&block_group->tree_lock);
1817
1818         return ret;
1819 }
1820
1821 /*
1822  * given a cluster, put all of its extents back into the free space
1823  * cache.  If a block group is passed, this function will only free
1824  * a cluster that belongs to the passed block group.
1825  *
1826  * Otherwise, it'll get a reference on the block group pointed to by the
1827  * cluster and remove the cluster from it.
1828  */
1829 int btrfs_return_cluster_to_free_space(
1830                                struct btrfs_block_group_cache *block_group,
1831                                struct btrfs_free_cluster *cluster)
1832 {
1833         int ret;
1834
1835         /* first, get a safe pointer to the block group */
1836         spin_lock(&cluster->lock);
1837         if (!block_group) {
1838                 block_group = cluster->block_group;
1839                 if (!block_group) {
1840                         spin_unlock(&cluster->lock);
1841                         return 0;
1842                 }
1843         } else if (cluster->block_group != block_group) {
1844                 /* someone else has already freed it, don't redo their work */
1845                 spin_unlock(&cluster->lock);
1846                 return 0;
1847         }
1848         atomic_inc(&block_group->count);
1849         spin_unlock(&cluster->lock);
1850
1851         /* now return any extents the cluster had on it */
1852         spin_lock(&block_group->tree_lock);
1853         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
1854         spin_unlock(&block_group->tree_lock);
1855
1856         /* finally drop our ref */
1857         btrfs_put_block_group(block_group);
1858         return ret;
1859 }
1860
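/*
 * Allocate 'bytes' from a bitmap entry on behalf of a cluster: search the
 * bitmap for a run of at least 'bytes' starting at or after 'min_start',
 * clear those bits and return the start offset, or 0 if nothing was found.
 */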
1861 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
1862                                    struct btrfs_free_cluster *cluster,
1863                                    struct btrfs_free_space *entry,
1864                                    u64 bytes, u64 min_start)
1865 {
1866         int err;
1867         u64 search_start = cluster->window_start;
1868         u64 search_bytes = bytes;
1869         u64 ret = 0;
1870
1871         search_start = min_start;
1872         search_bytes = bytes;
1873
1874         err = search_bitmap(block_group, entry, &search_start,
1875                             &search_bytes);
1876         if (err)
1877                 return 0;
1878
1879         ret = search_start;
1880         bitmap_clear_bits(block_group, entry, ret, bytes);
1881
1882         return ret;
1883 }
1884
1885 /*
1886  * given a cluster, try to allocate 'bytes' from it, returns 0
1887  * if it couldn't find anything suitably large, or a logical disk offset
1888  * if things worked out
1889  */
1890 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
1891                              struct btrfs_free_cluster *cluster, u64 bytes,
1892                              u64 min_start)
1893 {
1894         struct btrfs_free_space *entry = NULL;
1895         struct rb_node *node;
1896         u64 ret = 0;
1897
1898         spin_lock(&cluster->lock);
1899         if (bytes > cluster->max_size)
1900                 goto out;
1901
1902         if (cluster->block_group != block_group)
1903                 goto out;
1904
1905         node = rb_first(&cluster->root);
1906         if (!node)
1907                 goto out;
1908
1909         entry = rb_entry(node, struct btrfs_free_space, offset_index);
1910         while (1) {
1911                 if (entry->bytes < bytes ||
1912                     (!entry->bitmap && entry->offset < min_start)) {
1913                         struct rb_node *node;
1914
1915                         node = rb_next(&entry->offset_index);
1916                         if (!node)
1917                                 break;
1918                         entry = rb_entry(node, struct btrfs_free_space,
1919                                          offset_index);
1920                         continue;
1921                 }
1922
1923                 if (entry->bitmap) {
1924                         ret = btrfs_alloc_from_bitmap(block_group,
1925                                                       cluster, entry, bytes,
1926                                                       min_start);
1927                         if (ret == 0) {
1928                                 struct rb_node *node;
1929                                 node = rb_next(&entry->offset_index);
1930                                 if (!node)
1931                                         break;
1932                                 entry = rb_entry(node, struct btrfs_free_space,
1933                                                  offset_index);
1934                                 continue;
1935                         }
1936                 } else {
1937
1938                         ret = entry->offset;
1939
1940                         entry->offset += bytes;
1941                         entry->bytes -= bytes;
1942                 }
1943
1944                 if (entry->bytes == 0)
1945                         rb_erase(&entry->offset_index, &cluster->root);
1946                 break;
1947         }
1948 out:
1949         spin_unlock(&cluster->lock);
1950
1951         if (!ret)
1952                 return 0;
1953
1954         spin_lock(&block_group->tree_lock);
1955
1956         block_group->free_space -= bytes;
1957         if (entry->bytes == 0) {
1958                 block_group->free_extents--;
1959                 if (entry->bitmap) {
1960                         kfree(entry->bitmap);
1961                         block_group->total_bitmaps--;
1962                         recalculate_thresholds(block_group);
1963                 }
1964                 kmem_cache_free(btrfs_free_space_cachep, entry);
1965         }
1966
1967         spin_unlock(&block_group->tree_lock);
1968
1969         return ret;
1970 }
1971
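/*
 * Try to build the cluster from a single bitmap entry.  We need a run of at
 * least 'bytes' worth of set bits, and keep widening the window until we
 * have seen 'min_bytes' in total (resetting if the window becomes too
 * sparse).  On success the bitmap entry is moved from the block group's
 * tree into the cluster's rbtree and 0 is returned, otherwise -ENOSPC.
 */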
1972 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
1973                                 struct btrfs_free_space *entry,
1974                                 struct btrfs_free_cluster *cluster,
1975                                 u64 offset, u64 bytes, u64 min_bytes)
1976 {
1977         unsigned long next_zero;
1978         unsigned long i;
1979         unsigned long search_bits;
1980         unsigned long total_bits;
1981         unsigned long found_bits;
1982         unsigned long start = 0;
1983         unsigned long total_found = 0;
1984         int ret;
1985         bool found = false;
1986
1987         i = offset_to_bit(entry->offset, block_group->sectorsize,
1988                           max_t(u64, offset, entry->offset));
1989         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
1990         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
1991
1992 again:
1993         found_bits = 0;
1994         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
1995              i < BITS_PER_BITMAP;
1996              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
1997                 next_zero = find_next_zero_bit(entry->bitmap,
1998                                                BITS_PER_BITMAP, i);
1999                 if (next_zero - i >= search_bits) {
2000                         found_bits = next_zero - i;
2001                         break;
2002                 }
2003                 i = next_zero;
2004         }
2005
2006         if (!found_bits)
2007                 return -ENOSPC;
2008
2009         if (!found) {
2010                 start = i;
2011                 found = true;
2012         }
2013
2014         total_found += found_bits;
2015
2016         if (cluster->max_size < found_bits * block_group->sectorsize)
2017                 cluster->max_size = found_bits * block_group->sectorsize;
2018
2019         if (total_found < total_bits) {
2020                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2021                 if (i - start > total_bits * 2) {
2022                         total_found = 0;
2023                         cluster->max_size = 0;
2024                         found = false;
2025                 }
2026                 goto again;
2027         }
2028
2029         cluster->window_start = start * block_group->sectorsize +
2030                 entry->offset;
2031         rb_erase(&entry->offset_index, &block_group->free_space_offset);
2032         ret = tree_insert_offset(&cluster->root, entry->offset,
2033                                  &entry->offset_index, 1);
2034         BUG_ON(ret);
2035
2036         return 0;
2037 }
2038
2039 /*
2040  * This searches the block group for just extents to fill the cluster with.
2041  */
2042 static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2043                                    struct btrfs_free_cluster *cluster,
2044                                    u64 offset, u64 bytes, u64 min_bytes)
2045 {
2046         struct btrfs_free_space *first = NULL;
2047         struct btrfs_free_space *entry = NULL;
2048         struct btrfs_free_space *prev = NULL;
2049         struct btrfs_free_space *last;
2050         struct rb_node *node;
2051         u64 window_start;
2052         u64 window_free;
2053         u64 max_extent;
2054         u64 max_gap = 128 * 1024;
2055
2056         entry = tree_search_offset(block_group, offset, 0, 1);
2057         if (!entry)
2058                 return -ENOSPC;
2059
2060         /*
2061          * We don't want bitmaps, so just move along until we find a normal
2062          * extent entry.
2063          */
2064         while (entry->bitmap) {
2065                 node = rb_next(&entry->offset_index);
2066                 if (!node)
2067                         return -ENOSPC;
2068                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2069         }
2070
2071         window_start = entry->offset;
2072         window_free = entry->bytes;
2073         max_extent = entry->bytes;
2074         first = entry;
2075         last = entry;
2076         prev = entry;
2077
2078         while (window_free <= min_bytes) {
2079                 node = rb_next(&entry->offset_index);
2080                 if (!node)
2081                         return -ENOSPC;
2082                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2083
2084                 if (entry->bitmap)
2085                         continue;
2086                 /*
2087                  * we haven't filled the empty size and the window is
2088                  * very large.  reset and try again
2089                  */
2090                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2091                     entry->offset - window_start > (min_bytes * 2)) {
2092                         first = entry;
2093                         window_start = entry->offset;
2094                         window_free = entry->bytes;
2095                         last = entry;
2096                         max_extent = entry->bytes;
2097                 } else {
2098                         last = entry;
2099                         window_free += entry->bytes;
2100                         if (entry->bytes > max_extent)
2101                                 max_extent = entry->bytes;
2102                 }
2103                 prev = entry;
2104         }
2105
2106         cluster->window_start = first->offset;
2107
2108         node = &first->offset_index;
2109
2110         /*
2111          * now we've found our entries, pull them out of the free space
2112          * cache and put them into the cluster rbtree
2113          */
2114         do {
2115                 int ret;
2116
2117                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2118                 node = rb_next(&entry->offset_index);
2119                 if (entry->bitmap)
2120                         continue;
2121
2122                 rb_erase(&entry->offset_index, &block_group->free_space_offset);
2123                 ret = tree_insert_offset(&cluster->root, entry->offset,
2124                                          &entry->offset_index, 0);
2125                 BUG_ON(ret);
2126         } while (node && entry != last);
2127
2128         cluster->max_size = max_extent;
2129
2130         return 0;
2131 }
2132
2133 /*
2134  * This specifically looks for bitmaps that may work in the cluster; we assume
2135  * that we have already failed to find extents that will work.
2136  */
2137 static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2138                                 struct btrfs_free_cluster *cluster,
2139                                 u64 offset, u64 bytes, u64 min_bytes)
2140 {
2141         struct btrfs_free_space *entry;
2142         struct rb_node *node;
2143         int ret = -ENOSPC;
2144
2145         if (block_group->total_bitmaps == 0)
2146                 return -ENOSPC;
2147
2148         entry = tree_search_offset(block_group,
2149                                    offset_to_bitmap(block_group, offset),
2150                                    0, 1);
2151         if (!entry)
2152                 return -ENOSPC;
2153
2154         node = &entry->offset_index;
2155         do {
2156                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2157                 node = rb_next(&entry->offset_index);
2158                 if (!entry->bitmap)
2159                         continue;
2160                 if (entry->bytes < min_bytes)
2161                         continue;
2162                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2163                                            bytes, min_bytes);
2164         } while (ret && node);
2165
2166         return ret;
2167 }
2168
2169 /*
2170  * here we try to find a cluster of blocks in a block group.  The goal
2171  * is to find at least bytes free and up to empty_size + bytes free.
2172  * We might not find them all in one contiguous area.
2173  *
2174  * returns zero and sets up cluster if things worked out, otherwise
2175  * it returns -ENOSPC
2176  */
2177 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2178                              struct btrfs_root *root,
2179                              struct btrfs_block_group_cache *block_group,
2180                              struct btrfs_free_cluster *cluster,
2181                              u64 offset, u64 bytes, u64 empty_size)
2182 {
2183         u64 min_bytes;
2184         int ret;
2185
2186         /* for metadata, allow allocations with more holes */
2187         if (btrfs_test_opt(root, SSD_SPREAD)) {
2188                 min_bytes = bytes + empty_size;
2189         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2190                 /*
2191                  * we want to do larger allocations when we are
2192                  * flushing out the delayed refs; it helps prevent
2193                  * making more work as we go along.
2194                  */
2195                 if (trans->transaction->delayed_refs.flushing)
2196                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2197                 else
2198                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2199         } else
2200                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2201
2202         spin_lock(&block_group->tree_lock);
2203
2204         /*
2205          * If we know we don't have enough space to make a cluster don't even
2206          * bother doing all the work to try and find one.
2207          */
2208         if (block_group->free_space < min_bytes) {
2209                 spin_unlock(&block_group->tree_lock);
2210                 return -ENOSPC;
2211         }
2212
2213         spin_lock(&cluster->lock);
2214
2215         /* someone already found a cluster, hooray */
2216         if (cluster->block_group) {
2217                 ret = 0;
2218                 goto out;
2219         }
2220
2221         ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
2222                                       min_bytes);
2223         if (ret)
2224                 ret = setup_cluster_bitmap(block_group, cluster, offset,
2225                                            bytes, min_bytes);
2226
2227         if (!ret) {
2228                 atomic_inc(&block_group->count);
2229                 list_add_tail(&cluster->block_group_list,
2230                               &block_group->cluster_list);
2231                 cluster->block_group = block_group;
2232         }
2233 out:
2234         spin_unlock(&cluster->lock);
2235         spin_unlock(&block_group->tree_lock);
2236
2237         return ret;
2238 }
2239
2240 /*
2241  * simple code to zero out a cluster
2242  */
2243 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2244 {
2245         spin_lock_init(&cluster->lock);
2246         spin_lock_init(&cluster->refill_lock);
2247         cluster->root = RB_ROOT;
2248         cluster->max_size = 0;
2249         INIT_LIST_HEAD(&cluster->block_group_list);
2250         cluster->block_group = NULL;
2251 }
2252
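/*
 * Discard (trim) free space between 'start' and 'end'.  Cached free space in
 * that range is pulled out of the cache and, when a chunk is at least
 * 'minlen' bytes, discarded and added back, accumulating the bytes actually
 * discarded in '*trimmed'.  Returns 0, an error from the discard, or
 * -ERESTARTSYS if a fatal signal is pending.
 */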
2253 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2254                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2255 {
2256         struct btrfs_free_space *entry = NULL;
2257         struct btrfs_fs_info *fs_info = block_group->fs_info;
2258         u64 bytes = 0;
2259         u64 actually_trimmed;
2260         int ret = 0;
2261
2262         *trimmed = 0;
2263
2264         while (start < end) {
2265                 spin_lock(&block_group->tree_lock);
2266
2267                 if (block_group->free_space < minlen) {
2268                         spin_unlock(&block_group->tree_lock);
2269                         break;
2270                 }
2271
2272                 entry = tree_search_offset(block_group, start, 0, 1);
2273                 if (!entry)
2274                         entry = tree_search_offset(block_group,
2275                                                    offset_to_bitmap(block_group,
2276                                                                     start),
2277                                                    1, 1);
2278
2279                 if (!entry || entry->offset >= end) {
2280                         spin_unlock(&block_group->tree_lock);
2281                         break;
2282                 }
2283
2284                 if (entry->bitmap) {
2285                         ret = search_bitmap(block_group, entry, &start, &bytes);
2286                         if (!ret) {
2287                                 if (start >= end) {
2288                                         spin_unlock(&block_group->tree_lock);
2289                                         break;
2290                                 }
2291                                 bytes = min(bytes, end - start);
2292                                 bitmap_clear_bits(block_group, entry,
2293                                                   start, bytes);
2294                                 if (entry->bytes == 0)
2295                                         free_bitmap(block_group, entry);
2296                         } else {
2297                                 start = entry->offset + BITS_PER_BITMAP *
2298                                         block_group->sectorsize;
2299                                 spin_unlock(&block_group->tree_lock);
2300                                 ret = 0;
2301                                 continue;
2302                         }
2303                 } else {
2304                         start = entry->offset;
2305                         bytes = min(entry->bytes, end - start);
2306                         unlink_free_space(block_group, entry);
2307                         kmem_cache_free(btrfs_free_space_cachep, entry);
2308                 }
2309
2310                 spin_unlock(&block_group->tree_lock);
2311
2312                 if (bytes >= minlen) {
2313                         int update_ret;
2314                         update_ret = btrfs_update_reserved_bytes(block_group,
2315                                                                  bytes, 1, 1);
2316
2317                         ret = btrfs_error_discard_extent(fs_info->extent_root,
2318                                                          start,
2319                                                          bytes,
2320                                                          &actually_trimmed);
2321
2322                         btrfs_add_free_space(block_group,
2323                                              start, bytes);
2324                         if (!update_ret)
2325                                 btrfs_update_reserved_bytes(block_group,
2326                                                             bytes, 0, 1);
2327
2328                         if (ret)
2329                                 break;
2330                         *trimmed += actually_trimmed;
2331                 }
2332                 start += bytes;
2333                 bytes = 0;
2334
2335                 if (fatal_signal_pending(current)) {
2336                         ret = -ERESTARTSYS;
2337                         break;
2338                 }
2339
2340                 cond_resched();
2341         }
2342
2343         return ret;
2344 }