1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
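/*
 * One bitmap entry covers BITS_PER_BITMAP units of space (one bit per
 * ctl->unit bytes).  MAX_CACHE_BYTES_PER_GIG caps how much memory the
 * in-memory free space cache may use per GiB of block group space.
 */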
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 WARN_ON(1);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         trans->block_rsv = rsv;
239
240         return ret;
241 }
242
243 static int readahead_cache(struct inode *inode)
244 {
245         struct file_ra_state *ra;
246         unsigned long last_index;
247
248         ra = kzalloc(sizeof(*ra), GFP_NOFS);
249         if (!ra)
250                 return -ENOMEM;
251
252         file_ra_state_init(ra, inode->i_mapping);
253         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254
255         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256
257         kfree(ra);
258
259         return 0;
260 }
261
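/*
 * Helper state for streaming the free space cache to/from its inode's page
 * cache.  Pages are kmap()'d one at a time: cur is the read/write cursor
 * within the currently mapped page, orig its start, size the bytes left in
 * it, and check_crcs says whether per-page crc32 sums are kept at the front
 * of the first page.
 */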
262 struct io_ctl {
263         void *cur, *orig;
264         struct page *page;
265         struct page **pages;
266         struct btrfs_root *root;
267         unsigned long size;
268         int index;
269         int num_pages;
270         unsigned check_crcs:1;
271 };
272
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274                        struct btrfs_root *root)
275 {
276         memset(io_ctl, 0, sizeof(struct io_ctl));
277         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278                 PAGE_CACHE_SHIFT;
279         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280                                 GFP_NOFS);
281         if (!io_ctl->pages)
282                 return -ENOMEM;
283         io_ctl->root = root;
284         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285                 io_ctl->check_crcs = 1;
286         return 0;
287 }
288
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291         kfree(io_ctl->pages);
292 }
293
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296         if (io_ctl->cur) {
297                 kunmap(io_ctl->page);
298                 io_ctl->cur = NULL;
299                 io_ctl->orig = NULL;
300         }
301 }
302
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305         WARN_ON(io_ctl->cur);
306         BUG_ON(io_ctl->index >= io_ctl->num_pages);
307         io_ctl->page = io_ctl->pages[io_ctl->index++];
308         io_ctl->cur = kmap(io_ctl->page);
309         io_ctl->orig = io_ctl->cur;
310         io_ctl->size = PAGE_CACHE_SIZE;
311         if (clear)
312                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317         int i;
318
319         io_ctl_unmap_page(io_ctl);
320
321         for (i = 0; i < io_ctl->num_pages; i++) {
322                 if (io_ctl->pages[i]) {
323                         ClearPageChecked(io_ctl->pages[i]);
324                         unlock_page(io_ctl->pages[i]);
325                         page_cache_release(io_ctl->pages[i]);
326                 }
327         }
328 }
329
330 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
331                                 int uptodate)
332 {
333         struct page *page;
334         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
335         int i;
336
337         for (i = 0; i < io_ctl->num_pages; i++) {
338                 page = find_or_create_page(inode->i_mapping, i, mask);
339                 if (!page) {
340                         io_ctl_drop_pages(io_ctl);
341                         return -ENOMEM;
342                 }
343                 io_ctl->pages[i] = page;
344                 if (uptodate && !PageUptodate(page)) {
345                         btrfs_readpage(NULL, page);
346                         lock_page(page);
347                         if (!PageUptodate(page)) {
348                                 printk(KERN_ERR "btrfs: error reading free "
349                                        "space cache\n");
350                                 io_ctl_drop_pages(io_ctl);
351                                 return -EIO;
352                         }
353                 }
354         }
355
356         for (i = 0; i < io_ctl->num_pages; i++) {
357                 clear_page_dirty_for_io(io_ctl->pages[i]);
358                 set_page_extent_mapped(io_ctl->pages[i]);
359         }
360
361         return 0;
362 }
363
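/*
 * On-disk layout of the cache file: when check_crcs is set, the first page
 * starts with one u32 crc per page, followed by a u64 generation; otherwise
 * only the generation is stored.  Packed btrfs_free_space_entry records
 * follow, and each bitmap is written as a full page of its own after all of
 * the entries.
 */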
364 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
365 {
366         u64 *val;
367
368         io_ctl_map_page(io_ctl, 1);
369
370         /*
371          * Skip the csum areas.  If we don't check crcs then we just have a
372          * 64bit chunk at the front of the first page.
373          */
374         if (io_ctl->check_crcs) {
375                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
376                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
377         } else {
378                 io_ctl->cur += sizeof(u64);
379                 io_ctl->size -= sizeof(u64) * 2;
380         }
381
382         val = io_ctl->cur;
383         *val = cpu_to_le64(generation);
384         io_ctl->cur += sizeof(u64);
385 }
386
387 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
388 {
389         u64 *gen;
390
391         /*
392          * Skip the crc area.  If we don't check crcs then we just have a 64bit
393          * chunk at the front of the first page.
394          */
395         if (io_ctl->check_crcs) {
396                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
397                 io_ctl->size -= sizeof(u64) +
398                         (sizeof(u32) * io_ctl->num_pages);
399         } else {
400                 io_ctl->cur += sizeof(u64);
401                 io_ctl->size -= sizeof(u64) * 2;
402         }
403
404         gen = io_ctl->cur;
405         if (le64_to_cpu(*gen) != generation) {
406                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
 407                                    "(%Lu) does not match inode (%Lu)\n",
 408                                    le64_to_cpu(*gen), generation);
409                 io_ctl_unmap_page(io_ctl);
410                 return -EIO;
411         }
412         io_ctl->cur += sizeof(u64);
413         return 0;
414 }
415
416 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
417 {
418         u32 *tmp;
419         u32 crc = ~(u32)0;
420         unsigned offset = 0;
421
422         if (!io_ctl->check_crcs) {
423                 io_ctl_unmap_page(io_ctl);
424                 return;
425         }
426
427         if (index == 0)
428                 offset = sizeof(u32) * io_ctl->num_pages;
429
430         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
431                               PAGE_CACHE_SIZE - offset);
432         btrfs_csum_final(crc, (char *)&crc);
433         io_ctl_unmap_page(io_ctl);
434         tmp = kmap(io_ctl->pages[0]);
435         tmp += index;
436         *tmp = crc;
437         kunmap(io_ctl->pages[0]);
438 }
439
440 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
441 {
442         u32 *tmp, val;
443         u32 crc = ~(u32)0;
444         unsigned offset = 0;
445
446         if (!io_ctl->check_crcs) {
447                 io_ctl_map_page(io_ctl, 0);
448                 return 0;
449         }
450
451         if (index == 0)
452                 offset = sizeof(u32) * io_ctl->num_pages;
453
454         tmp = kmap(io_ctl->pages[0]);
455         tmp += index;
456         val = *tmp;
457         kunmap(io_ctl->pages[0]);
458
459         io_ctl_map_page(io_ctl, 0);
460         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
461                               PAGE_CACHE_SIZE - offset);
462         btrfs_csum_final(crc, (char *)&crc);
463         if (val != crc) {
464                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
465                                    "space cache\n");
466                 io_ctl_unmap_page(io_ctl);
467                 return -EIO;
468         }
469
470         return 0;
471 }
472
473 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
474                             void *bitmap)
475 {
476         struct btrfs_free_space_entry *entry;
477
478         if (!io_ctl->cur)
479                 return -ENOSPC;
480
481         entry = io_ctl->cur;
482         entry->offset = cpu_to_le64(offset);
483         entry->bytes = cpu_to_le64(bytes);
484         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
485                 BTRFS_FREE_SPACE_EXTENT;
486         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
487         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
488
489         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
490                 return 0;
491
492         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
493
494         /* No more pages to map */
495         if (io_ctl->index >= io_ctl->num_pages)
496                 return 0;
497
498         /* map the next page */
499         io_ctl_map_page(io_ctl, 1);
500         return 0;
501 }
502
503 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
504 {
505         if (!io_ctl->cur)
506                 return -ENOSPC;
507
508         /*
509          * If we aren't at the start of the current page, unmap this one and
510          * map the next one if there is any left.
511          */
512         if (io_ctl->cur != io_ctl->orig) {
513                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
514                 if (io_ctl->index >= io_ctl->num_pages)
515                         return -ENOSPC;
516                 io_ctl_map_page(io_ctl, 0);
517         }
518
519         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
520         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
521         if (io_ctl->index < io_ctl->num_pages)
522                 io_ctl_map_page(io_ctl, 0);
523         return 0;
524 }
525
526 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
527 {
528         /*
529          * If we're not on the boundary we know we've modified the page and we
530          * need to crc the page.
531          */
532         if (io_ctl->cur != io_ctl->orig)
533                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
534         else
535                 io_ctl_unmap_page(io_ctl);
536
537         while (io_ctl->index < io_ctl->num_pages) {
538                 io_ctl_map_page(io_ctl, 1);
539                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
540         }
541 }
542
543 static int io_ctl_read_entry(struct io_ctl *io_ctl,
544                             struct btrfs_free_space *entry, u8 *type)
545 {
546         struct btrfs_free_space_entry *e;
547         int ret;
548
549         if (!io_ctl->cur) {
550                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
551                 if (ret)
552                         return ret;
553         }
554
555         e = io_ctl->cur;
556         entry->offset = le64_to_cpu(e->offset);
557         entry->bytes = le64_to_cpu(e->bytes);
558         *type = e->type;
559         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
560         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
561
562         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
563                 return 0;
564
565         io_ctl_unmap_page(io_ctl);
566
567         return 0;
568 }
569
570 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
571                               struct btrfs_free_space *entry)
572 {
573         int ret;
574
575         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
576         if (ret)
577                 return ret;
578
579         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
580         io_ctl_unmap_page(io_ctl);
581
582         return 0;
583 }
584
585 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
586                             struct btrfs_free_space_ctl *ctl,
587                             struct btrfs_path *path, u64 offset)
588 {
589         struct btrfs_free_space_header *header;
590         struct extent_buffer *leaf;
591         struct io_ctl io_ctl;
592         struct btrfs_key key;
593         struct btrfs_free_space *e, *n;
594         struct list_head bitmaps;
595         u64 num_entries;
596         u64 num_bitmaps;
597         u64 generation;
598         u8 type;
599         int ret = 0;
600
601         INIT_LIST_HEAD(&bitmaps);
602
603         /* Nothing in the space cache, goodbye */
604         if (!i_size_read(inode))
605                 return 0;
606
607         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
608         key.offset = offset;
609         key.type = 0;
610
611         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
612         if (ret < 0)
613                 return 0;
614         else if (ret > 0) {
615                 btrfs_release_path(path);
616                 return 0;
617         }
618
619         ret = -1;
620
621         leaf = path->nodes[0];
622         header = btrfs_item_ptr(leaf, path->slots[0],
623                                 struct btrfs_free_space_header);
624         num_entries = btrfs_free_space_entries(leaf, header);
625         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
626         generation = btrfs_free_space_generation(leaf, header);
627         btrfs_release_path(path);
628
629         if (BTRFS_I(inode)->generation != generation) {
630                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
631                        " not match free space cache generation (%llu)\n",
632                        (unsigned long long)BTRFS_I(inode)->generation,
633                        (unsigned long long)generation);
634                 return 0;
635         }
636
637         if (!num_entries)
638                 return 0;
639
640         ret = io_ctl_init(&io_ctl, inode, root);
641         if (ret)
642                 return ret;
643
644         ret = readahead_cache(inode);
645         if (ret)
646                 goto out;
647
648         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
649         if (ret)
650                 goto out;
651
652         ret = io_ctl_check_crc(&io_ctl, 0);
653         if (ret)
654                 goto free_cache;
655
656         ret = io_ctl_check_generation(&io_ctl, generation);
657         if (ret)
658                 goto free_cache;
659
660         while (num_entries) {
661                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
662                                       GFP_NOFS);
663                 if (!e)
664                         goto free_cache;
665
666                 ret = io_ctl_read_entry(&io_ctl, e, &type);
667                 if (ret) {
668                         kmem_cache_free(btrfs_free_space_cachep, e);
669                         goto free_cache;
670                 }
671
672                 if (!e->bytes) {
673                         kmem_cache_free(btrfs_free_space_cachep, e);
674                         goto free_cache;
675                 }
676
677                 if (type == BTRFS_FREE_SPACE_EXTENT) {
678                         spin_lock(&ctl->tree_lock);
679                         ret = link_free_space(ctl, e);
680                         spin_unlock(&ctl->tree_lock);
681                         if (ret) {
682                                 printk(KERN_ERR "Duplicate entries in "
683                                        "free space cache, dumping\n");
684                                 kmem_cache_free(btrfs_free_space_cachep, e);
685                                 goto free_cache;
686                         }
687                 } else {
688                         BUG_ON(!num_bitmaps);
689                         num_bitmaps--;
690                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
691                         if (!e->bitmap) {
692                                 kmem_cache_free(
693                                         btrfs_free_space_cachep, e);
694                                 goto free_cache;
695                         }
696                         spin_lock(&ctl->tree_lock);
697                         ret = link_free_space(ctl, e);
698                         ctl->total_bitmaps++;
699                         ctl->op->recalc_thresholds(ctl);
700                         spin_unlock(&ctl->tree_lock);
701                         if (ret) {
702                                 printk(KERN_ERR "Duplicate entries in "
703                                        "free space cache, dumping\n");
704                                 kmem_cache_free(btrfs_free_space_cachep, e);
705                                 goto free_cache;
706                         }
707                         list_add_tail(&e->list, &bitmaps);
708                 }
709
710                 num_entries--;
711         }
712
713         io_ctl_unmap_page(&io_ctl);
714
715         /*
 716          * The bitmap payloads are stored after all of the entries, so read
 717          * them back in now that every entry has been added to the cache.
718          */
719         list_for_each_entry_safe(e, n, &bitmaps, list) {
720                 list_del_init(&e->list);
721                 ret = io_ctl_read_bitmap(&io_ctl, e);
722                 if (ret)
723                         goto free_cache;
724         }
725
726         io_ctl_drop_pages(&io_ctl);
727         ret = 1;
728 out:
729         io_ctl_free(&io_ctl);
730         return ret;
731 free_cache:
732         io_ctl_drop_pages(&io_ctl);
733         __btrfs_remove_free_space_cache(ctl);
734         goto out;
735 }
736
737 int load_free_space_cache(struct btrfs_fs_info *fs_info,
738                           struct btrfs_block_group_cache *block_group)
739 {
740         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
741         struct btrfs_root *root = fs_info->tree_root;
742         struct inode *inode;
743         struct btrfs_path *path;
744         int ret = 0;
745         bool matched;
746         u64 used = btrfs_block_group_used(&block_group->item);
747
748         /*
749          * If we're unmounting then just return, since this does a search on the
750          * normal root and not the commit root and we could deadlock.
751          */
752         if (btrfs_fs_closing(fs_info))
753                 return 0;
754
755         /*
756          * If this block group has been marked to be cleared for one reason or
757          * another then we can't trust the on disk cache, so just return.
758          */
759         spin_lock(&block_group->lock);
760         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
761                 spin_unlock(&block_group->lock);
762                 return 0;
763         }
764         spin_unlock(&block_group->lock);
765
766         path = btrfs_alloc_path();
767         if (!path)
768                 return 0;
769
770         inode = lookup_free_space_inode(root, block_group, path);
771         if (IS_ERR(inode)) {
772                 btrfs_free_path(path);
773                 return 0;
774         }
775
776         /* We may have converted the inode and made the cache invalid. */
777         spin_lock(&block_group->lock);
778         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
779                 spin_unlock(&block_group->lock);
780                 btrfs_free_path(path);
781                 goto out;
782         }
783         spin_unlock(&block_group->lock);
784
785         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
786                                       path, block_group->key.objectid);
787         btrfs_free_path(path);
788         if (ret <= 0)
789                 goto out;
790
791         spin_lock(&ctl->tree_lock);
792         matched = (ctl->free_space == (block_group->key.offset - used -
793                                        block_group->bytes_super));
794         spin_unlock(&ctl->tree_lock);
795
796         if (!matched) {
797                 __btrfs_remove_free_space_cache(ctl);
 798                 printk(KERN_ERR "block group %llu has the wrong amount of free "
799                        "space\n", block_group->key.objectid);
800                 ret = -1;
801         }
802 out:
803         if (ret < 0) {
804                 /* This cache is bogus, make sure it gets cleared */
805                 spin_lock(&block_group->lock);
806                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
807                 spin_unlock(&block_group->lock);
808                 ret = 0;
809
810                 printk(KERN_ERR "btrfs: failed to load free space cache "
811                        "for block group %llu\n", block_group->key.objectid);
812         }
813
814         iput(inode);
815         return ret;
816 }
817
818 /**
819  * __btrfs_write_out_cache - write out cached info to an inode
820  * @root - the root the inode belongs to
821  * @ctl - the free space cache we are going to write out
822  * @block_group - the block_group for this cache if it belongs to a block_group
823  * @trans - the trans handle
824  * @path - the path to use
825  * @offset - the offset for the key we'll insert
826  *
827  * This function writes out a free space cache struct to disk for quick recovery
 828  * on mount.  This will return 0 if it was successful in writing the cache out,
829  * and -1 if it was not.
830  */
831 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
832                             struct btrfs_free_space_ctl *ctl,
833                             struct btrfs_block_group_cache *block_group,
834                             struct btrfs_trans_handle *trans,
835                             struct btrfs_path *path, u64 offset)
836 {
837         struct btrfs_free_space_header *header;
838         struct extent_buffer *leaf;
839         struct rb_node *node;
840         struct list_head *pos, *n;
841         struct extent_state *cached_state = NULL;
842         struct btrfs_free_cluster *cluster = NULL;
843         struct extent_io_tree *unpin = NULL;
844         struct io_ctl io_ctl;
845         struct list_head bitmap_list;
846         struct btrfs_key key;
847         u64 start, extent_start, extent_end, len;
848         int entries = 0;
849         int bitmaps = 0;
850         int ret;
851         int err = -1;
852
853         INIT_LIST_HEAD(&bitmap_list);
854
855         if (!i_size_read(inode))
856                 return -1;
857
858         ret = io_ctl_init(&io_ctl, inode, root);
859         if (ret)
860                 return -1;
861
862         /* Get the cluster for this block_group if it exists */
863         if (block_group && !list_empty(&block_group->cluster_list))
864                 cluster = list_entry(block_group->cluster_list.next,
865                                      struct btrfs_free_cluster,
866                                      block_group_list);
867
868         /* Lock all pages first so we can lock the extent safely. */
869         io_ctl_prepare_pages(&io_ctl, inode, 0);
870
871         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
872                          0, &cached_state, GFP_NOFS);
873
874         node = rb_first(&ctl->free_space_offset);
875         if (!node && cluster) {
876                 node = rb_first(&cluster->root);
877                 cluster = NULL;
878         }
879
880         /* Make sure we can fit our crcs into the first page */
881         if (io_ctl.check_crcs &&
882             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
883                 WARN_ON(1);
884                 goto out_nospc;
885         }
886
887         io_ctl_set_generation(&io_ctl, trans->transid);
888
889         /* Write out the extent entries */
890         while (node) {
891                 struct btrfs_free_space *e;
892
893                 e = rb_entry(node, struct btrfs_free_space, offset_index);
894                 entries++;
895
896                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
897                                        e->bitmap);
898                 if (ret)
899                         goto out_nospc;
900
901                 if (e->bitmap) {
902                         list_add_tail(&e->list, &bitmap_list);
903                         bitmaps++;
904                 }
905                 node = rb_next(node);
906                 if (!node && cluster) {
907                         node = rb_first(&cluster->root);
908                         cluster = NULL;
909                 }
910         }
911
912         /*
913          * We want to add any pinned extents to our free space cache
914          * so we don't leak the space
915          */
916
917         /*
918          * We shouldn't have switched the pinned extents yet so this is the
919          * right one
920          */
921         unpin = root->fs_info->pinned_extents;
922
923         if (block_group)
924                 start = block_group->key.objectid;
925
926         while (block_group && (start < block_group->key.objectid +
927                                block_group->key.offset)) {
928                 ret = find_first_extent_bit(unpin, start,
929                                             &extent_start, &extent_end,
930                                             EXTENT_DIRTY);
931                 if (ret) {
932                         ret = 0;
933                         break;
934                 }
935
936                 /* This pinned extent is out of our range */
937                 if (extent_start >= block_group->key.objectid +
938                     block_group->key.offset)
939                         break;
940
941                 extent_start = max(extent_start, start);
942                 extent_end = min(block_group->key.objectid +
943                                  block_group->key.offset, extent_end + 1);
944                 len = extent_end - extent_start;
945
946                 entries++;
947                 ret = io_ctl_add_entry(&io_ctl, extent_start, len, NULL);
948                 if (ret)
949                         goto out_nospc;
950
951                 start = extent_end;
952         }
953
954         /* Write out the bitmaps */
955         list_for_each_safe(pos, n, &bitmap_list) {
956                 struct btrfs_free_space *entry =
957                         list_entry(pos, struct btrfs_free_space, list);
958
959                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
960                 if (ret)
961                         goto out_nospc;
962                 list_del_init(&entry->list);
963         }
964
965         /* Zero out the rest of the pages just to make sure */
966         io_ctl_zero_remaining_pages(&io_ctl);
967
968         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
969                                 0, i_size_read(inode), &cached_state);
970         io_ctl_drop_pages(&io_ctl);
971         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
972                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
973
974         if (ret)
975                 goto out;
976
977
978         ret = filemap_write_and_wait(inode->i_mapping);
979         if (ret)
980                 goto out;
981
982         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
983         key.offset = offset;
984         key.type = 0;
985
986         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
987         if (ret < 0) {
988                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
989                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
990                                  GFP_NOFS);
991                 goto out;
992         }
993         leaf = path->nodes[0];
994         if (ret > 0) {
995                 struct btrfs_key found_key;
996                 BUG_ON(!path->slots[0]);
997                 path->slots[0]--;
998                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
999                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
1000                     found_key.offset != offset) {
1001                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
1002                                          inode->i_size - 1,
1003                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
1004                                          NULL, GFP_NOFS);
1005                         btrfs_release_path(path);
1006                         goto out;
1007                 }
1008         }
1009
1010         BTRFS_I(inode)->generation = trans->transid;
1011         header = btrfs_item_ptr(leaf, path->slots[0],
1012                                 struct btrfs_free_space_header);
1013         btrfs_set_free_space_entries(leaf, header, entries);
1014         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1015         btrfs_set_free_space_generation(leaf, header, trans->transid);
1016         btrfs_mark_buffer_dirty(leaf);
1017         btrfs_release_path(path);
1018
1019         err = 0;
1020 out:
1021         io_ctl_free(&io_ctl);
1022         if (err) {
1023                 invalidate_inode_pages2(inode->i_mapping);
1024                 BTRFS_I(inode)->generation = 0;
1025         }
1026         btrfs_update_inode(trans, root, inode);
1027         return err;
1028
1029 out_nospc:
1030         list_for_each_safe(pos, n, &bitmap_list) {
1031                 struct btrfs_free_space *entry =
1032                         list_entry(pos, struct btrfs_free_space, list);
1033                 list_del_init(&entry->list);
1034         }
1035         io_ctl_drop_pages(&io_ctl);
1036         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1037                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1038         goto out;
1039 }
1040
1041 int btrfs_write_out_cache(struct btrfs_root *root,
1042                           struct btrfs_trans_handle *trans,
1043                           struct btrfs_block_group_cache *block_group,
1044                           struct btrfs_path *path)
1045 {
1046         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1047         struct inode *inode;
1048         int ret = 0;
1049
1050         root = root->fs_info->tree_root;
1051
1052         spin_lock(&block_group->lock);
1053         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1054                 spin_unlock(&block_group->lock);
1055                 return 0;
1056         }
1057         spin_unlock(&block_group->lock);
1058
1059         inode = lookup_free_space_inode(root, block_group, path);
1060         if (IS_ERR(inode))
1061                 return 0;
1062
1063         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1064                                       path, block_group->key.objectid);
1065         if (ret) {
1066                 spin_lock(&block_group->lock);
1067                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1068                 spin_unlock(&block_group->lock);
1069                 ret = 0;
1070 #ifdef DEBUG
1071                 printk(KERN_ERR "btrfs: failed to write free space cache "
1072                        "for block group %llu\n", block_group->key.objectid);
1073 #endif
1074         }
1075
1076         iput(inode);
1077         return ret;
1078 }
1079
1080 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1081                                           u64 offset)
1082 {
1083         BUG_ON(offset < bitmap_start);
1084         offset -= bitmap_start;
1085         return (unsigned long)(div_u64(offset, unit));
1086 }
1087
1088 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1089 {
1090         return (unsigned long)(div_u64(bytes, unit));
1091 }
1092
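/*
 * Round an offset down to the start of the bitmap that would cover it, i.e.
 * the largest bitmap-aligned offset <= 'offset' within this ctl.
 */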
1093 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1094                                    u64 offset)
1095 {
1096         u64 bitmap_start;
1097         u64 bytes_per_bitmap;
1098
1099         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1100         bitmap_start = offset - ctl->start;
1101         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1102         bitmap_start *= bytes_per_bitmap;
1103         bitmap_start += ctl->start;
1104
1105         return bitmap_start;
1106 }
1107
1108 static int tree_insert_offset(struct rb_root *root, u64 offset,
1109                               struct rb_node *node, int bitmap)
1110 {
1111         struct rb_node **p = &root->rb_node;
1112         struct rb_node *parent = NULL;
1113         struct btrfs_free_space *info;
1114
1115         while (*p) {
1116                 parent = *p;
1117                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1118
1119                 if (offset < info->offset) {
1120                         p = &(*p)->rb_left;
1121                 } else if (offset > info->offset) {
1122                         p = &(*p)->rb_right;
1123                 } else {
1124                         /*
1125                          * we could have a bitmap entry and an extent entry
1126                          * share the same offset.  If this is the case, we want
1127                          * the extent entry to always be found first if we do a
1128                          * linear search through the tree, since we want to have
1129                          * the quickest allocation time, and allocating from an
1130                          * extent is faster than allocating from a bitmap.  So
1131                          * if we're inserting a bitmap and we find an entry at
1132                          * this offset, we want to go right, or after this entry
1133                          * logically.  If we are inserting an extent and we've
1134                          * found a bitmap, we want to go left, or before
1135                          * logically.
1136                          */
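                        /*
                         * E.g. an extent entry and a bitmap entry that both
                         * start at offset X end up ordered extent first,
                         * bitmap second, so rb_next() from the extent reaches
                         * the bitmap sharing its offset.
                         */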
1137                         if (bitmap) {
1138                                 if (info->bitmap) {
1139                                         WARN_ON_ONCE(1);
1140                                         return -EEXIST;
1141                                 }
1142                                 p = &(*p)->rb_right;
1143                         } else {
1144                                 if (!info->bitmap) {
1145                                         WARN_ON_ONCE(1);
1146                                         return -EEXIST;
1147                                 }
1148                                 p = &(*p)->rb_left;
1149                         }
1150                 }
1151         }
1152
1153         rb_link_node(node, parent, p);
1154         rb_insert_color(node, root);
1155
1156         return 0;
1157 }
1158
1159 /*
1160  * searches the tree for the given offset.
1161  *
1162  * fuzzy - If this is set, then we are trying to make an allocation, and we
1163  * just want a section that is at least 'bytes' in size and comes at or after
1164  * the given offset.
1165  */
1166 static struct btrfs_free_space *
1167 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1168                    u64 offset, int bitmap_only, int fuzzy)
1169 {
1170         struct rb_node *n = ctl->free_space_offset.rb_node;
1171         struct btrfs_free_space *entry, *prev = NULL;
1172
1173         /* find entry that is closest to the 'offset' */
1174         while (1) {
1175                 if (!n) {
1176                         entry = NULL;
1177                         break;
1178                 }
1179
1180                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1181                 prev = entry;
1182
1183                 if (offset < entry->offset)
1184                         n = n->rb_left;
1185                 else if (offset > entry->offset)
1186                         n = n->rb_right;
1187                 else
1188                         break;
1189         }
1190
1191         if (bitmap_only) {
1192                 if (!entry)
1193                         return NULL;
1194                 if (entry->bitmap)
1195                         return entry;
1196
1197                 /*
1198                  * bitmap entry and extent entry may share same offset,
1199                  * in that case, bitmap entry comes after extent entry.
1200                  */
1201                 n = rb_next(n);
1202                 if (!n)
1203                         return NULL;
1204                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1205                 if (entry->offset != offset)
1206                         return NULL;
1207
1208                 WARN_ON(!entry->bitmap);
1209                 return entry;
1210         } else if (entry) {
1211                 if (entry->bitmap) {
1212                         /*
1213                          * if previous extent entry covers the offset,
1214                          * we should return it instead of the bitmap entry
1215                          */
1216                         n = &entry->offset_index;
1217                         while (1) {
1218                                 n = rb_prev(n);
1219                                 if (!n)
1220                                         break;
1221                                 prev = rb_entry(n, struct btrfs_free_space,
1222                                                 offset_index);
1223                                 if (!prev->bitmap) {
1224                                         if (prev->offset + prev->bytes > offset)
1225                                                 entry = prev;
1226                                         break;
1227                                 }
1228                         }
1229                 }
1230                 return entry;
1231         }
1232
1233         if (!prev)
1234                 return NULL;
1235
1236         /* find last entry before the 'offset' */
1237         entry = prev;
1238         if (entry->offset > offset) {
1239                 n = rb_prev(&entry->offset_index);
1240                 if (n) {
1241                         entry = rb_entry(n, struct btrfs_free_space,
1242                                         offset_index);
1243                         BUG_ON(entry->offset > offset);
1244                 } else {
1245                         if (fuzzy)
1246                                 return entry;
1247                         else
1248                                 return NULL;
1249                 }
1250         }
1251
1252         if (entry->bitmap) {
1253                 n = &entry->offset_index;
1254                 while (1) {
1255                         n = rb_prev(n);
1256                         if (!n)
1257                                 break;
1258                         prev = rb_entry(n, struct btrfs_free_space,
1259                                         offset_index);
1260                         if (!prev->bitmap) {
1261                                 if (prev->offset + prev->bytes > offset)
1262                                         return prev;
1263                                 break;
1264                         }
1265                 }
1266                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1267                         return entry;
1268         } else if (entry->offset + entry->bytes > offset)
1269                 return entry;
1270
1271         if (!fuzzy)
1272                 return NULL;
1273
1274         while (1) {
1275                 if (entry->bitmap) {
1276                         if (entry->offset + BITS_PER_BITMAP *
1277                             ctl->unit > offset)
1278                                 break;
1279                 } else {
1280                         if (entry->offset + entry->bytes > offset)
1281                                 break;
1282                 }
1283
1284                 n = rb_next(&entry->offset_index);
1285                 if (!n)
1286                         return NULL;
1287                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1288         }
1289         return entry;
1290 }
1291
1292 static inline void
1293 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1294                     struct btrfs_free_space *info)
1295 {
1296         rb_erase(&info->offset_index, &ctl->free_space_offset);
1297         ctl->free_extents--;
1298 }
1299
1300 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1301                               struct btrfs_free_space *info)
1302 {
1303         __unlink_free_space(ctl, info);
1304         ctl->free_space -= info->bytes;
1305 }
1306
1307 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1308                            struct btrfs_free_space *info)
1309 {
1310         int ret = 0;
1311
1312         BUG_ON(!info->bitmap && !info->bytes);
1313         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1314                                  &info->offset_index, (info->bitmap != NULL));
1315         if (ret)
1316                 return ret;
1317
1318         ctl->free_space += info->bytes;
1319         ctl->free_extents++;
1320         return ret;
1321 }
1322
1323 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1324 {
1325         struct btrfs_block_group_cache *block_group = ctl->private;
1326         u64 max_bytes;
1327         u64 bitmap_bytes;
1328         u64 extent_bytes;
1329         u64 size = block_group->key.offset;
1330         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1331         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1332
1333         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1334
1335         /*
1336          * The goal is to keep the total amount of memory used per 1gb of space
1337          * at or below 32k, so we need to adjust how much memory we allow to be
1338          * used by extent based free space tracking
1339          */
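        /*
         * For example (assuming 4K pages), a 1GB block group gets
         * max_bytes = 32k.  With one bitmap in use we reserve
         * (1 + 1) * 4k = 8k for bitmaps, so the extent entry budget is
         * min(32k - 8k, 32k / 2) = 16k, divided by
         * sizeof(struct btrfs_free_space) to get extents_thresh.
         */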
1340         if (size < 1024 * 1024 * 1024)
1341                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1342         else
1343                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1344                         div64_u64(size, 1024 * 1024 * 1024);
1345
1346         /*
1347          * we want to account for 1 more bitmap than what we have so we can make
1348          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1349          * we add more bitmaps.
1350          */
1351         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1352
1353         if (bitmap_bytes >= max_bytes) {
1354                 ctl->extents_thresh = 0;
1355                 return;
1356         }
1357
1358         /*
1359          * we want the extent entry threshold to always be at most 1/2 the max
1360          * bytes we can have, or whatever is less than that.
1361          */
1362         extent_bytes = max_bytes - bitmap_bytes;
1363         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1364
1365         ctl->extents_thresh =
1366                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1367 }
1368
1369 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1370                                        struct btrfs_free_space *info,
1371                                        u64 offset, u64 bytes)
1372 {
1373         unsigned long start, count;
1374
1375         start = offset_to_bit(info->offset, ctl->unit, offset);
1376         count = bytes_to_bits(bytes, ctl->unit);
1377         BUG_ON(start + count > BITS_PER_BITMAP);
1378
1379         bitmap_clear(info->bitmap, start, count);
1380
1381         info->bytes -= bytes;
1382 }
1383
1384 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1385                               struct btrfs_free_space *info, u64 offset,
1386                               u64 bytes)
1387 {
1388         __bitmap_clear_bits(ctl, info, offset, bytes);
1389         ctl->free_space -= bytes;
1390 }
1391
1392 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1393                             struct btrfs_free_space *info, u64 offset,
1394                             u64 bytes)
1395 {
1396         unsigned long start, count;
1397
1398         start = offset_to_bit(info->offset, ctl->unit, offset);
1399         count = bytes_to_bits(bytes, ctl->unit);
1400         BUG_ON(start + count > BITS_PER_BITMAP);
1401
1402         bitmap_set(info->bitmap, start, count);
1403
1404         info->bytes += bytes;
1405         ctl->free_space += bytes;
1406 }
1407
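/*
 * Look for a run of at least '*bytes' free bytes in the bitmap, starting at
 * or after '*offset'.  On success, update *offset and *bytes to describe the
 * run found and return 0; return -1 if no large enough run exists.
 */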
1408 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1409                          struct btrfs_free_space *bitmap_info, u64 *offset,
1410                          u64 *bytes)
1411 {
1412         unsigned long found_bits = 0;
1413         unsigned long bits, i;
1414         unsigned long next_zero;
1415
1416         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1417                           max_t(u64, *offset, bitmap_info->offset));
1418         bits = bytes_to_bits(*bytes, ctl->unit);
1419
1420         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1421              i < BITS_PER_BITMAP;
1422              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1423                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1424                                                BITS_PER_BITMAP, i);
1425                 if ((next_zero - i) >= bits) {
1426                         found_bits = next_zero - i;
1427                         break;
1428                 }
1429                 i = next_zero;
1430         }
1431
1432         if (found_bits) {
1433                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1434                 *bytes = (u64)(found_bits) * ctl->unit;
1435                 return 0;
1436         }
1437
1438         return -1;
1439 }
1440
1441 static struct btrfs_free_space *
1442 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1443 {
1444         struct btrfs_free_space *entry;
1445         struct rb_node *node;
1446         int ret;
1447
1448         if (!ctl->free_space_offset.rb_node)
1449                 return NULL;
1450
1451         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1452         if (!entry)
1453                 return NULL;
1454
1455         for (node = &entry->offset_index; node; node = rb_next(node)) {
1456                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1457                 if (entry->bytes < *bytes)
1458                         continue;
1459
1460                 if (entry->bitmap) {
1461                         ret = search_bitmap(ctl, entry, offset, bytes);
1462                         if (!ret)
1463                                 return entry;
1464                         continue;
1465                 }
1466
1467                 *offset = entry->offset;
1468                 *bytes = entry->bytes;
1469                 return entry;
1470         }
1471
1472         return NULL;
1473 }
1474
1475 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1476                            struct btrfs_free_space *info, u64 offset)
1477 {
1478         info->offset = offset_to_bitmap(ctl, offset);
1479         info->bytes = 0;
1480         INIT_LIST_HEAD(&info->list);
1481         link_free_space(ctl, info);
1482         ctl->total_bitmaps++;
1483
1484         ctl->op->recalc_thresholds(ctl);
1485 }
1486
1487 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1488                         struct btrfs_free_space *bitmap_info)
1489 {
1490         unlink_free_space(ctl, bitmap_info);
1491         kfree(bitmap_info->bitmap);
1492         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1493         ctl->total_bitmaps--;
1494         ctl->op->recalc_thresholds(ctl);
1495 }
1496
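     /*
      * Clear *bytes of space starting at *offset from 'bitmap_info', crossing
      * into the following bitmap if the range extends past the end of this
      * one.  Returns 0 on success, -EAGAIN if the caller must retry because
      * the rest of the space is not in the next bitmap, and -EINVAL if there
      * is nothing after this bitmap but bytes remain to be removed.
      */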
1497 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1498                               struct btrfs_free_space *bitmap_info,
1499                               u64 *offset, u64 *bytes)
1500 {
1501         u64 end;
1502         u64 search_start, search_bytes;
1503         int ret;
1504
1505 again:
1506         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1507
1508         /*
1509          * XXX - this can go away after a few releases.
1510          *
1511          * Since the only user of btrfs_remove_free_space is the tree logging
1512          * code, and the only way to test that is under crash conditions, we
1513          * want to have this debug stuff here just in case something's not
1514          * working.  Search the bitmap for the space we are trying to use to
1515          * make sure it's actually there.  If it's not there then we need to stop
1516          * because something has gone wrong.
1517          */
1518         search_start = *offset;
1519         search_bytes = *bytes;
1520         search_bytes = min(search_bytes, end - search_start + 1);
1521         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1522         BUG_ON(ret < 0 || search_start != *offset);
1523
1524         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1525                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1526                 *bytes -= end - *offset + 1;
1527                 *offset = end + 1;
1528         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1529                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1530                 *bytes = 0;
1531         }
1532
1533         if (*bytes) {
1534                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1535                 if (!bitmap_info->bytes)
1536                         free_bitmap(ctl, bitmap_info);
1537
1538                 /*
1539                  * no entry after this bitmap, but we still have bytes to
1540                  * remove, so something has gone wrong.
1541                  */
1542                 if (!next)
1543                         return -EINVAL;
1544
1545                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1546                                        offset_index);
1547
1548                 /*
1549                  * if the next entry isn't a bitmap, we need to return to let the
1550                  * extent code do its work.
1551                  */
1552                 if (!bitmap_info->bitmap)
1553                         return -EAGAIN;
1554
1555                 /*
1556                  * OK, the next item is a bitmap, but it may not actually hold
1557                  * the rest of the space we are trying to remove, so search for
1558                  * it; if we don't find it, return so we can try everything over
1559                  * again.
1560                  */
1561                 search_start = *offset;
1562                 search_bytes = *bytes;
1563                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1564                                     &search_bytes);
1565                 if (ret < 0 || search_start != *offset)
1566                         return -EAGAIN;
1567
1568                 goto again;
1569         } else if (!bitmap_info->bytes)
1570                 free_bitmap(ctl, bitmap_info);
1571
1572         return 0;
1573 }
1574
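     /*
      * Set as much of [offset, offset + bytes) in 'info' as fits before the
      * end of the bitmap; returns the number of bytes actually recorded.
      */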
1575 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1576                                struct btrfs_free_space *info, u64 offset,
1577                                u64 bytes)
1578 {
1579         u64 bytes_to_set = 0;
1580         u64 end;
1581
1582         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1583
1584         bytes_to_set = min(end - offset, bytes);
1585
1586         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1587
1588         return bytes_to_set;
1589
1590 }
1591
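     /*
      * Decide whether a new chunk of free space should be tracked in a bitmap
      * rather than as its own extent entry.
      */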
1592 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1593                       struct btrfs_free_space *info)
1594 {
1595         struct btrfs_block_group_cache *block_group = ctl->private;
1596
1597         /*
1598          * If we are below the extents threshold then we can add this as an
1599          * extent, and don't have to deal with the bitmap
1600          */
1601         if (ctl->free_extents < ctl->extents_thresh) {
1602                 /*
1603                  * If this block group has some small extents we don't want to
1604                  * use up all of our free slots in the cache with them, we want
1605                  * to reserve them for larger extents.  However, if we have plenty
1606                  * of cache left then go ahead and add them; no sense in adding
1607                  * the overhead of a bitmap if we don't have to.
1608                  */
1609                 if (info->bytes <= block_group->sectorsize * 4) {
1610                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1611                                 return false;
1612                 } else {
1613                         return false;
1614                 }
1615         }
1616
1617         /*
1618          * Some block groups are so tiny they can't be enveloped by a bitmap, so
1619          * don't even bother to create a bitmap for them.
1620          */
1621         if (BITS_PER_BITMAP * block_group->sectorsize >
1622             block_group->key.offset)
1623                 return false;
1624
1625         return true;
1626 }
1627
1628 static struct btrfs_free_space_op free_space_op = {
1629         .recalc_thresholds      = recalculate_thresholds,
1630         .use_bitmap             = use_bitmap,
1631 };
1632
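     /*
      * Try to record the free space described by 'info' in a bitmap, either
      * one already in the tree (or linked into a cluster) or a newly
      * allocated one.  Returns 1 if the space was added to a bitmap (and
      * 'info' was consumed), 0 if the space should stay an extent entry, or
      * a negative errno on allocation failure.
      */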
1633 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1634                               struct btrfs_free_space *info)
1635 {
1636         struct btrfs_free_space *bitmap_info;
1637         struct btrfs_block_group_cache *block_group = NULL;
1638         int added = 0;
1639         u64 bytes, offset, bytes_added;
1640         int ret;
1641
1642         bytes = info->bytes;
1643         offset = info->offset;
1644
1645         if (!ctl->op->use_bitmap(ctl, info))
1646                 return 0;
1647
1648         if (ctl->op == &free_space_op)
1649                 block_group = ctl->private;
1650 again:
1651         /*
1652          * Since we link bitmaps right into the cluster we need to see if we
1653          * have a cluster here, and if it holds our bitmap we need to add
1654          * the free space to that bitmap.
1655          */
1656         if (block_group && !list_empty(&block_group->cluster_list)) {
1657                 struct btrfs_free_cluster *cluster;
1658                 struct rb_node *node;
1659                 struct btrfs_free_space *entry;
1660
1661                 cluster = list_entry(block_group->cluster_list.next,
1662                                      struct btrfs_free_cluster,
1663                                      block_group_list);
1664                 spin_lock(&cluster->lock);
1665                 node = rb_first(&cluster->root);
1666                 if (!node) {
1667                         spin_unlock(&cluster->lock);
1668                         goto no_cluster_bitmap;
1669                 }
1670
1671                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1672                 if (!entry->bitmap) {
1673                         spin_unlock(&cluster->lock);
1674                         goto no_cluster_bitmap;
1675                 }
1676
1677                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1678                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1679                                                           offset, bytes);
1680                         bytes -= bytes_added;
1681                         offset += bytes_added;
1682                 }
1683                 spin_unlock(&cluster->lock);
1684                 if (!bytes) {
1685                         ret = 1;
1686                         goto out;
1687                 }
1688         }
1689
1690 no_cluster_bitmap:
1691         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1692                                          1, 0);
1693         if (!bitmap_info) {
1694                 BUG_ON(added);
1695                 goto new_bitmap;
1696         }
1697
1698         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1699         bytes -= bytes_added;
1700         offset += bytes_added;
1701         added = 0;
1702
1703         if (!bytes) {
1704                 ret = 1;
1705                 goto out;
1706         } else
1707                 goto again;
1708
1709 new_bitmap:
1710         if (info && info->bitmap) {
1711                 add_new_bitmap(ctl, info, offset);
1712                 added = 1;
1713                 info = NULL;
1714                 goto again;
1715         } else {
1716                 spin_unlock(&ctl->tree_lock);
1717
1718                 /* no pre-allocated info, allocate a new one */
1719                 if (!info) {
1720                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1721                                                  GFP_NOFS);
1722                         if (!info) {
1723                                 spin_lock(&ctl->tree_lock);
1724                                 ret = -ENOMEM;
1725                                 goto out;
1726                         }
1727                 }
1728
1729                 /* allocate the bitmap */
1730                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1731                 spin_lock(&ctl->tree_lock);
1732                 if (!info->bitmap) {
1733                         ret = -ENOMEM;
1734                         goto out;
1735                 }
1736                 goto again;
1737         }
1738
1739 out:
1740         if (info) {
1741                 if (info->bitmap)
1742                         kfree(info->bitmap);
1743                 kmem_cache_free(btrfs_free_space_cachep, info);
1744         }
1745
1746         return ret;
1747 }
1748
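     /*
      * Merge 'info' with any extent entries that sit directly to its left or
      * right in the tree.  The merged neighbours are removed and 'info' grows
      * to cover them; returns true if any merge happened.  'update_stat'
      * chooses whether the unlinking adjusts the free space counters.
      */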
1749 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1750                           struct btrfs_free_space *info, bool update_stat)
1751 {
1752         struct btrfs_free_space *left_info;
1753         struct btrfs_free_space *right_info;
1754         bool merged = false;
1755         u64 offset = info->offset;
1756         u64 bytes = info->bytes;
1757
1758         /*
1759          * first we want to see if there is free space adjacent to the range we
1760          * are adding; if there is, remove that entry and extend this one to
1761          * cover the entire range
1762          */
1763         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1764         if (right_info && rb_prev(&right_info->offset_index))
1765                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1766                                      struct btrfs_free_space, offset_index);
1767         else
1768                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1769
1770         if (right_info && !right_info->bitmap) {
1771                 if (update_stat)
1772                         unlink_free_space(ctl, right_info);
1773                 else
1774                         __unlink_free_space(ctl, right_info);
1775                 info->bytes += right_info->bytes;
1776                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1777                 merged = true;
1778         }
1779
1780         if (left_info && !left_info->bitmap &&
1781             left_info->offset + left_info->bytes == offset) {
1782                 if (update_stat)
1783                         unlink_free_space(ctl, left_info);
1784                 else
1785                         __unlink_free_space(ctl, left_info);
1786                 info->offset = left_info->offset;
1787                 info->bytes += left_info->bytes;
1788                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1789                 merged = true;
1790         }
1791
1792         return merged;
1793 }
1794
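     /*
      * Add [offset, offset + bytes) to the free space ctl, merging it with
      * adjacent extent entries where possible and falling back to a bitmap
      * when the extent threshold says we should.
      */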
1795 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1796                            u64 offset, u64 bytes)
1797 {
1798         struct btrfs_free_space *info;
1799         int ret = 0;
1800
1801         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1802         if (!info)
1803                 return -ENOMEM;
1804
1805         info->offset = offset;
1806         info->bytes = bytes;
1807
1808         spin_lock(&ctl->tree_lock);
1809
1810         if (try_merge_free_space(ctl, info, true))
1811                 goto link;
1812
1813         /*
1814          * There was no extent directly to the left or right of this new
1815          * extent, so we know we're going to have to allocate a new entry.
1816          * Before we do that, see if we need to drop this into a bitmap.
1817          */
1818         ret = insert_into_bitmap(ctl, info);
1819         if (ret < 0) {
1820                 goto out;
1821         } else if (ret) {
1822                 ret = 0;
1823                 goto out;
1824         }
1825 link:
1826         ret = link_free_space(ctl, info);
1827         if (ret)
1828                 kmem_cache_free(btrfs_free_space_cachep, info);
1829 out:
1830         spin_unlock(&ctl->tree_lock);
1831
1832         if (ret) {
1833                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1834                 BUG_ON(ret == -EEXIST);
1835         }
1836
1837         return ret;
1838 }
1839
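     /*
      * Remove [offset, offset + bytes) from the block group's free space.
      * The range may be covered by extent entries, bitmaps, or both, and may
      * split an existing extent entry in two.
      */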
1840 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1841                             u64 offset, u64 bytes)
1842 {
1843         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1844         struct btrfs_free_space *info;
1845         struct btrfs_free_space *next_info = NULL;
1846         int ret = 0;
1847
1848         spin_lock(&ctl->tree_lock);
1849
1850 again:
1851         info = tree_search_offset(ctl, offset, 0, 0);
1852         if (!info) {
1853                 /*
1854                  * we didn't find an extent that matched the space we wanted
1855                  * to remove, so look for a bitmap instead
1856                  */
1857                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1858                                           1, 0);
1859                 if (!info) {
1860                         /* the tree logging code might be calling us before we
1861                          * have fully loaded the free space rbtree for this
1862                          * block group.  So it is possible the entry won't
1863                          * be in the rbtree yet at all.  The caching code
1864                          * will make sure not to put it in the rbtree if
1865                          * the logging code has pinned it.
1866                          */
1867                         goto out_lock;
1868                 }
1869         }
1870
1871         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1872                 u64 end;
1873                 next_info = rb_entry(rb_next(&info->offset_index),
1874                                              struct btrfs_free_space,
1875                                              offset_index);
1876
1877                 if (next_info->bitmap)
1878                         end = next_info->offset +
1879                               BITS_PER_BITMAP * ctl->unit - 1;
1880                 else
1881                         end = next_info->offset + next_info->bytes;
1882
1883                 if (next_info->bytes < bytes ||
1884                     next_info->offset > offset || offset > end) {
1885                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1886                               " trying to use %llu\n",
1887                               (unsigned long long)info->offset,
1888                               (unsigned long long)info->bytes,
1889                               (unsigned long long)bytes);
1890                         WARN_ON(1);
1891                         ret = -EINVAL;
1892                         goto out_lock;
1893                 }
1894
1895                 info = next_info;
1896         }
1897
1898         if (info->bytes == bytes) {
1899                 unlink_free_space(ctl, info);
1900                 if (info->bitmap) {
1901                         kfree(info->bitmap);
1902                         ctl->total_bitmaps--;
1903                 }
1904                 kmem_cache_free(btrfs_free_space_cachep, info);
1905                 ret = 0;
1906                 goto out_lock;
1907         }
1908
1909         if (!info->bitmap && info->offset == offset) {
1910                 unlink_free_space(ctl, info);
1911                 info->offset += bytes;
1912                 info->bytes -= bytes;
1913                 ret = link_free_space(ctl, info);
1914                 WARN_ON(ret);
1915                 goto out_lock;
1916         }
1917
1918         if (!info->bitmap && info->offset <= offset &&
1919             info->offset + info->bytes >= offset + bytes) {
1920                 u64 old_start = info->offset;
1921                 /*
1922                  * we're freeing space in the middle of the info,
1923                  * this can happen during tree log replay
1924                  *
1925                  * first unlink the old info and then
1926                  * insert it again after the hole we're creating
1927                  */
1928                 unlink_free_space(ctl, info);
1929                 if (offset + bytes < info->offset + info->bytes) {
1930                         u64 old_end = info->offset + info->bytes;
1931
1932                         info->offset = offset + bytes;
1933                         info->bytes = old_end - info->offset;
1934                         ret = link_free_space(ctl, info);
1935                         WARN_ON(ret);
1936                         if (ret)
1937                                 goto out_lock;
1938                 } else {
1939                         /* the hole we're creating ends at the end
1940                          * of the info struct, just free the info
1941                          */
1942                         kmem_cache_free(btrfs_free_space_cachep, info);
1943                 }
1944                 spin_unlock(&ctl->tree_lock);
1945
1946                 /* step two, insert a new info struct to cover
1947                  * anything before the hole
1948                  */
1949                 ret = btrfs_add_free_space(block_group, old_start,
1950                                            offset - old_start);
1951                 WARN_ON(ret);
1952                 goto out;
1953         }
1954
1955         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1956         if (ret == -EAGAIN)
1957                 goto again;
1958         BUG_ON(ret);
1959 out_lock:
1960         spin_unlock(&ctl->tree_lock);
1961 out:
1962         return ret;
1963 }
1964
1965 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1966                            u64 bytes)
1967 {
1968         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1969         struct btrfs_free_space *info;
1970         struct rb_node *n;
1971         int count = 0;
1972
1973         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1974                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1975                 if (info->bytes >= bytes)
1976                         count++;
1977                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1978                        (unsigned long long)info->offset,
1979                        (unsigned long long)info->bytes,
1980                        (info->bitmap) ? "yes" : "no");
1981         }
1982         printk(KERN_INFO "block group has cluster?: %s\n",
1983                list_empty(&block_group->cluster_list) ? "no" : "yes");
1984         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
1985                count, (unsigned long long)bytes);
1986 }
1987
1988 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1989 {
1990         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1991
1992         spin_lock_init(&ctl->tree_lock);
1993         ctl->unit = block_group->sectorsize;
1994         ctl->start = block_group->key.objectid;
1995         ctl->private = block_group;
1996         ctl->op = &free_space_op;
1997
1998         /*
1999          * we only want to have 32k of ram per block group for keeping
2000          * track of free space, and if we pass 1/2 of that we want to
2001          * start converting things over to using bitmaps
2002          */
2003         ctl->extents_thresh = ((1024 * 32) / 2) /
2004                                 sizeof(struct btrfs_free_space);
2005 }
2006
2007 /*
2008  * for a given cluster, put all of its extents back into the free
2009  * space cache.  If the block group passed doesn't match the block group
2010  * pointed to by the cluster, someone else raced in and freed the
2011  * cluster already.  In that case, we just return without changing anything
2012  */
2013 static int
2014 __btrfs_return_cluster_to_free_space(
2015                              struct btrfs_block_group_cache *block_group,
2016                              struct btrfs_free_cluster *cluster)
2017 {
2018         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2019         struct btrfs_free_space *entry;
2020         struct rb_node *node;
2021
2022         spin_lock(&cluster->lock);
2023         if (cluster->block_group != block_group)
2024                 goto out;
2025
2026         cluster->block_group = NULL;
2027         cluster->window_start = 0;
2028         list_del_init(&cluster->block_group_list);
2029
2030         node = rb_first(&cluster->root);
2031         while (node) {
2032                 bool bitmap;
2033
2034                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2035                 node = rb_next(&entry->offset_index);
2036                 rb_erase(&entry->offset_index, &cluster->root);
2037
2038                 bitmap = (entry->bitmap != NULL);
2039                 if (!bitmap)
2040                         try_merge_free_space(ctl, entry, false);
2041                 tree_insert_offset(&ctl->free_space_offset,
2042                                    entry->offset, &entry->offset_index, bitmap);
2043         }
2044         cluster->root = RB_ROOT;
2045
2046 out:
2047         spin_unlock(&cluster->lock);
2048         btrfs_put_block_group(block_group);
2049         return 0;
2050 }
2051
2052 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2053 {
2054         struct btrfs_free_space *info;
2055         struct rb_node *node;
2056
2057         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2058                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2059                 if (!info->bitmap) {
2060                         unlink_free_space(ctl, info);
2061                         kmem_cache_free(btrfs_free_space_cachep, info);
2062                 } else {
2063                         free_bitmap(ctl, info);
2064                 }
2065                 if (need_resched()) {
2066                         spin_unlock(&ctl->tree_lock);
2067                         cond_resched();
2068                         spin_lock(&ctl->tree_lock);
2069                 }
2070         }
2071 }
2072
2073 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2074 {
2075         spin_lock(&ctl->tree_lock);
2076         __btrfs_remove_free_space_cache_locked(ctl);
2077         spin_unlock(&ctl->tree_lock);
2078 }
2079
2080 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2081 {
2082         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2083         struct btrfs_free_cluster *cluster;
2084         struct list_head *head;
2085
2086         spin_lock(&ctl->tree_lock);
2087         while ((head = block_group->cluster_list.next) !=
2088                &block_group->cluster_list) {
2089                 cluster = list_entry(head, struct btrfs_free_cluster,
2090                                      block_group_list);
2091
2092                 WARN_ON(cluster->block_group != block_group);
2093                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2094                 if (need_resched()) {
2095                         spin_unlock(&ctl->tree_lock);
2096                         cond_resched();
2097                         spin_lock(&ctl->tree_lock);
2098                 }
2099         }
2100         __btrfs_remove_free_space_cache_locked(ctl);
2101         spin_unlock(&ctl->tree_lock);
2102
2103 }
2104
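     /*
      * Allocate 'bytes' from the block group's free space, searching for a
      * slot of at least bytes + empty_size.  Returns the start offset of the
      * allocation, or 0 if nothing large enough was found.
      */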
2105 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2106                                u64 offset, u64 bytes, u64 empty_size)
2107 {
2108         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2109         struct btrfs_free_space *entry = NULL;
2110         u64 bytes_search = bytes + empty_size;
2111         u64 ret = 0;
2112
2113         spin_lock(&ctl->tree_lock);
2114         entry = find_free_space(ctl, &offset, &bytes_search);
2115         if (!entry)
2116                 goto out;
2117
2118         ret = offset;
2119         if (entry->bitmap) {
2120                 bitmap_clear_bits(ctl, entry, offset, bytes);
2121                 if (!entry->bytes)
2122                         free_bitmap(ctl, entry);
2123         } else {
2124                 unlink_free_space(ctl, entry);
2125                 entry->offset += bytes;
2126                 entry->bytes -= bytes;
2127                 if (!entry->bytes)
2128                         kmem_cache_free(btrfs_free_space_cachep, entry);
2129                 else
2130                         link_free_space(ctl, entry);
2131         }
2132
2133 out:
2134         spin_unlock(&ctl->tree_lock);
2135
2136         return ret;
2137 }
2138
2139 /*
2140  * given a cluster, put all of its extents back into the free space
2141  * cache.  If a block group is passed, this function will only free
2142  * a cluster that belongs to the passed block group.
2143  *
2144  * Otherwise, it'll get a reference on the block group pointed to by the
2145  * cluster and remove the cluster from it.
2146  */
2147 int btrfs_return_cluster_to_free_space(
2148                                struct btrfs_block_group_cache *block_group,
2149                                struct btrfs_free_cluster *cluster)
2150 {
2151         struct btrfs_free_space_ctl *ctl;
2152         int ret;
2153
2154         /* first, get a safe pointer to the block group */
2155         spin_lock(&cluster->lock);
2156         if (!block_group) {
2157                 block_group = cluster->block_group;
2158                 if (!block_group) {
2159                         spin_unlock(&cluster->lock);
2160                         return 0;
2161                 }
2162         } else if (cluster->block_group != block_group) {
2163                 /* someone else has already freed it, don't redo their work */
2164                 spin_unlock(&cluster->lock);
2165                 return 0;
2166         }
2167         atomic_inc(&block_group->count);
2168         spin_unlock(&cluster->lock);
2169
2170         ctl = block_group->free_space_ctl;
2171
2172         /* now return any extents the cluster had on it */
2173         spin_lock(&ctl->tree_lock);
2174         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2175         spin_unlock(&ctl->tree_lock);
2176
2177         /* finally drop our ref */
2178         btrfs_put_block_group(block_group);
2179         return ret;
2180 }
2181
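     /*
      * Allocate 'bytes' out of a bitmap entry that belongs to 'cluster',
      * starting the search at 'min_start'.  Returns the start of the
      * allocation, or 0 if the bitmap has no large enough run.
      */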
2182 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2183                                    struct btrfs_free_cluster *cluster,
2184                                    struct btrfs_free_space *entry,
2185                                    u64 bytes, u64 min_start)
2186 {
2187         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2188         int err;
2189         u64 search_start = cluster->window_start;
2190         u64 search_bytes = bytes;
2191         u64 ret = 0;
2192
2193         search_start = min_start;
2194         search_bytes = bytes;
2195
2196         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2197         if (err)
2198                 return 0;
2199
2200         ret = search_start;
2201         __bitmap_clear_bits(ctl, entry, ret, bytes);
2202
2203         return ret;
2204 }
2205
2206 /*
2207  * given a cluster, try to allocate 'bytes' from it, returns 0
2208  * if it couldn't find anything suitably large, or a logical disk offset
2209  * if things worked out
2210  */
2211 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2212                              struct btrfs_free_cluster *cluster, u64 bytes,
2213                              u64 min_start)
2214 {
2215         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2216         struct btrfs_free_space *entry = NULL;
2217         struct rb_node *node;
2218         u64 ret = 0;
2219
2220         spin_lock(&cluster->lock);
2221         if (bytes > cluster->max_size)
2222                 goto out;
2223
2224         if (cluster->block_group != block_group)
2225                 goto out;
2226
2227         node = rb_first(&cluster->root);
2228         if (!node)
2229                 goto out;
2230
2231         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2232         while (1) {
2233                 if (entry->bytes < bytes ||
2234                     (!entry->bitmap && entry->offset < min_start)) {
2235                         node = rb_next(&entry->offset_index);
2236                         if (!node)
2237                                 break;
2238                         entry = rb_entry(node, struct btrfs_free_space,
2239                                          offset_index);
2240                         continue;
2241                 }
2242
2243                 if (entry->bitmap) {
2244                         ret = btrfs_alloc_from_bitmap(block_group,
2245                                                       cluster, entry, bytes,
2246                                                       cluster->window_start);
2247                         if (ret == 0) {
2248                                 node = rb_next(&entry->offset_index);
2249                                 if (!node)
2250                                         break;
2251                                 entry = rb_entry(node, struct btrfs_free_space,
2252                                                  offset_index);
2253                                 continue;
2254                         }
2255                         cluster->window_start += bytes;
2256                 } else {
2257                         ret = entry->offset;
2258
2259                         entry->offset += bytes;
2260                         entry->bytes -= bytes;
2261                 }
2262
2263                 if (entry->bytes == 0)
2264                         rb_erase(&entry->offset_index, &cluster->root);
2265                 break;
2266         }
2267 out:
2268         spin_unlock(&cluster->lock);
2269
2270         if (!ret)
2271                 return 0;
2272
2273         spin_lock(&ctl->tree_lock);
2274
2275         ctl->free_space -= bytes;
2276         if (entry->bytes == 0) {
2277                 ctl->free_extents--;
2278                 if (entry->bitmap) {
2279                         kfree(entry->bitmap);
2280                         ctl->total_bitmaps--;
2281                         ctl->op->recalc_thresholds(ctl);
2282                 }
2283                 kmem_cache_free(btrfs_free_space_cachep, entry);
2284         }
2285
2286         spin_unlock(&ctl->tree_lock);
2287
2288         return ret;
2289 }
2290
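     /*
      * Build a cluster out of a single bitmap entry: look for a window of at
      * least 'bytes' of free space made of runs of at least 'min_bytes', with
      * at least one run of 'cont1_bytes', then move the bitmap entry from the
      * free space tree into the cluster's rbtree.
      */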
2291 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2292                                 struct btrfs_free_space *entry,
2293                                 struct btrfs_free_cluster *cluster,
2294                                 u64 offset, u64 bytes,
2295                                 u64 cont1_bytes, u64 min_bytes)
2296 {
2297         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2298         unsigned long next_zero;
2299         unsigned long i;
2300         unsigned long want_bits;
2301         unsigned long min_bits;
2302         unsigned long found_bits;
2303         unsigned long start = 0;
2304         unsigned long total_found = 0;
2305         int ret;
2306
2307         i = offset_to_bit(entry->offset, block_group->sectorsize,
2308                           max_t(u64, offset, entry->offset));
2309         want_bits = bytes_to_bits(bytes, block_group->sectorsize);
2310         min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2311
2312 again:
2313         found_bits = 0;
2314         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2315              i < BITS_PER_BITMAP;
2316              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2317                 next_zero = find_next_zero_bit(entry->bitmap,
2318                                                BITS_PER_BITMAP, i);
2319                 if (next_zero - i >= min_bits) {
2320                         found_bits = next_zero - i;
2321                         break;
2322                 }
2323                 i = next_zero;
2324         }
2325
2326         if (!found_bits)
2327                 return -ENOSPC;
2328
2329         if (!total_found) {
2330                 start = i;
2331                 cluster->max_size = 0;
2332         }
2333
2334         total_found += found_bits;
2335
2336         if (cluster->max_size < found_bits * block_group->sectorsize)
2337                 cluster->max_size = found_bits * block_group->sectorsize;
2338
2339         if (total_found < want_bits || cluster->max_size < cont1_bytes) {
2340                 i = next_zero + 1;
2341                 goto again;
2342         }
2343
2344         cluster->window_start = start * block_group->sectorsize +
2345                 entry->offset;
2346         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2347         ret = tree_insert_offset(&cluster->root, entry->offset,
2348                                  &entry->offset_index, 1);
2349         BUG_ON(ret);
2350
2351         trace_btrfs_setup_cluster(block_group, cluster,
2352                                   total_found * block_group->sectorsize, 1);
2353         return 0;
2354 }
2355
2356 /*
2357  * This searches the block group for just extents to fill the cluster with.
2358  * Try to find a cluster with at least bytes total bytes, at least one
2359  * extent of cont1_bytes, and other extents of at least min_bytes.
2360  */
2361 static noinline int
2362 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2363                         struct btrfs_free_cluster *cluster,
2364                         struct list_head *bitmaps, u64 offset, u64 bytes,
2365                         u64 cont1_bytes, u64 min_bytes)
2366 {
2367         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2368         struct btrfs_free_space *first = NULL;
2369         struct btrfs_free_space *entry = NULL;
2370         struct btrfs_free_space *last;
2371         struct rb_node *node;
2372         u64 window_start;
2373         u64 window_free;
2374         u64 max_extent;
2375         u64 total_size = 0;
2376
2377         entry = tree_search_offset(ctl, offset, 0, 1);
2378         if (!entry)
2379                 return -ENOSPC;
2380
2381         /*
2382          * We don't want bitmaps, so just move along until we find a normal
2383          * extent entry.
2384          */
2385         while (entry->bitmap || entry->bytes < min_bytes) {
2386                 if (entry->bitmap && list_empty(&entry->list))
2387                         list_add_tail(&entry->list, bitmaps);
2388                 node = rb_next(&entry->offset_index);
2389                 if (!node)
2390                         return -ENOSPC;
2391                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2392         }
2393
2394         window_start = entry->offset;
2395         window_free = entry->bytes;
2396         max_extent = entry->bytes;
2397         first = entry;
2398         last = entry;
2399
2400         for (node = rb_next(&entry->offset_index); node;
2401              node = rb_next(&entry->offset_index)) {
2402                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2403
2404                 if (entry->bitmap) {
2405                         if (list_empty(&entry->list))
2406                                 list_add_tail(&entry->list, bitmaps);
2407                         continue;
2408                 }
2409
2410                 if (entry->bytes < min_bytes)
2411                         continue;
2412
2413                 last = entry;
2414                 window_free += entry->bytes;
2415                 if (entry->bytes > max_extent)
2416                         max_extent = entry->bytes;
2417         }
2418
2419         if (window_free < bytes || max_extent < cont1_bytes)
2420                 return -ENOSPC;
2421
2422         cluster->window_start = first->offset;
2423
2424         node = &first->offset_index;
2425
2426         /*
2427          * now we've found our entries, pull them out of the free space
2428          * cache and put them into the cluster rbtree
2429          */
2430         do {
2431                 int ret;
2432
2433                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2434                 node = rb_next(&entry->offset_index);
2435                 if (entry->bitmap || entry->bytes < min_bytes)
2436                         continue;
2437
2438                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2439                 ret = tree_insert_offset(&cluster->root, entry->offset,
2440                                          &entry->offset_index, 0);
2441                 total_size += entry->bytes;
2442                 BUG_ON(ret);
2443         } while (node && entry != last);
2444
2445         cluster->max_size = max_extent;
2446         trace_btrfs_setup_cluster(block_group, cluster, total_size, 0);
2447         return 0;
2448 }
2449
2450 /*
2451  * This specifically looks for bitmaps that may work in the cluster; we assume
2452  * that we have already failed to find extents that will work.
2453  */
2454 static noinline int
2455 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2456                      struct btrfs_free_cluster *cluster,
2457                      struct list_head *bitmaps, u64 offset, u64 bytes,
2458                      u64 cont1_bytes, u64 min_bytes)
2459 {
2460         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2461         struct btrfs_free_space *entry;
2462         int ret = -ENOSPC;
2463         u64 bitmap_offset = offset_to_bitmap(ctl, offset);
2464
2465         if (ctl->total_bitmaps == 0)
2466                 return -ENOSPC;
2467
2468         /*
2469          * The bitmap that covers offset won't be in the list unless offset
2470          * is just its start offset.
2471          */
2472         entry = list_first_entry(bitmaps, struct btrfs_free_space, list);
2473         if (entry->offset != bitmap_offset) {
2474                 entry = tree_search_offset(ctl, bitmap_offset, 1, 0);
2475                 if (entry && list_empty(&entry->list))
2476                         list_add(&entry->list, bitmaps);
2477         }
2478
2479         list_for_each_entry(entry, bitmaps, list) {
2480                 if (entry->bytes < bytes)
2481                         continue;
2482                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2483                                            bytes, cont1_bytes, min_bytes);
2484                 if (!ret)
2485                         return 0;
2486         }
2487
2488         /*
2489          * The bitmaps list has all the bitmaps that record free space
2490          * starting after offset, so no more search is required.
2491          */
2492         return -ENOSPC;
2493 }
2494
2495 /*
2496  * here we try to find a cluster of blocks in a block group.  The goal
2497  * is to find at least bytes+empty_size.
2498  * We might not find them all in one contiguous area.
2499  *
2500  * returns zero and sets up cluster if things worked out, otherwise
2501  * it returns -ENOSPC
2502  */
2503 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2504                              struct btrfs_root *root,
2505                              struct btrfs_block_group_cache *block_group,
2506                              struct btrfs_free_cluster *cluster,
2507                              u64 offset, u64 bytes, u64 empty_size)
2508 {
2509         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2510         struct btrfs_free_space *entry, *tmp;
2511         LIST_HEAD(bitmaps);
2512         u64 min_bytes;
2513         u64 cont1_bytes;
2514         int ret;
2515
2516         /*
2517          * Choose the minimum extent size we'll require for this
2518          * cluster.  For SSD_SPREAD, don't allow any fragmentation.
2519          * For metadata, allow allocations with smaller extents.  For
2520          * data, keep it dense.
2521          */
2522         if (btrfs_test_opt(root, SSD_SPREAD)) {
2523                 cont1_bytes = min_bytes = bytes + empty_size;
2524         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2525                 cont1_bytes = bytes;
2526                 min_bytes = block_group->sectorsize;
2527         } else {
2528                 cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
2529                 min_bytes = block_group->sectorsize;
2530         }
2531
2532         spin_lock(&ctl->tree_lock);
2533
2534         /*
2535          * If we know we don't have enough space to make a cluster, don't even
2536          * bother doing all the work to try and find one.
2537          */
2538         if (ctl->free_space < bytes) {
2539                 spin_unlock(&ctl->tree_lock);
2540                 return -ENOSPC;
2541         }
2542
2543         spin_lock(&cluster->lock);
2544
2545         /* someone already found a cluster, hooray */
2546         if (cluster->block_group) {
2547                 ret = 0;
2548                 goto out;
2549         }
2550
2551         trace_btrfs_find_cluster(block_group, offset, bytes, empty_size,
2552                                  min_bytes);
2553
2554         INIT_LIST_HEAD(&bitmaps);
2555         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2556                                       bytes + empty_size,
2557                                       cont1_bytes, min_bytes);
2558         if (ret)
2559                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2560                                            offset, bytes + empty_size,
2561                                            cont1_bytes, min_bytes);
2562
2563         /* Clear our temporary list */
2564         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2565                 list_del_init(&entry->list);
2566
2567         if (!ret) {
2568                 atomic_inc(&block_group->count);
2569                 list_add_tail(&cluster->block_group_list,
2570                               &block_group->cluster_list);
2571                 cluster->block_group = block_group;
2572         } else {
2573                 trace_btrfs_failed_cluster_setup(block_group);
2574         }
2575 out:
2576         spin_unlock(&cluster->lock);
2577         spin_unlock(&ctl->tree_lock);
2578
2579         return ret;
2580 }
2581
2582 /*
2583  * simple code to zero out a cluster
2584  */
2585 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2586 {
2587         spin_lock_init(&cluster->lock);
2588         spin_lock_init(&cluster->refill_lock);
2589         cluster->root = RB_ROOT;
2590         cluster->max_size = 0;
2591         INIT_LIST_HEAD(&cluster->block_group_list);
2592         cluster->block_group = NULL;
2593 }
2594
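     /*
      * Discard [start, start + bytes) on disk and then give the reserved
      * range back to the free space cache, keeping the block group and
      * space_info reservation counters consistent around the discard.
      */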
2595 static int do_trimming(struct btrfs_block_group_cache *block_group,
2596                        u64 *total_trimmed, u64 start, u64 bytes,
2597                        u64 reserved_start, u64 reserved_bytes)
2598 {
2599         struct btrfs_space_info *space_info = block_group->space_info;
2600         struct btrfs_fs_info *fs_info = block_group->fs_info;
2601         int ret;
2602         int update = 0;
2603         u64 trimmed = 0;
2604
2605         spin_lock(&space_info->lock);
2606         spin_lock(&block_group->lock);
2607         if (!block_group->ro) {
2608                 block_group->reserved += reserved_bytes;
2609                 space_info->bytes_reserved += reserved_bytes;
2610                 update = 1;
2611         }
2612         spin_unlock(&block_group->lock);
2613         spin_unlock(&space_info->lock);
2614
2615         ret = btrfs_error_discard_extent(fs_info->extent_root,
2616                                          start, bytes, &trimmed);
2617         if (!ret)
2618                 *total_trimmed += trimmed;
2619
2620         btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
2621
2622         if (update) {
2623                 spin_lock(&space_info->lock);
2624                 spin_lock(&block_group->lock);
2625                 if (block_group->ro)
2626                         space_info->bytes_readonly += reserved_bytes;
2627                 block_group->reserved -= reserved_bytes;
2628                 space_info->bytes_reserved -= reserved_bytes;
2629                 spin_unlock(&space_info->lock);
2630                 spin_unlock(&block_group->lock);
2631         }
2632
2633         return ret;
2634 }
2635
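     /*
      * Trim the extent (non-bitmap) entries that overlap [start, end),
      * discarding each piece that is at least 'minlen' bytes.
      */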
2636 static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
2637                           u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2638 {
2639         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2640         struct btrfs_free_space *entry;
2641         struct rb_node *node;
2642         int ret = 0;
2643         u64 extent_start;
2644         u64 extent_bytes;
2645         u64 bytes;
2646
2647         while (start < end) {
2648                 spin_lock(&ctl->tree_lock);
2649
2650                 if (ctl->free_space < minlen) {
2651                         spin_unlock(&ctl->tree_lock);
2652                         break;
2653                 }
2654
2655                 entry = tree_search_offset(ctl, start, 0, 1);
2656                 if (!entry) {
2657                         spin_unlock(&ctl->tree_lock);
2658                         break;
2659                 }
2660
2661                 /* skip bitmaps */
2662                 while (entry->bitmap) {
2663                         node = rb_next(&entry->offset_index);
2664                         if (!node) {
2665                                 spin_unlock(&ctl->tree_lock);
2666                                 goto out;
2667                         }
2668                         entry = rb_entry(node, struct btrfs_free_space,
2669                                          offset_index);
2670                 }
2671
2672                 if (entry->offset >= end) {
2673                         spin_unlock(&ctl->tree_lock);
2674                         break;
2675                 }
2676
2677                 extent_start = entry->offset;
2678                 extent_bytes = entry->bytes;
2679                 start = max(start, extent_start);
2680                 bytes = min(extent_start + extent_bytes, end) - start;
2681                 if (bytes < minlen) {
2682                         spin_unlock(&ctl->tree_lock);
2683                         goto next;
2684                 }
2685
2686                 unlink_free_space(ctl, entry);
2687                 kmem_cache_free(btrfs_free_space_cachep, entry);
2688
2689                 spin_unlock(&ctl->tree_lock);
2690
2691                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2692                                   extent_start, extent_bytes);
2693                 if (ret)
2694                         break;
2695 next:
2696                 start += bytes;
2697
2698                 if (fatal_signal_pending(current)) {
2699                         ret = -ERESTARTSYS;
2700                         break;
2701                 }
2702
2703                 cond_resched();
2704         }
2705 out:
2706         return ret;
2707 }
2708
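     /*
      * Trim the free space recorded in bitmap entries covering [start, end),
      * discarding each run that is at least 'minlen' bytes.
      */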
2709 static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
2710                         u64 *total_trimmed, u64 start, u64 end, u64 minlen)
2711 {
2712         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2713         struct btrfs_free_space *entry;
2714         int ret = 0;
2715         int ret2;
2716         u64 bytes;
2717         u64 offset = offset_to_bitmap(ctl, start);
2718
2719         while (offset < end) {
2720                 bool next_bitmap = false;
2721
2722                 spin_lock(&ctl->tree_lock);
2723
2724                 if (ctl->free_space < minlen) {
2725                         spin_unlock(&ctl->tree_lock);
2726                         break;
2727                 }
2728
2729                 entry = tree_search_offset(ctl, offset, 1, 0);
2730                 if (!entry) {
2731                         spin_unlock(&ctl->tree_lock);
2732                         next_bitmap = true;
2733                         goto next;
2734                 }
2735
2736                 bytes = minlen;
2737                 ret2 = search_bitmap(ctl, entry, &start, &bytes);
2738                 if (ret2 || start >= end) {
2739                         spin_unlock(&ctl->tree_lock);
2740                         next_bitmap = true;
2741                         goto next;
2742                 }
2743
2744                 bytes = min(bytes, end - start);
2745                 if (bytes < minlen) {
2746                         spin_unlock(&ctl->tree_lock);
2747                         goto next;
2748                 }
2749
2750                 bitmap_clear_bits(ctl, entry, start, bytes);
2751                 if (entry->bytes == 0)
2752                         free_bitmap(ctl, entry);
2753
2754                 spin_unlock(&ctl->tree_lock);
2755
2756                 ret = do_trimming(block_group, total_trimmed, start, bytes,
2757                                   start, bytes);
2758                 if (ret)
2759                         break;
2760 next:
2761                 if (next_bitmap) {
2762                         offset += BITS_PER_BITMAP * ctl->unit;
2763                 } else {
2764                         start += bytes;
2765                         if (start >= offset + BITS_PER_BITMAP * ctl->unit)
2766                                 offset += BITS_PER_BITMAP * ctl->unit;
2767                 }
2768
2769                 if (fatal_signal_pending(current)) {
2770                         ret = -ERESTARTSYS;
2771                         break;
2772                 }
2773
2774                 cond_resched();
2775         }
2776
2777         return ret;
2778 }
2779
2780 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2781                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2782 {
2783         int ret;
2784
2785         *trimmed = 0;
2786
2787         ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
2788         if (ret)
2789                 return ret;
2790
2791         ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
2792
2793         return ret;
2794 }
2795
2796 /*
2797  * Find the left-most item in the cache tree, and then return the
2798  * smallest inode number in the item.
2799  *
2800  * Note: the returned inode number may not be the smallest one in
2801  * the tree, if the left-most item is a bitmap.
2802  */
2803 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2804 {
2805         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2806         struct btrfs_free_space *entry = NULL;
2807         u64 ino = 0;
2808
2809         spin_lock(&ctl->tree_lock);
2810
2811         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2812                 goto out;
2813
2814         entry = rb_entry(rb_first(&ctl->free_space_offset),
2815                          struct btrfs_free_space, offset_index);
2816
2817         if (!entry->bitmap) {
2818                 ino = entry->offset;
2819
2820                 unlink_free_space(ctl, entry);
2821                 entry->offset++;
2822                 entry->bytes--;
2823                 if (!entry->bytes)
2824                         kmem_cache_free(btrfs_free_space_cachep, entry);
2825                 else
2826                         link_free_space(ctl, entry);
2827         } else {
2828                 u64 offset = 0;
2829                 u64 count = 1;
2830                 int ret;
2831
2832                 ret = search_bitmap(ctl, entry, &offset, &count);
2833                 BUG_ON(ret);
2834
2835                 ino = offset;
2836                 bitmap_clear_bits(ctl, entry, offset, 1);
2837                 if (entry->bytes == 0)
2838                         free_bitmap(ctl, entry);
2839         }
2840 out:
2841         spin_unlock(&ctl->tree_lock);
2842
2843         return ino;
2844 }
2845
2846 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2847                                     struct btrfs_path *path)
2848 {
2849         struct inode *inode = NULL;
2850
2851         spin_lock(&root->cache_lock);
2852         if (root->cache_inode)
2853                 inode = igrab(root->cache_inode);
2854         spin_unlock(&root->cache_lock);
2855         if (inode)
2856                 return inode;
2857
2858         inode = __lookup_free_space_inode(root, path, 0);
2859         if (IS_ERR(inode))
2860                 return inode;
2861
2862         spin_lock(&root->cache_lock);
2863         if (!btrfs_fs_closing(root->fs_info))
2864                 root->cache_inode = igrab(inode);
2865         spin_unlock(&root->cache_lock);
2866
2867         return inode;
2868 }
2869
2870 int create_free_ino_inode(struct btrfs_root *root,
2871                           struct btrfs_trans_handle *trans,
2872                           struct btrfs_path *path)
2873 {
2874         return __create_free_space_inode(root, trans, path,
2875                                          BTRFS_FREE_INO_OBJECTID, 0);
2876 }
2877
2878 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2879 {
2880         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2881         struct btrfs_path *path;
2882         struct inode *inode;
2883         int ret = 0;
2884         u64 root_gen = btrfs_root_generation(&root->root_item);
2885
2886         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2887                 return 0;
2888
2889         /*
2890          * If we're unmounting then just return, since this does a search on the
2891          * normal root and not the commit root and we could deadlock.
2892          */
2893         if (btrfs_fs_closing(fs_info))
2894                 return 0;
2895
2896         path = btrfs_alloc_path();
2897         if (!path)
2898                 return 0;
2899
2900         inode = lookup_free_ino_inode(root, path);
2901         if (IS_ERR(inode))
2902                 goto out;
2903
2904         if (root_gen != BTRFS_I(inode)->generation)
2905                 goto out_put;
2906
2907         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2908
2909         if (ret < 0)
2910                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2911                        "root %llu\n", root->root_key.objectid);
2912 out_put:
2913         iput(inode);
2914 out:
2915         btrfs_free_path(path);
2916         return ret;
2917 }
2918
2919 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2920                               struct btrfs_trans_handle *trans,
2921                               struct btrfs_path *path)
2922 {
2923         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2924         struct inode *inode;
2925         int ret;
2926
2927         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2928                 return 0;
2929
2930         inode = lookup_free_ino_inode(root, path);
2931         if (IS_ERR(inode))
2932                 return 0;
2933
2934         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2935         if (ret) {
2936                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2937 #ifdef DEBUG
2938                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2939                        "for root %llu\n", root->root_key.objectid);
2940 #endif
2941         }
2942
2943         iput(inode);
2944         return ret;
2945 }