Btrfs: make sure btrfs_remove_free_space doesn't leak EAGAIN
[pandora-kernel.git] / fs / btrfs / free-space-cache.c
1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
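/*
 * Find the on-disk free space cache inode for the cache keyed by 'offset'
 * (the block group's objectid).  The free space header item stores the key
 * of the cache inode; read it and iget the inode.  __GFP_FS is cleared on
 * the mapping's flags so page cache allocations for it won't recurse back
 * into the filesystem during reclaim.
 */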
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
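/*
 * Like __lookup_free_space_inode() but caches the inode pointer in the
 * block group.  If the inode lacks NODATASUM/NODATACOW it is an old style
 * cache inode; set those flags and mark the cache BTRFS_DC_CLEAR so it gets
 * rewritten in the new format.
 */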
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         loff_t oldsize;
202         int ret = 0;
203
204         rsv = trans->block_rsv;
205         trans->block_rsv = root->orphan_block_rsv;
206         ret = btrfs_block_rsv_check(root, root->orphan_block_rsv, 5);
207         if (ret)
208                 return ret;
209
210         oldsize = i_size_read(inode);
211         btrfs_i_size_write(inode, 0);
212         truncate_pagecache(inode, oldsize, 0);
213
214         /*
215          * We don't need an orphan item because truncating the free space cache
216          * will never be split across transactions.
217          */
218         ret = btrfs_truncate_inode_items(trans, root, inode,
219                                          0, BTRFS_EXTENT_DATA_KEY);
220
221         trans->block_rsv = rsv;
222         if (ret) {
223                 WARN_ON(1);
224                 return ret;
225         }
226
227         ret = btrfs_update_inode(trans, root, inode);
228         return ret;
229 }
230
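/*
 * Kick off readahead over the whole cache file so the page-by-page reads in
 * io_ctl_prepare_pages() mostly find the data already in the page cache.
 */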
231 static int readahead_cache(struct inode *inode)
232 {
233         struct file_ra_state *ra;
234         unsigned long last_index;
235
236         ra = kzalloc(sizeof(*ra), GFP_NOFS);
237         if (!ra)
238                 return -ENOMEM;
239
240         file_ra_state_init(ra, inode->i_mapping);
241         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
242
243         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
244
245         kfree(ra);
246
247         return 0;
248 }
249
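/*
 * State for streaming the free space cache to/from its backing pages:
 * 'pages' holds every locked page of the file, 'cur'/'orig' track the
 * currently kmapped page and 'size' the room left in it.  'check_crcs' is
 * set for block group caches, whose per-page crcs live at the front of the
 * first page.
 */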
250 struct io_ctl {
251         void *cur, *orig;
252         struct page *page;
253         struct page **pages;
254         struct btrfs_root *root;
255         unsigned long size;
256         int index;
257         int num_pages;
258         unsigned check_crcs:1;
259 };
260
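/*
 * Size the page array from i_size.  Only the inode map cache
 * (BTRFS_FREE_INO_OBJECTID) skips the inline crcs; block group caches carry
 * them inline in the first page.
 */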
261 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
262                        struct btrfs_root *root)
263 {
264         memset(io_ctl, 0, sizeof(struct io_ctl));
265         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
266                 PAGE_CACHE_SHIFT;
267         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
268                                 GFP_NOFS);
269         if (!io_ctl->pages)
270                 return -ENOMEM;
271         io_ctl->root = root;
272         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
273                 io_ctl->check_crcs = 1;
274         return 0;
275 }
276
277 static void io_ctl_free(struct io_ctl *io_ctl)
278 {
279         kfree(io_ctl->pages);
280 }
281
282 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
283 {
284         if (io_ctl->cur) {
285                 kunmap(io_ctl->page);
286                 io_ctl->cur = NULL;
287                 io_ctl->orig = NULL;
288         }
289 }
290
291 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
292 {
293         WARN_ON(io_ctl->cur);
294         BUG_ON(io_ctl->index >= io_ctl->num_pages);
295         io_ctl->page = io_ctl->pages[io_ctl->index++];
296         io_ctl->cur = kmap(io_ctl->page);
297         io_ctl->orig = io_ctl->cur;
298         io_ctl->size = PAGE_CACHE_SIZE;
299         if (clear)
300                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
301 }
302
303 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
304 {
305         int i;
306
307         io_ctl_unmap_page(io_ctl);
308
309         for (i = 0; i < io_ctl->num_pages; i++) {
310                 ClearPageChecked(io_ctl->pages[i]);
311                 unlock_page(io_ctl->pages[i]);
312                 page_cache_release(io_ctl->pages[i]);
313         }
314 }
315
316 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
317                                 int uptodate)
318 {
319         struct page *page;
320         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
321         int i;
322
323         for (i = 0; i < io_ctl->num_pages; i++) {
324                 page = find_or_create_page(inode->i_mapping, i, mask);
325                 if (!page) {
326                         io_ctl_drop_pages(io_ctl);
327                         return -ENOMEM;
328                 }
329                 io_ctl->pages[i] = page;
330                 if (uptodate && !PageUptodate(page)) {
331                         btrfs_readpage(NULL, page);
332                         lock_page(page);
333                         if (!PageUptodate(page)) {
334                                 printk(KERN_ERR "btrfs: error reading free "
335                                        "space cache\n");
336                                 io_ctl_drop_pages(io_ctl);
337                                 return -EIO;
338                         }
339                 }
340         }
341
342         return 0;
343 }
344
345 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
346 {
347         u64 *val;
348
349         io_ctl_map_page(io_ctl, 1);
350
351         /*
352          * Skip the csum areas.  If we don't check crcs then we just have a
353          * 64bit chunk at the front of the first page.
354          */
355         if (io_ctl->check_crcs) {
356                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
357                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
358         } else {
359                 io_ctl->cur += sizeof(u64);
360                 io_ctl->size -= sizeof(u64) * 2;
361         }
362
363         val = io_ctl->cur;
364         *val = cpu_to_le64(generation);
365         io_ctl->cur += sizeof(u64);
366 }
367
368 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
369 {
370         u64 *gen;
371
372         /*
373          * Skip the crc area.  If we don't check crcs then we just have a 64bit
374          * chunk at the front of the first page.
375          */
376         if (io_ctl->check_crcs) {
377                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
378                 io_ctl->size -= sizeof(u64) +
379                         (sizeof(u32) * io_ctl->num_pages);
380         } else {
381                 io_ctl->cur += sizeof(u64);
382                 io_ctl->size -= sizeof(u64) * 2;
383         }
384
385         gen = io_ctl->cur;
386         if (le64_to_cpu(*gen) != generation) {
387                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
388                                    "(%Lu) does not match inode (%Lu)\n", *gen,
389                                    generation);
390                 io_ctl_unmap_page(io_ctl);
391                 return -EIO;
392         }
393         io_ctl->cur += sizeof(u64);
394         return 0;
395 }
396
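/*
 * Checksum the page we just finished (skipping the crc array itself when it
 * is page 0) and store the result in slot 'index' of the crc array at the
 * start of the first page.
 */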
397 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
398 {
399         u32 *tmp;
400         u32 crc = ~(u32)0;
401         unsigned offset = 0;
402
403         if (!io_ctl->check_crcs) {
404                 io_ctl_unmap_page(io_ctl);
405                 return;
406         }
407
408         if (index == 0)
409                 offset = sizeof(u32) * io_ctl->num_pages;
410
411         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
412                               PAGE_CACHE_SIZE - offset);
413         btrfs_csum_final(crc, (char *)&crc);
414         io_ctl_unmap_page(io_ctl);
415         tmp = kmap(io_ctl->pages[0]);
416         tmp += index;
417         *tmp = crc;
418         kunmap(io_ctl->pages[0]);
419 }
420
421 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
422 {
423         u32 *tmp, val;
424         u32 crc = ~(u32)0;
425         unsigned offset = 0;
426
427         if (!io_ctl->check_crcs) {
428                 io_ctl_map_page(io_ctl, 0);
429                 return 0;
430         }
431
432         if (index == 0)
433                 offset = sizeof(u32) * io_ctl->num_pages;
434
435         tmp = kmap(io_ctl->pages[0]);
436         tmp += index;
437         val = *tmp;
438         kunmap(io_ctl->pages[0]);
439
440         io_ctl_map_page(io_ctl, 0);
441         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
442                               PAGE_CACHE_SIZE - offset);
443         btrfs_csum_final(crc, (char *)&crc);
444         if (val != crc) {
445                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
446                                    "space cache\n");
447                 io_ctl_unmap_page(io_ctl);
448                 return -EIO;
449         }
450
451         return 0;
452 }
453
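/*
 * Emit one on-disk entry (offset, bytes, extent-or-bitmap type) at the
 * current cursor.  When the page fills up, crc it and map the next one so
 * the caller can keep adding entries.
 */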
454 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
455                             void *bitmap)
456 {
457         struct btrfs_free_space_entry *entry;
458
459         if (!io_ctl->cur)
460                 return -ENOSPC;
461
462         entry = io_ctl->cur;
463         entry->offset = cpu_to_le64(offset);
464         entry->bytes = cpu_to_le64(bytes);
465         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
466                 BTRFS_FREE_SPACE_EXTENT;
467         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
468         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
469
470         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
471                 return 0;
472
473         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
474
475         /* No more pages to map */
476         if (io_ctl->index >= io_ctl->num_pages)
477                 return 0;
478
479         /* map the next page */
480         io_ctl_map_page(io_ctl, 1);
481         return 0;
482 }
483
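/*
 * Bitmap payloads always occupy a full page.  If entries already used part
 * of the current page, finish (crc) it and start a fresh page before
 * copying the bitmap in.
 */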
484 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
485 {
486         if (!io_ctl->cur)
487                 return -ENOSPC;
488
489         /*
490          * If we aren't at the start of the current page, unmap this one and
491          * map the next one if there is any left.
492          */
493         if (io_ctl->cur != io_ctl->orig) {
494                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
495                 if (io_ctl->index >= io_ctl->num_pages)
496                         return -ENOSPC;
497                 io_ctl_map_page(io_ctl, 0);
498         }
499
500         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
501         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
502         if (io_ctl->index < io_ctl->num_pages)
503                 io_ctl_map_page(io_ctl, 0);
504         return 0;
505 }
506
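/*
 * Finish off the page we stopped on, then map and zero every remaining page
 * so no stale data makes it to disk; each zeroed page still gets a crc.
 */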
507 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
508 {
509         /*
510          * If we're not on the boundary we know we've modified the page and we
511          * need to crc the page.
512          */
513         if (io_ctl->cur != io_ctl->orig)
514                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
515         else
516                 io_ctl_unmap_page(io_ctl);
517
518         while (io_ctl->index < io_ctl->num_pages) {
519                 io_ctl_map_page(io_ctl, 1);
520                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
521         }
522 }
523
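/*
 * Read-side counterpart of io_ctl_add_entry(): decode one entry at the
 * cursor and, once the page is exhausted, unmap it and verify the crc of
 * the next page before it is used.
 */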
524 static int io_ctl_read_entry(struct io_ctl *io_ctl,
525                             struct btrfs_free_space *entry, u8 *type)
526 {
527         struct btrfs_free_space_entry *e;
528
529         e = io_ctl->cur;
530         entry->offset = le64_to_cpu(e->offset);
531         entry->bytes = le64_to_cpu(e->bytes);
532         *type = e->type;
533         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
534         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
535
536         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
537                 return 0;
538
539         io_ctl_unmap_page(io_ctl);
540
541         if (io_ctl->index >= io_ctl->num_pages)
542                 return 0;
543
544         return io_ctl_check_crc(io_ctl, io_ctl->index);
545 }
546
547 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
548                               struct btrfs_free_space *entry)
549 {
550         int ret;
551
552         if (io_ctl->cur && io_ctl->cur != io_ctl->orig)
553                 io_ctl_unmap_page(io_ctl);
554
555         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
556         if (ret)
557                 return ret;
558
559         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
560         io_ctl_unmap_page(io_ctl);
561
562         return 0;
563 }
564
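/*
 * Read the cache file for 'offset' back into 'ctl': check that the inode
 * and header generations match, then re-link every extent and bitmap entry.
 * Returns 1 if the cache was loaded, 0 or a negative error if it could not
 * be used; on any inconsistency the partially built cache is dropped again.
 */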
565 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
566                             struct btrfs_free_space_ctl *ctl,
567                             struct btrfs_path *path, u64 offset)
568 {
569         struct btrfs_free_space_header *header;
570         struct extent_buffer *leaf;
571         struct io_ctl io_ctl;
572         struct btrfs_key key;
573         struct btrfs_free_space *e, *n;
574         struct list_head bitmaps;
575         u64 num_entries;
576         u64 num_bitmaps;
577         u64 generation;
578         u8 type;
579         int ret = 0;
580
581         INIT_LIST_HEAD(&bitmaps);
582
583         /* Nothing in the space cache, goodbye */
584         if (!i_size_read(inode))
585                 return 0;
586
587         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
588         key.offset = offset;
589         key.type = 0;
590
591         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
592         if (ret < 0)
593                 return 0;
594         else if (ret > 0) {
595                 btrfs_release_path(path);
596                 return 0;
597         }
598
599         ret = -1;
600
601         leaf = path->nodes[0];
602         header = btrfs_item_ptr(leaf, path->slots[0],
603                                 struct btrfs_free_space_header);
604         num_entries = btrfs_free_space_entries(leaf, header);
605         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
606         generation = btrfs_free_space_generation(leaf, header);
607         btrfs_release_path(path);
608
609         if (BTRFS_I(inode)->generation != generation) {
610                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
611                        " not match free space cache generation (%llu)\n",
612                        (unsigned long long)BTRFS_I(inode)->generation,
613                        (unsigned long long)generation);
614                 return 0;
615         }
616
617         if (!num_entries)
618                 return 0;
619
620         io_ctl_init(&io_ctl, inode, root);
621         ret = readahead_cache(inode);
622         if (ret)
623                 goto out;
624
625         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
626         if (ret)
627                 goto out;
628
629         ret = io_ctl_check_crc(&io_ctl, 0);
630         if (ret)
631                 goto free_cache;
632
633         ret = io_ctl_check_generation(&io_ctl, generation);
634         if (ret)
635                 goto free_cache;
636
637         while (num_entries) {
638                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
639                                       GFP_NOFS);
640                 if (!e)
641                         goto free_cache;
642
643                 ret = io_ctl_read_entry(&io_ctl, e, &type);
644                 if (ret) {
645                         kmem_cache_free(btrfs_free_space_cachep, e);
646                         goto free_cache;
647                 }
648
649                 if (!e->bytes) {
650                         kmem_cache_free(btrfs_free_space_cachep, e);
651                         goto free_cache;
652                 }
653
654                 if (type == BTRFS_FREE_SPACE_EXTENT) {
655                         spin_lock(&ctl->tree_lock);
656                         ret = link_free_space(ctl, e);
657                         spin_unlock(&ctl->tree_lock);
658                         if (ret) {
659                                 printk(KERN_ERR "Duplicate entries in "
660                                        "free space cache, dumping\n");
661                                 kmem_cache_free(btrfs_free_space_cachep, e);
662                                 goto free_cache;
663                         }
664                 } else {
665                         BUG_ON(!num_bitmaps);
666                         num_bitmaps--;
667                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
668                         if (!e->bitmap) {
669                                 kmem_cache_free(
670                                         btrfs_free_space_cachep, e);
671                                 goto free_cache;
672                         }
673                         spin_lock(&ctl->tree_lock);
674                         ret = link_free_space(ctl, e);
675                         ctl->total_bitmaps++;
676                         ctl->op->recalc_thresholds(ctl);
677                         spin_unlock(&ctl->tree_lock);
678                         if (ret) {
679                                 printk(KERN_ERR "Duplicate entries in "
680                                        "free space cache, dumping\n");
681                                 kmem_cache_free(btrfs_free_space_cachep, e);
682                                 goto free_cache;
683                         }
684                         list_add_tail(&e->list, &bitmaps);
685                 }
686
687                 num_entries--;
688         }
689
690         /*
691          * The bitmap pages are stored after all of the entries, in the
692          * same order the bitmap entries were added to the cache above.
693          */
694         list_for_each_entry_safe(e, n, &bitmaps, list) {
695                 list_del_init(&e->list);
696                 ret = io_ctl_read_bitmap(&io_ctl, e);
697                 if (ret)
698                         goto free_cache;
699         }
700
701         io_ctl_drop_pages(&io_ctl);
702         ret = 1;
703 out:
704         io_ctl_free(&io_ctl);
705         return ret;
706 free_cache:
707         io_ctl_drop_pages(&io_ctl);
708         __btrfs_remove_free_space_cache(ctl);
709         goto out;
710 }
711
712 int load_free_space_cache(struct btrfs_fs_info *fs_info,
713                           struct btrfs_block_group_cache *block_group)
714 {
715         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
716         struct btrfs_root *root = fs_info->tree_root;
717         struct inode *inode;
718         struct btrfs_path *path;
719         int ret = 0;
720         bool matched;
721         u64 used = btrfs_block_group_used(&block_group->item);
722
723         /*
724          * If we're unmounting then just return, since this does a search on the
725          * normal root and not the commit root and we could deadlock.
726          */
727         if (btrfs_fs_closing(fs_info))
728                 return 0;
729
730         /*
731          * If this block group has been marked to be cleared for one reason or
732          * another then we can't trust the on disk cache, so just return.
733          */
734         spin_lock(&block_group->lock);
735         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
736                 spin_unlock(&block_group->lock);
737                 return 0;
738         }
739         spin_unlock(&block_group->lock);
740
741         path = btrfs_alloc_path();
742         if (!path)
743                 return 0;
744
745         inode = lookup_free_space_inode(root, block_group, path);
746         if (IS_ERR(inode)) {
747                 btrfs_free_path(path);
748                 return 0;
749         }
750
751         /* We may have converted the inode and made the cache invalid. */
752         spin_lock(&block_group->lock);
753         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
754                 spin_unlock(&block_group->lock);
755                 goto out;
756         }
757         spin_unlock(&block_group->lock);
758
759         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
760                                       path, block_group->key.objectid);
761         btrfs_free_path(path);
762         if (ret <= 0)
763                 goto out;
764
765         spin_lock(&ctl->tree_lock);
766         matched = (ctl->free_space == (block_group->key.offset - used -
767                                        block_group->bytes_super));
768         spin_unlock(&ctl->tree_lock);
769
770         if (!matched) {
771                 __btrfs_remove_free_space_cache(ctl);
772                 printk(KERN_ERR "block group %llu has the wrong amount of free "
773                        "space\n", block_group->key.objectid);
774                 ret = -1;
775         }
776 out:
777         if (ret < 0) {
778                 /* This cache is bogus, make sure it gets cleared */
779                 spin_lock(&block_group->lock);
780                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
781                 spin_unlock(&block_group->lock);
782                 ret = 0;
783
784                 printk(KERN_ERR "btrfs: failed to load free space cache "
785                        "for block group %llu\n", block_group->key.objectid);
786         }
787
788         iput(inode);
789         return ret;
790 }
791
792 /**
793  * __btrfs_write_out_cache - write out cached info to an inode
794  * @root - the root the inode belongs to
795  * @ctl - the free space cache we are going to write out
796  * @block_group - the block_group for this cache if it belongs to a block_group
797  * @trans - the trans handle
798  * @path - the path to use
799  * @offset - the offset for the key we'll insert
800  *
801  * This function writes out a free space cache struct to disk for quick recovery
802  * on mount.  This will return 0 if it was successfull in writing the cache out,
803  * on mount.  This will return 0 if it was successful in writing the cache out,
804  */
805 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
806                             struct btrfs_free_space_ctl *ctl,
807                             struct btrfs_block_group_cache *block_group,
808                             struct btrfs_trans_handle *trans,
809                             struct btrfs_path *path, u64 offset)
810 {
811         struct btrfs_free_space_header *header;
812         struct extent_buffer *leaf;
813         struct rb_node *node;
814         struct list_head *pos, *n;
815         struct extent_state *cached_state = NULL;
816         struct btrfs_free_cluster *cluster = NULL;
817         struct extent_io_tree *unpin = NULL;
818         struct io_ctl io_ctl;
819         struct list_head bitmap_list;
820         struct btrfs_key key;
821         u64 start, end, len;
822         int entries = 0;
823         int bitmaps = 0;
824         int ret;
825         int err = -1;
826
827         INIT_LIST_HEAD(&bitmap_list);
828
829         if (!i_size_read(inode))
830                 return -1;
831
832         io_ctl_init(&io_ctl, inode, root);
833
834         /* Get the cluster for this block_group if it exists */
835         if (block_group && !list_empty(&block_group->cluster_list))
836                 cluster = list_entry(block_group->cluster_list.next,
837                                      struct btrfs_free_cluster,
838                                      block_group_list);
839
840         /*
841          * We shouldn't have switched the pinned extents yet so this is the
842          * right one
843          */
844         unpin = root->fs_info->pinned_extents;
845
846         /* Lock all pages first so we can lock the extent safely. */
847         io_ctl_prepare_pages(&io_ctl, inode, 0);
848
849         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
850                          0, &cached_state, GFP_NOFS);
851
852         /*
853          * When searching for pinned extents, we need to start at our start
854          * offset.
855          */
856         if (block_group)
857                 start = block_group->key.objectid;
858
859         node = rb_first(&ctl->free_space_offset);
860         if (!node && cluster) {
861                 node = rb_first(&cluster->root);
862                 cluster = NULL;
863         }
864
865         /* Make sure we can fit our crcs into the first page */
866         if (io_ctl.check_crcs &&
867             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
868                 WARN_ON(1);
869                 goto out_nospc;
870         }
871
872         io_ctl_set_generation(&io_ctl, trans->transid);
873
874         /* Write out the extent entries */
875         while (node) {
876                 struct btrfs_free_space *e;
877
878                 e = rb_entry(node, struct btrfs_free_space, offset_index);
879                 entries++;
880
881                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
882                                        e->bitmap);
883                 if (ret)
884                         goto out_nospc;
885
886                 if (e->bitmap) {
887                         list_add_tail(&e->list, &bitmap_list);
888                         bitmaps++;
889                 }
890                 node = rb_next(node);
891                 if (!node && cluster) {
892                         node = rb_first(&cluster->root);
893                         cluster = NULL;
894                 }
895         }
896
897         /*
898          * We want to add any pinned extents to our free space cache
899          * so we don't leak the space
900          */
901         while (block_group && (start < block_group->key.objectid +
902                                block_group->key.offset)) {
903                 ret = find_first_extent_bit(unpin, start, &start, &end,
904                                             EXTENT_DIRTY);
905                 if (ret) {
906                         ret = 0;
907                         break;
908                 }
909
910                 /* This pinned extent is out of our range */
911                 if (start >= block_group->key.objectid +
912                     block_group->key.offset)
913                         break;
914
915                 len = block_group->key.objectid +
916                         block_group->key.offset - start;
917                 len = min(len, end + 1 - start);
918
919                 entries++;
920                 ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
921                 if (ret)
922                         goto out_nospc;
923
924                 start = end + 1;
925         }
926
927         /* Write out the bitmaps */
928         list_for_each_safe(pos, n, &bitmap_list) {
929                 struct btrfs_free_space *entry =
930                         list_entry(pos, struct btrfs_free_space, list);
931
932                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
933                 if (ret)
934                         goto out_nospc;
935                 list_del_init(&entry->list);
936         }
937
938         /* Zero out the rest of the pages just to make sure */
939         io_ctl_zero_remaining_pages(&io_ctl);
940
941         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
942                                 0, i_size_read(inode), &cached_state);
943         io_ctl_drop_pages(&io_ctl);
944         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
945                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
946
947         if (ret)
948                 goto out;
949
950
951         ret = filemap_write_and_wait(inode->i_mapping);
952         if (ret)
953                 goto out;
954
955         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
956         key.offset = offset;
957         key.type = 0;
958
959         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
960         if (ret < 0) {
961                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
962                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
963                                  GFP_NOFS);
964                 goto out;
965         }
966         leaf = path->nodes[0];
967         if (ret > 0) {
968                 struct btrfs_key found_key;
969                 BUG_ON(!path->slots[0]);
970                 path->slots[0]--;
971                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
972                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
973                     found_key.offset != offset) {
974                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
975                                          inode->i_size - 1,
976                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
977                                          NULL, GFP_NOFS);
978                         btrfs_release_path(path);
979                         goto out;
980                 }
981         }
982
983         BTRFS_I(inode)->generation = trans->transid;
984         header = btrfs_item_ptr(leaf, path->slots[0],
985                                 struct btrfs_free_space_header);
986         btrfs_set_free_space_entries(leaf, header, entries);
987         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
988         btrfs_set_free_space_generation(leaf, header, trans->transid);
989         btrfs_mark_buffer_dirty(leaf);
990         btrfs_release_path(path);
991
992         err = 0;
993 out:
994         io_ctl_free(&io_ctl);
995         if (err) {
996                 invalidate_inode_pages2(inode->i_mapping);
997                 BTRFS_I(inode)->generation = 0;
998         }
999         btrfs_update_inode(trans, root, inode);
1000         return err;
1001
1002 out_nospc:
1003         list_for_each_safe(pos, n, &bitmap_list) {
1004                 struct btrfs_free_space *entry =
1005                         list_entry(pos, struct btrfs_free_space, list);
1006                 list_del_init(&entry->list);
1007         }
1008         io_ctl_drop_pages(&io_ctl);
1009         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1010                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1011         goto out;
1012 }
1013
1014 int btrfs_write_out_cache(struct btrfs_root *root,
1015                           struct btrfs_trans_handle *trans,
1016                           struct btrfs_block_group_cache *block_group,
1017                           struct btrfs_path *path)
1018 {
1019         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1020         struct inode *inode;
1021         int ret = 0;
1022
1023         root = root->fs_info->tree_root;
1024
1025         spin_lock(&block_group->lock);
1026         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1027                 spin_unlock(&block_group->lock);
1028                 return 0;
1029         }
1030         spin_unlock(&block_group->lock);
1031
1032         inode = lookup_free_space_inode(root, block_group, path);
1033         if (IS_ERR(inode))
1034                 return 0;
1035
1036         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1037                                       path, block_group->key.objectid);
1038         if (ret) {
1039                 spin_lock(&block_group->lock);
1040                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1041                 spin_unlock(&block_group->lock);
1042                 ret = 0;
1043 #ifdef DEBUG
1044                 printk(KERN_ERR "btrfs: failed to write free space cache "
1045                        "for block group %llu\n", block_group->key.objectid);
1046 #endif
1047         }
1048
1049         iput(inode);
1050         return ret;
1051 }
1052
1053 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1054                                           u64 offset)
1055 {
1056         BUG_ON(offset < bitmap_start);
1057         offset -= bitmap_start;
1058         return (unsigned long)(div_u64(offset, unit));
1059 }
1060
1061 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1062 {
1063         return (unsigned long)(div_u64(bytes, unit));
1064 }
1065
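/*
 * Round 'offset' down to the start of the bitmap that covers it.  Each
 * bitmap covers BITS_PER_BITMAP * ctl->unit bytes, measured from ctl->start.
 */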
1066 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1067                                    u64 offset)
1068 {
1069         u64 bitmap_start;
1070         u64 bytes_per_bitmap;
1071
1072         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1073         bitmap_start = offset - ctl->start;
1074         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1075         bitmap_start *= bytes_per_bitmap;
1076         bitmap_start += ctl->start;
1077
1078         return bitmap_start;
1079 }
1080
1081 static int tree_insert_offset(struct rb_root *root, u64 offset,
1082                               struct rb_node *node, int bitmap)
1083 {
1084         struct rb_node **p = &root->rb_node;
1085         struct rb_node *parent = NULL;
1086         struct btrfs_free_space *info;
1087
1088         while (*p) {
1089                 parent = *p;
1090                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1091
1092                 if (offset < info->offset) {
1093                         p = &(*p)->rb_left;
1094                 } else if (offset > info->offset) {
1095                         p = &(*p)->rb_right;
1096                 } else {
1097                         /*
1098                          * we could have a bitmap entry and an extent entry
1099                          * share the same offset.  If this is the case, we want
1100                          * the extent entry to always be found first if we do a
1101                          * linear search through the tree, since we want to have
1102                          * the quickest allocation time, and allocating from an
1103                          * extent is faster than allocating from a bitmap.  So
1104                          * if we're inserting a bitmap and we find an entry at
1105                          * this offset, we want to go right, or after this entry
1106                          * logically.  If we are inserting an extent and we've
1107                          * found a bitmap, we want to go left, or before
1108                          * logically.
1109                          */
1110                         if (bitmap) {
1111                                 if (info->bitmap) {
1112                                         WARN_ON_ONCE(1);
1113                                         return -EEXIST;
1114                                 }
1115                                 p = &(*p)->rb_right;
1116                         } else {
1117                                 if (!info->bitmap) {
1118                                         WARN_ON_ONCE(1);
1119                                         return -EEXIST;
1120                                 }
1121                                 p = &(*p)->rb_left;
1122                         }
1123                 }
1124         }
1125
1126         rb_link_node(node, parent, p);
1127         rb_insert_color(node, root);
1128
1129         return 0;
1130 }
1131
1132 /*
1133  * searches the tree for the given offset.
1134  *
1135  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1136  * want a section that is at least 'bytes' in size and comes at or after the
1137  * given offset.
1138  */
1139 static struct btrfs_free_space *
1140 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1141                    u64 offset, int bitmap_only, int fuzzy)
1142 {
1143         struct rb_node *n = ctl->free_space_offset.rb_node;
1144         struct btrfs_free_space *entry, *prev = NULL;
1145
1146         /* find entry that is closest to the 'offset' */
1147         while (1) {
1148                 if (!n) {
1149                         entry = NULL;
1150                         break;
1151                 }
1152
1153                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1154                 prev = entry;
1155
1156                 if (offset < entry->offset)
1157                         n = n->rb_left;
1158                 else if (offset > entry->offset)
1159                         n = n->rb_right;
1160                 else
1161                         break;
1162         }
1163
1164         if (bitmap_only) {
1165                 if (!entry)
1166                         return NULL;
1167                 if (entry->bitmap)
1168                         return entry;
1169
1170                 /*
1171                  * bitmap entry and extent entry may share same offset,
1172                  * in that case, bitmap entry comes after extent entry.
1173                  */
1174                 n = rb_next(n);
1175                 if (!n)
1176                         return NULL;
1177                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1178                 if (entry->offset != offset)
1179                         return NULL;
1180
1181                 WARN_ON(!entry->bitmap);
1182                 return entry;
1183         } else if (entry) {
1184                 if (entry->bitmap) {
1185                         /*
1186                          * if previous extent entry covers the offset,
1187                          * we should return it instead of the bitmap entry
1188                          */
1189                         n = &entry->offset_index;
1190                         while (1) {
1191                                 n = rb_prev(n);
1192                                 if (!n)
1193                                         break;
1194                                 prev = rb_entry(n, struct btrfs_free_space,
1195                                                 offset_index);
1196                                 if (!prev->bitmap) {
1197                                         if (prev->offset + prev->bytes > offset)
1198                                                 entry = prev;
1199                                         break;
1200                                 }
1201                         }
1202                 }
1203                 return entry;
1204         }
1205
1206         if (!prev)
1207                 return NULL;
1208
1209         /* find last entry before the 'offset' */
1210         entry = prev;
1211         if (entry->offset > offset) {
1212                 n = rb_prev(&entry->offset_index);
1213                 if (n) {
1214                         entry = rb_entry(n, struct btrfs_free_space,
1215                                         offset_index);
1216                         BUG_ON(entry->offset > offset);
1217                 } else {
1218                         if (fuzzy)
1219                                 return entry;
1220                         else
1221                                 return NULL;
1222                 }
1223         }
1224
1225         if (entry->bitmap) {
1226                 n = &entry->offset_index;
1227                 while (1) {
1228                         n = rb_prev(n);
1229                         if (!n)
1230                                 break;
1231                         prev = rb_entry(n, struct btrfs_free_space,
1232                                         offset_index);
1233                         if (!prev->bitmap) {
1234                                 if (prev->offset + prev->bytes > offset)
1235                                         return prev;
1236                                 break;
1237                         }
1238                 }
1239                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1240                         return entry;
1241         } else if (entry->offset + entry->bytes > offset)
1242                 return entry;
1243
1244         if (!fuzzy)
1245                 return NULL;
1246
1247         while (1) {
1248                 if (entry->bitmap) {
1249                         if (entry->offset + BITS_PER_BITMAP *
1250                             ctl->unit > offset)
1251                                 break;
1252                 } else {
1253                         if (entry->offset + entry->bytes > offset)
1254                                 break;
1255                 }
1256
1257                 n = rb_next(&entry->offset_index);
1258                 if (!n)
1259                         return NULL;
1260                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1261         }
1262         return entry;
1263 }
1264
1265 static inline void
1266 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1267                     struct btrfs_free_space *info)
1268 {
1269         rb_erase(&info->offset_index, &ctl->free_space_offset);
1270         ctl->free_extents--;
1271 }
1272
1273 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1274                               struct btrfs_free_space *info)
1275 {
1276         __unlink_free_space(ctl, info);
1277         ctl->free_space -= info->bytes;
1278 }
1279
1280 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1281                            struct btrfs_free_space *info)
1282 {
1283         int ret = 0;
1284
1285         BUG_ON(!info->bitmap && !info->bytes);
1286         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1287                                  &info->offset_index, (info->bitmap != NULL));
1288         if (ret)
1289                 return ret;
1290
1291         ctl->free_space += info->bytes;
1292         ctl->free_extents++;
1293         return ret;
1294 }
1295
1296 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1297 {
1298         struct btrfs_block_group_cache *block_group = ctl->private;
1299         u64 max_bytes;
1300         u64 bitmap_bytes;
1301         u64 extent_bytes;
1302         u64 size = block_group->key.offset;
1303         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1304         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1305
1306         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1307
1308         /*
1309          * The goal is to keep the total amount of memory used per 1gb of space
1310          * at or below 32k, so we need to adjust how much memory we allow to be
1311          * used by extent based free space tracking
1312          */
1313         if (size < 1024 * 1024 * 1024)
1314                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1315         else
1316                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1317                         div64_u64(size, 1024 * 1024 * 1024);
1318
1319         /*
1320          * we want to account for 1 more bitmap than what we have so we can make
1321          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1322          * we add more bitmaps.
1323          */
1324         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1325
1326         if (bitmap_bytes >= max_bytes) {
1327                 ctl->extents_thresh = 0;
1328                 return;
1329         }
1330
1331         /*
1332          * we want the extent entry threshold to always be at most 1/2 the max
1333          * bytes we can have, or whatever is less than that.
1334          */
1335         extent_bytes = max_bytes - bitmap_bytes;
1336         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1337
1338         ctl->extents_thresh =
1339                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1340 }
1341
1342 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1343                                        struct btrfs_free_space *info,
1344                                        u64 offset, u64 bytes)
1345 {
1346         unsigned long start, count;
1347
1348         start = offset_to_bit(info->offset, ctl->unit, offset);
1349         count = bytes_to_bits(bytes, ctl->unit);
1350         BUG_ON(start + count > BITS_PER_BITMAP);
1351
1352         bitmap_clear(info->bitmap, start, count);
1353
1354         info->bytes -= bytes;
1355 }
1356
1357 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1358                               struct btrfs_free_space *info, u64 offset,
1359                               u64 bytes)
1360 {
1361         __bitmap_clear_bits(ctl, info, offset, bytes);
1362         ctl->free_space -= bytes;
1363 }
1364
1365 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1366                             struct btrfs_free_space *info, u64 offset,
1367                             u64 bytes)
1368 {
1369         unsigned long start, count;
1370
1371         start = offset_to_bit(info->offset, ctl->unit, offset);
1372         count = bytes_to_bits(bytes, ctl->unit);
1373         BUG_ON(start + count > BITS_PER_BITMAP);
1374
1375         bitmap_set(info->bitmap, start, count);
1376
1377         info->bytes += bytes;
1378         ctl->free_space += bytes;
1379 }
1380
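/*
 * Look for a run of at least *bytes worth of set bits in the bitmap,
 * starting at *offset.  On success *offset and *bytes are updated to
 * describe the run that was found and 0 is returned; otherwise -1.
 */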
1381 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1382                          struct btrfs_free_space *bitmap_info, u64 *offset,
1383                          u64 *bytes)
1384 {
1385         unsigned long found_bits = 0;
1386         unsigned long bits, i;
1387         unsigned long next_zero;
1388
1389         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1390                           max_t(u64, *offset, bitmap_info->offset));
1391         bits = bytes_to_bits(*bytes, ctl->unit);
1392
1393         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1394              i < BITS_PER_BITMAP;
1395              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1396                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1397                                                BITS_PER_BITMAP, i);
1398                 if ((next_zero - i) >= bits) {
1399                         found_bits = next_zero - i;
1400                         break;
1401                 }
1402                 i = next_zero;
1403         }
1404
1405         if (found_bits) {
1406                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1407                 *bytes = (u64)(found_bits) * ctl->unit;
1408                 return 0;
1409         }
1410
1411         return -1;
1412 }
1413
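/*
 * Find the first entry that can satisfy a *bytes allocation at or after
 * *offset.  Extent entries report their own offset/bytes; bitmap entries
 * are searched with search_bitmap().
 */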
1414 static struct btrfs_free_space *
1415 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1416 {
1417         struct btrfs_free_space *entry;
1418         struct rb_node *node;
1419         int ret;
1420
1421         if (!ctl->free_space_offset.rb_node)
1422                 return NULL;
1423
1424         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1425         if (!entry)
1426                 return NULL;
1427
1428         for (node = &entry->offset_index; node; node = rb_next(node)) {
1429                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1430                 if (entry->bytes < *bytes)
1431                         continue;
1432
1433                 if (entry->bitmap) {
1434                         ret = search_bitmap(ctl, entry, offset, bytes);
1435                         if (!ret)
1436                                 return entry;
1437                         continue;
1438                 }
1439
1440                 *offset = entry->offset;
1441                 *bytes = entry->bytes;
1442                 return entry;
1443         }
1444
1445         return NULL;
1446 }
1447
1448 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1449                            struct btrfs_free_space *info, u64 offset)
1450 {
1451         info->offset = offset_to_bitmap(ctl, offset);
1452         info->bytes = 0;
1453         link_free_space(ctl, info);
1454         ctl->total_bitmaps++;
1455
1456         ctl->op->recalc_thresholds(ctl);
1457 }
1458
1459 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1460                         struct btrfs_free_space *bitmap_info)
1461 {
1462         unlink_free_space(ctl, bitmap_info);
1463         kfree(bitmap_info->bitmap);
1464         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1465         ctl->total_bitmaps--;
1466         ctl->op->recalc_thresholds(ctl);
1467 }
1468
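/*
 * Clear *bytes worth of space starting at *offset out of this bitmap,
 * walking into the following bitmap when the range spans more than one.
 * Returns -EAGAIN when the caller has to retry against extent entries,
 * -EINVAL if we run out of entries while bytes still remain.
 */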
1469 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1470                               struct btrfs_free_space *bitmap_info,
1471                               u64 *offset, u64 *bytes)
1472 {
1473         u64 end;
1474         u64 search_start, search_bytes;
1475         int ret;
1476
1477 again:
1478         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1479
1480         /*
1481          * XXX - this can go away after a few releases.
1482          *
1483          * since the only user of btrfs_remove_free_space is the tree logging
1484          * stuff, and the only way to test that is under crash conditions, we
1485          * want to have this debug stuff here just in case something's not
1486          * working.  Search the bitmap for the space we are trying to use to
1487          * make sure it's actually there.  If it's not there then we need to stop
1488          * because something has gone wrong.
1489          */
1490         search_start = *offset;
1491         search_bytes = *bytes;
1492         search_bytes = min(search_bytes, end - search_start + 1);
1493         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1494         BUG_ON(ret < 0 || search_start != *offset);
1495
1496         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1497                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1498                 *bytes -= end - *offset + 1;
1499                 *offset = end + 1;
1500         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1501                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1502                 *bytes = 0;
1503         }
1504
1505         if (*bytes) {
1506                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1507                 if (!bitmap_info->bytes)
1508                         free_bitmap(ctl, bitmap_info);
1509
1510                 /*
1511                  * no entry after this bitmap, but we still have bytes to
1512                  * remove, so something has gone wrong.
1513                  */
1514                 if (!next)
1515                         return -EINVAL;
1516
1517                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1518                                        offset_index);
1519
1520                 /*
1521                  * if the next entry isn't a bitmap we need to return to let the
1522                  * extent stuff do its work.
1523                  */
1524                 if (!bitmap_info->bitmap)
1525                         return -EAGAIN;
1526
1527                 /*
1528                  * Ok the next item is a bitmap, but it may not actually hold
1529                  * the information for the rest of this free space stuff, so
1530                  * look for it, and if we don't find it return so we can try
1531                  * everything over again.
1532                  */
1533                 search_start = *offset;
1534                 search_bytes = *bytes;
1535                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1536                                     &search_bytes);
1537                 if (ret < 0 || search_start != *offset)
1538                         return -EAGAIN;
1539
1540                 goto again;
1541         } else if (!bitmap_info->bytes)
1542                 free_bitmap(ctl, bitmap_info);
1543
1544         return 0;
1545 }
1546
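/*
 * Set as many of the requested bytes as fit inside this bitmap and
 * return the number actually set; the caller deals with any remainder.
 */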
1547 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1548                                struct btrfs_free_space *info, u64 offset,
1549                                u64 bytes)
1550 {
1551         u64 bytes_to_set = 0;
1552         u64 end;
1553
1554         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1555
1556         bytes_to_set = min(end - offset, bytes);
1557
1558         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1559
1560         return bytes_to_set;
1561
1562 }
1563
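/*
 * Decide whether a chunk of free space should be tracked in a bitmap or
 * kept as a plain extent entry, based on how many extent slots remain
 * and how large the block group is.
 */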
1564 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1565                       struct btrfs_free_space *info)
1566 {
1567         struct btrfs_block_group_cache *block_group = ctl->private;
1568
1569         /*
1570          * If we are below the extents threshold then we can add this as an
1571          * extent, and don't have to deal with the bitmap
1572          */
1573         if (ctl->free_extents < ctl->extents_thresh) {
1574                 /*
1575                  * If this block group has some small extents we don't want to
1576                  * use up all of our free slots in the cache with them, we want
1577                  * to reserve them for larger extents; however, if we have plenty
1578                  * of cache left then go ahead and add them, no sense in adding
1579                  * the overhead of a bitmap if we don't have to.
1580                  */
1581                 if (info->bytes <= block_group->sectorsize * 4) {
1582                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1583                                 return false;
1584                 } else {
1585                         return false;
1586                 }
1587         }
1588
1589         /*
1590          * some block groups are so tiny they can't be enveloped by a bitmap, so
1591          * don't even bother to create a bitmap for this
1592          */
1593         if (BITS_PER_BITMAP * block_group->sectorsize >
1594             block_group->key.offset)
1595                 return false;
1596
1597         return true;
1598 }
1599
1600 static struct btrfs_free_space_op free_space_op = {
1601         .recalc_thresholds      = recalculate_thresholds,
1602         .use_bitmap             = use_bitmap,
1603 };
1604
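/*
 * Try to record the free space described by @info in a bitmap: first in
 * a bitmap linked into the block group's cluster, then in an existing
 * bitmap in the tree, and finally in a newly allocated bitmap.  Returns
 * 1 if the space was fully consumed, 0 if the caller should link it as
 * an extent entry, or a negative errno on allocation failure.
 */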
1605 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1606                               struct btrfs_free_space *info)
1607 {
1608         struct btrfs_free_space *bitmap_info;
1609         struct btrfs_block_group_cache *block_group = NULL;
1610         int added = 0;
1611         u64 bytes, offset, bytes_added;
1612         int ret;
1613
1614         bytes = info->bytes;
1615         offset = info->offset;
1616
1617         if (!ctl->op->use_bitmap(ctl, info))
1618                 return 0;
1619
1620         if (ctl->op == &free_space_op)
1621                 block_group = ctl->private;
1622 again:
1623         /*
1624          * Since we link bitmaps right into the cluster we need to see if we
1625          * have a cluster here, and if so and it has our bitmap we need to add
1626          * the free space to that bitmap.
1627          */
1628         if (block_group && !list_empty(&block_group->cluster_list)) {
1629                 struct btrfs_free_cluster *cluster;
1630                 struct rb_node *node;
1631                 struct btrfs_free_space *entry;
1632
1633                 cluster = list_entry(block_group->cluster_list.next,
1634                                      struct btrfs_free_cluster,
1635                                      block_group_list);
1636                 spin_lock(&cluster->lock);
1637                 node = rb_first(&cluster->root);
1638                 if (!node) {
1639                         spin_unlock(&cluster->lock);
1640                         goto no_cluster_bitmap;
1641                 }
1642
1643                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1644                 if (!entry->bitmap) {
1645                         spin_unlock(&cluster->lock);
1646                         goto no_cluster_bitmap;
1647                 }
1648
1649                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1650                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1651                                                           offset, bytes);
1652                         bytes -= bytes_added;
1653                         offset += bytes_added;
1654                 }
1655                 spin_unlock(&cluster->lock);
1656                 if (!bytes) {
1657                         ret = 1;
1658                         goto out;
1659                 }
1660         }
1661
1662 no_cluster_bitmap:
1663         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1664                                          1, 0);
1665         if (!bitmap_info) {
1666                 BUG_ON(added);
1667                 goto new_bitmap;
1668         }
1669
1670         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1671         bytes -= bytes_added;
1672         offset += bytes_added;
1673         added = 0;
1674
1675         if (!bytes) {
1676                 ret = 1;
1677                 goto out;
1678         } else
1679                 goto again;
1680
1681 new_bitmap:
1682         if (info && info->bitmap) {
1683                 add_new_bitmap(ctl, info, offset);
1684                 added = 1;
1685                 info = NULL;
1686                 goto again;
1687         } else {
1688                 spin_unlock(&ctl->tree_lock);
1689
1690                 /* no pre-allocated info, allocate a new one */
1691                 if (!info) {
1692                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1693                                                  GFP_NOFS);
1694                         if (!info) {
1695                                 spin_lock(&ctl->tree_lock);
1696                                 ret = -ENOMEM;
1697                                 goto out;
1698                         }
1699                 }
1700
1701                 /* allocate the bitmap */
1702                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1703                 spin_lock(&ctl->tree_lock);
1704                 if (!info->bitmap) {
1705                         ret = -ENOMEM;
1706                         goto out;
1707                 }
1708                 goto again;
1709         }
1710
1711 out:
1712         if (info) {
1713                 if (info->bitmap)
1714                         kfree(info->bitmap);
1715                 kmem_cache_free(btrfs_free_space_cachep, info);
1716         }
1717
1718         return ret;
1719 }
1720
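/*
 * Merge @info with any extent entries immediately adjacent to it on the
 * left or right, growing @info in place and freeing the absorbed
 * entries.  Returns true if anything was merged.
 */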
1721 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1722                           struct btrfs_free_space *info, bool update_stat)
1723 {
1724         struct btrfs_free_space *left_info;
1725         struct btrfs_free_space *right_info;
1726         bool merged = false;
1727         u64 offset = info->offset;
1728         u64 bytes = info->bytes;
1729
1730         /*
1731          * first we want to see if there is free space adjacent to the range we
1732          * are adding; if there is, remove that struct and add a new one to
1733          * cover the entire range
1734          */
1735         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1736         if (right_info && rb_prev(&right_info->offset_index))
1737                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1738                                      struct btrfs_free_space, offset_index);
1739         else
1740                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1741
1742         if (right_info && !right_info->bitmap) {
1743                 if (update_stat)
1744                         unlink_free_space(ctl, right_info);
1745                 else
1746                         __unlink_free_space(ctl, right_info);
1747                 info->bytes += right_info->bytes;
1748                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1749                 merged = true;
1750         }
1751
1752         if (left_info && !left_info->bitmap &&
1753             left_info->offset + left_info->bytes == offset) {
1754                 if (update_stat)
1755                         unlink_free_space(ctl, left_info);
1756                 else
1757                         __unlink_free_space(ctl, left_info);
1758                 info->offset = left_info->offset;
1759                 info->bytes += left_info->bytes;
1760                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1761                 merged = true;
1762         }
1763
1764         return merged;
1765 }
1766
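/*
 * Add [offset, offset + bytes) to the free space cache, merging with
 * neighbouring extents when possible and falling back to a bitmap once
 * the extent threshold has been reached.
 */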
1767 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1768                            u64 offset, u64 bytes)
1769 {
1770         struct btrfs_free_space *info;
1771         int ret = 0;
1772
1773         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1774         if (!info)
1775                 return -ENOMEM;
1776
1777         info->offset = offset;
1778         info->bytes = bytes;
1779
1780         spin_lock(&ctl->tree_lock);
1781
1782         if (try_merge_free_space(ctl, info, true))
1783                 goto link;
1784
1785         /*
1786          * If there was no extent directly to the left or right of this new
1787          * extent then we know we're going to have to allocate a new extent, so
1788          * before we do that see if we need to drop this into a bitmap
1789          */
1790         ret = insert_into_bitmap(ctl, info);
1791         if (ret < 0) {
1792                 goto out;
1793         } else if (ret) {
1794                 ret = 0;
1795                 goto out;
1796         }
1797 link:
1798         ret = link_free_space(ctl, info);
1799         if (ret)
1800                 kmem_cache_free(btrfs_free_space_cachep, info);
1801 out:
1802         spin_unlock(&ctl->tree_lock);
1803
1804         if (ret) {
1805                 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
1806                 BUG_ON(ret == -EEXIST);
1807         }
1808
1809         return ret;
1810 }
1811
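/*
 * Remove [offset, offset + bytes) from the free space cache, splitting
 * extent entries or clearing bitmap bits as needed.  An -EAGAIN from
 * remove_from_bitmap means the range continues in the next entry, so
 * the lookup is retried rather than returned to the caller.
 */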
1812 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1813                             u64 offset, u64 bytes)
1814 {
1815         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1816         struct btrfs_free_space *info;
1817         struct btrfs_free_space *next_info = NULL;
1818         int ret = 0;
1819
1820         spin_lock(&ctl->tree_lock);
1821
1822 again:
1823         info = tree_search_offset(ctl, offset, 0, 0);
1824         if (!info) {
1825                 /*
1826                  * oops didn't find an extent that matched the space we wanted
1827                  * to remove, look for a bitmap instead
1828                  */
1829                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1830                                           1, 0);
1831                 if (!info) {
1832                         WARN_ON(1);
1833                         goto out_lock;
1834                 }
1835         }
1836
1837         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1838                 u64 end;
1839                 next_info = rb_entry(rb_next(&info->offset_index),
1840                                              struct btrfs_free_space,
1841                                              offset_index);
1842
1843                 if (next_info->bitmap)
1844                         end = next_info->offset +
1845                               BITS_PER_BITMAP * ctl->unit - 1;
1846                 else
1847                         end = next_info->offset + next_info->bytes;
1848
1849                 if (next_info->bytes < bytes ||
1850                     next_info->offset > offset || offset > end) {
1851                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1852                               " trying to use %llu\n",
1853                               (unsigned long long)info->offset,
1854                               (unsigned long long)info->bytes,
1855                               (unsigned long long)bytes);
1856                         WARN_ON(1);
1857                         ret = -EINVAL;
1858                         goto out_lock;
1859                 }
1860
1861                 info = next_info;
1862         }
1863
1864         if (info->bytes == bytes) {
1865                 unlink_free_space(ctl, info);
1866                 if (info->bitmap) {
1867                         kfree(info->bitmap);
1868                         ctl->total_bitmaps--;
1869                 }
1870                 kmem_cache_free(btrfs_free_space_cachep, info);
1871                 ret = 0;
1872                 goto out_lock;
1873         }
1874
1875         if (!info->bitmap && info->offset == offset) {
1876                 unlink_free_space(ctl, info);
1877                 info->offset += bytes;
1878                 info->bytes -= bytes;
1879                 ret = link_free_space(ctl, info);
1880                 WARN_ON(ret);
1881                 goto out_lock;
1882         }
1883
1884         if (!info->bitmap && info->offset <= offset &&
1885             info->offset + info->bytes >= offset + bytes) {
1886                 u64 old_start = info->offset;
1887                 /*
1888                  * we're freeing space in the middle of the info,
1889                  * this can happen during tree log replay
1890                  *
1891                  * first unlink the old info and then
1892                  * insert it again after the hole we're creating
1893                  */
1894                 unlink_free_space(ctl, info);
1895                 if (offset + bytes < info->offset + info->bytes) {
1896                         u64 old_end = info->offset + info->bytes;
1897
1898                         info->offset = offset + bytes;
1899                         info->bytes = old_end - info->offset;
1900                         ret = link_free_space(ctl, info);
1901                         WARN_ON(ret);
1902                         if (ret)
1903                                 goto out_lock;
1904                 } else {
1905                         /* the hole we're creating ends at the end
1906                          * of the info struct, just free the info
1907                          */
1908                         kmem_cache_free(btrfs_free_space_cachep, info);
1909                 }
1910                 spin_unlock(&ctl->tree_lock);
1911
1912                 /* step two, insert a new info struct to cover
1913                  * anything before the hole
1914                  */
1915                 ret = btrfs_add_free_space(block_group, old_start,
1916                                            offset - old_start);
1917                 WARN_ON(ret);
1918                 goto out;
1919         }
1920
1921         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1922         if (ret == -EAGAIN)
1923                 goto again;
1924         BUG_ON(ret);
1925 out_lock:
1926         spin_unlock(&ctl->tree_lock);
1927 out:
1928         return ret;
1929 }
1930
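/*
 * Debug helper: print every free space entry in the block group and
 * count how many of them are at least @bytes in size.
 */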
1931 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1932                            u64 bytes)
1933 {
1934         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1935         struct btrfs_free_space *info;
1936         struct rb_node *n;
1937         int count = 0;
1938
1939         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1940                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1941                 if (info->bytes >= bytes)
1942                         count++;
1943                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1944                        (unsigned long long)info->offset,
1945                        (unsigned long long)info->bytes,
1946                        (info->bitmap) ? "yes" : "no");
1947         }
1948         printk(KERN_INFO "block group has cluster?: %s\n",
1949                list_empty(&block_group->cluster_list) ? "no" : "yes");
1950         printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
1951                "\n", count);
1952 }
1953
1954 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1955 {
1956         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1957
1958         spin_lock_init(&ctl->tree_lock);
1959         ctl->unit = block_group->sectorsize;
1960         ctl->start = block_group->key.objectid;
1961         ctl->private = block_group;
1962         ctl->op = &free_space_op;
1963
1964         /*
1965          * we only want to have 32k of ram per block group for keeping
1966          * track of free space, and if we pass 1/2 of that we want to
1967          * start converting things over to using bitmaps
1968          */
1969         ctl->extents_thresh = ((1024 * 32) / 2) /
1970                                 sizeof(struct btrfs_free_space);
1971 }
1972
1973 /*
1974  * for a given cluster, put all of its extents back into the free
1975  * space cache.  If the block group passed doesn't match the block group
1976  * pointed to by the cluster, someone else raced in and freed the
1977  * cluster already.  In that case, we just return without changing anything
1978  */
1979 static int
1980 __btrfs_return_cluster_to_free_space(
1981                              struct btrfs_block_group_cache *block_group,
1982                              struct btrfs_free_cluster *cluster)
1983 {
1984         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1985         struct btrfs_free_space *entry;
1986         struct rb_node *node;
1987
1988         spin_lock(&cluster->lock);
1989         if (cluster->block_group != block_group)
1990                 goto out;
1991
1992         cluster->block_group = NULL;
1993         cluster->window_start = 0;
1994         list_del_init(&cluster->block_group_list);
1995
1996         node = rb_first(&cluster->root);
1997         while (node) {
1998                 bool bitmap;
1999
2000                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2001                 node = rb_next(&entry->offset_index);
2002                 rb_erase(&entry->offset_index, &cluster->root);
2003
2004                 bitmap = (entry->bitmap != NULL);
2005                 if (!bitmap)
2006                         try_merge_free_space(ctl, entry, false);
2007                 tree_insert_offset(&ctl->free_space_offset,
2008                                    entry->offset, &entry->offset_index, bitmap);
2009         }
2010         cluster->root = RB_ROOT;
2011
2012 out:
2013         spin_unlock(&cluster->lock);
2014         btrfs_put_block_group(block_group);
2015         return 0;
2016 }
2017
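/*
 * Drop every entry from a free space ctl whose tree_lock is already
 * held, briefly releasing the lock to reschedule when needed.
 */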
2018 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2019 {
2020         struct btrfs_free_space *info;
2021         struct rb_node *node;
2022
2023         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2024                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2025                 if (!info->bitmap) {
2026                         unlink_free_space(ctl, info);
2027                         kmem_cache_free(btrfs_free_space_cachep, info);
2028                 } else {
2029                         free_bitmap(ctl, info);
2030                 }
2031                 if (need_resched()) {
2032                         spin_unlock(&ctl->tree_lock);
2033                         cond_resched();
2034                         spin_lock(&ctl->tree_lock);
2035                 }
2036         }
2037 }
2038
2039 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2040 {
2041         spin_lock(&ctl->tree_lock);
2042         __btrfs_remove_free_space_cache_locked(ctl);
2043         spin_unlock(&ctl->tree_lock);
2044 }
2045
2046 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2047 {
2048         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2049         struct btrfs_free_cluster *cluster;
2050         struct list_head *head;
2051
2052         spin_lock(&ctl->tree_lock);
2053         while ((head = block_group->cluster_list.next) !=
2054                &block_group->cluster_list) {
2055                 cluster = list_entry(head, struct btrfs_free_cluster,
2056                                      block_group_list);
2057
2058                 WARN_ON(cluster->block_group != block_group);
2059                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2060                 if (need_resched()) {
2061                         spin_unlock(&ctl->tree_lock);
2062                         cond_resched();
2063                         spin_lock(&ctl->tree_lock);
2064                 }
2065         }
2066         __btrfs_remove_free_space_cache_locked(ctl);
2067         spin_unlock(&ctl->tree_lock);
2068
2069 }
2070
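/*
 * Allocate @bytes from the block group's free space cache.  The search
 * requires at least @bytes + @empty_size of space in one entry, but only
 * @bytes are removed.  Returns the logical start offset of the
 * allocation, or 0 if nothing suitable was found.
 */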
2071 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2072                                u64 offset, u64 bytes, u64 empty_size)
2073 {
2074         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2075         struct btrfs_free_space *entry = NULL;
2076         u64 bytes_search = bytes + empty_size;
2077         u64 ret = 0;
2078
2079         spin_lock(&ctl->tree_lock);
2080         entry = find_free_space(ctl, &offset, &bytes_search);
2081         if (!entry)
2082                 goto out;
2083
2084         ret = offset;
2085         if (entry->bitmap) {
2086                 bitmap_clear_bits(ctl, entry, offset, bytes);
2087                 if (!entry->bytes)
2088                         free_bitmap(ctl, entry);
2089         } else {
2090                 unlink_free_space(ctl, entry);
2091                 entry->offset += bytes;
2092                 entry->bytes -= bytes;
2093                 if (!entry->bytes)
2094                         kmem_cache_free(btrfs_free_space_cachep, entry);
2095                 else
2096                         link_free_space(ctl, entry);
2097         }
2098
2099 out:
2100         spin_unlock(&ctl->tree_lock);
2101
2102         return ret;
2103 }
2104
2105 /*
2106  * given a cluster, put all of its extents back into the free space
2107  * cache.  If a block group is passed, this function will only free
2108  * a cluster that belongs to the passed block group.
2109  *
2110  * Otherwise, it'll get a reference on the block group pointed to by the
2111  * cluster and remove the cluster from it.
2112  */
2113 int btrfs_return_cluster_to_free_space(
2114                                struct btrfs_block_group_cache *block_group,
2115                                struct btrfs_free_cluster *cluster)
2116 {
2117         struct btrfs_free_space_ctl *ctl;
2118         int ret;
2119
2120         /* first, get a safe pointer to the block group */
2121         spin_lock(&cluster->lock);
2122         if (!block_group) {
2123                 block_group = cluster->block_group;
2124                 if (!block_group) {
2125                         spin_unlock(&cluster->lock);
2126                         return 0;
2127                 }
2128         } else if (cluster->block_group != block_group) {
2129                 /* someone else has already freed it, don't redo their work */
2130                 spin_unlock(&cluster->lock);
2131                 return 0;
2132         }
2133         atomic_inc(&block_group->count);
2134         spin_unlock(&cluster->lock);
2135
2136         ctl = block_group->free_space_ctl;
2137
2138         /* now return any extents the cluster had on it */
2139         spin_lock(&ctl->tree_lock);
2140         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2141         spin_unlock(&ctl->tree_lock);
2142
2143         /* finally drop our ref */
2144         btrfs_put_block_group(block_group);
2145         return ret;
2146 }
2147
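/*
 * Carve @bytes out of a cluster's bitmap entry, searching from
 * @min_start.  Returns the start offset, or 0 if the bitmap has no
 * large enough run of free bits.
 */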
2148 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2149                                    struct btrfs_free_cluster *cluster,
2150                                    struct btrfs_free_space *entry,
2151                                    u64 bytes, u64 min_start)
2152 {
2153         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2154         int err;
2155         u64 search_start = cluster->window_start;
2156         u64 search_bytes = bytes;
2157         u64 ret = 0;
2158
2159         search_start = min_start;
2160         search_bytes = bytes;
2161
2162         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2163         if (err)
2164                 return 0;
2165
2166         ret = search_start;
2167         __bitmap_clear_bits(ctl, entry, ret, bytes);
2168
2169         return ret;
2170 }
2171
2172 /*
2173  * given a cluster, try to allocate 'bytes' from it, returns 0
2174  * if it couldn't find anything suitably large, or a logical disk offset
2175  * if things worked out
2176  */
2177 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2178                              struct btrfs_free_cluster *cluster, u64 bytes,
2179                              u64 min_start)
2180 {
2181         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2182         struct btrfs_free_space *entry = NULL;
2183         struct rb_node *node;
2184         u64 ret = 0;
2185
2186         spin_lock(&cluster->lock);
2187         if (bytes > cluster->max_size)
2188                 goto out;
2189
2190         if (cluster->block_group != block_group)
2191                 goto out;
2192
2193         node = rb_first(&cluster->root);
2194         if (!node)
2195                 goto out;
2196
2197         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2198         while (1) {
2199                 if (entry->bytes < bytes ||
2200                     (!entry->bitmap && entry->offset < min_start)) {
2201                         node = rb_next(&entry->offset_index);
2202                         if (!node)
2203                                 break;
2204                         entry = rb_entry(node, struct btrfs_free_space,
2205                                          offset_index);
2206                         continue;
2207                 }
2208
2209                 if (entry->bitmap) {
2210                         ret = btrfs_alloc_from_bitmap(block_group,
2211                                                       cluster, entry, bytes,
2212                                                       min_start);
2213                         if (ret == 0) {
2214                                 node = rb_next(&entry->offset_index);
2215                                 if (!node)
2216                                         break;
2217                                 entry = rb_entry(node, struct btrfs_free_space,
2218                                                  offset_index);
2219                                 continue;
2220                         }
2221                 } else {
2222                         ret = entry->offset;
2223
2224                         entry->offset += bytes;
2225                         entry->bytes -= bytes;
2226                 }
2227
2228                 if (entry->bytes == 0)
2229                         rb_erase(&entry->offset_index, &cluster->root);
2230                 break;
2231         }
2232 out:
2233         spin_unlock(&cluster->lock);
2234
2235         if (!ret)
2236                 return 0;
2237
2238         spin_lock(&ctl->tree_lock);
2239
2240         ctl->free_space -= bytes;
2241         if (entry->bytes == 0) {
2242                 ctl->free_extents--;
2243                 if (entry->bitmap) {
2244                         kfree(entry->bitmap);
2245                         ctl->total_bitmaps--;
2246                         ctl->op->recalc_thresholds(ctl);
2247                 }
2248                 kmem_cache_free(btrfs_free_space_cachep, entry);
2249         }
2250
2251         spin_unlock(&ctl->tree_lock);
2252
2253         return ret;
2254 }
2255
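/*
 * Try to build the cluster window out of a single bitmap entry: scan it
 * for at least @min_bytes of free space made up of runs no smaller than
 * @bytes, then move the bitmap from the free space tree into the
 * cluster's rbtree.  Returns 0 on success, -ENOSPC otherwise.
 */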
2256 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2257                                 struct btrfs_free_space *entry,
2258                                 struct btrfs_free_cluster *cluster,
2259                                 u64 offset, u64 bytes, u64 min_bytes)
2260 {
2261         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2262         unsigned long next_zero;
2263         unsigned long i;
2264         unsigned long search_bits;
2265         unsigned long total_bits;
2266         unsigned long found_bits;
2267         unsigned long start = 0;
2268         unsigned long total_found = 0;
2269         int ret;
2270         bool found = false;
2271
2272         i = offset_to_bit(entry->offset, block_group->sectorsize,
2273                           max_t(u64, offset, entry->offset));
2274         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2275         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2276
2277 again:
2278         found_bits = 0;
2279         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2280              i < BITS_PER_BITMAP;
2281              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2282                 next_zero = find_next_zero_bit(entry->bitmap,
2283                                                BITS_PER_BITMAP, i);
2284                 if (next_zero - i >= search_bits) {
2285                         found_bits = next_zero - i;
2286                         break;
2287                 }
2288                 i = next_zero;
2289         }
2290
2291         if (!found_bits)
2292                 return -ENOSPC;
2293
2294         if (!found) {
2295                 start = i;
2296                 found = true;
2297         }
2298
2299         total_found += found_bits;
2300
2301         if (cluster->max_size < found_bits * block_group->sectorsize)
2302                 cluster->max_size = found_bits * block_group->sectorsize;
2303
2304         if (total_found < total_bits) {
2305                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2306                 if (i - start > total_bits * 2) {
2307                         total_found = 0;
2308                         cluster->max_size = 0;
2309                         found = false;
2310                 }
2311                 goto again;
2312         }
2313
2314         cluster->window_start = start * block_group->sectorsize +
2315                 entry->offset;
2316         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2317         ret = tree_insert_offset(&cluster->root, entry->offset,
2318                                  &entry->offset_index, 1);
2319         BUG_ON(ret);
2320
2321         return 0;
2322 }
2323
2324 /*
2325  * This searches the block group for just extents to fill the cluster with.
2326  */
2327 static noinline int
2328 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2329                         struct btrfs_free_cluster *cluster,
2330                         struct list_head *bitmaps, u64 offset, u64 bytes,
2331                         u64 min_bytes)
2332 {
2333         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2334         struct btrfs_free_space *first = NULL;
2335         struct btrfs_free_space *entry = NULL;
2336         struct btrfs_free_space *prev = NULL;
2337         struct btrfs_free_space *last;
2338         struct rb_node *node;
2339         u64 window_start;
2340         u64 window_free;
2341         u64 max_extent;
2342         u64 max_gap = 128 * 1024;
2343
2344         entry = tree_search_offset(ctl, offset, 0, 1);
2345         if (!entry)
2346                 return -ENOSPC;
2347
2348         /*
2349          * We don't want bitmaps, so just move along until we find a normal
2350          * extent entry.
2351          */
2352         while (entry->bitmap) {
2353                 if (list_empty(&entry->list))
2354                         list_add_tail(&entry->list, bitmaps);
2355                 node = rb_next(&entry->offset_index);
2356                 if (!node)
2357                         return -ENOSPC;
2358                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2359         }
2360
2361         window_start = entry->offset;
2362         window_free = entry->bytes;
2363         max_extent = entry->bytes;
2364         first = entry;
2365         last = entry;
2366         prev = entry;
2367
2368         while (window_free <= min_bytes) {
2369                 node = rb_next(&entry->offset_index);
2370                 if (!node)
2371                         return -ENOSPC;
2372                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2373
2374                 if (entry->bitmap) {
2375                         if (list_empty(&entry->list))
2376                                 list_add_tail(&entry->list, bitmaps);
2377                         continue;
2378                 }
2379
2380                 /*
2381                  * we haven't filled the empty size and the window is
2382                  * very large.  reset and try again
2383                  */
2384                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2385                     entry->offset - window_start > (min_bytes * 2)) {
2386                         first = entry;
2387                         window_start = entry->offset;
2388                         window_free = entry->bytes;
2389                         last = entry;
2390                         max_extent = entry->bytes;
2391                 } else {
2392                         last = entry;
2393                         window_free += entry->bytes;
2394                         if (entry->bytes > max_extent)
2395                                 max_extent = entry->bytes;
2396                 }
2397                 prev = entry;
2398         }
2399
2400         cluster->window_start = first->offset;
2401
2402         node = &first->offset_index;
2403
2404         /*
2405          * now we've found our entries, pull them out of the free space
2406          * cache and put them into the cluster rbtree
2407          */
2408         do {
2409                 int ret;
2410
2411                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2412                 node = rb_next(&entry->offset_index);
2413                 if (entry->bitmap)
2414                         continue;
2415
2416                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2417                 ret = tree_insert_offset(&cluster->root, entry->offset,
2418                                          &entry->offset_index, 0);
2419                 BUG_ON(ret);
2420         } while (node && entry != last);
2421
2422         cluster->max_size = max_extent;
2423
2424         return 0;
2425 }
2426
2427 /*
2428  * This specifically looks for bitmaps that may work in the cluster, we assume
2429  * that we have already failed to find extents that will work.
2430  */
2431 static noinline int
2432 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2433                      struct btrfs_free_cluster *cluster,
2434                      struct list_head *bitmaps, u64 offset, u64 bytes,
2435                      u64 min_bytes)
2436 {
2437         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2438         struct btrfs_free_space *entry;
2439         struct rb_node *node;
2440         int ret = -ENOSPC;
2441
2442         if (ctl->total_bitmaps == 0)
2443                 return -ENOSPC;
2444
2445         /*
2446          * First check our cached list of bitmaps and see if there is an entry
2447          * here that will work.
2448          */
2449         list_for_each_entry(entry, bitmaps, list) {
2450                 if (entry->bytes < min_bytes)
2451                         continue;
2452                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2453                                            bytes, min_bytes);
2454                 if (!ret)
2455                         return 0;
2456         }
2457
2458         /*
2459          * If we do have entries on our list and we are here then we didn't find
2460          * anything, so go ahead and get the next entry after the last entry in
2461          * this list and start the search from there.
2462          */
2463         if (!list_empty(bitmaps)) {
2464                 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2465                                    list);
2466                 node = rb_next(&entry->offset_index);
2467                 if (!node)
2468                         return -ENOSPC;
2469                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2470                 goto search;
2471         }
2472
2473         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2474         if (!entry)
2475                 return -ENOSPC;
2476
2477 search:
2478         node = &entry->offset_index;
2479         do {
2480                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2481                 node = rb_next(&entry->offset_index);
2482                 if (!entry->bitmap)
2483                         continue;
2484                 if (entry->bytes < min_bytes)
2485                         continue;
2486                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2487                                            bytes, min_bytes);
2488         } while (ret && node);
2489
2490         return ret;
2491 }
2492
2493 /*
2494  * here we try to find a cluster of blocks in a block group.  The goal
2495  * is to find at least bytes free and up to empty_size + bytes free.
2496  * We might not find them all in one contiguous area.
2497  *
2498  * returns zero and sets up cluster if things worked out, otherwise
2499  * it returns -ENOSPC
2500  */
2501 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2502                              struct btrfs_root *root,
2503                              struct btrfs_block_group_cache *block_group,
2504                              struct btrfs_free_cluster *cluster,
2505                              u64 offset, u64 bytes, u64 empty_size)
2506 {
2507         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2508         struct list_head bitmaps;
2509         struct btrfs_free_space *entry, *tmp;
2510         u64 min_bytes;
2511         int ret;
2512
2513         /* for metadata, allow allocations with more holes */
2514         if (btrfs_test_opt(root, SSD_SPREAD)) {
2515                 min_bytes = bytes + empty_size;
2516         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2517                 /*
2518                  * we want to do larger allocations when we are
2519                  * flushing out the delayed refs, it helps prevent
2520                  * making more work as we go along.
2521                  */
2522                 if (trans->transaction->delayed_refs.flushing)
2523                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2524                 else
2525                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2526         } else
2527                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2528
2529         spin_lock(&ctl->tree_lock);
2530
2531         /*
2532          * If we know we don't have enough space to make a cluster don't even
2533          * bother doing all the work to try and find one.
2534          */
2535         if (ctl->free_space < min_bytes) {
2536                 spin_unlock(&ctl->tree_lock);
2537                 return -ENOSPC;
2538         }
2539
2540         spin_lock(&cluster->lock);
2541
2542         /* someone already found a cluster, hooray */
2543         if (cluster->block_group) {
2544                 ret = 0;
2545                 goto out;
2546         }
2547
2548         INIT_LIST_HEAD(&bitmaps);
2549         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2550                                       bytes, min_bytes);
2551         if (ret)
2552                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2553                                            offset, bytes, min_bytes);
2554
2555         /* Clear our temporary list */
2556         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2557                 list_del_init(&entry->list);
2558
2559         if (!ret) {
2560                 atomic_inc(&block_group->count);
2561                 list_add_tail(&cluster->block_group_list,
2562                               &block_group->cluster_list);
2563                 cluster->block_group = block_group;
2564         }
2565 out:
2566         spin_unlock(&cluster->lock);
2567         spin_unlock(&ctl->tree_lock);
2568
2569         return ret;
2570 }
2571
2572 /*
2573  * simple code to zero out a cluster
2574  */
2575 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2576 {
2577         spin_lock_init(&cluster->lock);
2578         spin_lock_init(&cluster->refill_lock);
2579         cluster->root = RB_ROOT;
2580         cluster->max_size = 0;
2581         INIT_LIST_HEAD(&cluster->block_group_list);
2582         cluster->block_group = NULL;
2583 }
2584
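/*
 * Walk the free space within [start, end), pulling each chunk out of
 * the cache, discarding chunks of at least @minlen bytes and adding the
 * discarded space back afterwards.  *trimmed accumulates the bytes the
 * device actually trimmed.
 */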
2585 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2586                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2587 {
2588         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2589         struct btrfs_free_space *entry = NULL;
2590         struct btrfs_fs_info *fs_info = block_group->fs_info;
2591         u64 bytes = 0;
2592         u64 actually_trimmed;
2593         int ret = 0;
2594
2595         *trimmed = 0;
2596
2597         while (start < end) {
2598                 spin_lock(&ctl->tree_lock);
2599
2600                 if (ctl->free_space < minlen) {
2601                         spin_unlock(&ctl->tree_lock);
2602                         break;
2603                 }
2604
2605                 entry = tree_search_offset(ctl, start, 0, 1);
2606                 if (!entry)
2607                         entry = tree_search_offset(ctl,
2608                                                    offset_to_bitmap(ctl, start),
2609                                                    1, 1);
2610
2611                 if (!entry || entry->offset >= end) {
2612                         spin_unlock(&ctl->tree_lock);
2613                         break;
2614                 }
2615
2616                 if (entry->bitmap) {
2617                         ret = search_bitmap(ctl, entry, &start, &bytes);
2618                         if (!ret) {
2619                                 if (start >= end) {
2620                                         spin_unlock(&ctl->tree_lock);
2621                                         break;
2622                                 }
2623                                 bytes = min(bytes, end - start);
2624                                 bitmap_clear_bits(ctl, entry, start, bytes);
2625                                 if (entry->bytes == 0)
2626                                         free_bitmap(ctl, entry);
2627                         } else {
2628                                 start = entry->offset + BITS_PER_BITMAP *
2629                                         block_group->sectorsize;
2630                                 spin_unlock(&ctl->tree_lock);
2631                                 ret = 0;
2632                                 continue;
2633                         }
2634                 } else {
2635                         start = entry->offset;
2636                         bytes = min(entry->bytes, end - start);
2637                         unlink_free_space(ctl, entry);
2638                         kmem_cache_free(btrfs_free_space_cachep, entry);
2639                 }
2640
2641                 spin_unlock(&ctl->tree_lock);
2642
2643                 if (bytes >= minlen) {
2644                         struct btrfs_space_info *space_info;
2645                         int update = 0;
2646
2647                         space_info = block_group->space_info;
2648                         spin_lock(&space_info->lock);
2649                         spin_lock(&block_group->lock);
2650                         if (!block_group->ro) {
2651                                 block_group->reserved += bytes;
2652                                 space_info->bytes_reserved += bytes;
2653                                 update = 1;
2654                         }
2655                         spin_unlock(&block_group->lock);
2656                         spin_unlock(&space_info->lock);
2657
2658                         ret = btrfs_error_discard_extent(fs_info->extent_root,
2659                                                          start,
2660                                                          bytes,
2661                                                          &actually_trimmed);
2662
2663                         btrfs_add_free_space(block_group, start, bytes);
2664                         if (update) {
2665                                 spin_lock(&space_info->lock);
2666                                 spin_lock(&block_group->lock);
2667                                 if (block_group->ro)
2668                                         space_info->bytes_readonly += bytes;
2669                                 block_group->reserved -= bytes;
2670                                 space_info->bytes_reserved -= bytes;
2671                                 spin_unlock(&space_info->lock);
2672                                 spin_unlock(&block_group->lock);
2673                         }
2674
2675                         if (ret)
2676                                 break;
2677                         *trimmed += actually_trimmed;
2678                 }
2679                 start += bytes;
2680                 bytes = 0;
2681
2682                 if (fatal_signal_pending(current)) {
2683                         ret = -ERESTARTSYS;
2684                         break;
2685                 }
2686
2687                 cond_resched();
2688         }
2689
2690         return ret;
2691 }
2692
2693 /*
2694  * Find the left-most item in the cache tree, and then return the
2695  * smallest inode number in the item.
2696  *
2697  * Note: the returned inode number may not be the smallest one in
2698  * the tree, if the left-most item is a bitmap.
2699  */
2700 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2701 {
2702         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2703         struct btrfs_free_space *entry = NULL;
2704         u64 ino = 0;
2705
2706         spin_lock(&ctl->tree_lock);
2707
2708         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2709                 goto out;
2710
2711         entry = rb_entry(rb_first(&ctl->free_space_offset),
2712                          struct btrfs_free_space, offset_index);
2713
2714         if (!entry->bitmap) {
2715                 ino = entry->offset;
2716
2717                 unlink_free_space(ctl, entry);
2718                 entry->offset++;
2719                 entry->bytes--;
2720                 if (!entry->bytes)
2721                         kmem_cache_free(btrfs_free_space_cachep, entry);
2722                 else
2723                         link_free_space(ctl, entry);
2724         } else {
2725                 u64 offset = 0;
2726                 u64 count = 1;
2727                 int ret;
2728
2729                 ret = search_bitmap(ctl, entry, &offset, &count);
2730                 BUG_ON(ret);
2731
2732                 ino = offset;
2733                 bitmap_clear_bits(ctl, entry, offset, 1);
2734                 if (entry->bytes == 0)
2735                         free_bitmap(ctl, entry);
2736         }
2737 out:
2738         spin_unlock(&ctl->tree_lock);
2739
2740         return ino;
2741 }
2742
2743 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2744                                     struct btrfs_path *path)
2745 {
2746         struct inode *inode = NULL;
2747
2748         spin_lock(&root->cache_lock);
2749         if (root->cache_inode)
2750                 inode = igrab(root->cache_inode);
2751         spin_unlock(&root->cache_lock);
2752         if (inode)
2753                 return inode;
2754
2755         inode = __lookup_free_space_inode(root, path, 0);
2756         if (IS_ERR(inode))
2757                 return inode;
2758
2759         spin_lock(&root->cache_lock);
2760         if (!btrfs_fs_closing(root->fs_info))
2761                 root->cache_inode = igrab(inode);
2762         spin_unlock(&root->cache_lock);
2763
2764         return inode;
2765 }
2766
2767 int create_free_ino_inode(struct btrfs_root *root,
2768                           struct btrfs_trans_handle *trans,
2769                           struct btrfs_path *path)
2770 {
2771         return __create_free_space_inode(root, trans, path,
2772                                          BTRFS_FREE_INO_OBJECTID, 0);
2773 }
2774
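/*
 * Load the free inode number cache for @root from its cache inode.  The
 * load is skipped when the inode map cache is disabled, the filesystem
 * is shutting down, or the on-disk generation no longer matches the
 * root.
 */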
2775 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2776 {
2777         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2778         struct btrfs_path *path;
2779         struct inode *inode;
2780         int ret = 0;
2781         u64 root_gen = btrfs_root_generation(&root->root_item);
2782
2783         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2784                 return 0;
2785
2786         /*
2787          * If we're unmounting then just return, since this does a search on the
2788          * normal root and not the commit root and we could deadlock.
2789          */
2790         if (btrfs_fs_closing(fs_info))
2791                 return 0;
2792
2793         path = btrfs_alloc_path();
2794         if (!path)
2795                 return 0;
2796
2797         inode = lookup_free_ino_inode(root, path);
2798         if (IS_ERR(inode))
2799                 goto out;
2800
2801         if (root_gen != BTRFS_I(inode)->generation)
2802                 goto out_put;
2803
2804         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2805
2806         if (ret < 0)
2807                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2808                        "root %llu\n", root->root_key.objectid);
2809 out_put:
2810         iput(inode);
2811 out:
2812         btrfs_free_path(path);
2813         return ret;
2814 }
2815
2816 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2817                               struct btrfs_trans_handle *trans,
2818                               struct btrfs_path *path)
2819 {
2820         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2821         struct inode *inode;
2822         int ret;
2823
2824         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2825                 return 0;
2826
2827         inode = lookup_free_ino_inode(root, path);
2828         if (IS_ERR(inode))
2829                 return 0;
2830
2831         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2832         if (ret) {
2833                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2834 #ifdef DEBUG
2835                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2836                        "for root %llu\n", root->root_key.objectid);
2837 #endif
2838         }
2839
2840         iput(inode);
2841         return ret;
2842 }