1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
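/*
 * Look up the free space header item keyed on @offset, read the location of
 * the cache inode it points at and iget that inode.  __GFP_FS is masked off
 * the mapping so page cache allocations for it can't recurse into the fs.
 */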
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 WARN_ON(1);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         trans->block_rsv = rsv;
239
240         return ret;
241 }
242
243 static int readahead_cache(struct inode *inode)
244 {
245         struct file_ra_state *ra;
246         unsigned long last_index;
247
248         ra = kzalloc(sizeof(*ra), GFP_NOFS);
249         if (!ra)
250                 return -ENOMEM;
251
252         file_ra_state_init(ra, inode->i_mapping);
253         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254
255         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256
257         kfree(ra);
258
259         return 0;
260 }
261
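/*
 * Cursor state used while serializing the free space cache to, or reading it
 * back from, the pages of the cache inode.  It tracks the currently mapped
 * page, how much room is left on it and whether crcs are being used.
 */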
262 struct io_ctl {
263         void *cur, *orig;
264         struct page *page;
265         struct page **pages;
266         struct btrfs_root *root;
267         unsigned long size;
268         int index;
269         int num_pages;
270         unsigned check_crcs:1;
271 };
272
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274                        struct btrfs_root *root)
275 {
276         memset(io_ctl, 0, sizeof(struct io_ctl));
277         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278                 PAGE_CACHE_SHIFT;
279         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280                                 GFP_NOFS);
281         if (!io_ctl->pages)
282                 return -ENOMEM;
283         io_ctl->root = root;
284         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285                 io_ctl->check_crcs = 1;
286         return 0;
287 }
288
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291         kfree(io_ctl->pages);
292 }
293
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296         if (io_ctl->cur) {
297                 kunmap(io_ctl->page);
298                 io_ctl->cur = NULL;
299                 io_ctl->orig = NULL;
300         }
301 }
302
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305         WARN_ON(io_ctl->cur);
306         BUG_ON(io_ctl->index >= io_ctl->num_pages);
307         io_ctl->page = io_ctl->pages[io_ctl->index++];
308         io_ctl->cur = kmap(io_ctl->page);
309         io_ctl->orig = io_ctl->cur;
310         io_ctl->size = PAGE_CACHE_SIZE;
311         if (clear)
312                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317         int i;
318
319         io_ctl_unmap_page(io_ctl);
320
321         for (i = 0; i < io_ctl->num_pages; i++) {
322                 ClearPageChecked(io_ctl->pages[i]);
323                 unlock_page(io_ctl->pages[i]);
324                 page_cache_release(io_ctl->pages[i]);
325         }
326 }
327
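/*
 * Find or create and lock every page of the cache inode.  When @uptodate is
 * set the pages are also read in, and a read failure drops all of the pages
 * and returns -EIO.
 */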
328 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
329                                 int uptodate)
330 {
331         struct page *page;
332         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
333         int i;
334
335         for (i = 0; i < io_ctl->num_pages; i++) {
336                 page = find_or_create_page(inode->i_mapping, i, mask);
337                 if (!page) {
338                         io_ctl_drop_pages(io_ctl);
339                         return -ENOMEM;
340                 }
341                 io_ctl->pages[i] = page;
342                 if (uptodate && !PageUptodate(page)) {
343                         btrfs_readpage(NULL, page);
344                         lock_page(page);
345                         if (!PageUptodate(page)) {
346                                 printk(KERN_ERR "btrfs: error reading free "
347                                        "space cache\n");
348                                 io_ctl_drop_pages(io_ctl);
349                                 return -EIO;
350                         }
351                 }
352         }
353
354         return 0;
355 }
356
357 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
358 {
359         u64 *val;
360
361         io_ctl_map_page(io_ctl, 1);
362
363         /*
364          * Skip the csum areas.  If we don't check crcs then we just have a
365          * 64bit chunk at the front of the first page.
366          */
367         if (io_ctl->check_crcs) {
368                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
369                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
370         } else {
371                 io_ctl->cur += sizeof(u64);
372                 io_ctl->size -= sizeof(u64) * 2;
373         }
374
375         val = io_ctl->cur;
376         *val = cpu_to_le64(generation);
377         io_ctl->cur += sizeof(u64);
378 }
379
380 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
381 {
382         u64 *gen;
383
384         /*
385          * Skip the crc area.  If we don't check crcs then we just have a 64bit
386          * chunk at the front of the first page.
387          */
388         if (io_ctl->check_crcs) {
389                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
390                 io_ctl->size -= sizeof(u64) +
391                         (sizeof(u32) * io_ctl->num_pages);
392         } else {
393                 io_ctl->cur += sizeof(u64);
394                 io_ctl->size -= sizeof(u64) * 2;
395         }
396
397         gen = io_ctl->cur;
398         if (le64_to_cpu(*gen) != generation) {
399                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
400                                    "(%Lu) does not match inode (%Lu)\n", *gen,
401                                    generation);
402                 io_ctl_unmap_page(io_ctl);
403                 return -EIO;
404         }
405         io_ctl->cur += sizeof(u64);
406         return 0;
407 }
408
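/*
 * Checksum the page we just finished at @index and store the result in the
 * crc array at the front of the first page.  When crcs aren't in use this
 * only unmaps the current page.
 */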
409 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
410 {
411         u32 *tmp;
412         u32 crc = ~(u32)0;
413         unsigned offset = 0;
414
415         if (!io_ctl->check_crcs) {
416                 io_ctl_unmap_page(io_ctl);
417                 return;
418         }
419
420         if (index == 0)
421                 offset = sizeof(u32) * io_ctl->num_pages;
422
423         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
424                               PAGE_CACHE_SIZE - offset);
425         btrfs_csum_final(crc, (char *)&crc);
426         io_ctl_unmap_page(io_ctl);
427         tmp = kmap(io_ctl->pages[0]);
428         tmp += index;
429         *tmp = crc;
430         kunmap(io_ctl->pages[0]);
431 }
432
433 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
434 {
435         u32 *tmp, val;
436         u32 crc = ~(u32)0;
437         unsigned offset = 0;
438
439         if (!io_ctl->check_crcs) {
440                 io_ctl_map_page(io_ctl, 0);
441                 return 0;
442         }
443
444         if (index == 0)
445                 offset = sizeof(u32) * io_ctl->num_pages;
446
447         tmp = kmap(io_ctl->pages[0]);
448         tmp += index;
449         val = *tmp;
450         kunmap(io_ctl->pages[0]);
451
452         io_ctl_map_page(io_ctl, 0);
453         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
454                               PAGE_CACHE_SIZE - offset);
455         btrfs_csum_final(crc, (char *)&crc);
456         if (val != crc) {
457                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
458                                    "space cache\n");
459                 io_ctl_unmap_page(io_ctl);
460                 return -EIO;
461         }
462
463         return 0;
464 }
465
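/*
 * Write a single extent or bitmap entry at the current cursor position.
 * When the page fills up its crc is written and the next page, if there is
 * one, is mapped in its place.
 */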
466 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
467                             void *bitmap)
468 {
469         struct btrfs_free_space_entry *entry;
470
471         if (!io_ctl->cur)
472                 return -ENOSPC;
473
474         entry = io_ctl->cur;
475         entry->offset = cpu_to_le64(offset);
476         entry->bytes = cpu_to_le64(bytes);
477         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
478                 BTRFS_FREE_SPACE_EXTENT;
479         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
480         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
481
482         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
483                 return 0;
484
485         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
486
487         /* No more pages to map */
488         if (io_ctl->index >= io_ctl->num_pages)
489                 return 0;
490
491         /* map the next page */
492         io_ctl_map_page(io_ctl, 1);
493         return 0;
494 }
495
496 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
497 {
498         if (!io_ctl->cur)
499                 return -ENOSPC;
500
501         /*
502          * If we aren't at the start of the current page, unmap this one and
503          * map the next one if there is any left.
504          */
505         if (io_ctl->cur != io_ctl->orig) {
506                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
507                 if (io_ctl->index >= io_ctl->num_pages)
508                         return -ENOSPC;
509                 io_ctl_map_page(io_ctl, 0);
510         }
511
512         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
513         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
514         if (io_ctl->index < io_ctl->num_pages)
515                 io_ctl_map_page(io_ctl, 0);
516         return 0;
517 }
518
519 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
520 {
521         /*
522          * If we're not on the boundary we know we've modified the page and we
523          * need to crc the page.
524          */
525         if (io_ctl->cur != io_ctl->orig)
526                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
527         else
528                 io_ctl_unmap_page(io_ctl);
529
530         while (io_ctl->index < io_ctl->num_pages) {
531                 io_ctl_map_page(io_ctl, 1);
532                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
533         }
534 }
535
536 static int io_ctl_read_entry(struct io_ctl *io_ctl,
537                             struct btrfs_free_space *entry, u8 *type)
538 {
539         struct btrfs_free_space_entry *e;
540
541         e = io_ctl->cur;
542         entry->offset = le64_to_cpu(e->offset);
543         entry->bytes = le64_to_cpu(e->bytes);
544         *type = e->type;
545         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
546         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
547
548         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
549                 return 0;
550
551         io_ctl_unmap_page(io_ctl);
552
553         if (io_ctl->index >= io_ctl->num_pages)
554                 return 0;
555
556         return io_ctl_check_crc(io_ctl, io_ctl->index);
557 }
558
559 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
560                               struct btrfs_free_space *entry)
561 {
562         int ret;
563
564         if (io_ctl->cur && io_ctl->cur != io_ctl->orig)
565                 io_ctl_unmap_page(io_ctl);
566
567         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
568         if (ret)
569                 return ret;
570
571         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
572         io_ctl_unmap_page(io_ctl);
573
574         return 0;
575 }
576
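/*
 * Read the free space cache keyed on @offset into @ctl.  Returns 1 if the
 * cache was loaded, 0 if there was nothing usable to load and a negative
 * value on error; anything partially loaded is thrown away again.
 */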
577 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
578                             struct btrfs_free_space_ctl *ctl,
579                             struct btrfs_path *path, u64 offset)
580 {
581         struct btrfs_free_space_header *header;
582         struct extent_buffer *leaf;
583         struct io_ctl io_ctl;
584         struct btrfs_key key;
585         struct btrfs_free_space *e, *n;
586         struct list_head bitmaps;
587         u64 num_entries;
588         u64 num_bitmaps;
589         u64 generation;
590         u8 type;
591         int ret = 0;
592
593         INIT_LIST_HEAD(&bitmaps);
594
595         /* Nothing in the space cache, goodbye */
596         if (!i_size_read(inode))
597                 return 0;
598
599         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
600         key.offset = offset;
601         key.type = 0;
602
603         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
604         if (ret < 0)
605                 return 0;
606         else if (ret > 0) {
607                 btrfs_release_path(path);
608                 return 0;
609         }
610
611         ret = -1;
612
613         leaf = path->nodes[0];
614         header = btrfs_item_ptr(leaf, path->slots[0],
615                                 struct btrfs_free_space_header);
616         num_entries = btrfs_free_space_entries(leaf, header);
617         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
618         generation = btrfs_free_space_generation(leaf, header);
619         btrfs_release_path(path);
620
621         if (BTRFS_I(inode)->generation != generation) {
622                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
623                        " not match free space cache generation (%llu)\n",
624                        (unsigned long long)BTRFS_I(inode)->generation,
625                        (unsigned long long)generation);
626                 return 0;
627         }
628
629         if (!num_entries)
630                 return 0;
631
632         ret = io_ctl_init(&io_ctl, inode, root);
        if (ret)
                return ret;

633         ret = readahead_cache(inode);
634         if (ret)
635                 goto out;
636
637         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
638         if (ret)
639                 goto out;
640
641         ret = io_ctl_check_crc(&io_ctl, 0);
642         if (ret)
643                 goto free_cache;
644
645         ret = io_ctl_check_generation(&io_ctl, generation);
646         if (ret)
647                 goto free_cache;
648
649         while (num_entries) {
650                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
651                                       GFP_NOFS);
652                 if (!e)
653                         goto free_cache;
654
655                 ret = io_ctl_read_entry(&io_ctl, e, &type);
656                 if (ret) {
657                         kmem_cache_free(btrfs_free_space_cachep, e);
658                         goto free_cache;
659                 }
660
661                 if (!e->bytes) {
662                         kmem_cache_free(btrfs_free_space_cachep, e);
663                         goto free_cache;
664                 }
665
666                 if (type == BTRFS_FREE_SPACE_EXTENT) {
667                         spin_lock(&ctl->tree_lock);
668                         ret = link_free_space(ctl, e);
669                         spin_unlock(&ctl->tree_lock);
670                         if (ret) {
671                                 printk(KERN_ERR "Duplicate entries in "
672                                        "free space cache, dumping\n");
673                                 kmem_cache_free(btrfs_free_space_cachep, e);
674                                 goto free_cache;
675                         }
676                 } else {
677                         BUG_ON(!num_bitmaps);
678                         num_bitmaps--;
679                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
680                         if (!e->bitmap) {
681                                 kmem_cache_free(
682                                         btrfs_free_space_cachep, e);
683                                 goto free_cache;
684                         }
685                         spin_lock(&ctl->tree_lock);
686                         ret = link_free_space(ctl, e);
687                         ctl->total_bitmaps++;
688                         ctl->op->recalc_thresholds(ctl);
689                         spin_unlock(&ctl->tree_lock);
690                         if (ret) {
691                                 printk(KERN_ERR "Duplicate entries in "
692                                        "free space cache, dumping\n");
693                                 kmem_cache_free(btrfs_free_space_cachep, e);
694                                 goto free_cache;
695                         }
696                         list_add_tail(&e->list, &bitmaps);
697                 }
698
699                 num_entries--;
700         }
701
702         /*
703          * The bitmaps were written out after all of the extent entries, so
704          * read them in last and add them to the cache now.
705          */
706         list_for_each_entry_safe(e, n, &bitmaps, list) {
707                 list_del_init(&e->list);
708                 ret = io_ctl_read_bitmap(&io_ctl, e);
709                 if (ret)
710                         goto free_cache;
711         }
712
713         io_ctl_drop_pages(&io_ctl);
714         ret = 1;
715 out:
716         io_ctl_free(&io_ctl);
717         return ret;
718 free_cache:
719         io_ctl_drop_pages(&io_ctl);
720         __btrfs_remove_free_space_cache(ctl);
721         goto out;
722 }
723
724 int load_free_space_cache(struct btrfs_fs_info *fs_info,
725                           struct btrfs_block_group_cache *block_group)
726 {
727         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
728         struct btrfs_root *root = fs_info->tree_root;
729         struct inode *inode;
730         struct btrfs_path *path;
731         int ret = 0;
732         bool matched;
733         u64 used = btrfs_block_group_used(&block_group->item);
734
735         /*
736          * If we're unmounting then just return, since this does a search on the
737          * normal root and not the commit root and we could deadlock.
738          */
739         if (btrfs_fs_closing(fs_info))
740                 return 0;
741
742         /*
743          * If this block group has been marked to be cleared for one reason or
744          * another then we can't trust the on disk cache, so just return.
745          */
746         spin_lock(&block_group->lock);
747         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
748                 spin_unlock(&block_group->lock);
749                 return 0;
750         }
751         spin_unlock(&block_group->lock);
752
753         path = btrfs_alloc_path();
754         if (!path)
755                 return 0;
756
757         inode = lookup_free_space_inode(root, block_group, path);
758         if (IS_ERR(inode)) {
759                 btrfs_free_path(path);
760                 return 0;
761         }
762
763         /* We may have converted the inode and made the cache invalid. */
764         spin_lock(&block_group->lock);
765         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
766                 spin_unlock(&block_group->lock);
767                 goto out;
768         }
769         spin_unlock(&block_group->lock);
770
771         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
772                                       path, block_group->key.objectid);
773         btrfs_free_path(path);
774         if (ret <= 0)
775                 goto out;
776
777         spin_lock(&ctl->tree_lock);
778         matched = (ctl->free_space == (block_group->key.offset - used -
779                                        block_group->bytes_super));
780         spin_unlock(&ctl->tree_lock);
781
782         if (!matched) {
783                 __btrfs_remove_free_space_cache(ctl);
784                 printk(KERN_ERR "block group %llu has wrong amount of free "
785                        "space\n", block_group->key.objectid);
786                 ret = -1;
787         }
788 out:
789         if (ret < 0) {
790                 /* This cache is bogus, make sure it gets cleared */
791                 spin_lock(&block_group->lock);
792                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
793                 spin_unlock(&block_group->lock);
794                 ret = 0;
795
796                 printk(KERN_ERR "btrfs: failed to load free space cache "
797                        "for block group %llu\n", block_group->key.objectid);
798         }
799
800         iput(inode);
801         return ret;
802 }
803
804 /**
805  * __btrfs_write_out_cache - write out cached info to an inode
806  * @root - the root the inode belongs to
 * @inode - the inode to write the free space cache into
807  * @ctl - the free space cache we are going to write out
808  * @block_group - the block_group for this cache if it belongs to a block_group
809  * @trans - the trans handle
810  * @path - the path to use
811  * @offset - the offset for the key we'll insert
812  *
813  * This function writes out a free space cache struct to disk for quick recovery
814  * on mount.  This will return 0 if it was successful in writing the cache out,
815  * and -1 if it was not.
816  */
817 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
818                             struct btrfs_free_space_ctl *ctl,
819                             struct btrfs_block_group_cache *block_group,
820                             struct btrfs_trans_handle *trans,
821                             struct btrfs_path *path, u64 offset)
822 {
823         struct btrfs_free_space_header *header;
824         struct extent_buffer *leaf;
825         struct rb_node *node;
826         struct list_head *pos, *n;
827         struct extent_state *cached_state = NULL;
828         struct btrfs_free_cluster *cluster = NULL;
829         struct extent_io_tree *unpin = NULL;
830         struct io_ctl io_ctl;
831         struct list_head bitmap_list;
832         struct btrfs_key key;
833         u64 start, end, len;
834         int entries = 0;
835         int bitmaps = 0;
836         int ret;
837         int err = -1;
838
839         INIT_LIST_HEAD(&bitmap_list);
840
841         if (!i_size_read(inode))
842                 return -1;
843
844         ret = io_ctl_init(&io_ctl, inode, root);
        if (ret)
                return -1;
845
846         /* Get the cluster for this block_group if it exists */
847         if (block_group && !list_empty(&block_group->cluster_list))
848                 cluster = list_entry(block_group->cluster_list.next,
849                                      struct btrfs_free_cluster,
850                                      block_group_list);
851
852         /*
853          * We shouldn't have switched the pinned extents yet so this is the
854          * right one
855          */
856         unpin = root->fs_info->pinned_extents;
857
858         /* Lock all pages first so we can lock the extent safely. */
859         io_ctl_prepare_pages(&io_ctl, inode, 0);
860
861         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
862                          0, &cached_state, GFP_NOFS);
863
864         /*
865          * When searching for pinned extents, we need to start at our start
866          * offset.
867          */
868         if (block_group)
869                 start = block_group->key.objectid;
870
871         node = rb_first(&ctl->free_space_offset);
872         if (!node && cluster) {
873                 node = rb_first(&cluster->root);
874                 cluster = NULL;
875         }
876
877         /* Make sure we can fit our crcs into the first page */
878         if (io_ctl.check_crcs &&
879             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
880                 WARN_ON(1);
881                 goto out_nospc;
882         }
883
884         io_ctl_set_generation(&io_ctl, trans->transid);
885
886         /* Write out the extent entries */
887         while (node) {
888                 struct btrfs_free_space *e;
889
890                 e = rb_entry(node, struct btrfs_free_space, offset_index);
891                 entries++;
892
893                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
894                                        e->bitmap);
895                 if (ret)
896                         goto out_nospc;
897
898                 if (e->bitmap) {
899                         list_add_tail(&e->list, &bitmap_list);
900                         bitmaps++;
901                 }
902                 node = rb_next(node);
903                 if (!node && cluster) {
904                         node = rb_first(&cluster->root);
905                         cluster = NULL;
906                 }
907         }
908
909         /*
910          * We want to add any pinned extents to our free space cache
911          * so we don't leak the space
912          */
913         while (block_group && (start < block_group->key.objectid +
914                                block_group->key.offset)) {
915                 ret = find_first_extent_bit(unpin, start, &start, &end,
916                                             EXTENT_DIRTY);
917                 if (ret) {
918                         ret = 0;
919                         break;
920                 }
921
922                 /* This pinned extent is out of our range */
923                 if (start >= block_group->key.objectid +
924                     block_group->key.offset)
925                         break;
926
927                 len = block_group->key.objectid +
928                         block_group->key.offset - start;
929                 len = min(len, end + 1 - start);
930
931                 entries++;
932                 ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
933                 if (ret)
934                         goto out_nospc;
935
936                 start = end + 1;
937         }
938
939         /* Write out the bitmaps */
940         list_for_each_safe(pos, n, &bitmap_list) {
941                 struct btrfs_free_space *entry =
942                         list_entry(pos, struct btrfs_free_space, list);
943
944                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
945                 if (ret)
946                         goto out_nospc;
947                 list_del_init(&entry->list);
948         }
949
950         /* Zero out the rest of the pages just to make sure */
951         io_ctl_zero_remaining_pages(&io_ctl);
952
953         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
954                                 0, i_size_read(inode), &cached_state);
955         io_ctl_drop_pages(&io_ctl);
956         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
957                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
958
959         if (ret)
960                 goto out;
961
962
963         ret = filemap_write_and_wait(inode->i_mapping);
964         if (ret)
965                 goto out;
966
967         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
968         key.offset = offset;
969         key.type = 0;
970
971         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
972         if (ret < 0) {
973                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
974                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
975                                  GFP_NOFS);
976                 goto out;
977         }
978         leaf = path->nodes[0];
979         if (ret > 0) {
980                 struct btrfs_key found_key;
981                 BUG_ON(!path->slots[0]);
982                 path->slots[0]--;
983                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
984                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
985                     found_key.offset != offset) {
986                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
987                                          inode->i_size - 1,
988                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
989                                          NULL, GFP_NOFS);
990                         btrfs_release_path(path);
991                         goto out;
992                 }
993         }
994
995         BTRFS_I(inode)->generation = trans->transid;
996         header = btrfs_item_ptr(leaf, path->slots[0],
997                                 struct btrfs_free_space_header);
998         btrfs_set_free_space_entries(leaf, header, entries);
999         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1000         btrfs_set_free_space_generation(leaf, header, trans->transid);
1001         btrfs_mark_buffer_dirty(leaf);
1002         btrfs_release_path(path);
1003
1004         err = 0;
1005 out:
1006         io_ctl_free(&io_ctl);
1007         if (err) {
1008                 invalidate_inode_pages2(inode->i_mapping);
1009                 BTRFS_I(inode)->generation = 0;
1010         }
1011         btrfs_update_inode(trans, root, inode);
1012         return err;
1013
1014 out_nospc:
1015         list_for_each_safe(pos, n, &bitmap_list) {
1016                 struct btrfs_free_space *entry =
1017                         list_entry(pos, struct btrfs_free_space, list);
1018                 list_del_init(&entry->list);
1019         }
1020         io_ctl_drop_pages(&io_ctl);
1021         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1022                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1023         goto out;
1024 }
1025
1026 int btrfs_write_out_cache(struct btrfs_root *root,
1027                           struct btrfs_trans_handle *trans,
1028                           struct btrfs_block_group_cache *block_group,
1029                           struct btrfs_path *path)
1030 {
1031         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1032         struct inode *inode;
1033         int ret = 0;
1034
1035         root = root->fs_info->tree_root;
1036
1037         spin_lock(&block_group->lock);
1038         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1039                 spin_unlock(&block_group->lock);
1040                 return 0;
1041         }
1042         spin_unlock(&block_group->lock);
1043
1044         inode = lookup_free_space_inode(root, block_group, path);
1045         if (IS_ERR(inode))
1046                 return 0;
1047
1048         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1049                                       path, block_group->key.objectid);
1050         if (ret) {
1051                 spin_lock(&block_group->lock);
1052                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1053                 spin_unlock(&block_group->lock);
1054                 ret = 0;
1055 #ifdef DEBUG
1056                 printk(KERN_ERR "btrfs: failed to write free space cache "
1057                        "for block group %llu\n", block_group->key.objectid);
1058 #endif
1059         }
1060
1061         iput(inode);
1062         return ret;
1063 }
1064
1065 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1066                                           u64 offset)
1067 {
1068         BUG_ON(offset < bitmap_start);
1069         offset -= bitmap_start;
1070         return (unsigned long)(div_u64(offset, unit));
1071 }
1072
1073 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1074 {
1075         return (unsigned long)(div_u64(bytes, unit));
1076 }
1077
1078 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1079                                    u64 offset)
1080 {
1081         u64 bitmap_start;
1082         u64 bytes_per_bitmap;
1083
1084         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1085         bitmap_start = offset - ctl->start;
1086         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1087         bitmap_start *= bytes_per_bitmap;
1088         bitmap_start += ctl->start;
1089
1090         return bitmap_start;
1091 }
1092
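/*
 * Insert a free space entry into the rbtree sorted by offset.  A bitmap
 * entry and an extent entry may share an offset; in that case the extent
 * entry is kept to the left so a linear search finds it first.
 */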
1093 static int tree_insert_offset(struct rb_root *root, u64 offset,
1094                               struct rb_node *node, int bitmap)
1095 {
1096         struct rb_node **p = &root->rb_node;
1097         struct rb_node *parent = NULL;
1098         struct btrfs_free_space *info;
1099
1100         while (*p) {
1101                 parent = *p;
1102                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1103
1104                 if (offset < info->offset) {
1105                         p = &(*p)->rb_left;
1106                 } else if (offset > info->offset) {
1107                         p = &(*p)->rb_right;
1108                 } else {
1109                         /*
1110                          * we could have a bitmap entry and an extent entry
1111                          * share the same offset.  If this is the case, we want
1112                          * the extent entry to always be found first if we do a
1113                          * linear search through the tree, since we want to have
1114                          * the quickest allocation time, and allocating from an
1115                          * extent is faster than allocating from a bitmap.  So
1116                          * if we're inserting a bitmap and we find an entry at
1117                          * this offset, we want to go right, or after this entry
1118                          * logically.  If we are inserting an extent and we've
1119                          * found a bitmap, we want to go left, or before
1120                          * logically.
1121                          */
1122                         if (bitmap) {
1123                                 if (info->bitmap) {
1124                                         WARN_ON_ONCE(1);
1125                                         return -EEXIST;
1126                                 }
1127                                 p = &(*p)->rb_right;
1128                         } else {
1129                                 if (!info->bitmap) {
1130                                         WARN_ON_ONCE(1);
1131                                         return -EEXIST;
1132                                 }
1133                                 p = &(*p)->rb_left;
1134                         }
1135                 }
1136         }
1137
1138         rb_link_node(node, parent, p);
1139         rb_insert_color(node, root);
1140
1141         return 0;
1142 }
1143
1144 /*
1145  * searches the tree for the given offset.
1146  *
1147  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1148  * want a section that is at least 'bytes' in size and comes at or after the given
1149  * offset.
1150  */
1151 static struct btrfs_free_space *
1152 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1153                    u64 offset, int bitmap_only, int fuzzy)
1154 {
1155         struct rb_node *n = ctl->free_space_offset.rb_node;
1156         struct btrfs_free_space *entry, *prev = NULL;
1157
1158         /* find entry that is closest to the 'offset' */
1159         while (1) {
1160                 if (!n) {
1161                         entry = NULL;
1162                         break;
1163                 }
1164
1165                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1166                 prev = entry;
1167
1168                 if (offset < entry->offset)
1169                         n = n->rb_left;
1170                 else if (offset > entry->offset)
1171                         n = n->rb_right;
1172                 else
1173                         break;
1174         }
1175
1176         if (bitmap_only) {
1177                 if (!entry)
1178                         return NULL;
1179                 if (entry->bitmap)
1180                         return entry;
1181
1182                 /*
1183                  * bitmap entry and extent entry may share same offset,
1184                  * in that case, bitmap entry comes after extent entry.
1185                  */
1186                 n = rb_next(n);
1187                 if (!n)
1188                         return NULL;
1189                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1190                 if (entry->offset != offset)
1191                         return NULL;
1192
1193                 WARN_ON(!entry->bitmap);
1194                 return entry;
1195         } else if (entry) {
1196                 if (entry->bitmap) {
1197                         /*
1198                          * if previous extent entry covers the offset,
1199                          * we should return it instead of the bitmap entry
1200                          */
1201                         n = &entry->offset_index;
1202                         while (1) {
1203                                 n = rb_prev(n);
1204                                 if (!n)
1205                                         break;
1206                                 prev = rb_entry(n, struct btrfs_free_space,
1207                                                 offset_index);
1208                                 if (!prev->bitmap) {
1209                                         if (prev->offset + prev->bytes > offset)
1210                                                 entry = prev;
1211                                         break;
1212                                 }
1213                         }
1214                 }
1215                 return entry;
1216         }
1217
1218         if (!prev)
1219                 return NULL;
1220
1221         /* find last entry before the 'offset' */
1222         entry = prev;
1223         if (entry->offset > offset) {
1224                 n = rb_prev(&entry->offset_index);
1225                 if (n) {
1226                         entry = rb_entry(n, struct btrfs_free_space,
1227                                         offset_index);
1228                         BUG_ON(entry->offset > offset);
1229                 } else {
1230                         if (fuzzy)
1231                                 return entry;
1232                         else
1233                                 return NULL;
1234                 }
1235         }
1236
1237         if (entry->bitmap) {
1238                 n = &entry->offset_index;
1239                 while (1) {
1240                         n = rb_prev(n);
1241                         if (!n)
1242                                 break;
1243                         prev = rb_entry(n, struct btrfs_free_space,
1244                                         offset_index);
1245                         if (!prev->bitmap) {
1246                                 if (prev->offset + prev->bytes > offset)
1247                                         return prev;
1248                                 break;
1249                         }
1250                 }
1251                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1252                         return entry;
1253         } else if (entry->offset + entry->bytes > offset)
1254                 return entry;
1255
1256         if (!fuzzy)
1257                 return NULL;
1258
1259         while (1) {
1260                 if (entry->bitmap) {
1261                         if (entry->offset + BITS_PER_BITMAP *
1262                             ctl->unit > offset)
1263                                 break;
1264                 } else {
1265                         if (entry->offset + entry->bytes > offset)
1266                                 break;
1267                 }
1268
1269                 n = rb_next(&entry->offset_index);
1270                 if (!n)
1271                         return NULL;
1272                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1273         }
1274         return entry;
1275 }
1276
1277 static inline void
1278 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1279                     struct btrfs_free_space *info)
1280 {
1281         rb_erase(&info->offset_index, &ctl->free_space_offset);
1282         ctl->free_extents--;
1283 }
1284
1285 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1286                               struct btrfs_free_space *info)
1287 {
1288         __unlink_free_space(ctl, info);
1289         ctl->free_space -= info->bytes;
1290 }
1291
1292 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1293                            struct btrfs_free_space *info)
1294 {
1295         int ret = 0;
1296
1297         BUG_ON(!info->bitmap && !info->bytes);
1298         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1299                                  &info->offset_index, (info->bitmap != NULL));
1300         if (ret)
1301                 return ret;
1302
1303         ctl->free_space += info->bytes;
1304         ctl->free_extents++;
1305         return ret;
1306 }
1307
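/*
 * Recompute how many extent entries we allow before spilling over into
 * bitmaps, so the cache stays at or below MAX_CACHE_BYTES_PER_GIG of memory
 * for every gigabyte of space in the block group.
 */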
1308 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1309 {
1310         struct btrfs_block_group_cache *block_group = ctl->private;
1311         u64 max_bytes;
1312         u64 bitmap_bytes;
1313         u64 extent_bytes;
1314         u64 size = block_group->key.offset;
1315         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1316         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1317
1318         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1319
1320         /*
1321          * The goal is to keep the total amount of memory used per 1gb of space
1322          * at or below 32k, so we need to adjust how much memory we allow to be
1323          * used by extent based free space tracking
1324          */
1325         if (size < 1024 * 1024 * 1024)
1326                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1327         else
1328                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1329                         div64_u64(size, 1024 * 1024 * 1024);
1330
1331         /*
1332          * we want to account for 1 more bitmap than what we have so we can make
1333          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1334          * we add more bitmaps.
1335          */
1336         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1337
1338         if (bitmap_bytes >= max_bytes) {
1339                 ctl->extents_thresh = 0;
1340                 return;
1341         }
1342
1343         /*
1344          * we want the extent entry threshold to always be at most 1/2 the max
1345          * bytes we can have, or whatever is less than that.
1346          */
1347         extent_bytes = max_bytes - bitmap_bytes;
1348         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1349
1350         ctl->extents_thresh =
1351                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1352 }
1353
1354 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1355                                        struct btrfs_free_space *info,
1356                                        u64 offset, u64 bytes)
1357 {
1358         unsigned long start, count;
1359
1360         start = offset_to_bit(info->offset, ctl->unit, offset);
1361         count = bytes_to_bits(bytes, ctl->unit);
1362         BUG_ON(start + count > BITS_PER_BITMAP);
1363
1364         bitmap_clear(info->bitmap, start, count);
1365
1366         info->bytes -= bytes;
1367 }
1368
1369 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1370                               struct btrfs_free_space *info, u64 offset,
1371                               u64 bytes)
1372 {
1373         __bitmap_clear_bits(ctl, info, offset, bytes);
1374         ctl->free_space -= bytes;
1375 }
1376
1377 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1378                             struct btrfs_free_space *info, u64 offset,
1379                             u64 bytes)
1380 {
1381         unsigned long start, count;
1382
1383         start = offset_to_bit(info->offset, ctl->unit, offset);
1384         count = bytes_to_bits(bytes, ctl->unit);
1385         BUG_ON(start + count > BITS_PER_BITMAP);
1386
1387         bitmap_set(info->bitmap, start, count);
1388
1389         info->bytes += bytes;
1390         ctl->free_space += bytes;
1391 }
1392
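/*
 * Scan @bitmap_info for a run of free bits at least *bytes long starting at
 * or after *offset.  On success *offset and *bytes are updated to describe
 * the run that was found and 0 is returned, otherwise -1.
 */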
1393 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1394                          struct btrfs_free_space *bitmap_info, u64 *offset,
1395                          u64 *bytes)
1396 {
1397         unsigned long found_bits = 0;
1398         unsigned long bits, i;
1399         unsigned long next_zero;
1400
1401         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1402                           max_t(u64, *offset, bitmap_info->offset));
1403         bits = bytes_to_bits(*bytes, ctl->unit);
1404
1405         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1406              i < BITS_PER_BITMAP;
1407              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1408                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1409                                                BITS_PER_BITMAP, i);
1410                 if ((next_zero - i) >= bits) {
1411                         found_bits = next_zero - i;
1412                         break;
1413                 }
1414                 i = next_zero;
1415         }
1416
1417         if (found_bits) {
1418                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1419                 *bytes = (u64)(found_bits) * ctl->unit;
1420                 return 0;
1421         }
1422
1423         return -1;
1424 }
1425
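/*
 * Starting from the bitmap that would cover *offset, walk the offset tree
 * and return the first entry, extent or bitmap, that can satisfy *bytes,
 * updating *offset and *bytes to describe the space that was found.
 */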
1426 static struct btrfs_free_space *
1427 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1428 {
1429         struct btrfs_free_space *entry;
1430         struct rb_node *node;
1431         int ret;
1432
1433         if (!ctl->free_space_offset.rb_node)
1434                 return NULL;
1435
1436         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1437         if (!entry)
1438                 return NULL;
1439
1440         for (node = &entry->offset_index; node; node = rb_next(node)) {
1441                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1442                 if (entry->bytes < *bytes)
1443                         continue;
1444
1445                 if (entry->bitmap) {
1446                         ret = search_bitmap(ctl, entry, offset, bytes);
1447                         if (!ret)
1448                                 return entry;
1449                         continue;
1450                 }
1451
1452                 *offset = entry->offset;
1453                 *bytes = entry->bytes;
1454                 return entry;
1455         }
1456
1457         return NULL;
1458 }
1459
1460 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1461                            struct btrfs_free_space *info, u64 offset)
1462 {
1463         info->offset = offset_to_bitmap(ctl, offset);
1464         info->bytes = 0;
1465         link_free_space(ctl, info);
1466         ctl->total_bitmaps++;
1467
1468         ctl->op->recalc_thresholds(ctl);
1469 }
1470
1471 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1472                         struct btrfs_free_space *bitmap_info)
1473 {
1474         unlink_free_space(ctl, bitmap_info);
1475         kfree(bitmap_info->bitmap);
1476         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1477         ctl->total_bitmaps--;
1478         ctl->op->recalc_thresholds(ctl);
1479 }
1480
1481 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1482                               struct btrfs_free_space *bitmap_info,
1483                               u64 *offset, u64 *bytes)
1484 {
1485         u64 end;
1486         u64 search_start, search_bytes;
1487         int ret;
1488
1489 again:
1490         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1491
1492         /*
1493          * XXX - this can go away after a few releases.
1494          *
1495          * since the only user of btrfs_remove_free_space is the tree logging
1496          * stuff, and the only way to test that is under crash conditions, we
1497          * want to have this debug stuff here just in case something's not
1498          * working.  Search the bitmap for the space we are trying to use to
1499          * make sure it's actually there.  If it's not there then we need to stop
1500          * because something has gone wrong.
1501          */
1502         search_start = *offset;
1503         search_bytes = *bytes;
1504         search_bytes = min(search_bytes, end - search_start + 1);
1505         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1506         BUG_ON(ret < 0 || search_start != *offset);
1507
1508         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1509                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1510                 *bytes -= end - *offset + 1;
1511                 *offset = end + 1;
1512         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1513                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1514                 *bytes = 0;
1515         }
1516
1517         if (*bytes) {
1518                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1519                 if (!bitmap_info->bytes)
1520                         free_bitmap(ctl, bitmap_info);
1521
1522                 /*
1523                  * no entry after this bitmap, but we still have bytes to
1524                  * remove, so something has gone wrong.
1525                  */
1526                 if (!next)
1527                         return -EINVAL;
1528
1529                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1530                                        offset_index);
1531
1532                 /*
1533                  * if the next entry isn't a bitmap we need to return to let the
1534                  * extent stuff do its work.
1535                  */
1536                 if (!bitmap_info->bitmap)
1537                         return -EAGAIN;
1538
1539                 /*
1540                  * Ok the next item is a bitmap, but it may not actually hold
1541                  * the information for the rest of this free space stuff, so
1542                  * look for it, and if we don't find it return so we can try
1543                  * everything over again.
1544                  */
1545                 search_start = *offset;
1546                 search_bytes = *bytes;
1547                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1548                                     &search_bytes);
1549                 if (ret < 0 || search_start != *offset)
1550                         return -EAGAIN;
1551
1552                 goto again;
1553         } else if (!bitmap_info->bytes)
1554                 free_bitmap(ctl, bitmap_info);
1555
1556         return 0;
1557 }
1558
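/*
 * mark as much of [offset, offset + bytes) as fits inside this bitmap entry
 * as free, and return the number of bytes that were actually set.
 */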
1559 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1560                                struct btrfs_free_space *info, u64 offset,
1561                                u64 bytes)
1562 {
1563         u64 bytes_to_set = 0;
1564         u64 end;
1565
1566         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1567
1568         bytes_to_set = min(end - offset, bytes);
1569
1570         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1571
1572         return bytes_to_set;
1573
1574 }
1575
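/*
 * decide whether a new free space range should be tracked in a bitmap or
 * kept as a plain extent entry.
 */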
1576 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1577                       struct btrfs_free_space *info)
1578 {
1579         struct btrfs_block_group_cache *block_group = ctl->private;
1580
1581         /*
1582          * If we are below the extents threshold then we can add this as an
1583          * extent, and don't have to deal with the bitmap
1584          */
1585         if (ctl->free_extents < ctl->extents_thresh) {
1586                 /*
1587                  * If this block group has some small extents we don't want to
1588                  * use up all of our free slots in the cache with them, we want
1589                  * to reserve them for larger extents.  However, if we have plenty
1590                  * of cache left then go ahead and add them; no sense in adding
1591                  * the overhead of a bitmap if we don't have to.
1592                  */
1593                 if (info->bytes <= block_group->sectorsize * 4) {
1594                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1595                                 return false;
1596                 } else {
1597                         return false;
1598                 }
1599         }
1600
1601         /*
1602          * some block groups are smaller than what a single bitmap would cover,
1603          * so don't even bother to create a bitmap for those
1604          */
1605         if (BITS_PER_BITMAP * block_group->sectorsize >
1606             block_group->key.offset)
1607                 return false;
1608
1609         return true;
1610 }
1611
1612 static struct btrfs_free_space_op free_space_op = {
1613         .recalc_thresholds      = recalculate_thresholds,
1614         .use_bitmap             = use_bitmap,
1615 };
1616
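/*
 * try to stuff the range described by 'info' into an existing or newly
 * created bitmap.  Returns 1 if the whole range was added to bitmaps, 0 if
 * bitmaps shouldn't be used for it, or a negative errno on allocation
 * failure.
 */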
1617 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1618                               struct btrfs_free_space *info)
1619 {
1620         struct btrfs_free_space *bitmap_info;
1621         struct btrfs_block_group_cache *block_group = NULL;
1622         int added = 0;
1623         u64 bytes, offset, bytes_added;
1624         int ret;
1625
1626         bytes = info->bytes;
1627         offset = info->offset;
1628
1629         if (!ctl->op->use_bitmap(ctl, info))
1630                 return 0;
1631
1632         if (ctl->op == &free_space_op)
1633                 block_group = ctl->private;
1634 again:
1635         /*
1636          * Since we link bitmaps right into the cluster we need to see if we
1637          * have a cluster here, and if it has our bitmap we need to add the
1638          * free space to that bitmap.
1639          */
1640         if (block_group && !list_empty(&block_group->cluster_list)) {
1641                 struct btrfs_free_cluster *cluster;
1642                 struct rb_node *node;
1643                 struct btrfs_free_space *entry;
1644
1645                 cluster = list_entry(block_group->cluster_list.next,
1646                                      struct btrfs_free_cluster,
1647                                      block_group_list);
1648                 spin_lock(&cluster->lock);
1649                 node = rb_first(&cluster->root);
1650                 if (!node) {
1651                         spin_unlock(&cluster->lock);
1652                         goto no_cluster_bitmap;
1653                 }
1654
1655                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1656                 if (!entry->bitmap) {
1657                         spin_unlock(&cluster->lock);
1658                         goto no_cluster_bitmap;
1659                 }
1660
1661                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1662                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1663                                                           offset, bytes);
1664                         bytes -= bytes_added;
1665                         offset += bytes_added;
1666                 }
1667                 spin_unlock(&cluster->lock);
1668                 if (!bytes) {
1669                         ret = 1;
1670                         goto out;
1671                 }
1672         }
1673
1674 no_cluster_bitmap:
1675         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1676                                          1, 0);
1677         if (!bitmap_info) {
1678                 BUG_ON(added);
1679                 goto new_bitmap;
1680         }
1681
1682         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1683         bytes -= bytes_added;
1684         offset += bytes_added;
1685         added = 0;
1686
1687         if (!bytes) {
1688                 ret = 1;
1689                 goto out;
1690         } else
1691                 goto again;
1692
1693 new_bitmap:
1694         if (info && info->bitmap) {
1695                 add_new_bitmap(ctl, info, offset);
1696                 added = 1;
1697                 info = NULL;
1698                 goto again;
1699         } else {
1700                 spin_unlock(&ctl->tree_lock);
1701
1702                 /* no pre-allocated info, allocate a new one */
1703                 if (!info) {
1704                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1705                                                  GFP_NOFS);
1706                         if (!info) {
1707                                 spin_lock(&ctl->tree_lock);
1708                                 ret = -ENOMEM;
1709                                 goto out;
1710                         }
1711                 }
1712
1713                 /* allocate the bitmap */
1714                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1715                 spin_lock(&ctl->tree_lock);
1716                 if (!info->bitmap) {
1717                         ret = -ENOMEM;
1718                         goto out;
1719                 }
1720                 goto again;
1721         }
1722
1723 out:
1724         if (info) {
1725                 if (info->bitmap)
1726                         kfree(info->bitmap);
1727                 kmem_cache_free(btrfs_free_space_cachep, info);
1728         }
1729
1730         return ret;
1731 }
1732
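/*
 * merge 'info' with any extent entries that directly precede or follow it
 * in the tree; returns true if a merge happened.
 */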
1733 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1734                           struct btrfs_free_space *info, bool update_stat)
1735 {
1736         struct btrfs_free_space *left_info;
1737         struct btrfs_free_space *right_info;
1738         bool merged = false;
1739         u64 offset = info->offset;
1740         u64 bytes = info->bytes;
1741
1742         /*
1743          * first we want to see if there is free space adjacent to the range we
1744          * are adding; if there is, remove that struct and add a new one to
1745          * cover the entire range
1746          */
1747         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1748         if (right_info && rb_prev(&right_info->offset_index))
1749                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1750                                      struct btrfs_free_space, offset_index);
1751         else
1752                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1753
1754         if (right_info && !right_info->bitmap) {
1755                 if (update_stat)
1756                         unlink_free_space(ctl, right_info);
1757                 else
1758                         __unlink_free_space(ctl, right_info);
1759                 info->bytes += right_info->bytes;
1760                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1761                 merged = true;
1762         }
1763
1764         if (left_info && !left_info->bitmap &&
1765             left_info->offset + left_info->bytes == offset) {
1766                 if (update_stat)
1767                         unlink_free_space(ctl, left_info);
1768                 else
1769                         __unlink_free_space(ctl, left_info);
1770                 info->offset = left_info->offset;
1771                 info->bytes += left_info->bytes;
1772                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1773                 merged = true;
1774         }
1775
1776         return merged;
1777 }
1778
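/*
 * add a free space range to the ctl, merging it with adjacent extent
 * entries where possible and falling back to a bitmap once the extent
 * threshold has been crossed.
 */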
1779 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1780                            u64 offset, u64 bytes)
1781 {
1782         struct btrfs_free_space *info;
1783         int ret = 0;
1784
1785         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1786         if (!info)
1787                 return -ENOMEM;
1788
1789         info->offset = offset;
1790         info->bytes = bytes;
1791
1792         spin_lock(&ctl->tree_lock);
1793
1794         if (try_merge_free_space(ctl, info, true))
1795                 goto link;
1796
1797         /*
1798          * If there was no extent directly to the left or right of this new
1799          * extent then we know we're going to have to allocate a new extent, so
1800          * before we do that, see if we need to drop this into a bitmap
1801          */
1802         ret = insert_into_bitmap(ctl, info);
1803         if (ret < 0) {
1804                 goto out;
1805         } else if (ret) {
1806                 ret = 0;
1807                 goto out;
1808         }
1809 link:
1810         ret = link_free_space(ctl, info);
1811         if (ret)
1812                 kmem_cache_free(btrfs_free_space_cachep, info);
1813 out:
1814         spin_unlock(&ctl->tree_lock);
1815
1816         if (ret) {
1817                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1818                 BUG_ON(ret == -EEXIST);
1819         }
1820
1821         return ret;
1822 }
1823
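/*
 * remove a range from the block group's free space accounting, splitting
 * extent entries and clearing bitmap bits as needed.
 */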
1824 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1825                             u64 offset, u64 bytes)
1826 {
1827         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1828         struct btrfs_free_space *info;
1829         struct btrfs_free_space *next_info = NULL;
1830         int ret = 0;
1831
1832         spin_lock(&ctl->tree_lock);
1833
1834 again:
1835         info = tree_search_offset(ctl, offset, 0, 0);
1836         if (!info) {
1837                 /*
1838                  * oops, we didn't find an extent that matched the space we wanted
1839                  * to remove; look for a bitmap instead
1840                  */
1841                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1842                                           1, 0);
1843                 if (!info) {
1844                         WARN_ON(1);
1845                         goto out_lock;
1846                 }
1847         }
1848
1849         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1850                 u64 end;
1851                 next_info = rb_entry(rb_next(&info->offset_index),
1852                                              struct btrfs_free_space,
1853                                              offset_index);
1854
1855                 if (next_info->bitmap)
1856                         end = next_info->offset +
1857                               BITS_PER_BITMAP * ctl->unit - 1;
1858                 else
1859                         end = next_info->offset + next_info->bytes;
1860
1861                 if (next_info->bytes < bytes ||
1862                     next_info->offset > offset || offset > end) {
1863                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1864                               " trying to use %llu\n",
1865                               (unsigned long long)info->offset,
1866                               (unsigned long long)info->bytes,
1867                               (unsigned long long)bytes);
1868                         WARN_ON(1);
1869                         ret = -EINVAL;
1870                         goto out_lock;
1871                 }
1872
1873                 info = next_info;
1874         }
1875
1876         if (info->bytes == bytes) {
1877                 unlink_free_space(ctl, info);
1878                 if (info->bitmap) {
1879                         kfree(info->bitmap);
1880                         ctl->total_bitmaps--;
1881                 }
1882                 kmem_cache_free(btrfs_free_space_cachep, info);
1883                 ret = 0;
1884                 goto out_lock;
1885         }
1886
1887         if (!info->bitmap && info->offset == offset) {
1888                 unlink_free_space(ctl, info);
1889                 info->offset += bytes;
1890                 info->bytes -= bytes;
1891                 ret = link_free_space(ctl, info);
1892                 WARN_ON(ret);
1893                 goto out_lock;
1894         }
1895
1896         if (!info->bitmap && info->offset <= offset &&
1897             info->offset + info->bytes >= offset + bytes) {
1898                 u64 old_start = info->offset;
1899                 /*
1900                  * we're freeing space in the middle of the info,
1901                  * this can happen during tree log replay
1902                  *
1903                  * first unlink the old info and then
1904                  * insert it again after the hole we're creating
1905                  */
1906                 unlink_free_space(ctl, info);
1907                 if (offset + bytes < info->offset + info->bytes) {
1908                         u64 old_end = info->offset + info->bytes;
1909
1910                         info->offset = offset + bytes;
1911                         info->bytes = old_end - info->offset;
1912                         ret = link_free_space(ctl, info);
1913                         WARN_ON(ret);
1914                         if (ret)
1915                                 goto out_lock;
1916                 } else {
1917                         /* the hole we're creating ends at the end
1918                          * of the info struct, just free the info
1919                          */
1920                         kmem_cache_free(btrfs_free_space_cachep, info);
1921                 }
1922                 spin_unlock(&ctl->tree_lock);
1923
1924                 /* step two, insert a new info struct to cover
1925                  * anything before the hole
1926                  */
1927                 ret = btrfs_add_free_space(block_group, old_start,
1928                                            offset - old_start);
1929                 WARN_ON(ret);
1930                 goto out;
1931         }
1932
1933         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1934         if (ret == -EAGAIN)
1935                 goto again;
1936         BUG_ON(ret);
1937 out_lock:
1938         spin_unlock(&ctl->tree_lock);
1939 out:
1940         return ret;
1941 }
1942
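/*
 * debugging helper: print every free space entry in the block group and
 * count how many are at least 'bytes' in size.
 */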
1943 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1944                            u64 bytes)
1945 {
1946         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1947         struct btrfs_free_space *info;
1948         struct rb_node *n;
1949         int count = 0;
1950
1951         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1952                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1953                 if (info->bytes >= bytes)
1954                         count++;
1955                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1956                        (unsigned long long)info->offset,
1957                        (unsigned long long)info->bytes,
1958                        (info->bitmap) ? "yes" : "no");
1959         }
1960         printk(KERN_INFO "block group has cluster?: %s\n",
1961                list_empty(&block_group->cluster_list) ? "no" : "yes");
1962         printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
1963                count, (unsigned long long)bytes);
1964 }
1965
1966 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1967 {
1968         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1969
1970         spin_lock_init(&ctl->tree_lock);
1971         ctl->unit = block_group->sectorsize;
1972         ctl->start = block_group->key.objectid;
1973         ctl->private = block_group;
1974         ctl->op = &free_space_op;
1975
1976         /*
1977          * we only want to have 32k of ram per block group for keeping
1978          * track of free space, and if we pass 1/2 of that we want to
1979          * start converting things over to using bitmaps
1980          */
1981         ctl->extents_thresh = ((1024 * 32) / 2) /
1982                                 sizeof(struct btrfs_free_space);
1983 }
1984
1985 /*
1986  * for a given cluster, put all of its extents back into the free
1987  * space cache.  If the block group passed doesn't match the block group
1988  * pointed to by the cluster, someone else raced in and freed the
1989  * cluster already.  In that case, we just return without changing anything
1990  */
1991 static int
1992 __btrfs_return_cluster_to_free_space(
1993                              struct btrfs_block_group_cache *block_group,
1994                              struct btrfs_free_cluster *cluster)
1995 {
1996         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1997         struct btrfs_free_space *entry;
1998         struct rb_node *node;
1999
2000         spin_lock(&cluster->lock);
2001         if (cluster->block_group != block_group)
2002                 goto out;
2003
2004         cluster->block_group = NULL;
2005         cluster->window_start = 0;
2006         list_del_init(&cluster->block_group_list);
2007
2008         node = rb_first(&cluster->root);
2009         while (node) {
2010                 bool bitmap;
2011
2012                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2013                 node = rb_next(&entry->offset_index);
2014                 rb_erase(&entry->offset_index, &cluster->root);
2015
2016                 bitmap = (entry->bitmap != NULL);
2017                 if (!bitmap)
2018                         try_merge_free_space(ctl, entry, false);
2019                 tree_insert_offset(&ctl->free_space_offset,
2020                                    entry->offset, &entry->offset_index, bitmap);
2021         }
2022         cluster->root = RB_ROOT;
2023
2024 out:
2025         spin_unlock(&cluster->lock);
2026         btrfs_put_block_group(block_group);
2027         return 0;
2028 }
2029
2030 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2031 {
2032         struct btrfs_free_space *info;
2033         struct rb_node *node;
2034
2035         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2036                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2037                 if (!info->bitmap) {
2038                         unlink_free_space(ctl, info);
2039                         kmem_cache_free(btrfs_free_space_cachep, info);
2040                 } else {
2041                         free_bitmap(ctl, info);
2042                 }
2043                 if (need_resched()) {
2044                         spin_unlock(&ctl->tree_lock);
2045                         cond_resched();
2046                         spin_lock(&ctl->tree_lock);
2047                 }
2048         }
2049 }
2050
2051 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2052 {
2053         spin_lock(&ctl->tree_lock);
2054         __btrfs_remove_free_space_cache_locked(ctl);
2055         spin_unlock(&ctl->tree_lock);
2056 }
2057
2058 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2059 {
2060         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2061         struct btrfs_free_cluster *cluster;
2062         struct list_head *head;
2063
2064         spin_lock(&ctl->tree_lock);
2065         while ((head = block_group->cluster_list.next) !=
2066                &block_group->cluster_list) {
2067                 cluster = list_entry(head, struct btrfs_free_cluster,
2068                                      block_group_list);
2069
2070                 WARN_ON(cluster->block_group != block_group);
2071                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2072                 if (need_resched()) {
2073                         spin_unlock(&ctl->tree_lock);
2074                         cond_resched();
2075                         spin_lock(&ctl->tree_lock);
2076                 }
2077         }
2078         __btrfs_remove_free_space_cache_locked(ctl);
2079         spin_unlock(&ctl->tree_lock);
2080
2081 }
2082
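/*
 * allocate 'bytes' from the block group's free space cache, searching for
 * an entry of at least bytes + empty_size.  Returns the start offset of
 * the allocation, or 0 if nothing suitable was found.
 */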
2083 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2084                                u64 offset, u64 bytes, u64 empty_size)
2085 {
2086         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2087         struct btrfs_free_space *entry = NULL;
2088         u64 bytes_search = bytes + empty_size;
2089         u64 ret = 0;
2090
2091         spin_lock(&ctl->tree_lock);
2092         entry = find_free_space(ctl, &offset, &bytes_search);
2093         if (!entry)
2094                 goto out;
2095
2096         ret = offset;
2097         if (entry->bitmap) {
2098                 bitmap_clear_bits(ctl, entry, offset, bytes);
2099                 if (!entry->bytes)
2100                         free_bitmap(ctl, entry);
2101         } else {
2102                 unlink_free_space(ctl, entry);
2103                 entry->offset += bytes;
2104                 entry->bytes -= bytes;
2105                 if (!entry->bytes)
2106                         kmem_cache_free(btrfs_free_space_cachep, entry);
2107                 else
2108                         link_free_space(ctl, entry);
2109         }
2110
2111 out:
2112         spin_unlock(&ctl->tree_lock);
2113
2114         return ret;
2115 }
2116
2117 /*
2118  * given a cluster, put all of its extents back into the free space
2119  * cache.  If a block group is passed, this function will only free
2120  * a cluster that belongs to the passed block group.
2121  *
2122  * Otherwise, it'll get a reference on the block group pointed to by the
2123  * cluster and remove the cluster from it.
2124  */
2125 int btrfs_return_cluster_to_free_space(
2126                                struct btrfs_block_group_cache *block_group,
2127                                struct btrfs_free_cluster *cluster)
2128 {
2129         struct btrfs_free_space_ctl *ctl;
2130         int ret;
2131
2132         /* first, get a safe pointer to the block group */
2133         spin_lock(&cluster->lock);
2134         if (!block_group) {
2135                 block_group = cluster->block_group;
2136                 if (!block_group) {
2137                         spin_unlock(&cluster->lock);
2138                         return 0;
2139                 }
2140         } else if (cluster->block_group != block_group) {
2141                 /* someone else has already freed it, don't redo their work */
2142                 spin_unlock(&cluster->lock);
2143                 return 0;
2144         }
2145         atomic_inc(&block_group->count);
2146         spin_unlock(&cluster->lock);
2147
2148         ctl = block_group->free_space_ctl;
2149
2150         /* now return any extents the cluster had on it */
2151         spin_lock(&ctl->tree_lock);
2152         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2153         spin_unlock(&ctl->tree_lock);
2154
2155         /* finally drop our ref */
2156         btrfs_put_block_group(block_group);
2157         return ret;
2158 }
2159
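/*
 * allocate 'bytes' from a bitmap entry that belongs to a cluster, starting
 * the search at min_start.  Returns the start offset, or 0 if the bitmap
 * has no run of free bits that is large enough.
 */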
2160 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2161                                    struct btrfs_free_cluster *cluster,
2162                                    struct btrfs_free_space *entry,
2163                                    u64 bytes, u64 min_start)
2164 {
2165         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2166         int err;
2167         u64 search_start = cluster->window_start;
2168         u64 search_bytes = bytes;
2169         u64 ret = 0;
2170
2171         search_start = min_start;
2172         search_bytes = bytes;
2173
2174         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2175         if (err)
2176                 return 0;
2177
2178         ret = search_start;
2179         __bitmap_clear_bits(ctl, entry, ret, bytes);
2180
2181         return ret;
2182 }
2183
2184 /*
2185  * given a cluster, try to allocate 'bytes' from it, returns 0
2186  * if it couldn't find anything suitably large, or a logical disk offset
2187  * if things worked out
2188  */
2189 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2190                              struct btrfs_free_cluster *cluster, u64 bytes,
2191                              u64 min_start)
2192 {
2193         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2194         struct btrfs_free_space *entry = NULL;
2195         struct rb_node *node;
2196         u64 ret = 0;
2197
2198         spin_lock(&cluster->lock);
2199         if (bytes > cluster->max_size)
2200                 goto out;
2201
2202         if (cluster->block_group != block_group)
2203                 goto out;
2204
2205         node = rb_first(&cluster->root);
2206         if (!node)
2207                 goto out;
2208
2209         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2210         while (1) {
2211                 if (entry->bytes < bytes ||
2212                     (!entry->bitmap && entry->offset < min_start)) {
2213                         node = rb_next(&entry->offset_index);
2214                         if (!node)
2215                                 break;
2216                         entry = rb_entry(node, struct btrfs_free_space,
2217                                          offset_index);
2218                         continue;
2219                 }
2220
2221                 if (entry->bitmap) {
2222                         ret = btrfs_alloc_from_bitmap(block_group,
2223                                                       cluster, entry, bytes,
2224                                                       min_start);
2225                         if (ret == 0) {
2226                                 node = rb_next(&entry->offset_index);
2227                                 if (!node)
2228                                         break;
2229                                 entry = rb_entry(node, struct btrfs_free_space,
2230                                                  offset_index);
2231                                 continue;
2232                         }
2233                 } else {
2234                         ret = entry->offset;
2235
2236                         entry->offset += bytes;
2237                         entry->bytes -= bytes;
2238                 }
2239
2240                 if (entry->bytes == 0)
2241                         rb_erase(&entry->offset_index, &cluster->root);
2242                 break;
2243         }
2244 out:
2245         spin_unlock(&cluster->lock);
2246
2247         if (!ret)
2248                 return 0;
2249
2250         spin_lock(&ctl->tree_lock);
2251
2252         ctl->free_space -= bytes;
2253         if (entry->bytes == 0) {
2254                 ctl->free_extents--;
2255                 if (entry->bitmap) {
2256                         kfree(entry->bitmap);
2257                         ctl->total_bitmaps--;
2258                         ctl->op->recalc_thresholds(ctl);
2259                 }
2260                 kmem_cache_free(btrfs_free_space_cachep, entry);
2261         }
2262
2263         spin_unlock(&ctl->tree_lock);
2264
2265         return ret;
2266 }
2267
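/*
 * try to build a cluster out of a single bitmap entry: find a run of set
 * bits big enough for 'bytes' and keep widening the window until at least
 * 'min_bytes' worth of bits have been found, then move the entry from the
 * free space tree into the cluster's rbtree.
 */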
2268 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2269                                 struct btrfs_free_space *entry,
2270                                 struct btrfs_free_cluster *cluster,
2271                                 u64 offset, u64 bytes, u64 min_bytes)
2272 {
2273         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2274         unsigned long next_zero;
2275         unsigned long i;
2276         unsigned long search_bits;
2277         unsigned long total_bits;
2278         unsigned long found_bits;
2279         unsigned long start = 0;
2280         unsigned long total_found = 0;
2281         int ret;
2282         bool found = false;
2283
2284         i = offset_to_bit(entry->offset, block_group->sectorsize,
2285                           max_t(u64, offset, entry->offset));
2286         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2287         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2288
2289 again:
2290         found_bits = 0;
2291         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2292              i < BITS_PER_BITMAP;
2293              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2294                 next_zero = find_next_zero_bit(entry->bitmap,
2295                                                BITS_PER_BITMAP, i);
2296                 if (next_zero - i >= search_bits) {
2297                         found_bits = next_zero - i;
2298                         break;
2299                 }
2300                 i = next_zero;
2301         }
2302
2303         if (!found_bits)
2304                 return -ENOSPC;
2305
2306         if (!found) {
2307                 start = i;
2308                 found = true;
2309         }
2310
2311         total_found += found_bits;
2312
2313         if (cluster->max_size < found_bits * block_group->sectorsize)
2314                 cluster->max_size = found_bits * block_group->sectorsize;
2315
2316         if (total_found < total_bits) {
2317                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2318                 if (i - start > total_bits * 2) {
2319                         total_found = 0;
2320                         cluster->max_size = 0;
2321                         found = false;
2322                 }
2323                 goto again;
2324         }
2325
2326         cluster->window_start = start * block_group->sectorsize +
2327                 entry->offset;
2328         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2329         ret = tree_insert_offset(&cluster->root, entry->offset,
2330                                  &entry->offset_index, 1);
2331         BUG_ON(ret);
2332
2333         return 0;
2334 }
2335
2336 /*
2337  * This searches the block group for just extents to fill the cluster with.
2338  */
2339 static noinline int
2340 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2341                         struct btrfs_free_cluster *cluster,
2342                         struct list_head *bitmaps, u64 offset, u64 bytes,
2343                         u64 min_bytes)
2344 {
2345         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2346         struct btrfs_free_space *first = NULL;
2347         struct btrfs_free_space *entry = NULL;
2348         struct btrfs_free_space *prev = NULL;
2349         struct btrfs_free_space *last;
2350         struct rb_node *node;
2351         u64 window_start;
2352         u64 window_free;
2353         u64 max_extent;
2354         u64 max_gap = 128 * 1024;
2355
2356         entry = tree_search_offset(ctl, offset, 0, 1);
2357         if (!entry)
2358                 return -ENOSPC;
2359
2360         /*
2361          * We don't want bitmaps, so just move along until we find a normal
2362          * extent entry.
2363          */
2364         while (entry->bitmap) {
2365                 if (list_empty(&entry->list))
2366                         list_add_tail(&entry->list, bitmaps);
2367                 node = rb_next(&entry->offset_index);
2368                 if (!node)
2369                         return -ENOSPC;
2370                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2371         }
2372
2373         window_start = entry->offset;
2374         window_free = entry->bytes;
2375         max_extent = entry->bytes;
2376         first = entry;
2377         last = entry;
2378         prev = entry;
2379
2380         while (window_free <= min_bytes) {
2381                 node = rb_next(&entry->offset_index);
2382                 if (!node)
2383                         return -ENOSPC;
2384                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2385
2386                 if (entry->bitmap) {
2387                         if (list_empty(&entry->list))
2388                                 list_add_tail(&entry->list, bitmaps);
2389                         continue;
2390                 }
2391
2392                 /*
2393                  * we haven't filled the empty size and the window is
2394                  * very large.  reset and try again
2395                  */
2396                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2397                     entry->offset - window_start > (min_bytes * 2)) {
2398                         first = entry;
2399                         window_start = entry->offset;
2400                         window_free = entry->bytes;
2401                         last = entry;
2402                         max_extent = entry->bytes;
2403                 } else {
2404                         last = entry;
2405                         window_free += entry->bytes;
2406                         if (entry->bytes > max_extent)
2407                                 max_extent = entry->bytes;
2408                 }
2409                 prev = entry;
2410         }
2411
2412         cluster->window_start = first->offset;
2413
2414         node = &first->offset_index;
2415
2416         /*
2417          * now we've found our entries, pull them out of the free space
2418          * cache and put them into the cluster rbtree
2419          */
2420         do {
2421                 int ret;
2422
2423                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2424                 node = rb_next(&entry->offset_index);
2425                 if (entry->bitmap)
2426                         continue;
2427
2428                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2429                 ret = tree_insert_offset(&cluster->root, entry->offset,
2430                                          &entry->offset_index, 0);
2431                 BUG_ON(ret);
2432         } while (node && entry != last);
2433
2434         cluster->max_size = max_extent;
2435
2436         return 0;
2437 }
2438
2439 /*
2440  * This specifically looks for bitmaps that may work in the cluster; we assume
2441  * that we have already failed to find extents that will work.
2442  */
2443 static noinline int
2444 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2445                      struct btrfs_free_cluster *cluster,
2446                      struct list_head *bitmaps, u64 offset, u64 bytes,
2447                      u64 min_bytes)
2448 {
2449         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2450         struct btrfs_free_space *entry;
2451         struct rb_node *node;
2452         int ret = -ENOSPC;
2453
2454         if (ctl->total_bitmaps == 0)
2455                 return -ENOSPC;
2456
2457         /*
2458          * First check our cached list of bitmaps and see if there is an entry
2459          * here that will work.
2460          */
2461         list_for_each_entry(entry, bitmaps, list) {
2462                 if (entry->bytes < min_bytes)
2463                         continue;
2464                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2465                                            bytes, min_bytes);
2466                 if (!ret)
2467                         return 0;
2468         }
2469
2470         /*
2471          * If we do have entries on our list and we are here then we didn't find
2472          * anything, so go ahead and get the next entry after the last entry in
2473          * this list and start the search from there.
2474          */
2475         if (!list_empty(bitmaps)) {
2476                 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2477                                    list);
2478                 node = rb_next(&entry->offset_index);
2479                 if (!node)
2480                         return -ENOSPC;
2481                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2482                 goto search;
2483         }
2484
2485         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2486         if (!entry)
2487                 return -ENOSPC;
2488
2489 search:
2490         node = &entry->offset_index;
2491         do {
2492                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2493                 node = rb_next(&entry->offset_index);
2494                 if (!entry->bitmap)
2495                         continue;
2496                 if (entry->bytes < min_bytes)
2497                         continue;
2498                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2499                                            bytes, min_bytes);
2500         } while (ret && node);
2501
2502         return ret;
2503 }
2504
2505 /*
2506  * here we try to find a cluster of blocks in a block group.  The goal
2507  * is to find at least bytes free and up to empty_size + bytes free.
2508  * We might not find them all in one contiguous area.
2509  *
2510  * returns zero and sets up cluster if things worked out, otherwise
2511  * it returns -ENOSPC
2512  */
2513 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2514                              struct btrfs_root *root,
2515                              struct btrfs_block_group_cache *block_group,
2516                              struct btrfs_free_cluster *cluster,
2517                              u64 offset, u64 bytes, u64 empty_size)
2518 {
2519         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2520         struct list_head bitmaps;
2521         struct btrfs_free_space *entry, *tmp;
2522         u64 min_bytes;
2523         int ret;
2524
2525         /* for metadata, allow allocations with more holes */
2526         if (btrfs_test_opt(root, SSD_SPREAD)) {
2527                 min_bytes = bytes + empty_size;
2528         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2529                 /*
2530                  * we want to do larger allocations when we are
2531                  * flushing out the delayed refs; it helps prevent
2532                  * making more work as we go along.
2533                  */
2534                 if (trans->transaction->delayed_refs.flushing)
2535                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2536                 else
2537                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2538         } else
2539                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
2540
2541         spin_lock(&ctl->tree_lock);
2542
2543         /*
2544          * If we know we don't have enough space to make a cluster don't even
2545          * bother doing all the work to try and find one.
2546          */
2547         if (ctl->free_space < min_bytes) {
2548                 spin_unlock(&ctl->tree_lock);
2549                 return -ENOSPC;
2550         }
2551
2552         spin_lock(&cluster->lock);
2553
2554         /* someone already found a cluster, hooray */
2555         if (cluster->block_group) {
2556                 ret = 0;
2557                 goto out;
2558         }
2559
2560         INIT_LIST_HEAD(&bitmaps);
2561         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2562                                       bytes, min_bytes);
2563         if (ret)
2564                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2565                                            offset, bytes, min_bytes);
2566
2567         /* Clear our temporary list */
2568         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2569                 list_del_init(&entry->list);
2570
2571         if (!ret) {
2572                 atomic_inc(&block_group->count);
2573                 list_add_tail(&cluster->block_group_list,
2574                               &block_group->cluster_list);
2575                 cluster->block_group = block_group;
2576         }
2577 out:
2578         spin_unlock(&cluster->lock);
2579         spin_unlock(&ctl->tree_lock);
2580
2581         return ret;
2582 }
2583
2584 /*
2585  * simple code to zero out a cluster
2586  */
2587 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2588 {
2589         spin_lock_init(&cluster->lock);
2590         spin_lock_init(&cluster->refill_lock);
2591         cluster->root = RB_ROOT;
2592         cluster->max_size = 0;
2593         INIT_LIST_HEAD(&cluster->block_group_list);
2594         cluster->block_group = NULL;
2595 }
2596
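/*
 * walk the free space in [start, end), temporarily removing each piece
 * from the cache, discarding anything at least 'minlen' bytes long and
 * re-adding it afterwards.  *trimmed accumulates the number of bytes
 * actually discarded.
 */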
2597 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2598                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2599 {
2600         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2601         struct btrfs_free_space *entry = NULL;
2602         struct btrfs_fs_info *fs_info = block_group->fs_info;
2603         u64 bytes = 0;
2604         u64 actually_trimmed;
2605         int ret = 0;
2606
2607         *trimmed = 0;
2608
2609         while (start < end) {
2610                 spin_lock(&ctl->tree_lock);
2611
2612                 if (ctl->free_space < minlen) {
2613                         spin_unlock(&ctl->tree_lock);
2614                         break;
2615                 }
2616
2617                 entry = tree_search_offset(ctl, start, 0, 1);
2618                 if (!entry)
2619                         entry = tree_search_offset(ctl,
2620                                                    offset_to_bitmap(ctl, start),
2621                                                    1, 1);
2622
2623                 if (!entry || entry->offset >= end) {
2624                         spin_unlock(&ctl->tree_lock);
2625                         break;
2626                 }
2627
2628                 if (entry->bitmap) {
2629                         ret = search_bitmap(ctl, entry, &start, &bytes);
2630                         if (!ret) {
2631                                 if (start >= end) {
2632                                         spin_unlock(&ctl->tree_lock);
2633                                         break;
2634                                 }
2635                                 bytes = min(bytes, end - start);
2636                                 bitmap_clear_bits(ctl, entry, start, bytes);
2637                                 if (entry->bytes == 0)
2638                                         free_bitmap(ctl, entry);
2639                         } else {
2640                                 start = entry->offset + BITS_PER_BITMAP *
2641                                         block_group->sectorsize;
2642                                 spin_unlock(&ctl->tree_lock);
2643                                 ret = 0;
2644                                 continue;
2645                         }
2646                 } else {
2647                         start = entry->offset;
2648                         bytes = min(entry->bytes, end - start);
2649                         unlink_free_space(ctl, entry);
2650                         kmem_cache_free(btrfs_free_space_cachep, entry);
2651                 }
2652
2653                 spin_unlock(&ctl->tree_lock);
2654
2655                 if (bytes >= minlen) {
2656                         struct btrfs_space_info *space_info;
2657                         int update = 0;
2658
2659                         space_info = block_group->space_info;
2660                         spin_lock(&space_info->lock);
2661                         spin_lock(&block_group->lock);
2662                         if (!block_group->ro) {
2663                                 block_group->reserved += bytes;
2664                                 space_info->bytes_reserved += bytes;
2665                                 update = 1;
2666                         }
2667                         spin_unlock(&block_group->lock);
2668                         spin_unlock(&space_info->lock);
2669
2670                         ret = btrfs_error_discard_extent(fs_info->extent_root,
2671                                                          start,
2672                                                          bytes,
2673                                                          &actually_trimmed);
2674
2675                         btrfs_add_free_space(block_group, start, bytes);
2676                         if (update) {
2677                                 spin_lock(&space_info->lock);
2678                                 spin_lock(&block_group->lock);
2679                                 if (block_group->ro)
2680                                         space_info->bytes_readonly += bytes;
2681                                 block_group->reserved -= bytes;
2682                                 space_info->bytes_reserved -= bytes;
2683                                 spin_unlock(&space_info->lock);
2684                                 spin_unlock(&block_group->lock);
2685                         }
2686
2687                         if (ret)
2688                                 break;
2689                         *trimmed += actually_trimmed;
2690                 }
2691                 start += bytes;
2692                 bytes = 0;
2693
2694                 if (fatal_signal_pending(current)) {
2695                         ret = -ERESTARTSYS;
2696                         break;
2697                 }
2698
2699                 cond_resched();
2700         }
2701
2702         return ret;
2703 }
2704
2705 /*
2706  * Find the left-most item in the cache tree, and then return the
2707  * smallest inode number in the item.
2708  *
2709  * Note: the returned inode number may not be the smallest one in
2710  * the tree, if the left-most item is a bitmap.
2711  */
2712 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2713 {
2714         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2715         struct btrfs_free_space *entry = NULL;
2716         u64 ino = 0;
2717
2718         spin_lock(&ctl->tree_lock);
2719
2720         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2721                 goto out;
2722
2723         entry = rb_entry(rb_first(&ctl->free_space_offset),
2724                          struct btrfs_free_space, offset_index);
2725
2726         if (!entry->bitmap) {
2727                 ino = entry->offset;
2728
2729                 unlink_free_space(ctl, entry);
2730                 entry->offset++;
2731                 entry->bytes--;
2732                 if (!entry->bytes)
2733                         kmem_cache_free(btrfs_free_space_cachep, entry);
2734                 else
2735                         link_free_space(ctl, entry);
2736         } else {
2737                 u64 offset = 0;
2738                 u64 count = 1;
2739                 int ret;
2740
2741                 ret = search_bitmap(ctl, entry, &offset, &count);
2742                 BUG_ON(ret);
2743
2744                 ino = offset;
2745                 bitmap_clear_bits(ctl, entry, offset, 1);
2746                 if (entry->bytes == 0)
2747                         free_bitmap(ctl, entry);
2748         }
2749 out:
2750         spin_unlock(&ctl->tree_lock);
2751
2752         return ino;
2753 }
2754
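/*
 * return the free-ino cache inode for this root, looking it up on disk and
 * stashing it in root->cache_inode the first time around.
 */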
2755 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2756                                     struct btrfs_path *path)
2757 {
2758         struct inode *inode = NULL;
2759
2760         spin_lock(&root->cache_lock);
2761         if (root->cache_inode)
2762                 inode = igrab(root->cache_inode);
2763         spin_unlock(&root->cache_lock);
2764         if (inode)
2765                 return inode;
2766
2767         inode = __lookup_free_space_inode(root, path, 0);
2768         if (IS_ERR(inode))
2769                 return inode;
2770
2771         spin_lock(&root->cache_lock);
2772         if (!btrfs_fs_closing(root->fs_info))
2773                 root->cache_inode = igrab(inode);
2774         spin_unlock(&root->cache_lock);
2775
2776         return inode;
2777 }
2778
2779 int create_free_ino_inode(struct btrfs_root *root,
2780                           struct btrfs_trans_handle *trans,
2781                           struct btrfs_path *path)
2782 {
2783         return __create_free_space_inode(root, trans, path,
2784                                          BTRFS_FREE_INO_OBJECTID, 0);
2785 }
2786
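/*
 * load the free inode number cache from disk into root->free_ino_ctl; the
 * load is skipped when the cache generation no longer matches the root's
 * generation or when the filesystem is being unmounted.
 */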
2787 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2788 {
2789         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2790         struct btrfs_path *path;
2791         struct inode *inode;
2792         int ret = 0;
2793         u64 root_gen = btrfs_root_generation(&root->root_item);
2794
2795         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2796                 return 0;
2797
2798         /*
2799          * If we're unmounting then just return, since this does a search on the
2800          * normal root and not the commit root and we could deadlock.
2801          */
2802         if (btrfs_fs_closing(fs_info))
2803                 return 0;
2804
2805         path = btrfs_alloc_path();
2806         if (!path)
2807                 return 0;
2808
2809         inode = lookup_free_ino_inode(root, path);
2810         if (IS_ERR(inode))
2811                 goto out;
2812
2813         if (root_gen != BTRFS_I(inode)->generation)
2814                 goto out_put;
2815
2816         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2817
2818         if (ret < 0)
2819                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2820                        "root %llu\n", root->root_key.objectid);
2821 out_put:
2822         iput(inode);
2823 out:
2824         btrfs_free_path(path);
2825         return ret;
2826 }
2827
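/*
 * write the free inode number cache for this root back to disk; on failure
 * the reserved metadata space is released and the error is returned.
 */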
2828 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2829                               struct btrfs_trans_handle *trans,
2830                               struct btrfs_path *path)
2831 {
2832         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2833         struct inode *inode;
2834         int ret;
2835
2836         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2837                 return 0;
2838
2839         inode = lookup_free_ino_inode(root, path);
2840         if (IS_ERR(inode))
2841                 return 0;
2842
2843         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2844         if (ret) {
2845                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2846 #ifdef DEBUG
2847                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2848                        "for root %llu\n", root->root_key.objectid);
2849 #endif
2850         }
2851
2852         iput(inode);
2853         return ret;
2854 }