1 /*
2  * Copyright (C) 2008 Red Hat.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/pagemap.h>
20 #include <linux/sched.h>
21 #include <linux/slab.h>
22 #include <linux/math64.h>
23 #include <linux/ratelimit.h>
24 #include "ctree.h"
25 #include "free-space-cache.h"
26 #include "transaction.h"
27 #include "disk-io.h"
28 #include "extent_io.h"
29 #include "inode-map.h"
30
31 #define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
32 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
33
34 static int link_free_space(struct btrfs_free_space_ctl *ctl,
35                            struct btrfs_free_space *info);
36
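/*
 * Find the free space header item keyed at 'offset' and iget the inode that
 * its key points to.  Returns the inode on success or an ERR_PTR on failure.
 */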
37 static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
38                                                struct btrfs_path *path,
39                                                u64 offset)
40 {
41         struct btrfs_key key;
42         struct btrfs_key location;
43         struct btrfs_disk_key disk_key;
44         struct btrfs_free_space_header *header;
45         struct extent_buffer *leaf;
46         struct inode *inode = NULL;
47         int ret;
48
49         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
50         key.offset = offset;
51         key.type = 0;
52
53         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54         if (ret < 0)
55                 return ERR_PTR(ret);
56         if (ret > 0) {
57                 btrfs_release_path(path);
58                 return ERR_PTR(-ENOENT);
59         }
60
61         leaf = path->nodes[0];
62         header = btrfs_item_ptr(leaf, path->slots[0],
63                                 struct btrfs_free_space_header);
64         btrfs_free_space_key(leaf, header, &disk_key);
65         btrfs_disk_key_to_cpu(&location, &disk_key);
66         btrfs_release_path(path);
67
68         inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
69         if (!inode)
70                 return ERR_PTR(-ENOENT);
71         if (IS_ERR(inode))
72                 return inode;
73         if (is_bad_inode(inode)) {
74                 iput(inode);
75                 return ERR_PTR(-ENOENT);
76         }
77
78         inode->i_mapping->flags &= ~__GFP_FS;
79
80         return inode;
81 }
82
83 struct inode *lookup_free_space_inode(struct btrfs_root *root,
84                                       struct btrfs_block_group_cache
85                                       *block_group, struct btrfs_path *path)
86 {
87         struct inode *inode = NULL;
88         u32 flags = BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
89
90         spin_lock(&block_group->lock);
91         if (block_group->inode)
92                 inode = igrab(block_group->inode);
93         spin_unlock(&block_group->lock);
94         if (inode)
95                 return inode;
96
97         inode = __lookup_free_space_inode(root, path,
98                                           block_group->key.objectid);
99         if (IS_ERR(inode))
100                 return inode;
101
102         spin_lock(&block_group->lock);
103         if (!((BTRFS_I(inode)->flags & flags) == flags)) {
104                 printk(KERN_INFO "Old style space inode found, converting.\n");
105                 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM |
106                         BTRFS_INODE_NODATACOW;
107                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
108         }
109
110         if (!block_group->iref) {
111                 block_group->inode = igrab(inode);
112                 block_group->iref = 1;
113         }
114         spin_unlock(&block_group->lock);
115
116         return inode;
117 }
118
119 int __create_free_space_inode(struct btrfs_root *root,
120                               struct btrfs_trans_handle *trans,
121                               struct btrfs_path *path, u64 ino, u64 offset)
122 {
123         struct btrfs_key key;
124         struct btrfs_disk_key disk_key;
125         struct btrfs_free_space_header *header;
126         struct btrfs_inode_item *inode_item;
127         struct extent_buffer *leaf;
128         u64 flags = BTRFS_INODE_NOCOMPRESS | BTRFS_INODE_PREALLOC;
129         int ret;
130
131         ret = btrfs_insert_empty_inode(trans, root, path, ino);
132         if (ret)
133                 return ret;
134
135         /* We inline crc's for the free disk space cache */
136         if (ino != BTRFS_FREE_INO_OBJECTID)
137                 flags |= BTRFS_INODE_NODATASUM | BTRFS_INODE_NODATACOW;
138
139         leaf = path->nodes[0];
140         inode_item = btrfs_item_ptr(leaf, path->slots[0],
141                                     struct btrfs_inode_item);
142         btrfs_item_key(leaf, &disk_key, path->slots[0]);
143         memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
144                              sizeof(*inode_item));
145         btrfs_set_inode_generation(leaf, inode_item, trans->transid);
146         btrfs_set_inode_size(leaf, inode_item, 0);
147         btrfs_set_inode_nbytes(leaf, inode_item, 0);
148         btrfs_set_inode_uid(leaf, inode_item, 0);
149         btrfs_set_inode_gid(leaf, inode_item, 0);
150         btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
151         btrfs_set_inode_flags(leaf, inode_item, flags);
152         btrfs_set_inode_nlink(leaf, inode_item, 1);
153         btrfs_set_inode_transid(leaf, inode_item, trans->transid);
154         btrfs_set_inode_block_group(leaf, inode_item, offset);
155         btrfs_mark_buffer_dirty(leaf);
156         btrfs_release_path(path);
157
158         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
159         key.offset = offset;
160         key.type = 0;
161
162         ret = btrfs_insert_empty_item(trans, root, path, &key,
163                                       sizeof(struct btrfs_free_space_header));
164         if (ret < 0) {
165                 btrfs_release_path(path);
166                 return ret;
167         }
168         leaf = path->nodes[0];
169         header = btrfs_item_ptr(leaf, path->slots[0],
170                                 struct btrfs_free_space_header);
171         memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
172         btrfs_set_free_space_key(leaf, header, &disk_key);
173         btrfs_mark_buffer_dirty(leaf);
174         btrfs_release_path(path);
175
176         return 0;
177 }
178
179 int create_free_space_inode(struct btrfs_root *root,
180                             struct btrfs_trans_handle *trans,
181                             struct btrfs_block_group_cache *block_group,
182                             struct btrfs_path *path)
183 {
184         int ret;
185         u64 ino;
186
187         ret = btrfs_find_free_objectid(root, &ino);
188         if (ret < 0)
189                 return ret;
190
191         return __create_free_space_inode(root, trans, path, ino,
192                                          block_group->key.objectid);
193 }
194
195 int btrfs_truncate_free_space_cache(struct btrfs_root *root,
196                                     struct btrfs_trans_handle *trans,
197                                     struct btrfs_path *path,
198                                     struct inode *inode)
199 {
200         struct btrfs_block_rsv *rsv;
201         u64 needed_bytes;
202         loff_t oldsize;
203         int ret = 0;
204
205         rsv = trans->block_rsv;
206         trans->block_rsv = &root->fs_info->global_block_rsv;
207
208         /* 1 for slack space, 1 for updating the inode */
209         needed_bytes = btrfs_calc_trunc_metadata_size(root, 1) +
210                 btrfs_calc_trans_metadata_size(root, 1);
211
212         spin_lock(&trans->block_rsv->lock);
213         if (trans->block_rsv->reserved < needed_bytes) {
214                 spin_unlock(&trans->block_rsv->lock);
215                 trans->block_rsv = rsv;
216                 return -ENOSPC;
217         }
218         spin_unlock(&trans->block_rsv->lock);
219
220         oldsize = i_size_read(inode);
221         btrfs_i_size_write(inode, 0);
222         truncate_pagecache(inode, oldsize, 0);
223
224         /*
225          * We don't need an orphan item because truncating the free space cache
226          * will never be split across transactions.
227          */
228         ret = btrfs_truncate_inode_items(trans, root, inode,
229                                          0, BTRFS_EXTENT_DATA_KEY);
230
231         if (ret) {
232                 trans->block_rsv = rsv;
233                 WARN_ON(1);
234                 return ret;
235         }
236
237         ret = btrfs_update_inode(trans, root, inode);
238         trans->block_rsv = rsv;
239
240         return ret;
241 }
242
243 static int readahead_cache(struct inode *inode)
244 {
245         struct file_ra_state *ra;
246         unsigned long last_index;
247
248         ra = kzalloc(sizeof(*ra), GFP_NOFS);
249         if (!ra)
250                 return -ENOMEM;
251
252         file_ra_state_init(ra, inode->i_mapping);
253         last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
254
255         page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
256
257         kfree(ra);
258
259         return 0;
260 }
261
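/*
 * io_ctl is a small helper used while reading and writing the free space
 * cache.  It holds the locked cache pages and maps them one at a time;
 * 'cur' is the current position inside the mapped page, 'size' the bytes
 * left in it, and 'check_crcs' selects the layout that stores a crc for
 * every page at the front of the first page (used for everything except
 * the free ino cache).
 */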
262 struct io_ctl {
263         void *cur, *orig;
264         struct page *page;
265         struct page **pages;
266         struct btrfs_root *root;
267         unsigned long size;
268         int index;
269         int num_pages;
270         unsigned check_crcs:1;
271 };
272
273 static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode,
274                        struct btrfs_root *root)
275 {
276         memset(io_ctl, 0, sizeof(struct io_ctl));
277         io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
278                 PAGE_CACHE_SHIFT;
279         io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages,
280                                 GFP_NOFS);
281         if (!io_ctl->pages)
282                 return -ENOMEM;
283         io_ctl->root = root;
284         if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
285                 io_ctl->check_crcs = 1;
286         return 0;
287 }
288
289 static void io_ctl_free(struct io_ctl *io_ctl)
290 {
291         kfree(io_ctl->pages);
292 }
293
294 static void io_ctl_unmap_page(struct io_ctl *io_ctl)
295 {
296         if (io_ctl->cur) {
297                 kunmap(io_ctl->page);
298                 io_ctl->cur = NULL;
299                 io_ctl->orig = NULL;
300         }
301 }
302
303 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
304 {
305         WARN_ON(io_ctl->cur);
306         BUG_ON(io_ctl->index >= io_ctl->num_pages);
307         io_ctl->page = io_ctl->pages[io_ctl->index++];
308         io_ctl->cur = kmap(io_ctl->page);
309         io_ctl->orig = io_ctl->cur;
310         io_ctl->size = PAGE_CACHE_SIZE;
311         if (clear)
312                 memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
313 }
314
315 static void io_ctl_drop_pages(struct io_ctl *io_ctl)
316 {
317         int i;
318
319         io_ctl_unmap_page(io_ctl);
320
321         for (i = 0; i < io_ctl->num_pages; i++) {
322                 ClearPageChecked(io_ctl->pages[i]);
323                 unlock_page(io_ctl->pages[i]);
324                 page_cache_release(io_ctl->pages[i]);
325         }
326 }
327
328 static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode,
329                                 int uptodate)
330 {
331         struct page *page;
332         gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
333         int i;
334
335         for (i = 0; i < io_ctl->num_pages; i++) {
336                 page = find_or_create_page(inode->i_mapping, i, mask);
337                 if (!page) {
338                         io_ctl_drop_pages(io_ctl);
339                         return -ENOMEM;
340                 }
341                 io_ctl->pages[i] = page;
342                 if (uptodate && !PageUptodate(page)) {
343                         btrfs_readpage(NULL, page);
344                         lock_page(page);
345                         if (!PageUptodate(page)) {
346                                 printk(KERN_ERR "btrfs: error reading free "
347                                        "space cache\n");
348                                 io_ctl_drop_pages(io_ctl);
349                                 return -EIO;
350                         }
351                 }
352         }
353
354         return 0;
355 }
356
357 static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation)
358 {
359         u64 *val;
360
361         io_ctl_map_page(io_ctl, 1);
362
363         /*
364          * Skip the csum areas.  If we don't check crcs then we just have a
365          * 64bit chunk at the front of the first page.
366          */
367         if (io_ctl->check_crcs) {
368                 io_ctl->cur += (sizeof(u32) * io_ctl->num_pages);
369                 io_ctl->size -= sizeof(u64) + (sizeof(u32) * io_ctl->num_pages);
370         } else {
371                 io_ctl->cur += sizeof(u64);
372                 io_ctl->size -= sizeof(u64) * 2;
373         }
374
375         val = io_ctl->cur;
376         *val = cpu_to_le64(generation);
377         io_ctl->cur += sizeof(u64);
378 }
379
380 static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation)
381 {
382         u64 *gen;
383
384         /*
385          * Skip the crc area.  If we don't check crcs then we just have a 64bit
386          * chunk at the front of the first page.
387          */
388         if (io_ctl->check_crcs) {
389                 io_ctl->cur += sizeof(u32) * io_ctl->num_pages;
390                 io_ctl->size -= sizeof(u64) +
391                         (sizeof(u32) * io_ctl->num_pages);
392         } else {
393                 io_ctl->cur += sizeof(u64);
394                 io_ctl->size -= sizeof(u64) * 2;
395         }
396
397         gen = io_ctl->cur;
398         if (le64_to_cpu(*gen) != generation) {
399                 printk_ratelimited(KERN_ERR "btrfs: space cache generation "
400                                    "(%Lu) does not match inode (%Lu)\n", *gen,
401                                    generation);
402                 io_ctl_unmap_page(io_ctl);
403                 return -EIO;
404         }
405         io_ctl->cur += sizeof(u64);
406         return 0;
407 }
408
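/*
 * With check_crcs set, a crc for each page is stored in an array at the
 * start of the first page.  Page 0 therefore only crcs the data that
 * follows that array, which is why an extra offset is applied when
 * index == 0 here and in io_ctl_check_crc() below.
 */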
409 static void io_ctl_set_crc(struct io_ctl *io_ctl, int index)
410 {
411         u32 *tmp;
412         u32 crc = ~(u32)0;
413         unsigned offset = 0;
414
415         if (!io_ctl->check_crcs) {
416                 io_ctl_unmap_page(io_ctl);
417                 return;
418         }
419
420         if (index == 0)
421                 offset = sizeof(u32) * io_ctl->num_pages;
422
423         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
424                               PAGE_CACHE_SIZE - offset);
425         btrfs_csum_final(crc, (char *)&crc);
426         io_ctl_unmap_page(io_ctl);
427         tmp = kmap(io_ctl->pages[0]);
428         tmp += index;
429         *tmp = crc;
430         kunmap(io_ctl->pages[0]);
431 }
432
433 static int io_ctl_check_crc(struct io_ctl *io_ctl, int index)
434 {
435         u32 *tmp, val;
436         u32 crc = ~(u32)0;
437         unsigned offset = 0;
438
439         if (!io_ctl->check_crcs) {
440                 io_ctl_map_page(io_ctl, 0);
441                 return 0;
442         }
443
444         if (index == 0)
445                 offset = sizeof(u32) * io_ctl->num_pages;
446
447         tmp = kmap(io_ctl->pages[0]);
448         tmp += index;
449         val = *tmp;
450         kunmap(io_ctl->pages[0]);
451
452         io_ctl_map_page(io_ctl, 0);
453         crc = btrfs_csum_data(io_ctl->root, io_ctl->orig + offset, crc,
454                               PAGE_CACHE_SIZE - offset);
455         btrfs_csum_final(crc, (char *)&crc);
456         if (val != crc) {
457                 printk_ratelimited(KERN_ERR "btrfs: csum mismatch on free "
458                                    "space cache\n");
459                 io_ctl_unmap_page(io_ctl);
460                 return -EIO;
461         }
462
463         return 0;
464 }
465
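/*
 * Pack one extent or bitmap entry into the currently mapped page.  Once
 * there is no longer room for a whole entry the page is crc'ed and the
 * next page is mapped, so entries never straddle a page boundary.
 */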
466 static int io_ctl_add_entry(struct io_ctl *io_ctl, u64 offset, u64 bytes,
467                             void *bitmap)
468 {
469         struct btrfs_free_space_entry *entry;
470
471         if (!io_ctl->cur)
472                 return -ENOSPC;
473
474         entry = io_ctl->cur;
475         entry->offset = cpu_to_le64(offset);
476         entry->bytes = cpu_to_le64(bytes);
477         entry->type = (bitmap) ? BTRFS_FREE_SPACE_BITMAP :
478                 BTRFS_FREE_SPACE_EXTENT;
479         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
480         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
481
482         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
483                 return 0;
484
485         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
486
487         /* No more pages to map */
488         if (io_ctl->index >= io_ctl->num_pages)
489                 return 0;
490
491         /* map the next page */
492         io_ctl_map_page(io_ctl, 1);
493         return 0;
494 }
495
496 static int io_ctl_add_bitmap(struct io_ctl *io_ctl, void *bitmap)
497 {
498         if (!io_ctl->cur)
499                 return -ENOSPC;
500
501         /*
502          * If we aren't at the start of the current page, unmap this one and
503          * map the next one if there is any left.
504          */
505         if (io_ctl->cur != io_ctl->orig) {
506                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
507                 if (io_ctl->index >= io_ctl->num_pages)
508                         return -ENOSPC;
509                 io_ctl_map_page(io_ctl, 0);
510         }
511
512         memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
513         io_ctl_set_crc(io_ctl, io_ctl->index - 1);
514         if (io_ctl->index < io_ctl->num_pages)
515                 io_ctl_map_page(io_ctl, 0);
516         return 0;
517 }
518
519 static void io_ctl_zero_remaining_pages(struct io_ctl *io_ctl)
520 {
521         /*
522          * If we're not on the boundary we know we've modified the page and we
523          * need to crc the page.
524          */
525         if (io_ctl->cur != io_ctl->orig)
526                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
527         else
528                 io_ctl_unmap_page(io_ctl);
529
530         while (io_ctl->index < io_ctl->num_pages) {
531                 io_ctl_map_page(io_ctl, 1);
532                 io_ctl_set_crc(io_ctl, io_ctl->index - 1);
533         }
534 }
535
536 static int io_ctl_read_entry(struct io_ctl *io_ctl,
537                             struct btrfs_free_space *entry, u8 *type)
538 {
539         struct btrfs_free_space_entry *e;
540         int ret;
541
542         if (!io_ctl->cur) {
543                 ret = io_ctl_check_crc(io_ctl, io_ctl->index);
544                 if (ret)
545                         return ret;
546         }
547
548         e = io_ctl->cur;
549         entry->offset = le64_to_cpu(e->offset);
550         entry->bytes = le64_to_cpu(e->bytes);
551         *type = e->type;
552         io_ctl->cur += sizeof(struct btrfs_free_space_entry);
553         io_ctl->size -= sizeof(struct btrfs_free_space_entry);
554
555         if (io_ctl->size >= sizeof(struct btrfs_free_space_entry))
556                 return 0;
557
558         io_ctl_unmap_page(io_ctl);
559
560         return 0;
561 }
562
563 static int io_ctl_read_bitmap(struct io_ctl *io_ctl,
564                               struct btrfs_free_space *entry)
565 {
566         int ret;
567
568         ret = io_ctl_check_crc(io_ctl, io_ctl->index);
569         if (ret)
570                 return ret;
571
572         memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
573         io_ctl_unmap_page(io_ctl);
574
575         return 0;
576 }
577
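/*
 * Read the on-disk cache keyed at 'offset' into 'ctl'.  Returns 1 if the
 * cache was loaded successfully; any other return value means the cache
 * could not be used (and may have been dumped again).
 */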
578 int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
579                             struct btrfs_free_space_ctl *ctl,
580                             struct btrfs_path *path, u64 offset)
581 {
582         struct btrfs_free_space_header *header;
583         struct extent_buffer *leaf;
584         struct io_ctl io_ctl;
585         struct btrfs_key key;
586         struct btrfs_free_space *e, *n;
587         struct list_head bitmaps;
588         u64 num_entries;
589         u64 num_bitmaps;
590         u64 generation;
591         u8 type;
592         int ret = 0;
593
594         INIT_LIST_HEAD(&bitmaps);
595
596         /* Nothing in the space cache, goodbye */
597         if (!i_size_read(inode))
598                 return 0;
599
600         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
601         key.offset = offset;
602         key.type = 0;
603
604         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
605         if (ret < 0)
606                 return 0;
607         else if (ret > 0) {
608                 btrfs_release_path(path);
609                 return 0;
610         }
611
612         ret = -1;
613
614         leaf = path->nodes[0];
615         header = btrfs_item_ptr(leaf, path->slots[0],
616                                 struct btrfs_free_space_header);
617         num_entries = btrfs_free_space_entries(leaf, header);
618         num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
619         generation = btrfs_free_space_generation(leaf, header);
620         btrfs_release_path(path);
621
622         if (BTRFS_I(inode)->generation != generation) {
623                 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
624                        " not match free space cache generation (%llu)\n",
625                        (unsigned long long)BTRFS_I(inode)->generation,
626                        (unsigned long long)generation);
627                 return 0;
628         }
629
630         if (!num_entries)
631                 return 0;
632
633         io_ctl_init(&io_ctl, inode, root);
634         ret = readahead_cache(inode);
635         if (ret)
636                 goto out;
637
638         ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
639         if (ret)
640                 goto out;
641
642         ret = io_ctl_check_crc(&io_ctl, 0);
643         if (ret)
644                 goto free_cache;
645
646         ret = io_ctl_check_generation(&io_ctl, generation);
647         if (ret)
648                 goto free_cache;
649
650         while (num_entries) {
651                 e = kmem_cache_zalloc(btrfs_free_space_cachep,
652                                       GFP_NOFS);
653                 if (!e)
654                         goto free_cache;
655
656                 ret = io_ctl_read_entry(&io_ctl, e, &type);
657                 if (ret) {
658                         kmem_cache_free(btrfs_free_space_cachep, e);
659                         goto free_cache;
660                 }
661
662                 if (!e->bytes) {
663                         kmem_cache_free(btrfs_free_space_cachep, e);
664                         goto free_cache;
665                 }
666
667                 if (type == BTRFS_FREE_SPACE_EXTENT) {
668                         spin_lock(&ctl->tree_lock);
669                         ret = link_free_space(ctl, e);
670                         spin_unlock(&ctl->tree_lock);
671                         if (ret) {
672                                 printk(KERN_ERR "Duplicate entries in "
673                                        "free space cache, dumping\n");
674                                 kmem_cache_free(btrfs_free_space_cachep, e);
675                                 goto free_cache;
676                         }
677                 } else {
678                         BUG_ON(!num_bitmaps);
679                         num_bitmaps--;
680                         e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
681                         if (!e->bitmap) {
682                                 kmem_cache_free(
683                                         btrfs_free_space_cachep, e);
684                                 goto free_cache;
685                         }
686                         spin_lock(&ctl->tree_lock);
687                         ret = link_free_space(ctl, e);
688                         ctl->total_bitmaps++;
689                         ctl->op->recalc_thresholds(ctl);
690                         spin_unlock(&ctl->tree_lock);
691                         if (ret) {
692                                 printk(KERN_ERR "Duplicate entries in "
693                                        "free space cache, dumping\n");
694                                 kmem_cache_free(btrfs_free_space_cachep, e);
695                                 goto free_cache;
696                         }
697                         list_add_tail(&e->list, &bitmaps);
698                 }
699
700                 num_entries--;
701         }
702
703         io_ctl_unmap_page(&io_ctl);
704
705         /*
706          * The bitmap data is stored after all of the entries, so we queued
707          * the bitmap entries up above and only read their contents here.
708          */
709         list_for_each_entry_safe(e, n, &bitmaps, list) {
710                 list_del_init(&e->list);
711                 ret = io_ctl_read_bitmap(&io_ctl, e);
712                 if (ret)
713                         goto free_cache;
714         }
715
716         io_ctl_drop_pages(&io_ctl);
717         ret = 1;
718 out:
719         io_ctl_free(&io_ctl);
720         return ret;
721 free_cache:
722         io_ctl_drop_pages(&io_ctl);
723         __btrfs_remove_free_space_cache(ctl);
724         goto out;
725 }
726
727 int load_free_space_cache(struct btrfs_fs_info *fs_info,
728                           struct btrfs_block_group_cache *block_group)
729 {
730         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
731         struct btrfs_root *root = fs_info->tree_root;
732         struct inode *inode;
733         struct btrfs_path *path;
734         int ret = 0;
735         bool matched;
736         u64 used = btrfs_block_group_used(&block_group->item);
737
738         /*
739          * If we're unmounting then just return, since this does a search on the
740          * normal root and not the commit root and we could deadlock.
741          */
742         if (btrfs_fs_closing(fs_info))
743                 return 0;
744
745         /*
746          * If this block group has been marked to be cleared for one reason or
747          * another then we can't trust the on disk cache, so just return.
748          */
749         spin_lock(&block_group->lock);
750         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
751                 spin_unlock(&block_group->lock);
752                 return 0;
753         }
754         spin_unlock(&block_group->lock);
755
756         path = btrfs_alloc_path();
757         if (!path)
758                 return 0;
759
760         inode = lookup_free_space_inode(root, block_group, path);
761         if (IS_ERR(inode)) {
762                 btrfs_free_path(path);
763                 return 0;
764         }
765
766         /* We may have converted the inode and made the cache invalid. */
767         spin_lock(&block_group->lock);
768         if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
769                 spin_unlock(&block_group->lock);
770                 goto out;
771         }
772         spin_unlock(&block_group->lock);
773
774         ret = __load_free_space_cache(fs_info->tree_root, inode, ctl,
775                                       path, block_group->key.objectid);
776         btrfs_free_path(path);
777         if (ret <= 0)
778                 goto out;
779
780         spin_lock(&ctl->tree_lock);
781         matched = (ctl->free_space == (block_group->key.offset - used -
782                                        block_group->bytes_super));
783         spin_unlock(&ctl->tree_lock);
784
785         if (!matched) {
786                 __btrfs_remove_free_space_cache(ctl);
787                 printk(KERN_ERR "block group %llu has an wrong amount of free "
788                        "space\n", block_group->key.objectid);
789                 ret = -1;
790         }
791 out:
792         if (ret < 0) {
793                 /* This cache is bogus, make sure it gets cleared */
794                 spin_lock(&block_group->lock);
795                 block_group->disk_cache_state = BTRFS_DC_CLEAR;
796                 spin_unlock(&block_group->lock);
797                 ret = 0;
798
799                 printk(KERN_ERR "btrfs: failed to load free space cache "
800                        "for block group %llu\n", block_group->key.objectid);
801         }
802
803         iput(inode);
804         return ret;
805 }
806
807 /**
808  * __btrfs_write_out_cache - write out cached info to an inode
809  * @root - the root the inode belongs to
810  * @ctl - the free space cache we are going to write out
811  * @block_group - the block_group for this cache if it belongs to a block_group
812  * @trans - the trans handle
813  * @path - the path to use
814  * @offset - the offset for the key we'll insert
815  *
816  * This function writes out a free space cache struct to disk for quick recovery
817  * on mount.  This will return 0 if it was successful in writing the cache out,
818  * and -1 if it was not.
819  */
820 int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
821                             struct btrfs_free_space_ctl *ctl,
822                             struct btrfs_block_group_cache *block_group,
823                             struct btrfs_trans_handle *trans,
824                             struct btrfs_path *path, u64 offset)
825 {
826         struct btrfs_free_space_header *header;
827         struct extent_buffer *leaf;
828         struct rb_node *node;
829         struct list_head *pos, *n;
830         struct extent_state *cached_state = NULL;
831         struct btrfs_free_cluster *cluster = NULL;
832         struct extent_io_tree *unpin = NULL;
833         struct io_ctl io_ctl;
834         struct list_head bitmap_list;
835         struct btrfs_key key;
836         u64 start, end, len;
837         int entries = 0;
838         int bitmaps = 0;
839         int ret;
840         int err = -1;
841
842         INIT_LIST_HEAD(&bitmap_list);
843
844         if (!i_size_read(inode))
845                 return -1;
846
847         io_ctl_init(&io_ctl, inode, root);
848
849         /* Get the cluster for this block_group if it exists */
850         if (block_group && !list_empty(&block_group->cluster_list))
851                 cluster = list_entry(block_group->cluster_list.next,
852                                      struct btrfs_free_cluster,
853                                      block_group_list);
854
855         /*
856          * We shouldn't have switched the pinned extents yet so this is the
857          * right one
858          */
859         unpin = root->fs_info->pinned_extents;
860
861         /* Lock all pages first so we can lock the extent safely. */
862         io_ctl_prepare_pages(&io_ctl, inode, 0);
863
864         lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
865                          0, &cached_state, GFP_NOFS);
866
867         /*
868          * When searching for pinned extents, we need to start at our start
869          * offset.
870          */
871         if (block_group)
872                 start = block_group->key.objectid;
873
874         node = rb_first(&ctl->free_space_offset);
875         if (!node && cluster) {
876                 node = rb_first(&cluster->root);
877                 cluster = NULL;
878         }
879
880         /* Make sure we can fit our crcs into the first page */
881         if (io_ctl.check_crcs &&
882             (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) {
883                 WARN_ON(1);
884                 goto out_nospc;
885         }
886
887         io_ctl_set_generation(&io_ctl, trans->transid);
888
889         /* Write out the extent entries */
890         while (node) {
891                 struct btrfs_free_space *e;
892
893                 e = rb_entry(node, struct btrfs_free_space, offset_index);
894                 entries++;
895
896                 ret = io_ctl_add_entry(&io_ctl, e->offset, e->bytes,
897                                        e->bitmap);
898                 if (ret)
899                         goto out_nospc;
900
901                 if (e->bitmap) {
902                         list_add_tail(&e->list, &bitmap_list);
903                         bitmaps++;
904                 }
905                 node = rb_next(node);
906                 if (!node && cluster) {
907                         node = rb_first(&cluster->root);
908                         cluster = NULL;
909                 }
910         }
911
912         /*
913          * We want to add any pinned extents to our free space cache
914          * so we don't leak the space
915          */
916         while (block_group && (start < block_group->key.objectid +
917                                block_group->key.offset)) {
918                 ret = find_first_extent_bit(unpin, start, &start, &end,
919                                             EXTENT_DIRTY);
920                 if (ret) {
921                         ret = 0;
922                         break;
923                 }
924
925                 /* This pinned extent is out of our range */
926                 if (start >= block_group->key.objectid +
927                     block_group->key.offset)
928                         break;
929
930                 len = block_group->key.objectid +
931                         block_group->key.offset - start;
932                 len = min(len, end + 1 - start);
933
934                 entries++;
935                 ret = io_ctl_add_entry(&io_ctl, start, len, NULL);
936                 if (ret)
937                         goto out_nospc;
938
939                 start = end + 1;
940         }
941
942         /* Write out the bitmaps */
943         list_for_each_safe(pos, n, &bitmap_list) {
944                 struct btrfs_free_space *entry =
945                         list_entry(pos, struct btrfs_free_space, list);
946
947                 ret = io_ctl_add_bitmap(&io_ctl, entry->bitmap);
948                 if (ret)
949                         goto out_nospc;
950                 list_del_init(&entry->list);
951         }
952
953         /* Zero out the rest of the pages just to make sure */
954         io_ctl_zero_remaining_pages(&io_ctl);
955
956         ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages,
957                                 0, i_size_read(inode), &cached_state);
958         io_ctl_drop_pages(&io_ctl);
959         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
960                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
961
962         if (ret)
963                 goto out;
964
965
966         ret = filemap_write_and_wait(inode->i_mapping);
967         if (ret)
968                 goto out;
969
970         key.objectid = BTRFS_FREE_SPACE_OBJECTID;
971         key.offset = offset;
972         key.type = 0;
973
974         ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
975         if (ret < 0) {
976                 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
977                                  EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
978                                  GFP_NOFS);
979                 goto out;
980         }
981         leaf = path->nodes[0];
982         if (ret > 0) {
983                 struct btrfs_key found_key;
984                 BUG_ON(!path->slots[0]);
985                 path->slots[0]--;
986                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
987                 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
988                     found_key.offset != offset) {
989                         clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
990                                          inode->i_size - 1,
991                                          EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
992                                          NULL, GFP_NOFS);
993                         btrfs_release_path(path);
994                         goto out;
995                 }
996         }
997
998         BTRFS_I(inode)->generation = trans->transid;
999         header = btrfs_item_ptr(leaf, path->slots[0],
1000                                 struct btrfs_free_space_header);
1001         btrfs_set_free_space_entries(leaf, header, entries);
1002         btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
1003         btrfs_set_free_space_generation(leaf, header, trans->transid);
1004         btrfs_mark_buffer_dirty(leaf);
1005         btrfs_release_path(path);
1006
1007         err = 0;
1008 out:
1009         io_ctl_free(&io_ctl);
1010         if (err) {
1011                 invalidate_inode_pages2(inode->i_mapping);
1012                 BTRFS_I(inode)->generation = 0;
1013         }
1014         btrfs_update_inode(trans, root, inode);
1015         return err;
1016
1017 out_nospc:
1018         list_for_each_safe(pos, n, &bitmap_list) {
1019                 struct btrfs_free_space *entry =
1020                         list_entry(pos, struct btrfs_free_space, list);
1021                 list_del_init(&entry->list);
1022         }
1023         io_ctl_drop_pages(&io_ctl);
1024         unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
1025                              i_size_read(inode) - 1, &cached_state, GFP_NOFS);
1026         goto out;
1027 }
1028
1029 int btrfs_write_out_cache(struct btrfs_root *root,
1030                           struct btrfs_trans_handle *trans,
1031                           struct btrfs_block_group_cache *block_group,
1032                           struct btrfs_path *path)
1033 {
1034         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1035         struct inode *inode;
1036         int ret = 0;
1037
1038         root = root->fs_info->tree_root;
1039
1040         spin_lock(&block_group->lock);
1041         if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
1042                 spin_unlock(&block_group->lock);
1043                 return 0;
1044         }
1045         spin_unlock(&block_group->lock);
1046
1047         inode = lookup_free_space_inode(root, block_group, path);
1048         if (IS_ERR(inode))
1049                 return 0;
1050
1051         ret = __btrfs_write_out_cache(root, inode, ctl, block_group, trans,
1052                                       path, block_group->key.objectid);
1053         if (ret) {
1054                 spin_lock(&block_group->lock);
1055                 block_group->disk_cache_state = BTRFS_DC_ERROR;
1056                 spin_unlock(&block_group->lock);
1057                 ret = 0;
1058 #ifdef DEBUG
1059                 printk(KERN_ERR "btrfs: failed to write free space cace "
1060                        "for block group %llu\n", block_group->key.objectid);
1061 #endif
1062         }
1063
1064         iput(inode);
1065         return ret;
1066 }
1067
1068 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
1069                                           u64 offset)
1070 {
1071         BUG_ON(offset < bitmap_start);
1072         offset -= bitmap_start;
1073         return (unsigned long)(div_u64(offset, unit));
1074 }
1075
1076 static inline unsigned long bytes_to_bits(u64 bytes, u32 unit)
1077 {
1078         return (unsigned long)(div_u64(bytes, unit));
1079 }
1080
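/*
 * Round 'offset' down to the start of the bitmap that would cover it.
 * Each bitmap spans BITS_PER_BITMAP * ctl->unit bytes (128MB with 4k
 * pages and a 4k unit), measured from ctl->start.
 */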
1081 static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
1082                                    u64 offset)
1083 {
1084         u64 bitmap_start;
1085         u64 bytes_per_bitmap;
1086
1087         bytes_per_bitmap = BITS_PER_BITMAP * ctl->unit;
1088         bitmap_start = offset - ctl->start;
1089         bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
1090         bitmap_start *= bytes_per_bitmap;
1091         bitmap_start += ctl->start;
1092
1093         return bitmap_start;
1094 }
1095
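/*
 * Insert a free space entry into the rb-tree sorted by offset.  A bitmap
 * entry and an extent entry are allowed to share an offset; the extent
 * entry is kept to the left so it is found first (see below).
 */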
1096 static int tree_insert_offset(struct rb_root *root, u64 offset,
1097                               struct rb_node *node, int bitmap)
1098 {
1099         struct rb_node **p = &root->rb_node;
1100         struct rb_node *parent = NULL;
1101         struct btrfs_free_space *info;
1102
1103         while (*p) {
1104                 parent = *p;
1105                 info = rb_entry(parent, struct btrfs_free_space, offset_index);
1106
1107                 if (offset < info->offset) {
1108                         p = &(*p)->rb_left;
1109                 } else if (offset > info->offset) {
1110                         p = &(*p)->rb_right;
1111                 } else {
1112                         /*
1113                          * we could have a bitmap entry and an extent entry
1114                          * share the same offset.  If this is the case, we want
1115                          * the extent entry to always be found first if we do a
1116                          * linear search through the tree, since we want to have
1117                          * the quickest allocation time, and allocating from an
1118                          * extent is faster than allocating from a bitmap.  So
1119                          * if we're inserting a bitmap and we find an entry at
1120                          * this offset, we want to go right, or after this entry
1121                          * logically.  If we are inserting an extent and we've
1122                          * found a bitmap, we want to go left, or before
1123                          * logically.
1124                          */
1125                         if (bitmap) {
1126                                 if (info->bitmap) {
1127                                         WARN_ON_ONCE(1);
1128                                         return -EEXIST;
1129                                 }
1130                                 p = &(*p)->rb_right;
1131                         } else {
1132                                 if (!info->bitmap) {
1133                                         WARN_ON_ONCE(1);
1134                                         return -EEXIST;
1135                                 }
1136                                 p = &(*p)->rb_left;
1137                         }
1138                 }
1139         }
1140
1141         rb_link_node(node, parent, p);
1142         rb_insert_color(node, root);
1143
1144         return 0;
1145 }
1146
1147 /*
1148  * searches the tree for the given offset.
1149  *
1150  * fuzzy - If this is set, then we are trying to make an allocation, and we just
1151  * want a section that is at least 'bytes' in size and comes at or after the given
1152  * offset.
1153  */
1154 static struct btrfs_free_space *
1155 tree_search_offset(struct btrfs_free_space_ctl *ctl,
1156                    u64 offset, int bitmap_only, int fuzzy)
1157 {
1158         struct rb_node *n = ctl->free_space_offset.rb_node;
1159         struct btrfs_free_space *entry, *prev = NULL;
1160
1161         /* find entry that is closest to the 'offset' */
1162         while (1) {
1163                 if (!n) {
1164                         entry = NULL;
1165                         break;
1166                 }
1167
1168                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1169                 prev = entry;
1170
1171                 if (offset < entry->offset)
1172                         n = n->rb_left;
1173                 else if (offset > entry->offset)
1174                         n = n->rb_right;
1175                 else
1176                         break;
1177         }
1178
1179         if (bitmap_only) {
1180                 if (!entry)
1181                         return NULL;
1182                 if (entry->bitmap)
1183                         return entry;
1184
1185                 /*
1186                  * a bitmap entry and an extent entry may share the same offset;
1187                  * in that case, the bitmap entry comes after the extent entry.
1188                  */
1189                 n = rb_next(n);
1190                 if (!n)
1191                         return NULL;
1192                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1193                 if (entry->offset != offset)
1194                         return NULL;
1195
1196                 WARN_ON(!entry->bitmap);
1197                 return entry;
1198         } else if (entry) {
1199                 if (entry->bitmap) {
1200                         /*
1201                          * if the previous extent entry covers the offset,
1202                          * we should return it instead of the bitmap entry.
1203                          */
1204                         n = &entry->offset_index;
1205                         while (1) {
1206                                 n = rb_prev(n);
1207                                 if (!n)
1208                                         break;
1209                                 prev = rb_entry(n, struct btrfs_free_space,
1210                                                 offset_index);
1211                                 if (!prev->bitmap) {
1212                                         if (prev->offset + prev->bytes > offset)
1213                                                 entry = prev;
1214                                         break;
1215                                 }
1216                         }
1217                 }
1218                 return entry;
1219         }
1220
1221         if (!prev)
1222                 return NULL;
1223
1224         /* find last entry before the 'offset' */
1225         entry = prev;
1226         if (entry->offset > offset) {
1227                 n = rb_prev(&entry->offset_index);
1228                 if (n) {
1229                         entry = rb_entry(n, struct btrfs_free_space,
1230                                         offset_index);
1231                         BUG_ON(entry->offset > offset);
1232                 } else {
1233                         if (fuzzy)
1234                                 return entry;
1235                         else
1236                                 return NULL;
1237                 }
1238         }
1239
1240         if (entry->bitmap) {
1241                 n = &entry->offset_index;
1242                 while (1) {
1243                         n = rb_prev(n);
1244                         if (!n)
1245                                 break;
1246                         prev = rb_entry(n, struct btrfs_free_space,
1247                                         offset_index);
1248                         if (!prev->bitmap) {
1249                                 if (prev->offset + prev->bytes > offset)
1250                                         return prev;
1251                                 break;
1252                         }
1253                 }
1254                 if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset)
1255                         return entry;
1256         } else if (entry->offset + entry->bytes > offset)
1257                 return entry;
1258
1259         if (!fuzzy)
1260                 return NULL;
1261
1262         while (1) {
1263                 if (entry->bitmap) {
1264                         if (entry->offset + BITS_PER_BITMAP *
1265                             ctl->unit > offset)
1266                                 break;
1267                 } else {
1268                         if (entry->offset + entry->bytes > offset)
1269                                 break;
1270                 }
1271
1272                 n = rb_next(&entry->offset_index);
1273                 if (!n)
1274                         return NULL;
1275                 entry = rb_entry(n, struct btrfs_free_space, offset_index);
1276         }
1277         return entry;
1278 }
1279
1280 static inline void
1281 __unlink_free_space(struct btrfs_free_space_ctl *ctl,
1282                     struct btrfs_free_space *info)
1283 {
1284         rb_erase(&info->offset_index, &ctl->free_space_offset);
1285         ctl->free_extents--;
1286 }
1287
1288 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
1289                               struct btrfs_free_space *info)
1290 {
1291         __unlink_free_space(ctl, info);
1292         ctl->free_space -= info->bytes;
1293 }
1294
1295 static int link_free_space(struct btrfs_free_space_ctl *ctl,
1296                            struct btrfs_free_space *info)
1297 {
1298         int ret = 0;
1299
1300         BUG_ON(!info->bitmap && !info->bytes);
1301         ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
1302                                  &info->offset_index, (info->bitmap != NULL));
1303         if (ret)
1304                 return ret;
1305
1306         ctl->free_space += info->bytes;
1307         ctl->free_extents++;
1308         return ret;
1309 }
1310
1311 static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1312 {
1313         struct btrfs_block_group_cache *block_group = ctl->private;
1314         u64 max_bytes;
1315         u64 bitmap_bytes;
1316         u64 extent_bytes;
1317         u64 size = block_group->key.offset;
1318         u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
1319         int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1320
1321         BUG_ON(ctl->total_bitmaps > max_bitmaps);
1322
1323         /*
1324          * The goal is to keep the total amount of memory used per 1gb of space
1325          * at or below 32k, so we need to adjust how much memory we allow to be
1326          * used by extent based free space tracking
1327          */
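        /*
         * For example, a 4GB block group is allowed up to 4 * 32k = 128k of
         * cache metadata; whatever is not consumed by bitmaps below is
         * handed out to extent entries (capped at half of the total).
         */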
1328         if (size < 1024 * 1024 * 1024)
1329                 max_bytes = MAX_CACHE_BYTES_PER_GIG;
1330         else
1331                 max_bytes = MAX_CACHE_BYTES_PER_GIG *
1332                         div64_u64(size, 1024 * 1024 * 1024);
1333
1334         /*
1335          * we want to account for 1 more bitmap than what we have so we can make
1336          * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
1337          * we add more bitmaps.
1338          */
1339         bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
1340
1341         if (bitmap_bytes >= max_bytes) {
1342                 ctl->extents_thresh = 0;
1343                 return;
1344         }
1345
1346         /*
1347          * we want the extent entry threshold to always be at most 1/2 the max
1348          * bytes we can have, or whatever is less than that.
1349          */
1350         extent_bytes = max_bytes - bitmap_bytes;
1351         extent_bytes = min_t(u64, extent_bytes, div64_u64(max_bytes, 2));
1352
1353         ctl->extents_thresh =
1354                 div64_u64(extent_bytes, (sizeof(struct btrfs_free_space)));
1355 }
1356
1357 static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1358                                        struct btrfs_free_space *info,
1359                                        u64 offset, u64 bytes)
1360 {
1361         unsigned long start, count;
1362
1363         start = offset_to_bit(info->offset, ctl->unit, offset);
1364         count = bytes_to_bits(bytes, ctl->unit);
1365         BUG_ON(start + count > BITS_PER_BITMAP);
1366
1367         bitmap_clear(info->bitmap, start, count);
1368
1369         info->bytes -= bytes;
1370 }
1371
1372 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
1373                               struct btrfs_free_space *info, u64 offset,
1374                               u64 bytes)
1375 {
1376         __bitmap_clear_bits(ctl, info, offset, bytes);
1377         ctl->free_space -= bytes;
1378 }
1379
1380 static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1381                             struct btrfs_free_space *info, u64 offset,
1382                             u64 bytes)
1383 {
1384         unsigned long start, count;
1385
1386         start = offset_to_bit(info->offset, ctl->unit, offset);
1387         count = bytes_to_bits(bytes, ctl->unit);
1388         BUG_ON(start + count > BITS_PER_BITMAP);
1389
1390         bitmap_set(info->bitmap, start, count);
1391
1392         info->bytes += bytes;
1393         ctl->free_space += bytes;
1394 }
1395
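/*
 * Look for a run of at least '*bytes' free bytes in this bitmap, starting
 * at or after '*offset'.  On success '*offset' and '*bytes' are updated to
 * describe the run that was found and 0 is returned; -1 means no fit.
 */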
1396 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1397                          struct btrfs_free_space *bitmap_info, u64 *offset,
1398                          u64 *bytes)
1399 {
1400         unsigned long found_bits = 0;
1401         unsigned long bits, i;
1402         unsigned long next_zero;
1403
1404         i = offset_to_bit(bitmap_info->offset, ctl->unit,
1405                           max_t(u64, *offset, bitmap_info->offset));
1406         bits = bytes_to_bits(*bytes, ctl->unit);
1407
1408         for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
1409              i < BITS_PER_BITMAP;
1410              i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
1411                 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1412                                                BITS_PER_BITMAP, i);
1413                 if ((next_zero - i) >= bits) {
1414                         found_bits = next_zero - i;
1415                         break;
1416                 }
1417                 i = next_zero;
1418         }
1419
1420         if (found_bits) {
1421                 *offset = (u64)(i * ctl->unit) + bitmap_info->offset;
1422                 *bytes = (u64)(found_bits) * ctl->unit;
1423                 return 0;
1424         }
1425
1426         return -1;
1427 }
1428
1429 static struct btrfs_free_space *
1430 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes)
1431 {
1432         struct btrfs_free_space *entry;
1433         struct rb_node *node;
1434         int ret;
1435
1436         if (!ctl->free_space_offset.rb_node)
1437                 return NULL;
1438
1439         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1440         if (!entry)
1441                 return NULL;
1442
1443         for (node = &entry->offset_index; node; node = rb_next(node)) {
1444                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1445                 if (entry->bytes < *bytes)
1446                         continue;
1447
1448                 if (entry->bitmap) {
1449                         ret = search_bitmap(ctl, entry, offset, bytes);
1450                         if (!ret)
1451                                 return entry;
1452                         continue;
1453                 }
1454
1455                 *offset = entry->offset;
1456                 *bytes = entry->bytes;
1457                 return entry;
1458         }
1459
1460         return NULL;
1461 }
1462
1463 static void add_new_bitmap(struct btrfs_free_space_ctl *ctl,
1464                            struct btrfs_free_space *info, u64 offset)
1465 {
1466         info->offset = offset_to_bitmap(ctl, offset);
1467         info->bytes = 0;
1468         link_free_space(ctl, info);
1469         ctl->total_bitmaps++;
1470
1471         ctl->op->recalc_thresholds(ctl);
1472 }
1473
1474 static void free_bitmap(struct btrfs_free_space_ctl *ctl,
1475                         struct btrfs_free_space *bitmap_info)
1476 {
1477         unlink_free_space(ctl, bitmap_info);
1478         kfree(bitmap_info->bitmap);
1479         kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
1480         ctl->total_bitmaps--;
1481         ctl->op->recalc_thresholds(ctl);
1482 }
1483
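/*
 * Clear '*bytes' of space starting at '*offset' from this bitmap, walking
 * into the following bitmap when the range spans more than one.  Returns
 * -EAGAIN when the caller needs to retry (e.g. the remainder lives in an
 * extent entry), and -EINVAL when there is nothing left to search.
 */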
1484 static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
1485                               struct btrfs_free_space *bitmap_info,
1486                               u64 *offset, u64 *bytes)
1487 {
1488         u64 end;
1489         u64 search_start, search_bytes;
1490         int ret;
1491
1492 again:
1493         end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1;
1494
1495         /*
1496          * XXX - this can go away after a few releases.
1497          *
1498          * since the only user of btrfs_remove_free_space is the tree logging
1499          * stuff, and the only way to test that is under crash conditions, we
1500          * want to have this debug stuff here just in case something's not
1501          * working.  Search the bitmap for the space we are trying to use to
1502          * make sure it's actually there.  If it's not there then we need to stop
1503          * because something has gone wrong.
1504          */
1505         search_start = *offset;
1506         search_bytes = *bytes;
1507         search_bytes = min(search_bytes, end - search_start + 1);
1508         ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
1509         BUG_ON(ret < 0 || search_start != *offset);
1510
1511         if (*offset > bitmap_info->offset && *offset + *bytes > end) {
1512                 bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1);
1513                 *bytes -= end - *offset + 1;
1514                 *offset = end + 1;
1515         } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
1516                 bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes);
1517                 *bytes = 0;
1518         }
1519
1520         if (*bytes) {
1521                 struct rb_node *next = rb_next(&bitmap_info->offset_index);
1522                 if (!bitmap_info->bytes)
1523                         free_bitmap(ctl, bitmap_info);
1524
1525                 /*
1526                  * no entry after this bitmap, but we still have bytes to
1527                  * remove, so something has gone wrong.
1528                  */
1529                 if (!next)
1530                         return -EINVAL;
1531
1532                 bitmap_info = rb_entry(next, struct btrfs_free_space,
1533                                        offset_index);
1534
1535                 /*
1536                  * if the next entry isn't a bitmap we need to return to let the
1537                  * extent stuff do its work.
1538                  */
1539                 if (!bitmap_info->bitmap)
1540                         return -EAGAIN;
1541
1542                 /*
1543                  * Ok the next item is a bitmap, but it may not actually hold
1544                  * the information for the rest of this free space stuff, so
1545                  * look for it, and if we don't find it return so we can try
1546                  * everything over again.
1547                  */
1548                 search_start = *offset;
1549                 search_bytes = *bytes;
1550                 ret = search_bitmap(ctl, bitmap_info, &search_start,
1551                                     &search_bytes);
1552                 if (ret < 0 || search_start != *offset)
1553                         return -EAGAIN;
1554
1555                 goto again;
1556         } else if (!bitmap_info->bytes)
1557                 free_bitmap(ctl, bitmap_info);
1558
1559         return 0;
1560 }
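/*
 * A removal that straddles two bitmaps is handled by the loop above: the
 * portion up to the end of the current bitmap is cleared, *offset and
 * *bytes are advanced past it, and the next tree entry is expected to be
 * the adjacent bitmap so the remainder can be cleared on the next pass
 * through "again".  If that entry is a plain extent instead, -EAGAIN is
 * returned so the caller can fall back to the extent handling code.
 */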
1561
1562 static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
1563                                struct btrfs_free_space *info, u64 offset,
1564                                u64 bytes)
1565 {
1566         u64 bytes_to_set = 0;
1567         u64 end;
1568
1569         end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit);
1570
1571         bytes_to_set = min(end - offset, bytes);
1572
1573         bitmap_set_bits(ctl, info, offset, bytes_to_set);
1574
1575         return bytes_to_set;
1576
1577 }
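/*
 * add_bytes_to_bitmap() clamps the range it sets to the end of the
 * bitmap it was handed, so a range that starts near the end of one
 * bitmap only has its leading portion marked here.  The caller
 * (insert_into_bitmap() below) subtracts the returned byte count,
 * advances the offset and loops "again" so the remainder lands in the
 * following bitmap.
 */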
1578
1579 static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1580                       struct btrfs_free_space *info)
1581 {
1582         struct btrfs_block_group_cache *block_group = ctl->private;
1583
1584         /*
1585          * If we are below the extents threshold then we can add this as an
1586          * extent, and don't have to deal with the bitmap
1587          */
1588         if (ctl->free_extents < ctl->extents_thresh) {
1589                 /*
1590                  * If this block group has some small extents we don't want to
1591                  * use up all of our free slots in the cache with them, we want
1592                  * to reserve them for larger extents; however, if we have plenty
1593                  * of cache left then go ahead and add them, no sense in adding
1594                  * the overhead of a bitmap if we don't have to.
1595                  */
1596                 if (info->bytes <= block_group->sectorsize * 4) {
1597                         if (ctl->free_extents * 2 <= ctl->extents_thresh)
1598                                 return false;
1599                 } else {
1600                         return false;
1601                 }
1602         }
1603
1604         /*
1605          * some block groups are so tiny that a single bitmap would cover more
1606          * than the whole group, so don't bother creating a bitmap for them
1607          */
1608         if (BITS_PER_BITMAP * block_group->sectorsize >
1609             block_group->key.offset)
1610                 return false;
1611
1612         return true;
1613 }
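/*
 * Rough numbers behind the checks above (assuming 4K pages and a 4K
 * sectorsize, neither of which is guaranteed): BITS_PER_BITMAP is
 * PAGE_CACHE_SIZE * 8 = 32768 bits, so one bitmap spans
 * 32768 * 4096 bytes = 128MB of a block group, and block groups smaller
 * than that never get bitmaps.  Extents of at most 4 sectors (16K here)
 * are only pushed into bitmaps once more than half of the extent-entry
 * budget (extents_thresh) is already in use.
 */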
1614
1615 static struct btrfs_free_space_op free_space_op = {
1616         .recalc_thresholds      = recalculate_thresholds,
1617         .use_bitmap             = use_bitmap,
1618 };
1619
1620 static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl,
1621                               struct btrfs_free_space *info)
1622 {
1623         struct btrfs_free_space *bitmap_info;
1624         struct btrfs_block_group_cache *block_group = NULL;
1625         int added = 0;
1626         u64 bytes, offset, bytes_added;
1627         int ret;
1628
1629         bytes = info->bytes;
1630         offset = info->offset;
1631
1632         if (!ctl->op->use_bitmap(ctl, info))
1633                 return 0;
1634
1635         if (ctl->op == &free_space_op)
1636                 block_group = ctl->private;
1637 again:
1638         /*
1639          * Since we link bitmaps right into the cluster we need to see if we
1640          * have a cluster here, and if so and it has our bitmap we need to add
1641          * the free space to that bitmap.
1642          */
1643         if (block_group && !list_empty(&block_group->cluster_list)) {
1644                 struct btrfs_free_cluster *cluster;
1645                 struct rb_node *node;
1646                 struct btrfs_free_space *entry;
1647
1648                 cluster = list_entry(block_group->cluster_list.next,
1649                                      struct btrfs_free_cluster,
1650                                      block_group_list);
1651                 spin_lock(&cluster->lock);
1652                 node = rb_first(&cluster->root);
1653                 if (!node) {
1654                         spin_unlock(&cluster->lock);
1655                         goto no_cluster_bitmap;
1656                 }
1657
1658                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1659                 if (!entry->bitmap) {
1660                         spin_unlock(&cluster->lock);
1661                         goto no_cluster_bitmap;
1662                 }
1663
1664                 if (entry->offset == offset_to_bitmap(ctl, offset)) {
1665                         bytes_added = add_bytes_to_bitmap(ctl, entry,
1666                                                           offset, bytes);
1667                         bytes -= bytes_added;
1668                         offset += bytes_added;
1669                 }
1670                 spin_unlock(&cluster->lock);
1671                 if (!bytes) {
1672                         ret = 1;
1673                         goto out;
1674                 }
1675         }
1676
1677 no_cluster_bitmap:
1678         bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1679                                          1, 0);
1680         if (!bitmap_info) {
1681                 BUG_ON(added);
1682                 goto new_bitmap;
1683         }
1684
1685         bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
1686         bytes -= bytes_added;
1687         offset += bytes_added;
1688         added = 0;
1689
1690         if (!bytes) {
1691                 ret = 1;
1692                 goto out;
1693         } else
1694                 goto again;
1695
1696 new_bitmap:
1697         if (info && info->bitmap) {
1698                 add_new_bitmap(ctl, info, offset);
1699                 added = 1;
1700                 info = NULL;
1701                 goto again;
1702         } else {
1703                 spin_unlock(&ctl->tree_lock);
1704
1705                 /* no pre-allocated info, allocate a new one */
1706                 if (!info) {
1707                         info = kmem_cache_zalloc(btrfs_free_space_cachep,
1708                                                  GFP_NOFS);
1709                         if (!info) {
1710                                 spin_lock(&ctl->tree_lock);
1711                                 ret = -ENOMEM;
1712                                 goto out;
1713                         }
1714                 }
1715
1716                 /* allocate the bitmap */
1717                 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
1718                 spin_lock(&ctl->tree_lock);
1719                 if (!info->bitmap) {
1720                         ret = -ENOMEM;
1721                         goto out;
1722                 }
1723                 goto again;
1724         }
1725
1726 out:
1727         if (info) {
1728                 if (info->bitmap)
1729                         kfree(info->bitmap);
1730                 kmem_cache_free(btrfs_free_space_cachep, info);
1731         }
1732
1733         return ret;
1734 }
1735
1736 static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
1737                           struct btrfs_free_space *info, bool update_stat)
1738 {
1739         struct btrfs_free_space *left_info;
1740         struct btrfs_free_space *right_info;
1741         bool merged = false;
1742         u64 offset = info->offset;
1743         u64 bytes = info->bytes;
1744
1745         /*
1746          * first we want to see if there is free space adjacent to the range we
1747          * are adding; if there is, remove that struct and add a new one to
1748          * cover the entire range
1749          */
1750         right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
1751         if (right_info && rb_prev(&right_info->offset_index))
1752                 left_info = rb_entry(rb_prev(&right_info->offset_index),
1753                                      struct btrfs_free_space, offset_index);
1754         else
1755                 left_info = tree_search_offset(ctl, offset - 1, 0, 0);
1756
1757         if (right_info && !right_info->bitmap) {
1758                 if (update_stat)
1759                         unlink_free_space(ctl, right_info);
1760                 else
1761                         __unlink_free_space(ctl, right_info);
1762                 info->bytes += right_info->bytes;
1763                 kmem_cache_free(btrfs_free_space_cachep, right_info);
1764                 merged = true;
1765         }
1766
1767         if (left_info && !left_info->bitmap &&
1768             left_info->offset + left_info->bytes == offset) {
1769                 if (update_stat)
1770                         unlink_free_space(ctl, left_info);
1771                 else
1772                         __unlink_free_space(ctl, left_info);
1773                 info->offset = left_info->offset;
1774                 info->bytes += left_info->bytes;
1775                 kmem_cache_free(btrfs_free_space_cachep, left_info);
1776                 merged = true;
1777         }
1778
1779         return merged;
1780 }
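/*
 * For example, if the tree already holds extent entries for [0, 4K) and
 * [8K, 12K) and we add [4K, 8K), the right neighbour is found at
 * offset + bytes == 8K and the left neighbour ends exactly at offset,
 * so both are unlinked and the new entry grows to cover [0, 12K).
 */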
1781
1782 int __btrfs_add_free_space(struct btrfs_free_space_ctl *ctl,
1783                            u64 offset, u64 bytes)
1784 {
1785         struct btrfs_free_space *info;
1786         int ret = 0;
1787
1788         info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
1789         if (!info)
1790                 return -ENOMEM;
1791
1792         info->offset = offset;
1793         info->bytes = bytes;
1794
1795         spin_lock(&ctl->tree_lock);
1796
1797         if (try_merge_free_space(ctl, info, true))
1798                 goto link;
1799
1800         /*
1801          * If there was no extent directly to the left or right of this new
1802          * extent then we know we're going to have to allocate a new extent, so
1803          * before we do that see if we need to drop this into a bitmap
1804          */
1805         ret = insert_into_bitmap(ctl, info);
1806         if (ret < 0) {
1807                 goto out;
1808         } else if (ret) {
1809                 ret = 0;
1810                 goto out;
1811         }
1812 link:
1813         ret = link_free_space(ctl, info);
1814         if (ret)
1815                 kmem_cache_free(btrfs_free_space_cachep, info);
1816 out:
1817         spin_unlock(&ctl->tree_lock);
1818
1819         if (ret) {
1820                 printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
1821                 BUG_ON(ret == -EEXIST);
1822         }
1823
1824         return ret;
1825 }
1826
1827 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1828                             u64 offset, u64 bytes)
1829 {
1830         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1831         struct btrfs_free_space *info;
1832         struct btrfs_free_space *next_info = NULL;
1833         int ret = 0;
1834
1835         spin_lock(&ctl->tree_lock);
1836
1837 again:
1838         info = tree_search_offset(ctl, offset, 0, 0);
1839         if (!info) {
1840                 /*
1841                  * oops didn't find an extent that matched the space we wanted
1842                  * oops, didn't find an extent that matched the space we wanted
1843                  */
1844                 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1845                                           1, 0);
1846                 if (!info) {
1847                         WARN_ON(1);
1848                         goto out_lock;
1849                 }
1850         }
1851
1852         if (info->bytes < bytes && rb_next(&info->offset_index)) {
1853                 u64 end;
1854                 next_info = rb_entry(rb_next(&info->offset_index),
1855                                              struct btrfs_free_space,
1856                                              offset_index);
1857
1858                 if (next_info->bitmap)
1859                         end = next_info->offset +
1860                               BITS_PER_BITMAP * ctl->unit - 1;
1861                 else
1862                         end = next_info->offset + next_info->bytes;
1863
1864                 if (next_info->bytes < bytes ||
1865                     next_info->offset > offset || offset > end) {
1866                         printk(KERN_CRIT "Found free space at %llu, size %llu,"
1867                               " trying to use %llu\n",
1868                               (unsigned long long)info->offset,
1869                               (unsigned long long)info->bytes,
1870                               (unsigned long long)bytes);
1871                         WARN_ON(1);
1872                         ret = -EINVAL;
1873                         goto out_lock;
1874                 }
1875
1876                 info = next_info;
1877         }
1878
1879         if (info->bytes == bytes) {
1880                 unlink_free_space(ctl, info);
1881                 if (info->bitmap) {
1882                         kfree(info->bitmap);
1883                         ctl->total_bitmaps--;
1884                 }
1885                 kmem_cache_free(btrfs_free_space_cachep, info);
1886                 ret = 0;
1887                 goto out_lock;
1888         }
1889
1890         if (!info->bitmap && info->offset == offset) {
1891                 unlink_free_space(ctl, info);
1892                 info->offset += bytes;
1893                 info->bytes -= bytes;
1894                 ret = link_free_space(ctl, info);
1895                 WARN_ON(ret);
1896                 goto out_lock;
1897         }
1898
1899         if (!info->bitmap && info->offset <= offset &&
1900             info->offset + info->bytes >= offset + bytes) {
1901                 u64 old_start = info->offset;
1902                 /*
1903                  * we're freeing space in the middle of the info,
1904                  * this can happen during tree log replay
1905                  *
1906                  * first unlink the old info and then
1907                  * insert it again after the hole we're creating
1908                  */
1909                 unlink_free_space(ctl, info);
1910                 if (offset + bytes < info->offset + info->bytes) {
1911                         u64 old_end = info->offset + info->bytes;
1912
1913                         info->offset = offset + bytes;
1914                         info->bytes = old_end - info->offset;
1915                         ret = link_free_space(ctl, info);
1916                         WARN_ON(ret);
1917                         if (ret)
1918                                 goto out_lock;
1919                 } else {
1920                         /* the hole we're creating ends at the end
1921                          * of the info struct, just free the info
1922                          */
1923                         kmem_cache_free(btrfs_free_space_cachep, info);
1924                 }
1925                 spin_unlock(&ctl->tree_lock);
1926
1927                 /* step two, insert a new info struct to cover
1928                  * anything before the hole
1929                  */
1930                 ret = btrfs_add_free_space(block_group, old_start,
1931                                            offset - old_start);
1932                 WARN_ON(ret);
1933                 goto out;
1934         }
1935
1936         ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1937         if (ret == -EAGAIN)
1938                 goto again;
1939         BUG_ON(ret);
1940 out_lock:
1941         spin_unlock(&ctl->tree_lock);
1942 out:
1943         return ret;
1944 }
1945
1946 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
1947                            u64 bytes)
1948 {
1949         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1950         struct btrfs_free_space *info;
1951         struct rb_node *n;
1952         int count = 0;
1953
1954         for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
1955                 info = rb_entry(n, struct btrfs_free_space, offset_index);
1956                 if (info->bytes >= bytes)
1957                         count++;
1958                 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
1959                        (unsigned long long)info->offset,
1960                        (unsigned long long)info->bytes,
1961                        (info->bitmap) ? "yes" : "no");
1962         }
1963         printk(KERN_INFO "block group has cluster?: %s\n",
1964                list_empty(&block_group->cluster_list) ? "no" : "yes");
1965         printk(KERN_INFO "%d blocks of free space at or bigger than bytes"
1966                "\n", count);
1967 }
1968
1969 void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
1970 {
1971         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1972
1973         spin_lock_init(&ctl->tree_lock);
1974         ctl->unit = block_group->sectorsize;
1975         ctl->start = block_group->key.objectid;
1976         ctl->private = block_group;
1977         ctl->op = &free_space_op;
1978
1979         /*
1980          * we only want to have 32k of ram per block group for keeping
1981          * track of free space, and if we pass 1/2 of that we want to
1982          * start converting things over to using bitmaps
1983          */
1984         ctl->extents_thresh = ((1024 * 32) / 2) /
1985                                 sizeof(struct btrfs_free_space);
1986 }
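/*
 * With the numbers above, extents_thresh is (32K / 2) divided by
 * sizeof(struct btrfs_free_space); the structure is a few dozen bytes
 * on a 64-bit build (the exact size depends on the kernel config), so
 * the threshold works out to a few hundred extent entries before the
 * cache starts preferring bitmaps for a block group.
 */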
1987
1988 /*
1989  * for a given cluster, put all of its extents back into the free
1990  * space cache.  If the block group passed doesn't match the block group
1991  * pointed to by the cluster, someone else raced in and freed the
1992  * cluster already.  In that case, we just return without changing anything
1993  */
1994 static int
1995 __btrfs_return_cluster_to_free_space(
1996                              struct btrfs_block_group_cache *block_group,
1997                              struct btrfs_free_cluster *cluster)
1998 {
1999         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2000         struct btrfs_free_space *entry;
2001         struct rb_node *node;
2002
2003         spin_lock(&cluster->lock);
2004         if (cluster->block_group != block_group)
2005                 goto out;
2006
2007         cluster->block_group = NULL;
2008         cluster->window_start = 0;
2009         list_del_init(&cluster->block_group_list);
2010
2011         node = rb_first(&cluster->root);
2012         while (node) {
2013                 bool bitmap;
2014
2015                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2016                 node = rb_next(&entry->offset_index);
2017                 rb_erase(&entry->offset_index, &cluster->root);
2018
2019                 bitmap = (entry->bitmap != NULL);
2020                 if (!bitmap)
2021                         try_merge_free_space(ctl, entry, false);
2022                 tree_insert_offset(&ctl->free_space_offset,
2023                                    entry->offset, &entry->offset_index, bitmap);
2024         }
2025         cluster->root = RB_ROOT;
2026
2027 out:
2028         spin_unlock(&cluster->lock);
2029         btrfs_put_block_group(block_group);
2030         return 0;
2031 }
2032
2033 void __btrfs_remove_free_space_cache_locked(struct btrfs_free_space_ctl *ctl)
2034 {
2035         struct btrfs_free_space *info;
2036         struct rb_node *node;
2037
2038         while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
2039                 info = rb_entry(node, struct btrfs_free_space, offset_index);
2040                 if (!info->bitmap) {
2041                         unlink_free_space(ctl, info);
2042                         kmem_cache_free(btrfs_free_space_cachep, info);
2043                 } else {
2044                         free_bitmap(ctl, info);
2045                 }
2046                 if (need_resched()) {
2047                         spin_unlock(&ctl->tree_lock);
2048                         cond_resched();
2049                         spin_lock(&ctl->tree_lock);
2050                 }
2051         }
2052 }
2053
2054 void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
2055 {
2056         spin_lock(&ctl->tree_lock);
2057         __btrfs_remove_free_space_cache_locked(ctl);
2058         spin_unlock(&ctl->tree_lock);
2059 }
2060
2061 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2062 {
2063         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2064         struct btrfs_free_cluster *cluster;
2065         struct list_head *head;
2066
2067         spin_lock(&ctl->tree_lock);
2068         while ((head = block_group->cluster_list.next) !=
2069                &block_group->cluster_list) {
2070                 cluster = list_entry(head, struct btrfs_free_cluster,
2071                                      block_group_list);
2072
2073                 WARN_ON(cluster->block_group != block_group);
2074                 __btrfs_return_cluster_to_free_space(block_group, cluster);
2075                 if (need_resched()) {
2076                         spin_unlock(&ctl->tree_lock);
2077                         cond_resched();
2078                         spin_lock(&ctl->tree_lock);
2079                 }
2080         }
2081         __btrfs_remove_free_space_cache_locked(ctl);
2082         spin_unlock(&ctl->tree_lock);
2083
2084 }
2085
2086 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2087                                u64 offset, u64 bytes, u64 empty_size)
2088 {
2089         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2090         struct btrfs_free_space *entry = NULL;
2091         u64 bytes_search = bytes + empty_size;
2092         u64 ret = 0;
2093
2094         spin_lock(&ctl->tree_lock);
2095         entry = find_free_space(ctl, &offset, &bytes_search);
2096         if (!entry)
2097                 goto out;
2098
2099         ret = offset;
2100         if (entry->bitmap) {
2101                 bitmap_clear_bits(ctl, entry, offset, bytes);
2102                 if (!entry->bytes)
2103                         free_bitmap(ctl, entry);
2104         } else {
2105                 unlink_free_space(ctl, entry);
2106                 entry->offset += bytes;
2107                 entry->bytes -= bytes;
2108                 if (!entry->bytes)
2109                         kmem_cache_free(btrfs_free_space_cachep, entry);
2110                 else
2111                         link_free_space(ctl, entry);
2112         }
2113
2114 out:
2115         spin_unlock(&ctl->tree_lock);
2116
2117         return ret;
2118 }
2119
2120 /*
2121  * given a cluster, put all of its extents back into the free space
2122  * cache.  If a block group is passed, this function will only free
2123  * a cluster that belongs to the passed block group.
2124  *
2125  * Otherwise, it'll get a reference on the block group pointed to by the
2126  * cluster and remove the cluster from it.
2127  */
2128 int btrfs_return_cluster_to_free_space(
2129                                struct btrfs_block_group_cache *block_group,
2130                                struct btrfs_free_cluster *cluster)
2131 {
2132         struct btrfs_free_space_ctl *ctl;
2133         int ret;
2134
2135         /* first, get a safe pointer to the block group */
2136         spin_lock(&cluster->lock);
2137         if (!block_group) {
2138                 block_group = cluster->block_group;
2139                 if (!block_group) {
2140                         spin_unlock(&cluster->lock);
2141                         return 0;
2142                 }
2143         } else if (cluster->block_group != block_group) {
2144                 /* someone else has already freed it, don't redo their work */
2145                 spin_unlock(&cluster->lock);
2146                 return 0;
2147         }
2148         atomic_inc(&block_group->count);
2149         spin_unlock(&cluster->lock);
2150
2151         ctl = block_group->free_space_ctl;
2152
2153         /* now return any extents the cluster had on it */
2154         spin_lock(&ctl->tree_lock);
2155         ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
2156         spin_unlock(&ctl->tree_lock);
2157
2158         /* finally drop our ref */
2159         btrfs_put_block_group(block_group);
2160         return ret;
2161 }
2162
2163 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2164                                    struct btrfs_free_cluster *cluster,
2165                                    struct btrfs_free_space *entry,
2166                                    u64 bytes, u64 min_start)
2167 {
2168         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2169         int err;
2170         u64 search_start = cluster->window_start;
2171         u64 search_bytes = bytes;
2172         u64 ret = 0;
2173
2174         search_start = min_start;
2175         search_bytes = bytes;
2176
2177         err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2178         if (err)
2179                 return 0;
2180
2181         ret = search_start;
2182         __bitmap_clear_bits(ctl, entry, ret, bytes);
2183
2184         return ret;
2185 }
2186
2187 /*
2188  * given a cluster, try to allocate 'bytes' from it, returns 0
2189  * if it couldn't find anything suitably large, or a logical disk offset
2190  * if things worked out
2191  */
2192 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2193                              struct btrfs_free_cluster *cluster, u64 bytes,
2194                              u64 min_start)
2195 {
2196         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2197         struct btrfs_free_space *entry = NULL;
2198         struct rb_node *node;
2199         u64 ret = 0;
2200
2201         spin_lock(&cluster->lock);
2202         if (bytes > cluster->max_size)
2203                 goto out;
2204
2205         if (cluster->block_group != block_group)
2206                 goto out;
2207
2208         node = rb_first(&cluster->root);
2209         if (!node)
2210                 goto out;
2211
2212         entry = rb_entry(node, struct btrfs_free_space, offset_index);
2213         while (1) {
2214                 if (entry->bytes < bytes ||
2215                     (!entry->bitmap && entry->offset < min_start)) {
2216                         node = rb_next(&entry->offset_index);
2217                         if (!node)
2218                                 break;
2219                         entry = rb_entry(node, struct btrfs_free_space,
2220                                          offset_index);
2221                         continue;
2222                 }
2223
2224                 if (entry->bitmap) {
2225                         ret = btrfs_alloc_from_bitmap(block_group,
2226                                                       cluster, entry, bytes,
2227                                                       min_start);
2228                         if (ret == 0) {
2229                                 node = rb_next(&entry->offset_index);
2230                                 if (!node)
2231                                         break;
2232                                 entry = rb_entry(node, struct btrfs_free_space,
2233                                                  offset_index);
2234                                 continue;
2235                         }
2236                 } else {
2237                         ret = entry->offset;
2238
2239                         entry->offset += bytes;
2240                         entry->bytes -= bytes;
2241                 }
2242
2243                 if (entry->bytes == 0)
2244                         rb_erase(&entry->offset_index, &cluster->root);
2245                 break;
2246         }
2247 out:
2248         spin_unlock(&cluster->lock);
2249
2250         if (!ret)
2251                 return 0;
2252
2253         spin_lock(&ctl->tree_lock);
2254
2255         ctl->free_space -= bytes;
2256         if (entry->bytes == 0) {
2257                 ctl->free_extents--;
2258                 if (entry->bitmap) {
2259                         kfree(entry->bitmap);
2260                         ctl->total_bitmaps--;
2261                         ctl->op->recalc_thresholds(ctl);
2262                 }
2263                 kmem_cache_free(btrfs_free_space_cachep, entry);
2264         }
2265
2266         spin_unlock(&ctl->tree_lock);
2267
2268         return ret;
2269 }
2270
2271 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
2272                                 struct btrfs_free_space *entry,
2273                                 struct btrfs_free_cluster *cluster,
2274                                 u64 offset, u64 bytes, u64 min_bytes)
2275 {
2276         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2277         unsigned long next_zero;
2278         unsigned long i;
2279         unsigned long search_bits;
2280         unsigned long total_bits;
2281         unsigned long found_bits;
2282         unsigned long start = 0;
2283         unsigned long total_found = 0;
2284         int ret;
2285         bool found = false;
2286
2287         i = offset_to_bit(entry->offset, block_group->sectorsize,
2288                           max_t(u64, offset, entry->offset));
2289         search_bits = bytes_to_bits(bytes, block_group->sectorsize);
2290         total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
2291
2292 again:
2293         found_bits = 0;
2294         for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
2295              i < BITS_PER_BITMAP;
2296              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
2297                 next_zero = find_next_zero_bit(entry->bitmap,
2298                                                BITS_PER_BITMAP, i);
2299                 if (next_zero - i >= search_bits) {
2300                         found_bits = next_zero - i;
2301                         break;
2302                 }
2303                 i = next_zero;
2304         }
2305
2306         if (!found_bits)
2307                 return -ENOSPC;
2308
2309         if (!found) {
2310                 start = i;
2311                 found = true;
2312         }
2313
2314         total_found += found_bits;
2315
2316         if (cluster->max_size < found_bits * block_group->sectorsize)
2317                 cluster->max_size = found_bits * block_group->sectorsize;
2318
2319         if (total_found < total_bits) {
2320                 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
2321                 if (i - start > total_bits * 2) {
2322                         total_found = 0;
2323                         cluster->max_size = 0;
2324                         found = false;
2325                 }
2326                 goto again;
2327         }
2328
2329         cluster->window_start = start * block_group->sectorsize +
2330                 entry->offset;
2331         rb_erase(&entry->offset_index, &ctl->free_space_offset);
2332         ret = tree_insert_offset(&cluster->root, entry->offset,
2333                                  &entry->offset_index, 1);
2334         BUG_ON(ret);
2335
2336         return 0;
2337 }
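/*
 * The retry above keeps growing the window while the runs of set bits it
 * finds stay close together: once the distance from the window start to
 * the next run exceeds twice the number of bits needed for min_bytes,
 * the bits gathered so far are considered too spread out and the window
 * is restarted at the new position.
 */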
2338
2339 /*
2340  * This searches the block group for just extents to fill the cluster with.
2341  */
2342 static noinline int
2343 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
2344                         struct btrfs_free_cluster *cluster,
2345                         struct list_head *bitmaps, u64 offset, u64 bytes,
2346                         u64 min_bytes)
2347 {
2348         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2349         struct btrfs_free_space *first = NULL;
2350         struct btrfs_free_space *entry = NULL;
2351         struct btrfs_free_space *prev = NULL;
2352         struct btrfs_free_space *last;
2353         struct rb_node *node;
2354         u64 window_start;
2355         u64 window_free;
2356         u64 max_extent;
2357         u64 max_gap = 128 * 1024;
2358
2359         entry = tree_search_offset(ctl, offset, 0, 1);
2360         if (!entry)
2361                 return -ENOSPC;
2362
2363         /*
2364          * We don't want bitmaps, so just move along until we find a normal
2365          * extent entry.
2366          */
2367         while (entry->bitmap) {
2368                 if (list_empty(&entry->list))
2369                         list_add_tail(&entry->list, bitmaps);
2370                 node = rb_next(&entry->offset_index);
2371                 if (!node)
2372                         return -ENOSPC;
2373                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2374         }
2375
2376         window_start = entry->offset;
2377         window_free = entry->bytes;
2378         max_extent = entry->bytes;
2379         first = entry;
2380         last = entry;
2381         prev = entry;
2382
2383         while (window_free <= min_bytes) {
2384                 node = rb_next(&entry->offset_index);
2385                 if (!node)
2386                         return -ENOSPC;
2387                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2388
2389                 if (entry->bitmap) {
2390                         if (list_empty(&entry->list))
2391                                 list_add_tail(&entry->list, bitmaps);
2392                         continue;
2393                 }
2394
2395                 /*
2396                  * we haven't filled the empty size and the window is
2397                  * very large.  reset and try again
2398                  */
2399                 if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
2400                     entry->offset - window_start > (min_bytes * 2)) {
2401                         first = entry;
2402                         window_start = entry->offset;
2403                         window_free = entry->bytes;
2404                         last = entry;
2405                         max_extent = entry->bytes;
2406                 } else {
2407                         last = entry;
2408                         window_free += entry->bytes;
2409                         if (entry->bytes > max_extent)
2410                                 max_extent = entry->bytes;
2411                 }
2412                 prev = entry;
2413         }
2414
2415         cluster->window_start = first->offset;
2416
2417         node = &first->offset_index;
2418
2419         /*
2420          * now we've found our entries, pull them out of the free space
2421          * cache and put them into the cluster rbtree
2422          */
2423         do {
2424                 int ret;
2425
2426                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2427                 node = rb_next(&entry->offset_index);
2428                 if (entry->bitmap)
2429                         continue;
2430
2431                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
2432                 ret = tree_insert_offset(&cluster->root, entry->offset,
2433                                          &entry->offset_index, 0);
2434                 BUG_ON(ret);
2435         } while (node && entry != last);
2436
2437         cluster->max_size = max_extent;
2438
2439         return 0;
2440 }
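/*
 * The window scan above keeps accumulating consecutive extent entries
 * until window_free exceeds min_bytes, but abandons the current window
 * and starts a new one at the next entry whenever that entry begins more
 * than max_gap (128K) past the end of the previous one, or more than
 * twice min_bytes past the start of the window.
 */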
2441
2442 /*
2443  * This specifically looks for bitmaps that may work in the cluster, we assume
2444  * that we have already failed to find extents that will work.
2445  */
2446 static noinline int
2447 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
2448                      struct btrfs_free_cluster *cluster,
2449                      struct list_head *bitmaps, u64 offset, u64 bytes,
2450                      u64 min_bytes)
2451 {
2452         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2453         struct btrfs_free_space *entry;
2454         struct rb_node *node;
2455         int ret = -ENOSPC;
2456
2457         if (ctl->total_bitmaps == 0)
2458                 return -ENOSPC;
2459
2460         /*
2461          * First check our cached list of bitmaps and see if there is an entry
2462          * here that will work.
2463          */
2464         list_for_each_entry(entry, bitmaps, list) {
2465                 if (entry->bytes < min_bytes)
2466                         continue;
2467                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2468                                            bytes, min_bytes);
2469                 if (!ret)
2470                         return 0;
2471         }
2472
2473         /*
2474          * If we do have entries on our list and we are here then we didn't find
2475          * anything, so go ahead and get the next entry after the last entry in
2476          * this list and start the search from there.
2477          */
2478         if (!list_empty(bitmaps)) {
2479                 entry = list_entry(bitmaps->prev, struct btrfs_free_space,
2480                                    list);
2481                 node = rb_next(&entry->offset_index);
2482                 if (!node)
2483                         return -ENOSPC;
2484                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2485                 goto search;
2486         }
2487
2488         entry = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 0, 1);
2489         if (!entry)
2490                 return -ENOSPC;
2491
2492 search:
2493         node = &entry->offset_index;
2494         do {
2495                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2496                 node = rb_next(&entry->offset_index);
2497                 if (!entry->bitmap)
2498                         continue;
2499                 if (entry->bytes < min_bytes)
2500                         continue;
2501                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
2502                                            bytes, min_bytes);
2503         } while (ret && node);
2504
2505         return ret;
2506 }
2507
2508 /*
2509  * here we try to find a cluster of blocks in a block group.  The goal
2510  * is to find at least bytes free and up to empty_size + bytes free.
2511  * We might not find them all in one contiguous area.
2512  *
2513  * returns zero and sets up cluster if things worked out, otherwise
2514  * it returns -ENOSPC
2515  */
2516 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
2517                              struct btrfs_root *root,
2518                              struct btrfs_block_group_cache *block_group,
2519                              struct btrfs_free_cluster *cluster,
2520                              u64 offset, u64 bytes, u64 empty_size)
2521 {
2522         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2523         struct list_head bitmaps;
2524         struct btrfs_free_space *entry, *tmp;
2525         u64 min_bytes;
2526         int ret;
2527
2528         /* for metadata, allow allocations with more holes */
2529         if (btrfs_test_opt(root, SSD_SPREAD)) {
2530                 min_bytes = bytes + empty_size;
2531         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
2532                 /*
2533                  * we want to do larger allocations when we are
2534                  * flushing out the delayed refs, it helps prevent
2535                  * making more work as we go along.
2536                  */
2537                 if (trans->transaction->delayed_refs.flushing)
2538                         min_bytes = max(bytes, (bytes + empty_size) >> 1);
2539                 else
2540                         min_bytes = max(bytes, (bytes + empty_size) >> 4);
2541         } else
2542                 min_bytes = max(bytes, (bytes + empty_size) >> 2);
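        /*
         * As a rough illustration (numbers picked for the example): with
         * bytes == 256K and empty_size == 768K, SSD_SPREAD demands the
         * full 1M, metadata while flushing delayed refs settles for
         * max(256K, 512K) == 512K, other metadata for
         * max(256K, 64K) == 256K, and data for max(256K, 256K) == 256K.
         */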
2543
2544         spin_lock(&ctl->tree_lock);
2545
2546         /*
2547          * If we know we don't have enough space to make a cluster don't even
2548          * bother doing all the work to try and find one.
2549          */
2550         if (ctl->free_space < min_bytes) {
2551                 spin_unlock(&ctl->tree_lock);
2552                 return -ENOSPC;
2553         }
2554
2555         spin_lock(&cluster->lock);
2556
2557         /* someone already found a cluster, hooray */
2558         if (cluster->block_group) {
2559                 ret = 0;
2560                 goto out;
2561         }
2562
2563         INIT_LIST_HEAD(&bitmaps);
2564         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
2565                                       bytes, min_bytes);
2566         if (ret)
2567                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
2568                                            offset, bytes, min_bytes);
2569
2570         /* Clear our temporary list */
2571         list_for_each_entry_safe(entry, tmp, &bitmaps, list)
2572                 list_del_init(&entry->list);
2573
2574         if (!ret) {
2575                 atomic_inc(&block_group->count);
2576                 list_add_tail(&cluster->block_group_list,
2577                               &block_group->cluster_list);
2578                 cluster->block_group = block_group;
2579         }
2580 out:
2581         spin_unlock(&cluster->lock);
2582         spin_unlock(&ctl->tree_lock);
2583
2584         return ret;
2585 }
2586
2587 /*
2588  * simple code to zero out a cluster
2589  */
2590 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
2591 {
2592         spin_lock_init(&cluster->lock);
2593         spin_lock_init(&cluster->refill_lock);
2594         cluster->root = RB_ROOT;
2595         cluster->max_size = 0;
2596         INIT_LIST_HEAD(&cluster->block_group_list);
2597         cluster->block_group = NULL;
2598 }
2599
2600 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
2601                            u64 *trimmed, u64 start, u64 end, u64 minlen)
2602 {
2603         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2604         struct btrfs_free_space *entry = NULL;
2605         struct btrfs_fs_info *fs_info = block_group->fs_info;
2606         u64 bytes = 0;
2607         u64 actually_trimmed;
2608         int ret = 0;
2609
2610         *trimmed = 0;
2611
2612         while (start < end) {
2613                 spin_lock(&ctl->tree_lock);
2614
2615                 if (ctl->free_space < minlen) {
2616                         spin_unlock(&ctl->tree_lock);
2617                         break;
2618                 }
2619
2620                 entry = tree_search_offset(ctl, start, 0, 1);
2621                 if (!entry)
2622                         entry = tree_search_offset(ctl,
2623                                                    offset_to_bitmap(ctl, start),
2624                                                    1, 1);
2625
2626                 if (!entry || entry->offset >= end) {
2627                         spin_unlock(&ctl->tree_lock);
2628                         break;
2629                 }
2630
2631                 if (entry->bitmap) {
2632                         ret = search_bitmap(ctl, entry, &start, &bytes);
2633                         if (!ret) {
2634                                 if (start >= end) {
2635                                         spin_unlock(&ctl->tree_lock);
2636                                         break;
2637                                 }
2638                                 bytes = min(bytes, end - start);
2639                                 bitmap_clear_bits(ctl, entry, start, bytes);
2640                                 if (entry->bytes == 0)
2641                                         free_bitmap(ctl, entry);
2642                         } else {
2643                                 start = entry->offset + BITS_PER_BITMAP *
2644                                         block_group->sectorsize;
2645                                 spin_unlock(&ctl->tree_lock);
2646                                 ret = 0;
2647                                 continue;
2648                         }
2649                 } else {
2650                         start = entry->offset;
2651                         bytes = min(entry->bytes, end - start);
2652                         unlink_free_space(ctl, entry);
2653                         kmem_cache_free(btrfs_free_space_cachep, entry);
2654                 }
2655
2656                 spin_unlock(&ctl->tree_lock);
2657
2658                 if (bytes >= minlen) {
2659                         struct btrfs_space_info *space_info;
2660                         int update = 0;
2661
2662                         space_info = block_group->space_info;
2663                         spin_lock(&space_info->lock);
2664                         spin_lock(&block_group->lock);
2665                         if (!block_group->ro) {
2666                                 block_group->reserved += bytes;
2667                                 space_info->bytes_reserved += bytes;
2668                                 update = 1;
2669                         }
2670                         spin_unlock(&block_group->lock);
2671                         spin_unlock(&space_info->lock);
2672
2673                         ret = btrfs_error_discard_extent(fs_info->extent_root,
2674                                                          start,
2675                                                          bytes,
2676                                                          &actually_trimmed);
2677
2678                         btrfs_add_free_space(block_group, start, bytes);
2679                         if (update) {
2680                                 spin_lock(&space_info->lock);
2681                                 spin_lock(&block_group->lock);
2682                                 if (block_group->ro)
2683                                         space_info->bytes_readonly += bytes;
2684                                 block_group->reserved -= bytes;
2685                                 space_info->bytes_reserved -= bytes;
2686                                 spin_unlock(&space_info->lock);
2687                                 spin_unlock(&block_group->lock);
2688                         }
2689
2690                         if (ret)
2691                                 break;
2692                         *trimmed += actually_trimmed;
2693                 }
2694                 start += bytes;
2695                 bytes = 0;
2696
2697                 if (fatal_signal_pending(current)) {
2698                         ret = -ERESTARTSYS;
2699                         break;
2700                 }
2701
2702                 cond_resched();
2703         }
2704
2705         return ret;
2706 }
2707
2708 /*
2709  * Find the left-most item in the cache tree, and then return the
2710  * smallest inode number in the item.
2711  *
2712  * Note: the returned inode number may not be the smallest one in
2713  * the tree, if the left-most item is a bitmap.
2714  */
2715 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
2716 {
2717         struct btrfs_free_space_ctl *ctl = fs_root->free_ino_ctl;
2718         struct btrfs_free_space *entry = NULL;
2719         u64 ino = 0;
2720
2721         spin_lock(&ctl->tree_lock);
2722
2723         if (RB_EMPTY_ROOT(&ctl->free_space_offset))
2724                 goto out;
2725
2726         entry = rb_entry(rb_first(&ctl->free_space_offset),
2727                          struct btrfs_free_space, offset_index);
2728
2729         if (!entry->bitmap) {
2730                 ino = entry->offset;
2731
2732                 unlink_free_space(ctl, entry);
2733                 entry->offset++;
2734                 entry->bytes--;
2735                 if (!entry->bytes)
2736                         kmem_cache_free(btrfs_free_space_cachep, entry);
2737                 else
2738                         link_free_space(ctl, entry);
2739         } else {
2740                 u64 offset = 0;
2741                 u64 count = 1;
2742                 int ret;
2743
2744                 ret = search_bitmap(ctl, entry, &offset, &count);
2745                 BUG_ON(ret);
2746
2747                 ino = offset;
2748                 bitmap_clear_bits(ctl, entry, offset, 1);
2749                 if (entry->bytes == 0)
2750                         free_bitmap(ctl, entry);
2751         }
2752 out:
2753         spin_unlock(&ctl->tree_lock);
2754
2755         return ino;
2756 }
2757
2758 struct inode *lookup_free_ino_inode(struct btrfs_root *root,
2759                                     struct btrfs_path *path)
2760 {
2761         struct inode *inode = NULL;
2762
2763         spin_lock(&root->cache_lock);
2764         if (root->cache_inode)
2765                 inode = igrab(root->cache_inode);
2766         spin_unlock(&root->cache_lock);
2767         if (inode)
2768                 return inode;
2769
2770         inode = __lookup_free_space_inode(root, path, 0);
2771         if (IS_ERR(inode))
2772                 return inode;
2773
2774         spin_lock(&root->cache_lock);
2775         if (!btrfs_fs_closing(root->fs_info))
2776                 root->cache_inode = igrab(inode);
2777         spin_unlock(&root->cache_lock);
2778
2779         return inode;
2780 }
2781
2782 int create_free_ino_inode(struct btrfs_root *root,
2783                           struct btrfs_trans_handle *trans,
2784                           struct btrfs_path *path)
2785 {
2786         return __create_free_space_inode(root, trans, path,
2787                                          BTRFS_FREE_INO_OBJECTID, 0);
2788 }
2789
2790 int load_free_ino_cache(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
2791 {
2792         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2793         struct btrfs_path *path;
2794         struct inode *inode;
2795         int ret = 0;
2796         u64 root_gen = btrfs_root_generation(&root->root_item);
2797
2798         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2799                 return 0;
2800
2801         /*
2802          * If we're unmounting then just return, since this does a search on the
2803          * normal root and not the commit root and we could deadlock.
2804          */
2805         if (btrfs_fs_closing(fs_info))
2806                 return 0;
2807
2808         path = btrfs_alloc_path();
2809         if (!path)
2810                 return 0;
2811
2812         inode = lookup_free_ino_inode(root, path);
2813         if (IS_ERR(inode))
2814                 goto out;
2815
2816         if (root_gen != BTRFS_I(inode)->generation)
2817                 goto out_put;
2818
2819         ret = __load_free_space_cache(root, inode, ctl, path, 0);
2820
2821         if (ret < 0)
2822                 printk(KERN_ERR "btrfs: failed to load free ino cache for "
2823                        "root %llu\n", root->root_key.objectid);
2824 out_put:
2825         iput(inode);
2826 out:
2827         btrfs_free_path(path);
2828         return ret;
2829 }
2830
2831 int btrfs_write_out_ino_cache(struct btrfs_root *root,
2832                               struct btrfs_trans_handle *trans,
2833                               struct btrfs_path *path)
2834 {
2835         struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
2836         struct inode *inode;
2837         int ret;
2838
2839         if (!btrfs_test_opt(root, INODE_MAP_CACHE))
2840                 return 0;
2841
2842         inode = lookup_free_ino_inode(root, path);
2843         if (IS_ERR(inode))
2844                 return 0;
2845
2846         ret = __btrfs_write_out_cache(root, inode, ctl, NULL, trans, path, 0);
2847         if (ret) {
2848                 btrfs_delalloc_release_metadata(inode, inode->i_size);
2849 #ifdef DEBUG
2850                 printk(KERN_ERR "btrfs: failed to write free ino cache "
2851                        "for root %llu\n", root->root_key.objectid);
2852 #endif
2853         }
2854
2855         iput(inode);
2856         return ret;
2857 }