Btrfs: fix page leakage
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c87847..64dc93f 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -45,6 +45,7 @@ struct extent_page_data {
        struct bio *bio;
        struct extent_io_tree *tree;
        get_extent_t *get_extent;
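+       /* flags (EXTENT_BIO_*) for the bio currently being built */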
+       unsigned long bio_flags;
 
        /* tells writepage not to lock the state bits for this range
         * it still does the unlocking
@@ -64,13 +65,13 @@ tree_fs_info(struct extent_io_tree *tree)
 
 int __init extent_io_init(void)
 {
-       extent_state_cache = kmem_cache_create("extent_state",
+       extent_state_cache = kmem_cache_create("btrfs_extent_state",
                        sizeof(struct extent_state), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_state_cache)
                return -ENOMEM;
 
-       extent_buffer_cache = kmem_cache_create("extent_buffers",
+       extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
                        sizeof(struct extent_buffer), 0,
                        SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
        if (!extent_buffer_cache)
@@ -936,6 +937,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * @end:       the end offset in bytes (inclusive)
  * @bits:      the bits to set in this range
  * @clear_bits:        the bits to clear in this range
+ * @cached_state:      state that we're going to cache
  * @mask:      the allocation mask
  *
  * This will go through and set bits for the given range.  If any states exist
@@ -945,7 +947,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-                      int bits, int clear_bits, gfp_t mask)
+                      int bits, int clear_bits,
+                      struct extent_state **cached_state, gfp_t mask)
 {
        struct extent_state *state;
        struct extent_state *prealloc = NULL;
@@ -962,6 +965,15 @@ again:
        }
 
        spin_lock(&tree->lock);
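+       /*
+        * Fast path: if the caller gave us a cached state that still
+        * covers 'start', skip the tree search below.
+        */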
+       if (cached_state && *cached_state) {
+               state = *cached_state;
+               if (state->start <= start && state->end > start &&
+                   state->tree) {
+                       node = &state->rb_node;
+                       goto hit_next;
+               }
+       }
+
        /*
         * this search will find all the extents that end after
         * our range starts.
@@ -992,6 +1004,7 @@ hit_next:
         */
        if (state->start == start && state->end <= end) {
                set_state_bits(tree, state, &bits);
+               cache_state(state, cached_state);
                state = clear_state_bit(tree, state, &clear_bits, 0);
                if (last_end == (u64)-1)
                        goto out;
@@ -1032,6 +1045,7 @@ hit_next:
                        goto out;
                if (state->end <= end) {
                        set_state_bits(tree, state, &bits);
+                       cache_state(state, cached_state);
                        state = clear_state_bit(tree, state, &clear_bits, 0);
                        if (last_end == (u64)-1)
                                goto out;
@@ -1070,6 +1084,7 @@ hit_next:
                                   &bits);
                if (err)
                        extent_io_tree_panic(tree, err);
+               cache_state(prealloc, cached_state);
                prealloc = NULL;
                start = this_end + 1;
                goto search_again;
@@ -1092,6 +1107,7 @@ hit_next:
                        extent_io_tree_panic(tree, err);
 
                set_state_bits(tree, prealloc, &bits);
+               cache_state(prealloc, cached_state);
                clear_state_bit(tree, prealloc, &clear_bits, 0);
                prealloc = NULL;
                goto out;
@@ -1144,6 +1160,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
                              NULL, cached_state, mask);
 }
 
+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+                     struct extent_state **cached_state, gfp_t mask)
+{
+       return set_extent_bit(tree, start, end,
+                             EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+                             NULL, cached_state, mask);
+}
+
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
                       gfp_t mask)
 {
@@ -1288,18 +1312,42 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, int bits)
+                         u64 *start_ret, u64 *end_ret, int bits,
+                         struct extent_state **cached_state)
 {
        struct extent_state *state;
+       struct rb_node *n;
        int ret = 1;
 
        spin_lock(&tree->lock);
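+       /*
+        * If the cached state ends immediately before 'start', resume the
+        * scan from its successor instead of searching from the root.
+        */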
+       if (cached_state && *cached_state) {
+               state = *cached_state;
+               if (state->end == start - 1 && state->tree) {
+                       n = rb_next(&state->rb_node);
+                       while (n) {
+                               state = rb_entry(n, struct extent_state,
+                                                rb_node);
+                               if (state->state & bits)
+                                       goto got_it;
+                               n = rb_next(n);
+                       }
+                       free_extent_state(*cached_state);
+                       *cached_state = NULL;
+                       goto out;
+               }
+               free_extent_state(*cached_state);
+               *cached_state = NULL;
+       }
+
        state = find_first_extent_bit_state(tree, start, bits);
+got_it:
        if (state) {
+               cache_state(state, cached_state);
                *start_ret = state->start;
                *end_ret = state->end;
                ret = 0;
        }
+out:
        spin_unlock(&tree->lock);
        return ret;
 }
@@ -2062,7 +2110,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                }
                read_unlock(&em_tree->lock);
 
-               if (!em || IS_ERR(em)) {
+               if (!em) {
                        kfree(failrec);
                        return -EIO;
                }
@@ -2298,8 +2346,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                struct extent_state *cached = NULL;
                struct extent_state *state;
 
-               pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
-                        "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
+               pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
+                        "mirror=%ld\n", (u64)bio->bi_sector, err,
                         (long int)bio->bi_bdev);
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
@@ -2703,12 +2751,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
                                         this_bio_flag);
-                       BUG_ON(ret == -ENOMEM);
-                       nr++;
-                       *bio_flags = this_bio_flag;
+                       if (!ret) {
+                               nr++;
+                               *bio_flags = this_bio_flag;
+                       }
                }
-               if (ret)
+               if (ret) {
                        SetPageError(page);
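+                       /*
+                        * No end_io callback will run for this range, so
+                        * unlock it here instead of leaking the locked extent.
+                        */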
+                       unlock_extent(tree, cur, cur + iosize - 1);
+               }
                cur = cur + iosize;
                pg_offset += iosize;
        }
@@ -3155,12 +3206,16 @@ static int write_one_eb(struct extent_buffer *eb,
        struct block_device *bdev = fs_info->fs_devices->latest_bdev;
        u64 offset = eb->start;
        unsigned long i, num_pages;
+       unsigned long bio_flags = 0;
        int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
        int ret = 0;
 
        clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
        num_pages = num_extent_pages(eb->start, eb->len);
        atomic_set(&eb->io_pages, num_pages);
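+       /*
+        * Tag tree log metadata so its bios stay separate from ordinary
+        * metadata bios; submit_extent_page() won't merge a page into a
+        * bio carrying different flags.
+        */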
+       if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
+               bio_flags = EXTENT_BIO_TREE_LOG;
+
        for (i = 0; i < num_pages; i++) {
                struct page *p = extent_buffer_page(eb, i);
 
@@ -3169,7 +3224,8 @@ static int write_one_eb(struct extent_buffer *eb,
                ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
                                         PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
                                         -1, end_bio_extent_buffer_writepage,
-                                        0, 0, 0);
+                                        0, epd->bio_flags, bio_flags);
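+                       /* remember the flags so flush_epd_write_bio() submits with them */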
+               epd->bio_flags = bio_flags;
                if (ret) {
                        set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
                        SetPageError(p);
@@ -3204,6 +3260,7 @@ int btree_write_cache_pages(struct address_space *mapping,
                .tree = tree,
                .extent_locked = 0,
                .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+               .bio_flags = 0,
        };
        int ret = 0;
        int done = 0;
@@ -3248,19 +3305,34 @@ retry:
                                break;
                        }
 
+                       spin_lock(&mapping->private_lock);
+                       if (!PagePrivate(page)) {
+                               spin_unlock(&mapping->private_lock);
+                               continue;
+                       }
+
                        eb = (struct extent_buffer *)page->private;
+
+                       /*
+                        * Shouldn't happen and normally this would be a BUG_ON
+                        * but no sense in crashing the user's box for something
+                        * we can survive anyway.
+                        */
                        if (!eb) {
+                               spin_unlock(&mapping->private_lock);
                                WARN_ON(1);
                                continue;
                        }
 
-                       if (eb == prev_eb)
+                       if (eb == prev_eb) {
+                               spin_unlock(&mapping->private_lock);
                                continue;
+                       }
 
-                       if (!atomic_inc_not_zero(&eb->refs)) {
-                               WARN_ON(1);
+                       ret = atomic_inc_not_zero(&eb->refs);
+                       spin_unlock(&mapping->private_lock);
+                       if (!ret)
                                continue;
-                       }
 
                        prev_eb = eb;
                        ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
@@ -3451,7 +3523,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
                if (epd->sync_io)
                        rw = WRITE_SYNC;
 
-               ret = submit_one_bio(rw, epd->bio, 0, 0);
+               ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
                BUG_ON(ret < 0); /* -ENOMEM */
                epd->bio = NULL;
        }
@@ -3474,6 +3546,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
                .get_extent = get_extent,
                .extent_locked = 0,
                .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+               .bio_flags = 0,
        };
 
        ret = __extent_writepage(page, wbc, &epd);
@@ -3498,6 +3571,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
                .get_extent = get_extent,
                .extent_locked = 1,
                .sync_io = mode == WB_SYNC_ALL,
+               .bio_flags = 0,
        };
        struct writeback_control wbc_writepages = {
                .sync_mode      = mode,
@@ -3537,6 +3611,7 @@ int extent_writepages(struct extent_io_tree *tree,
                .get_extent = get_extent,
                .extent_locked = 0,
                .sync_io = wbc->sync_mode == WB_SYNC_ALL,
+               .bio_flags = 0,
        };
 
        ret = extent_write_cache_pages(tree, mapping, wbc,
@@ -3914,18 +3989,6 @@ out:
        return ret;
 }
 
-inline struct page *extent_buffer_page(struct extent_buffer *eb,
-                                             unsigned long i)
-{
-       return eb->pages[i];
-}
-
-inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-       return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-               (start >> PAGE_CACHE_SHIFT);
-}
-
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
 #if LEAK_DEBUG
@@ -4041,7 +4104,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
 
        return eb;
 err:
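+       /* free every page allocated before the failure, including page 0 */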
-       for (i--; i > 0; i--)
+       for (i--; i >= 0; i--)
                __free_page(eb->pages[i]);
        __free_extent_buffer(eb);
        return NULL;
@@ -4186,10 +4249,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 
        for (i = 0; i < num_pages; i++, index++) {
                p = find_or_create_page(mapping, index, GFP_NOFS);
-               if (!p) {
-                       WARN_ON(1);
+               if (!p)
                        goto free_eb;
-               }
 
                spin_lock(&mapping->private_lock);
                if (PagePrivate(p)) {
@@ -4332,7 +4393,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
 
                /* Should be safe to release our pages at this point */
                btrfs_release_extent_buffer_page(eb, 0);
-
                call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
                return 1;
        }