Btrfs: Keep extent mappings in ram until pending ordered extents are done
author Chris Mason <chris.mason@oracle.com>
Fri, 18 Jul 2008 16:01:11 +0000 (12:01 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:05 +0000 (11:04 -0400)
It was possible for stale mappings from disk to be used instead of the
new pending ordered extent.  This adds a flag to the extent map struct
to keep it pinned until the pending ordered extent is actually on disk.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/extent_io.c
fs/btrfs/extent_map.c
fs/btrfs/extent_map.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/inode.c

index 3153b4f..d4a63ae 100644 (file)
@@ -2000,7 +2000,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        struct block_device *bdev;
        int ret;
        int nr = 0;
-       size_t page_offset = 0;
+       size_t pg_offset = 0;
        size_t blocksize;
        loff_t i_size = i_size_read(inode);
        unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -2008,9 +2008,9 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        u64 delalloc_end;
 
        WARN_ON(!PageLocked(page));
-       page_offset = i_size & (PAGE_CACHE_SIZE - 1);
+       pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
        if (page->index > end_index ||
-          (page->index == end_index && !page_offset)) {
+          (page->index == end_index && !pg_offset)) {
                page->mapping->a_ops->invalidatepage(page, 0);
                unlock_page(page);
                return 0;
@@ -2020,12 +2020,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                char *userpage;
 
                userpage = kmap_atomic(page, KM_USER0);
-               memset(userpage + page_offset, 0,
-                      PAGE_CACHE_SIZE - page_offset);
+               memset(userpage + pg_offset, 0,
+                      PAGE_CACHE_SIZE - pg_offset);
                kunmap_atomic(userpage, KM_USER0);
                flush_dcache_page(page);
        }
-       page_offset = 0;
+       pg_offset = 0;
 
        set_page_extent_mapped(page);
 
@@ -2088,7 +2088,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        unlock_start = page_end + 1;
                        break;
                }
-               em = epd->get_extent(inode, page, page_offset, cur,
+               em = epd->get_extent(inode, page, pg_offset, cur,
                                     end - cur + 1, 1);
                if (IS_ERR(em) || !em) {
                        SetPageError(page);
@@ -2113,12 +2113,13 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
                        unlock_extent(tree, unlock_start, cur + iosize -1,
                                      GFP_NOFS);
+
                        if (tree->ops && tree->ops->writepage_end_io_hook)
                                tree->ops->writepage_end_io_hook(page, cur,
                                                         cur + iosize - 1,
                                                         NULL, 1);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        unlock_start = cur;
                        continue;
                }
@@ -2127,7 +2128,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
                                   EXTENT_DIRTY, 0)) {
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
@@ -2141,6 +2142,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        SetPageError(page);
                } else {
                        unsigned long max_nr = end_index + 1;
+
                        set_range_writeback(tree, cur, cur + iosize - 1);
                        if (!PageWriteback(page)) {
                                printk("warning page %lu not writeback, "
@@ -2150,14 +2152,14 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        }
 
                        ret = submit_extent_page(WRITE, tree, page, sector,
-                                                iosize, page_offset, bdev,
+                                                iosize, pg_offset, bdev,
                                                 &epd->bio, max_nr,
                                                 end_bio_extent_writepage, 0);
                        if (ret)
                                SetPageError(page);
                }
                cur = cur + iosize;
-               page_offset += iosize;
+               pg_offset += iosize;
                nr++;
        }
 done:
@@ -2579,7 +2581,8 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                                spin_unlock(&map->lock);
                                break;
                        }
-                       if (em->start != start) {
+                       if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
+                           em->start != start) {
                                spin_unlock(&map->lock);
                                free_extent_map(em);
                                break;
index 8112327..71b1ac1 100644 (file)
@@ -173,6 +173,9 @@ static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
 
 static int mergable_maps(struct extent_map *prev, struct extent_map *next)
 {
+       if (test_bit(EXTENT_FLAG_PINNED, &prev->flags))
+               return 0;
+
        if (extent_map_end(prev) == next->start &&
            prev->flags == next->flags &&
            prev->bdev == next->bdev &&
@@ -320,6 +323,7 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
        int ret = 0;
 
+       WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
        BUG_ON(spin_trylock(&tree->lock));
        rb_erase(&em->rb_node, &tree->map);
        em->in_tree = 0;
index 5631421..a3978ec 100644 (file)
@@ -8,6 +8,9 @@
 #define EXTENT_MAP_INLINE (u64)-2
 #define EXTENT_MAP_DELALLOC (u64)-1
 
+/* bits for the flags field */
+#define EXTENT_FLAG_PINNED 0 /* this entry not yet on disk, don't free it */
+
 struct extent_map {
        struct rb_node rb_node;
 
index e02f1e5..d9c69e1 100644 (file)
@@ -192,7 +192,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
                                 (char *)&sector_sum->sum);
                sector_sum->offset = page_offset(bvec->bv_page) +
                        bvec->bv_offset;
-
                sector_sum++;
                bio_index++;
                total_bytes += bvec->bv_len;
@@ -201,9 +200,6 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
        }
        btrfs_add_ordered_sum(inode, ordered, sums);
        btrfs_put_ordered_extent(ordered);
-       if (total_bytes != bio->bi_size) {
-printk("warning, total bytes %lu bio size %u\n", total_bytes, bio->bi_size);
-       }
        return 0;
 }
 
@@ -372,6 +368,7 @@ next_sector:
                write_extent_buffer(leaf, &sector_sum->sum,
                                    (unsigned long)item, BTRFS_CRC32_SIZE);
        }
+
        total_bytes += root->sectorsize;
        sector_sum++;
        if (total_bytes < sums->len) {
index 40ad1b2..eccdb95 100644 (file)
@@ -358,9 +358,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
        struct extent_map *split = NULL;
        struct extent_map *split2 = NULL;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
-       struct extent_map *tmp;
        u64 len = end - start + 1;
-       u64 next_start;
        int ret;
        int testend = 1;
 
@@ -381,8 +379,16 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
                        spin_unlock(&em_tree->lock);
                        break;
                }
-               tmp = rb_entry(&em->rb_node, struct extent_map, rb_node);
-               next_start = tmp->start;
+               if (test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+                       start = em->start + em->len;
+                       free_extent_map(em);
+                       spin_unlock(&em_tree->lock);
+                       if (start < end) {
+                               len = end - start + 1;
+                               continue;
+                       }
+                       break;
+               }
                remove_extent_mapping(em_tree, em);
 
                if (em->block_start < EXTENT_MAP_LAST_BYTE &&
index 8803abc..08dbe73 100644 (file)
@@ -144,6 +144,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
                em->len = ins.offset;
                em->block_start = ins.objectid;
                em->bdev = root->fs_info->fs_devices->latest_bdev;
+               set_bit(EXTENT_FLAG_PINNED, &em->flags);
                while(1) {
                        spin_lock(&em_tree->lock);
                        ret = add_extent_mapping(em_tree, em);
@@ -483,6 +484,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        struct btrfs_trans_handle *trans;
        struct btrfs_ordered_extent *ordered_extent;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+       struct extent_map *em;
        u64 alloc_hint = 0;
        struct list_head list;
        struct btrfs_key ins;
@@ -524,6 +527,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
                                       ordered_extent->len,
                                       ordered_extent->len, 0);
        BUG_ON(ret);
+
+
+       spin_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, ordered_extent->file_offset,
+                              ordered_extent->len);
+       if (em) {
+               clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+               free_extent_map(em);
+       }
+       spin_unlock(&em_tree->lock);
+
        btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
                                ordered_extent->file_offset +
                                ordered_extent->len - 1);
@@ -538,6 +552,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
        btrfs_ordered_update_i_size(inode, ordered_extent);
        btrfs_remove_ordered_extent(inode, ordered_extent);
+
        /* once for us */
        btrfs_put_ordered_extent(ordered_extent);
        /* once for the tree */