Merge branch 'integration' into for-linus
[pandora-kernel.git] / fs / btrfs / extent_io.c
index ba41da5..5bbdb24 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/pagevec.h>
+#include <linux/prefetch.h>
+#include <linux/cleancache.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "compat.h"
@@ -101,7 +103,7 @@ void extent_io_exit(void)
 }
 
 void extent_io_tree_init(struct extent_io_tree *tree,
-                         struct address_space *mapping, gfp_t mask)
+                        struct address_space *mapping)
 {
        tree->state = RB_ROOT;
        INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
@@ -279,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree,
                if (other->start == state->end + 1 &&
                    other->state == state->state) {
                        merge_cb(tree, state, other);
-                       other->start = state->start;
-                       state->tree = NULL;
-                       rb_erase(&state->rb_node, &tree->state);
-                       free_extent_state(state);
-                       state = NULL;
+                       state->end = other->end;
+                       other->tree = NULL;
+                       rb_erase(&other->rb_node, &tree->state);
+                       free_extent_state(other);
                }
        }
 
@@ -349,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree,
                       "%llu %llu\n", (unsigned long long)found->start,
                       (unsigned long long)found->end,
                       (unsigned long long)start, (unsigned long long)end);
-               free_extent_state(state);
                return -EEXIST;
        }
        state->tree = tree;
@@ -439,6 +439,15 @@ static int clear_state_bit(struct extent_io_tree *tree,
        return ret;
 }
 
+static struct extent_state *
+alloc_extent_state_atomic(struct extent_state *prealloc)
+{
+       if (!prealloc)
+               prealloc = alloc_extent_state(GFP_ATOMIC);
+
+       return prealloc;
+}
+
 /*
  * clear some bits on a range in the tree.  This may require splitting
  * or inserting elements in the tree, so the gfp mask is used to
@@ -489,7 +498,8 @@ again:
                        cached_state = NULL;
                }
 
-               if (cached && cached->tree && cached->start == start) {
+               if (cached && cached->tree && cached->start <= start &&
+                   cached->end > start) {
                        if (clear)
                                atomic_dec(&cached->refs);
                        state = cached;
@@ -529,8 +539,8 @@ hit_next:
         */
 
        if (state->start < start) {
-               if (!prealloc)
-                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
@@ -551,8 +561,8 @@ hit_next:
         * on the first half
         */
        if (state->start <= end && state->end > end) {
-               if (!prealloc)
-                       prealloc = alloc_extent_state(GFP_ATOMIC);
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
                if (wake)
@@ -725,14 +735,14 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 again:
        if (!prealloc && (mask & __GFP_WAIT)) {
                prealloc = alloc_extent_state(mask);
-               if (!prealloc)
-                       return -ENOMEM;
+               BUG_ON(!prealloc);
        }
 
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
                state = *cached_state;
-               if (state->start == start && state->tree) {
+               if (state->start <= start && state->end > start &&
+                   state->tree) {
                        node = &state->rb_node;
                        goto hit_next;
                }
@@ -743,6 +753,8 @@ again:
         */
        node = tree_search(tree, start);
        if (!node) {
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = insert_state(tree, prealloc, start, end, &bits);
                prealloc = NULL;
                BUG_ON(err == -EEXIST);
@@ -777,14 +789,12 @@ hit_next:
                        goto out;
 
                start = last_end + 1;
-               if (start < end && prealloc && !need_resched()) {
-                       next_node = rb_next(node);
-                       if (next_node) {
-                               state = rb_entry(next_node, struct extent_state,
-                                                rb_node);
-                               if (state->start == start)
-                                       goto hit_next;
-                       }
+               next_node = rb_next(&state->rb_node);
+               if (next_node && start < end && prealloc && !need_resched()) {
+                       state = rb_entry(next_node, struct extent_state,
+                                        rb_node);
+                       if (state->start == start)
+                               goto hit_next;
                }
                goto search_again;
        }
@@ -811,6 +821,9 @@ hit_next:
                        err = -EEXIST;
                        goto out;
                }
+
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, start);
                BUG_ON(err == -EEXIST);
                prealloc = NULL;
@@ -841,10 +854,19 @@ hit_next:
                        this_end = end;
                else
                        this_end = last_start - 1;
+
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
+
+               /*
+                * Avoid to free 'prealloc' if it can be merged with
+                * the later extent.
+                */
                err = insert_state(tree, prealloc, start, this_end,
                                   &bits);
                BUG_ON(err == -EEXIST);
                if (err) {
+                       free_extent_state(prealloc);
                        prealloc = NULL;
                        goto out;
                }
@@ -865,6 +887,9 @@ hit_next:
                        err = -EEXIST;
                        goto out;
                }
+
+               prealloc = alloc_extent_state_atomic(prealloc);
+               BUG_ON(!prealloc);
                err = split_state(tree, state, prealloc, end + 1);
                BUG_ON(err == -EEXIST);
 
@@ -941,13 +966,6 @@ int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                              NULL, mask);
 }
 
-static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0,
-                               NULL, mask);
-}
-
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
                        struct extent_state **cached_state, gfp_t mask)
 {
@@ -963,11 +981,6 @@ static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
                                cached_state, mask);
 }
 
-int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
-{
-       return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
-}
-
 /*
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
@@ -1027,25 +1040,6 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
                                mask);
 }
 
-/*
- * helper function to set pages and extents in the tree dirty
- */
-int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
-{
-       unsigned long index = start >> PAGE_CACHE_SHIFT;
-       unsigned long end_index = end >> PAGE_CACHE_SHIFT;
-       struct page *page;
-
-       while (index <= end_index) {
-               page = find_get_page(tree->mapping, index);
-               BUG_ON(!page);
-               __set_page_dirty_nobuffers(page);
-               page_cache_release(page);
-               index++;
-       }
-       return 0;
-}
-
 /*
  * helper function to set both pages and extents in the tree writeback
  */
@@ -1480,7 +1474,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
                        if (total_bytes >= max_bytes)
                                break;
                        if (!found) {
-                               *start = state->start;
+                               *start = max(cur_start, state->start);
                                found = 1;
                        }
                        last = state->end;
@@ -1568,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
        int bitset = 0;
 
        spin_lock(&tree->lock);
-       if (cached && cached->tree && cached->start == start)
+       if (cached && cached->tree && cached->start <= start &&
+           cached->end > start)
                node = &cached->rb_node;
        else
                node = tree_search(tree, start);
@@ -1819,46 +1814,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        bio_put(bio);
 }
 
-/*
- * IO done from prepare_write is pretty simple, we just unlock
- * the structs in the extent tree when done, and set the uptodate bits
- * as appropriate.
- */
-static void end_bio_extent_preparewrite(struct bio *bio, int err)
-{
-       const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-       struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
-       struct extent_io_tree *tree;
-       u64 start;
-       u64 end;
-
-       do {
-               struct page *page = bvec->bv_page;
-               struct extent_state *cached = NULL;
-               tree = &BTRFS_I(page->mapping->host)->io_tree;
-
-               start = ((u64)page->index << PAGE_CACHE_SHIFT) +
-                       bvec->bv_offset;
-               end = start + bvec->bv_len - 1;
-
-               if (--bvec >= bio->bi_io_vec)
-                       prefetchw(&bvec->bv_page->flags);
-
-               if (uptodate) {
-                       set_extent_uptodate(tree, start, end, &cached,
-                                           GFP_ATOMIC);
-               } else {
-                       ClearPageUptodate(page);
-                       SetPageError(page);
-               }
-
-               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
-       } while (bvec >= bio->bi_io_vec);
-
-       bio_put(bio);
-}
-
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
@@ -2007,7 +1962,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        struct btrfs_ordered_extent *ordered;
        int ret;
        int nr = 0;
-       size_t page_offset = 0;
+       size_t pg_offset = 0;
        size_t iosize;
        size_t disk_io_size;
        size_t blocksize = inode->i_sb->s_blocksize;
@@ -2015,6 +1970,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 
        set_page_extent_mapped(page);
 
+       if (!PageUptodate(page)) {
+               if (cleancache_get_page(page) == 0) {
+                       BUG_ON(blocksize != PAGE_SIZE);
+                       goto out;
+               }
+       }
+
        end = page_end;
        while (1) {
                lock_extent(tree, start, end, GFP_NOFS);
@@ -2043,9 +2005,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        char *userpage;
                        struct extent_state *cached = NULL;
 
-                       iosize = PAGE_CACHE_SIZE - page_offset;
+                       iosize = PAGE_CACHE_SIZE - pg_offset;
                        userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
+                       memset(userpage + pg_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
@@ -2054,9 +2016,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                                             &cached, GFP_NOFS);
                        break;
                }
-               em = get_extent(inode, page, page_offset, cur,
+               em = get_extent(inode, page, pg_offset, cur,
                                end - cur + 1, 0);
-               if (IS_ERR(em) || !em) {
+               if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
                        unlock_extent(tree, cur, end, GFP_NOFS);
                        break;
@@ -2094,7 +2056,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        struct extent_state *cached = NULL;
 
                        userpage = kmap_atomic(page, KM_USER0);
-                       memset(userpage + page_offset, 0, iosize);
+                       memset(userpage + pg_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
 
@@ -2103,7 +2065,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        unlock_extent_cached(tree, cur, cur + iosize - 1,
                                             &cached, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                /* the get_extent function already copied into the page */
@@ -2112,7 +2074,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        check_page_uptodate(tree, page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
                /* we have an inline extent but it didn't get marked up
@@ -2122,7 +2084,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        SetPageError(page);
                        unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
                        cur = cur + iosize;
-                       page_offset += iosize;
+                       pg_offset += iosize;
                        continue;
                }
 
@@ -2135,7 +2097,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
                        pnr -= page->index;
                        ret = submit_extent_page(READ, tree, page,
-                                        sector, disk_io_size, page_offset,
+                                        sector, disk_io_size, pg_offset,
                                         bdev, bio, pnr,
                                         end_bio_extent_readpage, mirror_num,
                                         *bio_flags,
@@ -2146,8 +2108,9 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                if (ret)
                        SetPageError(page);
                cur = cur + iosize;
-               page_offset += iosize;
+               pg_offset += iosize;
        }
+out:
        if (!nr) {
                if (!PageError(page))
                        SetPageUptodate(page);
@@ -2341,7 +2304,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                }
                em = epd->get_extent(inode, page, pg_offset, cur,
                                     end - cur + 1, 1);
-               if (IS_ERR(em) || !em) {
+               if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
                        break;
                }
@@ -2468,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
        pgoff_t index;
        pgoff_t end;            /* Inclusive */
        int scanned = 0;
+       int tag;
 
        pagevec_init(&pvec, 0);
        if (wbc->range_cyclic) {
@@ -2478,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                scanned = 1;
        }
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag = PAGECACHE_TAG_TOWRITE;
+       else
+               tag = PAGECACHE_TAG_DIRTY;
 retry:
+       if (wbc->sync_mode == WB_SYNC_ALL)
+               tag_pages_for_writeback(mapping, index, end);
        while (!done && !nr_to_write_done && (index <= end) &&
-              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                             PAGECACHE_TAG_DIRTY, min(end - index,
-                                 (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
+              (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
+                       min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
                unsigned i;
 
                scanned = 1;
@@ -2719,128 +2688,6 @@ int extent_invalidatepage(struct extent_io_tree *tree,
        return 0;
 }
 
-/*
- * simple commit_write call, set_range_dirty is used to mark both
- * the pages and the extent records as dirty
- */
-int extent_commit_write(struct extent_io_tree *tree,
-                       struct inode *inode, struct page *page,
-                       unsigned from, unsigned to)
-{
-       loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-
-       set_page_extent_mapped(page);
-       set_page_dirty(page);
-
-       if (pos > inode->i_size) {
-               i_size_write(inode, pos);
-               mark_inode_dirty(inode);
-       }
-       return 0;
-}
-
-int extent_prepare_write(struct extent_io_tree *tree,
-                        struct inode *inode, struct page *page,
-                        unsigned from, unsigned to, get_extent_t *get_extent)
-{
-       u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
-       u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
-       u64 block_start;
-       u64 orig_block_start;
-       u64 block_end;
-       u64 cur_end;
-       struct extent_map *em;
-       unsigned blocksize = 1 << inode->i_blkbits;
-       size_t page_offset = 0;
-       size_t block_off_start;
-       size_t block_off_end;
-       int err = 0;
-       int iocount = 0;
-       int ret = 0;
-       int isnew;
-
-       set_page_extent_mapped(page);
-
-       block_start = (page_start + from) & ~((u64)blocksize - 1);
-       block_end = (page_start + to - 1) | (blocksize - 1);
-       orig_block_start = block_start;
-
-       lock_extent(tree, page_start, page_end, GFP_NOFS);
-       while (block_start <= block_end) {
-               em = get_extent(inode, page, page_offset, block_start,
-                               block_end - block_start + 1, 1);
-               if (IS_ERR(em) || !em)
-                       goto err;
-
-               cur_end = min(block_end, extent_map_end(em) - 1);
-               block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
-               block_off_end = block_off_start + blocksize;
-               isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
-
-               if (!PageUptodate(page) && isnew &&
-                   (block_off_end > to || block_off_start < from)) {
-                       void *kaddr;
-
-                       kaddr = kmap_atomic(page, KM_USER0);
-                       if (block_off_end > to)
-                               memset(kaddr + to, 0, block_off_end - to);
-                       if (block_off_start < from)
-                               memset(kaddr + block_off_start, 0,
-                                      from - block_off_start);
-                       flush_dcache_page(page);
-                       kunmap_atomic(kaddr, KM_USER0);
-               }
-               if ((em->block_start != EXTENT_MAP_HOLE &&
-                    em->block_start != EXTENT_MAP_INLINE) &&
-                   !isnew && !PageUptodate(page) &&
-                   (block_off_end > to || block_off_start < from) &&
-                   !test_range_bit(tree, block_start, cur_end,
-                                   EXTENT_UPTODATE, 1, NULL)) {
-                       u64 sector;
-                       u64 extent_offset = block_start - em->start;
-                       size_t iosize;
-                       sector = (em->block_start + extent_offset) >> 9;
-                       iosize = (cur_end - block_start + blocksize) &
-                               ~((u64)blocksize - 1);
-                       /*
-                        * we've already got the extent locked, but we
-                        * need to split the state such that our end_bio
-                        * handler can clear the lock.
-                        */
-                       set_extent_bit(tree, block_start,
-                                      block_start + iosize - 1,
-                                      EXTENT_LOCKED, 0, NULL, NULL, GFP_NOFS);
-                       ret = submit_extent_page(READ, tree, page,
-                                        sector, iosize, page_offset, em->bdev,
-                                        NULL, 1,
-                                        end_bio_extent_preparewrite, 0,
-                                        0, 0);
-                       if (ret && !err)
-                               err = ret;
-                       iocount++;
-                       block_start = block_start + iosize;
-               } else {
-                       struct extent_state *cached = NULL;
-
-                       set_extent_uptodate(tree, block_start, cur_end, &cached,
-                                           GFP_NOFS);
-                       unlock_extent_cached(tree, block_start, cur_end,
-                                            &cached, GFP_NOFS);
-                       block_start = cur_end + 1;
-               }
-               page_offset = block_start & (PAGE_CACHE_SIZE - 1);
-               free_extent_map(em);
-       }
-       if (iocount) {
-               wait_extent_bit(tree, orig_block_start,
-                               block_end, EXTENT_LOCKED);
-       }
-       check_page_uptodate(tree, page);
-err:
-       /* FIXME, zero out newly allocated blocks on error */
-       return err;
-}
-
 /*
  * a helper for releasepage, this tests for areas of the page that
  * are locked or under IO and drops the related state bits if it is safe
@@ -2899,7 +2746,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                        len = end - start + 1;
                        write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
-                       if (!em || IS_ERR(em)) {
+                       if (IS_ERR_OR_NULL(em)) {
                                write_unlock(&map->lock);
                                break;
                        }
@@ -2927,33 +2774,6 @@ int try_release_extent_mapping(struct extent_map_tree *map,
        return try_release_extent_state(map, tree, page, mask);
 }
 
-sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
-               get_extent_t *get_extent)
-{
-       struct inode *inode = mapping->host;
-       struct extent_state *cached_state = NULL;
-       u64 start = iblock << inode->i_blkbits;
-       sector_t sector = 0;
-       size_t blksize = (1 << inode->i_blkbits);
-       struct extent_map *em;
-
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
-                        0, &cached_state, GFP_NOFS);
-       em = get_extent(inode, NULL, 0, start, blksize, 0);
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
-                            start + blksize - 1, &cached_state, GFP_NOFS);
-       if (!em || IS_ERR(em))
-               return 0;
-
-       if (em->block_start > EXTENT_MAP_LAST_BYTE)
-               goto out;
-
-       sector = (em->block_start + start - em->start) >> inode->i_blkbits;
-out:
-       free_extent_map(em);
-       return sector;
-}
-
 /*
  * helper function for fiemap, which doesn't want to see any holes.
  * This maps until we find something past 'last'
@@ -2976,7 +2796,7 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
                        break;
                len = (len + sectorsize - 1) & ~(sectorsize - 1);
                em = get_extent(inode, NULL, 0, offset, len, 0);
-               if (!em || IS_ERR(em))
+               if (IS_ERR_OR_NULL(em))
                        return em;
 
                /* if this isn't a hole return it */
@@ -3030,7 +2850,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
         * because there might be preallocation past i_size
         */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
-                                      path, inode->i_ino, -1, 0);
+                                      path, btrfs_ino(inode), -1, 0);
        if (ret < 0) {
                btrfs_free_path(path);
                return ret;
@@ -3043,7 +2863,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        found_type = btrfs_key_type(&found_key);
 
        /* No extents, but there might be delalloc bits */
-       if (found_key.objectid != inode->i_ino ||
+       if (found_key.objectid != btrfs_ino(inode) ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                /* have to trust i_size as the end */
                last = (u64)-1;
@@ -3207,8 +3027,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                return NULL;
        eb->start = start;
        eb->len = len;
-       spin_lock_init(&eb->lock);
-       init_waitqueue_head(&eb->lock_wq);
+       rwlock_init(&eb->lock);
+       atomic_set(&eb->write_locks, 0);
+       atomic_set(&eb->read_locks, 0);
+       atomic_set(&eb->blocking_readers, 0);
+       atomic_set(&eb->blocking_writers, 0);
+       atomic_set(&eb->spinning_readers, 0);
+       atomic_set(&eb->spinning_writers, 0);
+       init_waitqueue_head(&eb->write_lock_wq);
+       init_waitqueue_head(&eb->read_lock_wq);
 
 #if LEAK_DEBUG
        spin_lock_irqsave(&leak_lock, flags);
@@ -3266,8 +3093,7 @@ static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
 
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                                          u64 start, unsigned long len,
-                                         struct page *page0,
-                                         gfp_t mask)
+                                         struct page *page0)
 {
        unsigned long num_pages = num_extent_pages(start, len);
        unsigned long i;
@@ -3288,7 +3114,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
        }
        rcu_read_unlock();
 
-       eb = __alloc_extent_buffer(tree, start, len, mask);
+       eb = __alloc_extent_buffer(tree, start, len, GFP_NOFS);
        if (!eb)
                return NULL;
 
@@ -3305,7 +3131,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
                i = 0;
        }
        for (; i < num_pages; i++, index++) {
-               p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
+               p = find_or_create_page(mapping, index, GFP_NOFS);
                if (!p) {
                        WARN_ON(1);
                        goto free_eb;
@@ -3377,8 +3203,7 @@ free_eb:
 }
 
 struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
-                                        u64 start, unsigned long len,
-                                         gfp_t mask)
+                                        u64 start, unsigned long len)
 {
        struct extent_buffer *eb;
 
@@ -3439,13 +3264,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
        return 0;
 }
 
-int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
-                                   struct extent_buffer *eb)
-{
-       return wait_on_extent_writeback(tree, eb->start,
-                                       eb->start + eb->len - 1);
-}
-
 int set_extent_buffer_dirty(struct extent_io_tree *tree,
                             struct extent_buffer *eb)
 {
@@ -3460,6 +3278,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
        return was_dirty;
 }
 
+static int __eb_straddles_pages(u64 start, u64 len)
+{
+       if (len < PAGE_CACHE_SIZE)
+               return 1;
+       if (start & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       if ((start + len) & (PAGE_CACHE_SIZE - 1))
+               return 1;
+       return 0;
+}
+
+static int eb_straddles_pages(struct extent_buffer *eb)
+{
+       return __eb_straddles_pages(eb->start, eb->len);
+}
+
 int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
                                struct extent_buffer *eb,
                                struct extent_state **cached_state)
@@ -3471,8 +3305,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
-       clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                             cached_state, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                     cached_state, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (page)
@@ -3490,8 +3326,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
 
        num_pages = num_extent_pages(eb->start, eb->len);
 
-       set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           NULL, GFP_NOFS);
+       if (eb_straddles_pages(eb)) {
+               set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                                   NULL, GFP_NOFS);
+       }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3514,9 +3352,12 @@ int extent_range_uptodate(struct extent_io_tree *tree,
        int uptodate;
        unsigned long index;
 
-       ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL);
-       if (ret)
-               return 1;
+       if (__eb_straddles_pages(start, end - start + 1)) {
+               ret = test_range_bit(tree, start, end,
+                                    EXTENT_UPTODATE, 1, NULL);
+               if (ret)
+                       return 1;
+       }
        while (start <= end) {
                index = start >> PAGE_CACHE_SHIFT;
                page = find_get_page(tree->mapping, index);
@@ -3544,10 +3385,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 1;
 
-       ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, cached_state);
-       if (ret)
-               return ret;
+       if (eb_straddles_pages(eb)) {
+               ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, cached_state);
+               if (ret)
+                       return ret;
+       }
 
        num_pages = num_extent_pages(eb->start, eb->len);
        for (i = 0; i < num_pages; i++) {
@@ -3580,9 +3423,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
                return 0;
 
-       if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
-                          EXTENT_UPTODATE, 1, NULL)) {
-               return 0;
+       if (eb_straddles_pages(eb)) {
+               if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
+                                  EXTENT_UPTODATE, 1, NULL)) {
+                       return 0;
+               }
        }
 
        if (start) {
@@ -3686,9 +3531,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
                page = extent_buffer_page(eb, i);
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(dst, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                dst += cur;
                len -= cur;
@@ -3698,9 +3542,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
 }
 
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                              unsigned long min_len, char **token, char **map,
+                              unsigned long min_len, char **map,
                               unsigned long *map_start,
-                              unsigned long *map_len, int km)
+                              unsigned long *map_len)
 {
        size_t offset = start & (PAGE_CACHE_SIZE - 1);
        char *kaddr;
@@ -3730,42 +3574,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
        }
 
        p = extent_buffer_page(eb, i);
-       kaddr = kmap_atomic(p, km);
-       *token = kaddr;
+       kaddr = page_address(p);
        *map = kaddr + offset;
        *map_len = PAGE_CACHE_SIZE - offset;
        return 0;
 }
 
-int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
-                     unsigned long min_len,
-                     char **token, char **map,
-                     unsigned long *map_start,
-                     unsigned long *map_len, int km)
-{
-       int err;
-       int save = 0;
-       if (eb->map_token) {
-               unmap_extent_buffer(eb, eb->map_token, km);
-               eb->map_token = NULL;
-               save = 1;
-       }
-       err = map_private_extent_buffer(eb, start, min_len, token, map,
-                                      map_start, map_len, km);
-       if (!err && save) {
-               eb->map_token = *token;
-               eb->kaddr = *map;
-               eb->map_start = *map_start;
-               eb->map_len = *map_len;
-       }
-       return err;
-}
-
-void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
-{
-       kunmap_atomic(token, km);
-}
-
 int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
                          unsigned long start,
                          unsigned long len)
@@ -3789,9 +3603,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
 
                cur = min(len, (PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                ret = memcmp(ptr, kaddr + offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
                if (ret)
                        break;
 
@@ -3824,9 +3637,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER1);
+               kaddr = page_address(page);
                memcpy(kaddr + offset, src, cur);
-               kunmap_atomic(kaddr, KM_USER1);
 
                src += cur;
                len -= cur;
@@ -3855,9 +3667,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
                WARN_ON(!PageUptodate(page));
 
                cur = min(len, PAGE_CACHE_SIZE - offset);
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                memset(kaddr + offset, c, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                len -= cur;
                offset = 0;
@@ -3888,9 +3699,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
 
                cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
 
-               kaddr = kmap_atomic(page, KM_USER0);
+               kaddr = page_address(page);
                read_extent_buffer(src, kaddr + offset, src_offset, cur);
-               kunmap_atomic(kaddr, KM_USER0);
 
                src_offset += cur;
                len -= cur;
@@ -3903,20 +3713,17 @@ static void move_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        if (dst_page == src_page) {
                memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
        } else {
-               char *src_kaddr = kmap_atomic(src_page, KM_USER1);
+               char *src_kaddr = page_address(src_page);
                char *p = dst_kaddr + dst_off + len;
                char *s = src_kaddr + src_off + len;
 
                while (len--)
                        *--p = *--s;
-
-               kunmap_atomic(src_kaddr, KM_USER1);
        }
-       kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -3929,20 +3736,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
 {
-       char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
+       char *dst_kaddr = page_address(dst_page);
        char *src_kaddr;
 
        if (dst_page != src_page) {
-               src_kaddr = kmap_atomic(src_page, KM_USER1);
+               src_kaddr = page_address(src_page);
        } else {
                src_kaddr = dst_kaddr;
                BUG_ON(areas_overlap(src_off, dst_off, len));
        }
 
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       kunmap_atomic(dst_kaddr, KM_USER0);
-       if (dst_page != src_page)
-               kunmap_atomic(src_kaddr, KM_USER1);
 }
 
 void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,