Merge branch 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 18 Apr 2011 19:24:24 +0000 (12:24 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 18 Apr 2011 19:24:24 +0000 (12:24 -0700)
* 'merge' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc:
  powerpc/powermac: Build fix with SMP and CPU hotplug
  powerpc/perf_event: Skip updating kernel counters if register value shrinks
  powerpc: Don't write protect kernel text with CONFIG_DYNAMIC_FTRACE enabled
  powerpc: Fix oops if scan_dispatch_log is called too early
  powerpc/pseries: Use a kmem cache for DTL buffers
  powerpc/kexec: Fix regression causing compile failure on UP
  powerpc/85xx: disable Suspend support if SMP enabled
  powerpc/e500mc: Remove CPU_FTR_MAYBE_CAN_NAP/CPU_FTR_MAYBE_CAN_DOZE
  powerpc/book3e: Fix CPU feature handling on 64-bit e5500
  powerpc: Check device status before adding serial device
  powerpc/85xx: Don't add disabled PCIe devices

17 files changed:
fs/btrfs/acl.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/xattr.c
fs/proc/base.c
include/linux/pid.h
kernel/pid.c

diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index de34bfa..5d505aa 100644
@@ -178,16 +178,17 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
 
        if (value) {
                acl = posix_acl_from_xattr(value, size);
-               if (acl == NULL) {
-                       value = NULL;
-                       size = 0;
+               if (acl) {
+                       ret = posix_acl_valid(acl);
+                       if (ret)
+                               goto out;
                } else if (IS_ERR(acl)) {
                        return PTR_ERR(acl);
                }
        }
 
        ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
-
+out:
        posix_acl_release(acl);
 
        return ret;
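
The fix above settles the three-way contract of posix_acl_from_xattr(): an ERR_PTR() for a malformed xattr, NULL for "no ACL" (which must still reach btrfs_set_acl() with acl == NULL so the existing ACL is removed), and a real ACL that has to pass posix_acl_valid() before use; the old code conflated the NULL case with clearing value and size. A minimal sketch of that contract, with set_acl_on_disk() as a hypothetical stand-in for the filesystem-specific setter:

#include <linux/err.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>

static int xattr_acl_set_sketch(struct inode *inode, int type,
				const void *value, size_t size)
{
	struct posix_acl *acl = NULL;
	int ret;

	if (value) {
		acl = posix_acl_from_xattr(value, size);
		if (IS_ERR(acl))
			return PTR_ERR(acl);	/* malformed xattr */
		if (acl) {
			ret = posix_acl_valid(acl);
			if (ret)
				goto out;	/* e.g. -EINVAL */
		}
	}

	/* acl == NULL here means "remove the ACL" */
	ret = set_acl_on_disk(inode, acl, type);	/* hypothetical */
out:
	posix_acl_release(acl);	/* safe on NULL */
	return ret;
}
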
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3458b57..2e61fe1 100644
@@ -740,8 +740,10 @@ struct btrfs_space_info {
         */
        unsigned long reservation_progress;
 
-       int full;               /* indicates that we cannot allocate any more
+       int full:1;             /* indicates that we cannot allocate any more
                                   chunks for this space */
+       int chunk_alloc:1;      /* set if we are allocating a chunk */
+
        int force_alloc;        /* set if we need to force a chunk alloc for
                                   this space */
 
@@ -2576,6 +2578,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                              struct inode *inode, u64 start, u64 end);
 int btrfs_release_file(struct inode *inode, struct file *file);
+void btrfs_drop_pages(struct page **pages, size_t num_pages);
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                     struct page **pages, size_t num_pages,
+                     loff_t pos, size_t write_bytes,
+                     struct extent_state **cached);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8f1d44b..68c84c8 100644
@@ -3057,7 +3057,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
                btrfs_destroy_pinned_extent(root,
                                            root->fs_info->pinned_extents);
 
-               t->use_count = 0;
+               atomic_set(&t->use_count, 0);
                list_del_init(&t->list);
                memset(t, 0, sizeof(*t));
                kmem_cache_free(btrfs_transaction_cachep, t);
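
This hunk relies on t->use_count having become an atomic_t; the transaction.c hunks further down make that conversion. The resulting get/put discipline, reduced to its core (types and names illustrative; in this era atomic_t comes via asm/atomic.h):

#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/atomic.h>

struct ref_sketch {
	atomic_t use_count;
};

static void ref_get(struct ref_sketch *r)
{
	atomic_inc(&r->use_count);
}

static void ref_put(struct ref_sketch *r)
{
	WARN_ON(atomic_read(&r->use_count) == 0);
	/* atomic_dec_and_test() returns true only for the final put,
	 * so exactly one caller ever frees the object. */
	if (atomic_dec_and_test(&r->use_count))
		kfree(r);
}
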
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f619c3c..31f33ba 100644
 #include "locking.h"
 #include "free-space-cache.h"
 
+/* control flags for do_chunk_alloc's force field
+ * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
+ * if we really need one.
+ *
+ * CHUNK_ALLOC_FORCE means it must try to allocate one.
+ *
+ * CHUNK_ALLOC_LIMITED means to only try to allocate one
+ * if we have very few chunks already allocated.  This is
+ * used as part of the clustering code to help make sure
+ * we have a good pool of storage to cluster in, without
+ * filling the FS with empty chunks.
+ */
+enum {
+       CHUNK_ALLOC_NO_FORCE = 0,
+       CHUNK_ALLOC_FORCE = 1,
+       CHUNK_ALLOC_LIMITED = 2,
+};
+
 static int update_block_group(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              u64 bytenr, u64 num_bytes, int alloc);
@@ -3019,7 +3038,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->bytes_readonly = 0;
        found->bytes_may_use = 0;
        found->full = 0;
-       found->force_alloc = 0;
+       found->force_alloc = CHUNK_ALLOC_NO_FORCE;
+       found->chunk_alloc = 0;
        *space_info = found;
        list_add_rcu(&found->list, &info->space_info);
        atomic_set(&found->caching_threads, 0);
@@ -3150,7 +3170,7 @@ again:
                if (!data_sinfo->full && alloc_chunk) {
                        u64 alloc_target;
 
-                       data_sinfo->force_alloc = 1;
+                       data_sinfo->force_alloc = CHUNK_ALLOC_FORCE;
                        spin_unlock(&data_sinfo->lock);
 alloc:
                        alloc_target = btrfs_get_alloc_profile(root, 1);
@@ -3160,7 +3180,8 @@ alloc:
 
                        ret = do_chunk_alloc(trans, root->fs_info->extent_root,
                                             bytes + 2 * 1024 * 1024,
-                                            alloc_target, 0);
+                                            alloc_target,
+                                            CHUNK_ALLOC_NO_FORCE);
                        btrfs_end_transaction(trans, root);
                        if (ret < 0) {
                                if (ret != -ENOSPC)
@@ -3239,31 +3260,56 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
        rcu_read_lock();
        list_for_each_entry_rcu(found, head, list) {
                if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
-                       found->force_alloc = 1;
+                       found->force_alloc = CHUNK_ALLOC_FORCE;
        }
        rcu_read_unlock();
 }
 
 static int should_alloc_chunk(struct btrfs_root *root,
-                             struct btrfs_space_info *sinfo, u64 alloc_bytes)
+                             struct btrfs_space_info *sinfo, u64 alloc_bytes,
+                             int force)
 {
        u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+       u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved;
        u64 thresh;
 
-       if (sinfo->bytes_used + sinfo->bytes_reserved +
-           alloc_bytes + 256 * 1024 * 1024 < num_bytes)
+       if (force == CHUNK_ALLOC_FORCE)
+               return 1;
+
+       /*
+        * in limited mode, we want to have some free space up to
+        * about 1% of the FS size.
+        */
+       if (force == CHUNK_ALLOC_LIMITED) {
+               thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+               thresh = max_t(u64, 64 * 1024 * 1024,
+                              div_factor_fine(thresh, 1));
+
+               if (num_bytes - num_allocated < thresh)
+                       return 1;
+       }
+
+       /*
+        * we have two similar checks here, one based on percentage
+        * and one based on a hard number of 256MB.  The idea
+        * is that if we have a good amount of free
+        * room, don't allocate a chunk.  A good amount means the
+        * chunks we have allocated are less than 80% utilized,
+        * or more than 256MB free
+        */
+       if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
                return 0;
 
-       if (sinfo->bytes_used + sinfo->bytes_reserved +
-           alloc_bytes < div_factor(num_bytes, 8))
+       if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
                return 0;
 
        thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+
+       /* 256MB or 5% of the FS */
        thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
 
        if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
                return 0;
-
        return 1;
 }
 
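
Restated in userspace terms the whole decision is small. A sketch, assuming div_factor(x, n) = x*n/10 and div_factor_fine(x, n) = x*n/100 as the btrfs helpers compute them; all names here are illustrative, not the kernel's:

#include <stdbool.h>
#include <stdint.h>

enum { NO_FORCE, FORCE, LIMITED };

static uint64_t tenths(uint64_t num, int f)     { return num * f / 10; }
static uint64_t hundredths(uint64_t num, int f) { return num * f / 100; }

/* total/readonly/used/reserved describe the space_info; fs_bytes is the
 * whole-filesystem size from the superblock. */
static bool should_alloc(uint64_t total, uint64_t readonly, uint64_t used,
			 uint64_t reserved, uint64_t fs_bytes,
			 uint64_t alloc_bytes, int force)
{
	uint64_t num_bytes = total - readonly;
	uint64_t num_allocated = used + reserved;
	uint64_t thresh;

	if (force == FORCE)
		return true;

	/* LIMITED: allocate while free space is below max(64MB, 1% of FS). */
	if (force == LIMITED) {
		thresh = hundredths(fs_bytes, 1);
		if (thresh < 64ULL << 20)
			thresh = 64ULL << 20;
		if (num_bytes - num_allocated < thresh)
			return true;
	}

	/* Plenty of room: more than 256MB would still be free... */
	if (num_allocated + alloc_bytes + (256ULL << 20) < num_bytes)
		return false;
	/* ...or less than 80% of this space_info would be in use. */
	if (num_allocated + alloc_bytes < tenths(num_bytes, 8))
		return false;

	/* A big space_info (over max(256MB, 5% of FS)) that is still under
	 * 30% used does not need another chunk either. */
	thresh = hundredths(fs_bytes, 5);
	if (thresh < 256ULL << 20)
		thresh = 256ULL << 20;
	if (num_bytes > thresh && used < tenths(num_bytes, 3))
		return false;

	return true;
}

For example, on a 100GB filesystem whose metadata space_info totals 1GB with 900MB allocated, CHUNK_ALLOC_LIMITED fires: the 100MB of free space is below the 1GB (1% of FS) floor.
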
@@ -3273,10 +3319,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 {
        struct btrfs_space_info *space_info;
        struct btrfs_fs_info *fs_info = extent_root->fs_info;
+       int wait_for_alloc = 0;
        int ret = 0;
 
-       mutex_lock(&fs_info->chunk_mutex);
-
        flags = btrfs_reduce_alloc_profile(extent_root, flags);
 
        space_info = __find_space_info(extent_root->fs_info, flags);
@@ -3287,21 +3332,40 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
        }
        BUG_ON(!space_info);
 
+again:
        spin_lock(&space_info->lock);
        if (space_info->force_alloc)
-               force = 1;
+               force = space_info->force_alloc;
        if (space_info->full) {
                spin_unlock(&space_info->lock);
-               goto out;
+               return 0;
        }
 
-       if (!force && !should_alloc_chunk(extent_root, space_info,
-                                         alloc_bytes)) {
+       if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
                spin_unlock(&space_info->lock);
-               goto out;
+               return 0;
+       } else if (space_info->chunk_alloc) {
+               wait_for_alloc = 1;
+       } else {
+               space_info->chunk_alloc = 1;
        }
+
        spin_unlock(&space_info->lock);
 
+       mutex_lock(&fs_info->chunk_mutex);
+
+       /*
+        * The chunk_mutex is held throughout the entirety of a chunk
+        * allocation, so once we've acquired the chunk_mutex we know that the
+        * other guy is done and we need to recheck and see if we should
+        * allocate.
+        */
+       if (wait_for_alloc) {
+               mutex_unlock(&fs_info->chunk_mutex);
+               wait_for_alloc = 0;
+               goto again;
+       }
+
        /*
         * If we have mixed data/metadata chunks we want to make sure we keep
         * allocating mixed chunks instead of individual chunks.
@@ -3327,9 +3391,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                space_info->full = 1;
        else
                ret = 1;
-       space_info->force_alloc = 0;
+
+       space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
+       space_info->chunk_alloc = 0;
        spin_unlock(&space_info->lock);
-out:
        mutex_unlock(&extent_root->fs_info->chunk_mutex);
        return ret;
 }
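
The new serialization avoids holding chunk_mutex just to discover somebody else is already allocating: the chunk_alloc bit (under the spinlock) marks an allocation in flight, and a waiter merely acquires and releases the mutex, which cannot succeed until the allocator drops it, then rechecks from the top. A reduced model of the scheme (types and names illustrative):

#include <linux/mutex.h>
#include <linux/spinlock.h>

struct sinfo_sketch {
	spinlock_t lock;
	int full;		/* no more chunks can be allocated */
	int chunk_alloc;	/* an allocation is in flight */
};

static int alloc_chunk_serialized(struct sinfo_sketch *si,
				  struct mutex *chunk_mutex)
{
	int wait_for_alloc = 0;

again:
	spin_lock(&si->lock);
	if (si->full) {
		spin_unlock(&si->lock);
		return 0;
	}
	if (si->chunk_alloc)
		wait_for_alloc = 1;	/* somebody else is allocating */
	else
		si->chunk_alloc = 1;	/* we are the allocator */
	spin_unlock(&si->lock);

	mutex_lock(chunk_mutex);
	if (wait_for_alloc) {
		/* The allocator holds chunk_mutex for the whole allocation,
		 * so getting here means it finished; recheck everything. */
		mutex_unlock(chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	/* ... perform the actual chunk allocation here ... */

	spin_lock(&si->lock);
	si->chunk_alloc = 0;
	spin_unlock(&si->lock);
	mutex_unlock(chunk_mutex);
	return 1;
}
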
@@ -5303,11 +5368,13 @@ loop:
 
                if (allowed_chunk_alloc) {
                        ret = do_chunk_alloc(trans, root, num_bytes +
-                                            2 * 1024 * 1024, data, 1);
+                                            2 * 1024 * 1024, data,
+                                            CHUNK_ALLOC_LIMITED);
                        allowed_chunk_alloc = 0;
                        done_chunk_alloc = 1;
-               } else if (!done_chunk_alloc) {
-                       space_info->force_alloc = 1;
+               } else if (!done_chunk_alloc &&
+                          space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) {
+                       space_info->force_alloc = CHUNK_ALLOC_LIMITED;
                }
 
                if (loop < LOOP_NO_EMPTY_SIZE) {
@@ -5393,7 +5460,8 @@ again:
         */
        if (empty_size || root->ref_cows)
                ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-                                    num_bytes + 2 * 1024 * 1024, data, 0);
+                                    num_bytes + 2 * 1024 * 1024, data,
+                                    CHUNK_ALLOC_NO_FORCE);
 
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -5405,7 +5473,7 @@ again:
                num_bytes = num_bytes & ~(root->sectorsize - 1);
                num_bytes = max(num_bytes, min_alloc_size);
                do_chunk_alloc(trans, root->fs_info->extent_root,
-                              num_bytes, data, 1);
+                              num_bytes, data, CHUNK_ALLOC_FORCE);
                goto again;
        }
        if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
@@ -8109,13 +8177,15 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 
        alloc_flags = update_block_group_flags(root, cache->flags);
        if (alloc_flags != cache->flags)
-               do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+               do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                              CHUNK_ALLOC_FORCE);
 
        ret = set_block_group_ro(cache);
        if (!ret)
                goto out;
        alloc_flags = get_alloc_profile(root, cache->space_info->flags);
-       ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+       ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                            CHUNK_ALLOC_FORCE);
        if (ret < 0)
                goto out;
        ret = set_block_group_ro(cache);
@@ -8128,7 +8198,8 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root, u64 type)
 {
        u64 alloc_flags = get_alloc_profile(root, type);
-       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags,
+                             CHUNK_ALLOC_FORCE);
 }
 
 /*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 20ddb28..3151386 100644
@@ -690,6 +690,15 @@ static void cache_state(struct extent_state *state,
        }
 }
 
+static void uncache_state(struct extent_state **cached_ptr)
+{
+       if (cached_ptr && (*cached_ptr)) {
+               struct extent_state *state = *cached_ptr;
+               *cached_ptr = NULL;
+               free_extent_state(state);
+       }
+}
+
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
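
uncache_state() is the error-path counterpart of cache_state(): cache_state() takes an extra reference on an extent_state and parks it in *cached_ptr, and whichever consumer runs, unlock_extent_cached() on success or uncache_state() when the page turns out bad, must drop exactly that reference. The handoff reduced to refcounting alone (a sketch; the real cache_state() also filters on the state's bits):

static void cache_state_sketch(struct extent_state *state,
			       struct extent_state **cached_ptr)
{
	if (cached_ptr && !(*cached_ptr)) {
		atomic_inc(&state->refs);	/* this reference is owned   */
		*cached_ptr = state;		/* by whoever holds the slot */
	}
}

static void uncache_state_sketch(struct extent_state **cached_ptr)
{
	if (cached_ptr && *cached_ptr) {
		struct extent_state *state = *cached_ptr;
		*cached_ptr = NULL;
		free_extent_state(state);	/* drops that reference */
	}
}
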
@@ -940,10 +949,10 @@ static int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
 }
 
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       gfp_t mask)
+                       struct extent_state **cached_state, gfp_t mask)
 {
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
-                             NULL, mask);
+       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0,
+                             NULL, cached_state, mask);
 }
 
 static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
@@ -1012,8 +1021,7 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
                                mask);
 }
 
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
-                 gfp_t mask)
+int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
 {
        return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
                                mask);
@@ -1735,6 +1743,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
        do {
                struct page *page = bvec->bv_page;
+               struct extent_state *cached = NULL;
+               struct extent_state *state;
+
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1749,9 +1760,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
 
+               spin_lock(&tree->lock);
+               state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
+               if (state && state->start == start) {
+                       /*
+                        * take a reference on the state, unlock will drop
+                        * the ref
+                        */
+                       cache_state(state, &cached);
+               }
+               spin_unlock(&tree->lock);
+
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
-                                                             NULL);
+                                                             state);
                        if (ret)
                                uptodate = 0;
                }
@@ -1764,15 +1786,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
                                if (err)
                                        uptodate = 0;
+                               uncache_state(&cached);
                                continue;
                        }
                }
 
                if (uptodate) {
-                       set_extent_uptodate(tree, start, end,
+                       set_extent_uptodate(tree, start, end, &cached,
                                            GFP_ATOMIC);
                }
-               unlock_extent(tree, start, end, GFP_ATOMIC);
+               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
                if (whole_page) {
                        if (uptodate) {
@@ -1811,6 +1834,7 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 
        do {
                struct page *page = bvec->bv_page;
+               struct extent_state *cached = NULL;
                tree = &BTRFS_I(page->mapping->host)->io_tree;
 
                start = ((u64)page->index << PAGE_CACHE_SHIFT) +
@@ -1821,13 +1845,14 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
                        prefetchw(&bvec->bv_page->flags);
 
                if (uptodate) {
-                       set_extent_uptodate(tree, start, end, GFP_ATOMIC);
+                       set_extent_uptodate(tree, start, end, &cached,
+                                           GFP_ATOMIC);
                } else {
                        ClearPageUptodate(page);
                        SetPageError(page);
                }
 
-               unlock_extent(tree, start, end, GFP_ATOMIC);
+               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
 
        } while (bvec >= bio->bi_io_vec);
 
@@ -2016,14 +2041,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        while (cur <= end) {
                if (cur >= last_byte) {
                        char *userpage;
+                       struct extent_state *cached = NULL;
+
                        iosize = PAGE_CACHE_SIZE - page_offset;
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                                           &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        break;
                }
                em = get_extent(inode, page, page_offset, cur,
@@ -2063,14 +2091,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                /* we've found a hole, just zero and go on */
                if (block_start == EXTENT_MAP_HOLE) {
                        char *userpage;
+                       struct extent_state *cached = NULL;
+
                        userpage = kmap_atomic(page, KM_USER0);
                        memset(userpage + page_offset, 0, iosize);
                        flush_dcache_page(page);
                        kunmap_atomic(userpage, KM_USER0);
 
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
-                                           GFP_NOFS);
-                       unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
+                                           &cached, GFP_NOFS);
+                       unlock_extent_cached(tree, cur, cur + iosize - 1,
+                                            &cached, GFP_NOFS);
                        cur = cur + iosize;
                        page_offset += iosize;
                        continue;
@@ -2789,9 +2820,12 @@ int extent_prepare_write(struct extent_io_tree *tree,
                        iocount++;
                        block_start = block_start + iosize;
                } else {
-                       set_extent_uptodate(tree, block_start, cur_end,
+                       struct extent_state *cached = NULL;
+
+                       set_extent_uptodate(tree, block_start, cur_end, &cached,
                                            GFP_NOFS);
-                       unlock_extent(tree, block_start, cur_end, GFP_NOFS);
+                       unlock_extent_cached(tree, block_start, cur_end,
+                                            &cached, GFP_NOFS);
                        block_start = cur_end + 1;
                }
                page_offset = block_start & (PAGE_CACHE_SIZE - 1);
@@ -3457,7 +3491,7 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
 
        set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                           GFP_NOFS);
+                           NULL, GFP_NOFS);
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
@@ -3885,6 +3919,12 @@ static void move_pages(struct page *dst_page, struct page *src_page,
        kunmap_atomic(dst_kaddr, KM_USER0);
 }
 
+static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
+{
+       unsigned long distance = (src > dst) ? src - dst : dst - src;
+       return distance < len;
+}
+
 static void copy_pages(struct page *dst_page, struct page *src_page,
                       unsigned long dst_off, unsigned long src_off,
                       unsigned long len)
@@ -3892,10 +3932,12 @@ static void copy_pages(struct page *dst_page, struct page *src_page,
        char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
        char *src_kaddr;
 
-       if (dst_page != src_page)
+       if (dst_page != src_page) {
                src_kaddr = kmap_atomic(src_page, KM_USER1);
-       else
+       } else {
                src_kaddr = dst_kaddr;
+               BUG_ON(areas_overlap(src_off, dst_off, len));
+       }
 
        memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
        kunmap_atomic(dst_kaddr, KM_USER0);
@@ -3970,7 +4012,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                       "len %lu len %lu\n", dst_offset, len, dst->len);
                BUG_ON(1);
        }
-       if (dst_offset < src_offset) {
+       if (!areas_overlap(src_offset, dst_offset, len)) {
                memcpy_extent_buffer(dst, dst_offset, src_offset, len);
                return;
        }
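
areas_overlap() is the standard interval test: ranges [src, src+len) and [dst, dst+len) intersect exactly when the distance between their starts is less than len, and memmove_extent_buffer() now uses it instead of the weaker dst_offset < src_offset ordering check to decide when a plain forward memcpy is safe. A quick userspace check of the property:

#include <assert.h>
#include <stdbool.h>

static bool areas_overlap(unsigned long src, unsigned long dst,
			  unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;
	return distance < len;
}

int main(void)
{
	assert( areas_overlap(0, 3, 4));  /* [0,4) and [3,7) share [3,4) */
	assert(!areas_overlap(0, 4, 4));  /* [0,4) and [4,8) only touch  */
	assert( areas_overlap(9, 2, 8));  /* symmetric in src and dst    */
	assert(!areas_overlap(5, 5, 0));  /* zero length never overlaps  */
	return 0;
}
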
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index f62c544..af2d717 100644
@@ -208,7 +208,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   int bits, int exclusive_bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       gfp_t mask);
+                       struct extent_state **cached_state, gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
                   gfp_t mask);
 int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e621ea5..75899a0 100644
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 /*
  * unlocks pages after btrfs_file_write is done with them
  */
-static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
+void btrfs_drop_pages(struct page **pages, size_t num_pages)
 {
        size_t i;
        for (i = 0; i < num_pages; i++) {
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages)
  * this also makes the decision about creating an inline extent vs
  * doing real data extents, marking pages dirty and delalloc as required.
  */
-static noinline int dirty_and_release_pages(struct btrfs_root *root,
-                                           struct file *file,
-                                           struct page **pages,
-                                           size_t num_pages,
-                                           loff_t pos,
-                                           size_t write_bytes)
+int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
+                     struct page **pages, size_t num_pages,
+                     loff_t pos, size_t write_bytes,
+                     struct extent_state **cached)
 {
        int err = 0;
        int i;
-       struct inode *inode = fdentry(file)->d_inode;
        u64 num_bytes;
        u64 start_pos;
        u64 end_of_last_block;
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root,
 
        end_of_last_block = start_pos + num_bytes - 1;
        err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
-                                       NULL);
+                                       cached);
        if (err)
                return err;
 
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
                }
 
                if (copied > 0) {
-                       ret = dirty_and_release_pages(root, file, pages,
-                                                     dirty_pages, pos,
-                                                     copied);
+                       ret = btrfs_dirty_pages(root, inode, pages,
+                                               dirty_pages, pos, copied,
+                                               NULL);
                        if (ret) {
                                btrfs_delalloc_release_space(inode,
                                        dirty_pages << PAGE_CACHE_SHIFT);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f561c95..11d2e9c 100644
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        struct inode *inode;
        struct rb_node *node;
        struct list_head *pos, *n;
+       struct page **pages;
        struct page *page;
        struct extent_state *cached_state = NULL;
        struct btrfs_free_cluster *cluster = NULL;
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root,
        u64 start, end, len;
        u64 bytes = 0;
        u32 *crc, *checksums;
-       pgoff_t index = 0, last_index = 0;
        unsigned long first_page_offset;
-       int num_checksums;
+       int index = 0, num_pages = 0;
        int entries = 0;
        int bitmaps = 0;
        int ret = 0;
        bool next_page = false;
+       bool out_of_space = false;
 
        root = root->fs_info->tree_root;
 
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                return 0;
        }
 
-       last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+       num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+               PAGE_CACHE_SHIFT;
        filemap_write_and_wait(inode->i_mapping);
        btrfs_wait_ordered_range(inode, inode->i_size &
                                 ~(root->sectorsize - 1), (u64)-1);
 
        /* We need a checksum per page. */
-       num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-       crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+       crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
        if (!crc) {
                iput(inode);
                return 0;
        }
 
+       pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+       if (!pages) {
+               kfree(crc);
+               iput(inode);
+               return 0;
+       }
+
        /* Since the first page has all of our checksums and our generation we
         * need to calculate the offset into the page that we can start writing
         * our entries.
         */
-       first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+       first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
 
        /* Get the cluster for this block_group if it exists */
        if (!list_empty(&block_group->cluster_list))
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
         * after find_get_page at this point.  Just putting this here so people
         * know and don't freak out.
         */
-       while (index <= last_index) {
+       while (index < num_pages) {
                page = grab_cache_page(inode->i_mapping, index);
                if (!page) {
-                       pgoff_t i = 0;
+                       int i;
 
-                       while (i < index) {
-                               page = find_get_page(inode->i_mapping, i);
-                               unlock_page(page);
-                               page_cache_release(page);
-                               page_cache_release(page);
-                               i++;
+                       for (i = 0; i < num_pages; i++) {
+                               unlock_page(pages[i]);
+                               page_cache_release(pages[i]);
                        }
                        goto out_free;
                }
+               pages[index] = page;
                index++;
        }
 
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                        offset = start_offset;
                }
 
-               page = find_get_page(inode->i_mapping, index);
+               if (index >= num_pages) {
+                       out_of_space = true;
+                       break;
+               }
+
+               page = pages[index];
 
                addr = kmap(page);
                entry = addr + start_offset;
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 
                bytes += PAGE_CACHE_SIZE;
 
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-
-               /*
-                * We need to release our reference we got for grab_cache_page,
-                * except for the first page which will hold our checksums, we
-                * do that below.
-                */
-               if (index != 0) {
-                       unlock_page(page);
-                       page_cache_release(page);
-               }
-
-               page_cache_release(page);
-
                index++;
        } while (node || next_page);
 
@@ -734,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                struct btrfs_free_space *entry =
                        list_entry(pos, struct btrfs_free_space, list);
 
-               page = find_get_page(inode->i_mapping, index);
+               if (index >= num_pages) {
+                       out_of_space = true;
+                       break;
+               }
+               page = pages[index];
 
                addr = kmap(page);
                memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
                crc++;
                bytes += PAGE_CACHE_SIZE;
 
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
                list_del_init(&entry->list);
                index++;
        }
 
+       if (out_of_space) {
+               btrfs_drop_pages(pages, num_pages);
+               unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+                                    i_size_read(inode) - 1, &cached_state,
+                                    GFP_NOFS);
+               ret = 0;
+               goto out_free;
+       }
+
        /* Zero out the rest of the pages just to make sure */
-       while (index <= last_index) {
+       while (index < num_pages) {
                void *addr;
 
-               page = find_get_page(inode->i_mapping, index);
-
+               page = pages[index];
                addr = kmap(page);
                memset(addr, 0, PAGE_CACHE_SIZE);
                kunmap(page);
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
                bytes += PAGE_CACHE_SIZE;
                index++;
        }
 
-       btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
-
        /* Write the checksums and trans id to the first page */
        {
                void *addr;
                u64 *gen;
 
-               page = find_get_page(inode->i_mapping, 0);
+               page = pages[0];
 
                addr = kmap(page);
-               memcpy(addr, checksums, sizeof(u32) * num_checksums);
-               gen = addr + (sizeof(u32) * num_checksums);
+               memcpy(addr, checksums, sizeof(u32) * num_pages);
+               gen = addr + (sizeof(u32) * num_pages);
                *gen = trans->transid;
                kunmap(page);
-               ClearPageChecked(page);
-               set_page_extent_mapped(page);
-               SetPageUptodate(page);
-               set_page_dirty(page);
-               unlock_page(page);
-               page_cache_release(page);
-               page_cache_release(page);
        }
-       BTRFS_I(inode)->generation = trans->transid;
 
+       ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+                                           bytes, &cached_state);
+       btrfs_drop_pages(pages, num_pages);
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
                             i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
+       if (ret) {
+               ret = 0;
+               goto out_free;
+       }
+
+       BTRFS_I(inode)->generation = trans->transid;
+
        filemap_write_and_wait(inode->i_mapping);
 
        key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -853,6 +845,7 @@ out_free:
                BTRFS_I(inode)->generation = 0;
        }
        kfree(checksums);
+       kfree(pages);
        btrfs_update_inode(trans, root, inode);
        iput(inode);
        return ret;
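
Instead of re-looking pages up with find_get_page() at every stage (and juggling the extra reference that returns), the rewrite grabs all cache pages once up front, keeps them in a pages[] array indexed in step with the writer, and releases everything through the new btrfs_drop_pages(). The acquisition half of that pattern, as a sketch with an illustrative name:

#include <linux/pagemap.h>
#include <linux/slab.h>

/* Grab and lock pages [0, num_pages) of a mapping, unwinding on failure. */
static struct page **grab_range_sketch(struct address_space *mapping,
				       int num_pages)
{
	struct page **pages;
	int i;

	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
	if (!pages)
		return NULL;

	for (i = 0; i < num_pages; i++) {
		/* grab_cache_page() returns the page locked, with a ref */
		pages[i] = grab_cache_page(mapping, i);
		if (!pages[i]) {
			while (--i >= 0) {
				unlock_page(pages[i]);
				page_cache_release(pages[i]);
			}
			kfree(pages);
			return NULL;
		}
	}
	return pages;
}
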
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5cc64ab..fcd66b6 100644
@@ -1770,9 +1770,12 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
-       btrfs_ordered_update_i_size(inode, 0, ordered_extent);
-       ret = btrfs_update_inode(trans, root, inode);
-       BUG_ON(ret);
+       ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+       if (!ret) {
+               ret = btrfs_update_inode(trans, root, inode);
+               BUG_ON(ret);
+       }
+       ret = 0;
 out:
        if (nolock) {
                if (trans)
@@ -2590,6 +2593,13 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                            struct btrfs_inode_item *item,
                            struct inode *inode)
 {
+       if (!leaf->map_token)
+               map_private_extent_buffer(leaf, (unsigned long)item,
+                                         sizeof(struct btrfs_inode_item),
+                                         &leaf->map_token, &leaf->kaddr,
+                                         &leaf->map_start, &leaf->map_len,
+                                         KM_USER1);
+
        btrfs_set_inode_uid(leaf, item, inode->i_uid);
        btrfs_set_inode_gid(leaf, item, inode->i_gid);
        btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2618,6 +2628,11 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
        btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
        btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
        btrfs_set_inode_block_group(leaf, item, BTRFS_I(inode)->block_group);
+
+       if (leaf->map_token) {
+               unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
+               leaf->map_token = NULL;
+       }
 }
 
 /*
@@ -4207,10 +4222,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_key found_key;
        struct btrfs_path *path;
        int ret;
-       u32 nritems;
        struct extent_buffer *leaf;
        int slot;
-       int advance;
        unsigned char d_type;
        int over = 0;
        u32 di_cur;
@@ -4253,27 +4266,19 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-       advance = 0;
 
        while (1) {
                leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
-               if (advance || slot >= nritems) {
-                       if (slot >= nritems - 1) {
-                               ret = btrfs_next_leaf(root, path);
-                               if (ret)
-                                       break;
-                               leaf = path->nodes[0];
-                               nritems = btrfs_header_nritems(leaf);
-                               slot = path->slots[0];
-                       } else {
-                               slot++;
-                               path->slots[0]++;
-                       }
+               if (slot >= btrfs_header_nritems(leaf)) {
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto err;
+                       else if (ret > 0)
+                               break;
+                       continue;
                }
 
-               advance = 1;
                item = btrfs_item_nr(leaf, slot);
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -4282,7 +4287,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                if (btrfs_key_type(&found_key) != key_type)
                        break;
                if (found_key.offset < filp->f_pos)
-                       continue;
+                       goto next;
 
                filp->f_pos = found_key.offset;
 
@@ -4335,6 +4340,8 @@ skip:
                        di_cur += di_len;
                        di = (struct btrfs_dir_item *)((char *)di + di_len);
                }
+next:
+               path->slots[0]++;
        }
 
        /* Reached end of directory/root. Bump pos past the last item. */
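
What remains is the standard btrfs leaf walk: when the slot runs past the leaf, btrfs_next_leaf() either loads the next leaf (0), reports the end of the tree (>0), or fails (<0), and the continue re-reads nodes[0] and slots[0] rather than assuming anything about the new leaf. The skeleton of that idiom (kernel-internal, assumes ctree.h):

static int walk_leaves_sketch(struct btrfs_root *root,
			      struct btrfs_path *path)
{
	struct extent_buffer *leaf;
	int slot, ret;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				return ret;	/* I/O or other error  */
			if (ret > 0)
				break;		/* ran off the tree    */
			continue;		/* new leaf: re-read   */
		}

		/* ... process the item at (leaf, slot) ... */

		path->slots[0]++;
	}
	return 0;
}
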
@@ -4527,14 +4534,17 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        BUG_ON(!path);
 
        inode = new_inode(root->fs_info->sb);
-       if (!inode)
+       if (!inode) {
+               btrfs_free_path(path);
                return ERR_PTR(-ENOMEM);
+       }
 
        if (dir) {
                trace_btrfs_inode_request(dir);
 
                ret = btrfs_set_inode_index(dir, index);
                if (ret) {
+                       btrfs_free_path(path);
                        iput(inode);
                        return ERR_PTR(ret);
                }
@@ -4834,9 +4844,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        if (inode->i_nlink == ~0U)
                return -EMLINK;
 
-       btrfs_inc_nlink(inode);
-       inode->i_ctime = CURRENT_TIME;
-
        err = btrfs_set_inode_index(dir, &index);
        if (err)
                goto fail;
@@ -4852,6 +4859,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
                goto fail;
        }
 
+       btrfs_inc_nlink(inode);
+       inode->i_ctime = CURRENT_TIME;
+
        btrfs_set_trans_block_group(trans, dir);
        ihold(inode);
 
@@ -5221,7 +5231,7 @@ again:
                        btrfs_mark_buffer_dirty(leaf);
                }
                set_extent_uptodate(io_tree, em->start,
-                                   extent_map_end(em) - 1, GFP_NOFS);
+                                   extent_map_end(em) - 1, NULL, GFP_NOFS);
                goto insert;
        } else {
                printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
@@ -5428,17 +5438,30 @@ out:
 }
 
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
+                                                 struct extent_map *em,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
-       struct extent_map *em;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
+       bool insert = false;
 
-       btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+       /*
+        * Ok if the extent map we looked up is a hole and is for the exact
+        * range we want, there is no reason to allocate a new one, however if
+        * it is not right then we need to free this one and drop the cache for
+        * our range.
+        */
+       if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
+           em->len != len) {
+               free_extent_map(em);
+               em = NULL;
+               insert = true;
+               btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
+       }
 
        trans = btrfs_join_transaction(root, 0);
        if (IS_ERR(trans))
@@ -5454,10 +5477,12 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                goto out;
        }
 
-       em = alloc_extent_map(GFP_NOFS);
        if (!em) {
-               em = ERR_PTR(-ENOMEM);
-               goto out;
+               em = alloc_extent_map(GFP_NOFS);
+               if (!em) {
+                       em = ERR_PTR(-ENOMEM);
+                       goto out;
+               }
        }
 
        em->start = start;
@@ -5467,9 +5492,15 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
        em->block_start = ins.objectid;
        em->block_len = ins.offset;
        em->bdev = root->fs_info->fs_devices->latest_bdev;
+
+       /*
+        * We need to do this because if we're using the original em we searched
+        * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
+        */
+       em->flags = 0;
        set_bit(EXTENT_FLAG_PINNED, &em->flags);
 
-       while (1) {
+       while (insert) {
                write_lock(&em_tree->lock);
                ret = add_extent_mapping(em_tree, em);
                write_unlock(&em_tree->lock);
@@ -5687,8 +5718,7 @@ must_cow:
         * it above
         */
        len = bh_result->b_size;
-       free_extent_map(em);
-       em = btrfs_new_extent_direct(inode, start, len);
+       em = btrfs_new_extent_direct(inode, em, start, len);
        if (IS_ERR(em))
                return PTR_ERR(em);
        len = min(len, em->len - (start - em->start));
@@ -5851,8 +5881,10 @@ again:
        }
 
        add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
-       btrfs_ordered_update_i_size(inode, 0, ordered);
-       btrfs_update_inode(trans, root, inode);
+       ret = btrfs_ordered_update_i_size(inode, 0, ordered);
+       if (!ret)
+               btrfs_update_inode(trans, root, inode);
+       ret = 0;
 out_unlock:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
                             ordered->file_offset + ordered->len - 1,
@@ -5938,7 +5970,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
-                                        u32 *csums)
+                                        u32 *csums, int async_submit)
 {
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5949,13 +5981,24 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
        if (ret)
                goto err;
 
-       if (write && !skip_sum) {
+       if (skip_sum)
+               goto map;
+
+       if (write && async_submit) {
                ret = btrfs_wq_submit_bio(root->fs_info,
                                   inode, rw, bio, 0, 0,
                                   file_offset,
                                   __btrfs_submit_bio_start_direct_io,
                                   __btrfs_submit_bio_done);
                goto err;
+       } else if (write) {
+               /*
+                * If we aren't doing async submit, calculate the csum of the
+                * bio now.
+                */
+               ret = btrfs_csum_one_bio(root, inode, bio, file_offset, 1);
+               if (ret)
+                       goto err;
        } else if (!skip_sum) {
                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
                                          file_offset, csums);
@@ -5963,7 +6006,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                        goto err;
        }
 
-       ret = btrfs_map_bio(root, rw, bio, 0, 1);
+map:
+       ret = btrfs_map_bio(root, rw, bio, 0, async_submit);
 err:
        bio_put(bio);
        return ret;
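
After this change __btrfs_submit_dio_bio() picks one of four checksum strategies before mapping the bio. Spelled out as a table of the branches (the enum and function are illustrative, not kernel API):

enum dio_csum_action {
	MAP_ONLY,	/* skip_sum: no checksumming at all     */
	QUEUE_ASYNC,	/* split write: csum in a worker thread */
	CSUM_INLINE,	/* unsplit write: csum right here       */
	LOOKUP_CSUMS,	/* read: fetch csums to verify against  */
};

static enum dio_csum_action dio_action(int write, int skip_sum,
				       int async_submit)
{
	if (skip_sum)
		return MAP_ONLY;
	if (write && async_submit)
		return QUEUE_ASYNC;
	if (write)
		return CSUM_INLINE;
	return LOOKUP_CSUMS;
}
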
@@ -5985,15 +6029,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
        int nr_pages = 0;
        u32 *csums = dip->csums;
        int ret = 0;
+       int async_submit = 0;
        int write = rw & REQ_WRITE;
 
-       bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
-       if (!bio)
-               return -ENOMEM;
-       bio->bi_private = dip;
-       bio->bi_end_io = btrfs_end_dio_bio;
-       atomic_inc(&dip->pending_bios);
-
        map_length = orig_bio->bi_size;
        ret = btrfs_map_block(map_tree, READ, start_sector << 9,
                              &map_length, NULL, 0);
@@ -6002,6 +6040,19 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                return -EIO;
        }
 
+       if (map_length >= orig_bio->bi_size) {
+               bio = orig_bio;
+               goto submit;
+       }
+
+       async_submit = 1;
+       bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+       if (!bio)
+               return -ENOMEM;
+       bio->bi_private = dip;
+       bio->bi_end_io = btrfs_end_dio_bio;
+       atomic_inc(&dip->pending_bios);
+
        while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
                if (unlikely(map_length < submit_len + bvec->bv_len ||
                    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
@@ -6015,7 +6066,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                        atomic_inc(&dip->pending_bios);
                        ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                     file_offset, skip_sum,
-                                                    csums);
+                                                    csums, async_submit);
                        if (ret) {
                                bio_put(bio);
                                atomic_dec(&dip->pending_bios);
@@ -6052,8 +6103,9 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                }
        }
 
+submit:
        ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                    csums);
+                                    csums, async_submit);
        if (!ret)
                return 0;
 
@@ -6148,6 +6200,7 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                        unsigned long nr_segs)
 {
        int seg;
+       int i;
        size_t size;
        unsigned long addr;
        unsigned blocksize_mask = root->sectorsize - 1;
@@ -6162,8 +6215,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
                addr = (unsigned long)iov[seg].iov_base;
                size = iov[seg].iov_len;
                end += size;
-               if ((addr & blocksize_mask) || (size & blocksize_mask)) 
+               if ((addr & blocksize_mask) || (size & blocksize_mask))
                        goto out;
+
+               /* If this is a write we don't need to check anymore */
+               if (rw & WRITE)
+                       continue;
+
+               /*
+                * Check to make sure we don't have duplicate iov_base's in this
+                * iovec, if so return EINVAL, otherwise we'll get csum errors
+                * when reading back.
+                */
+               for (i = seg + 1; i < nr_segs; i++) {
+                       if (iov[seg].iov_base == iov[i].iov_base)
+                               goto out;
+               }
        }
        retval = 0;
 out:
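
The duplicate-iov_base scan is quadratic, but nr_segs is small; in userspace terms the check the read path now performs is simply:

#include <stdbool.h>
#include <sys/uio.h>

/* Reject read iovecs that reuse a buffer: DIO reads landing in the same
 * page would otherwise race and trip the checksum verification. */
static bool has_duplicate_bases(const struct iovec *iov,
				unsigned long nr_segs)
{
	unsigned long seg, i;

	for (seg = 0; seg < nr_segs; seg++)
		for (i = seg + 1; i < nr_segs; i++)
			if (iov[seg].iov_base == iov[i].iov_base)
				return true;
	return false;
}
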
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cfc264f..ffb48d6 100644
@@ -2287,7 +2287,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_ioctl_space_info space;
        struct btrfs_ioctl_space_info *dest;
        struct btrfs_ioctl_space_info *dest_orig;
-       struct btrfs_ioctl_space_info *user_dest;
+       struct btrfs_ioctl_space_info __user *user_dest;
        struct btrfs_space_info *info;
        u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
                       BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 58e7de9..0ac712e 100644
@@ -159,7 +159,7 @@ enum {
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
        Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_err,
+       Opt_enospc_debug, Opt_subvolrootid, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -189,6 +189,7 @@ static match_table_t tokens = {
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
+       {Opt_subvolrootid, "subvolrootid=%d"},
        {Opt_err, NULL},
 };
 
@@ -232,6 +233,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        break;
                case Opt_subvol:
                case Opt_subvolid:
+               case Opt_subvolrootid:
                case Opt_device:
                        /*
                         * These are parsed by btrfs_parse_early_options
@@ -388,7 +390,7 @@ out:
  */
 static int btrfs_parse_early_options(const char *options, fmode_t flags,
                void *holder, char **subvol_name, u64 *subvol_objectid,
-               struct btrfs_fs_devices **fs_devices)
+               u64 *subvol_rootid, struct btrfs_fs_devices **fs_devices)
 {
        substring_t args[MAX_OPT_ARGS];
        char *opts, *orig, *p;
@@ -429,6 +431,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                                        *subvol_objectid = intarg;
                        }
                        break;
+               case Opt_subvolrootid:
+                       intarg = 0;
+                       error = match_int(&args[0], &intarg);
+                       if (!error) {
+                               /* we want the original fs_tree */
+                               if (!intarg)
+                                       *subvol_rootid =
+                                               BTRFS_FS_TREE_OBJECTID;
+                               else
+                                       *subvol_rootid = intarg;
+                       }
+                       break;
                case Opt_device:
                        error = btrfs_scan_one_device(match_strdup(&args[0]),
                                        flags, holder, fs_devices);
@@ -736,6 +750,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
        fmode_t mode = FMODE_READ;
        char *subvol_name = NULL;
        u64 subvol_objectid = 0;
+       u64 subvol_rootid = 0;
        int error = 0;
 
        if (!(flags & MS_RDONLY))
@@ -743,7 +758,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 
        error = btrfs_parse_early_options(data, mode, fs_type,
                                          &subvol_name, &subvol_objectid,
-                                         &fs_devices);
+                                         &subvol_rootid, &fs_devices);
        if (error)
                return ERR_PTR(error);
 
@@ -807,15 +822,17 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                s->s_flags |= MS_ACTIVE;
        }
 
-       root = get_default_root(s, subvol_objectid);
-       if (IS_ERR(root)) {
-               error = PTR_ERR(root);
-               deactivate_locked_super(s);
-               goto error_free_subvol_name;
-       }
        /* if they gave us a subvolume name bind mount into that */
        if (strcmp(subvol_name, ".")) {
                struct dentry *new_root;
+
+               root = get_default_root(s, subvol_rootid);
+               if (IS_ERR(root)) {
+                       error = PTR_ERR(root);
+                       deactivate_locked_super(s);
+                       goto error_free_subvol_name;
+               }
+
                mutex_lock(&root->d_inode->i_mutex);
                new_root = lookup_one_len(subvol_name, root,
                                      strlen(subvol_name));
@@ -836,6 +853,13 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
                }
                dput(root);
                root = new_root;
+       } else {
+               root = get_default_root(s, subvol_objectid);
+               if (IS_ERR(root)) {
+                       error = PTR_ERR(root);
+                       deactivate_locked_super(s);
+                       goto error_free_subvol_name;
+               }
        }
 
        kfree(subvol_name);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5b158da..c571734 100644
 
 static noinline void put_transaction(struct btrfs_transaction *transaction)
 {
-       WARN_ON(transaction->use_count == 0);
-       transaction->use_count--;
-       if (transaction->use_count == 0) {
-               list_del_init(&transaction->list);
+       WARN_ON(atomic_read(&transaction->use_count) == 0);
+       if (atomic_dec_and_test(&transaction->use_count)) {
                memset(transaction, 0, sizeof(*transaction));
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
@@ -60,14 +58,14 @@ static noinline int join_transaction(struct btrfs_root *root)
                if (!cur_trans)
                        return -ENOMEM;
                root->fs_info->generation++;
-               cur_trans->num_writers = 1;
+               atomic_set(&cur_trans->num_writers, 1);
                cur_trans->num_joined = 0;
                cur_trans->transid = root->fs_info->generation;
                init_waitqueue_head(&cur_trans->writer_wait);
                init_waitqueue_head(&cur_trans->commit_wait);
                cur_trans->in_commit = 0;
                cur_trans->blocked = 0;
-               cur_trans->use_count = 1;
+               atomic_set(&cur_trans->use_count, 1);
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
 
@@ -88,7 +86,7 @@ static noinline int join_transaction(struct btrfs_root *root)
                root->fs_info->running_transaction = cur_trans;
                spin_unlock(&root->fs_info->new_trans_lock);
        } else {
-               cur_trans->num_writers++;
+               atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
        }
 
@@ -145,7 +143,7 @@ static void wait_current_trans(struct btrfs_root *root)
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans && cur_trans->blocked) {
                DEFINE_WAIT(wait);
-               cur_trans->use_count++;
+               atomic_inc(&cur_trans->use_count);
                while (1) {
                        prepare_to_wait(&root->fs_info->transaction_wait, &wait,
                                        TASK_UNINTERRUPTIBLE);
@@ -181,6 +179,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
        struct btrfs_trans_handle *h;
        struct btrfs_transaction *cur_trans;
+       int retries = 0;
        int ret;
 
        if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -204,7 +203,7 @@ again:
        }
 
        cur_trans = root->fs_info->running_transaction;
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        if (type != TRANS_JOIN_NOLOCK)
                mutex_unlock(&root->fs_info->trans_mutex);
 
@@ -224,10 +223,18 @@ again:
 
        if (num_items > 0) {
                ret = btrfs_trans_reserve_metadata(h, root, num_items);
-               if (ret == -EAGAIN) {
+               if (ret == -EAGAIN && !retries) {
+                       retries++;
                        btrfs_commit_transaction(h, root);
                        goto again;
+               } else if (ret == -EAGAIN) {
+                       /*
+                        * We have already retried and got EAGAIN, so really we
+                        * don't have space, so set ret to -ENOSPC.
+                        */
+                       ret = -ENOSPC;
                }
+
                if (ret < 0) {
                        btrfs_end_transaction(h, root);
                        return ERR_PTR(ret);
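
The reservation change above bounds the retry loop: the first -EAGAIN triggers one transaction commit (which can free pinned space) and a retry; a second -EAGAIN is translated to a hard -ENOSPC instead of looping forever. A compilable sketch of the policy, where reserve() and commit() are hypothetical stand-ins:

#include <errno.h>

extern int reserve(void);   /* hypothetical: -EAGAIN if a commit may help */
extern void commit(void);   /* hypothetical: flushes reclaimable space */

static int reserve_with_retry(void)
{
        int retries = 0;
        int ret;
again:
        ret = reserve();
        if (ret == -EAGAIN && !retries) {
                retries++;
                commit();          /* reclaim what we can, then try once more */
                goto again;
        }
        if (ret == -EAGAIN)
                ret = -ENOSPC;     /* already retried: genuinely out of space */
        return ret;
}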
@@ -327,7 +334,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
                        goto out_unlock;  /* nothing committing|committed */
        }
 
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
 
        wait_for_commit(root, cur_trans);
@@ -457,18 +464,14 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                        wake_up_process(info->transaction_kthread);
        }
 
-       if (lock)
-               mutex_lock(&info->trans_mutex);
        WARN_ON(cur_trans != info->running_transaction);
-       WARN_ON(cur_trans->num_writers < 1);
-       cur_trans->num_writers--;
+       WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
+       atomic_dec(&cur_trans->num_writers);
 
        smp_mb();
        if (waitqueue_active(&cur_trans->writer_wait))
                wake_up(&cur_trans->writer_wait);
        put_transaction(cur_trans);
-       if (lock)
-               mutex_unlock(&info->trans_mutex);
 
        if (current->journal_info == trans)
                current->journal_info = NULL;
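
Here the trans_mutex acquisition around the writer-count decrement disappears entirely: with num_writers atomic, the decrement itself is safe, and the existing smp_mb() plus waitqueue_active() check keeps the no-lost-wakeup guarantee for a committer sleeping on writer_wait. A rough userspace analogue of the exit path, using a condition variable where the kernel uses a wait queue:

#include <pthread.h>
#include <stdatomic.h>

struct txn_waiters {
        atomic_int num_writers;
        pthread_mutex_t lock;
        pthread_cond_t writer_wait;
};

static void end_writer(struct txn_waiters *t)
{
        atomic_fetch_sub(&t->num_writers, 1);
        /* broadcast under the lock so a committer that just re-checked
         * num_writers cannot miss the wakeup */
        pthread_mutex_lock(&t->lock);
        pthread_cond_broadcast(&t->writer_wait);
        pthread_mutex_unlock(&t->lock);
}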
@@ -1178,7 +1181,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
        /* take transaction reference */
        mutex_lock(&root->fs_info->trans_mutex);
        cur_trans = trans->transaction;
-       cur_trans->use_count++;
+       atomic_inc(&cur_trans->use_count);
        mutex_unlock(&root->fs_info->trans_mutex);
 
        btrfs_end_transaction(trans, root);
@@ -1237,7 +1240,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        mutex_lock(&root->fs_info->trans_mutex);
        if (cur_trans->in_commit) {
-               cur_trans->use_count++;
+               atomic_inc(&cur_trans->use_count);
                mutex_unlock(&root->fs_info->trans_mutex);
                btrfs_end_transaction(trans, root);
 
@@ -1259,7 +1262,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                prev_trans = list_entry(cur_trans->list.prev,
                                        struct btrfs_transaction, list);
                if (!prev_trans->commit_done) {
-                       prev_trans->use_count++;
+                       atomic_inc(&prev_trans->use_count);
                        mutex_unlock(&root->fs_info->trans_mutex);
 
                        wait_for_commit(root, prev_trans);
@@ -1300,14 +1303,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                                TASK_UNINTERRUPTIBLE);
 
                smp_mb();
-               if (cur_trans->num_writers > 1)
+               if (atomic_read(&cur_trans->num_writers) > 1)
                        schedule_timeout(MAX_SCHEDULE_TIMEOUT);
                else if (should_grow)
                        schedule_timeout(1);
 
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
-       } while (cur_trans->num_writers > 1 ||
+       } while (atomic_read(&cur_trans->num_writers) > 1 ||
                 (should_grow && cur_trans->num_joined != joined));
 
        ret = create_pending_snapshots(trans, root->fs_info);
@@ -1394,6 +1397,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        wake_up(&cur_trans->commit_wait);
 
+       list_del_init(&cur_trans->list);
        put_transaction(cur_trans);
        put_transaction(cur_trans);
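
With put_transaction() no longer unlinking the transaction (that still requires trans_mutex, which an atomic put cannot assume), btrfs_commit_transaction() now removes it from the global list itself, while trans_mutex is still held, and then drops two references. The doubled call is intentional; a commented restatement of the tail of the hunk:

        list_del_init(&cur_trans->list); /* unlink under trans_mutex */
        put_transaction(cur_trans);      /* reference held by the list */
        put_transaction(cur_trans);      /* reference taken by this committer */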
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 229a594..e441acc 100644 (file)
@@ -27,11 +27,11 @@ struct btrfs_transaction {
         * total writers in this transaction, it must be zero before the
         * transaction can end
         */
-       unsigned long num_writers;
+       atomic_t num_writers;
 
        unsigned long num_joined;
        int in_commit;
-       int use_count;
+       atomic_t use_count;
        int commit_done;
        int blocked;
        struct list_head list;
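
The struct change draws a clean line through btrfs_transaction: counters touched outside trans_mutex become atomic_t, while fields still serialized by the mutex (num_joined, in_commit, and the rest) stay plain. Schematically, in C11 terms:

#include <stdatomic.h>

struct txn_fields {
        atomic_int use_count;      /* lockless get/put */
        atomic_int num_writers;    /* lockless join/end */
        unsigned long num_joined;  /* still guarded by the mutex */
        int in_commit;             /* still guarded by the mutex */
};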
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index a5303b8..cfd6605 100644 (file)
@@ -180,11 +180,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_dir_item *di;
-       int ret = 0, slot, advance;
+       int ret = 0, slot;
        size_t total_size = 0, size_left = size;
        unsigned long name_ptr;
        size_t name_len;
-       u32 nritems;
 
        /*
         * ok we want all objects associated with this id.
@@ -204,34 +203,24 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
-       advance = 0;
+
        while (1) {
                leaf = path->nodes[0];
-               nritems = btrfs_header_nritems(leaf);
                slot = path->slots[0];
 
                /* this is where we start walking through the path */
-               if (advance || slot >= nritems) {
+               if (slot >= btrfs_header_nritems(leaf)) {
                        /*
                         * if we've reached the last slot in this leaf we need
                         * to go to the next leaf and reset everything
                         */
-                       if (slot >= nritems-1) {
-                               ret = btrfs_next_leaf(root, path);
-                               if (ret)
-                                       break;
-                               leaf = path->nodes[0];
-                               nritems = btrfs_header_nritems(leaf);
-                               slot = path->slots[0];
-                       } else {
-                               /*
-                                * just walking through the slots on this leaf
-                                */
-                               slot++;
-                               path->slots[0]++;
-                       }
+                       ret = btrfs_next_leaf(root, path);
+                       if (ret < 0)
+                               goto err;
+                       else if (ret > 0)
+                               break;
+                       continue;
                }
-               advance = 1;
 
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
@@ -250,7 +239,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
                /* we are just looking for how big our buffer needs to be */
                if (!size)
-                       continue;
+                       goto next;
 
                if (!buffer || (name_len + 1) > size_left) {
                        ret = -ERANGE;
@@ -263,6 +252,8 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
                size_left -= name_len + 1;
                buffer += name_len + 1;
+next:
+               path->slots[0]++;
        }
        ret = total_size;
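
Two things improve in the btrfs_listxattr() rewrite: the advance/nritems bookkeeping collapses into a single increment at the next: label, and an error from btrfs_next_leaf() is finally distinguished from end-of-iteration (the old "if (ret) break;" silently swallowed negative return codes). The resulting shape, as a compilable sketch with hypothetical stand-ins for the tree primitives:

#include <stddef.h>

struct leaf;                                      /* opaque, illustrative */
extern int nritems(struct leaf *l);
extern int next_leaf(struct leaf **l, int *slot); /* <0 err, >0 end, 0 ok */
extern void process(struct leaf *l, int slot);

static int walk(struct leaf *leaf)
{
        int slot = 0;

        while (1) {
                if (slot >= nritems(leaf)) {
                        int ret = next_leaf(&leaf, &slot);
                        if (ret < 0)
                                return ret;     /* real error: propagate */
                        if (ret > 0)
                                break;          /* clean end of iteration */
                        continue;
                }
                process(leaf, slot);
                slot++;                         /* the single advance point */
        }
        return 0;
}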
 
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dd6628d..dfa5327 100644 (file)
@@ -3124,11 +3124,16 @@ static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldi
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-       unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
-       struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode);
+       unsigned int nr;
+       struct task_struct *reaper;
        struct tgid_iter iter;
        struct pid_namespace *ns;
 
+       if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET)
+               goto out_no_task;
+       nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+
+       reaper = get_proc_task(filp->f_path.dentry->d_inode);
        if (!reaper)
                goto out_no_task;
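
The proc change rejects directory offsets past the last possible pid before nr is computed, so a huge lseek() position can no longer flow into the pid iteration as a wild index. A userspace model of the guard; the constants are stand-ins, not the kernel's exact values:

#include <stdio.h>

#define FIRST_PROCESS_ENTRY 256                  /* stand-in */
#define TGID_OFFSET         FIRST_PROCESS_ENTRY  /* stand-in */
#define PID_MAX_LIMIT       (4 * 1024 * 1024)    /* stand-in */

static int readdir_pos_valid(long long f_pos, unsigned int *nr)
{
        /* bail out before the subtraction below can wrap into a huge
         * unsigned value */
        if (f_pos >= (long long)PID_MAX_LIMIT + TGID_OFFSET)
                return 0;
        *nr = (unsigned int)(f_pos - FIRST_PROCESS_ENTRY);
        return 1;
}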
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index 31afb7e..cdced84 100644 (file)
@@ -117,7 +117,7 @@ extern struct pid *find_vpid(int nr);
  */
 extern struct pid *find_get_pid(int nr);
 extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
-int next_pidmap(struct pid_namespace *pid_ns, int last);
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
 
 extern struct pid *alloc_pid(struct pid_namespace *ns);
 extern void free_pid(struct pid *pid);
diff --git a/kernel/pid.c b/kernel/pid.c
index 02f2212..57a8346 100644 (file)
@@ -217,11 +217,14 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
        return -1;
 }
 
-int next_pidmap(struct pid_namespace *pid_ns, int last)
+int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
 {
        int offset;
        struct pidmap *map, *end;
 
+       if (last >= PID_MAX_LIMIT)
+               return -1;
+
        offset = (last + 1) & BITS_PER_PAGE_MASK;
        map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
        end = &pid_ns->pidmap[PIDMAP_ENTRIES];
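
next_pidmap() gets the matching hardening: last becomes unsigned, and the explicit PID_MAX_LIMIT check also catches values that were negative as a signed int (they convert to huge unsigned numbers), so (last + 1)/BITS_PER_PAGE can no longer index pidmap[] out of bounds. The guard plus offset math in isolation, again with stand-in constants:

#define PID_MAX_LIMIT      (4 * 1024 * 1024)     /* stand-in */
#define BITS_PER_PAGE      (4096 * 8)            /* stand-in: one page of bits */
#define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1)

static int next_pid_slot(unsigned int last)
{
        if (last >= PID_MAX_LIMIT)   /* also rejects wrapped negatives */
                return -1;
        return (last + 1) & BITS_PER_PAGE_MASK;  /* offset within one page */
}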