Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 25 Feb 2011 22:03:39 +0000 (14:03 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 25 Feb 2011 22:03:39 +0000 (14:03 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
  Btrfs: fix fiemap bugs with delalloc
  Btrfs: set FMODE_EXCL in btrfs_device->mode
  Btrfs: make btrfs_rm_device() fail gracefully
  Btrfs: Avoid accessing unmapped kernel address
  Btrfs: Fix BTRFS_IOC_SUBVOL_SETFLAGS ioctl
  Btrfs: allow balance to explicitly allocate chunks as it relocates
  Btrfs: put ENOSPC debugging under a mount option

fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/relocation.c
fs/btrfs/super.c
fs/btrfs/volumes.c

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2c98b3a..6f820fa 100644
@@ -1254,6 +1254,7 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SPACE_CACHE                (1 << 12)
 #define BTRFS_MOUNT_CLEAR_CACHE                (1 << 13)
 #define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
+#define BTRFS_MOUNT_ENOSPC_DEBUG        (1 << 15)
 
 #define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
@@ -2218,6 +2219,8 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root,
                                   u64 start, u64 end);
 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
                               u64 num_bytes);
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 type);
 
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
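
The ctree.h hunk above adds a new mount-option bit, BTRFS_MOUNT_ENOSPC_DEBUG, alongside the existing btrfs_set_opt()/btrfs_clear_opt()/btrfs_test_opt() macros that manipulate the per-filesystem mount_opt word. As a rough illustration of that bit-flag pattern, here is a purely user-space toy with made-up names and values (not the kernel code):

    #include <stdio.h>

    /* toy mount-option bits in the style of the BTRFS_MOUNT_* defines above */
    #define OPT_SPACE_CACHE   (1 << 12)
    #define OPT_CLEAR_CACHE   (1 << 13)
    #define OPT_ENOSPC_DEBUG  (1 << 15)

    #define opt_set(o, opt)    ((o) |= (opt))
    #define opt_clear(o, opt)  ((o) &= ~(opt))
    #define opt_test(o, opt)   (!!((o) & (opt)))

    int main(void)
    {
        unsigned long mount_opt = 0;

        opt_set(mount_opt, OPT_ENOSPC_DEBUG);   /* what "-o enospc_debug" would do */
        if (opt_test(mount_opt, OPT_ENOSPC_DEBUG))
            printf("enospc debugging enabled\n");
        opt_clear(mount_opt, OPT_ENOSPC_DEBUG); /* what a remount without it would do */
        return 0;
    }

The matching parser token is added in the super.c hunk further down, so the debug dump in extent-tree.c is only emitted when the filesystem is mounted with -o enospc_debug.
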
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f3c96fc..588ff98 100644
@@ -5376,7 +5376,7 @@ again:
                               num_bytes, data, 1);
                goto again;
        }
-       if (ret == -ENOSPC) {
+       if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) {
                struct btrfs_space_info *sinfo;
 
                sinfo = __find_space_info(root->fs_info, data);
@@ -8065,6 +8065,13 @@ out:
        return ret;
 }
 
+int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root, u64 type)
+{
+       u64 alloc_flags = get_alloc_profile(root, type);
+       return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1);
+}
+
 /*
  * helper to account the unused space of all the readonly block group in the
  * list. takes mirrors into account.
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 92ac519..fd3f172 100644
@@ -1433,12 +1433,13 @@ int extent_clear_unlock_delalloc(struct inode *inode,
  */
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end, u64 max_bytes,
-                    unsigned long bits)
+                    unsigned long bits, int contig)
 {
        struct rb_node *node;
        struct extent_state *state;
        u64 cur_start = *start;
        u64 total_bytes = 0;
+       u64 last = 0;
        int found = 0;
 
        if (search_end <= cur_start) {
@@ -1463,7 +1464,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                state = rb_entry(node, struct extent_state, rb_node);
                if (state->start > search_end)
                        break;
-               if (state->end >= cur_start && (state->state & bits)) {
+               if (contig && found && state->start > last + 1)
+                       break;
+               if (state->end >= cur_start && (state->state & bits) == bits) {
                        total_bytes += min(search_end, state->end) + 1 -
                                       max(cur_start, state->start);
                        if (total_bytes >= max_bytes)
@@ -1472,6 +1475,9 @@ u64 count_range_bits(struct extent_io_tree *tree,
                                *start = state->start;
                                found = 1;
                        }
+                       last = state->end;
+               } else if (contig && found) {
+                       break;
                }
                node = rb_next(node);
                if (!node)
@@ -2912,6 +2918,46 @@ out:
        return sector;
 }
 
+/*
+ * helper function for fiemap, which doesn't want to see any holes.
+ * This maps until we find something past 'last'
+ */
+static struct extent_map *get_extent_skip_holes(struct inode *inode,
+                                               u64 offset,
+                                               u64 last,
+                                               get_extent_t *get_extent)
+{
+       u64 sectorsize = BTRFS_I(inode)->root->sectorsize;
+       struct extent_map *em;
+       u64 len;
+
+       if (offset >= last)
+               return NULL;
+
+       while(1) {
+               len = last - offset;
+               if (len == 0)
+                       break;
+               len = (len + sectorsize - 1) & ~(sectorsize - 1);
+               em = get_extent(inode, NULL, 0, offset, len, 0);
+               if (!em || IS_ERR(em))
+                       return em;
+
+               /* if this isn't a hole return it */
+               if (!test_bit(EXTENT_FLAG_VACANCY, &em->flags) &&
+                   em->block_start != EXTENT_MAP_HOLE) {
+                       return em;
+               }
+
+               /* this is a hole, advance to the next extent */
+               offset = extent_map_end(em);
+               free_extent_map(em);
+               if (offset >= last)
+                       break;
+       }
+       return NULL;
+}
+
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent)
 {
@@ -2921,16 +2967,19 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        u32 flags = 0;
        u32 found_type;
        u64 last;
+       u64 last_for_get_extent = 0;
        u64 disko = 0;
+       u64 isize = i_size_read(inode);
        struct btrfs_key found_key;
        struct extent_map *em = NULL;
        struct extent_state *cached_state = NULL;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int end = 0;
-       u64 em_start = 0, em_len = 0;
+       u64 em_start = 0;
+       u64 em_len = 0;
+       u64 em_end = 0;
        unsigned long emflags;
-       int hole = 0;
 
        if (len == 0)
                return -EINVAL;
@@ -2940,6 +2989,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                return -ENOMEM;
        path->leave_spinning = 1;
 
+       /*
+        * lookup the last file extent.  We're not using i_size here
+        * because there might be preallocation past i_size
+        */
        ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
                                       path, inode->i_ino, -1, 0);
        if (ret < 0) {
@@ -2953,18 +3006,38 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
 
-       /* No extents, just return */
+       /* No extents, but there might be delalloc bits */
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
-               btrfs_free_path(path);
-               return 0;
+               /* have to trust i_size as the end */
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       } else {
+               /*
+                * remember the start of the last extent.  There are a
+                * bunch of different factors that go into the length of the
+                * extent, so its much less complex to remember where it started
+                */
+               last = found_key.offset;
+               last_for_get_extent = last + 1;
        }
-       last = found_key.offset;
        btrfs_free_path(path);
 
+       /*
+        * we might have some extents allocated but more delalloc past those
+        * extents.  so, we trust isize unless the start of the last extent is
+        * beyond isize
+        */
+       if (last < isize) {
+               last = (u64)-1;
+               last_for_get_extent = isize;
+       }
+
        lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
                         &cached_state, GFP_NOFS);
-       em = get_extent(inode, NULL, 0, off, max - off, 0);
+
+       em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                  get_extent);
        if (!em)
                goto out;
        if (IS_ERR(em)) {
@@ -2973,19 +3046,14 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
 
        while (!end) {
-               hole = 0;
-               off = em->start + em->len;
+               off = extent_map_end(em);
                if (off >= max)
                        end = 1;
 
-               if (em->block_start == EXTENT_MAP_HOLE) {
-                       hole = 1;
-                       goto next;
-               }
-
                em_start = em->start;
                em_len = em->len;
-
+               em_end = extent_map_end(em);
+               emflags = em->flags;
                disko = 0;
                flags = 0;
 
@@ -3004,37 +3072,29 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
                        flags |= FIEMAP_EXTENT_ENCODED;
 
-next:
-               emflags = em->flags;
                free_extent_map(em);
                em = NULL;
-               if (!end) {
-                       em = get_extent(inode, NULL, 0, off, max - off, 0);
-                       if (!em)
-                               goto out;
-                       if (IS_ERR(em)) {
-                               ret = PTR_ERR(em);
-                               goto out;
-                       }
-                       emflags = em->flags;
-               }
-
-               if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
+               if ((em_start >= last) || em_len == (u64)-1 ||
+                  (last == (u64)-1 && isize <= em_end)) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
 
-               if (em_start == last) {
+               /* now scan forward to see if this is really the last extent. */
+               em = get_extent_skip_holes(inode, off, last_for_get_extent,
+                                          get_extent);
+               if (IS_ERR(em)) {
+                       ret = PTR_ERR(em);
+                       goto out;
+               }
+               if (!em) {
                        flags |= FIEMAP_EXTENT_LAST;
                        end = 1;
                }
-
-               if (!hole) {
-                       ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-                                               em_len, flags);
-                       if (ret)
-                               goto out_free;
-               }
+               ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+                                             em_len, flags);
+               if (ret)
+                       goto out_free;
        }
 out_free:
        free_extent_map(em);
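
The extent_io.c rework is the core of the fiemap fix: count_range_bits() gains a contig mode so a contiguous run of delalloc bits can be measured, get_extent_skip_holes() walks past holes, and FIEMAP_EXTENT_LAST is now decided by probing ahead of the current extent instead of relying on EXTENT_FLAG_VACANCY. The effect is visible from user space through the FIEMAP ioctl; a minimal sketch using the standard uapi structures, with error handling mostly trimmed and capped at 32 extents in a single call:

    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <linux/fiemap.h>

    int main(int argc, char **argv)
    {
        struct fiemap *fm;
        struct fiemap_extent *fe;
        unsigned int i;
        int fd;

        if (argc != 2)
            return 1;
        fd = open(argv[1], O_RDONLY);
        if (fd < 0)
            return 1;

        /* room for 32 extents in one call; larger files need a loop */
        fm = calloc(1, sizeof(*fm) + 32 * sizeof(*fe));
        if (!fm)
            return 1;
        fm->fm_start = 0;
        fm->fm_length = ~0ULL;          /* map the whole file */
        fm->fm_extent_count = 32;

        if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
            perror("fiemap");
            return 1;
        }

        for (i = 0; i < fm->fm_mapped_extents; i++) {
            fe = &fm->fm_extents[i];
            printf("logical %llu len %llu flags 0x%x%s\n",
                   (unsigned long long)fe->fe_logical,
                   (unsigned long long)fe->fe_length,
                   fe->fe_flags,
                   (fe->fe_flags & FIEMAP_EXTENT_DELALLOC) ? " (delalloc)" : "");
        }
        free(fm);
        close(fd);
        return 0;
    }

Leaving FIEMAP_FLAG_SYNC out of fm_flags matters here: with the flag set the file is flushed first, so the delalloc extents this merge starts reporting would already have been converted into ordinary allocated extents.
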
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 7083cfa..9318dfe 100644
@@ -191,7 +191,7 @@ void extent_io_exit(void);
 
 u64 count_range_bits(struct extent_io_tree *tree,
                     u64 *start, u64 search_end,
-                    u64 max_bytes, unsigned long bits);
+                    u64 max_bytes, unsigned long bits, int contig);
 
 void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index fb9bd78..0efdb65 100644
@@ -1913,7 +1913,7 @@ static int btrfs_clean_io_failures(struct inode *inode, u64 start)
 
        private = 0;
        if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
-                            (u64)-1, 1, EXTENT_DIRTY)) {
+                            (u64)-1, 1, EXTENT_DIRTY, 0)) {
                ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
                                        start, &private_failure);
                if (ret == 0) {
@@ -5280,6 +5280,128 @@ out:
        return em;
 }
 
+struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
+                                          size_t pg_offset, u64 start, u64 len,
+                                          int create)
+{
+       struct extent_map *em;
+       struct extent_map *hole_em = NULL;
+       u64 range_start = start;
+       u64 end;
+       u64 found;
+       u64 found_end;
+       int err = 0;
+
+       em = btrfs_get_extent(inode, page, pg_offset, start, len, create);
+       if (IS_ERR(em))
+               return em;
+       if (em) {
+               /*
+                * if our em maps to a hole, there might
+                * actually be delalloc bytes behind it
+                */
+               if (em->block_start != EXTENT_MAP_HOLE)
+                       return em;
+               else
+                       hole_em = em;
+       }
+
+       /* check to see if we've wrapped (len == -1 or similar) */
+       end = start + len;
+       if (end < start)
+               end = (u64)-1;
+       else
+               end -= 1;
+
+       em = NULL;
+
+       /* ok, we didn't find anything, lets look for delalloc */
+       found = count_range_bits(&BTRFS_I(inode)->io_tree, &range_start,
+                                end, len, EXTENT_DELALLOC, 1);
+       found_end = range_start + found;
+       if (found_end < range_start)
+               found_end = (u64)-1;
+
+       /*
+        * we didn't find anything useful, return
+        * the original results from get_extent()
+        */
+       if (range_start > end || found_end <= start) {
+               em = hole_em;
+               hole_em = NULL;
+               goto out;
+       }
+
+       /* adjust the range_start to make sure it doesn't
+        * go backwards from the start they passed in
+        */
+       range_start = max(start,range_start);
+       found = found_end - range_start;
+
+       if (found > 0) {
+               u64 hole_start = start;
+               u64 hole_len = len;
+
+               em = alloc_extent_map(GFP_NOFS);
+               if (!em) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               /*
+                * when btrfs_get_extent can't find anything it
+                * returns one huge hole
+                *
+                * make sure what it found really fits our range, and
+                * adjust to make sure it is based on the start from
+                * the caller
+                */
+               if (hole_em) {
+                       u64 calc_end = extent_map_end(hole_em);
+
+                       if (calc_end <= start || (hole_em->start > end)) {
+                               free_extent_map(hole_em);
+                               hole_em = NULL;
+                       } else {
+                               hole_start = max(hole_em->start, start);
+                               hole_len = calc_end - hole_start;
+                       }
+               }
+               em->bdev = NULL;
+               if (hole_em && range_start > hole_start) {
+                       /* our hole starts before our delalloc, so we
+                        * have to return just the parts of the hole
+                        * that go until  the delalloc starts
+                        */
+                       em->len = min(hole_len,
+                                     range_start - hole_start);
+                       em->start = hole_start;
+                       em->orig_start = hole_start;
+                       /*
+                        * don't adjust block start at all,
+                        * it is fixed at EXTENT_MAP_HOLE
+                        */
+                       em->block_start = hole_em->block_start;
+                       em->block_len = hole_len;
+               } else {
+                       em->start = range_start;
+                       em->len = found;
+                       em->orig_start = range_start;
+                       em->block_start = EXTENT_MAP_DELALLOC;
+                       em->block_len = found;
+               }
+       } else if (hole_em) {
+               return hole_em;
+       }
+out:
+
+       free_extent_map(hole_em);
+       if (err) {
+               free_extent_map(em);
+               return ERR_PTR(err);
+       }
+       return em;
+}
+
 static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
@@ -6102,7 +6224,7 @@ out:
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
-       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent);
+       return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
 }
 
 int btrfs_readpage(struct file *file, struct page *page)
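
btrfs_get_extent_fiemap(), added above, wraps btrfs_get_extent(): when the ordinary lookup comes back as a hole, it asks count_range_bits() for EXTENT_DELALLOC bits in that range and, if it finds some, hands fiemap a synthesized extent map, either the leading part of the hole up to where the delalloc begins, or the delalloc range itself. The interval arithmetic can be sketched in isolation; the toy below is illustrative only, not the kernel code:

    #include <stdio.h>

    typedef unsigned long long u64;

    #define min(a, b) ((a) < (b) ? (a) : (b))
    #define max(a, b) ((a) > (b) ? (a) : (b))

    struct range { u64 start; u64 len; const char *kind; };

    /*
     * Toy version of the hole-vs-delalloc decision: the extent tree says
     * [hole_start, hole_end) is a hole, the io_tree says [dl_start, dl_end)
     * is dirty delalloc data.  Decide what the mapping at 'pos' should be.
     */
    static struct range map_at(u64 pos, u64 hole_start, u64 hole_end,
                               u64 dl_start, u64 dl_end)
    {
        struct range r;

        dl_start = max(dl_start, pos);          /* never go backwards from pos */

        if (dl_start < dl_end && dl_start > pos && pos >= hole_start) {
            /* report the hole first, but only up to where delalloc begins */
            r.start = pos;
            r.len = min(hole_end, dl_start) - pos;
            r.kind = "hole";
        } else if (dl_start < dl_end) {
            r.start = dl_start;
            r.len = dl_end - dl_start;
            r.kind = "delalloc";
        } else {
            r.start = pos;
            r.len = hole_end - pos;
            r.kind = "hole";
        }
        return r;
    }

    int main(void)
    {
        /* 0..64K is a hole on disk, 16K..32K is dirty in the page cache */
        struct range r = map_at(0, 0, 65536, 16384, 32768);
        printf("%s at %llu, len %llu\n", r.kind, r.start, r.len);
        r = map_at(16384, 0, 65536, 16384, 32768);
        printf("%s at %llu, len %llu\n", r.kind, r.start, r.len);
        return 0;
    }

The final inode.c hunk simply switches btrfs_fiemap() over to the new wrapper, leaving btrfs_get_extent() itself unchanged for every other caller.
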
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index be2d4f6..5fdb2ab 100644
@@ -1071,12 +1071,15 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        if (copy_from_user(&flags, arg, sizeof(flags)))
                return -EFAULT;
 
-       if (flags & ~BTRFS_SUBVOL_CREATE_ASYNC)
+       if (flags & BTRFS_SUBVOL_CREATE_ASYNC)
                return -EINVAL;
 
        if (flags & ~BTRFS_SUBVOL_RDONLY)
                return -EOPNOTSUPP;
 
+       if (!is_owner_or_cap(inode))
+               return -EACCES;
+
        down_write(&root->fs_info->subvol_sem);
 
        /* nothing to do */
@@ -1097,7 +1100,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
                goto out_reset;
        }
 
-       ret = btrfs_update_root(trans, root,
+       ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &root->root_key, &root->root_item);
 
        btrfs_commit_transaction(trans, root);
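
The first ioctl.c hunk fixes an inverted flag check: the old test, flags & ~BTRFS_SUBVOL_CREATE_ASYNC, rejected everything except CREATE_ASYNC, the one flag that is meaningless for SETFLAGS. The new code rejects CREATE_ASYNC outright, refuses anything beyond RDONLY, and requires ownership (or the capability checked by is_owner_or_cap()) before flipping the read-only bit. The "must not contain X" versus "must contain nothing but X" distinction is easy to get backwards; a tiny sketch with made-up flag values:

    #include <stdio.h>

    /* illustrative values only; the real BTRFS_SUBVOL_* flags live in the btrfs headers */
    #define FLAG_CREATE_ASYNC  (1ULL << 0)
    #define FLAG_RDONLY        (1ULL << 1)

    static int setflags_check(unsigned long long flags)
    {
        if (flags & FLAG_CREATE_ASYNC)  /* this flag is never valid for setflags */
            return -1;                  /* -EINVAL */
        if (flags & ~FLAG_RDONLY)       /* nothing beyond the read-only bit is supported */
            return -2;                  /* -EOPNOTSUPP */
        return 0;
    }

    int main(void)
    {
        printf("%d\n", setflags_check(FLAG_RDONLY));        /* 0: accepted */
        printf("%d\n", setflags_check(FLAG_CREATE_ASYNC));  /* -1: rejected */
        printf("%d\n", setflags_check(1ULL << 5));          /* -2: unknown flag */
        return 0;
    }

The second hunk writes the updated root item through the tree of tree roots (root->fs_info->tree_root) rather than back into the subvolume root itself, which is where root items actually live.
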
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index cc9b450..a178f5e 100644
@@ -280,6 +280,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
        unsigned long tot_out;
        unsigned long tot_len;
        char *buf;
+       bool may_late_unmap, need_unmap;
 
        data_in = kmap(pages_in[0]);
        tot_len = read_compress_length(data_in);
@@ -300,11 +301,13 @@ static int lzo_decompress_biovec(struct list_head *ws,
 
                tot_in += in_len;
                working_bytes = in_len;
+               may_late_unmap = need_unmap = false;
 
                /* fast path: avoid using the working buffer */
                if (in_page_bytes_left >= in_len) {
                        buf = data_in + in_offset;
                        bytes = in_len;
+                       may_late_unmap = true;
                        goto cont;
                }
 
@@ -329,14 +332,17 @@ cont:
                                if (working_bytes == 0 && tot_in >= tot_len)
                                        break;
 
-                               kunmap(pages_in[page_in_index]);
-                               page_in_index++;
-                               if (page_in_index >= total_pages_in) {
+                               if (page_in_index + 1 >= total_pages_in) {
                                        ret = -1;
-                                       data_in = NULL;
                                        goto done;
                                }
-                               data_in = kmap(pages_in[page_in_index]);
+
+                               if (may_late_unmap)
+                                       need_unmap = true;
+                               else
+                                       kunmap(pages_in[page_in_index]);
+
+                               data_in = kmap(pages_in[++page_in_index]);
 
                                in_page_bytes_left = PAGE_CACHE_SIZE;
                                in_offset = 0;
@@ -346,6 +352,8 @@ cont:
                out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
                ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
                                            &out_len);
+               if (need_unmap)
+                       kunmap(pages_in[page_in_index - 1]);
                if (ret != LZO_E_OK) {
                        printk(KERN_WARNING "btrfs decompress failed\n");
                        ret = -1;
@@ -363,8 +371,7 @@ cont:
                        break;
        }
 done:
-       if (data_in)
-               kunmap(pages_in[page_in_index]);
+       kunmap(pages_in[page_in_index]);
        return ret;
 }
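
The lzo change fixes a use-after-unmap in the decompression loop: on the fast path, buf points directly into the currently mapped input page, so that page must stay mapped until lzo1x_decompress_safe() has consumed it, but the old code kunmap()ed it as soon as the walk advanced to the next page. The may_late_unmap/need_unmap pair defers that kunmap() until after the decompress call. A user-space toy of the same borrow-then-defer-release pattern, with kmap/kunmap stubbed out as reference counters (not the kernel code):

    #include <stdio.h>
    #include <stdbool.h>

    #define NPAGES 2
    static char page_data[NPAGES][8] = { "abcd", "efgh" };
    static int  map_count[NPAGES];

    static char *kmap_stub(int p)   { map_count[p]++; return page_data[p]; }
    static void  kunmap_stub(int p) { map_count[p]--; }

    /* stands in for lzo1x_decompress_safe(): it still reads 'buf' */
    static void decompress_stub(const char *buf, int src_page)
    {
        if (map_count[src_page] <= 0)
            printf("BUG: input page %d already unmapped\n", src_page);
        else
            printf("decompressed '%s' (page %d still mapped)\n", buf, src_page);
    }

    int main(void)
    {
        bool need_unmap = false;
        int page_in_index = 0;
        char *buf = kmap_stub(page_in_index);   /* fast path: 'buf' borrows page 0 */

        /* the compressed record continues on the next page: map it, but
         * defer unmapping page 0 because 'buf' still points into it */
        need_unmap = true;
        kmap_stub(++page_in_index);

        decompress_stub(buf, 0);                /* page 0 must still be mapped here */
        if (need_unmap)
            kunmap_stub(page_in_index - 1);     /* now it is safe to drop it */

        kunmap_stub(page_in_index);             /* drop the current page too */
        return 0;
    }
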
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0825e4e..31ade58 100644
@@ -3654,6 +3654,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        u32 item_size;
        int ret;
        int err = 0;
+       int progress = 0;
 
        path = btrfs_alloc_path();
        if (!path)
@@ -3666,9 +3667,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        }
 
        while (1) {
+               progress++;
                trans = btrfs_start_transaction(rc->extent_root, 0);
                BUG_ON(IS_ERR(trans));
-
+restart:
                if (update_backref_cache(trans, &rc->backref_cache)) {
                        btrfs_end_transaction(trans, rc->extent_root);
                        continue;
@@ -3781,6 +3783,15 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
                        }
                }
        }
+       if (trans && progress && err == -ENOSPC) {
+               ret = btrfs_force_chunk_alloc(trans, rc->extent_root,
+                                             rc->block_group->flags);
+               if (ret == 0) {
+                       err = 0;
+                       progress = 0;
+                       goto restart;
+               }
+       }
 
        btrfs_release_path(rc->extent_root, path);
        clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
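
In relocation.c, balance can now dig itself out of a spurious ENOSPC: if at least one pass made progress, it calls the new btrfs_force_chunk_alloc() helper (added in extent-tree.c above) to allocate a chunk of the block group's type and jumps back to the restart label instead of failing. Roughly that retry shape, as a stand-alone toy with stubbed helpers:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool chunk_allocated;

    /* stand-in for one pass of relocate_block_group(): the first pass
     * runs out of space, later passes succeed */
    static int relocate_pass(void)
    {
        return chunk_allocated ? 0 : -ENOSPC;
    }

    /* stand-in for btrfs_force_chunk_alloc() */
    static int force_chunk_alloc(void)
    {
        chunk_allocated = true;
        return 0;
    }

    int main(void)
    {
        int progress = 1;                       /* this pass did move extents */
        int err = relocate_pass();

        if (err == -ENOSPC && progress) {
            if (force_chunk_alloc() == 0) {
                progress = 0;
                err = relocate_pass();          /* the "goto restart" in the patch */
            }
        }
        printf("relocation %s\n", err ? "failed" : "succeeded");
        return 0;
    }
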
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index a004008..d39a989 100644
@@ -155,7 +155,8 @@ enum {
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, Opt_err,
+       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
+       Opt_enospc_debug, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -184,6 +185,7 @@ static match_table_t tokens = {
        {Opt_space_cache, "space_cache"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
+       {Opt_enospc_debug, "enospc_debug"},
        {Opt_err, NULL},
 };
 
@@ -358,6 +360,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_user_subvol_rm_allowed:
                        btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
                        break;
+               case Opt_enospc_debug:
+                       btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG);
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index af7dbca..dd13eb8 100644
@@ -1338,11 +1338,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
        ret = btrfs_shrink_device(device, 0);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
 
        ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
        if (ret)
-               goto error_brelse;
+               goto error_undo;
 
        device->in_fs_metadata = 0;
 
@@ -1416,6 +1416,13 @@ out:
        mutex_unlock(&root->fs_info->volume_mutex);
        mutex_unlock(&uuid_mutex);
        return ret;
+error_undo:
+       if (device->writeable) {
+               list_add(&device->dev_alloc_list,
+                        &root->fs_info->fs_devices->alloc_list);
+               root->fs_info->fs_devices->rw_devices++;
+       }
+       goto error_brelse;
 }
 
 /*
@@ -1633,7 +1640,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
-       device->mode = 0;
+       device->mode = FMODE_EXCL;
        set_blocksize(device->bdev, 4096);
 
        if (seeding_dev) {
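
The volumes.c changes make btrfs_rm_device() undo its earlier bookkeeping when btrfs_shrink_device() or btrfs_rm_dev_item() fails: the new error_undo label puts a writeable device back on the allocation list and restores the rw_devices count, so a failed removal no longer strands the device. The last hunk also records FMODE_EXCL in device->mode, matching the exclusive open so the eventual blkdev_put() releases that claim correctly. A compact user-space sketch of the unwind idiom, with illustrative structures only:

    #include <errno.h>
    #include <stdio.h>

    struct device {
        int writeable;
        int on_alloc_list;
    };

    /* simulate btrfs_shrink_device() failing partway through removal */
    static int shrink_device(struct device *dev)
    {
        (void)dev;
        return -ENOSPC;
    }

    static int rm_device(struct device *dev)
    {
        int ret;

        /* removal starts by pulling the device off the allocation list */
        dev->on_alloc_list = 0;

        ret = shrink_device(dev);
        if (ret)
            goto error_undo;

        /* ... the rest of the teardown would follow here ... */
        return 0;

    error_undo:
        if (dev->writeable)
            dev->on_alloc_list = 1; /* put it back: the device stays usable */
        return ret;
    }

    int main(void)
    {
        struct device dev = { .writeable = 1, .on_alloc_list = 1 };
        int ret = rm_device(&dev);

        printf("rm_device: %d, still on alloc list: %d\n", ret, dev.on_alloc_list);
        return 0;
    }
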