goto again;
}
- /* We've already setup this transaction, go ahead and exit */
- if (block_group->cache_generation == trans->transid &&
- i_size_read(inode)) {
- dcs = BTRFS_DC_SETUP;
- goto out_put;
- }
-
/*
* We want to set the generation to 0, that way if anything goes wrong
* from here on out we know not to trust this cache when we load up next
ret = btrfs_update_inode(trans, root, inode);
WARN_ON(ret);
+ /* We've already set up this transaction, go ahead and exit */
+ if (block_group->cache_generation == trans->transid &&
+ i_size_read(inode)) {
+ dcs = BTRFS_DC_SETUP;
+ goto out_put;
+ }
+
if (i_size_read(inode) > 0) {
ret = btrfs_truncate_free_space_cache(root, trans, path,
inode);
btrfs_release_path(path);
out:
spin_lock(&block_group->lock);
- if (!ret)
+ if (!ret && dcs == BTRFS_DC_SETUP)
block_group->cache_generation = trans->transid;
block_group->disk_cache_state = dcs;
spin_unlock(&block_group->lock);
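
The hunk above only stamps cache_generation when the cache really was set up (dcs == BTRFS_DC_SETUP), so a later load never trusts a generation recorded on a failed attempt. Below is a minimal userspace sketch of that gating; the struct and function names are illustrative, not the kernel's.

#include <stdio.h>

enum cache_state { DC_CLEAR, DC_SETUP, DC_ERROR };

struct block_group {
	unsigned long long cache_generation;
	enum cache_state disk_cache_state;
};

/* mirrors: if (!ret && dcs == BTRFS_DC_SETUP) ... */
static void finish_cache_setup(struct block_group *bg,
			       unsigned long long transid,
			       int ret, enum cache_state dcs)
{
	if (!ret && dcs == DC_SETUP)
		bg->cache_generation = transid;
	bg->disk_cache_state = dcs;
}

int main(void)
{
	struct block_group bg = { 0, DC_CLEAR };

	finish_cache_setup(&bg, 100, -5, DC_ERROR); /* setup failed */
	printf("gen after failed setup: %llu\n", bg.cache_generation);
	finish_cache_setup(&bg, 101, 0, DC_SETUP);  /* setup worked */
	printf("gen after good setup:   %llu\n", bg.cache_generation);
	return 0;
}
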
smp_mb();
nr_pages = min_t(unsigned long, nr_pages,
root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
- writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
+ writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
+ WB_REASON_FS_FREE_SPACE);
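
The hunk above clamps the writeback request to the number of pages that delalloc_bytes actually covers before kicking writeback_inodes_sb_nr_if_idle(), now tagged with WB_REASON_FS_FREE_SPACE. A small sketch of the byte-to-page clamp, assuming 4 KiB pages and illustrative names:

#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages assumed for the demo */

/* stand-in for the kernel's min_t(unsigned long, ...) */
static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long nr_pages = 1024;	/* what we wanted to flush */
	unsigned long long delalloc_bytes = 256 << PAGE_SHIFT; /* what is dirty */

	nr_pages = min_ul(nr_pages,
			  (unsigned long)(delalloc_bytes >> PAGE_SHIFT));
	printf("flushing %lu pages\n", nr_pages);	/* prints 256 */
	return 0;
}
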
spin_lock(&space_info->lock);
if (reserved > space_info->bytes_may_use)
spin_lock(&block_rsv->lock);
spin_lock(&sinfo->lock);
- block_rsv->size = num_bytes;
+ block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024);
num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
sinfo->bytes_reserved + sinfo->bytes_readonly +
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
u64 to_reserve = 0;
+ u64 csum_bytes;
unsigned nr_extents = 0;
+ int extra_reserve = 0;
int flush = 1;
int ret;
+ /* We need to hold the i_mutex here unless this is the free space cache inode */
if (btrfs_is_free_space_inode(root, inode))
flush = 0;
+ else
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
if (flush && btrfs_transaction_in_commit(root->fs_info))
schedule_timeout(1);
BTRFS_I(inode)->outstanding_extents++;
if (BTRFS_I(inode)->outstanding_extents >
- BTRFS_I(inode)->reserved_extents) {
+ BTRFS_I(inode)->reserved_extents)
nr_extents = BTRFS_I(inode)->outstanding_extents -
BTRFS_I(inode)->reserved_extents;
- BTRFS_I(inode)->reserved_extents += nr_extents;
- }
/*
* Add an item to reserve for updating the inode when we complete the
*/
if (!BTRFS_I(inode)->delalloc_meta_reserved) {
nr_extents++;
- BTRFS_I(inode)->delalloc_meta_reserved = 1;
+ extra_reserve = 1;
}
to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents);
to_reserve += calc_csum_metadata_size(inode, num_bytes, 1);
+ csum_bytes = BTRFS_I(inode)->csum_bytes;
spin_unlock(&BTRFS_I(inode)->lock);
ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush);
spin_lock(&BTRFS_I(inode)->lock);
dropped = drop_outstanding_extent(inode);
- to_free = calc_csum_metadata_size(inode, num_bytes, 0);
- spin_unlock(&BTRFS_I(inode)->lock);
- to_free += btrfs_calc_trans_metadata_size(root, dropped);
-
/*
- * Somebody could have come in and twiddled with the
- * reservation, so if we have to free more than we would have
- * reserved from this reservation go ahead and release those
- * bytes.
+ * If the inode's csum_bytes is the same as the original
+ * csum_bytes then we know we haven't raced with any free()ers,
+ * so we can just reduce our inode's csum bytes and carry on.
+ * Otherwise we have to do the normal free thing to account for
+ * the case that the free side didn't free up its reserve
+ * because of this outstanding reservation.
*/
- to_free -= to_reserve;
+ if (BTRFS_I(inode)->csum_bytes == csum_bytes)
+ calc_csum_metadata_size(inode, num_bytes, 0);
+ else
+ to_free = calc_csum_metadata_size(inode, num_bytes, 0);
+ spin_unlock(&BTRFS_I(inode)->lock);
+ if (dropped)
+ to_free += btrfs_calc_trans_metadata_size(root, dropped);
+
if (to_free)
btrfs_block_rsv_release(root, block_rsv, to_free);
return ret;
}
+ spin_lock(&BTRFS_I(inode)->lock);
+ if (extra_reserve) {
+ BTRFS_I(inode)->delalloc_meta_reserved = 1;
+ nr_extents--;
+ }
+ BTRFS_I(inode)->reserved_extents += nr_extents;
+ spin_unlock(&BTRFS_I(inode)->lock);
+
block_rsv_add_bytes(block_rsv, to_reserve, 1);
return 0;
return 0;
}
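
The reservation error path above snapshots csum_bytes while holding BTRFS_I(inode)->lock, drops the lock for the slow reservation work, and on failure retakes it: if csum_bytes is unchanged, no free() raced in and the bump can be undone in place; otherwise the normal free accounting has to run. A userspace sketch of that snapshot-and-compare pattern follows, with a pthread mutex standing in for the inode lock and all names illustrative:

#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>

struct inode_info {
	pthread_mutex_t lock;	/* stands in for BTRFS_I(inode)->lock */
	unsigned long long csum_bytes;
};

static void reserve_fails(struct inode_info *i, unsigned long long bytes,
			  bool racer_frees)
{
	unsigned long long snapshot;

	pthread_mutex_lock(&i->lock);
	i->csum_bytes += bytes;
	snapshot = i->csum_bytes;
	pthread_mutex_unlock(&i->lock);

	/* ... the slow reservation happens unlocked here and fails ... */
	if (racer_frees) {
		pthread_mutex_lock(&i->lock);
		i->csum_bytes -= bytes / 2;	/* a free() sneaks in */
		pthread_mutex_unlock(&i->lock);
	}

	pthread_mutex_lock(&i->lock);
	if (i->csum_bytes == snapshot) {
		/* nobody raced with us: just undo our own bump */
		i->csum_bytes -= bytes;
		printf("rolled back in place\n");
	} else {
		/* a free() raced in: take the full free accounting path */
		printf("raced, doing the normal free\n");
	}
	pthread_mutex_unlock(&i->lock);
}

int main(void)
{
	struct inode_info i = { PTHREAD_MUTEX_INITIALIZER, 4096 };

	reserve_fails(&i, 4096, false);
	reserve_fails(&i, 4096, true);
	return 0;
}
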
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+ const bool return_free_space)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
}
start += len;
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
+ unpin_extent_range(root, start, end, true);
cond_resched();
}
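
unpin_extent_range() above grows a return_free_space flag: the commit path passes true and keeps returning unpinned ranges to the free space cache, while the error-cleanup caller at the bottom of this series passes false so the cache of a half-torn-down filesystem is not repopulated. A sketch of the flag-parameterized walk, names illustrative:

#include <stdio.h>
#include <stdbool.h>

static unsigned long long free_space;	/* stand-in free space cache */

static void unpin_range(unsigned long long start, unsigned long long end,
			bool return_free_space)
{
	unsigned long long len = end + 1 - start;

	/* ... pinned accounting for [start, end] cleared here ... */
	if (return_free_space)
		free_space += len;	/* normal commit path only */
}

int main(void)
{
	unpin_range(0, 4095, true);	/* transaction commit */
	unpin_range(4096, 8191, false);	/* error-path unpin */
	printf("free space cache got %llu bytes\n", free_space); /* 4096 */
	return 0;
}
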
struct btrfs_root *root = orig_root->fs_info->extent_root;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group_cache *block_group = NULL;
+ struct btrfs_block_group_cache *used_block_group;
int empty_cluster = 2 * 1024 * 1024;
int allowed_chunk_alloc = 0;
int done_chunk_alloc = 0;
struct btrfs_space_info *space_info;
- int last_ptr_loop = 0;
int loop = 0;
int index = 0;
int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
ideal_cache:
block_group = btrfs_lookup_block_group(root->fs_info,
search_start);
+ used_block_group = block_group;
/*
* we don't want to use the block group if it doesn't match our
* allocation bits, or if its not cached.
u64 offset;
int cached;
+ used_block_group = block_group;
btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
spin_lock(&block_group->free_space_ctl->tree_lock);
if (cached &&
block_group->free_space_ctl->free_space <
- num_bytes + empty_size) {
+ num_bytes + empty_cluster + empty_size) {
spin_unlock(&block_group->free_space_ctl->tree_lock);
goto loop;
}
spin_unlock(&block_group->free_space_ctl->tree_lock);
/*
- * Ok we want to try and use the cluster allocator, so lets look
- * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will
- * have tried the cluster allocator plenty of times at this
- * point and not have found anything, so we are likely way too
- * fragmented for the clustering stuff to find anything, so lets
- * just skip it and let the allocator find whatever block it can
- * find
+ * OK, we want to try to use the cluster allocator, so
+ * let's look there
*/
- if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) {
+ if (last_ptr) {
/*
* the refill lock keeps out other
* people trying to start a new cluster
*/
spin_lock(&last_ptr->refill_lock);
- if (last_ptr->block_group &&
- (last_ptr->block_group->ro ||
- !block_group_bits(last_ptr->block_group, data)))
+ used_block_group = last_ptr->block_group;
+ if (used_block_group != block_group &&
+ (!used_block_group ||
+ used_block_group->ro ||
+ !block_group_bits(used_block_group, data))) {
+ used_block_group = block_group;
goto refill_cluster;
+ }
+
+ if (used_block_group != block_group)
+ btrfs_get_block_group(used_block_group);
- offset = btrfs_alloc_from_cluster(block_group, last_ptr,
- num_bytes, search_start);
+ offset = btrfs_alloc_from_cluster(used_block_group,
+ last_ptr, num_bytes, used_block_group->key.objectid);
if (offset) {
/* we have a block, we're done */
spin_unlock(&last_ptr->refill_lock);
goto checks;
}
- spin_lock(&last_ptr->lock);
- /*
- * whoops, this cluster doesn't actually point to
- * this block group. Get a ref on the block
- * group is does point to and try again
- */
- if (!last_ptr_loop && last_ptr->block_group &&
- last_ptr->block_group != block_group &&
- index <=
- get_block_group_index(last_ptr->block_group)) {
-
- btrfs_put_block_group(block_group);
- block_group = last_ptr->block_group;
- btrfs_get_block_group(block_group);
- spin_unlock(&last_ptr->lock);
- spin_unlock(&last_ptr->refill_lock);
-
- last_ptr_loop = 1;
- search_start = block_group->key.objectid;
- /*
- * we know this block group is properly
- * in the list because
- * btrfs_remove_block_group, drops the
- * cluster before it removes the block
- * group from the list
- */
- goto have_block_group;
+ WARN_ON(last_ptr->block_group != used_block_group);
+ if (used_block_group != block_group) {
+ btrfs_put_block_group(used_block_group);
+ used_block_group = block_group;
}
- spin_unlock(&last_ptr->lock);
refill_cluster:
+ BUG_ON(used_block_group != block_group);
+ /* If we are on LOOP_NO_EMPTY_SIZE, we can't
+ * set up a new cluster, so let's just skip it
+ * and let the allocator find whatever block
+ * it can find. If we reach this point, we
+ * will have tried the cluster allocator
+ * plenty of times and not have found
+ * anything, so we are likely way too
+ * fragmented for the clustering stuff to find
+ * anything. */
+ if (loop >= LOOP_NO_EMPTY_SIZE) {
+ spin_unlock(&last_ptr->refill_lock);
+ goto unclustered_alloc;
+ }
+
/*
* this cluster didn't work out, free it and
* start over
*/
btrfs_return_cluster_to_free_space(NULL, last_ptr);
- last_ptr_loop = 0;
-
/* allocate a cluster in this block group */
ret = btrfs_find_space_cluster(trans, root,
block_group, last_ptr,
goto loop;
}
+unclustered_alloc:
offset = btrfs_find_space_for_alloc(block_group, search_start,
num_bytes, empty_size);
/*
search_start = stripe_align(root, offset);
/* move on to the next group */
if (search_start + num_bytes >= search_end) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
/* move on to the next group */
if (search_start + num_bytes >
- block_group->key.objectid + block_group->key.offset) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ used_block_group->key.objectid + used_block_group->key.offset) {
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
ins->offset = num_bytes;
if (offset < search_start)
- btrfs_add_free_space(block_group, offset,
+ btrfs_add_free_space(used_block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
- ret = btrfs_update_reserved_bytes(block_group, num_bytes,
+ ret = btrfs_update_reserved_bytes(used_block_group, num_bytes,
alloc_type);
if (ret == -EAGAIN) {
- btrfs_add_free_space(block_group, offset, num_bytes);
+ btrfs_add_free_space(used_block_group, offset, num_bytes);
goto loop;
}
ins->offset = num_bytes;
if (offset < search_start)
- btrfs_add_free_space(block_group, offset,
+ btrfs_add_free_space(used_block_group, offset,
search_start - offset);
BUG_ON(offset > search_start);
+ if (used_block_group != block_group)
+ btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
break;
loop:
failed_cluster_refill = false;
failed_alloc = false;
BUG_ON(index != get_block_group_index(block_group));
+ if (used_block_group != block_group)
+ btrfs_put_block_group(used_block_group);
btrfs_put_block_group(block_group);
}
up_read(&space_info->groups_sem);
return -ENOSPC;
}
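
The find_free_extent() changes above track used_block_group separately from the group the loop is scanning: a cluster can point into a different block group, so the code takes its own reference on that group while allocating from it and drops the reference on every loop and exit path. A sketch of the borrow-with-refcount pattern, with illustrative names:

#include <stdio.h>

struct block_group {
	const char *name;
	int refs;
};

static void bg_get(struct block_group *bg) { bg->refs++; }
static void bg_put(struct block_group *bg) { bg->refs--; }

static void alloc_from(struct block_group *scanning,
		       struct block_group *cluster_bg)
{
	struct block_group *used = scanning;

	if (cluster_bg && cluster_bg != scanning) {
		used = cluster_bg;
		bg_get(used);	/* extra ref while we allocate from it */
	}

	printf("allocating from %s\n", used->name);

	if (used != scanning)
		bg_put(used);	/* mirrors the put on the loop/break paths */
}

int main(void)
{
	struct block_group a = { "scanned", 1 }, b = { "cluster", 1 };

	alloc_from(&a, &b);
	printf("refs: %s=%d %s=%d\n", a.name, a.refs, b.name, b.refs);
	return 0;
}
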
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_discard_extent(root, start, len, NULL);
-
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
}
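
The hunk above moves the optional discard into the branch that actually frees the extent, presumably because a pinned extent's blocks are still live until the transaction commits, so trimming them at pin time would be premature. A sketch of that ordering, with illustrative names:

#include <stdio.h>
#include <stdbool.h>

static void discard_extent(unsigned long long start, unsigned long long len)
{
	printf("TRIM %llu+%llu\n", start, len);
}

static void free_reserved_extent(unsigned long long start,
				 unsigned long long len,
				 bool pin, bool discard_enabled)
{
	if (pin) {
		/* extent stays pinned until commit: no discard here */
		printf("pinning %llu+%llu\n", start, len);
	} else {
		/* only a truly freed extent is discarded */
		if (discard_enabled)
			discard_extent(start, len);
		printf("freeing %llu+%llu\n", start, len);
	}
}

int main(void)
{
	free_reserved_extent(0, 4096, true, true);	/* no TRIM */
	free_reserved_extent(4096, 4096, false, true);	/* TRIM */
	return 0;
}
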
int err = 0;
int ret;
int level;
+ bool root_dropped = false;
path = btrfs_alloc_path();
if (!path) {
while (1) {
btrfs_tree_lock(path->nodes[level]);
btrfs_set_lock_blocking(path->nodes[level]);
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
path->nodes[level]->start,
break;
btrfs_tree_unlock(path->nodes[level]);
+ path->locks[level] = 0;
WARN_ON(wc->refs[level] != 1);
level--;
}
free_extent_buffer(root->commit_root);
kfree(root);
}
+ root_dropped = true;
out_free:
btrfs_end_transaction_throttle(trans, tree_root);
kfree(wc);
btrfs_free_path(path);
out:
- if (err)
+ /*
+ * So if we need to stop dropping the snapshot for whatever reason we
+ * need to make sure to add it back to the dead root list so that we
+ * keep trying to do the work later. This also cleans up roots if we
+ * don't have it in the radix (like when we recover after a power failure
+ * or unmount) so we don't leak memory.
+ */
+ if (root_dropped == false)
+ btrfs_add_dead_root(root);
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return;
}
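
The root_dropped flag above is set only once the root has been fully dropped; every early exit then reaches the single cleanup point, which re-adds the root to the dead root list so an interrupted drop is retried later and roots not in the radix tree are not leaked. A sketch of that single-exit completion-flag pattern, names illustrative:

#include <stdio.h>
#include <stdbool.h>

static void requeue_for_later(const char *what)
{
	printf("re-adding %s to the dead root list\n", what);
}

static int drop_snapshot(const char *root, bool fail_midway)
{
	int err = 0;
	bool root_dropped = false;

	if (fail_midway) {
		err = -1;
		goto out;	/* early exit: flag stays false */
	}

	/* ... all references released, root fully dropped ... */
	root_dropped = true;
out:
	if (root_dropped == false)
		requeue_for_later(root);
	return err;
}

int main(void)
{
	drop_snapshot("subvol-1", true);	/* requeued */
	drop_snapshot("subvol-2", false);	/* dropped cleanly */
	return 0;
}
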
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- return unpin_extent_range(root, start, end);
+ return unpin_extent_range(root, start, end, false);
}
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,