Btrfs: fix deadlock when throttling transactions

[pandora-kernel.git] / fs / btrfs / transaction.c
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 833996a..eb55863 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
  {
         struct btrfs_trans_handle *h;
         struct btrfs_transaction *cur_trans;
-       int retries = 0;
+       u64 num_bytes = 0;
         int ret;
  
         if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
                 h->block_rsv = NULL;
                 goto got_it;
         }
+
+       /*
+        * Do the reservation before we join the transaction so we can do all
+        * the appropriate flushing if need be.
+        */
+       if (num_items > 0 && root != root->fs_info->chunk_root) {
+               num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+               ret = btrfs_block_rsv_add(NULL, root,
+                                         &root->fs_info->trans_block_rsv,
+                                         num_bytes);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
  again:
         h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
         if (!h)
@@ -310,24 +323,9 @@ again:
                 goto again;
         }
  
-       if (num_items > 0) {
-               ret = btrfs_trans_reserve_metadata(h, root, num_items);
-               if (ret == -EAGAIN && !retries) {
-                       retries++;
-                       btrfs_commit_transaction(h, root);
-                       goto again;
-               } else if (ret == -EAGAIN) {
-                       /*
-                        * We have already retried and got EAGAIN, so really we
-                        * don't have space, so set ret to -ENOSPC.
-                        */
-                       ret = -ENOSPC;
-               }
-
-               if (ret < 0) {
-                       btrfs_end_transaction(h, root);
-                       return ERR_PTR(ret);
-               }
+       if (num_bytes) {
+               h->block_rsv = &root->fs_info->trans_block_rsv;
+               h->bytes_reserved = num_bytes;
         }
  
  got_it:
@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
         }
  
         if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-               if (throttle)
+               if (throttle) {
+                       /*
+                        * We may race with somebody else here and end up
+                        * calling end_transaction on ourselves again, so bump
+                        * our use_count before committing.
+                        */
+                       trans->use_count++;
                         return btrfs_commit_transaction(trans, root);
-               else
+               } else {
                         wake_up_process(info->transaction_kthread);
+               }
         }
  
         WARN_ON(cur_trans != info->running_transaction);
@@ -957,6 +962,15 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
         ret = btrfs_update_inode(trans, parent_root, parent_inode);
         BUG_ON(ret);
  
+       /*
+        * Pull in the delayed directory update
+        * and the delayed inode item; otherwise
+        * we corrupt the FS while taking the
+        * snapshot.
+        */
+       ret = btrfs_run_delayed_items(trans, root);
+       BUG_ON(ret);
+
         record_root_in_trans(trans, root);
         btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
         memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
@@ -1018,14 +1032,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
         int ret;
  
         list_for_each_entry(pending, head, list) {
-               /*
-                * We must deal with the delayed items before creating
-                * snapshots, or we will create a snapthot with inconsistent
-                * information.
-               */
-               ret = btrfs_run_delayed_items(trans, fs_info->fs_root);
-               BUG_ON(ret);
-
                 ret = create_pending_snapshot(trans, fs_info, pending);
                 BUG_ON(ret);
         }
@@ -1298,12 +1304,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                         schedule_timeout(1);
  
                 finish_wait(&cur_trans->writer_wait, &wait);
-               spin_lock(&root->fs_info->trans_lock);
-               root->fs_info->trans_no_join = 1;
-               spin_unlock(&root->fs_info->trans_lock);
         } while (atomic_read(&cur_trans->num_writers) > 1 ||
                  (should_grow && cur_trans->num_joined != joined));
  
+       /*
+        * Ok now we need to make sure to block out any other joins while we
+        * commit the transaction.  We could have started a join before setting
+        * no_join so make sure to wait for num_writers to drop back to 1.
+        */
+       spin_lock(&root->fs_info->trans_lock);
+       root->fs_info->trans_no_join = 1;
+       spin_unlock(&root->fs_info->trans_lock);
+       wait_event(cur_trans->writer_wait,
+                  atomic_read(&cur_trans->num_writers) == 1);
+
         /*
          * the reloc mutex makes sure that we stop
          * the balancing code from coming in and moving
@@ -1311,15 +1325,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
          */
         mutex_lock(&root->fs_info->reloc_mutex);
  
-       ret = create_pending_snapshots(trans, root->fs_info);
+       ret = btrfs_run_delayed_items(trans, root);
         BUG_ON(ret);
  
-       ret = btrfs_run_delayed_items(trans, root);
+       ret = create_pending_snapshots(trans, root->fs_info);
         BUG_ON(ret);
  
         ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
         BUG_ON(ret);
  
+       /*
+        * make sure none of the code above managed to slip in a
+        * delayed item
+        */
+       btrfs_assert_delayed_root_empty(root);
+
         WARN_ON(cur_trans != trans->transaction);
  
         btrfs_scrub_pause(root);