jbd2: don't give up looking for space so easily in __jbd2_log_wait_for_space

author Theodore Ts'o <tytso@mit.edu>
Fri, 7 Nov 2008 03:38:07 +0000 (22:38 -0500)
committer Theodore Ts'o <tytso@mit.edu>
Fri, 7 Nov 2008 03:38:07 +0000 (22:38 -0500)

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9203c33..9497718 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -116,7 +116,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
   */
  void __jbd2_log_wait_for_space(journal_t *journal)
  {
-       int nblocks;
+       int nblocks, space_left;
         assert_spin_locked(&journal->j_state_lock);
  
         nblocks = jbd_space_needed(journal);
@@ -129,25 +129,43 @@ void __jbd2_log_wait_for_space(journal_t *journal)
                 /*
                  * Test again, another process may have checkpointed while we
                  * were waiting for the checkpoint lock. If there are no
-                * outstanding transactions there is nothing to checkpoint and
-                * we can't make progress. Abort the journal in this case.
+                * transactions ready to be checkpointed, try to recover
+                * journal space by calling cleanup_journal_tail(), and if
+                * that doesn't work, by waiting for the currently committing
+                * transaction to complete.  If there is absolutely no way
+                * to make progress, this is either a BUG or corrupted
+                * filesystem, so abort the journal and leave a stack
+                * trace for forensic evidence.
                  */
                 spin_lock(&journal->j_state_lock);
                 spin_lock(&journal->j_list_lock);
                 nblocks = jbd_space_needed(journal);
-               if (__jbd2_log_space_left(journal) < nblocks) {
+               space_left = __jbd2_log_space_left(journal);
+               if (space_left < nblocks) {
                         int chkpt = journal->j_checkpoint_transactions != NULL;
+                       tid_t tid = 0;
  
+                       if (journal->j_committing_transaction)
+                               tid = journal->j_committing_transaction->t_tid;
                         spin_unlock(&journal->j_list_lock);
                         spin_unlock(&journal->j_state_lock);
                         if (chkpt) {
                                 jbd2_log_do_checkpoint(journal);
+                       } else if (jbd2_cleanup_journal_tail(journal) == 0) {
+                               /* We were able to recover space; yay! */
+                               ;
+                       } else if (tid) {
+                               jbd2_log_wait_commit(journal, tid);
                         } else {
-                               printk(KERN_ERR "%s: no transactions\n",
-                                      __func__);
+                               printk(KERN_ERR "%s: needed %d blocks and "
+                                      "only had %d space available\n",
+                                      __func__, nblocks, space_left);
+                               printk(KERN_ERR "%s: no way to get more "
+                                      "journal space in %s\n", __func__,
+                                      journal->j_devname);
+                               WARN_ON(1);
                                 jbd2_journal_abort(journal, 0);
                         }
-
                         spin_lock(&journal->j_state_lock);
                 } else {
                         spin_unlock(&journal->j_list_lock);
author	Theodore Ts'o <tytso@mit.edu>
	Fri, 7 Nov 2008 03:38:07 +0000 (22:38 -0500)
committer	Theodore Ts'o <tytso@mit.edu>
	Fri, 7 Nov 2008 03:38:07 +0000 (22:38 -0500)