ext4: inline ext4_writepage() into mpage_da_submit_io()
[pandora-kernel.git] / fs / jbd2 / transaction.c
index b8e0806..6bf0a24 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/hrtimer.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
 
@@ -53,6 +55,9 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
        transaction->t_tid = journal->j_transaction_sequence++;
        transaction->t_expires = jiffies + journal->j_commit_interval;
        spin_lock_init(&transaction->t_handle_lock);
+       atomic_set(&transaction->t_updates, 0);
+       atomic_set(&transaction->t_outstanding_credits, 0);
+       atomic_set(&transaction->t_handle_count, 0);
        INIT_LIST_HEAD(&transaction->t_inode_list);
        INIT_LIST_HEAD(&transaction->t_private_list);
 
@@ -76,6 +81,32 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
  * of that one update.
  */
 
+/*
+ * Update transiaction's maximum wait time, if debugging is enabled.
+ *
+ * In order for t_max_wait to be reliable, it must be protected by a
+ * lock.  But doing so will mean that start_this_handle() can not be
+ * run in parallel on SMP systems, which limits our scalability.  So
+ * unless debugging is enabled, we no longer update t_max_wait, which
+ * means that maximum wait time reported by the jbd2_run_stats
+ * tracepoint will always be zero.
+ */
+static inline void update_t_max_wait(transaction_t *transaction)
+{
+#ifdef CONFIG_JBD2_DEBUG
+       unsigned long ts = jiffies;
+
+       if (jbd2_journal_enable_debug &&
+           time_after(transaction->t_start, ts)) {
+               ts = jbd2_time_diff(ts, transaction->t_start);
+               spin_lock(&transaction->t_handle_lock);
+               if (ts > transaction->t_max_wait)
+                       transaction->t_max_wait = ts;
+               spin_unlock(&transaction->t_handle_lock);
+       }
+#endif
+}
+
 /*
  * start_this_handle: Given a handle, deal with any locking or stalling
  * needed to make sure that there is enough journal space for the handle
@@ -83,65 +114,75 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
  * transaction's buffer credits.
  */
 
-static int start_this_handle(journal_t *journal, handle_t *handle)
+static int start_this_handle(journal_t *journal, handle_t *handle,
+                            int gfp_mask)
 {
        transaction_t *transaction;
        int needed;
        int nblocks = handle->h_buffer_credits;
        transaction_t *new_transaction = NULL;
-       int ret = 0;
-       unsigned long ts = jiffies;
 
        if (nblocks > journal->j_max_transaction_buffers) {
                printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
                       current->comm, nblocks,
                       journal->j_max_transaction_buffers);
-               ret = -ENOSPC;
-               goto out;
+               return -ENOSPC;
        }
 
 alloc_transaction:
        if (!journal->j_running_transaction) {
-               new_transaction = kzalloc(sizeof(*new_transaction),
-                                               GFP_NOFS|__GFP_NOFAIL);
+               new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
                if (!new_transaction) {
-                       ret = -ENOMEM;
-                       goto out;
+                       /*
+                        * If __GFP_FS is not present, then we may be
+                        * being called from inside the fs writeback
+                        * layer, so we MUST NOT fail.  Since
+                        * __GFP_NOFAIL is going away, we will arrange
+                        * to retry the allocation ourselves.
+                        */
+                       if ((gfp_mask & __GFP_FS) == 0) {
+                               congestion_wait(BLK_RW_ASYNC, HZ/50);
+                               goto alloc_transaction;
+                       }
+                       return -ENOMEM;
                }
        }
 
        jbd_debug(3, "New handle %p going live.\n", handle);
 
-repeat:
-
        /*
         * We need to hold j_state_lock until t_updates has been incremented,
         * for proper journal barrier handling
         */
-       spin_lock(&journal->j_state_lock);
-repeat_locked:
+repeat:
+       read_lock(&journal->j_state_lock);
+       BUG_ON(journal->j_flags & JBD2_UNMOUNT);
        if (is_journal_aborted(journal) ||
            (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
-               spin_unlock(&journal->j_state_lock);
-               ret = -EROFS;
-               goto out;
+               read_unlock(&journal->j_state_lock);
+               kfree(new_transaction);
+               return -EROFS;
        }
 
        /* Wait on the journal's transaction barrier if necessary */
        if (journal->j_barrier_count) {
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                wait_event(journal->j_wait_transaction_locked,
                                journal->j_barrier_count == 0);
                goto repeat;
        }
 
        if (!journal->j_running_transaction) {
-               if (!new_transaction) {
-                       spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
+               if (!new_transaction)
                        goto alloc_transaction;
+               write_lock(&journal->j_state_lock);
+               if (!journal->j_running_transaction) {
+                       jbd2_get_transaction(journal, new_transaction);
+                       new_transaction = NULL;
                }
-               jbd2_get_transaction(journal, new_transaction);
-               new_transaction = NULL;
+               write_unlock(&journal->j_state_lock);
+               goto repeat;
        }
 
        transaction = journal->j_running_transaction;
@@ -155,7 +196,7 @@ repeat_locked:
 
                prepare_to_wait(&journal->j_wait_transaction_locked,
                                        &wait, TASK_UNINTERRUPTIBLE);
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                schedule();
                finish_wait(&journal->j_wait_transaction_locked, &wait);
                goto repeat;
@@ -166,8 +207,8 @@ repeat_locked:
         * buffers requested by this operation, we need to stall pending a log
         * checkpoint to free some more log space.
         */
-       spin_lock(&transaction->t_handle_lock);
-       needed = transaction->t_outstanding_credits + nblocks;
+       needed = atomic_add_return(nblocks,
+                                  &transaction->t_outstanding_credits);
 
        if (needed > journal->j_max_transaction_buffers) {
                /*
@@ -178,11 +219,11 @@ repeat_locked:
                DEFINE_WAIT(wait);
 
                jbd_debug(2, "Handle %p starting new commit...\n", handle);
-               spin_unlock(&transaction->t_handle_lock);
+               atomic_sub(nblocks, &transaction->t_outstanding_credits);
                prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
                                TASK_UNINTERRUPTIBLE);
                __jbd2_log_start_commit(journal, transaction->t_tid);
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                schedule();
                finish_wait(&journal->j_wait_transaction_locked, &wait);
                goto repeat;
@@ -215,35 +256,31 @@ repeat_locked:
         */
        if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
                jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
-               spin_unlock(&transaction->t_handle_lock);
-               __jbd2_log_wait_for_space(journal);
-               goto repeat_locked;
+               atomic_sub(nblocks, &transaction->t_outstanding_credits);
+               read_unlock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
+               if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
+                       __jbd2_log_wait_for_space(journal);
+               write_unlock(&journal->j_state_lock);
+               goto repeat;
        }
 
        /* OK, account for the buffers that this operation expects to
-        * use and add the handle to the running transaction. */
-
-       if (time_after(transaction->t_start, ts)) {
-               ts = jbd2_time_diff(ts, transaction->t_start);
-               if (ts > transaction->t_max_wait)
-                       transaction->t_max_wait = ts;
-       }
-
+        * use and add the handle to the running transaction. 
+        */
+       update_t_max_wait(transaction);
        handle->h_transaction = transaction;
-       transaction->t_outstanding_credits += nblocks;
-       transaction->t_updates++;
-       transaction->t_handle_count++;
+       atomic_inc(&transaction->t_updates);
+       atomic_inc(&transaction->t_handle_count);
        jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
-                 handle, nblocks, transaction->t_outstanding_credits,
+                 handle, nblocks,
+                 atomic_read(&transaction->t_outstanding_credits),
                  __jbd2_log_space_left(journal));
-       spin_unlock(&transaction->t_handle_lock);
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
 
        lock_map_acquire(&handle->h_lockdep_map);
-out:
-       if (unlikely(new_transaction))          /* It's usually NULL */
-               kfree(new_transaction);
-       return ret;
+       kfree(new_transaction);
+       return 0;
 }
 
 static struct lock_class_key jbd2_handle_key;
@@ -278,7 +315,7 @@ static handle_t *new_handle(int nblocks)
  *
  * Return a pointer to a newly allocated handle, or NULL on failure
  */
-handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
 {
        handle_t *handle = journal_current_handle();
        int err;
@@ -298,7 +335,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 
        current->journal_info = handle;
 
-       err = start_this_handle(journal, handle);
+       err = start_this_handle(journal, handle, gfp_mask);
        if (err < 0) {
                jbd2_free_handle(handle);
                current->journal_info = NULL;
@@ -308,6 +345,15 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
 out:
        return handle;
 }
+EXPORT_SYMBOL(jbd2__journal_start);
+
+
+handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
+{
+       return jbd2__journal_start(journal, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_start);
+
 
 /**
  * int jbd2_journal_extend() - extend buffer credits.
@@ -342,7 +388,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
 
        result = 1;
 
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
 
        /* Don't extend a locked-down transaction! */
        if (handle->h_transaction->t_state != T_RUNNING) {
@@ -352,7 +398,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
        }
 
        spin_lock(&transaction->t_handle_lock);
-       wanted = transaction->t_outstanding_credits + nblocks;
+       wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;
 
        if (wanted > journal->j_max_transaction_buffers) {
                jbd_debug(3, "denied handle %p %d blocks: "
@@ -367,14 +413,14 @@ int jbd2_journal_extend(handle_t *handle, int nblocks)
        }
 
        handle->h_buffer_credits += nblocks;
-       transaction->t_outstanding_credits += nblocks;
+       atomic_add(nblocks, &transaction->t_outstanding_credits);
        result = 0;
 
        jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
 unlock:
        spin_unlock(&transaction->t_handle_lock);
 error_out:
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
 out:
        return result;
 }
@@ -394,8 +440,7 @@ out:
  * transaction capabable of guaranteeing the requested number of
  * credits.
  */
-
-int jbd2_journal_restart(handle_t *handle, int nblocks)
+int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
@@ -410,29 +455,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks)
         * First unlink the handle from its current transaction, and start the
         * commit on that.
         */
-       J_ASSERT(transaction->t_updates > 0);
+       J_ASSERT(atomic_read(&transaction->t_updates) > 0);
        J_ASSERT(journal_current_handle() == handle);
 
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
        spin_lock(&transaction->t_handle_lock);
-       transaction->t_outstanding_credits -= handle->h_buffer_credits;
-       transaction->t_updates--;
-
-       if (!transaction->t_updates)
+       atomic_sub(handle->h_buffer_credits,
+                  &transaction->t_outstanding_credits);
+       if (atomic_dec_and_test(&transaction->t_updates))
                wake_up(&journal->j_wait_updates);
        spin_unlock(&transaction->t_handle_lock);
 
        jbd_debug(2, "restarting handle %p\n", handle);
        __jbd2_log_start_commit(journal, transaction->t_tid);
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
 
        lock_map_release(&handle->h_lockdep_map);
        handle->h_buffer_credits = nblocks;
-       ret = start_this_handle(journal, handle);
+       ret = start_this_handle(journal, handle, gfp_mask);
        return ret;
 }
+EXPORT_SYMBOL(jbd2__journal_restart);
 
 
+int jbd2_journal_restart(handle_t *handle, int nblocks)
+{
+       return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
+}
+EXPORT_SYMBOL(jbd2_journal_restart);
+
 /**
  * void jbd2_journal_lock_updates () - establish a transaction barrier.
  * @journal:  Journal to establish a barrier on.
@@ -447,7 +498,7 @@ void jbd2_journal_lock_updates(journal_t *journal)
 {
        DEFINE_WAIT(wait);
 
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
        ++journal->j_barrier_count;
 
        /* Wait until there are no running updates */
@@ -458,19 +509,19 @@ void jbd2_journal_lock_updates(journal_t *journal)
                        break;
 
                spin_lock(&transaction->t_handle_lock);
-               if (!transaction->t_updates) {
+               if (!atomic_read(&transaction->t_updates)) {
                        spin_unlock(&transaction->t_handle_lock);
                        break;
                }
                prepare_to_wait(&journal->j_wait_updates, &wait,
                                TASK_UNINTERRUPTIBLE);
                spin_unlock(&transaction->t_handle_lock);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                schedule();
                finish_wait(&journal->j_wait_updates, &wait);
-               spin_lock(&journal->j_state_lock);
+               write_lock(&journal->j_state_lock);
        }
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
 
        /*
         * We have now established a barrier against other normal updates, but
@@ -494,9 +545,9 @@ void jbd2_journal_unlock_updates (journal_t *journal)
        J_ASSERT(journal->j_barrier_count != 0);
 
        mutex_unlock(&journal->j_barrier);
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
        --journal->j_barrier_count;
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
        wake_up(&journal->j_wait_transaction_locked);
 }
 
@@ -1238,7 +1289,8 @@ int jbd2_journal_stop(handle_t *handle)
 {
        transaction_t *transaction = handle->h_transaction;
        journal_t *journal = transaction->t_journal;
-       int err;
+       int err, wait_for_commit = 0;
+       tid_t tid;
        pid_t pid;
 
        J_ASSERT(journal_current_handle() == handle);
@@ -1246,7 +1298,7 @@ int jbd2_journal_stop(handle_t *handle)
        if (is_handle_aborted(handle))
                err = -EIO;
        else {
-               J_ASSERT(transaction->t_updates > 0);
+               J_ASSERT(atomic_read(&transaction->t_updates) > 0);
                err = 0;
        }
 
@@ -1291,9 +1343,9 @@ int jbd2_journal_stop(handle_t *handle)
 
                journal->j_last_sync_writer = pid;
 
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
                commit_time = journal->j_average_commit_time;
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
 
                trans_time = ktime_to_ns(ktime_sub(ktime_get(),
                                                   transaction->t_start_time));
@@ -1314,14 +1366,8 @@ int jbd2_journal_stop(handle_t *handle)
        if (handle->h_sync)
                transaction->t_synchronous_commit = 1;
        current->journal_info = NULL;
-       spin_lock(&transaction->t_handle_lock);
-       transaction->t_outstanding_credits -= handle->h_buffer_credits;
-       transaction->t_updates--;
-       if (!transaction->t_updates) {
-               wake_up(&journal->j_wait_updates);
-               if (journal->j_barrier_count)
-                       wake_up(&journal->j_wait_transaction_locked);
-       }
+       atomic_sub(handle->h_buffer_credits,
+                  &transaction->t_outstanding_credits);
 
        /*
         * If the handle is marked SYNC, we need to set another commit
@@ -1330,15 +1376,13 @@ int jbd2_journal_stop(handle_t *handle)
         * transaction is too old now.
         */
        if (handle->h_sync ||
-                       transaction->t_outstanding_credits >
-                               journal->j_max_transaction_buffers ||
-                       time_after_eq(jiffies, transaction->t_expires)) {
+           (atomic_read(&transaction->t_outstanding_credits) >
+            journal->j_max_transaction_buffers) ||
+           time_after_eq(jiffies, transaction->t_expires)) {
                /* Do this even for aborted journals: an abort still
                 * completes the commit thread, it just doesn't write
                 * anything to disk. */
-               tid_t tid = transaction->t_tid;
 
-               spin_unlock(&transaction->t_handle_lock);
                jbd_debug(2, "transaction too old, requesting commit for "
                                        "handle %p\n", handle);
                /* This is non-blocking */
@@ -1349,11 +1393,25 @@ int jbd2_journal_stop(handle_t *handle)
                 * to wait for the commit to complete.
                 */
                if (handle->h_sync && !(current->flags & PF_MEMALLOC))
-                       err = jbd2_log_wait_commit(journal, tid);
-       } else {
-               spin_unlock(&transaction->t_handle_lock);
+                       wait_for_commit = 1;
        }
 
+       /*
+        * Once we drop t_updates, if it goes to zero the transaction
+        * could start commiting on us and eventually disappear.  So
+        * once we do this, we must not dereference transaction
+        * pointer again.
+        */
+       tid = transaction->t_tid;
+       if (atomic_dec_and_test(&transaction->t_updates)) {
+               wake_up(&journal->j_wait_updates);
+               if (journal->j_barrier_count)
+                       wake_up(&journal->j_wait_transaction_locked);
+       }
+
+       if (wait_for_commit)
+               err = jbd2_log_wait_commit(journal, tid);
+
        lock_map_release(&handle->h_lockdep_map);
 
        jbd2_free_handle(handle);
@@ -1719,7 +1777,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                goto zap_buffer_unlocked;
 
        /* OK, we have data buffer in journaled mode */
-       spin_lock(&journal->j_state_lock);
+       write_lock(&journal->j_state_lock);
        jbd_lock_bh_state(bh);
        spin_lock(&journal->j_list_lock);
 
@@ -1772,7 +1830,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                        jbd2_journal_put_journal_head(jh);
                        spin_unlock(&journal->j_list_lock);
                        jbd_unlock_bh_state(bh);
-                       spin_unlock(&journal->j_state_lock);
+                       write_unlock(&journal->j_state_lock);
                        return ret;
                } else {
                        /* There is no currently-running transaction. So the
@@ -1786,7 +1844,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                                jbd2_journal_put_journal_head(jh);
                                spin_unlock(&journal->j_list_lock);
                                jbd_unlock_bh_state(bh);
-                               spin_unlock(&journal->j_state_lock);
+                               write_unlock(&journal->j_state_lock);
                                return ret;
                        } else {
                                /* The orphan record's transaction has
@@ -1810,7 +1868,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
                jbd2_journal_put_journal_head(jh);
                spin_unlock(&journal->j_list_lock);
                jbd_unlock_bh_state(bh);
-               spin_unlock(&journal->j_state_lock);
+               write_unlock(&journal->j_state_lock);
                return 0;
        } else {
                /* Good, the buffer belongs to the running transaction.
@@ -1829,7 +1887,7 @@ zap_buffer:
 zap_buffer_no_jh:
        spin_unlock(&journal->j_list_lock);
        jbd_unlock_bh_state(bh);
-       spin_unlock(&journal->j_state_lock);
+       write_unlock(&journal->j_state_lock);
 zap_buffer_unlocked:
        clear_buffer_dirty(bh);
        J_ASSERT_BH(bh, !buffer_jbddirty(bh));
@@ -2136,9 +2194,9 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
        /* Locks are here just to force reading of recent values, it is
         * enough that the transaction was not committing before we started
         * a transaction adding the inode to orphan list */
-       spin_lock(&journal->j_state_lock);
+       read_lock(&journal->j_state_lock);
        commit_trans = journal->j_committing_transaction;
-       spin_unlock(&journal->j_state_lock);
+       read_unlock(&journal->j_state_lock);
        spin_lock(&journal->j_list_lock);
        inode_trans = jinode->i_transaction;
        spin_unlock(&journal->j_list_lock);