Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 16b526f..1b73529 100644
@@ -846,6 +846,14 @@ static int write_ordered_buffers(spinlock_t * lock,
                        spin_lock(lock);
                        goto loop_next;
                }
+               /* In theory, dirty non-uptodate buffers should never get here,
+                * but the upper layer I/O error paths still have a few quirks.
+                * Handle them here as gracefully as we can.
+                */
+               if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
+                       clear_buffer_dirty(bh);
+                       ret = -EIO;
+               }
                if (buffer_dirty(bh)) {
                        list_del_init(&jh->list);
                        list_add(&jh->list, &tmp);
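
A side note on the state combination the new check handles (the helper below is hypothetical, illustrative only, not part of the patch): a buffer that failed I/O is left !uptodate, and if an upper-layer error path also leaves it dirty, resubmitting it would write stale data to disk, so the patch drops the dirty bit and records -EIO instead.

#include <linux/buffer_head.h>

/* Hypothetical helper (not in journal.c): detect a buffer that took an
 * I/O error but is still marked dirty, and make sure it is not written
 * back.  buffer_uptodate()/buffer_dirty()/clear_buffer_dirty() are the
 * standard BH_Uptodate/BH_Dirty bit accessors.
 */
static inline int bh_failed_io(struct buffer_head *bh)
{
        if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
                clear_buffer_dirty(bh);        /* don't resubmit bad data */
                return 1;                      /* caller records -EIO */
        }
        return 0;
}
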
@@ -877,6 +885,19 @@ static int write_ordered_buffers(spinlock_t * lock,
                if (!buffer_uptodate(bh)) {
                        ret = -EIO;
                }
+               /* ugly interaction with invalidatepage here.
+                * reiserfs_invalidate_page will pin any buffer that has a valid
+                * journal head from an older transaction.  If someone else sets
+                * our buffer dirty after we write it in the first loop, and
+                * then someone truncates the page away, nobody will ever write
+                * the buffer. We're safe if we write the page one last time
+                * after freeing the journal header.
+                */
+               if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+                       spin_unlock(lock);
+                       ll_rw_block(WRITE, 1, &bh);
+                       spin_lock(lock);
+               }
                put_bh(bh);
                cond_resched_lock(lock);
        }
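
The mapping test above is the "page was truncated" check: reiserfs_invalidate_page keeps such a buffer pinned through its journal head, and once the page has been detached from its address_space no other writeback path will ever touch it, hence the one last ll_rw_block() write. A hypothetical helper (illustrative only) spelling the test out:

#include <linux/buffer_head.h>
#include <linux/mm.h>

/* Hypothetical helper, not part of the patch: a page that has been
 * truncated away no longer has a mapping, so a dirty buffer on it will
 * never be written by ordinary writeback and needs a final write here.
 */
static inline int bh_page_truncated(struct buffer_head *bh)
{
        return bh->b_page->mapping == NULL;
}
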
@@ -975,6 +996,7 @@ static int flush_commit_list(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        int barrier = 0;
        int retval = 0;
+       int write_len;
 
        reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1016,24 +1038,35 @@ static int flush_commit_list(struct super_block *s,
        }
 
        if (!list_empty(&jl->j_bh_list)) {
+               int ret;
                unlock_kernel();
-               write_ordered_buffers(&journal->j_dirty_buffers_lock,
-                                     journal, jl, &jl->j_bh_list);
+               ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
+                                           journal, jl, &jl->j_bh_list);
+               if (ret < 0 && retval == 0)
+                       retval = ret;
                lock_kernel();
        }
        BUG_ON(!list_empty(&jl->j_bh_list));
        /*
         * for the description block and all the log blocks, submit any buffers
-        * that haven't already reached the disk
+        * that haven't already reached the disk.  Try to write at least 256
+        * log blocks. Later on, we will only wait on blocks that correspond
+        * to this transaction, but while we're unplugging we might as well
+        * get a chunk of data on there.
         */
        atomic_inc(&journal->j_async_throttle);
-       for (i = 0; i < (jl->j_len + 1); i++) {
+       write_len = jl->j_len + 1;
+       if (write_len < 256)
+               write_len = 256;
+       for (i = 0; i < write_len; i++) {
                bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
                    SB_ONDISK_JOURNAL_SIZE(s);
                tbh = journal_find_get_block(s, bn);
-               if (buffer_dirty(tbh))  /* redundant, ll_rw_block() checks */
-                       ll_rw_block(SWRITE, 1, &tbh);
-               put_bh(tbh);
+               if (tbh) {
+                       if (buffer_dirty(tbh))
+                               ll_rw_block(WRITE, 1, &tbh);
+                       put_bh(tbh);
+               }
        }
        atomic_dec(&journal->j_async_throttle);
 
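
Two changes in the loop above: journal_find_get_block() may return NULL and is now checked before use, and each pass pushes at least 256 log blocks so the unplug carries a useful chunk of the log. The index arithmetic walks a circular on-disk area; a hypothetical helper (illustrative only, assuming the journal.c context for the SB_ONDISK_JOURNAL_* macros) showing the same wrap-around mapping:

/* Hypothetical helper, equivalent to the arithmetic in the loop above:
 * the on-disk journal is a circular region of SB_ONDISK_JOURNAL_SIZE(s)
 * blocks starting at SB_ONDISK_JOURNAL_1st_BLOCK(s), so offset i past
 * jl->j_start wraps back to the beginning of the log area.
 */
static inline unsigned long nth_log_block(struct super_block *s,
                                          unsigned long start, int i)
{
        return SB_ONDISK_JOURNAL_1st_BLOCK(s) +
                (start + i) % SB_ONDISK_JOURNAL_SIZE(s);
}
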
@@ -2194,6 +2227,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
        journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
        journal->j_last_flush_trans_id = trans_id;
        journal->j_trans_id = trans_id + 1;
+       /* check for trans_id overflow */
+       if (journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        brelse(c_bh);
        brelse(d_bh);
        kfree(log_blocks);
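
The same trans_id rule appears three times in this patch (here, in journal_read(), and in do_journal_end()): the counter must never be 0, so after a wrap it restarts at 10, the value the patch picks. A small illustrative helper capturing what each site open-codes:

/* Illustrative only (each site in the patch open-codes this): advance a
 * journal transaction id, skipping 0 when the counter wraps around and
 * restarting at 10.
 */
static inline unsigned long next_trans_id(unsigned long id)
{
        id++;
        if (id == 0)            /* overflowed */
                id = 10;
        return id;
}
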
@@ -2286,8 +2322,7 @@ static int journal_read(struct super_block *p_s_sb)
                return 1;
        }
        jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
-       if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
-           le32_to_cpu(jh->j_first_unflushed_offset) <
+       if (le32_to_cpu(jh->j_first_unflushed_offset) <
            SB_ONDISK_JOURNAL_SIZE(p_s_sb)
            && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
                oldest_start =
@@ -2418,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb)
                journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
                journal->j_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+               /* check for trans_id overflow */
+               if (journal->j_trans_id == 0)
+                       journal->j_trans_id = 10;
                journal->j_last_flush_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id);
                journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -2446,12 +2484,8 @@ static int journal_read(struct super_block *p_s_sb)
 static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 {
        struct reiserfs_journal_list *jl;
-      retry:
-       jl = kzalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS);
-       if (!jl) {
-               yield();
-               goto retry;
-       }
+       jl = kzalloc(sizeof(struct reiserfs_journal_list),
+                    GFP_NOFS | __GFP_NOFAIL);
        INIT_LIST_HEAD(&jl->j_list);
        INIT_LIST_HEAD(&jl->j_working_list);
        INIT_LIST_HEAD(&jl->j_tail_bh_list);
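
__GFP_NOFAIL moves the "retry until it succeeds" policy out of the open-coded loop and into the page allocator. For comparison, the removed logic amounted to the following (a sketch of the old behaviour, not something to add back; the function name is hypothetical):

/* Sketch of the removed retry loop: allocate by hand, yielding the CPU
 * between failed attempts, and never return NULL.
 */
static struct reiserfs_journal_list *alloc_jl_retry(void)
{
        struct reiserfs_journal_list *jl;

        do {
                jl = kzalloc(sizeof(*jl), GFP_NOFS);
                if (!jl)
                        yield();
        } while (!jl);
        return jl;
}
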
@@ -2794,6 +2828,9 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
            journal->j_cnode_free < (journal->j_trans_max * 3)) {
                return 1;
        }
+       /* protected by the BKL here */
+       journal->j_len_alloc += new_alloc;
+       th->t_blocks_allocated += new_alloc;
        return 0;
 }
 
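
With the added lines, a 0 return from journal_transaction_should_end() also charges new_alloc blocks to the running transaction (the update is serialized by the BKL), so a caller that keeps the handle open does not account for those blocks twice. A hedged sketch of the caller pattern, with a hypothetical restart helper standing in for whatever the real caller does when the answer is "yes, end it":

/* Hypothetical caller sketch (not from this diff): either end the
 * transaction and start a new one, or keep going, knowing new_alloc is
 * already reflected in th->t_blocks_allocated and j_len_alloc.
 */
static int reserve_or_restart(struct reiserfs_transaction_handle *th,
                              struct inode *inode, int new_alloc)
{
        if (journal_transaction_should_end(th, new_alloc))
                return example_restart_transaction(th, inode); /* hypothetical */
        return 0;       /* blocks already accounted by the call above */
}
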
@@ -3842,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        int cur_write_start = 0;        /* start index of current log write */
        int old_start;
        int i;
-       int flush = flags & FLUSH_ALL;
-       int wait_on_commit = flags & WAIT;
+       int flush;
+       int wait_on_commit;
        struct reiserfs_journal_list *jl, *temp_jl;
        struct list_head *entry, *safe;
        unsigned long jindex;
@@ -3853,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        BUG_ON(th->t_refcount > 1);
        BUG_ON(!th->t_trans_id);
 
+       /* protect flush_older_commits from making mistakes if the
+        * transaction ID counter overflows. */
+       if (th->t_trans_id == ~0UL)
+               flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+       flush = flags & FLUSH_ALL;
+       wait_on_commit = flags & WAIT;
+
        put_fs_excl();
        current->journal_info = th->t_handle_save;
        reiserfs_check_lock_depth(p_s_sb, "journal end");
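
Initializing flush and wait_on_commit only after this check is what lets the forced flags take effect. The reason ~0UL is special: ordering tests in flush_older_commits assume transaction ids increase monotonically, so the wrap from ~0UL back to the restart value must only happen across a journal that has been fully flushed and committed. Illustrative only:

/* Illustrative only: the wrap being guarded against.  By forcing a full
 * flush on the last id before the wrap, no older, still-unflushed list
 * can be left holding a numerically larger id than a newer one.
 */
static inline unsigned long demo_next_id_after_max(void)
{
        unsigned long id = ~0UL;        /* last representable id */

        id++;                           /* wraps to 0 ... */
        if (id == 0)
                id = 10;                /* ... which the journal maps to 10 */
        return id;
}
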
@@ -4074,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        journal->j_first = NULL;
        journal->j_len = 0;
        journal->j_trans_start_time = 0;
-       journal->j_trans_id++;
+       /* check for trans_id overflow */
+       if (++journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        journal->j_current_jl->j_trans_id = journal->j_trans_id;
        journal->j_must_wait = 0;
        journal->j_len_alloc = 0;