Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 16b526f..1b73529 100644
@@ -846,6 +846,14 @@ static int write_ordered_buffers(spinlock_t * lock,
                        spin_lock(lock);
                        goto loop_next;
                }
+               /* In theory, dirty non-uptodate buffers should never get here,
+                * but the upper layer I/O error paths still have a few quirks.
+                * Handle them here as gracefully as we can.
+                */
+               if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
+                       clear_buffer_dirty(bh);
+                       ret = -EIO;
+               }
                if (buffer_dirty(bh)) {
                        list_del_init(&jh->list);
                        list_add(&jh->list, &tmp);
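
A side note on the state combination the new check handles (the helper below is hypothetical, illustrative only, not part of the patch): a buffer that failed I/O is left !uptodate, and if an upper-layer error path also leaves it dirty, resubmitting it would write stale data to disk, so the patch drops the dirty bit and records -EIO instead.

#include <linux/buffer_head.h>

/* Hypothetical helper (not in journal.c): detect a buffer that took an
 * I/O error but is still marked dirty, and make sure it is not written
 * back.  buffer_uptodate()/buffer_dirty()/clear_buffer_dirty() are the
 * standard BH_Uptodate/BH_Dirty bit accessors.
 */
static inline int bh_failed_io(struct buffer_head *bh)
{
        if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
                clear_buffer_dirty(bh);        /* don't resubmit bad data */
                return 1;                      /* caller records -EIO */
        }
        return 0;
}
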
@@ -877,6 +885,19 @@ static int write_ordered_buffers(spinlock_t * lock,
                if (!buffer_uptodate(bh)) {
                        ret = -EIO;
                }
+               /* ugly interaction with invalidatepage here.
+                * reiserfs_invalidate_page will pin any buffer that has a valid
+                * journal head from an older transaction.  If someone else sets
+                * our buffer dirty after we write it in the first loop, and
+                * then someone truncates the page away, nobody will ever write
+                * the buffer. We're safe if we write the page one last time
+                * after freeing the journal header.
+                */
+               if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+                       spin_unlock(lock);
+                       ll_rw_block(WRITE, 1, &bh);
+                       spin_lock(lock);
+               }
                put_bh(bh);
                cond_resched_lock(lock);
        }
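
The mapping test above is the "page was truncated" check: reiserfs_invalidate_page keeps such a buffer pinned through its journal head, and once the page has been detached from its address_space no other writeback path will ever touch it, hence the one last ll_rw_block() write. A hypothetical helper (illustrative only) spelling the test out:

#include <linux/buffer_head.h>
#include <linux/mm.h>

/* Hypothetical helper, not part of the patch: a page that has been
 * truncated away no longer has a mapping, so a dirty buffer on it will
 * never be written by ordinary writeback and needs a final write here.
 */
static inline int bh_page_truncated(struct buffer_head *bh)
{
        return bh->b_page->mapping == NULL;
}
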
@@ -975,6 +996,7 @@ static int flush_commit_list(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        int barrier = 0;
        int retval = 0;
+       int write_len;
 
        reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1016,24 +1038,35 @@ static int flush_commit_list(struct super_block *s,
        }
 
        if (!list_empty(&jl->j_bh_list)) {
+               int ret;
                unlock_kernel();
-               write_ordered_buffers(&journal->j_dirty_buffers_lock,
-                                     journal, jl, &jl->j_bh_list);
+               ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
+                                           journal, jl, &jl->j_bh_list);
+               if (ret < 0 && retval == 0)
+                       retval = ret;
                lock_kernel();
        }
        BUG_ON(!list_empty(&jl->j_bh_list));
        /*
         * for the description block and all the log blocks, submit any buffers
-        * that haven't already reached the disk
+        * that haven't already reached the disk.  Try to write at least 256
+        * log blocks. Later on, we will only wait on blocks that correspond
+        * to this transaction, but while we're unplugging we might as well
+        * get a chunk of data on there.
         */
        atomic_inc(&journal->j_async_throttle);
-       for (i = 0; i < (jl->j_len + 1); i++) {
+       write_len = jl->j_len + 1;
+       if (write_len < 256)
+               write_len = 256;
+       for (i = 0; i < write_len; i++) {
                bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
                    SB_ONDISK_JOURNAL_SIZE(s);
                tbh = journal_find_get_block(s, bn);
-               if (buffer_dirty(tbh))  /* redundant, ll_rw_block() checks */
-                       ll_rw_block(SWRITE, 1, &tbh);
-               put_bh(tbh);
+               if (tbh) {
+                       if (buffer_dirty(tbh))
+                               ll_rw_block(WRITE, 1, &tbh);
+                       put_bh(tbh);
+               }
        }
        atomic_dec(&journal->j_async_throttle);
 
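
Two changes in the loop above: journal_find_get_block() may return NULL and is now checked before use, and each pass pushes at least 256 log blocks so the unplug carries a useful chunk of the log. The index arithmetic walks a circular on-disk area; a hypothetical helper (illustrative only, assuming the journal.c context for the SB_ONDISK_JOURNAL_* macros) showing the same wrap-around mapping:

/* Hypothetical helper, equivalent to the arithmetic in the loop above:
 * the on-disk journal is a circular region of SB_ONDISK_JOURNAL_SIZE(s)
 * blocks starting at SB_ONDISK_JOURNAL_1st_BLOCK(s), so offset i past
 * jl->j_start wraps back to the beginning of the log area.
 */
static inline unsigned long nth_log_block(struct super_block *s,
                                          unsigned long start, int i)
{
        return SB_ONDISK_JOURNAL_1st_BLOCK(s) +
                (start + i) % SB_ONDISK_JOURNAL_SIZE(s);
}
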
@@ -2194,6 +2227,9 @@ static int journal_read_transaction(struct super_block *p_s_sb,
        journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
        journal->j_last_flush_trans_id = trans_id;
        journal->j_trans_id = trans_id + 1;
+       /* check for trans_id overflow */
+       if (journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        brelse(c_bh);
        brelse(d_bh);
        kfree(log_blocks);
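
The same trans_id rule appears three times in this patch (here, in journal_read(), and in do_journal_end()): the counter must never be 0, so after a wrap it restarts at 10, the value the patch picks. A small illustrative helper capturing what each site open-codes:

/* Illustrative only (each site in the patch open-codes this): advance a
 * journal transaction id, skipping 0 when the counter wraps around and
 * restarting at 10.
 */
static inline unsigned long next_trans_id(unsigned long id)
{
        id++;
        if (id == 0)            /* overflowed */
                id = 10;
        return id;
}
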
@@ -2286,8 +2322,7 @@ static int journal_read(struct super_block *p_s_sb)
                return 1;
        }
        jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
-       if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
-           le32_to_cpu(jh->j_first_unflushed_offset) <
+       if (le32_to_cpu(jh->j_first_unflushed_offset) <
            SB_ONDISK_JOURNAL_SIZE(p_s_sb)
            && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
                oldest_start =
@@ -2418,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb)
                journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
                journal->j_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+               /* check for trans_id overflow */
+               if (journal->j_trans_id == 0)
+                       journal->j_trans_id = 10;
                journal->j_last_flush_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id);
                journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -2446,12 +2484,8 @@ static int journal_read(struct super_block *p_s_sb)
 static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 {
        struct reiserfs_journal_list *jl;
-      retry:
-       jl = kzalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS);
-       if (!jl) {
-               yield();
-               goto retry;
-       }
+       jl = kzalloc(sizeof(struct reiserfs_journal_list),
+                    GFP_NOFS | __GFP_NOFAIL);
        INIT_LIST_HEAD(&jl->j_list);
        INIT_LIST_HEAD(&jl->j_working_list);
        INIT_LIST_HEAD(&jl->j_tail_bh_list);
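
__GFP_NOFAIL moves the "retry until it succeeds" policy out of the open-coded loop and into the page allocator. For comparison, the removed logic amounted to the following (a sketch of the old behaviour, not something to add back; the function name is hypothetical):

/* Sketch of the removed retry loop: allocate by hand, yielding the CPU
 * between failed attempts, and never return NULL.
 */
static struct reiserfs_journal_list *alloc_jl_retry(void)
{
        struct reiserfs_journal_list *jl;

        do {
                jl = kzalloc(sizeof(*jl), GFP_NOFS);
                if (!jl)
                        yield();
        } while (!jl);
        return jl;
}
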
@@ -2794,6 +2828,9 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
            journal->j_cnode_free < (journal->j_trans_max * 3)) {
                return 1;
        }
+       /* protected by the BKL here */
+       journal->j_len_alloc += new_alloc;
+       th->t_blocks_allocated += new_alloc;
        return 0;
 }
 
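
With the added lines, a 0 return from journal_transaction_should_end() also charges new_alloc blocks to the running transaction (the update is serialized by the BKL), so a caller that keeps the handle open does not account for those blocks twice. A hedged sketch of the caller pattern, with a hypothetical restart helper standing in for whatever the real caller does when the answer is "yes, end it":

/* Hypothetical caller sketch (not from this diff): either end the
 * transaction and start a new one, or keep going, knowing new_alloc is
 * already reflected in th->t_blocks_allocated and j_len_alloc.
 */
static int reserve_or_restart(struct reiserfs_transaction_handle *th,
                              struct inode *inode, int new_alloc)
{
        if (journal_transaction_should_end(th, new_alloc))
                return example_restart_transaction(th, inode); /* hypothetical */
        return 0;       /* blocks already accounted by the call above */
}
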
@@ -3842,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        int cur_write_start = 0;        /* start index of current log write */
        int old_start;
        int i;
-       int flush = flags & FLUSH_ALL;
-       int wait_on_commit = flags & WAIT;
+       int flush;
+       int wait_on_commit;
        struct reiserfs_journal_list *jl, *temp_jl;
        struct list_head *entry, *safe;
        unsigned long jindex;
@@ -3853,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        BUG_ON(th->t_refcount > 1);
        BUG_ON(!th->t_trans_id);
 
+       /* protect flush_older_commits from making mistakes if the
+        * transaction ID counter overflows. */
+       if (th->t_trans_id == ~0UL)
+               flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+       flush = flags & FLUSH_ALL;
+       wait_on_commit = flags & WAIT;
+
        put_fs_excl();
        current->journal_info = th->t_handle_save;
        reiserfs_check_lock_depth(p_s_sb, "journal end");
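
Initializing flush and wait_on_commit only after this check is what lets the forced flags take effect. The reason ~0UL is special: ordering tests in flush_older_commits assume transaction ids increase monotonically, so the wrap from ~0UL back to the restart value must only happen across a journal that has been fully flushed and committed. Illustrative only:

/* Illustrative only: the wrap being guarded against.  By forcing a full
 * flush on the last id before the wrap, no older, still-unflushed list
 * can be left holding a numerically larger id than a newer one.
 */
static inline unsigned long demo_next_id_after_max(void)
{
        unsigned long id = ~0UL;        /* last representable id */

        id++;                           /* wraps to 0 ... */
        if (id == 0)
                id = 10;                /* ... which the journal maps to 10 */
        return id;
}
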
@@ -4074,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        journal->j_first = NULL;
        journal->j_len = 0;
        journal->j_trans_start_time = 0;
-       journal->j_trans_id++;
+       /* check for trans_id overflow */
+       if (++journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        journal->j_current_jl->j_trans_id = journal->j_trans_id;
        journal->j_must_wait = 0;
        journal->j_len_alloc = 0;