Merge with /pub/scm/linux/kernel/git/torvalds/linux-2.6.git
[pandora-kernel.git] / fs / reiserfs / journal.c
index 4491fcf..1b73529 100644 (file)
@@ -152,18 +152,16 @@ static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block
        struct reiserfs_bitmap_node *bn;
        static int id;
 
-       bn = reiserfs_kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS,
-                             p_s_sb);
+       bn = kmalloc(sizeof(struct reiserfs_bitmap_node), GFP_NOFS);
        if (!bn) {
                return NULL;
        }
-       bn->data = reiserfs_kmalloc(p_s_sb->s_blocksize, GFP_NOFS, p_s_sb);
+       bn->data = kzalloc(p_s_sb->s_blocksize, GFP_NOFS);
        if (!bn->data) {
-               reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+               kfree(bn);
                return NULL;
        }
        bn->id = id++;
-       memset(bn->data, 0, p_s_sb->s_blocksize);
        INIT_LIST_HEAD(&bn->list);
        return bn;
 }
@@ -197,8 +195,8 @@ static inline void free_bitmap_node(struct super_block *p_s_sb,
        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
        journal->j_used_bitmap_nodes--;
        if (journal->j_free_bitmap_nodes > REISERFS_MAX_BITMAP_NODES) {
-               reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
-               reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+               kfree(bn->data);
+               kfree(bn);
        } else {
                list_add(&bn->list, &journal->j_bitmap_nodes);
                journal->j_free_bitmap_nodes++;
@@ -276,8 +274,8 @@ static int free_bitmap_nodes(struct super_block *p_s_sb)
        while (next != &journal->j_bitmap_nodes) {
                bn = list_entry(next, struct reiserfs_bitmap_node, list);
                list_del(next);
-               reiserfs_kfree(bn->data, p_s_sb->s_blocksize, p_s_sb);
-               reiserfs_kfree(bn, sizeof(struct reiserfs_bitmap_node), p_s_sb);
+               kfree(bn->data);
+               kfree(bn);
                next = journal->j_bitmap_nodes.next;
                journal->j_free_bitmap_nodes--;
        }
@@ -581,7 +579,7 @@ static inline void put_journal_list(struct super_block *s,
                               jl->j_trans_id, jl->j_refcount);
        }
        if (--jl->j_refcount == 0)
-               reiserfs_kfree(jl, sizeof(struct reiserfs_journal_list), s);
+               kfree(jl);
 }
 
 /*
@@ -848,6 +846,14 @@ static int write_ordered_buffers(spinlock_t * lock,
                        spin_lock(lock);
                        goto loop_next;
                }
+               /* in theory, dirty non-uptodate buffers should never get here,
+                * but the upper layer io error paths still have a few quirks.
+                * Handle them here as gracefully as we can
+                */
+               if (!buffer_uptodate(bh) && buffer_dirty(bh)) {
+                       clear_buffer_dirty(bh);
+                       ret = -EIO;
+               }
                if (buffer_dirty(bh)) {
                        list_del_init(&jh->list);
                        list_add(&jh->list, &tmp);
@@ -879,6 +885,19 @@ static int write_ordered_buffers(spinlock_t * lock,
                if (!buffer_uptodate(bh)) {
                        ret = -EIO;
                }
+               /* ugly interaction with invalidatepage here.
+                * reiserfs_invalidate_page will pin any buffer that has a valid
+                * journal head from an older transaction.  If someone else sets
+                * our buffer dirty after we write it in the first loop, and
+                * then someone truncates the page away, nobody will ever write
+                * the buffer. We're safe if we write the page one last time
+                * after freeing the journal header.
+                */
+               if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) {
+                       spin_unlock(lock);
+                       ll_rw_block(WRITE, 1, &bh);
+                       spin_lock(lock);
+               }
                put_bh(bh);
                cond_resched_lock(lock);
        }
@@ -977,6 +996,7 @@ static int flush_commit_list(struct super_block *s,
        struct reiserfs_journal *journal = SB_JOURNAL(s);
        int barrier = 0;
        int retval = 0;
+       int write_len;
 
        reiserfs_check_lock_depth(s, "flush_commit_list");
 
@@ -1018,24 +1038,35 @@ static int flush_commit_list(struct super_block *s,
        }
 
        if (!list_empty(&jl->j_bh_list)) {
+               int ret;
                unlock_kernel();
-               write_ordered_buffers(&journal->j_dirty_buffers_lock,
-                                     journal, jl, &jl->j_bh_list);
+               ret = write_ordered_buffers(&journal->j_dirty_buffers_lock,
+                                           journal, jl, &jl->j_bh_list);
+               if (ret < 0 && retval == 0)
+                       retval = ret;
                lock_kernel();
        }
        BUG_ON(!list_empty(&jl->j_bh_list));
        /*
         * for the description block and all the log blocks, submit any buffers
-        * that haven't already reached the disk
+        * that haven't already reached the disk.  Try to write at least 256
+        * log blocks.  Later on, we will only wait on blocks that correspond
+        * to this transaction, but while we're unplugging we might as well
+        * get a chunk of data on there.
         */
        atomic_inc(&journal->j_async_throttle);
-       for (i = 0; i < (jl->j_len + 1); i++) {
+       write_len = jl->j_len + 1;
+       if (write_len < 256)
+               write_len = 256;
+       for (i = 0 ; i < write_len ; i++) {
                bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + (jl->j_start + i) %
                    SB_ONDISK_JOURNAL_SIZE(s);
                tbh = journal_find_get_block(s, bn);
-               if (buffer_dirty(tbh))  /* redundant, ll_rw_block() checks */
-                       ll_rw_block(SWRITE, 1, &tbh);
-               put_bh(tbh);
+               if (tbh) {
+                       if (buffer_dirty(tbh))
+                           ll_rw_block(WRITE, 1, &tbh) ;
+                       put_bh(tbh) ;
+               }
        }
        atomic_dec(&journal->j_async_throttle);
 
@@ -1818,8 +1849,7 @@ void remove_journal_hash(struct super_block *sb,
 static void free_journal_ram(struct super_block *p_s_sb)
 {
        struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb);
-       reiserfs_kfree(journal->j_current_jl,
-                      sizeof(struct reiserfs_journal_list), p_s_sb);
+       kfree(journal->j_current_jl);
        journal->j_num_lists--;
 
        vfree(journal->j_cnode_free_orig);
@@ -2093,21 +2123,15 @@ static int journal_read_transaction(struct super_block *p_s_sb,
        }
        trans_id = get_desc_trans_id(desc);
        /* now we know we've got a good transaction, and it was inside the valid time ranges */
-       log_blocks =
-           reiserfs_kmalloc(get_desc_trans_len(desc) *
-                            sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
-       real_blocks =
-           reiserfs_kmalloc(get_desc_trans_len(desc) *
-                            sizeof(struct buffer_head *), GFP_NOFS, p_s_sb);
+       log_blocks = kmalloc(get_desc_trans_len(desc) *
+                            sizeof(struct buffer_head *), GFP_NOFS);
+       real_blocks = kmalloc(get_desc_trans_len(desc) *
+                             sizeof(struct buffer_head *), GFP_NOFS);
        if (!log_blocks || !real_blocks) {
                brelse(c_bh);
                brelse(d_bh);
-               reiserfs_kfree(log_blocks,
-                              get_desc_trans_len(desc) *
-                              sizeof(struct buffer_head *), p_s_sb);
-               reiserfs_kfree(real_blocks,
-                              get_desc_trans_len(desc) *
-                              sizeof(struct buffer_head *), p_s_sb);
+               kfree(log_blocks);
+               kfree(real_blocks);
                reiserfs_warning(p_s_sb,
                                 "journal-1169: kmalloc failed, unable to mount FS");
                return -1;
@@ -2145,12 +2169,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
                        brelse_array(real_blocks, i);
                        brelse(c_bh);
                        brelse(d_bh);
-                       reiserfs_kfree(log_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
-                       reiserfs_kfree(real_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
+                       kfree(log_blocks);
+                       kfree(real_blocks);
                        return -1;
                }
        }
@@ -2166,12 +2186,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
                        brelse_array(real_blocks, get_desc_trans_len(desc));
                        brelse(c_bh);
                        brelse(d_bh);
-                       reiserfs_kfree(log_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
-                       reiserfs_kfree(real_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
+                       kfree(log_blocks);
+                       kfree(real_blocks);
                        return -1;
                }
                memcpy(real_blocks[i]->b_data, log_blocks[i]->b_data,
@@ -2193,12 +2209,8 @@ static int journal_read_transaction(struct super_block *p_s_sb,
                                     get_desc_trans_len(desc) - i);
                        brelse(c_bh);
                        brelse(d_bh);
-                       reiserfs_kfree(log_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
-                       reiserfs_kfree(real_blocks,
-                                      get_desc_trans_len(desc) *
-                                      sizeof(struct buffer_head *), p_s_sb);
+                       kfree(log_blocks);
+                       kfree(real_blocks);
                        return -1;
                }
                brelse(real_blocks[i]);
@@ -2215,14 +2227,13 @@ static int journal_read_transaction(struct super_block *p_s_sb,
        journal->j_start = cur_dblock - SB_ONDISK_JOURNAL_1st_BLOCK(p_s_sb);
        journal->j_last_flush_trans_id = trans_id;
        journal->j_trans_id = trans_id + 1;
+       /* check for trans_id overflow */
+       if (journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        brelse(c_bh);
        brelse(d_bh);
-       reiserfs_kfree(log_blocks,
-                      le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
-                      p_s_sb);
-       reiserfs_kfree(real_blocks,
-                      le32_to_cpu(desc->j_len) * sizeof(struct buffer_head *),
-                      p_s_sb);
+       kfree(log_blocks);
+       kfree(real_blocks);
        return 0;
 }
 
@@ -2311,8 +2322,7 @@ static int journal_read(struct super_block *p_s_sb)
                return 1;
        }
        jh = (struct reiserfs_journal_header *)(journal->j_header_bh->b_data);
-       if (le32_to_cpu(jh->j_first_unflushed_offset) >= 0 &&
-           le32_to_cpu(jh->j_first_unflushed_offset) <
+       if (le32_to_cpu(jh->j_first_unflushed_offset) <
            SB_ONDISK_JOURNAL_SIZE(p_s_sb)
            && le32_to_cpu(jh->j_last_flush_trans_id) > 0) {
                oldest_start =
@@ -2443,6 +2453,9 @@ static int journal_read(struct super_block *p_s_sb)
                journal->j_start = le32_to_cpu(jh->j_first_unflushed_offset);
                journal->j_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id) + 1;
+               /* check for trans_id overflow */
+               if (journal->j_trans_id == 0)
+                       journal->j_trans_id = 10;
                journal->j_last_flush_trans_id =
                    le32_to_cpu(jh->j_last_flush_trans_id);
                journal->j_mount_id = le32_to_cpu(jh->j_mount_id) + 1;
@@ -2471,14 +2484,8 @@ static int journal_read(struct super_block *p_s_sb)
 static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
 {
        struct reiserfs_journal_list *jl;
-      retry:
-       jl = reiserfs_kmalloc(sizeof(struct reiserfs_journal_list), GFP_NOFS,
-                             s);
-       if (!jl) {
-               yield();
-               goto retry;
-       }
-       memset(jl, 0, sizeof(*jl));
+       jl = kzalloc(sizeof(struct reiserfs_journal_list),
+                    GFP_NOFS | __GFP_NOFAIL);
        INIT_LIST_HEAD(&jl->j_list);
        INIT_LIST_HEAD(&jl->j_working_list);
        INIT_LIST_HEAD(&jl->j_tail_bh_list);
@@ -2821,6 +2828,9 @@ int journal_transaction_should_end(struct reiserfs_transaction_handle *th,
            journal->j_cnode_free < (journal->j_trans_max * 3)) {
                return 1;
        }
+       /* protected by the BKL here */
+       journal->j_len_alloc += new_alloc;
+       th->t_blocks_allocated += new_alloc ;
        return 0;
 }
 
@@ -3042,14 +3052,12 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct
                }
                return th;
        }
-       th = reiserfs_kmalloc(sizeof(struct reiserfs_transaction_handle),
-                             GFP_NOFS, s);
+       th = kmalloc(sizeof(struct reiserfs_transaction_handle), GFP_NOFS);
        if (!th)
                return NULL;
        ret = journal_begin(th, s, nblocks);
        if (ret) {
-               reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
-                              s);
+               kfree(th);
                return NULL;
        }
 
@@ -3067,8 +3075,7 @@ int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *th)
                ret = -EIO;
        if (th->t_refcount == 0) {
                SB_JOURNAL(s)->j_persistent_trans--;
-               reiserfs_kfree(th, sizeof(struct reiserfs_transaction_handle),
-                              s);
+               kfree(th);
        }
        return ret;
 }
@@ -3872,8 +3879,8 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        int cur_write_start = 0;        /* start index of current log write */
        int old_start;
        int i;
-       int flush = flags & FLUSH_ALL;
-       int wait_on_commit = flags & WAIT;
+       int flush;
+       int wait_on_commit;
        struct reiserfs_journal_list *jl, *temp_jl;
        struct list_head *entry, *safe;
        unsigned long jindex;
@@ -3883,6 +3890,13 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        BUG_ON(th->t_refcount > 1);
        BUG_ON(!th->t_trans_id);
 
+       /* protect flush_older_commits from making mistakes if the
+           transaction ID counter overflows.  */
+       if (th->t_trans_id == ~0UL)
+               flags |= FLUSH_ALL | COMMIT_NOW | WAIT;
+       flush = flags & FLUSH_ALL;
+       wait_on_commit = flags & WAIT;
+
        put_fs_excl();
        current->journal_info = th->t_handle_save;
        reiserfs_check_lock_depth(p_s_sb, "journal end");
@@ -4104,7 +4118,9 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
        journal->j_first = NULL;
        journal->j_len = 0;
        journal->j_trans_start_time = 0;
-       journal->j_trans_id++;
+       /* check for trans_id overflow */
+       if (++journal->j_trans_id == 0)
+               journal->j_trans_id = 10;
        journal->j_current_jl->j_trans_id = journal->j_trans_id;
        journal->j_must_wait = 0;
        journal->j_len_alloc = 0;