2 * linux/fs/jbd2/commit.c
4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
6 * Copyright 1998 Red Hat corp --- All Rights Reserved
8 * This file is part of the Linux kernel and is made available under
9 * the terms of the GNU General Public License, version 2, or at your
10 * option, any later version, incorporated herein by reference.
12 * Journal commit routines for the generic filesystem journaling code;
13 * part of the ext2fs journaling system.
16 #include <linux/time.h>
18 #include <linux/jbd2.h>
19 #include <linux/errno.h>
20 #include <linux/slab.h>
22 #include <linux/pagemap.h>
23 #include <linux/jiffies.h>
24 #include <linux/crc32.h>
27 * Default IO end handler for temporary BJ_IO buffer_heads.
/*
 * Default IO end handler for temporary BJ_IO buffer_heads.
 *
 * Runs at IO-completion time: record the outcome on the buffer and
 * release the buffer lock so anyone in wait_on_buffer() can proceed.
 * NOTE(review): the extract was missing the conditional, braces and the
 * unlock; restored to match upstream fs/jbd2/commit.c.
 */
static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
{
	BUFFER_TRACE(bh, "");
	if (uptodate)
		set_buffer_uptodate(bh);
	else
		clear_buffer_uptodate(bh);
	unlock_buffer(bh);
}
40 * When an ext4 file is truncated, it is possible that some pages are not
41 * successfully freed, because they are attached to a committing transaction.
42 * After the transaction commits, these pages are left on the LRU, with no
43 * ->mapping, and with attached buffers. These pages are trivially reclaimable
44 * by the VM, but their apparent absence upsets the VM accounting, and it makes
45 * the numbers in /proc/meminfo look odd.
47 * So here, we have a buffer which has just come off the forget list. Look to
48 * see if we can strip all buffers from the backing page.
50 * Called under lock_journal(), and possibly under journal_datalist_lock. The
51 * caller provided us with a ref against the buffer, and we drop that here.
53 static void release_buffer_page(struct buffer_head *bh)
59 if (atomic_read(&bh->b_count) != 1)
67 /* OK, it's a truncated page */
68 if (TestSetPageLocked(page))
73 try_to_free_buffers(page);
75 page_cache_release(page);
83 * Done it all: now submit the commit record. We should have
84 * cleaned up our previous buffers by now, so if we are in abort
85 * mode we can now just skip the rest of the journal write
88 * Returns 1 if the journal needs to be aborted or 0 on success
/*
 * Build the on-disk commit record for @commit_transaction in a freshly
 * allocated descriptor buffer and submit it for write.  The caller later
 * waits on the buffer (journal_wait_on_commit_record); *cbh presumably
 * receives the buffer_head for that purpose — the assignment line is
 * missing from this extract, verify against upstream.
 *
 * NOTE(review): several original lines are absent here (opening brace,
 * declarations of ret/barrier_done, the early return on abort, printk
 * head of the barrier-failure message, and the final return path) —
 * compare against upstream fs/jbd2/commit.c before changing anything.
 */
90 static int journal_submit_commit_record(journal_t *journal,
91 transaction_t *commit_transaction,
92 struct buffer_head **cbh,
95 struct journal_head *descriptor;
96 struct commit_header *tmp;
97 struct buffer_head *bh;
/* Wall-clock timestamp stored in the commit header. */
100 struct timespec now = current_kernel_time();
/* Aborted journal: nothing useful can be committed. */
102 if (is_journal_aborted(journal))
105 descriptor = jbd2_journal_get_descriptor_buffer(journal);
109 bh = jh2bh(descriptor);
/* Fill in the commit header: magic, block type, tid and timestamp. */
111 tmp = (struct commit_header *)bh->b_data;
112 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
113 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
114 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
115 tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
116 tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
/* Record the running crc32 of the transaction when checksums are on. */
118 if (JBD2_HAS_COMPAT_FEATURE(journal,
119 JBD2_FEATURE_COMPAT_CHECKSUM)) {
120 tmp->h_chksum_type = JBD2_CRC32_CHKSUM;
121 tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE;
122 tmp->h_chksum[0] = cpu_to_be32(crc32_sum);
125 JBUFFER_TRACE(descriptor, "submit commit block");
128 set_buffer_dirty(bh);
129 set_buffer_uptodate(bh);
130 bh->b_end_io = journal_end_buffer_io_sync;
/*
 * Synchronous commit on a barrier-capable device: issue the commit
 * block as an ordered (barrier) write.  Async-commit journals skip
 * the barrier here.
 */
132 if (journal->j_flags & JBD2_BARRIER &&
133 !JBD2_HAS_INCOMPAT_FEATURE(journal,
134 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
135 set_buffer_ordered(bh);
138 ret = submit_bh(WRITE, bh);
140 clear_buffer_ordered(bh);
142 /* is it possible for another commit to fail at roughly
143 * the same time as this one? If so, we don't want to
144 * trust the barrier flag in the super, but instead want
145 * to remember if we sent a barrier request
*/
/* Device rejected the barrier: disable barriers journal-wide ... */
147 if (ret == -EOPNOTSUPP && barrier_done) {
148 char b[BDEVNAME_SIZE];
151 "JBD: barrier-based sync failed on %s - "
152 "disabling barriers\n",
153 bdevname(journal->j_dev, b));
154 spin_lock(&journal->j_state_lock);
155 journal->j_flags &= ~JBD2_BARRIER;
156 spin_unlock(&journal->j_state_lock);
158 /* And try again, without the barrier */
/* Re-mark the buffer: the failed submit cleared the dirty bit. */
160 set_buffer_uptodate(bh);
161 set_buffer_dirty(bh);
162 ret = submit_bh(WRITE, bh);
169 * This function along with journal_submit_commit_record
170 * allows to write the commit record asynchronously.
172 static int journal_wait_on_commit_record(struct buffer_head *bh)
176 clear_buffer_dirty(bh);
179 if (unlikely(!buffer_uptodate(bh)))
181 put_bh(bh); /* One for getblk() */
182 jbd2_journal_put_journal_head(bh2jh(bh));
188 * Submit all the data buffers of inode associated with the transaction to
191 * We are in a committing transaction. Therefore no new inode can be added to
192 * our inode list. We use JI_COMMIT_RUNNING flag to protect inode we currently
193 * operate on from being released while we write out pages.
/*
 * Submit the data pages of every inode attached to @commit_transaction
 * for writeback (ordered-mode data).
 *
 * j_list_lock protects t_inode_list; JI_COMMIT_RUNNING pins the current
 * jinode while the lock is dropped around the (sleeping) writeback call,
 * so the inode cannot be released from under us.
 *
 * NOTE(review): the extract is missing the opening brace, the err/ret
 * declarations, the error-accumulation after filemap_fdatawrite_range()
 * and the final return — verify against upstream fs/jbd2/commit.c.
 */
195 static int journal_submit_inode_data_buffers(journal_t *journal,
196 transaction_t *commit_transaction)
198 struct jbd2_inode *jinode;
200 struct address_space *mapping;
202 spin_lock(&journal->j_list_lock);
203 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
204 mapping = jinode->i_vfs_inode->i_mapping;
/* Pin this jinode before dropping the list lock. */
205 jinode->i_flags |= JI_COMMIT_RUNNING;
206 spin_unlock(&journal->j_list_lock);
/* May sleep: start writeback of the inode's data up to i_size. */
207 err = filemap_fdatawrite_range(mapping, 0,
208 i_size_read(jinode->i_vfs_inode));
211 spin_lock(&journal->j_list_lock);
/* Committing transactions gain no new inodes, so this must hold. */
212 J_ASSERT(jinode->i_transaction == commit_transaction);
/* Unpin and wake anyone waiting on the COMMIT_RUNNING bit. */
213 jinode->i_flags &= ~JI_COMMIT_RUNNING;
214 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
216 spin_unlock(&journal->j_list_lock);
221 * Wait for data submitted for writeout, refile inodes to proper
222 * transaction if needed.
/*
 * Wait for the data writeback started by
 * journal_submit_inode_data_buffers() to complete, then move each inode
 * to its next transaction's list (or detach it entirely).
 *
 * NOTE(review): the extract is missing the opening brace, the err/ret
 * declarations, error accumulation after filemap_fdatawait(), the else
 * branch bodies in the refile loop and the final return — verify
 * against upstream fs/jbd2/commit.c.
 */
225 static int journal_finish_inode_data_buffers(journal_t *journal,
226 transaction_t *commit_transaction)
228 struct jbd2_inode *jinode, *next_i;
231 /* For locking, see the comment in journal_submit_inode_data_buffers() */
232 spin_lock(&journal->j_list_lock);
233 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
/* Pin the jinode, drop the lock, and wait for its pages. */
234 jinode->i_flags |= JI_COMMIT_RUNNING;
235 spin_unlock(&journal->j_list_lock);
236 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
239 spin_lock(&journal->j_list_lock);
240 jinode->i_flags &= ~JI_COMMIT_RUNNING;
241 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
244 /* Now refile inode to proper lists */
/* _safe variant: entries may be deleted from the list as we walk. */
245 list_for_each_entry_safe(jinode, next_i,
246 &commit_transaction->t_inode_list, i_list) {
247 list_del(&jinode->i_list);
248 if (jinode->i_next_transaction) {
/* Inode is already part of a later transaction: hand it over. */
249 jinode->i_transaction = jinode->i_next_transaction;
250 jinode->i_next_transaction = NULL;
251 list_add(&jinode->i_list,
252 &jinode->i_transaction->t_inode_list);
/* Otherwise the inode is done with journaling for now. */
254 jinode->i_transaction = NULL;
257 spin_unlock(&journal->j_list_lock);
262 static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
264 struct page *page = bh->b_page;
268 addr = kmap_atomic(page, KM_USER0);
269 checksum = crc32_be(crc32_sum,
270 (void *)(addr + offset_in_page(bh->b_data)), bh->b_size);
271 kunmap_atomic(addr, KM_USER0);
276 static void write_tag_block(int tag_bytes, journal_block_tag_t *tag,
277 unsigned long long block)
279 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
280 if (tag_bytes > JBD2_TAG_SIZE32)
281 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
285 * jbd2_journal_commit_transaction
287 * The primary function for committing a transaction to the log. This
288 * function is called by the journal thread to begin a complete commit.
290 void jbd2_journal_commit_transaction(journal_t *journal)
292 struct transaction_stats_s stats;
293 transaction_t *commit_transaction;
294 struct journal_head *jh, *new_jh, *descriptor;
295 struct buffer_head **wbuf = journal->j_wbuf;
299 unsigned long long blocknr;
301 journal_header_t *header;
302 journal_block_tag_t *tag = NULL;
307 int tag_bytes = journal_tag_bytes(journal);
308 struct buffer_head *cbh = NULL; /* For transactional checksums */
309 __u32 crc32_sum = ~0;
312 * First job: lock down the current transaction and wait for
313 * all outstanding updates to complete.
317 spin_lock(&journal->j_list_lock);
318 summarise_journal_usage(journal);
319 spin_unlock(&journal->j_list_lock);
322 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
323 if (journal->j_flags & JBD2_FLUSHED) {
324 jbd_debug(3, "super block updated\n");
325 jbd2_journal_update_superblock(journal, 1);
327 jbd_debug(3, "superblock not updated\n");
330 J_ASSERT(journal->j_running_transaction != NULL);
331 J_ASSERT(journal->j_committing_transaction == NULL);
333 commit_transaction = journal->j_running_transaction;
334 J_ASSERT(commit_transaction->t_state == T_RUNNING);
336 jbd_debug(1, "JBD: starting commit of transaction %d\n",
337 commit_transaction->t_tid);
339 spin_lock(&journal->j_state_lock);
340 commit_transaction->t_state = T_LOCKED;
342 stats.u.run.rs_wait = commit_transaction->t_max_wait;
343 stats.u.run.rs_locked = jiffies;
344 stats.u.run.rs_running = jbd2_time_diff(commit_transaction->t_start,
345 stats.u.run.rs_locked);
347 spin_lock(&commit_transaction->t_handle_lock);
348 while (commit_transaction->t_updates) {
351 prepare_to_wait(&journal->j_wait_updates, &wait,
352 TASK_UNINTERRUPTIBLE);
353 if (commit_transaction->t_updates) {
354 spin_unlock(&commit_transaction->t_handle_lock);
355 spin_unlock(&journal->j_state_lock);
357 spin_lock(&journal->j_state_lock);
358 spin_lock(&commit_transaction->t_handle_lock);
360 finish_wait(&journal->j_wait_updates, &wait);
362 spin_unlock(&commit_transaction->t_handle_lock);
364 J_ASSERT (commit_transaction->t_outstanding_credits <=
365 journal->j_max_transaction_buffers);
368 * First thing we are allowed to do is to discard any remaining
369 * BJ_Reserved buffers. Note, it is _not_ permissible to assume
370 * that there are no such buffers: if a large filesystem
371 * operation like a truncate needs to split itself over multiple
372 * transactions, then it may try to do a jbd2_journal_restart() while
373 * there are still BJ_Reserved buffers outstanding. These must
374 * be released cleanly from the current transaction.
376 * In this case, the filesystem must still reserve write access
377 * again before modifying the buffer in the new transaction, but
378 * we do not require it to remember exactly which old buffers it
379 * has reserved. This is consistent with the existing behaviour
380 * that multiple jbd2_journal_get_write_access() calls to the same
381 * buffer are perfectly permissable.
383 while (commit_transaction->t_reserved_list) {
384 jh = commit_transaction->t_reserved_list;
385 JBUFFER_TRACE(jh, "reserved, unused: refile");
387 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may
388 * leave undo-committed data.
390 if (jh->b_committed_data) {
391 struct buffer_head *bh = jh2bh(jh);
393 jbd_lock_bh_state(bh);
394 jbd2_free(jh->b_committed_data, bh->b_size);
395 jh->b_committed_data = NULL;
396 jbd_unlock_bh_state(bh);
398 jbd2_journal_refile_buffer(journal, jh);
402 * Now try to drop any written-back buffers from the journal's
403 * checkpoint lists. We do this *before* commit because it potentially
406 spin_lock(&journal->j_list_lock);
407 __jbd2_journal_clean_checkpoint_list(journal);
408 spin_unlock(&journal->j_list_lock);
410 jbd_debug (3, "JBD: commit phase 1\n");
413 * Switch to a new revoke table.
415 jbd2_journal_switch_revoke_table(journal);
417 stats.u.run.rs_flushing = jiffies;
418 stats.u.run.rs_locked = jbd2_time_diff(stats.u.run.rs_locked,
419 stats.u.run.rs_flushing);
421 commit_transaction->t_state = T_FLUSH;
422 journal->j_committing_transaction = commit_transaction;
423 journal->j_running_transaction = NULL;
424 commit_transaction->t_log_start = journal->j_head;
425 wake_up(&journal->j_wait_transaction_locked);
426 spin_unlock(&journal->j_state_lock);
428 jbd_debug (3, "JBD: commit phase 2\n");
431 * Now start flushing things to disk, in the order they appear
432 * on the transaction lists. Data blocks go first.
434 err = journal_submit_inode_data_buffers(journal, commit_transaction);
436 jbd2_journal_abort(journal, err);
438 jbd2_journal_write_revoke_records(journal, commit_transaction);
440 jbd_debug(3, "JBD: commit phase 2\n");
443 * Way to go: we have now written out all of the data for a
444 * transaction! Now comes the tricky part: we need to write out
445 * metadata. Loop over the transaction's entire buffer list:
447 spin_lock(&journal->j_state_lock);
448 commit_transaction->t_state = T_COMMIT;
449 spin_unlock(&journal->j_state_lock);
451 stats.u.run.rs_logging = jiffies;
452 stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing,
453 stats.u.run.rs_logging);
454 stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
455 stats.u.run.rs_blocks_logged = 0;
457 J_ASSERT(commit_transaction->t_nr_buffers <=
458 commit_transaction->t_outstanding_credits);
463 while (commit_transaction->t_buffers) {
465 /* Find the next buffer to be journaled... */
467 jh = commit_transaction->t_buffers;
469 /* If we're in abort mode, we just un-journal the buffer and
470 release it for background writing. */
472 if (is_journal_aborted(journal)) {
473 JBUFFER_TRACE(jh, "journal is aborting: refile");
474 jbd2_journal_refile_buffer(journal, jh);
475 /* If that was the last one, we need to clean up
476 * any descriptor buffers which may have been
477 * already allocated, even if we are now
479 if (!commit_transaction->t_buffers)
480 goto start_journal_io;
484 /* Make sure we have a descriptor block in which to
485 record the metadata buffer. */
488 struct buffer_head *bh;
490 J_ASSERT (bufs == 0);
492 jbd_debug(4, "JBD: get descriptor\n");
494 descriptor = jbd2_journal_get_descriptor_buffer(journal);
496 jbd2_journal_abort(journal, -EIO);
500 bh = jh2bh(descriptor);
501 jbd_debug(4, "JBD: got buffer %llu (%p)\n",
502 (unsigned long long)bh->b_blocknr, bh->b_data);
503 header = (journal_header_t *)&bh->b_data[0];
504 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
505 header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
506 header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
508 tagp = &bh->b_data[sizeof(journal_header_t)];
509 space_left = bh->b_size - sizeof(journal_header_t);
511 set_buffer_jwrite(bh);
512 set_buffer_dirty(bh);
515 /* Record it so that we can wait for IO
517 BUFFER_TRACE(bh, "ph3: file as descriptor");
518 jbd2_journal_file_buffer(descriptor, commit_transaction,
522 /* Where is the buffer to be written? */
524 err = jbd2_journal_next_log_block(journal, &blocknr);
525 /* If the block mapping failed, just abandon the buffer
526 and repeat this loop: we'll fall into the
527 refile-on-abort condition above. */
529 jbd2_journal_abort(journal, err);
534 * start_this_handle() uses t_outstanding_credits to determine
535 * the free space in the log, but this counter is changed
536 * by jbd2_journal_next_log_block() also.
538 commit_transaction->t_outstanding_credits--;
540 /* Bump b_count to prevent truncate from stumbling over
541 the shadowed buffer! @@@ This can go if we ever get
542 rid of the BJ_IO/BJ_Shadow pairing of buffers. */
543 atomic_inc(&jh2bh(jh)->b_count);
545 /* Make a temporary IO buffer with which to write it out
546 (this will requeue both the metadata buffer and the
547 temporary IO buffer). new_bh goes on BJ_IO*/
549 set_bit(BH_JWrite, &jh2bh(jh)->b_state);
551 * akpm: jbd2_journal_write_metadata_buffer() sets
552 * new_bh->b_transaction to commit_transaction.
553 * We need to clean this up before we release new_bh
554 * (which is of type BJ_IO)
556 JBUFFER_TRACE(jh, "ph3: write metadata");
557 flags = jbd2_journal_write_metadata_buffer(commit_transaction,
558 jh, &new_jh, blocknr);
559 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
560 wbuf[bufs++] = jh2bh(new_jh);
562 /* Record the new block's tag in the current descriptor
567 tag_flag |= JBD2_FLAG_ESCAPE;
569 tag_flag |= JBD2_FLAG_SAME_UUID;
571 tag = (journal_block_tag_t *) tagp;
572 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
573 tag->t_flags = cpu_to_be32(tag_flag);
575 space_left -= tag_bytes;
578 memcpy (tagp, journal->j_uuid, 16);
584 /* If there's no more to do, or if the descriptor is full,
587 if (bufs == journal->j_wbufsize ||
588 commit_transaction->t_buffers == NULL ||
589 space_left < tag_bytes + 16) {
591 jbd_debug(4, "JBD: Submit %d IOs\n", bufs);
593 /* Write an end-of-descriptor marker before
594 submitting the IOs. "tag" still points to
595 the last tag we set up. */
597 tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
600 for (i = 0; i < bufs; i++) {
601 struct buffer_head *bh = wbuf[i];
605 if (JBD2_HAS_COMPAT_FEATURE(journal,
606 JBD2_FEATURE_COMPAT_CHECKSUM)) {
608 jbd2_checksum_data(crc32_sum, bh);
612 clear_buffer_dirty(bh);
613 set_buffer_uptodate(bh);
614 bh->b_end_io = journal_end_buffer_io_sync;
615 submit_bh(WRITE, bh);
618 stats.u.run.rs_blocks_logged += bufs;
620 /* Force a new descriptor to be generated next
621 time round the loop. */
627 /* Done it all: now write the commit record asynchronously. */
629 if (JBD2_HAS_INCOMPAT_FEATURE(journal,
630 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
631 err = journal_submit_commit_record(journal, commit_transaction,
634 __jbd2_journal_abort_hard(journal);
638 * This is the right place to wait for data buffers both for ASYNC
639 * and !ASYNC commit. If commit is ASYNC, we need to wait only after
640 * the commit block went to disk (which happens above). If commit is
641 * SYNC, we need to wait for data buffers before we start writing
642 * commit block, which happens below in such setting.
644 err = journal_finish_inode_data_buffers(journal, commit_transaction);
646 jbd2_journal_abort(journal, err);
648 /* Lo and behold: we have just managed to send a transaction to
649 the log. Before we can commit it, wait for the IO so far to
650 complete. Control buffers being written are on the
651 transaction's t_log_list queue, and metadata buffers are on
652 the t_iobuf_list queue.
654 Wait for the buffers in reverse order. That way we are
655 less likely to be woken up until all IOs have completed, and
656 so we incur less scheduling load.
659 jbd_debug(3, "JBD: commit phase 3\n");
662 * akpm: these are BJ_IO, and j_list_lock is not needed.
663 * See __journal_try_to_free_buffer.
666 while (commit_transaction->t_iobuf_list != NULL) {
667 struct buffer_head *bh;
669 jh = commit_transaction->t_iobuf_list->b_tprev;
671 if (buffer_locked(bh)) {
678 if (unlikely(!buffer_uptodate(bh)))
681 clear_buffer_jwrite(bh);
683 JBUFFER_TRACE(jh, "ph4: unfile after journal write");
684 jbd2_journal_unfile_buffer(journal, jh);
687 * ->t_iobuf_list should contain only dummy buffer_heads
688 * which were created by jbd2_journal_write_metadata_buffer().
690 BUFFER_TRACE(bh, "dumping temporary bh");
691 jbd2_journal_put_journal_head(jh);
693 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
694 free_buffer_head(bh);
696 /* We also have to unlock and free the corresponding
698 jh = commit_transaction->t_shadow_list->b_tprev;
700 clear_bit(BH_JWrite, &bh->b_state);
701 J_ASSERT_BH(bh, buffer_jbddirty(bh));
703 /* The metadata is now released for reuse, but we need
704 to remember it against this transaction so that when
705 we finally commit, we can do any checkpointing
707 JBUFFER_TRACE(jh, "file as BJ_Forget");
708 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
709 /* Wake up any transactions which were waiting for this
711 wake_up_bit(&bh->b_state, BH_Unshadow);
712 JBUFFER_TRACE(jh, "brelse shadowed buffer");
716 J_ASSERT (commit_transaction->t_shadow_list == NULL);
718 jbd_debug(3, "JBD: commit phase 4\n");
720 /* Here we wait for the revoke record and descriptor record buffers */
722 while (commit_transaction->t_log_list != NULL) {
723 struct buffer_head *bh;
725 jh = commit_transaction->t_log_list->b_tprev;
727 if (buffer_locked(bh)) {
729 goto wait_for_ctlbuf;
732 goto wait_for_ctlbuf;
734 if (unlikely(!buffer_uptodate(bh)))
737 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile");
738 clear_buffer_jwrite(bh);
739 jbd2_journal_unfile_buffer(journal, jh);
740 jbd2_journal_put_journal_head(jh);
741 __brelse(bh); /* One for getblk */
742 /* AKPM: bforget here */
745 jbd_debug(3, "JBD: commit phase 5\n");
747 if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
748 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
749 err = journal_submit_commit_record(journal, commit_transaction,
752 __jbd2_journal_abort_hard(journal);
754 if (!err && !is_journal_aborted(journal))
755 err = journal_wait_on_commit_record(cbh);
758 jbd2_journal_abort(journal, err);
760 /* End of a transaction! Finally, we can do checkpoint
761 processing: any buffers committed as a result of this
762 transaction can be removed from any checkpoint list it was on
765 jbd_debug(3, "JBD: commit phase 6\n");
767 J_ASSERT(list_empty(&commit_transaction->t_inode_list));
768 J_ASSERT(commit_transaction->t_buffers == NULL);
769 J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
770 J_ASSERT(commit_transaction->t_iobuf_list == NULL);
771 J_ASSERT(commit_transaction->t_shadow_list == NULL);
772 J_ASSERT(commit_transaction->t_log_list == NULL);
776 * As there are other places (journal_unmap_buffer()) adding buffers
777 * to this list we have to be careful and hold the j_list_lock.
779 spin_lock(&journal->j_list_lock);
780 while (commit_transaction->t_forget) {
781 transaction_t *cp_transaction;
782 struct buffer_head *bh;
784 jh = commit_transaction->t_forget;
785 spin_unlock(&journal->j_list_lock);
787 jbd_lock_bh_state(bh);
788 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
789 jh->b_transaction == journal->j_running_transaction);
792 * If there is undo-protected committed data against
793 * this buffer, then we can remove it now. If it is a
794 * buffer needing such protection, the old frozen_data
795 * field now points to a committed version of the
796 * buffer, so rotate that field to the new committed
799 * Otherwise, we can just throw away the frozen data now.
801 if (jh->b_committed_data) {
802 jbd2_free(jh->b_committed_data, bh->b_size);
803 jh->b_committed_data = NULL;
804 if (jh->b_frozen_data) {
805 jh->b_committed_data = jh->b_frozen_data;
806 jh->b_frozen_data = NULL;
808 } else if (jh->b_frozen_data) {
809 jbd2_free(jh->b_frozen_data, bh->b_size);
810 jh->b_frozen_data = NULL;
813 spin_lock(&journal->j_list_lock);
814 cp_transaction = jh->b_cp_transaction;
815 if (cp_transaction) {
816 JBUFFER_TRACE(jh, "remove from old cp transaction");
817 cp_transaction->t_chp_stats.cs_dropped++;
818 __jbd2_journal_remove_checkpoint(jh);
821 /* Only re-checkpoint the buffer_head if it is marked
822 * dirty. If the buffer was added to the BJ_Forget list
823 * by jbd2_journal_forget, it may no longer be dirty and
824 * there's no point in keeping a checkpoint record for
827 /* A buffer which has been freed while still being
828 * journaled by a previous transaction may end up still
829 * being dirty here, but we want to avoid writing back
830 * that buffer in the future now that the last use has
831 * been committed. That's not only a performance gain,
832 * it also stops aliasing problems if the buffer is left
833 * behind for writeback and gets reallocated for another
834 * use in a different page. */
835 if (buffer_freed(bh)) {
836 clear_buffer_freed(bh);
837 clear_buffer_jbddirty(bh);
840 if (buffer_jbddirty(bh)) {
841 JBUFFER_TRACE(jh, "add to new checkpointing trans");
842 __jbd2_journal_insert_checkpoint(jh, commit_transaction);
843 JBUFFER_TRACE(jh, "refile for checkpoint writeback");
844 __jbd2_journal_refile_buffer(jh);
845 jbd_unlock_bh_state(bh);
847 J_ASSERT_BH(bh, !buffer_dirty(bh));
848 /* The buffer on BJ_Forget list and not jbddirty means
849 * it has been freed by this transaction and hence it
850 * could not have been reallocated until this
851 * transaction has committed. *BUT* it could be
852 * reallocated once we have written all the data to
853 * disk and before we process the buffer on BJ_Forget
855 JBUFFER_TRACE(jh, "refile or unfile freed buffer");
856 __jbd2_journal_refile_buffer(jh);
857 if (!jh->b_transaction) {
858 jbd_unlock_bh_state(bh);
860 jbd2_journal_remove_journal_head(bh);
861 release_buffer_page(bh);
863 jbd_unlock_bh_state(bh);
865 cond_resched_lock(&journal->j_list_lock);
867 spin_unlock(&journal->j_list_lock);
869 * This is a bit sleazy. We use j_list_lock to protect transition
870 * of a transaction into T_FINISHED state and calling
871 * __jbd2_journal_drop_transaction(). Otherwise we could race with
872 * other checkpointing code processing the transaction...
874 spin_lock(&journal->j_state_lock);
875 spin_lock(&journal->j_list_lock);
877 * Now recheck if some buffers did not get attached to the transaction
878 * while the lock was dropped...
880 if (commit_transaction->t_forget) {
881 spin_unlock(&journal->j_list_lock);
882 spin_unlock(&journal->j_state_lock);
886 /* Done with this transaction! */
888 jbd_debug(3, "JBD: commit phase 7\n");
890 J_ASSERT(commit_transaction->t_state == T_COMMIT);
892 commit_transaction->t_start = jiffies;
893 stats.u.run.rs_logging = jbd2_time_diff(stats.u.run.rs_logging,
894 commit_transaction->t_start);
897 * File the transaction for history
899 stats.ts_type = JBD2_STATS_RUN;
900 stats.ts_tid = commit_transaction->t_tid;
901 stats.u.run.rs_handle_count = commit_transaction->t_handle_count;
902 spin_lock(&journal->j_history_lock);
903 memcpy(journal->j_history + journal->j_history_cur, &stats,
905 if (++journal->j_history_cur == journal->j_history_max)
906 journal->j_history_cur = 0;
909 * Calculate overall stats
911 journal->j_stats.ts_tid++;
912 journal->j_stats.u.run.rs_wait += stats.u.run.rs_wait;
913 journal->j_stats.u.run.rs_running += stats.u.run.rs_running;
914 journal->j_stats.u.run.rs_locked += stats.u.run.rs_locked;
915 journal->j_stats.u.run.rs_flushing += stats.u.run.rs_flushing;
916 journal->j_stats.u.run.rs_logging += stats.u.run.rs_logging;
917 journal->j_stats.u.run.rs_handle_count += stats.u.run.rs_handle_count;
918 journal->j_stats.u.run.rs_blocks += stats.u.run.rs_blocks;
919 journal->j_stats.u.run.rs_blocks_logged += stats.u.run.rs_blocks_logged;
920 spin_unlock(&journal->j_history_lock);
922 commit_transaction->t_state = T_FINISHED;
923 J_ASSERT(commit_transaction == journal->j_committing_transaction);
924 journal->j_commit_sequence = commit_transaction->t_tid;
925 journal->j_committing_transaction = NULL;
926 spin_unlock(&journal->j_state_lock);
928 if (commit_transaction->t_checkpoint_list == NULL &&
929 commit_transaction->t_checkpoint_io_list == NULL) {
930 __jbd2_journal_drop_transaction(journal, commit_transaction);
932 if (journal->j_checkpoint_transactions == NULL) {
933 journal->j_checkpoint_transactions = commit_transaction;
934 commit_transaction->t_cpnext = commit_transaction;
935 commit_transaction->t_cpprev = commit_transaction;
937 commit_transaction->t_cpnext =
938 journal->j_checkpoint_transactions;
939 commit_transaction->t_cpprev =
940 commit_transaction->t_cpnext->t_cpprev;
941 commit_transaction->t_cpnext->t_cpprev =
943 commit_transaction->t_cpprev->t_cpnext =
947 spin_unlock(&journal->j_list_lock);
949 jbd_debug(1, "JBD: commit %d complete, head %d\n",
950 journal->j_commit_sequence, journal->j_tail_sequence);
952 wake_up(&journal->j_wait_done_commit);