Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Sep 2015 19:52:19 +0000 (12:52 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Sep 2015 19:52:19 +0000 (12:52 -0700)
Pull ext4 updates from Ted Ts'o:
 "Pretty much all bug fixes and clean ups for 4.3, after a lot of
  features and other churn going into 4.2"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  Revert "ext4: remove block_device_ejected"
  ext4: ratelimit the file system mounted message
  ext4: silence a format string false positive
  ext4: simplify some code in read_mmp_block()
  ext4: don't manipulate recovery flag when freezing no-journal fs
  jbd2: limit number of reserved credits
  ext4 crypto: remove duplicate header file
  ext4: update c/mtime on truncate up
  jbd2: avoid infinite loop when destroying aborted journal
  ext4, jbd2: add REQ_FUA flag when recording an error in the superblock
  ext4 crypto: fix spelling typo in comment
  ext4 crypto: exit cleanly if ext4_derive_key_aes() fails
  ext4: reject journal options for ext2 mounts
  ext4: implement cgroup writeback support
  ext4: replace ext4_io_submit->io_op with ->io_wbc
  ext4 crypto: check for too-short encrypted file names
  ext4 crypto: use a jbd2 transaction when adding a crypto policy
  jbd2: speedup jbd2_journal_dirty_metadata()

13 files changed:
fs/ext4/crypto_fname.c
fs/ext4/crypto_key.c
fs/ext4/crypto_policy.c
fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/mmp.c
fs/ext4/page-io.c
fs/ext4/super.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/transaction.c
include/linux/jbd2.h

index 7dc4eb5..847f919 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/key.h>
-#include <linux/key.h>
 #include <linux/list.h>
 #include <linux/mempool.h>
 #include <linux/random.h>
@@ -329,6 +328,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
                        return oname->len;
                }
        }
+       if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) {
+               EXT4_ERROR_INODE(inode, "encrypted inode too small");
+               return -EUCLEAN;
+       }
        if (EXT4_I(inode)->i_crypt_info)
                return ext4_fname_decrypt(inode, iname, oname);
 
index 442d24e..1d510c1 100644 (file)
@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
 
 /**
  * ext4_derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivatio.
+ * @deriving_key: Encryption key used for derivation.
  * @source_key:   Source key to which to apply derivation.
  * @derived_key:  Derived key.
  *
@@ -220,6 +220,8 @@ retry:
        BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
        res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
                                  raw_key);
+       if (res)
+               goto out;
 got_key:
        ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
        if (!ctfm || IS_ERR(ctfm)) {
index 02c4e5d..a640ec2 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 
+#include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
 
@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy(
        struct inode *inode, const struct ext4_encryption_policy *policy)
 {
        struct ext4_encryption_context ctx;
-       int res = 0;
+       handle_t *handle;
+       int res, res2;
 
        res = ext4_convert_inline_data(inode);
        if (res)
@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy(
        BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
        get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
 
+       handle = ext4_journal_start(inode, EXT4_HT_MISC,
+                                   ext4_jbd2_credits_xattr(inode));
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
        res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
                             EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
                             sizeof(ctx), 0);
-       if (!res)
+       if (!res) {
                ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+               res = ext4_mark_inode_dirty(handle, inode);
+               if (res)
+                       EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
+       }
+       res2 = ext4_journal_stop(handle);
+       if (!res)
+               res = res2;
        return res;
 }
 
index f5e9f04..32071f5 100644 (file)
@@ -187,7 +187,7 @@ typedef struct ext4_io_end {
 } ext4_io_end_t;
 
 struct ext4_io_submit {
-       int                     io_op;
+       struct writeback_control *io_wbc;
        struct bio              *io_bio;
        ext4_io_end_t           *io_end;
        sector_t                io_next_block;
index fed7ee7..29f1af7 100644 (file)
@@ -4728,6 +4728,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                                error = ext4_orphan_add(handle, inode);
                                orphan = 1;
                        }
+                       /*
+                        * Update c/mtime on truncate up, ext4_truncate() will
+                        * update c/mtime in shrink case below
+                        */
+                       if (!shrink) {
+                               inode->i_mtime = ext4_current_time(inode);
+                               inode->i_ctime = inode->i_mtime;
+                       }
                        down_write(&EXT4_I(inode)->i_data_sem);
                        EXT4_I(inode)->i_disksize = attr->ia_size;
                        rc = ext4_mark_inode_dirty(handle, inode);
index 8313ca3..6eb1a61 100644 (file)
@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
                          ext4_fsblk_t mmp_block)
 {
        struct mmp_struct *mmp;
+       int ret;
 
        if (*bh)
                clear_buffer_uptodate(*bh);
@@ -76,33 +77,36 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
        /* This would be sb_bread(sb, mmp_block), except we need to be sure
         * that the MD RAID device cache has been bypassed, and that the read
         * is not blocked in the elevator. */
-       if (!*bh)
+       if (!*bh) {
                *bh = sb_getblk(sb, mmp_block);
-       if (!*bh)
-               return -ENOMEM;
-       if (*bh) {
-               get_bh(*bh);
-               lock_buffer(*bh);
-               (*bh)->b_end_io = end_buffer_read_sync;
-               submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
-               wait_on_buffer(*bh);
-               if (!buffer_uptodate(*bh)) {
-                       brelse(*bh);
-                       *bh = NULL;
+               if (!*bh) {
+                       ret = -ENOMEM;
+                       goto warn_exit;
                }
        }
-       if (unlikely(!*bh)) {
-               ext4_warning(sb, "Error while reading MMP block %llu",
-                            mmp_block);
-               return -EIO;
+
+       get_bh(*bh);
+       lock_buffer(*bh);
+       (*bh)->b_end_io = end_buffer_read_sync;
+       submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
+       wait_on_buffer(*bh);
+       if (!buffer_uptodate(*bh)) {
+               brelse(*bh);
+               *bh = NULL;
+               ret = -EIO;
+               goto warn_exit;
        }
 
        mmp = (struct mmp_struct *)((*bh)->b_data);
-       if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
-           !ext4_mmp_csum_verify(sb, mmp))
-               return -EINVAL;
-
-       return 0;
+       if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
+           ext4_mmp_csum_verify(sb, mmp))
+               return 0;
+       ret = -EINVAL;
+
+warn_exit:
+       ext4_warning(sb, "Error %d while reading MMP block %llu",
+                    ret, mmp_block);
+       return ret;
 }
 
 /*
@@ -111,7 +115,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
                    const char *function, unsigned int line, const char *msg)
 {
-       __ext4_warning(sb, function, line, msg);
+       __ext4_warning(sb, function, line, "%s", msg);
        __ext4_warning(sb, function, line,
                       "MMP failure info: last update time: %llu, last update "
                       "node: %s, last update device: %s\n",
index 8a9d63a..84ba4d2 100644 (file)
@@ -354,8 +354,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
        struct bio *bio = io->io_bio;
 
        if (bio) {
+               int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
+                           WRITE_SYNC : WRITE;
                bio_get(io->io_bio);
-               submit_bio(io->io_op, io->io_bio);
+               submit_bio(io_op, io->io_bio);
                bio_put(io->io_bio);
        }
        io->io_bio = NULL;
@@ -364,7 +366,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
 void ext4_io_submit_init(struct ext4_io_submit *io,
                         struct writeback_control *wbc)
 {
-       io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
+       io->io_wbc = wbc;
        io->io_bio = NULL;
        io->io_end = NULL;
 }
@@ -377,6 +379,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
        bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
        if (!bio)
                return -ENOMEM;
+       wbc_init_bio(io->io_wbc, bio);
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio->bi_bdev = bh->b_bdev;
        bio->bi_end_io = ext4_end_bio;
@@ -405,6 +408,7 @@ submit_and_retry:
        ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
        if (ret != bh->b_size)
                goto submit_and_retry;
+       wbc_account_io(io->io_wbc, page, bh->b_size);
        io->io_next_block++;
        return 0;
 }
index 06b4b14..ee38782 100644 (file)
@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info;
 static struct mutex ext4_li_mtx;
 static struct ext4_features *ext4_feat;
 static int ext4_mballoc_ready;
+static struct ratelimit_state ext4_mount_msg_ratelimit;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
                             unsigned long journal_devnum);
@@ -321,6 +322,22 @@ static void save_error_info(struct super_block *sb, const char *func,
        ext4_commit_super(sb, 1);
 }
 
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+       struct inode *bd_inode = sb->s_bdev->bd_inode;
+       struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+       return bdi->dev == NULL;
+}
+
 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 {
        struct super_block              *sb = journal->j_private;
@@ -1390,9 +1407,9 @@ static const struct mount_opts {
        {Opt_stripe, 0, MOPT_GTE0},
        {Opt_resuid, 0, MOPT_GTE0},
        {Opt_resgid, 0, MOPT_GTE0},
-       {Opt_journal_dev, 0, MOPT_GTE0},
-       {Opt_journal_path, 0, MOPT_STRING},
-       {Opt_journal_ioprio, 0, MOPT_GTE0},
+       {Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
+       {Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
+       {Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
        {Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
        {Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
@@ -3639,6 +3656,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                }
                if (test_opt(sb, DELALLOC))
                        clear_opt(sb, DELALLOC);
+       } else {
+               sb->s_iflags |= SB_I_CGROUPWB;
        }
 
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -4271,9 +4290,10 @@ no_journal:
                                 "the device does not support discard");
        }
 
-       ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
-                "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
-                *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+       if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+               ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+                        "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+                        *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
 
        if (es->s_error_count)
                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -4613,7 +4633,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
        int error = 0;
 
-       if (!sbh)
+       if (!sbh || block_device_ejected(sb))
                return error;
        if (buffer_write_io_error(sbh)) {
                /*
@@ -4661,7 +4681,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        ext4_superblock_csum_set(sb);
        mark_buffer_dirty(sbh);
        if (sync) {
-               error = sync_dirty_buffer(sbh);
+               error = __sync_dirty_buffer(sbh,
+                       test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
                if (error)
                        return error;
 
@@ -4829,10 +4850,11 @@ static int ext4_freeze(struct super_block *sb)
                error = jbd2_journal_flush(journal);
                if (error < 0)
                        goto out;
+
+               /* Journal blocked and flushed, clear needs_recovery flag. */
+               EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        }
 
-       /* Journal blocked and flushed, clear needs_recovery flag. */
-       EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
        error = ext4_commit_super(sb, 1);
 out:
        if (journal)
@@ -4850,8 +4872,11 @@ static int ext4_unfreeze(struct super_block *sb)
        if (sb->s_flags & MS_RDONLY)
                return 0;
 
-       /* Reset the needs_recovery flag before the fs is unlocked. */
-       EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+       if (EXT4_SB(sb)->s_journal) {
+               /* Reset the needs_recovery flag before the fs is unlocked. */
+               EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+       }
+
        ext4_commit_super(sb, 1);
        return 0;
 }
@@ -5600,6 +5625,7 @@ static int __init ext4_init_fs(void)
 {
        int i, err;
 
+       ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
        ext4_li_info = NULL;
        mutex_init(&ext4_li_mtx);
 
index 4227dc4..8c44654 100644 (file)
@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  * journal_clean_one_cp_list
  *
  * Find all the written-back checkpoint buffers in the given list and
- * release them.
+ * release them. If 'destroy' is set, clean all buffers unconditionally.
  *
  * Called with j_list_lock held.
  * Returns 1 if we freed the transaction, 0 otherwise.
  */
-static int journal_clean_one_cp_list(struct journal_head *jh)
+static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 {
        struct journal_head *last_jh;
        struct journal_head *next_jh = jh;
@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
        do {
                jh = next_jh;
                next_jh = jh->b_cpnext;
-               ret = __try_to_free_cp_buf(jh);
+               if (!destroy)
+                       ret = __try_to_free_cp_buf(jh);
+               else
+                       ret = __jbd2_journal_remove_checkpoint(jh) + 1;
                if (!ret)
                        return freed;
                if (ret == 2)
@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
+ * If 'destroy' is set, release all buffers unconditionally.
  *
  * Called with j_list_lock held.
  */
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 {
        transaction_t *transaction, *last_transaction, *next_transaction;
        int ret;
@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
        do {
                transaction = next_transaction;
                next_transaction = transaction->t_cpnext;
-               ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
+               ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
+                                               destroy);
                /*
                 * This function only frees up some memory if possible so we
                 * dont have an obligation to finish processing. Bail out if
@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
                 * we can possibly see not yet submitted buffers on io_list
                 */
                ret = journal_clean_one_cp_list(transaction->
-                               t_checkpoint_io_list);
+                               t_checkpoint_io_list, destroy);
                if (need_resched())
                        return;
                /*
@@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
        } while (transaction != last_transaction);
 }
 
+/*
+ * Remove buffers from all checkpoint lists as journal is aborted and we just
+ * need to free memory
+ */
+void jbd2_journal_destroy_checkpoint(journal_t *journal)
+{
+       /*
+        * We loop because __jbd2_journal_clean_checkpoint_list() may abort
+        * early due to a need of rescheduling.
+        */
+       while (1) {
+               spin_lock(&journal->j_list_lock);
+               if (!journal->j_checkpoint_transactions) {
+                       spin_unlock(&journal->j_list_lock);
+                       break;
+               }
+               __jbd2_journal_clean_checkpoint_list(journal, true);
+               spin_unlock(&journal->j_list_lock);
+               cond_resched();
+       }
+}
+
 /*
  * journal_remove_checkpoint: called after a buffer has been committed
  * to disk (either by being write-back flushed to disk, or being
index b73e021..362e5f6 100644 (file)
@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
         * frees some memory
         */
        spin_lock(&journal->j_list_lock);
-       __jbd2_journal_clean_checkpoint_list(journal);
+       __jbd2_journal_clean_checkpoint_list(journal, false);
        spin_unlock(&journal->j_list_lock);
 
        jbd_debug(3, "JBD2: commit phase 1\n");
index 4ff3fad..8270fe9 100644 (file)
@@ -1456,7 +1456,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
        sb->s_errno    = cpu_to_be32(journal->j_errno);
        read_unlock(&journal->j_state_lock);
 
-       jbd2_write_superblock(journal, WRITE_SYNC);
+       jbd2_write_superblock(journal, WRITE_FUA);
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
@@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal)
        while (journal->j_checkpoint_transactions != NULL) {
                spin_unlock(&journal->j_list_lock);
                mutex_lock(&journal->j_checkpoint_mutex);
-               jbd2_log_do_checkpoint(journal);
+               err = jbd2_log_do_checkpoint(journal);
                mutex_unlock(&journal->j_checkpoint_mutex);
+               /*
+                * If checkpointing failed, just free the buffers to avoid
+                * looping forever
+                */
+               if (err) {
+                       jbd2_journal_destroy_checkpoint(journal);
+                       spin_lock(&journal->j_list_lock);
+                       break;
+               }
                spin_lock(&journal->j_list_lock);
        }
 
index f3d0617..6b8338e 100644 (file)
@@ -204,6 +204,20 @@ static int add_transaction_credits(journal_t *journal, int blocks,
                 * attach this handle to a new transaction.
                 */
                atomic_sub(total, &t->t_outstanding_credits);
+
+               /*
+                * Is the number of reserved credits in the current transaction too
+                * big to fit this handle? Wait until reserved credits are freed.
+                */
+               if (atomic_read(&journal->j_reserved_credits) + total >
+                   journal->j_max_transaction_buffers) {
+                       read_unlock(&journal->j_state_lock);
+                       wait_event(journal->j_wait_reserved,
+                                  atomic_read(&journal->j_reserved_credits) + total <=
+                                  journal->j_max_transaction_buffers);
+                       return 1;
+               }
+
                wait_transaction_locked(journal);
                return 1;
        }
@@ -262,20 +276,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
        int             rsv_blocks = 0;
        unsigned long ts = jiffies;
 
+       if (handle->h_rsv_handle)
+               rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
+
        /*
-        * 1/2 of transaction can be reserved so we can practically handle
-        * only 1/2 of maximum transaction size per operation
+        * Limit the number of reserved credits to 1/2 of maximum transaction
+        * size and limit the number of total credits to not exceed maximum
+        * transaction size per operation.
         */
-       if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) {
-               printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
-                      current->comm, blocks,
-                      journal->j_max_transaction_buffers / 2);
+       if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
+           (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
+               printk(KERN_ERR "JBD2: %s wants too many credits "
+                      "credits:%d rsv_credits:%d max:%d\n",
+                      current->comm, blocks, rsv_blocks,
+                      journal->j_max_transaction_buffers);
+               WARN_ON(1);
                return -ENOSPC;
        }
 
-       if (handle->h_rsv_handle)
-               rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
-
 alloc_transaction:
        if (!journal->j_running_transaction) {
                /*
@@ -1280,8 +1298,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
        triggers->t_abort(triggers, jh2bh(jh));
 }
 
-
-
 /**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1314,12 +1330,41 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
        if (is_handle_aborted(handle))
                return -EROFS;
-       journal = transaction->t_journal;
-       jh = jbd2_journal_grab_journal_head(bh);
-       if (!jh) {
+       if (!buffer_jbd(bh)) {
                ret = -EUCLEAN;
                goto out;
        }
+       /*
+        * We don't grab jh reference here since the buffer must be part
+        * of the running transaction.
+        */
+       jh = bh2jh(bh);
+       /*
+        * This and the following assertions are unreliable since we may see jh
+        * in inconsistent state unless we grab bh_state lock. But this is
+        * crucial to catch bugs so let's do a reliable check until the
+        * lockless handling is fully proven.
+        */
+       if (jh->b_transaction != transaction &&
+           jh->b_next_transaction != transaction) {
+               jbd_lock_bh_state(bh);
+               J_ASSERT_JH(jh, jh->b_transaction == transaction ||
+                               jh->b_next_transaction == transaction);
+               jbd_unlock_bh_state(bh);
+       }
+       if (jh->b_modified == 1) {
+               /* If it's in our transaction it must be in BJ_Metadata list. */
+               if (jh->b_transaction == transaction &&
+                   jh->b_jlist != BJ_Metadata) {
+                       jbd_lock_bh_state(bh);
+                       J_ASSERT_JH(jh, jh->b_transaction != transaction ||
+                                       jh->b_jlist == BJ_Metadata);
+                       jbd_unlock_bh_state(bh);
+               }
+               goto out;
+       }
+
+       journal = transaction->t_journal;
        jbd_debug(5, "journal_head %p\n", jh);
        JBUFFER_TRACE(jh, "entry");
 
@@ -1410,7 +1455,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
        spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
        jbd_unlock_bh_state(bh);
-       jbd2_journal_put_journal_head(jh);
 out:
        JBUFFER_TRACE(jh, "exit");
        return ret;
index ad4b286..df07e78 100644 (file)
@@ -1081,8 +1081,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
+void jbd2_journal_destroy_checkpoint(journal_t *journal);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);