ext4: fix deadlock in ext4_symlink() in ENOSPC conditions
author Jan Kara <jack@suse.cz>
Tue, 3 May 2011 15:12:58 +0000 (11:12 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Tue, 3 May 2011 15:12:58 +0000 (11:12 -0400)
ext4_symlink() cannot call __page_symlink() with a transaction open.
__page_symlink() calls ext4_write_begin(), which can wait for a
transaction commit if we are running out of space, thus causing a
deadlock. Also, error recovery in ext4_truncate_failed_write() does not
account for the transaction already being started (although I'm not
aware of any particular deadlock here).

Fix the problem by stopping the transaction before calling
__page_symlink() (we have to be careful and put the inode on the orphan
list so that it gets deleted in case of a crash) and starting another
one after __page_symlink() returns, in order to add the symlink to the
directory.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/namei.c

index cadf04b..3c7a06e 100644 (file)
@@ -2250,6 +2250,7 @@ static int ext4_symlink(struct inode *dir,
        handle_t *handle;
        struct inode *inode;
        int l, err, retries = 0;
+       int credits;
 
        l = strlen(symname)+1;
        if (l > dir->i_sb->s_blocksize)
@@ -2257,10 +2258,26 @@ static int ext4_symlink(struct inode *dir,
 
        dquot_initialize(dir);
 
+       if (l > EXT4_N_BLOCKS * 4) {
+               /*
+                * For non-fast symlinks, we just allocate inode and put it on
+                * orphan list in the first transaction => we need bitmap,
+                * group descriptor, sb, inode block, quota blocks.
+                */
+               credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+       } else {
+               /*
+                * Fast symlink. We have to add entry to directory
+                * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
+                * allocate new inode (bitmap, group descriptor, inode block,
+                * quota blocks, sb is already counted in previous macros).
+                */
+               credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                         EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
+                         EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
+       }
 retry:
-       handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
-                                       EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-                                       EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
+       handle = ext4_journal_start(dir, credits);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
 
@@ -2273,21 +2290,44 @@ retry:
        if (IS_ERR(inode))
                goto out_stop;
 
-       if (l > sizeof(EXT4_I(inode)->i_data)) {
+       if (l > EXT4_N_BLOCKS * 4) {
                inode->i_op = &ext4_symlink_inode_operations;
                ext4_set_aops(inode);
                /*
-                * page_symlink() calls into ext4_prepare/commit_write.
-                * We have a transaction open.  All is sweetness.  It also sets
-                * i_size in generic_commit_write().
+                * We cannot call page_symlink() with transaction started
+                * because it calls into ext4_write_begin() which can wait
+                * for transaction commit if we are running out of space
+                * and thus we deadlock. So we have to stop transaction now
+                * and restart it when symlink contents is written.
+                * 
+                * To keep fs consistent in case of crash, we have to put inode
+                * to orphan list in the mean time.
                 */
+               drop_nlink(inode);
+               err = ext4_orphan_add(handle, inode);
+               ext4_journal_stop(handle);
+               if (err)
+                       goto err_drop_inode;
                err = __page_symlink(inode, symname, l, 1);
+               if (err)
+                       goto err_drop_inode;
+               /*
+                * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
+                * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
+                */
+               handle = ext4_journal_start(dir,
+                               EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
+                               EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
+               if (IS_ERR(handle)) {
+                       err = PTR_ERR(handle);
+                       goto err_drop_inode;
+               }
+               inc_nlink(inode);
+               err = ext4_orphan_del(handle, inode);
                if (err) {
+                       ext4_journal_stop(handle);
                        clear_nlink(inode);
-                       unlock_new_inode(inode);
-                       ext4_mark_inode_dirty(handle, inode);
-                       iput(inode);
-                       goto out_stop;
+                       goto err_drop_inode;
                }
        } else {
                /* clear the extent format for fast symlink */
@@ -2303,6 +2343,10 @@ out_stop:
        if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
                goto retry;
        return err;
+err_drop_inode:
+       unlock_new_inode(inode);
+       iput(inode);
+       return err;
 }
 
 static int ext4_link(struct dentry *old_dentry,