#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include "xattr.h"
* don't use page cache.
*/
if (ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+ inode->i_ino != EXT4_JOURNAL_INO) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
- jbd2_log_start_commit(journal, commit_tid);
- jbd2_log_wait_commit(journal, commit_tid);
+ jbd2_complete_transaction(journal, commit_tid);
filemap_write_and_wait(&inode->i_data);
}
truncate_inode_pages(&inode->i_data, 0);
used = ei->i_reserved_data_blocks;
}
+ if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
+ "with only %d reserved metadata blocks\n", __func__,
+ inode->i_ino, ei->i_allocated_meta_blocks,
+ ei->i_reserved_meta_blocks);
+ WARN_ON(1);
+ ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
+ }
+
/* Update per-inode reservations */
ei->i_reserved_data_blocks -= used;
ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
"logical block %lu\n", inode->i_ino, flags, map->m_len,
(unsigned long) map->m_lblk);
+
+ /* We can handle the block number less than EXT_MAX_BLOCKS */
+ if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
+ return -EIO;
+
/*
* Try to see if we can get the block without requesting a new
* file system block.
bh = sb_getblk(inode->i_sb, map.m_pblk);
if (!bh) {
- *errp = -EIO;
+ *errp = -ENOMEM;
return NULL;
}
if (map.m_flags & EXT4_MAP_NEW) {
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
int ret;
+ ext4_lblk_t save_last_lblock;
+ int save_len;
+
+ /*
+ * We will charge metadata quota at writeout time; this saves
+ * us from metadata over-estimation, though we may go over by
+ * a small amount in the end. Here we just reserve for data.
+ */
+ ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
+ if (ret)
+ return ret;
/*
* recalculate the amount of metadata blocks to reserve
*/
repeat:
spin_lock(&ei->i_block_reservation_lock);
+ /*
+ * ext4_calc_metadata_amount() has side effects, which we have
+ * to be prepared undo if we fail to claim space.
+ */
+ save_len = ei->i_da_metadata_calc_len;
+ save_last_lblock = ei->i_da_metadata_calc_last_lblock;
md_needed = EXT4_NUM_B2C(sbi,
ext4_calc_metadata_amount(inode, lblock));
trace_ext4_da_reserve_space(inode, md_needed);
- spin_unlock(&ei->i_block_reservation_lock);
- /*
- * We will charge metadata quota at writeout time; this saves
- * us from metadata over-estimation, though we may go over by
- * a small amount in the end. Here we just reserve for data.
- */
- ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
- if (ret)
- return ret;
/*
* We do still charge estimated metadata to the sb though;
* we cannot afford to run out of free blocks.
*/
if (ext4_claim_free_clusters(sbi, md_needed + 1, 0)) {
- dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
+ ei->i_da_metadata_calc_len = save_len;
+ ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+ spin_unlock(&ei->i_block_reservation_lock);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
+ dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
- spin_lock(&ei->i_block_reservation_lock);
ei->i_reserved_data_blocks++;
ei->i_reserved_meta_blocks += md_needed;
spin_unlock(&ei->i_block_reservation_lock);
clear_buffer_unwritten(bh);
}
- /* skip page if block allocation undone */
- if (buffer_delay(bh) || buffer_unwritten(bh))
+ /*
+ * skip page if block allocation undone and
+ * block is dirty
+ */
+ if (ext4_bh_delay_or_unwritten(NULL, bh))
skip_page = 1;
bh = bh->b_this_page;
block_start += bh->b_size;
index = mpd->first_page;
end = mpd->next_page - 1;
+ pagevec_init(&pvec, 0);
while (index <= end) {
nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
if (nr_pages == 0)
free_blocks = EXT4_C2B(sbi,
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+ /*
+ * Start pushing delalloc when 1/2 of free blocks are dirty.
+ */
+ if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
+ !writeback_in_progress(sb->s_bdi) &&
+ down_read_trylock(&sb->s_umount)) {
+ writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
+ up_read(&sb->s_umount);
+ }
+
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
/*
*/
return 1;
}
- /*
- * Even if we don't switch but are nearing capacity,
- * start pushing delalloc when 1/2 of free blocks are dirty.
- */
- if (free_blocks < 2 * dirty_blocks)
- writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
-
return 0;
}
pgoff_t index;
struct inode *inode = mapping->host;
handle_t *handle;
- loff_t page_len;
index = pos >> PAGE_CACHE_SHIFT;
*/
if (pos + len > inode->i_size)
ext4_truncate_failed_write(inode);
- } else {
- page_len = pos & (PAGE_CACHE_SIZE - 1);
- if (page_len > 0) {
- ret = ext4_discard_partial_page_buffers_no_lock(handle,
- inode, page, pos - page_len, page_len,
- EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
- }
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
loff_t new_i_size;
unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata;
- loff_t page_len;
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
- if (ext4_should_order_data(inode)) {
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
return ext4_ordered_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else if (ext4_should_writeback_data(inode)) {
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
return ext4_writeback_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else {
+ default:
BUG();
}
}
*/
new_i_size = pos + copied;
- if (new_i_size > EXT4_I(inode)->i_disksize) {
+ if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
if (ext4_da_should_update_i_disksize(page, end)) {
down_write(&EXT4_I(inode)->i_data_sem);
if (new_i_size > EXT4_I(inode)->i_disksize) {
}
ret2 = generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
-
- page_len = PAGE_CACHE_SIZE -
- ((pos + copied - 1) & (PAGE_CACHE_SIZE - 1));
-
- if (page_len > 0) {
- ret = ext4_discard_partial_page_buffers_no_lock(handle,
- inode, page, pos + copied - 1, page_len,
- EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED);
- }
-
copied = ret2;
if (ret2 < 0)
ret = ret2;
iocb->private, io_end->inode->i_ino, iocb, offset,
size);
+ iocb->private = NULL;
+
/* if not aio dio with unwritten extents, just free io and return */
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
- iocb->private = NULL;
out:
+ inode_dio_done(inode);
if (is_async)
aio_complete(iocb, ret, 0);
- inode_dio_done(inode);
return;
}
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
/* queue the work to convert unwritten extents to written */
- iocb->private = NULL;
queue_work(wq, &io_end->work);
-
- /* XXX: probably should move into the real I/O completion handler */
- inode_dio_done(inode);
}
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
iocb->private = NULL;
EXT4_I(inode)->cur_aio_dio = NULL;
if (!is_sync_kiocb(iocb)) {
- iocb->private = ext4_init_io_end(inode, GFP_NOFS);
- if (!iocb->private)
+ ext4_io_end_t *io_end =
+ ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end)
return -ENOMEM;
+ io_end->flag |= EXT4_IO_END_DIRECT;
+ iocb->private = io_end;
/*
* we save the io structure for current async
* direct IO, so that later ext4_map_blocks()
void ext4_set_aops(struct inode *inode)
{
- if (ext4_should_order_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_order_data(inode))
- inode->i_mapping->a_ops = &ext4_ordered_aops;
- else if (ext4_should_writeback_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_writeback_data(inode))
- inode->i_mapping->a_ops = &ext4_writeback_aops;
- else
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_ordered_aops;
+ break;
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_writeback_aops;
+ break;
+ case EXT4_INODE_JOURNAL_DATA_MODE:
inode->i_mapping->a_ops = &ext4_journalled_aops;
+ break;
+ default:
+ BUG();
+ }
}
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
- if (!page_has_buffers(page)) {
- /*
- * If the range to be discarded covers a partial block
- * we need to get the page buffers. This is because
- * partial blocks cannot be released and the page needs
- * to be updated with the contents of the block before
- * we write the zeros on top of it.
- */
- if ((from & (blocksize - 1)) ||
- ((from + length) & (blocksize - 1))) {
- create_empty_buffers(page, blocksize, 0);
- } else {
- /*
- * If there are no partial blocks,
- * there is nothing to update,
- * so we can return now
- */
- return 0;
- }
- }
+ if (!page_has_buffers(page))
+ create_empty_buffers(page, blocksize, 0);
/* Find the buffer that contains "offset" */
bh = page_buffers(page);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
bh = sb_getblk(sb, block);
- if (!bh) {
- EXT4_ERROR_INODE_BLOCK(inode, block,
- "unable to read itable block");
- return -EIO;
- }
+ if (!bh)
+ return -ENOMEM;
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
struct ext4_inode_info *ei = EXT4_I(inode);
struct buffer_head *bh = iloc->bh;
int err = 0, rc, block;
+ int need_datasync = 0;
/* For fields not not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
raw_inode->i_file_acl_high =
cpu_to_le16(ei->i_file_acl >> 32);
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
- ext4_isize_set(raw_inode, ei->i_disksize);
+ if (ei->i_disksize != ext4_isize(raw_inode)) {
+ ext4_isize_set(raw_inode, ei->i_disksize);
+ need_datasync = 1;
+ }
if (ei->i_disksize > 0x7fffffffULL) {
struct super_block *sb = inode->i_sb;
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
err = rc;
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
- ext4_update_inode_fsync_trans(handle, inode, 0);
+ ext4_update_inode_fsync_trans(handle, inode, need_datasync);
out_brelse:
brelse(bh);
ext4_std_error(inode->i_sb, err);
struct kstat *stat)
{
struct inode *inode;
- unsigned long delalloc_blocks;
+ unsigned long long delalloc_blocks;
inode = dentry->d_inode;
generic_fillattr(inode, stat);
*/
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9);
return 0;
}