static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
{
+ trace_ext4_begin_ordered_truncate(inode, new_size);
return jbd2_journal_begin_ordered_truncate(
EXT4_SB(inode->i_sb)->s_journal,
&EXT4_I(inode)->jinode,
handle_t *handle;
int err;
+ trace_ext4_evict_inode(inode);
if (inode->i_nlink) {
truncate_inode_pages(&inode->i_data, 0);
goto no_delete;
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -EIO;
+ goto failed;
+ }
+
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
/*
- * __block_prepare_write() could have dirtied some buffers. Clean
+ * __block_write_begin() could have dirtied some buffers. Clean
* the dirty bit as jbd2_journal_get_write_access() could complain
* otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_prepare_write() isn't a real problem here as we clear
+ * by __block_write_begin() isn't a real problem here as we clear
* the bit before releasing a page lock and thus writeback cannot
* ever write the buffer.
*/
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+ struct ext4_map_blocks *map)
{
struct pagevec pvec;
unsigned long index, end;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
loff_t size = i_size_read(inode);
- unsigned int len;
- struct buffer_head *page_bufs = NULL;
+ unsigned int len, block_start;
+ struct buffer_head *bh, *page_bufs = NULL;
int journal_data = ext4_should_journal_data(inode);
+ sector_t pblock = 0, cur_logical = 0;
+ struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
- int commit_write = 0;
+ int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
len = size & ~PAGE_CACHE_MASK;
else
len = PAGE_CACHE_SIZE;
+ if (map) {
+ cur_logical = index << (PAGE_CACHE_SHIFT -
+ inode->i_blkbits);
+ pblock = map->m_pblk + (cur_logical -
+ map->m_lblk);
+ }
index++;
BUG_ON(!PageLocked(page));
/*
* If the page does not have buffers (for
* whatever reason), try to create them using
- * block_prepare_write. If this fails,
+ * __block_write_begin. If this fails,
* redirty the page and move on.
*/
if (!page_has_buffers(page)) {
- if (block_prepare_write(page, 0, len,
+ if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
redirty_page:
redirty_page_for_writepage(mpd->wbc,
}
commit_write = 1;
}
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We couldn't do block allocation for
- * some reason.
- */
+
+ bh = page_bufs = page_buffers(page);
+ block_start = 0;
+ do {
+ if (!bh)
+ goto redirty_page;
+ if (map && (cur_logical >= map->m_lblk) &&
+ (cur_logical <= (map->m_lblk +
+ (map->m_len - 1)))) {
+ if (buffer_delay(bh)) {
+ clear_buffer_delay(bh);
+ bh->b_blocknr = pblock;
+ }
+ if (buffer_unwritten(bh) ||
+ buffer_mapped(bh))
+ BUG_ON(bh->b_blocknr != pblock);
+ if (map->m_flags & EXT4_MAP_UNINIT)
+ set_buffer_uninit(bh);
+ clear_buffer_unwritten(bh);
+ }
+
+ /* redirty page if block allocation undone */
+ if (buffer_delay(bh) || buffer_unwritten(bh))
+ redirty_page = 1;
+ bh = bh->b_this_page;
+ block_start += bh->b_size;
+ cur_logical++;
+ pblock++;
+ } while (bh != page_bufs);
+
+ if (redirty_page)
goto redirty_page;
- }
if (commit_write)
/* mark the buffer_heads as dirty & uptodate */
block_commit_write(page, 0, len);
- if (journal_data && PageChecked(page))
+ /*
+ * Delalloc doesn't support data journalling,
+ * but eventually maybe we'll lift this
+ * restriction.
+ */
+ if (unlikely(journal_data && PageChecked(page)))
err = __ext4_journalled_writepage(page, len);
- else if (buffer_uninit(page_bufs)) {
- ext4_set_bh_endio(page_bufs, inode);
- err = block_write_full_page_endio(page,
- noalloc_get_block_write,
- mpd->wbc, ext4_end_io_buffer_write);
- } else
- err = block_write_full_page(page,
- noalloc_get_block_write, mpd->wbc);
+ else
+ err = ext4_bio_write_page(&io_submit, page,
+ len, mpd->wbc);
if (!err)
mpd->pages_written++;
}
pagevec_release(&pvec);
}
+ ext4_io_submit(&io_submit);
return ret;
}
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
-{
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- int blocks = map->m_len;
- sector_t pblock = map->m_pblk, cur_logical;
- struct buffer_head *head, *bh;
- pgoff_t index, end;
- struct pagevec pvec;
- int nr_pages, i;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- BUG_ON(!page_has_buffers(page));
-
- bh = page_buffers(page);
- head = bh;
-
- /* skip blocks out of the range */
- do {
- if (cur_logical >= map->m_lblk)
- break;
- cur_logical++;
- } while ((bh = bh->b_this_page) != head);
-
- do {
- if (cur_logical > map->m_lblk + (blocks - 1))
- break;
-
- if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
- BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
- if (buffer_delay(bh)) {
- clear_buffer_delay(bh);
- bh->b_blocknr = pblock;
- } else {
- /*
- * unwritten already should have
- * blocknr assigned. Verify that
- */
- clear_buffer_unwritten(bh);
- BUG_ON(bh->b_blocknr != pblock);
- }
-
- } else if (buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
-
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
- cur_logical++;
- pblock++;
- } while ((bh = bh->b_this_page) != head);
- }
- pagevec_release(&pvec);
- }
-}
-
-
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
- struct ext4_map_blocks map;
+ struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
}
BUG_ON(blks == 0);
+ mapp = ↦
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if ((mpd->b_state & (1 << BH_Delay)) ||
- (mpd->b_state & (1 << BH_Unwritten)))
- mpage_put_bnr_to_bhs(mpd, &map);
-
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
}
submit_io:
- mpage_da_submit_io(mpd);
+ mpage_da_submit_io(mpd, mapp);
mpd->io_done = 1;
}
* The function finds extents of pages and scan them for all blocks.
*/
static int __mpage_da_writepage(struct page *page,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data *mpd = data;
struct inode *inode = mpd->inode;
struct buffer_head *bh, *head;
sector_t logical;
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
+ * XXX: __block_write_begin() unmaps passed block, is it OK?
*/
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/*
* This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a
- * callback function for block_prepare_write() and block_write_full_page().
+ * callback function for block_write_begin() and block_write_full_page().
* These functions should only try to map a single block at a time.
*
* Since this function doesn't do block allocations even if the caller
/*
* If the page does not have buffers (for whatever reason),
- * try to create them using block_prepare_write. If this
+ * try to create them using __block_write_begin. If this
* fails, redirty the page and move on.
*/
- if (!page_buffers(page)) {
- if (block_prepare_write(page, 0, len,
+ if (!page_has_buffers(page)) {
+ if (__block_write_begin(page, 0, len,
noalloc_get_block_write)) {
redirty_page:
redirty_page_for_writepage(wbc, page);
if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
ext4_bh_delay_or_unwritten)) {
/*
- * We don't want to do block allocation So redirty the
- * page and return We may reach here when we do a
- * journal commit via
- * journal_submit_inode_data_buffers. If we don't
- * have mapping block we just ignore them. We can also
- * reach here via shrink_page_list
+ * We don't want to do block allocation, so redirty
+ * the page and return. We may reach here when we do
+ * a journal commit via journal_submit_inode_data_buffers.
+ * We can also reach here via shrink_page_list
*/
goto redirty_page;
}
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
- struct mpage_da_data *mpd)
+ struct mpage_da_data *mpd,
+ pgoff_t *done_index)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
- int nr_pages;
+ unsigned nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
+ int tag;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+
+ *done_index = index;
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
break;
}
+ *done_index = page->index + 1;
+
lock_page(page);
/*
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ pgoff_t done_index = 0;
+ pgoff_t end;
trace_ext4_da_writepages(inode, wbc);
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
- } else
+ end = -1;
+ } else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ }
/*
* This works around two forms of stupidity. The first is in
pages_skipped = wbc->pages_skipped;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+
while (!ret && wbc->nr_to_write > 0) {
/*
mpd.io_done = 0;
mpd.pages_written = 0;
mpd.retval = 0;
- ret = write_cache_pages_da(mapping, wbc, &mpd);
+ ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit
__func__, wbc->nr_to_write, ret);
/* Update index */
- index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = index;
+ mapping->writeback_index = done_index;
out_writepages:
wbc->nr_to_write -= nr_to_writebump;
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- iput(io->inode);
- kfree(io);
-}
-
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef EXT4_DEBUG
- struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
- unsigned long flags;
-
- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
- return;
- }
-
- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
- cur = &io->list;
- before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
- after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
-
- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
- }
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- if (list_empty(&io->list))
- return ret;
-
- if (io->flag != EXT4_IO_UNWRITTEN)
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten"
- "extents to written extents, error is %d"
- " io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
- }
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
- /* clear the DIO AIO unwritten flag */
- io->flag = 0;
- return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- return;
- }
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
- ext4_io_end_t *io;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret = 0;
- int ret2 = 0;
-
- if (list_empty(&ei->i_completed_io_list))
- return ret;
-
- dump_completed_IO(inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- while (!list_empty(&ei->i_completed_io_list)){
- io = list_entry(ei->i_completed_io_list.next,
- ext4_io_end_t, list);
- /*
- * Calling ext4_end_io_nolock() to convert completed
- * IO to written.
- *
- * When ext4_sync_file() is called, run_queue() may already
- * about to flush the work corresponding to this io structure.
- * It will be upset if it founds the io structure related
- * to the work-to-be schedule is freed.
- *
- * Thus we need to keep the io structure still valid here after
- * convertion finished. The io structure has a flag to
- * avoid double converting from both fsync and background work
- * queue work.
- */
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- ret = ext4_end_io_nolock(io);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (ret < 0)
- ret2 = ret;
- else
- list_del_init(&io->list);
- }
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
- ext4_io_end_t *io = NULL;
-
- io = kmalloc(sizeof(*io), flags);
-
- if (io) {
- igrab(inode);
- io->inode = inode;
- io->flag = 0;
- io->offset = 0;
- io->size = 0;
- io->page = NULL;
- io->iocb = NULL;
- io->result = 0;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
-
- return io;
-}
-
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
size);
/* if not aio dio with unwritten extents, just free io and return */
- if (io_end->flag != EXT4_IO_UNWRITTEN){
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
out:
goto out;
}
- io_end->flag = EXT4_IO_UNWRITTEN;
+ io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
{
struct inode *inode = dentry->d_inode;
int error, rc = 0;
+ int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
error = inode_change_ok(inode, attr);
error = PTR_ERR(handle);
goto err_out;
}
-
- error = ext4_orphan_add(handle, inode);
+ if (ext4_handle_valid(handle)) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
goto err_out;
}
ext4_orphan_del(handle, inode);
+ orphan = 0;
ext4_journal_stop(handle);
goto err_out;
}
* If the call to ext4_truncate failed to get a transaction handle at
* all, we need to clean up the in-core orphan list manually.
*/
- if (inode->i_nlink)
+ if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
if (!rc && (ia_valid & ATTR_MODE))
* will return the blocks that include the delayed allocation
* blocks for this file.
*/
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
return 0;
*
* Also account for superblock, inode, quota and xattr blocks
*/
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
int err, ret;
might_sleep();
+ trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&