X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=fs%2Fext4%2Finode.c;h=a0ab3754d0d61a26aa366a68b9e5704b292b8924;hp=0afc8c1d8cf3597bf075ecb731a16accb67687ad;hb=a42afc5f56f319107e987aa6adf2f65d93d527c7;hpb=5abd9ccced7a726c817dd6b5b96bc933859138d1 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0afc8c1d8cf3..a08ec795995f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -60,6 +60,7 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, } static void ext4_invalidatepage(struct page *page, unsigned long offset); +static int ext4_writepage(struct page *page, struct writeback_control *wbc); /* * Test whether an inode is a fast symlink. @@ -167,11 +168,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, /* * Called at the last iput() if i_nlink is zero. */ -void ext4_delete_inode(struct inode *inode) +void ext4_evict_inode(struct inode *inode) { handle_t *handle; int err; + if (inode->i_nlink) { + truncate_inode_pages(&inode->i_data, 0); + goto no_delete; + } + if (!is_bad_inode(inode)) dquot_initialize(inode); @@ -221,6 +227,7 @@ void ext4_delete_inode(struct inode *inode) "couldn't extend journal (err %d)", err); stop_handle: ext4_journal_stop(handle); + ext4_orphan_del(NULL, inode); goto no_delete; } } @@ -245,13 +252,13 @@ void ext4_delete_inode(struct inode *inode) */ if (ext4_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ - clear_inode(inode); + ext4_clear_inode(inode); else ext4_free_inode(handle, inode); ext4_journal_stop(handle); return; no_delete: - clear_inode(inode); /* We must guarantee clearing of inode... */ + ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } typedef struct { @@ -337,9 +344,11 @@ static int ext4_block_to_path(struct inode *inode, return n; } -static int __ext4_check_blockref(const char *function, struct inode *inode, +static int __ext4_check_blockref(const char *function, unsigned int line, + struct inode *inode, __le32 *p, unsigned int max) { + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; __le32 *bref = p; unsigned int blk; @@ -348,8 +357,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, if (blk && unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), blk, 1))) { - ext4_error_inode(function, inode, - "invalid block reference %u", blk); + es->s_last_error_block = cpu_to_le64(blk); + ext4_error_inode(inode, function, line, blk, + "invalid block"); return -EIO; } } @@ -358,11 +368,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode, #define ext4_check_indirect_blockref(inode, bh) \ - __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data, \ + __ext4_check_blockref(__func__, __LINE__, inode, \ + (__le32 *)(bh)->b_data, \ EXT4_ADDR_PER_BLOCK((inode)->i_sb)) #define ext4_check_inode_blockref(inode) \ - __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data, \ + __ext4_check_blockref(__func__, __LINE__, inode, \ + EXT4_I(inode)->i_data, \ EXT4_NDIR_BLOCKS) /** @@ -1128,20 +1140,24 @@ void ext4_da_update_reserve_space(struct inode *inode, ext4_discard_preallocations(inode); } -static int check_block_validity(struct inode *inode, const char *func, +static int __check_block_validity(struct inode *inode, const char *func, + unsigned int line, struct ext4_map_blocks *map) { if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, map->m_len)) { - ext4_error_inode(func, inode, - "lblock %lu mapped to illegal pblock %llu " - "(length %d)", (unsigned long) map->m_lblk, - map->m_pblk, map->m_len); + ext4_error_inode(inode, func, line, map->m_pblk, + "lblock %lu mapped to illegal pblock " + "(length %d)", (unsigned long) map->m_lblk, + map->m_len); return -EIO; } return 0; } +#define check_block_validity(inode, map) \ + __check_block_validity((inode), __func__, __LINE__, (map)) + /* * Return the number of contiguous dirty pages in a given inode * starting at page frame idx. @@ -1192,8 +1208,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, break; idx++; num++; - if (num >= max_pages) + if (num >= max_pages) { + done = 1; break; + } } pagevec_release(&pvec); } @@ -1244,7 +1262,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_read((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, __func__, map); + int ret = check_block_validity(inode, map); if (ret != 0) return ret; } @@ -1324,9 +1342,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { - int ret = check_block_validity(inode, - "ext4_map_blocks_after_alloc", - map); + int ret = check_block_validity(inode, map); if (ret != 0) return ret; } @@ -1519,9 +1535,25 @@ static int walk_page_buffers(handle_t *handle, static int do_journal_get_write_access(handle_t *handle, struct buffer_head *bh) { + int dirty = buffer_dirty(bh); + int ret; + if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; - return ext4_journal_get_write_access(handle, bh); + /* + * __block_prepare_write() could have dirtied some buffers. Clean + * the dirty bit as jbd2_journal_get_write_access() could complain + * otherwise about fs integrity issues. Setting of the dirty bit + * by __block_prepare_write() isn't a real problem here as we clear + * the bit before releasing a page lock and thus writeback cannot + * ever write the buffer. + */ + if (dirty) + clear_buffer_dirty(bh); + ret = ext4_journal_get_write_access(handle, bh); + if (!ret && dirty) + ret = ext4_handle_dirty_metadata(handle, NULL, bh); + return ret; } /* @@ -1578,11 +1610,9 @@ retry: *pagep = page; if (ext4_should_dioread_nolock(inode)) - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block_write); + ret = __block_write_begin(page, pos, len, ext4_get_block_write); else - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block); + ret = __block_write_begin(page, pos, len, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -1593,7 +1623,7 @@ retry: unlock_page(page); page_cache_release(page); /* - * block_write_begin may have instantiated a few blocks + * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. * @@ -2004,7 +2034,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) BUG_ON(PageWriteback(page)); pages_skipped = mpd->wbc->pages_skipped; - err = mapping->a_ops->writepage(page, mpd->wbc); + err = ext4_writepage(page, mpd->wbc); if (!err && (pages_skipped == mpd->wbc->pages_skipped)) /* * have successfully written the page @@ -2076,7 +2106,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, } while ((bh = bh->b_this_page) != head); do { - if (cur_logical >= map->m_lblk + blocks) + if (cur_logical > map->m_lblk + (blocks - 1)) break; if (buffer_delay(bh) || buffer_unwritten(bh)) { @@ -2160,14 +2190,15 @@ static void ext4_print_free_blocks(struct inode *inode) } /* - * mpage_da_map_blocks - go through given space + * mpage_da_map_and_submit - go through given space, map them + * if necessary, and then submit them for I/O * * @mpd - bh describing space * * The function skips space we know is already mapped to disk blocks. * */ -static int mpage_da_map_blocks(struct mpage_da_data *mpd) +static void mpage_da_map_and_submit(struct mpage_da_data *mpd) { int err, blks, get_blocks_flags; struct ext4_map_blocks map; @@ -2177,24 +2208,20 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) handle_t *handle = NULL; /* - * We consider only non-mapped and non-allocated blocks - */ - if ((mpd->b_state & (1 << BH_Mapped)) && - !(mpd->b_state & (1 << BH_Delay)) && - !(mpd->b_state & (1 << BH_Unwritten))) - return 0; - - /* - * If we didn't accumulate anything to write simply return + * If the blocks are mapped already, or we couldn't accumulate + * any blocks, then proceed immediately to the submission stage. */ - if (!mpd->b_size) - return 0; + if ((mpd->b_size == 0) || + ((mpd->b_state & (1 << BH_Mapped)) && + !(mpd->b_state & (1 << BH_Delay)) && + !(mpd->b_state & (1 << BH_Unwritten)))) + goto submit_io; handle = ext4_journal_current_handle(); BUG_ON(!handle); /* - * Call ext4_get_blocks() to allocate any delayed allocation + * Call ext4_map_blocks() to allocate any delayed allocation * blocks, or to convert an uninitialized extent to be * initialized (in the case where we have written into * one or more preallocated blocks). @@ -2203,7 +2230,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * indicate that we are on the delayed allocation path. This * affects functions in many different parts of the allocation * call path. This flag exists primarily because we don't - * want to change *many* call functions, so ext4_get_blocks() + * want to change *many* call functions, so ext4_map_blocks() * will set the magic i_delalloc_reserved_flag once the * inode's allocation semaphore is taken. * @@ -2221,19 +2248,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); if (blks < 0) { + struct super_block *sb = mpd->inode->i_sb; + err = blks; /* - * If get block returns with error we simply - * return. Later writepage will redirty the page and - * writepages will find the dirty page again + * If get block returns EAGAIN or ENOSPC and there + * appears to be free blocks we will call + * ext4_writepage() for all of the pages which will + * just redirty the pages. */ if (err == -EAGAIN) - return 0; + goto submit_io; if (err == -ENOSPC && - ext4_count_free_blocks(mpd->inode->i_sb)) { + ext4_count_free_blocks(sb)) { mpd->retval = err; - return 0; + goto submit_io; } /* @@ -2243,21 +2273,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) * writepage and writepages will again try to write * the same. */ - ext4_msg(mpd->inode->i_sb, KERN_CRIT, - "delayed block allocation failed for inode %lu at " - "logical offset %llu with max blocks %zd with " - "error %d", mpd->inode->i_ino, - (unsigned long long) next, - mpd->b_size >> mpd->inode->i_blkbits, err); - printk(KERN_CRIT "This should not happen!! " - "Data will be lost\n"); - if (err == -ENOSPC) { - ext4_print_free_blocks(mpd->inode); + if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { + ext4_msg(sb, KERN_CRIT, + "delayed block allocation failed for inode %lu " + "at logical offset %llu with max blocks %zd " + "with error %d", mpd->inode->i_ino, + (unsigned long long) next, + mpd->b_size >> mpd->inode->i_blkbits, err); + ext4_msg(sb, KERN_CRIT, + "This should not happen!! Data will be lost\n"); + if (err == -ENOSPC) + ext4_print_free_blocks(mpd->inode); } /* invalidate all the pages */ ext4_da_block_invalidatepages(mpd, next, mpd->b_size >> mpd->inode->i_blkbits); - return err; + return; } BUG_ON(blks == 0); @@ -2280,7 +2311,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) if (ext4_should_order_data(mpd->inode)) { err = ext4_jbd2_file_inode(handle, mpd->inode); if (err) - return err; + /* This only happens if the journal is aborted */ + return; } /* @@ -2291,10 +2323,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) disksize = i_size_read(mpd->inode); if (disksize > EXT4_I(mpd->inode)->i_disksize) { ext4_update_i_disksize(mpd->inode, disksize); - return ext4_mark_inode_dirty(handle, mpd->inode); + err = ext4_mark_inode_dirty(handle, mpd->inode); + if (err) + ext4_error(mpd->inode->i_sb, + "Failed to mark inode %lu dirty", + mpd->inode->i_ino); } - return 0; +submit_io: + mpage_da_submit_io(mpd); + mpd->io_done = 1; } #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ @@ -2320,7 +2358,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, * XXX Don't go larger than mballoc is willing to allocate * This is a stopgap solution. We eventually need to fold * mpage_da_submit_io() into this function and then call - * ext4_get_blocks() multiple times in a loop + * ext4_map_blocks() multiple times in a loop */ if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize) goto flush_it; @@ -2371,9 +2409,7 @@ flush_it: * We couldn't merge the block to our extent, so we * need to flush current extent and start new one */ - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); - mpd->io_done = 1; + mpage_da_map_and_submit(mpd); return; } @@ -2405,15 +2441,13 @@ static int __mpage_da_writepage(struct page *page, if (mpd->next_page != page->index) { /* * Nope, we can't. So, we map non-allocated blocks - * and start IO on them using writepage() + * and start IO on them */ if (mpd->next_page != mpd->first_page) { - if (mpage_da_map_blocks(mpd) == 0) - mpage_da_submit_io(mpd); + mpage_da_map_and_submit(mpd); /* * skip rest of the page in the page_vec */ - mpd->io_done = 1; redirty_page_for_writepage(wbc, page); unlock_page(page); return MPAGE_DA_EXTENT_TAIL; @@ -2553,18 +2587,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* * This function is used as a standard get_block_t calback function * when there is no desire to allocate any blocks. It is used as a - * callback function for block_prepare_write(), nobh_writepage(), and - * block_write_full_page(). These functions should only try to map a - * single block at a time. + * callback function for block_prepare_write() and block_write_full_page(). + * These functions should only try to map a single block at a time. * * Since this function doesn't do block allocations even if the caller * requests it by passing in create=1, it is critically important that * any caller checks to make sure that any buffer heads are returned * by this function are either all already mapped or marked for - * delayed allocation before calling nobh_writepage() or - * block_write_full_page(). Otherwise, b_blocknr could be left - * unitialized, and the page write functions will be taken by - * surprise. + * delayed allocation before calling block_write_full_page(). Otherwise, + * b_blocknr could be left unitialized, and the page write functions will + * be taken by surprise. */ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -2672,7 +2704,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); static int ext4_writepage(struct page *page, struct writeback_control *wbc) { - int ret = 0; + int ret = 0, commit_write = 0; loff_t size; unsigned int len; struct buffer_head *page_bufs = NULL; @@ -2685,60 +2717,37 @@ static int ext4_writepage(struct page *page, else len = PAGE_CACHE_SIZE; - if (page_has_buffers(page)) { - page_bufs = page_buffers(page); - if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, - ext4_bh_delay_or_unwritten)) { - /* - * We don't want to do block allocation - * So redirty the page and return - * We may reach here when we do a journal commit - * via journal_submit_inode_data_buffers. - * If we don't have mapping block we just ignore - * them. We can also reach here via shrink_page_list - */ + /* + * If the page does not have buffers (for whatever reason), + * try to create them using block_prepare_write. If this + * fails, redirty the page and move on. + */ + if (!page_buffers(page)) { + if (block_prepare_write(page, 0, len, + noalloc_get_block_write)) { + redirty_page: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; } - } else { + commit_write = 1; + } + page_bufs = page_buffers(page); + if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, + ext4_bh_delay_or_unwritten)) { /* - * The test for page_has_buffers() is subtle: - * We know the page is dirty but it lost buffers. That means - * that at some moment in time after write_begin()/write_end() - * has been called all buffers have been clean and thus they - * must have been written at least once. So they are all - * mapped and we can happily proceed with mapping them - * and writing the page. - * - * Try to initialize the buffer_heads and check whether - * all are mapped and non delay. We don't want to - * do block allocation here. + * We don't want to do block allocation So redirty the + * page and return We may reach here when we do a + * journal commit via + * journal_submit_inode_data_buffers. If we don't + * have mapping block we just ignore them. We can also + * reach here via shrink_page_list */ - ret = block_prepare_write(page, 0, len, - noalloc_get_block_write); - if (!ret) { - page_bufs = page_buffers(page); - /* check whether all are mapped and non delay */ - if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, - ext4_bh_delay_or_unwritten)) { - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } - } else { - /* - * We can't do block allocation here - * so just redity the page and unlock - * and return - */ - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; - } + goto redirty_page; + } + if (commit_write) /* now mark the buffer_heads as dirty and uptodate */ block_commit_write(page, 0, len); - } if (PageChecked(page) && ext4_should_journal_data(inode)) { /* @@ -2749,9 +2758,7 @@ static int ext4_writepage(struct page *page, return __ext4_journalled_writepage(page, len); } - if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_writepage(page, noalloc_get_block_write, wbc); - else if (page_bufs && buffer_uninit(page_bufs)) { + if (buffer_uninit(page_bufs)) { ext4_set_bh_endio(page_bufs, inode); ret = block_write_full_page_endio(page, noalloc_get_block_write, wbc, ext4_end_io_buffer_write); @@ -2976,9 +2983,12 @@ static int ext4_da_writepages(struct address_space *mapping, * sbi->max_writeback_mb_bump whichever is smaller. */ max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); - if (!range_cyclic && range_whole) - desired_nr_to_write = wbc->nr_to_write * 8; - else + if (!range_cyclic && range_whole) { + if (wbc->nr_to_write == LONG_MAX) + desired_nr_to_write = wbc->nr_to_write; + else + desired_nr_to_write = wbc->nr_to_write * 8; + } else desired_nr_to_write = ext4_num_dirty_pages(inode, index, max_pages); if (desired_nr_to_write > max_pages) @@ -3040,9 +3050,7 @@ retry: * them for I/O. */ if (!mpd.io_done && mpd.next_page != mpd.first_page) { - if (mpage_da_map_blocks(&mpd) == 0) - mpage_da_submit_io(&mpd); - mpd.io_done = 1; + mpage_da_map_and_submit(&mpd); ret = MPAGE_DA_EXTENT_TAIL; } trace_ext4_da_write_pages(inode, &mpd); @@ -3146,13 +3154,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, int ret, retries = 0; struct page *page; pgoff_t index; - unsigned from, to; struct inode *inode = mapping->host; handle_t *handle; index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; if (ext4_nonda_switch(inode->i_sb)) { *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; @@ -3185,8 +3190,7 @@ retry: } *pagep = page; - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_da_get_block_prep); + ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); if (ret < 0) { unlock_page(page); ext4_journal_stop(handle); @@ -3545,15 +3549,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, retry: if (rw == READ && ext4_should_dioread_nolock(inode)) - ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext4_get_block, NULL); - else + ext4_get_block, NULL, NULL, 0); + else { ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL); + + if (unlikely((rw & WRITE) && ret < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; @@ -3668,6 +3681,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io) return ret; } + if (io->iocb) + aio_complete(io->iocb, io->result, 0); /* clear the DIO AIO unwritten flag */ io->flag = 0; return ret; @@ -3767,6 +3782,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags) io->offset = 0; io->size = 0; io->page = NULL; + io->iocb = NULL; + io->result = 0; INIT_WORK(&io->work, ext4_end_io_work); INIT_LIST_HEAD(&io->list); } @@ -3796,26 +3813,29 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, if (io_end->flag != EXT4_IO_UNWRITTEN){ ext4_free_io_end(io_end); iocb->private = NULL; - goto out; +out: + if (is_async) + aio_complete(iocb, ret, 0); + return; } io_end->offset = offset; io_end->size = size; - io_end->flag = EXT4_IO_UNWRITTEN; + if (is_async) { + io_end->iocb = iocb; + io_end->result = ret; + } wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; - /* queue the work to convert unwritten extents to written */ - queue_work(wq, &io_end->work); - /* Add the io_end to per-inode completed aio dio list*/ ei = EXT4_I(io_end->inode); spin_lock_irqsave(&ei->i_completed_io_lock, flags); list_add_tail(&io_end->list, &ei->i_completed_io_list); spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); + + /* queue the work to convert unwritten extents to written */ + queue_work(wq, &io_end->work); iocb->private = NULL; -out: - if (is_async) - aio_complete(iocb, ret, 0); } static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) @@ -3941,7 +3961,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, return -ENOMEM; /* * we save the io structure for current async - * direct IO, so that later ext4_get_blocks() + * direct IO, so that later ext4_map_blocks() * could flag the io structure whether there * is a unwritten extents needs to be converted * when IO is completed. @@ -4132,17 +4152,6 @@ int ext4_block_truncate_page(handle_t *handle, length = blocksize - (offset & (blocksize - 1)); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); - /* - * For "nobh" option, we can only work if we don't need to - * read-in the page - otherwise we create buffers to do the IO. - */ - if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && - ext4_should_writeback_data(inode) && PageUptodate(page)) { - zero_user(page, offset, length); - set_page_dirty(page); - goto unlock; - } - if (!page_has_buffers(page)) create_empty_buffers(page, blocksize, 0); @@ -4492,9 +4501,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - EXT4_ERROR_INODE(inode, - "Read failure block=%llu", - (unsigned long long) nr); + EXT4_ERROR_INODE_BLOCK(inode, nr, + "Read failure"); continue; } @@ -4505,27 +4513,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, (__le32 *) bh->b_data + addr_per_block, depth); - /* - * We've probably journalled the indirect block several - * times during the truncate. But it's no longer - * needed and we now drop it from the transaction via - * jbd2_journal_revoke(). - * - * That's easy if it's exclusively part of this - * transaction. But if it's part of the committing - * transaction then jbd2_journal_forget() will simply - * brelse() it. That means that if the underlying - * block is reallocated in ext4_get_block(), - * unmap_underlying_metadata() will find this block - * and will try to get rid of it. damn, damn. - * - * If this block has already been committed to the - * journal, a revoke record will be written. And - * revoke records must be emitted *before* clearing - * this block's bit in the bitmaps. - */ - ext4_forget(handle, 1, inode, bh, bh->b_blocknr); - /* * Everything below this this pointer has been * released. Now let this top-of-subtree go. @@ -4550,8 +4537,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, blocks_for_truncate(inode)); } + /* + * The forget flag here is critical because if + * we are journaling (and not doing data + * journaling), we have to make sure a revoke + * record is written to prevent the journal + * replay from overwriting the (former) + * indirect block if it gets reallocated as a + * data block. This must happen in the same + * transaction where the data blocks are + * actually freed. + */ ext4_free_blocks(handle, inode, 0, nr, 1, - EXT4_FREE_BLOCKS_METADATA); + EXT4_FREE_BLOCKS_METADATA| + EXT4_FREE_BLOCKS_FORGET); if (parent_bh) { /* @@ -4809,8 +4808,8 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (!bh) { - EXT4_ERROR_INODE(inode, "unable to read inode block - " - "block %llu", block); + EXT4_ERROR_INODE_BLOCK(inode, block, + "unable to read itable block"); return -EIO; } if (!buffer_uptodate(bh)) { @@ -4908,8 +4907,8 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - EXT4_ERROR_INODE(inode, "unable to read inode " - "block %llu", block); + EXT4_ERROR_INODE_BLOCK(inode, block, + "unable to read itable block"); brelse(bh); return -EIO; } @@ -4980,7 +4979,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, /* we are using combined 48 bit field */ i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | le32_to_cpu(raw_inode->i_blocks_lo); - if (ei->i_flags & EXT4_HUGE_FILE_FL) { + if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { /* i_blocks represent file system block size */ return i_blocks << (inode->i_blkbits - 9); } else { @@ -5076,7 +5075,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) transaction_t *transaction; tid_t tid; - spin_lock(&journal->j_state_lock); + read_lock(&journal->j_state_lock); if (journal->j_running_transaction) transaction = journal->j_running_transaction; else @@ -5085,7 +5084,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) tid = transaction->t_tid; else tid = journal->j_commit_sequence; - spin_unlock(&journal->j_state_lock); + read_unlock(&journal->j_state_lock); ei->i_sync_tid = tid; ei->i_datasync_tid = tid; } @@ -5130,7 +5129,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_file_acl); ret = -EIO; goto bad_inode; - } else if (ei->i_flags & EXT4_EXTENTS_FL) { + } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || (S_ISLNK(inode->i_mode) && !ext4_inode_is_fast_symlink(inode))) @@ -5410,9 +5409,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) if (wbc->sync_mode == WB_SYNC_ALL) sync_dirty_buffer(iloc.bh); if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { - EXT4_ERROR_INODE(inode, - "IO error syncing inode (block=%llu)", - (unsigned long long) iloc.bh->b_blocknr); + EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, + "IO error syncing inode"); err = -EIO; } brelse(iloc.bh); @@ -5487,10 +5485,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - if (attr->ia_size > sbi->s_bitmap_maxbytes) { - error = -EFBIG; - goto err_out; - } + if (attr->ia_size > sbi->s_bitmap_maxbytes) + return -EFBIG; } } @@ -5533,11 +5529,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_truncate(inode); } - rc = inode_setattr(inode, attr); + if ((attr->ia_valid & ATTR_SIZE) && + attr->ia_size != i_size_read(inode)) + rc = vmtruncate(inode, attr->ia_size); + + if (!rc) { + setattr_copy(inode, attr); + mark_inode_dirty(inode); + } - /* If inode_setattr's call to ext4_truncate failed to get a - * transaction handle at all, we need to clean up the in-core - * orphan list manually. */ + /* + * If the call to ext4_truncate failed to get a transaction handle at + * all, we need to clean up the in-core orphan list manually. + */ if (inode->i_nlink) ext4_orphan_del(NULL, inode); @@ -5692,7 +5696,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks. + * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. @@ -5758,7 +5762,6 @@ static int ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; - struct ext4_xattr_entry *entry; if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) return 0; @@ -5766,7 +5769,6 @@ static int ext4_expand_extra_isize(struct inode *inode, raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - entry = IFIRST(header); /* No extended attributes present */ if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||