From: Theodore Ts'o Date: Thu, 28 Oct 2010 03:44:47 +0000 (-0400) Subject: Merge branch 'next' into upstream-merge X-Git-Tag: v2.6.37-rc1~76^2 X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=commitdiff_plain;h=a107e5a3a473a2ea62bd5af24e11b84adf1486ff Merge branch 'next' into upstream-merge Conflicts: fs/ext4/inode.c fs/ext4/mballoc.c include/trace/events/ext4.h --- a107e5a3a473a2ea62bd5af24e11b84adf1486ff diff --cc fs/ext4/extents.c index 06328d3e5717,a17a676a3106..0554c48cb1fd --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@@ -2538,74 -2491,19 +2491,18 @@@ void ext4_ext_release(struct super_bloc /* FIXME!! we need to try to merge to left or right after zero-out */ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) { + ext4_fsblk_t ee_pblock; + unsigned int ee_len; int ret; - struct bio *bio; - int blkbits, blocksize; - sector_t ee_pblock; - struct completion event; - unsigned int ee_len, len, done, offset; - - blkbits = inode->i_blkbits; - blocksize = inode->i_sb->s_blocksize; ee_len = ext4_ext_get_actual_len(ex); - ee_pblock = ext_pblock(ex); - - /* convert ee_pblock to 512 byte sectors */ - ee_pblock = ee_pblock << (blkbits - 9); - - while (ee_len > 0) { - - if (ee_len > BIO_MAX_PAGES) - len = BIO_MAX_PAGES; - else - len = ee_len; - - bio = bio_alloc(GFP_NOIO, len); - if (!bio) - return -ENOMEM; + ee_pblock = ext4_ext_pblock(ex); - bio->bi_sector = ee_pblock; - bio->bi_bdev = inode->i_sb->s_bdev; - ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, - GFP_NOFS, BLKDEV_IFL_WAIT); ++ ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS); + if (ret > 0) + ret = 0; - done = 0; - offset = 0; - while (done < len) { - ret = bio_add_page(bio, ZERO_PAGE(0), - blocksize, offset); - if (ret != blocksize) { - /* - * We can't add any more pages because of - * hardware limitations. Start a new bio. - */ - break; - } - done++; - offset += blocksize; - if (offset >= PAGE_CACHE_SIZE) - offset = 0; - } - - init_completion(&event); - bio->bi_private = &event; - bio->bi_end_io = bi_complete; - submit_bio(WRITE, bio); - wait_for_completion(&event); - - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { - bio_put(bio); - return -EIO; - } - bio_put(bio); - ee_len -= done; - ee_pblock += done << (blkbits - 9); - } - return 0; + return ret; } #define EXT4_EXT_ZERO_LEN 7 diff --cc fs/ext4/ialloc.c index 45853e0d1f21,509f429f71e8..1ce240a23ebb --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@@ -1205,3 -1220,110 +1220,109 @@@ unsigned long ext4_count_dirs(struct su } return count; } + + /* + * Zeroes not yet zeroed inode table - just write zeroes through the whole + * inode table. Must be called without any spinlock held. The only place + * where it is called from on active part of filesystem is ext4lazyinit + * thread, so we do not need any special locks, however we have to prevent + * inode allocation from the current group, so we take alloc_sem lock, to + * block ext4_claim_inode until we are finished. + */ + extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group, + int barrier) + { + struct ext4_group_info *grp = ext4_get_group_info(sb, group); + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp = NULL; + struct buffer_head *group_desc_bh; + handle_t *handle; + ext4_fsblk_t blk; + int num, ret = 0, used_blks = 0; - unsigned long flags = BLKDEV_IFL_WAIT; + + /* This should not happen, but just to be sure check this */ + if (sb->s_flags & MS_RDONLY) { + ret = 1; + goto out; + } + + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); + if (!gdp) + goto out; + + /* + * We do not need to lock this, because we are the only one + * handling this flag. + */ + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)) + goto out; + + handle = ext4_journal_start_sb(sb, 1); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + + down_write(&grp->alloc_sem); + /* + * If inode bitmap was already initialized there may be some + * used inodes so we need to skip blocks with used inodes in + * inode table. + */ + if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) + used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp)), + sbi->s_inodes_per_block); + + if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) { + ext4_error(sb, "Something is wrong with group %u\n" + "Used itable blocks: %d" + "itable unused count: %u\n", + group, used_blks, + ext4_itable_unused_count(sb, gdp)); + ret = 1; + goto out; + } + + blk = ext4_inode_table(sb, gdp) + used_blks; + num = sbi->s_itb_per_group - used_blks; + + BUFFER_TRACE(group_desc_bh, "get_write_access"); + ret = ext4_journal_get_write_access(handle, + group_desc_bh); + if (ret) + goto err_out; + + /* + * Skip zeroout if the inode table is full. But we set the ZEROED + * flag anyway, because obviously, when it is full it does not need + * further zeroing. + */ + if (unlikely(num == 0)) + goto skip_zeroout; + + ext4_debug("going to zero out inode table in group %d\n", + group); - if (barrier) - flags |= BLKDEV_IFL_BARRIER; - ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS, flags); ++ ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS); + if (ret < 0) + goto err_out; ++ if (barrier) ++ blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL); + + skip_zeroout: + ext4_lock_group(sb, group); + gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED); + gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); + ext4_unlock_group(sb, group); + + BUFFER_TRACE(group_desc_bh, + "call ext4_handle_dirty_metadata"); + ret = ext4_handle_dirty_metadata(handle, NULL, + group_desc_bh); + + err_out: + up_write(&grp->alloc_sem); + ext4_journal_stop(handle); + out: + return ret; + } diff --cc fs/ext4/inode.c index 49635ef236f8,3ba237b0b2aa..2d6c6c8c036d --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@@ -2030,86 -2062,32 +2062,32 @@@ static int mpage_da_submit_io(struct mp BUG_ON(!PageLocked(page)); BUG_ON(PageWriteback(page)); - pages_skipped = mpd->wbc->pages_skipped; - err = mapping->a_ops->writepage(page, mpd->wbc); - if (!err && (pages_skipped == mpd->wbc->pages_skipped)) - /* - * have successfully written the page - * without skipping the same - */ - mpd->pages_written++; /* - * In error case, we have to continue because - * remaining pages are still locked - * XXX: unlock and re-dirty them? + * If the page does not have buffers (for + * whatever reason), try to create them using - * block_prepare_write. If this fails, ++ * __block_write_begin. If this fails, + * redirty the page and move on. */ - if (ret == 0) - ret = err; - } - pagevec_release(&pvec); - } - return ret; - } - - /* - * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers - * - * the function goes through all passed space and put actual disk - * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten - */ - static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, - struct ext4_map_blocks *map) - { - struct inode *inode = mpd->inode; - struct address_space *mapping = inode->i_mapping; - int blocks = map->m_len; - sector_t pblock = map->m_pblk, cur_logical; - struct buffer_head *head, *bh; - pgoff_t index, end; - struct pagevec pvec; - int nr_pages, i; - - index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits); - cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - - pagevec_init(&pvec, 0); - - while (index <= end) { - /* XXX: optimize tail */ - nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); - if (nr_pages == 0) - break; - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - - index = page->index; - if (index > end) - break; - index++; - - BUG_ON(!PageLocked(page)); - BUG_ON(PageWriteback(page)); - BUG_ON(!page_has_buffers(page)); - - bh = page_buffers(page); - head = bh; - - /* skip blocks out of the range */ - do { - if (cur_logical >= map->m_lblk) - break; - cur_logical++; - } while ((bh = bh->b_this_page) != head); + if (!page_has_buffers(page)) { - if (block_prepare_write(page, 0, len, ++ if (__block_write_begin(page, 0, len, + noalloc_get_block_write)) { + redirty_page: + redirty_page_for_writepage(mpd->wbc, + page); + unlock_page(page); + continue; + } + commit_write = 1; + } + bh = page_bufs = page_buffers(page); + block_start = 0; do { - if (cur_logical >= map->m_lblk + blocks) - break; - - if (buffer_delay(bh) || buffer_unwritten(bh)) { - - BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); - + if (!bh) + goto redirty_page; + if (map && (cur_logical >= map->m_lblk) && + (cur_logical <= (map->m_lblk + + (map->m_len - 1)))) { if (buffer_delay(bh)) { clear_buffer_delay(bh); bh->b_blocknr = pblock; @@@ -2712,18 -2714,15 +2713,15 @@@ static int ext4_writepage(struct page * else len = PAGE_CACHE_SIZE; - if (page_has_buffers(page)) { - page_bufs = page_buffers(page); - if (walk_page_buffers(NULL, page_bufs, 0, len, NULL, - ext4_bh_delay_or_unwritten)) { - /* - * We don't want to do block allocation - * So redirty the page and return - * We may reach here when we do a journal commit - * via journal_submit_inode_data_buffers. - * If we don't have mapping block we just ignore - * them. We can also reach here via shrink_page_list - */ + /* + * If the page does not have buffers (for whatever reason), - * try to create them using block_prepare_write. If this ++ * try to create them using __block_write_begin. If this + * fails, redirty the page and move on. + */ + if (!page_buffers(page)) { - if (block_prepare_write(page, 0, len, ++ if (__block_write_begin(page, 0, len, + noalloc_get_block_write)) { + redirty_page: redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; diff --cc fs/ext4/mballoc.c index 42f77b1dc72d,328ea9cec57b..c58eba34724a --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@@ -2567,8 -2613,8 +2614,8 @@@ static inline int ext4_issue_discard(st discard_block = block + ext4_group_first_block_no(sb, block_group); trace_ext4_discard_blocks(sb, (unsigned long long) discard_block, count); - ret = sb_issue_discard(sb, discard_block, count); + ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); - if (ret == EOPNOTSUPP) { + if (ret == -EOPNOTSUPP) { ext4_warning(sb, "discard not supported, disabling"); clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); } diff --cc fs/ext4/resize.c index ca5c8aa00a2f,f398474e2784..dc963929de65 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@@ -226,23 -226,13 +226,13 @@@ static int setup_new_group_blocks(struc } /* Zero out all of the reserved backup group descriptor table blocks */ - for (i = 0, bit = gdblocks + 1, block = start + bit; - i < reserved_gdb; i++, block++, bit++) { - struct buffer_head *gdb; - - ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit); - - if ((err = extend_or_restart_transaction(handle, 1, bh))) - goto exit_bh; + ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", + block, sbi->s_itb_per_group); + err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, - GFP_NOFS, BLKDEV_IFL_WAIT); ++ GFP_NOFS); + if (err) + goto exit_bh; - if (IS_ERR(gdb = bclean(handle, sb, block))) { - err = PTR_ERR(gdb); - goto exit_bh; - } - ext4_handle_dirty_metadata(handle, NULL, gdb); - ext4_set_bit(bit, bh->b_data); - brelse(gdb); - } ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, input->block_bitmap - start); ext4_set_bit(input->block_bitmap - start, bh->b_data); @@@ -251,23 -241,13 +241,12 @@@ ext4_set_bit(input->inode_bitmap - start, bh->b_data); /* Zero out all of the inode table blocks */ - for (i = 0, block = input->inode_table, bit = block - start; - i < sbi->s_itb_per_group; i++, bit++, block++) { - struct buffer_head *it; - - ext4_debug("clear inode block %#04llx (+%d)\n", block, bit); - - if ((err = extend_or_restart_transaction(handle, 1, bh))) - goto exit_bh; - - if (IS_ERR(it = bclean(handle, sb, block))) { - err = PTR_ERR(it); - goto exit_bh; - } - ext4_handle_dirty_metadata(handle, NULL, it); - brelse(it); - ext4_set_bit(bit, bh->b_data); - } + block = input->inode_table; + ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", + block, sbi->s_itb_per_group); - err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, - GFP_NOFS, BLKDEV_IFL_WAIT); ++ err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); + if (err) + goto exit_bh; if ((err = extend_or_restart_transaction(handle, 2, bh))) goto exit_bh; diff --cc include/linux/blkdev.h index 646b462d04df,e5cb4d029689..5027a599077d --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@@ -883,14 -932,23 +883,22 @@@ extern int blkdev_issue_flush(struct bl extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) + sector_t nr_sects, gfp_t gfp_mask); +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } + static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, - sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) ++ sector_t nr_blocks, gfp_t gfp_mask) + { + return blkdev_issue_zeroout(sb->s_bdev, + block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), - gfp_mask, flags); ++ gfp_mask); + } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); diff --cc include/linux/fs.h index b2a6009cba10,7008268e9b5a..6ed7ace74b7c --- a/include/linux/fs.h +++ b/include/linux/fs.h @@@ -32,11 -32,17 +32,17 @@@ #define SEEK_END 2 /* seek relative to end of file */ #define SEEK_MAX SEEK_END + struct fstrim_range { + uint64_t start; + uint64_t len; + uint64_t minlen; + }; + /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ }; struct inodes_stat_t { diff --cc include/trace/events/ext4.h index 6bcb00645de4,8f59db107bbb..289010d3270b --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@@ -228,7 -245,7 +245,6 @@@ TRACE_EVENT(ext4_da_writepages __field( long, pages_skipped ) __field( loff_t, range_start ) __field( loff_t, range_end ) -- __field( char, nonblocking ) __field( char, for_kupdate ) __field( char, for_reclaim ) __field( char, range_cyclic ) @@@ -248,14 -267,11 +265,14 @@@ __entry->writeback_index = inode->i_mapping->writeback_index; ), - TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld " - TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu", ++ TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " + "range_start %llu range_end %llu " + "for_kupdate %d for_reclaim %d " + "range_cyclic %d writeback_index %lu", - jbd2_dev_to_name(__entry->dev), + __entry->dev_major, __entry->dev_minor, (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, - __entry->range_end, __entry->nonblocking, + __entry->range_end, __entry->for_kupdate, __entry->for_reclaim, __entry->range_cyclic, (unsigned long) __entry->writeback_index)