From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 28 Oct 2010 03:44:47 +0000 (-0400)
Subject: Merge branch 'next' into upstream-merge
X-Git-Tag: v2.6.37-rc1~76^2
X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=commitdiff_plain;h=a107e5a3a473a2ea62bd5af24e11b84adf1486ff

Merge branch 'next' into upstream-merge

Conflicts:
	fs/ext4/inode.c
	fs/ext4/mballoc.c
	include/trace/events/ext4.h
---

a107e5a3a473a2ea62bd5af24e11b84adf1486ff
diff --cc fs/ext4/extents.c
index 06328d3e5717,a17a676a3106..0554c48cb1fd
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@@ -2538,74 -2491,19 +2491,18 @@@ void ext4_ext_release(struct super_bloc
  /* FIXME!! we need to try to merge to left or right after zero-out  */
  static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  {
+ 	ext4_fsblk_t ee_pblock;
+ 	unsigned int ee_len;
  	int ret;
- 	struct bio *bio;
- 	int blkbits, blocksize;
- 	sector_t ee_pblock;
- 	struct completion event;
- 	unsigned int ee_len, len, done, offset;
  
- 
- 	blkbits   = inode->i_blkbits;
- 	blocksize = inode->i_sb->s_blocksize;
  	ee_len    = ext4_ext_get_actual_len(ex);
- 	ee_pblock = ext_pblock(ex);
- 
- 	/* convert ee_pblock to 512 byte sectors */
- 	ee_pblock = ee_pblock << (blkbits - 9);
- 
- 	while (ee_len > 0) {
- 
- 		if (ee_len > BIO_MAX_PAGES)
- 			len = BIO_MAX_PAGES;
- 		else
- 			len = ee_len;
- 
- 		bio = bio_alloc(GFP_NOIO, len);
- 		if (!bio)
- 			return -ENOMEM;
+ 	ee_pblock = ext4_ext_pblock(ex);
  
- 		bio->bi_sector = ee_pblock;
- 		bio->bi_bdev   = inode->i_sb->s_bdev;
 -	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len,
 -			       GFP_NOFS, BLKDEV_IFL_WAIT);
++	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+ 	if (ret > 0)
+ 		ret = 0;
  
- 		done = 0;
- 		offset = 0;
- 		while (done < len) {
- 			ret = bio_add_page(bio, ZERO_PAGE(0),
- 							blocksize, offset);
- 			if (ret != blocksize) {
- 				/*
- 				 * We can't add any more pages because of
- 				 * hardware limitations.  Start a new bio.
- 				 */
- 				break;
- 			}
- 			done++;
- 			offset += blocksize;
- 			if (offset >= PAGE_CACHE_SIZE)
- 				offset = 0;
- 		}
- 
- 		init_completion(&event);
- 		bio->bi_private = &event;
- 		bio->bi_end_io = bi_complete;
- 		submit_bio(WRITE, bio);
- 		wait_for_completion(&event);
- 
- 		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- 			bio_put(bio);
- 			return -EIO;
- 		}
- 		bio_put(bio);
- 		ee_len    -= done;
- 		ee_pblock += done  << (blkbits - 9);
- 	}
- 	return 0;
+ 	return ret;
  }
  
  #define EXT4_EXT_ZERO_LEN 7
diff --cc fs/ext4/ialloc.c
index 45853e0d1f21,509f429f71e8..1ce240a23ebb
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@@ -1205,3 -1220,110 +1220,109 @@@ unsigned long ext4_count_dirs(struct su
  	}
  	return count;
  }
+ 
+ /*
+  * Zeroes not yet zeroed inode table - just write zeroes through the whole
+  * inode table. Must be called without any spinlock held. The only place
+  * where it is called from on active part of filesystem is ext4lazyinit
+  * thread, so we do not need any special locks, however we have to prevent
+  * inode allocation from the current group, so we take alloc_sem lock, to
+  * block ext4_claim_inode until we are finished.
+  */
+ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+ 				 int barrier)
+ {
+ 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+ 	struct ext4_group_desc *gdp = NULL;
+ 	struct buffer_head *group_desc_bh;
+ 	handle_t *handle;
+ 	ext4_fsblk_t blk;
+ 	int num, ret = 0, used_blks = 0;
 -	unsigned long flags = BLKDEV_IFL_WAIT;
+ 
+ 	/* This should not happen, but just to be sure check this */
+ 	if (sb->s_flags & MS_RDONLY) {
+ 		ret = 1;
+ 		goto out;
+ 	}
+ 
+ 	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+ 	if (!gdp)
+ 		goto out;
+ 
+ 	/*
+ 	 * We do not need to lock this, because we are the only one
+ 	 * handling this flag.
+ 	 */
+ 	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+ 		goto out;
+ 
+ 	handle = ext4_journal_start_sb(sb, 1);
+ 	if (IS_ERR(handle)) {
+ 		ret = PTR_ERR(handle);
+ 		goto out;
+ 	}
+ 
+ 	down_write(&grp->alloc_sem);
+ 	/*
+ 	 * If inode bitmap was already initialized there may be some
+ 	 * used inodes so we need to skip blocks with used inodes in
+ 	 * inode table.
+ 	 */
+ 	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+ 		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+ 			    ext4_itable_unused_count(sb, gdp)),
+ 			    sbi->s_inodes_per_block);
+ 
+ 	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+ 		ext4_error(sb, "Something is wrong with group %u\n"
+ 			   "Used itable blocks: %d"
+ 			   "itable unused count: %u\n",
+ 			   group, used_blks,
+ 			   ext4_itable_unused_count(sb, gdp));
+ 		ret = 1;
+ 		goto out;
+ 	}
+ 
+ 	blk = ext4_inode_table(sb, gdp) + used_blks;
+ 	num = sbi->s_itb_per_group - used_blks;
+ 
+ 	BUFFER_TRACE(group_desc_bh, "get_write_access");
+ 	ret = ext4_journal_get_write_access(handle,
+ 					    group_desc_bh);
+ 	if (ret)
+ 		goto err_out;
+ 
+ 	/*
+ 	 * Skip zeroout if the inode table is full. But we set the ZEROED
+ 	 * flag anyway, because obviously, when it is full it does not need
+ 	 * further zeroing.
+ 	 */
+ 	if (unlikely(num == 0))
+ 		goto skip_zeroout;
+ 
+ 	ext4_debug("going to zero out inode table in group %d\n",
+ 		   group);
 -	if (barrier)
 -		flags |= BLKDEV_IFL_BARRIER;
 -	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS, flags);
++	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+ 	if (ret < 0)
+ 		goto err_out;
++	if (barrier)
++		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+ 
+ skip_zeroout:
+ 	ext4_lock_group(sb, group);
+ 	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+ 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ 	ext4_unlock_group(sb, group);
+ 
+ 	BUFFER_TRACE(group_desc_bh,
+ 		     "call ext4_handle_dirty_metadata");
+ 	ret = ext4_handle_dirty_metadata(handle, NULL,
+ 					 group_desc_bh);
+ 
+ err_out:
+ 	up_write(&grp->alloc_sem);
+ 	ext4_journal_stop(handle);
+ out:
+ 	return ret;
+ }
diff --cc fs/ext4/inode.c
index 49635ef236f8,3ba237b0b2aa..2d6c6c8c036d
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@@ -2030,86 -2062,32 +2062,32 @@@ static int mpage_da_submit_io(struct mp
  			BUG_ON(!PageLocked(page));
  			BUG_ON(PageWriteback(page));
  
- 			pages_skipped = mpd->wbc->pages_skipped;
- 			err = mapping->a_ops->writepage(page, mpd->wbc);
- 			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
- 				/*
- 				 * have successfully written the page
- 				 * without skipping the same
- 				 */
- 				mpd->pages_written++;
  			/*
- 			 * In error case, we have to continue because
- 			 * remaining pages are still locked
- 			 * XXX: unlock and re-dirty them?
+ 			 * If the page does not have buffers (for
+ 			 * whatever reason), try to create them using
 -			 * block_prepare_write.  If this fails,
++			 * __block_write_begin.  If this fails,
+ 			 * redirty the page and move on.
  			 */
- 			if (ret == 0)
- 				ret = err;
- 		}
- 		pagevec_release(&pvec);
- 	}
- 	return ret;
- }
- 
- /*
-  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
-  *
-  * the function goes through all passed space and put actual disk
-  * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
-  */
- static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- 				 struct ext4_map_blocks *map)
- {
- 	struct inode *inode = mpd->inode;
- 	struct address_space *mapping = inode->i_mapping;
- 	int blocks = map->m_len;
- 	sector_t pblock = map->m_pblk, cur_logical;
- 	struct buffer_head *head, *bh;
- 	pgoff_t index, end;
- 	struct pagevec pvec;
- 	int nr_pages, i;
- 
- 	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- 	end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- 	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- 
- 	pagevec_init(&pvec, 0);
- 
- 	while (index <= end) {
- 		/* XXX: optimize tail */
- 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- 		if (nr_pages == 0)
- 			break;
- 		for (i = 0; i < nr_pages; i++) {
- 			struct page *page = pvec.pages[i];
- 
- 			index = page->index;
- 			if (index > end)
- 				break;
- 			index++;
- 
- 			BUG_ON(!PageLocked(page));
- 			BUG_ON(PageWriteback(page));
- 			BUG_ON(!page_has_buffers(page));
- 
- 			bh = page_buffers(page);
- 			head = bh;
- 
- 			/* skip blocks out of the range */
- 			do {
- 				if (cur_logical >= map->m_lblk)
- 					break;
- 				cur_logical++;
- 			} while ((bh = bh->b_this_page) != head);
+ 			if (!page_has_buffers(page)) {
 -				if (block_prepare_write(page, 0, len,
++				if (__block_write_begin(page, 0, len,
+ 						noalloc_get_block_write)) {
+ 				redirty_page:
+ 					redirty_page_for_writepage(mpd->wbc,
+ 								   page);
+ 					unlock_page(page);
+ 					continue;
+ 				}
+ 				commit_write = 1;
+ 			}
  
+ 			bh = page_bufs = page_buffers(page);
+ 			block_start = 0;
  			do {
- 				if (cur_logical >= map->m_lblk + blocks)
- 					break;
- 
- 				if (buffer_delay(bh) || buffer_unwritten(bh)) {
- 
- 					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
- 
+ 				if (!bh)
+ 					goto redirty_page;
+ 				if (map && (cur_logical >= map->m_lblk) &&
+ 				    (cur_logical <= (map->m_lblk +
+ 						     (map->m_len - 1)))) {
  					if (buffer_delay(bh)) {
  						clear_buffer_delay(bh);
  						bh->b_blocknr = pblock;
@@@ -2712,18 -2714,15 +2713,15 @@@ static int ext4_writepage(struct page *
  	else
  		len = PAGE_CACHE_SIZE;
  
- 	if (page_has_buffers(page)) {
- 		page_bufs = page_buffers(page);
- 		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- 					ext4_bh_delay_or_unwritten)) {
- 			/*
- 			 * We don't want to do  block allocation
- 			 * So redirty the page and return
- 			 * We may reach here when we do a journal commit
- 			 * via journal_submit_inode_data_buffers.
- 			 * If we don't have mapping block we just ignore
- 			 * them. We can also reach here via shrink_page_list
- 			 */
+ 	/*
+ 	 * If the page does not have buffers (for whatever reason),
 -	 * try to create them using block_prepare_write.  If this
++	 * try to create them using __block_write_begin.  If this
+ 	 * fails, redirty the page and move on.
+ 	 */
+ 	if (!page_buffers(page)) {
 -		if (block_prepare_write(page, 0, len,
++		if (__block_write_begin(page, 0, len,
+ 					noalloc_get_block_write)) {
+ 		redirty_page:
  			redirty_page_for_writepage(wbc, page);
  			unlock_page(page);
  			return 0;
diff --cc fs/ext4/mballoc.c
index 42f77b1dc72d,328ea9cec57b..c58eba34724a
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@@ -2567,8 -2613,8 +2614,8 @@@ static inline int ext4_issue_discard(st
  	discard_block = block + ext4_group_first_block_no(sb, block_group);
  	trace_ext4_discard_blocks(sb,
  			(unsigned long long) discard_block, count);
 -	ret = sb_issue_discard(sb, discard_block, count);
 +	ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
- 	if (ret == EOPNOTSUPP) {
+ 	if (ret == -EOPNOTSUPP) {
  		ext4_warning(sb, "discard not supported, disabling");
  		clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
  	}
diff --cc fs/ext4/resize.c
index ca5c8aa00a2f,f398474e2784..dc963929de65
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@@ -226,23 -226,13 +226,13 @@@ static int setup_new_group_blocks(struc
  	}
  
  	/* Zero out all of the reserved backup group descriptor table blocks */
- 	for (i = 0, bit = gdblocks + 1, block = start + bit;
- 	     i < reserved_gdb; i++, block++, bit++) {
- 		struct buffer_head *gdb;
- 
- 		ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
- 
- 		if ((err = extend_or_restart_transaction(handle, 1, bh)))
- 			goto exit_bh;
+ 	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ 			block, sbi->s_itb_per_group);
+ 	err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
 -			       GFP_NOFS, BLKDEV_IFL_WAIT);
++			       GFP_NOFS);
+ 	if (err)
+ 		goto exit_bh;
  
- 		if (IS_ERR(gdb = bclean(handle, sb, block))) {
- 			err = PTR_ERR(gdb);
- 			goto exit_bh;
- 		}
- 		ext4_handle_dirty_metadata(handle, NULL, gdb);
- 		ext4_set_bit(bit, bh->b_data);
- 		brelse(gdb);
- 	}
  	ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
  		   input->block_bitmap - start);
  	ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@@ -251,23 -241,13 +241,12 @@@
  	ext4_set_bit(input->inode_bitmap - start, bh->b_data);
  
  	/* Zero out all of the inode table blocks */
- 	for (i = 0, block = input->inode_table, bit = block - start;
- 	     i < sbi->s_itb_per_group; i++, bit++, block++) {
- 		struct buffer_head *it;
- 
- 		ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
- 
- 		if ((err = extend_or_restart_transaction(handle, 1, bh)))
- 			goto exit_bh;
- 
- 		if (IS_ERR(it = bclean(handle, sb, block))) {
- 			err = PTR_ERR(it);
- 			goto exit_bh;
- 		}
- 		ext4_handle_dirty_metadata(handle, NULL, it);
- 		brelse(it);
- 		ext4_set_bit(bit, bh->b_data);
- 	}
+ 	block = input->inode_table;
+ 	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ 			block, sbi->s_itb_per_group);
 -	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
 -			       GFP_NOFS, BLKDEV_IFL_WAIT);
++	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+ 	if (err)
+ 		goto exit_bh;
  
  	if ((err = extend_or_restart_transaction(handle, 2, bh)))
  		goto exit_bh;
diff --cc include/linux/blkdev.h
index 646b462d04df,e5cb4d029689..5027a599077d
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@@ -883,14 -932,23 +883,22 @@@ extern int blkdev_issue_flush(struct bl
  extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
  		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
  extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 -			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
 -static inline int sb_issue_discard(struct super_block *sb,
 -				   sector_t block, sector_t nr_blocks)
 +			sector_t nr_sects, gfp_t gfp_mask);
 +static inline int sb_issue_discard(struct super_block *sb, sector_t block,
 +		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
  {
 -	block <<= (sb->s_blocksize_bits - 9);
 -	nr_blocks <<= (sb->s_blocksize_bits - 9);
 -	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
 -				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 +	return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
 +				    nr_blocks << (sb->s_blocksize_bits - 9),
 +				    gfp_mask, flags);
  }
+ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
 -		sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
++		sector_t nr_blocks, gfp_t gfp_mask)
+ {
+ 	return blkdev_issue_zeroout(sb->s_bdev,
+ 				    block << (sb->s_blocksize_bits - 9),
+ 				    nr_blocks << (sb->s_blocksize_bits - 9),
 -				    gfp_mask, flags);
++				    gfp_mask);
+ }
  
  extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
  
diff --cc include/linux/fs.h
index b2a6009cba10,7008268e9b5a..6ed7ace74b7c
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@@ -32,11 -32,17 +32,17 @@@
  #define SEEK_END	2	/* seek relative to end of file */
  #define SEEK_MAX	SEEK_END
  
+ struct fstrim_range {
+ 	uint64_t start;
+ 	uint64_t len;
+ 	uint64_t minlen;
+ };
+ 
  /* And dynamically-tunable limits and defaults: */
  struct files_stat_struct {
 -	int nr_files;		/* read only */
 -	int nr_free_files;	/* read only */
 -	int max_files;		/* tunable */
 +	unsigned long nr_files;		/* read only */
 +	unsigned long nr_free_files;	/* read only */
 +	unsigned long max_files;		/* tunable */
  };
  
  struct inodes_stat_t {
diff --cc include/trace/events/ext4.h
index 6bcb00645de4,8f59db107bbb..289010d3270b
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@@ -228,7 -245,7 +245,6 @@@ TRACE_EVENT(ext4_da_writepages
  		__field(	long,	pages_skipped		)
  		__field(	loff_t,	range_start		)
  		__field(	loff_t,	range_end		)
--		__field(	char,	nonblocking		)
  		__field(	char,	for_kupdate		)
  		__field(	char,	for_reclaim		)
  		__field(	char,	range_cyclic		)
@@@ -248,14 -267,11 +265,14 @@@
  		__entry->writeback_index = inode->i_mapping->writeback_index;
  	),
  
- 	TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld "
 -	TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu",
++	TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
 +		  "range_start %llu range_end %llu "
 +		  "for_kupdate %d for_reclaim %d "
 +		  "range_cyclic %d writeback_index %lu",
- 		  jbd2_dev_to_name(__entry->dev),
+ 		  __entry->dev_major, __entry->dev_minor,
  		  (unsigned long) __entry->ino, __entry->nr_to_write,
  		  __entry->pages_skipped, __entry->range_start,
 -		  __entry->range_end, __entry->nonblocking,
 +		  __entry->range_end,
  		  __entry->for_kupdate, __entry->for_reclaim,
  		  __entry->range_cyclic,
  		  (unsigned long) __entry->writeback_index)