Merge branch 'next' into upstream-merge
author Theodore Ts'o <tytso@mit.edu>
Thu, 28 Oct 2010 03:44:47 +0000 (23:44 -0400)
committer Theodore Ts'o <tytso@mit.edu>
Thu, 28 Oct 2010 03:44:47 +0000 (23:44 -0400)
Conflicts:
fs/ext4/inode.c
fs/ext4/mballoc.c
include/trace/events/ext4.h

15 files changed:
1  2 
fs/ext4/extents.c
fs/ext4/fsync.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/resize.c
fs/ext4/super.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/journal.c
include/linux/blkdev.h
include/linux/fs.h
include/linux/writeback.h
include/trace/events/ext4.h

@@@ -2538,74 -2491,19 +2491,18 @@@ void ext4_ext_release(struct super_bloc
  /* FIXME!! we need to try to merge to left or right after zero-out  */
  static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  {
+       ext4_fsblk_t ee_pblock;
+       unsigned int ee_len;
        int ret;
-       struct bio *bio;
-       int blkbits, blocksize;
-       sector_t ee_pblock;
-       struct completion event;
-       unsigned int ee_len, len, done, offset;
  
-       blkbits   = inode->i_blkbits;
-       blocksize = inode->i_sb->s_blocksize;
        ee_len    = ext4_ext_get_actual_len(ex);
-       ee_pblock = ext_pblock(ex);
-       /* convert ee_pblock to 512 byte sectors */
-       ee_pblock = ee_pblock << (blkbits - 9);
-       while (ee_len > 0) {
-               if (ee_len > BIO_MAX_PAGES)
-                       len = BIO_MAX_PAGES;
-               else
-                       len = ee_len;
-               bio = bio_alloc(GFP_NOIO, len);
-               if (!bio)
-                       return -ENOMEM;
+       ee_pblock = ext4_ext_pblock(ex);
  
-               bio->bi_sector = ee_pblock;
-               bio->bi_bdev   = inode->i_sb->s_bdev;
 -      ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len,
 -                             GFP_NOFS, BLKDEV_IFL_WAIT);
++      ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+       if (ret > 0)
+               ret = 0;
  
-               done = 0;
-               offset = 0;
-               while (done < len) {
-                       ret = bio_add_page(bio, ZERO_PAGE(0),
-                                                       blocksize, offset);
-                       if (ret != blocksize) {
-                               /*
-                                * We can't add any more pages because of
-                                * hardware limitations.  Start a new bio.
-                                */
-                               break;
-                       }
-                       done++;
-                       offset += blocksize;
-                       if (offset >= PAGE_CACHE_SIZE)
-                               offset = 0;
-               }
-               init_completion(&event);
-               bio->bi_private = &event;
-               bio->bi_end_io = bi_complete;
-               submit_bio(WRITE, bio);
-               wait_for_completion(&event);
-               if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-                       bio_put(bio);
-                       return -EIO;
-               }
-               bio_put(bio);
-               ee_len    -= done;
-               ee_pblock += done  << (blkbits - 9);
-       }
-       return 0;
+       return ret;
  }
  
  #define EXT4_EXT_ZERO_LEN 7
diff --cc fs/ext4/fsync.c
Simple merge
@@@ -1205,3 -1220,110 +1220,109 @@@ unsigned long ext4_count_dirs(struct su
        }
        return count;
  }
 -      unsigned long flags = BLKDEV_IFL_WAIT;
+ /*
+  * Zeroes not yet zeroed inode table - just write zeroes through the whole
+  * inode table. Must be called without any spinlock held. The only place
+  * where it is called from on active part of filesystem is ext4lazyinit
+  * thread, so we do not need any special locks, however we have to prevent
+  * inode allocation from the current group, so we take alloc_sem lock, to
+  * block ext4_claim_inode until we are finished.
+  */
+ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+                                int barrier)
+ {
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_group_desc *gdp = NULL;
+       struct buffer_head *group_desc_bh;
+       handle_t *handle;
+       ext4_fsblk_t blk;
+       int num, ret = 0, used_blks = 0;
 -      if (barrier)
 -              flags |= BLKDEV_IFL_BARRIER;
 -      ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS, flags);
+       /* This should not happen, but just to be sure check this */
+       if (sb->s_flags & MS_RDONLY) {
+               ret = 1;
+               goto out;
+       }
+       gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+       if (!gdp)
+               goto out;
+       /*
+        * We do not need to lock this, because we are the only one
+        * handling this flag.
+        */
+       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+               goto out;
+       handle = ext4_journal_start_sb(sb, 1);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               goto out;
+       }
+       down_write(&grp->alloc_sem);
+       /*
+        * If inode bitmap was already initialized there may be some
+        * used inodes so we need to skip blocks with used inodes in
+        * inode table.
+        */
+       if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+               used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+                           ext4_itable_unused_count(sb, gdp)),
+                           sbi->s_inodes_per_block);
+       if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+               ext4_error(sb, "Something is wrong with group %u\n"
+                          "Used itable blocks: %d"
+                          "itable unused count: %u\n",
+                          group, used_blks,
+                          ext4_itable_unused_count(sb, gdp));
+               ret = 1;
+               goto out;
+       }
+       blk = ext4_inode_table(sb, gdp) + used_blks;
+       num = sbi->s_itb_per_group - used_blks;
+       BUFFER_TRACE(group_desc_bh, "get_write_access");
+       ret = ext4_journal_get_write_access(handle,
+                                           group_desc_bh);
+       if (ret)
+               goto err_out;
+       /*
+        * Skip zeroout if the inode table is full. But we set the ZEROED
+        * flag anyway, because obviously, when it is full it does not need
+        * further zeroing.
+        */
+       if (unlikely(num == 0))
+               goto skip_zeroout;
+       ext4_debug("going to zero out inode table in group %d\n",
+                  group);
++      ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+       if (ret < 0)
+               goto err_out;
++      if (barrier)
++              blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+ skip_zeroout:
+       ext4_lock_group(sb, group);
+       gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+       gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+       ext4_unlock_group(sb, group);
+       BUFFER_TRACE(group_desc_bh,
+                    "call ext4_handle_dirty_metadata");
+       ret = ext4_handle_dirty_metadata(handle, NULL,
+                                        group_desc_bh);
+ err_out:
+       up_write(&grp->alloc_sem);
+       ext4_journal_stop(handle);
+ out:
+       return ret;
+ }
diff --cc fs/ext4/inode.c
@@@ -2030,86 -2062,32 +2062,32 @@@ static int mpage_da_submit_io(struct mp
                        BUG_ON(!PageLocked(page));
                        BUG_ON(PageWriteback(page));
  
-                       pages_skipped = mpd->wbc->pages_skipped;
-                       err = mapping->a_ops->writepage(page, mpd->wbc);
-                       if (!err && (pages_skipped == mpd->wbc->pages_skipped))
-                               /*
-                                * have successfully written the page
-                                * without skipping the same
-                                */
-                               mpd->pages_written++;
                        /*
-                        * In error case, we have to continue because
-                        * remaining pages are still locked
-                        * XXX: unlock and re-dirty them?
+                        * If the page does not have buffers (for
+                        * whatever reason), try to create them using
 -                       * block_prepare_write.  If this fails,
++                       * __block_write_begin.  If this fails,
+                        * redirty the page and move on.
                         */
-                       if (ret == 0)
-                               ret = err;
-               }
-               pagevec_release(&pvec);
-       }
-       return ret;
- }
- /*
-  * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
-  *
-  * the function goes through all passed space and put actual disk
-  * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
-  */
- static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
-                                struct ext4_map_blocks *map)
- {
-       struct inode *inode = mpd->inode;
-       struct address_space *mapping = inode->i_mapping;
-       int blocks = map->m_len;
-       sector_t pblock = map->m_pblk, cur_logical;
-       struct buffer_head *head, *bh;
-       pgoff_t index, end;
-       struct pagevec pvec;
-       int nr_pages, i;
-       index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       pagevec_init(&pvec, 0);
-       while (index <= end) {
-               /* XXX: optimize tail */
-               nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-               if (nr_pages == 0)
-                       break;
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
-                       index = page->index;
-                       if (index > end)
-                               break;
-                       index++;
-                       BUG_ON(!PageLocked(page));
-                       BUG_ON(PageWriteback(page));
-                       BUG_ON(!page_has_buffers(page));
-                       bh = page_buffers(page);
-                       head = bh;
-                       /* skip blocks out of the range */
-                       do {
-                               if (cur_logical >= map->m_lblk)
-                                       break;
-                               cur_logical++;
-                       } while ((bh = bh->b_this_page) != head);
+                       if (!page_has_buffers(page)) {
 -                              if (block_prepare_write(page, 0, len,
++                              if (__block_write_begin(page, 0, len,
+                                               noalloc_get_block_write)) {
+                               redirty_page:
+                                       redirty_page_for_writepage(mpd->wbc,
+                                                                  page);
+                                       unlock_page(page);
+                                       continue;
+                               }
+                               commit_write = 1;
+                       }
  
+                       bh = page_bufs = page_buffers(page);
+                       block_start = 0;
                        do {
-                               if (cur_logical >= map->m_lblk + blocks)
-                                       break;
-                               if (buffer_delay(bh) || buffer_unwritten(bh)) {
-                                       BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
+                               if (!bh)
+                                       goto redirty_page;
+                               if (map && (cur_logical >= map->m_lblk) &&
+                                   (cur_logical <= (map->m_lblk +
+                                                    (map->m_len - 1)))) {
                                        if (buffer_delay(bh)) {
                                                clear_buffer_delay(bh);
                                                bh->b_blocknr = pblock;
@@@ -2712,18 -2714,15 +2713,15 @@@ static int ext4_writepage(struct page *
        else
                len = PAGE_CACHE_SIZE;
  
-       if (page_has_buffers(page)) {
-               page_bufs = page_buffers(page);
-               if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-                                       ext4_bh_delay_or_unwritten)) {
-                       /*
-                        * We don't want to do  block allocation
-                        * So redirty the page and return
-                        * We may reach here when we do a journal commit
-                        * via journal_submit_inode_data_buffers.
-                        * If we don't have mapping block we just ignore
-                        * them. We can also reach here via shrink_page_list
-                        */
+       /*
+        * If the page does not have buffers (for whatever reason),
 -       * try to create them using block_prepare_write.  If this
++       * try to create them using __block_write_begin.  If this
+        * fails, redirty the page and move on.
+        */
+       if (!page_buffers(page)) {
 -              if (block_prepare_write(page, 0, len,
++              if (__block_write_begin(page, 0, len,
+                                       noalloc_get_block_write)) {
+               redirty_page:
                        redirty_page_for_writepage(wbc, page);
                        unlock_page(page);
                        return 0;
@@@ -2567,8 -2613,8 +2614,8 @@@ static inline int ext4_issue_discard(st
        discard_block = block + ext4_group_first_block_no(sb, block_group);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
 -      ret = sb_issue_discard(sb, discard_block, count);
 +      ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
-       if (ret == EOPNOTSUPP) {
+       if (ret == -EOPNOTSUPP) {
                ext4_warning(sb, "discard not supported, disabling");
                clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
        }
diff --cc fs/ext4/namei.c
Simple merge
@@@ -226,23 -226,13 +226,13 @@@ static int setup_new_group_blocks(struc
        }
  
        /* Zero out all of the reserved backup group descriptor table blocks */
-       for (i = 0, bit = gdblocks + 1, block = start + bit;
-            i < reserved_gdb; i++, block++, bit++) {
-               struct buffer_head *gdb;
-               ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
-               if ((err = extend_or_restart_transaction(handle, 1, bh)))
-                       goto exit_bh;
+       ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+                       block, sbi->s_itb_per_group);
+       err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
 -                             GFP_NOFS, BLKDEV_IFL_WAIT);
++                             GFP_NOFS);
+       if (err)
+               goto exit_bh;
  
-               if (IS_ERR(gdb = bclean(handle, sb, block))) {
-                       err = PTR_ERR(gdb);
-                       goto exit_bh;
-               }
-               ext4_handle_dirty_metadata(handle, NULL, gdb);
-               ext4_set_bit(bit, bh->b_data);
-               brelse(gdb);
-       }
        ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
                   input->block_bitmap - start);
        ext4_set_bit(input->block_bitmap - start, bh->b_data);
        ext4_set_bit(input->inode_bitmap - start, bh->b_data);
  
        /* Zero out all of the inode table blocks */
-       for (i = 0, block = input->inode_table, bit = block - start;
-            i < sbi->s_itb_per_group; i++, bit++, block++) {
-               struct buffer_head *it;
-               ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
-               if ((err = extend_or_restart_transaction(handle, 1, bh)))
-                       goto exit_bh;
-               if (IS_ERR(it = bclean(handle, sb, block))) {
-                       err = PTR_ERR(it);
-                       goto exit_bh;
-               }
-               ext4_handle_dirty_metadata(handle, NULL, it);
-               brelse(it);
-               ext4_set_bit(bit, bh->b_data);
-       }
+       block = input->inode_table;
+       ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+                       block, sbi->s_itb_per_group);
 -      err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
 -                             GFP_NOFS, BLKDEV_IFL_WAIT);
++      err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+       if (err)
+               goto exit_bh;
  
        if ((err = extend_or_restart_transaction(handle, 2, bh)))
                goto exit_bh;
diff --cc fs/ext4/super.c
Simple merge
Simple merge
Simple merge
Simple merge
@@@ -883,14 -932,23 +883,22 @@@ extern int blkdev_issue_flush(struct bl
  extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
  extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 -                      sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
 -static inline int sb_issue_discard(struct super_block *sb,
 -                                 sector_t block, sector_t nr_blocks)
 +                      sector_t nr_sects, gfp_t gfp_mask);
 +static inline int sb_issue_discard(struct super_block *sb, sector_t block,
 +              sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
  {
 -      block <<= (sb->s_blocksize_bits - 9);
 -      nr_blocks <<= (sb->s_blocksize_bits - 9);
 -      return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
 -                                 BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 +      return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
 +                                  nr_blocks << (sb->s_blocksize_bits - 9),
 +                                  gfp_mask, flags);
  }
 -              sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
+ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
 -                                  gfp_mask, flags);
++              sector_t nr_blocks, gfp_t gfp_mask)
+ {
+       return blkdev_issue_zeroout(sb->s_bdev,
+                                   block << (sb->s_blocksize_bits - 9),
+                                   nr_blocks << (sb->s_blocksize_bits - 9),
++                                  gfp_mask);
+ }
  
  extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
  
  #define SEEK_END      2       /* seek relative to end of file */
  #define SEEK_MAX      SEEK_END
  
+ struct fstrim_range {
+       uint64_t start;
+       uint64_t len;
+       uint64_t minlen;
+ };
  /* And dynamically-tunable limits and defaults: */
  struct files_stat_struct {
 -      int nr_files;           /* read only */
 -      int nr_free_files;      /* read only */
 -      int max_files;          /* tunable */
 +      unsigned long nr_files;         /* read only */
 +      unsigned long nr_free_files;    /* read only */
 +      unsigned long max_files;                /* tunable */
  };
  
  struct inodes_stat_t {
Simple merge
@@@ -228,7 -245,7 +245,6 @@@ TRACE_EVENT(ext4_da_writepages
                __field(        long,   pages_skipped           )
                __field(        loff_t, range_start             )
                __field(        loff_t, range_end               )
--              __field(        char,   nonblocking             )
                __field(        char,   for_kupdate             )
                __field(        char,   for_reclaim             )
                __field(        char,   range_cyclic            )
                __entry->writeback_index = inode->i_mapping->writeback_index;
        ),
  
-       TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld "
 -      TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu",
++      TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
 +                "range_start %llu range_end %llu "
 +                "for_kupdate %d for_reclaim %d "
 +                "range_cyclic %d writeback_index %lu",
-                 jbd2_dev_to_name(__entry->dev),
+                 __entry->dev_major, __entry->dev_minor,
                  (unsigned long) __entry->ino, __entry->nr_to_write,
                  __entry->pages_skipped, __entry->range_start,
 -                __entry->range_end, __entry->nonblocking,
 +                __entry->range_end,
                  __entry->for_kupdate, __entry->for_reclaim,
                  __entry->range_cyclic,
                  (unsigned long) __entry->writeback_index)