ext4: inline ext4_writepage() into mpage_da_submit_io()

[pandora-kernel.git] / fs / ext4 / inode.c
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index d6a7701..97a0c35 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -60,6 +60,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
  }
  
  static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+                                  struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
  
  /*
   * Test whether an inode is a fast symlink.
@@ -167,11 +173,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
  /*
   * Called at the last iput() if i_nlink is zero.
   */
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
  {
         handle_t *handle;
         int err;
  
+       if (inode->i_nlink) {
+               truncate_inode_pages(&inode->i_data, 0);
+               goto no_delete;
+       }
+
         if (!is_bad_inode(inode))
                 dquot_initialize(inode);
  
@@ -221,6 +232,7 @@ void ext4_delete_inode(struct inode *inode)
                                      "couldn't extend journal (err %d)", err);
                 stop_handle:
                         ext4_journal_stop(handle);
+                       ext4_orphan_del(NULL, inode);
                         goto no_delete;
                 }
         }
@@ -245,13 +257,13 @@ void ext4_delete_inode(struct inode *inode)
          */
         if (ext4_mark_inode_dirty(handle, inode))
                 /* If that failed, just do the required in-core inode clear. */
-               clear_inode(inode);
+               ext4_clear_inode(inode);
         else
                 ext4_free_inode(handle, inode);
         ext4_journal_stop(handle);
         return;
  no_delete:
-       clear_inode(inode);     /* We must guarantee clearing of inode... */
+       ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
  }
  
  typedef struct {
@@ -337,9 +349,11 @@ static int ext4_block_to_path(struct inode *inode,
         return n;
  }
  
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+                                struct inode *inode,
                                  __le32 *p, unsigned int max)
  {
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
         __le32 *bref = p;
         unsigned int blk;
  
@@ -348,8 +362,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
                 if (blk &&
                     unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                     blk, 1))) {
-                       ext4_error_inode(function, inode,
-                                        "invalid block reference %u", blk);
+                       es->s_last_error_block = cpu_to_le64(blk);
+                       ext4_error_inode(inode, function, line, blk,
+                                        "invalid block");
                         return -EIO;
                 }
         }
@@ -358,11 +373,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
  
  
  #define ext4_check_indirect_blockref(inode, bh)                         \
-       __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,  \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             (__le32 *)(bh)->b_data,                   \
                               EXT4_ADDR_PER_BLOCK((inode)->i_sb))
  
  #define ext4_check_inode_blockref(inode)                                \
-       __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,   \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             EXT4_I(inode)->i_data,                    \
                               EXT4_NDIR_BLOCKS)
  
  /**
@@ -1128,20 +1145,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
                 ext4_discard_preallocations(inode);
  }
  
-static int check_block_validity(struct inode *inode, const char *func,
+static int __check_block_validity(struct inode *inode, const char *func,
+                               unsigned int line,
                                 struct ext4_map_blocks *map)
  {
         if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
                                    map->m_len)) {
-               ext4_error_inode(func, inode,
-                          "lblock %lu mapped to illegal pblock %llu "
-                          "(length %d)", (unsigned long) map->m_lblk,
-                                map->m_pblk, map->m_len);
+               ext4_error_inode(inode, func, line, map->m_pblk,
+                                "lblock %lu mapped to illegal pblock "
+                                "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_len);
                 return -EIO;
         }
         return 0;
  }
  
+#define check_block_validity(inode, map)       \
+       __check_block_validity((inode), __func__, __LINE__, (map))
+
  /*
   * Return the number of contiguous dirty pages in a given inode
   * starting at page frame idx.
@@ -1192,8 +1213,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
                                 break;
                         idx++;
                         num++;
-                       if (num >= max_pages)
+                       if (num >= max_pages) {
+                               done = 1;
                                 break;
+                       }
                 }
                 pagevec_release(&pvec);
         }
@@ -1244,7 +1267,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         up_read((&EXT4_I(inode)->i_data_sem));
  
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode, __func__, map);
+               int ret = check_block_validity(inode, map);
                 if (ret != 0)
                         return ret;
         }
@@ -1324,9 +1347,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
  
         up_write((&EXT4_I(inode)->i_data_sem));
         if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode,
-                                              "ext4_map_blocks_after_alloc",
-                                              map);
+               int ret = check_block_validity(inode, map);
                 if (ret != 0)
                         return ret;
         }
@@ -1519,9 +1540,25 @@ static int walk_page_buffers(handle_t *handle,
  static int do_journal_get_write_access(handle_t *handle,
                                        struct buffer_head *bh)
  {
+       int dirty = buffer_dirty(bh);
+       int ret;
+
         if (!buffer_mapped(bh) || buffer_freed(bh))
                 return 0;
-       return ext4_journal_get_write_access(handle, bh);
+       /*
+        * __block_prepare_write() could have dirtied some buffers. Clean
+        * the dirty bit as jbd2_journal_get_write_access() could complain
+        * otherwise about fs integrity issues. Setting of the dirty bit
+        * by __block_prepare_write() isn't a real problem here as we clear
+        * the bit before releasing a page lock and thus writeback cannot
+        * ever write the buffer.
+        */
+       if (dirty)
+               clear_buffer_dirty(bh);
+       ret = ext4_journal_get_write_access(handle, bh);
+       if (!ret && dirty)
+               ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+       return ret;
  }
  
  /*
@@ -1578,11 +1615,9 @@ retry:
         *pagep = page;
  
         if (ext4_should_dioread_nolock(inode))
-               ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-                               fsdata, ext4_get_block_write);
+               ret = __block_write_begin(page, pos, len, ext4_get_block_write);
         else
-               ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-                               fsdata, ext4_get_block);
+               ret = __block_write_begin(page, pos, len, ext4_get_block);
  
         if (!ret && ext4_should_journal_data(inode)) {
                 ret = walk_page_buffers(handle, page_buffers(page),
@@ -1593,7 +1628,7 @@ retry:
                 unlock_page(page);
                 page_cache_release(page);
                 /*
-                * block_write_begin may have instantiated a few blocks
+                * __block_write_begin may have instantiated a few blocks
                  * outside i_size.  Trim these off again. Don't need
                  * i_size_read because we hold i_mutex.
                  *
@@ -1970,12 +2005,15 @@ static void ext4_da_page_release_reservation(struct page *page,
   */
  static int mpage_da_submit_io(struct mpage_da_data *mpd)
  {
-       long pages_skipped;
         struct pagevec pvec;
         unsigned long index, end;
         int ret = 0, err, nr_pages, i;
         struct inode *inode = mpd->inode;
         struct address_space *mapping = inode->i_mapping;
+       loff_t size = i_size_read(inode);
+       unsigned int len;
+       struct buffer_head *page_bufs = NULL;
+       int journal_data = ext4_should_journal_data(inode);
  
         BUG_ON(mpd->next_page <= mpd->first_page);
         /*
@@ -1993,28 +2031,69 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
                 if (nr_pages == 0)
                         break;
                 for (i = 0; i < nr_pages; i++) {
+                       int commit_write = 0;
                         struct page *page = pvec.pages[i];
  
                         index = page->index;
                         if (index > end)
                                 break;
+
+                       if (index == size >> PAGE_CACHE_SHIFT)
+                               len = size & ~PAGE_CACHE_MASK;
+                       else
+                               len = PAGE_CACHE_SIZE;
                         index++;
  
                         BUG_ON(!PageLocked(page));
                         BUG_ON(PageWriteback(page));
  
-                       pages_skipped = mpd->wbc->pages_skipped;
-                       err = mapping->a_ops->writepage(page, mpd->wbc);
-                       if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+                       /*
+                        * If the page does not have buffers (for
+                        * whatever reason), try to create them using
+                        * block_prepare_write.  If this fails,
+                        * redirty the page and move on.
+                        */
+                       if (!page_has_buffers(page)) {
+                               if (block_prepare_write(page, 0, len,
+                                               noalloc_get_block_write)) {
+                               redirty_page:
+                                       redirty_page_for_writepage(mpd->wbc,
+                                                                  page);
+                                       unlock_page(page);
+                                       continue;
+                               }
+                               commit_write = 1;
+                       }
+                       page_bufs = page_buffers(page);
+                       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                                             ext4_bh_delay_or_unwritten)) {
                                 /*
-                                * have successfully written the page
-                                * without skipping the same
+                                * We couldn't do block allocation for
+                                * some reason.
                                  */
+                               goto redirty_page;
+                       }
+
+                       if (commit_write)
+                               /* mark the buffer_heads as dirty & uptodate */
+                               block_commit_write(page, 0, len);
+
+                       if (journal_data && PageChecked(page))
+                               err = __ext4_journalled_writepage(page, len);
+                       else if (buffer_uninit(page_bufs)) {
+                               ext4_set_bh_endio(page_bufs, inode);
+                               err = block_write_full_page_endio(page,
+                                       noalloc_get_block_write,
+                                       mpd->wbc, ext4_end_io_buffer_write);
+                       } else
+                               err = block_write_full_page(page,
+                                           noalloc_get_block_write, mpd->wbc);
+
+                       if (!err)
                                 mpd->pages_written++;
                         /*
                          * In error case, we have to continue because
                          * remaining pages are still locked
-                        * XXX: unlock and re-dirty them?
                          */
                         if (ret == 0)
                                 ret = err;
@@ -2076,7 +2155,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
                         } while ((bh = bh->b_this_page) != head);
  
                         do {
-                               if (cur_logical >= map->m_lblk + blocks)
+                               if (cur_logical > map->m_lblk + (blocks - 1))
                                         break;
  
                                 if (buffer_delay(bh) || buffer_unwritten(bh)) {
@@ -2160,14 +2239,15 @@ static void ext4_print_free_blocks(struct inode *inode)
  }
  
  /*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ *       if necessary, and then submit them for I/O
   *
   * @mpd - bh describing space
   *
   * The function skips space we know is already mapped to disk blocks.
   *
   */
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
  {
         int err, blks, get_blocks_flags;
         struct ext4_map_blocks map;
@@ -2177,24 +2257,20 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         handle_t *handle = NULL;
  
         /*
-        * We consider only non-mapped and non-allocated blocks
-        */
-       if ((mpd->b_state  & (1 << BH_Mapped)) &&
-               !(mpd->b_state & (1 << BH_Delay)) &&
-               !(mpd->b_state & (1 << BH_Unwritten)))
-               return 0;
-
-       /*
-        * If we didn't accumulate anything to write simply return
+        * If the blocks are mapped already, or we couldn't accumulate
+        * any blocks, then proceed immediately to the submission stage.
          */
-       if (!mpd->b_size)
-               return 0;
+       if ((mpd->b_size == 0) ||
+           ((mpd->b_state  & (1 << BH_Mapped)) &&
+            !(mpd->b_state & (1 << BH_Delay)) &&
+            !(mpd->b_state & (1 << BH_Unwritten))))
+               goto submit_io;
  
         handle = ext4_journal_current_handle();
         BUG_ON(!handle);
  
         /*
-        * Call ext4_get_blocks() to allocate any delayed allocation
+        * Call ext4_map_blocks() to allocate any delayed allocation
          * blocks, or to convert an uninitialized extent to be
          * initialized (in the case where we have written into
          * one or more preallocated blocks).
@@ -2203,7 +2279,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
          * indicate that we are on the delayed allocation path.  This
          * affects functions in many different parts of the allocation
          * call path.  This flag exists primarily because we don't
-        * want to change *many* call functions, so ext4_get_blocks()
+        * want to change *many* call functions, so ext4_map_blocks()
          * will set the magic i_delalloc_reserved_flag once the
          * inode's allocation semaphore is taken.
          *
@@ -2221,19 +2297,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
  
         blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
         if (blks < 0) {
+               struct super_block *sb = mpd->inode->i_sb;
+
                 err = blks;
                 /*
-                * If get block returns with error we simply
-                * return. Later writepage will redirty the page and
-                * writepages will find the dirty page again
+                * If get block returns EAGAIN or ENOSPC and there
+                * appears to be free blocks we will go on to
+                * mpage_da_submit_io() for all of the pages, which will
+                * just redirty the pages.
                  */
                 if (err == -EAGAIN)
-                       return 0;
+                       goto submit_io;
  
                 if (err == -ENOSPC &&
-                   ext4_count_free_blocks(mpd->inode->i_sb)) {
+                   ext4_count_free_blocks(sb)) {
                         mpd->retval = err;
-                       return 0;
+                       goto submit_io;
                 }
  
                 /*
@@ -2243,21 +2322,22 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                  * writepage and writepages will again try to write
                  * the same.
                  */
-               ext4_msg(mpd->inode->i_sb, KERN_CRIT,
-                        "delayed block allocation failed for inode %lu at "
-                        "logical offset %llu with max blocks %zd with "
-                        "error %d", mpd->inode->i_ino,
-                        (unsigned long long) next,
-                        mpd->b_size >> mpd->inode->i_blkbits, err);
-               printk(KERN_CRIT "This should not happen!!  "
-                      "Data will be lost\n");
-               if (err == -ENOSPC) {
-                       ext4_print_free_blocks(mpd->inode);
+               if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                       ext4_msg(sb, KERN_CRIT,
+                                "delayed block allocation failed for inode %lu "
+                                "at logical offset %llu with max blocks %zd "
+                                "with error %d", mpd->inode->i_ino,
+                                (unsigned long long) next,
+                                mpd->b_size >> mpd->inode->i_blkbits, err);
+                       ext4_msg(sb, KERN_CRIT,
+                               "This should not happen!! Data will be lost\n");
+                       if (err == -ENOSPC)
+                               ext4_print_free_blocks(mpd->inode);
                 }
                 /* invalidate all the pages */
                 ext4_da_block_invalidatepages(mpd, next,
                                 mpd->b_size >> mpd->inode->i_blkbits);
-               return err;
+               return;
         }
         BUG_ON(blks == 0);
  
@@ -2280,7 +2360,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         if (ext4_should_order_data(mpd->inode)) {
                 err = ext4_jbd2_file_inode(handle, mpd->inode);
                 if (err)
-                       return err;
+                       /* This only happens if the journal is aborted */
+                       return;
         }
  
         /*
@@ -2291,10 +2372,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                 disksize = i_size_read(mpd->inode);
         if (disksize > EXT4_I(mpd->inode)->i_disksize) {
                 ext4_update_i_disksize(mpd->inode, disksize);
-               return ext4_mark_inode_dirty(handle, mpd->inode);
+               err = ext4_mark_inode_dirty(handle, mpd->inode);
+               if (err)
+                       ext4_error(mpd->inode->i_sb,
+                                  "Failed to mark inode %lu dirty",
+                                  mpd->inode->i_ino);
         }
  
-       return 0;
+submit_io:
+       mpage_da_submit_io(mpd);
+       mpd->io_done = 1;
  }
  
  #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2320,7 +2407,7 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
          * XXX Don't go larger than mballoc is willing to allocate
          * This is a stopgap solution.  We eventually need to fold
          * mpage_da_submit_io() into this function and then call
-        * ext4_get_blocks() multiple times in a loop
+        * ext4_map_blocks() multiple times in a loop
          */
         if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
                 goto flush_it;
@@ -2371,9 +2458,7 @@ flush_it:
          * We couldn't merge the block to our extent, so we
          * need to flush current  extent and start new one
          */
-       if (mpage_da_map_blocks(mpd) == 0)
-               mpage_da_submit_io(mpd);
-       mpd->io_done = 1;
+       mpage_da_map_and_submit(mpd);
         return;
  }
  
@@ -2405,15 +2490,13 @@ static int __mpage_da_writepage(struct page *page,
         if (mpd->next_page != page->index) {
                 /*
                  * Nope, we can't. So, we map non-allocated blocks
-                * and start IO on them using writepage()
+                * and start IO on them
                  */
                 if (mpd->next_page != mpd->first_page) {
-                       if (mpage_da_map_blocks(mpd) == 0)
-                               mpage_da_submit_io(mpd);
+                       mpage_da_map_and_submit(mpd);
                         /*
                          * skip rest of the page in the page_vec
                          */
-                       mpd->io_done = 1;
                         redirty_page_for_writepage(wbc, page);
                         unlock_page(page);
                         return MPAGE_DA_EXTENT_TAIL;
@@ -2553,18 +2636,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
  /*
   * This function is used as a standard get_block_t calback function
   * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page().  These functions should only try to map a
- * single block at a time.
+ * callback function for block_prepare_write() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
   *
   * Since this function doesn't do block allocations even if the caller
   * requests it by passing in create=1, it is critically important that
   * any caller checks to make sure that any buffer heads are returned
   * by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page().  Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling block_write_full_page().  Otherwise,
+ * b_blocknr could be left uninitialized, and the page write functions will
+ * be taken by surprise.
   */
  static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
@@ -2595,6 +2676,7 @@ static int __ext4_journalled_writepage(struct page *page,
         int ret = 0;
         int err;
  
+       ClearPageChecked(page);
         page_bufs = page_buffers(page);
         BUG_ON(!page_bufs);
         walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2672,7 +2754,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
  static int ext4_writepage(struct page *page,
                           struct writeback_control *wbc)
  {
-       int ret = 0;
+       int ret = 0, commit_write = 0;
         loff_t size;
         unsigned int len;
         struct buffer_head *page_bufs = NULL;
@@ -2685,73 +2767,46 @@ static int ext4_writepage(struct page *page,
         else
                 len = PAGE_CACHE_SIZE;
  
-       if (page_has_buffers(page)) {
-               page_bufs = page_buffers(page);
-               if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-                                       ext4_bh_delay_or_unwritten)) {
-                       /*
-                        * We don't want to do  block allocation
-                        * So redirty the page and return
-                        * We may reach here when we do a journal commit
-                        * via journal_submit_inode_data_buffers.
-                        * If we don't have mapping block we just ignore
-                        * them. We can also reach here via shrink_page_list
-                        */
+       /*
+        * If the page has no buffers (for whatever reason), try to
+        * create them using block_prepare_write; if this fails, redirty
+        * the page and move on.  page_buffers() asserts buffers exist.
+        */
+       if (!page_has_buffers(page)) {
+               if (block_prepare_write(page, 0, len,
+                                       noalloc_get_block_write)) {
+               redirty_page:
                         redirty_page_for_writepage(wbc, page);
                         unlock_page(page);
                         return 0;
                 }
-       } else {
+               commit_write = 1;
+       }
+       page_bufs = page_buffers(page);
+       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+                             ext4_bh_delay_or_unwritten)) {
                 /*
-                * The test for page_has_buffers() is subtle:
-                * We know the page is dirty but it lost buffers. That means
-                * that at some moment in time after write_begin()/write_end()
-                * has been called all buffers have been clean and thus they
-                * must have been written at least once. So they are all
-                * mapped and we can happily proceed with mapping them
-                * and writing the page.
-                *
-                * Try to initialize the buffer_heads and check whether
-                * all are mapped and non delay. We don't want to
-                * do block allocation here.
+                * We don't want to do block allocation, so redirty the
+                * page and return.  We may reach here when we do a
+                * journal commit via
+                * journal_submit_inode_data_buffers.  If we don't
+                * have mapping block we just ignore them. We can also
+                * reach here via shrink_page_list.
                  */
-               ret = block_prepare_write(page, 0, len,
-                                         noalloc_get_block_write);
-               if (!ret) {
-                       page_bufs = page_buffers(page);
-                       /* check whether all are mapped and non delay */
-                       if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-                                               ext4_bh_delay_or_unwritten)) {
-                               redirty_page_for_writepage(wbc, page);
-                               unlock_page(page);
-                               return 0;
-                       }
-               } else {
-                       /*
-                        * We can't do block allocation here
-                        * so just redity the page and unlock
-                        * and return
-                        */
-                       redirty_page_for_writepage(wbc, page);
-                       unlock_page(page);
-                       return 0;
-               }
+               goto redirty_page;
+       }
+       if (commit_write)
                 /* now mark the buffer_heads as dirty and uptodate */
                 block_commit_write(page, 0, len);
-       }
  
-       if (PageChecked(page) && ext4_should_journal_data(inode)) {
+       if (PageChecked(page) && ext4_should_journal_data(inode))
                 /*
                  * It's mmapped pagecache.  Add buffers and journal it.  There
                  * doesn't seem much point in redirtying the page here.
                  */
-               ClearPageChecked(page);
                 return __ext4_journalled_writepage(page, len);
-       }
  
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, noalloc_get_block_write, wbc);
-       else if (page_bufs && buffer_uninit(page_bufs)) {
+       if (buffer_uninit(page_bufs)) {
                 ext4_set_bh_endio(page_bufs, inode);
                 ret = block_write_full_page_endio(page, noalloc_get_block_write,
                                             wbc, ext4_end_io_buffer_write);
@@ -2976,9 +3031,12 @@ static int ext4_da_writepages(struct address_space *mapping,
          * sbi->max_writeback_mb_bump whichever is smaller.
          */
         max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
-       if (!range_cyclic && range_whole)
-               desired_nr_to_write = wbc->nr_to_write * 8;
-       else
+       if (!range_cyclic && range_whole) {
+               if (wbc->nr_to_write == LONG_MAX)
+                       desired_nr_to_write = wbc->nr_to_write;
+               else
+                       desired_nr_to_write = wbc->nr_to_write * 8;
+       } else
                 desired_nr_to_write = ext4_num_dirty_pages(inode, index,
                                                            max_pages);
         if (desired_nr_to_write > max_pages)
@@ -3040,9 +3098,7 @@ retry:
                  * them for I/O.
                  */
                 if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-                       if (mpage_da_map_blocks(&mpd) == 0)
-                               mpage_da_submit_io(&mpd);
-                       mpd.io_done = 1;
+                       mpage_da_map_and_submit(&mpd);
                         ret = MPAGE_DA_EXTENT_TAIL;
                 }
                 trace_ext4_da_write_pages(inode, &mpd);
@@ -3146,13 +3202,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
         int ret, retries = 0;
         struct page *page;
         pgoff_t index;
-       unsigned from, to;
         struct inode *inode = mapping->host;
         handle_t *handle;
  
         index = pos >> PAGE_CACHE_SHIFT;
-       from = pos & (PAGE_CACHE_SIZE - 1);
-       to = from + len;
  
         if (ext4_nonda_switch(inode->i_sb)) {
                 *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3185,8 +3238,7 @@ retry:
         }
         *pagep = page;
  
-       ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-                               ext4_da_get_block_prep);
+       ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
         if (ret < 0) {
                 unlock_page(page);
                 ext4_journal_stop(handle);
@@ -3677,6 +3729,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
                 return ret;
         }
  
+       if (io->iocb)
+               aio_complete(io->iocb, io->result, 0);
         /* clear the DIO AIO unwritten flag */
         io->flag = 0;
         return ret;
@@ -3776,6 +3830,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
                 io->offset = 0;
                 io->size = 0;
                 io->page = NULL;
+               io->iocb = NULL;
+               io->result = 0;
                 INIT_WORK(&io->work, ext4_end_io_work);
                 INIT_LIST_HEAD(&io->list);
         }
@@ -3805,26 +3861,29 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
         if (io_end->flag != EXT4_IO_UNWRITTEN){
                 ext4_free_io_end(io_end);
                 iocb->private = NULL;
-               goto out;
+out:
+               if (is_async)
+                       aio_complete(iocb, ret, 0);
+               return;
         }
  
         io_end->offset = offset;
         io_end->size = size;
-       io_end->flag = EXT4_IO_UNWRITTEN;
+       if (is_async) {
+               io_end->iocb = iocb;
+               io_end->result = ret;
+       }
         wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
  
-       /* queue the work to convert unwritten extents to written */
-       queue_work(wq, &io_end->work);
-
         /* Add the io_end to the per-inode completed aio dio list */
         ei = EXT4_I(io_end->inode);
         spin_lock_irqsave(&ei->i_completed_io_lock, flags);
         list_add_tail(&io_end->list, &ei->i_completed_io_list);
         spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+       /* queue the work to convert unwritten extents to written */
+       queue_work(wq, &io_end->work);
         iocb->private = NULL;
-out:
-       if (is_async)
-               aio_complete(iocb, ret, 0);
  }
  
  static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3950,7 +4009,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                 return -ENOMEM;
                         /*
                          * we save the io structure for current async
-                        * direct IO, so that later ext4_get_blocks()
+                        * direct IO, so that later ext4_map_blocks()
                          * could flag the io structure whether there
                                  * is an unwritten extent that needs to be converted
                          * when IO is completed.
@@ -4141,17 +4200,6 @@ int ext4_block_truncate_page(handle_t *handle,
         length = blocksize - (offset & (blocksize - 1));
         iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
  
-       /*
-        * For "nobh" option,  we can only work if we don't need to
-        * read-in the page - otherwise we create buffers to do the IO.
-        */
-       if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-            ext4_should_writeback_data(inode) && PageUptodate(page)) {
-               zero_user(page, offset, length);
-               set_page_dirty(page);
-               goto unlock;
-       }
-
         if (!page_has_buffers(page))
                 create_empty_buffers(page, blocksize, 0);
  
@@ -4501,9 +4549,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                          * (should be rare).
                          */
                         if (!bh) {
-                               EXT4_ERROR_INODE(inode,
-                                                "Read failure block=%llu",
-                                                (unsigned long long) nr);
+                               EXT4_ERROR_INODE_BLOCK(inode, nr,
+                                                      "Read failure");
                                 continue;
                         }
  
@@ -4514,27 +4561,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                         (__le32 *) bh->b_data + addr_per_block,
                                         depth);
  
-                       /*
-                        * We've probably journalled the indirect block several
-                        * times during the truncate.  But it's no longer
-                        * needed and we now drop it from the transaction via
-                        * jbd2_journal_revoke().
-                        *
-                        * That's easy if it's exclusively part of this
-                        * transaction.  But if it's part of the committing
-                        * transaction then jbd2_journal_forget() will simply
-                        * brelse() it.  That means that if the underlying
-                        * block is reallocated in ext4_get_block(),
-                        * unmap_underlying_metadata() will find this block
-                        * and will try to get rid of it.  damn, damn.
-                        *
-                        * If this block has already been committed to the
-                        * journal, a revoke record will be written.  And
-                        * revoke records must be emitted *before* clearing
-                        * this block's bit in the bitmaps.
-                        */
-                       ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
                         /*
                          * Everything below this pointer has been
                          * released.  Now let this top-of-subtree go.
@@ -4559,8 +4585,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                             blocks_for_truncate(inode));
                         }
  
+                       /*
+                        * The forget flag here is critical because if
+                        * we are journaling (and not doing data
+                        * journaling), we have to make sure a revoke
+                        * record is written to prevent the journal
+                        * replay from overwriting the (former)
+                        * indirect block if it gets reallocated as a
+                        * data block.  This must happen in the same
+                        * transaction where the data blocks are
+                        * actually freed.
+                        */
                         ext4_free_blocks(handle, inode, 0, nr, 1,
-                                        EXT4_FREE_BLOCKS_METADATA);
+                                        EXT4_FREE_BLOCKS_METADATA|
+                                        EXT4_FREE_BLOCKS_FORGET);
  
                         if (parent_bh) {
                                 /*
@@ -4818,8 +4856,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
  
         bh = sb_getblk(sb, block);
         if (!bh) {
-               EXT4_ERROR_INODE(inode, "unable to read inode block - "
-                                "block %llu", block);
+               EXT4_ERROR_INODE_BLOCK(inode, block,
+                                      "unable to read itable block");
                 return -EIO;
         }
         if (!buffer_uptodate(bh)) {
@@ -4917,8 +4955,8 @@ make_io:
                 submit_bh(READ_META, bh);
                 wait_on_buffer(bh);
                 if (!buffer_uptodate(bh)) {
-                       EXT4_ERROR_INODE(inode, "unable to read inode "
-                                        "block %llu", block);
+                       EXT4_ERROR_INODE_BLOCK(inode, block,
+                                              "unable to read itable block");
                         brelse(bh);
                         return -EIO;
                 }
@@ -4989,7 +5027,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
                 /* we are using combined 48 bit field */
                 i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
                                         le32_to_cpu(raw_inode->i_blocks_lo);
-               if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+               if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
                         /* i_blocks represent file system block size */
                         return i_blocks  << (inode->i_blkbits - 9);
                 } else {
@@ -5085,7 +5123,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                 transaction_t *transaction;
                 tid_t tid;
  
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
                 if (journal->j_running_transaction)
                         transaction = journal->j_running_transaction;
                 else
@@ -5094,7 +5132,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                         tid = transaction->t_tid;
                 else
                         tid = journal->j_commit_sequence;
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                 ei->i_sync_tid = tid;
                 ei->i_datasync_tid = tid;
         }
@@ -5139,7 +5177,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                                  ei->i_file_acl);
                 ret = -EIO;
                 goto bad_inode;
-       } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+       } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                 if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                     (S_ISLNK(inode->i_mode) &&
                      !ext4_inode_is_fast_symlink(inode)))
@@ -5419,9 +5457,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
                 if (wbc->sync_mode == WB_SYNC_ALL)
                         sync_dirty_buffer(iloc.bh);
                 if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       EXT4_ERROR_INODE(inode,
-                               "IO error syncing inode (block=%llu)",
-                               (unsigned long long) iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+                                        "IO error syncing inode");
                         err = -EIO;
                 }
                 brelse(iloc.bh);
@@ -5496,10 +5533,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                         struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
  
-                       if (attr->ia_size > sbi->s_bitmap_maxbytes) {
-                               error = -EFBIG;
-                               goto err_out;
-                       }
+                       if (attr->ia_size > sbi->s_bitmap_maxbytes)
+                               return -EFBIG;
                 }
         }
  
@@ -5542,11 +5577,19 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                         ext4_truncate(inode);
         }
  
-       rc = inode_setattr(inode, attr);
+       if ((attr->ia_valid & ATTR_SIZE) &&
+           attr->ia_size != i_size_read(inode))
+               rc = vmtruncate(inode, attr->ia_size);
  
-       /* If inode_setattr's call to ext4_truncate failed to get a
-        * transaction handle at all, we need to clean up the in-core
-        * orphan list manually. */
+       if (!rc) {
+               setattr_copy(inode, attr);
+               mark_inode_dirty(inode);
+       }
+
+       /*
+        * If the call to ext4_truncate failed to get a transaction handle at
+        * all, we need to clean up the in-core orphan list manually.
+        */
         if (inode->i_nlink)
                 ext4_orphan_del(NULL, inode);
  
@@ -5701,7 +5744,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
   * Calculate the journal credits for a chunk of data modification.
   *
   * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
   *
   * journal buffers for data blocks are not included here, as DIO
   * and fallocate do not need to journal data buffers.
@@ -5767,7 +5810,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
  {
         struct ext4_inode *raw_inode;
         struct ext4_xattr_ibody_header *header;
-       struct ext4_xattr_entry *entry;
  
         if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
                 return 0;
@@ -5775,7 +5817,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
         raw_inode = ext4_raw_inode(&iloc);
  
         header = IHDR(inode, raw_inode);
-       entry = IFIRST(header);
  
         /* No extended attributes present */
         if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||