ext4: fix data corruption regression by reverting commit 6de9843dab3f
[pandora-kernel.git] / fs / ext4 / inode.c
index 9f7f9e4..9c8cf81 100644 (file)
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
        BUG_ON(EXT4_JOURNAL(inode) == NULL);
        jbd_debug(2, "restarting handle %p\n", handle);
        up_write(&EXT4_I(inode)->i_data_sem);
-       ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+       ret = ext4_journal_restart(handle, nblocks);
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode);
 
@@ -720,7 +720,7 @@ allocated:
        return ret;
 failed_out:
        for (i = 0; i < index; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
        return ret;
 }
 
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
        return err;
 failed:
        /* Allocation failed, free what we already allocated */
-       ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
                /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
                 */
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
        for (i = n+1; i < indirect_blks; i++)
-               ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+               ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-       ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+       ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
        return err;
 }
@@ -924,7 +924,7 @@ err_out:
                ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
                                 EXT4_FREE_BLOCKS_FORGET);
        }
-       ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+       ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
                         blks, 0);
 
        return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
        int count = 0;
        ext4_fsblk_t first_block = 0;
 
+       trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
        J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
                partial--;
        }
 out:
+       trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+                               map->m_pblk, map->m_len, err);
        return err;
 }
 
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                if (nr_pages == 0)
                        break;
                for (i = 0; i < nr_pages; i++) {
-                       int commit_write = 0, redirty_page = 0;
+                       int commit_write = 0, skip_page = 0;
                        struct page *page = pvec.pages[i];
 
                        index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                         * If the page does not have buffers (for
                         * whatever reason), try to create them using
                         * __block_write_begin.  If this fails,
-                        * redirty the page and move on.
+                        * skip the page and move on.
                         */
                        if (!page_has_buffers(page)) {
                                if (__block_write_begin(page, 0, len,
                                                noalloc_get_block_write)) {
-                               redirty_page:
-                                       redirty_page_for_writepage(mpd->wbc,
-                                                                  page);
+                               skip_page:
                                        unlock_page(page);
                                        continue;
                                }
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                        block_start = 0;
                        do {
                                if (!bh)
-                                       goto redirty_page;
+                                       goto skip_page;
                                if (map && (cur_logical >= map->m_lblk) &&
                                    (cur_logical <= (map->m_lblk +
                                                     (map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
                                        clear_buffer_unwritten(bh);
                                }
 
-                               /* redirty page if block allocation undone */
+                               /* skip page if block allocation undone */
                                if (buffer_delay(bh) || buffer_unwritten(bh))
-                                       redirty_page = 1;
+                                       skip_page = 1;
                                bh = bh->b_this_page;
                                block_start += bh->b_size;
                                cur_logical++;
                                pblock++;
                        } while (bh != page_bufs);
 
-                       if (redirty_page)
-                               goto redirty_page;
+                       if (skip_page)
+                               goto skip_page;
 
                        if (commit_write)
                                /* mark the buffer_heads as dirty & uptodate */
                                block_commit_write(page, 0, len);
 
+                       clear_page_dirty_for_io(page);
                        /*
                         * Delalloc doesn't support data journalling,
                         * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
        return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-                                       sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
        int nr_pages, i;
        pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
        struct inode *inode = mpd->inode;
        struct address_space *mapping = inode->i_mapping;
 
-       index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-       end   = (logical + blk_cnt - 1) >>
-                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       index = mpd->first_page;
+       end   = mpd->next_page - 1;
        while (index <= end) {
                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
                if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
                err = blks;
                /*
                 * If get block returns EAGAIN or ENOSPC and there
-                * appears to be free blocks we will call
-                * ext4_writepage() for all of the pages which will
-                * just redirty the pages.
+                * appears to be free blocks we will just let
+                * mpage_da_submit_io() unlock all of the pages.
                 */
                if (err == -EAGAIN)
                        goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
                                ext4_print_free_blocks(mpd->inode);
                }
                /* invalidate all the pages */
-               ext4_da_block_invalidatepages(mpd, next,
-                               mpd->b_size >> mpd->inode->i_blkbits);
+               ext4_da_block_invalidatepages(mpd);
+
+               /* Mark this page range as having been completed */
+               mpd->io_done = 1;
                return;
        }
        BUG_ON(blks == 0);
@@ -2437,102 +2438,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
        return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
 
-/*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-                               struct writeback_control *wbc,
-                               struct mpage_da_data *mpd)
-{
-       struct inode *inode = mpd->inode;
-       struct buffer_head *bh, *head;
-       sector_t logical;
-
-       /*
-        * Can we merge this page to current extent?
-        */
-       if (mpd->next_page != page->index) {
-               /*
-                * Nope, we can't. So, we map non-allocated blocks
-                * and start IO on them
-                */
-               if (mpd->next_page != mpd->first_page) {
-                       mpage_da_map_and_submit(mpd);
-                       /*
-                        * skip rest of the page in the page_vec
-                        */
-                       redirty_page_for_writepage(wbc, page);
-                       unlock_page(page);
-                       return MPAGE_DA_EXTENT_TAIL;
-               }
-
-               /*
-                * Start next extent of pages ...
-                */
-               mpd->first_page = page->index;
-
-               /*
-                * ... and blocks
-                */
-               mpd->b_size = 0;
-               mpd->b_state = 0;
-               mpd->b_blocknr = 0;
-       }
-
-       mpd->next_page = page->index + 1;
-       logical = (sector_t) page->index <<
-                 (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-       if (!page_has_buffers(page)) {
-               mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
-               if (mpd->io_done)
-                       return MPAGE_DA_EXTENT_TAIL;
-       } else {
-               /*
-                * Page with regular buffer heads, just add all dirty ones
-                */
-               head = page_buffers(page);
-               bh = head;
-               do {
-                       BUG_ON(buffer_locked(bh));
-                       /*
-                        * We need to try to allocate
-                        * unmapped blocks in the same page.
-                        * Otherwise we won't make progress
-                        * with the page in ext4_writepage
-                        */
-                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-                               mpage_add_bh_to_extent(mpd, logical,
-                                                      bh->b_size,
-                                                      bh->b_state);
-                               if (mpd->io_done)
-                                       return MPAGE_DA_EXTENT_TAIL;
-                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-                               /*
-                                * mapped dirty buffer. We need to update
-                                * the b_state because we look at
-                                * b_state in mpage_da_map_blocks. We don't
-                                * update b_size because if we find an
-                                * unmapped buffer_head later we need to
-                                * use the b_state flag of that buffer_head.
-                                */
-                               if (mpd->b_size == 0)
-                                       mpd->b_state = bh->b_state & BH_FLAGS;
-                       }
-                       logical++;
-               } while ((bh = bh->b_this_page) != head);
-       }
-
-       return 0;
-}
-
 /*
  * This is a special get_blocks_t callback which is used by
  * ext4_da_write_begin().  It will either return mapped block or
@@ -2811,27 +2716,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
 /*
  * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages().  Differences:
- *     Range cyclic is ignored.
- *     no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
  */
 static int write_cache_pages_da(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd,
                                pgoff_t *done_index)
 {
-       int ret = 0;
-       int done = 0;
-       struct pagevec pvec;
-       unsigned nr_pages;
-       pgoff_t index;
-       pgoff_t end;            /* Inclusive */
-       long nr_to_write = wbc->nr_to_write;
-       int tag;
-
+       struct buffer_head      *bh, *head;
+       struct inode            *inode = mapping->host;
+       struct pagevec          pvec;
+       unsigned int            nr_pages;
+       sector_t                logical;
+       pgoff_t                 index, end;
+       long                    nr_to_write = wbc->nr_to_write;
+       int                     i, tag, ret = 0;
+
+       memset(mpd, 0, sizeof(struct mpage_da_data));
+       mpd->wbc = wbc;
+       mpd->inode = inode;
        pagevec_init(&pvec, 0);
        index = wbc->range_start >> PAGE_CACHE_SHIFT;
        end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2747,11 @@ static int write_cache_pages_da(struct address_space *mapping,
                tag = PAGECACHE_TAG_DIRTY;
 
        *done_index = index;
-       while (!done && (index <= end)) {
-               int i;
-
+       while (index <= end) {
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
                              min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (nr_pages == 0)
-                       break;
+                       return 0;
 
                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];
@@ -2860,60 +2763,100 @@ static int write_cache_pages_da(struct address_space *mapping,
                         * mapping. However, page->index will not change
                         * because we have a reference on the page.
                         */
-                       if (page->index > end) {
-                               done = 1;
-                               break;
-                       }
+                       if (page->index > end)
+                               goto out;
 
                        *done_index = page->index + 1;
 
+                       /*
+                        * If we can't merge this page, and we have
+                        * accumulated an contiguous region, write it
+                        */
+                       if ((mpd->next_page != page->index) &&
+                           (mpd->next_page != mpd->first_page)) {
+                               mpage_da_map_and_submit(mpd);
+                               goto ret_extent_tail;
+                       }
+
                        lock_page(page);
 
                        /*
-                        * Page truncated or invalidated. We can freely skip it
-                        * then, even for data integrity operations: the page
-                        * has disappeared concurrently, so there could be no
-                        * real expectation of this data interity operation
-                        * even if there is now a new, dirty page at the same
-                        * pagecache address.
+                        * If the page is no longer dirty, or its
+                        * mapping no longer corresponds to inode we
+                        * are writing (which means it has been
+                        * truncated or invalidated), or the page is
+                        * already under writeback and we are not
+                        * doing a data integrity writeback, skip the page
                         */
-                       if (unlikely(page->mapping != mapping)) {
-continue_unlock:
+                       if (!PageDirty(page) ||
+                           (PageWriteback(page) &&
+                            (wbc->sync_mode == WB_SYNC_NONE)) ||
+                           unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                continue;
                        }
 
-                       if (!PageDirty(page)) {
-                               /* someone wrote it for us */
-                               goto continue_unlock;
-                       }
-
-                       if (PageWriteback(page)) {
-                               if (wbc->sync_mode != WB_SYNC_NONE)
-                                       wait_on_page_writeback(page);
-                               else
-                                       goto continue_unlock;
-                       }
+                       if (PageWriteback(page))
+                               wait_on_page_writeback(page);
 
                        BUG_ON(PageWriteback(page));
-                       if (!clear_page_dirty_for_io(page))
-                               goto continue_unlock;
 
-                       ret = __mpage_da_writepage(page, wbc, mpd);
-                       if (unlikely(ret)) {
-                               if (ret == AOP_WRITEPAGE_ACTIVATE) {
-                                       unlock_page(page);
-                                       ret = 0;
-                               } else {
-                                       done = 1;
-                                       break;
-                               }
+                       if (mpd->next_page != page->index)
+                               mpd->first_page = page->index;
+                       mpd->next_page = page->index + 1;
+                       logical = (sector_t) page->index <<
+                               (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+                       if (!page_has_buffers(page)) {
+                               mpage_add_bh_to_extent(mpd, logical,
+                                                      PAGE_CACHE_SIZE,
+                                                      (1 << BH_Dirty) | (1 << BH_Uptodate));
+                               if (mpd->io_done)
+                                       goto ret_extent_tail;
+                       } else {
+                               /*
+                                * Page with regular buffer heads,
+                                * just add all dirty ones
+                                */
+                               head = page_buffers(page);
+                               bh = head;
+                               do {
+                                       BUG_ON(buffer_locked(bh));
+                                       /*
+                                        * We need to try to allocate
+                                        * unmapped blocks in the same page.
+                                        * Otherwise we won't make progress
+                                        * with the page in ext4_writepage
+                                        */
+                                       if (ext4_bh_delay_or_unwritten(NULL, bh)) {
+                                               mpage_add_bh_to_extent(mpd, logical,
+                                                                      bh->b_size,
+                                                                      bh->b_state);
+                                               if (mpd->io_done)
+                                                       goto ret_extent_tail;
+                                       } else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
+                                               /*
+                                                * mapped dirty buffer. We need
+                                                * to update the b_state
+                                                * because we look at b_state
+                                                * in mpage_da_map_blocks.  We
+                                                * don't update b_size because
+                                                * if we find an unmapped
+                                                * buffer_head later we need to
+                                                * use the b_state flag of that
+                                                * buffer_head.
+                                                */
+                                               if (mpd->b_size == 0)
+                                                       mpd->b_state = bh->b_state & BH_FLAGS;
+                                       }
+                                       logical++;
+                               } while ((bh = bh->b_this_page) != head);
                        }
 
                        if (nr_to_write > 0) {
                                nr_to_write--;
                                if (nr_to_write == 0 &&
-                                   wbc->sync_mode == WB_SYNC_NONE) {
+                                   wbc->sync_mode == WB_SYNC_NONE)
                                        /*
                                         * We stop writing back only if we are
                                         * not doing integrity sync. In case of
@@ -2924,14 +2867,18 @@ continue_unlock:
                                         * pages, but have not synced all of the
                                         * old dirty pages.
                                         */
-                                       done = 1;
-                                       break;
-                               }
+                                       goto out;
                        }
                }
                pagevec_release(&pvec);
                cond_resched();
        }
+       return 0;
+ret_extent_tail:
+       ret = MPAGE_DA_EXTENT_TAIL;
+out:
+       pagevec_release(&pvec);
+       cond_resched();
        return ret;
 }
 
@@ -2945,7 +2892,6 @@ static int ext4_da_writepages(struct address_space *mapping,
        struct mpage_da_data mpd;
        struct inode *inode = mapping->host;
        int pages_written = 0;
-       long pages_skipped;
        unsigned int max_pages;
        int range_cyclic, cycled = 1, io_done = 0;
        int needed_blocks, ret = 0;
@@ -3028,11 +2974,6 @@ static int ext4_da_writepages(struct address_space *mapping,
                wbc->nr_to_write = desired_nr_to_write;
        }
 
-       mpd.wbc = wbc;
-       mpd.inode = mapping->host;
-
-       pages_skipped = wbc->pages_skipped;
-
 retry:
        if (wbc->sync_mode == WB_SYNC_ALL)
                tag_pages_for_writeback(mapping, index, end);
@@ -3059,22 +3000,10 @@ retry:
                }
 
                /*
-                * Now call __mpage_da_writepage to find the next
+                * Now call write_cache_pages_da() to find the next
                 * contiguous region of logical blocks that need
-                * blocks to be allocated by ext4.  We don't actually
-                * submit the blocks for I/O here, even though
-                * write_cache_pages thinks it will, and will set the
-                * pages as clean for write before calling
-                * __mpage_da_writepage().
+                * blocks to be allocated by ext4 and submit them.
                 */
-               mpd.b_size = 0;
-               mpd.b_state = 0;
-               mpd.b_blocknr = 0;
-               mpd.first_page = 0;
-               mpd.next_page = 0;
-               mpd.io_done = 0;
-               mpd.pages_written = 0;
-               mpd.retval = 0;
                ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
                /*
                 * If we have a contiguous extent of pages and we
@@ -3096,7 +3025,6 @@ retry:
                         * and try again
                         */
                        jbd2_journal_force_commit_nested(sbi->s_journal);
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                        /*
@@ -3104,7 +3032,6 @@ retry:
                         * rest of the pages
                         */
                        pages_written += mpd.pages_written;
-                       wbc->pages_skipped = pages_skipped;
                        ret = 0;
                        io_done = 1;
                } else if (wbc->nr_to_write)
@@ -3122,11 +3049,6 @@ retry:
                wbc->range_end  = mapping->writeback_index - 1;
                goto retry;
        }
-       if (pages_skipped != wbc->pages_skipped)
-               ext4_msg(inode->i_sb, KERN_CRIT,
-                        "This should not happen leaving %s "
-                        "with nr_to_write = %ld ret = %d",
-                        __func__, wbc->nr_to_write, ret);
 
        /* Update index */
        wbc->range_cyclic = range_cyclic;
@@ -3460,6 +3382,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 
 static int ext4_readpage(struct file *file, struct page *page)
 {
+       trace_ext4_readpage(page);
        return mpage_readpage(page, ext4_get_block);
 }
 
@@ -3494,6 +3417,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset)
 {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+       trace_ext4_invalidatepage(page, offset);
+
        /*
         * free any io_end structure allocated for buffers to be discarded
         */
@@ -3515,6 +3440,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 {
        journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 
+       trace_ext4_releasepage(page);
+
        WARN_ON(PageChecked(page));
        if (!page_has_buffers(page))
                return 0;
@@ -3873,11 +3800,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
 
+       trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-               return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
-
-       return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+               ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
+       else
+               ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
+       trace_ext4_direct_IO_exit(inode, offset,
+                               iov_length(iov, nr_segs), rw, ret);
+       return ret;
 }
 
 /*
@@ -3903,7 +3835,6 @@ static const struct address_space_operations ext4_ordered_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_ordered_write_end,
        .bmap                   = ext4_bmap,
@@ -3919,7 +3850,6 @@ static const struct address_space_operations ext4_writeback_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_writeback_write_end,
        .bmap                   = ext4_bmap,
@@ -3935,7 +3865,6 @@ static const struct address_space_operations ext4_journalled_aops = {
        .readpage               = ext4_readpage,
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_write_begin,
        .write_end              = ext4_journalled_write_end,
        .set_page_dirty         = ext4_journalled_set_page_dirty,
@@ -3951,7 +3880,6 @@ static const struct address_space_operations ext4_da_aops = {
        .readpages              = ext4_readpages,
        .writepage              = ext4_writepage,
        .writepages             = ext4_da_writepages,
-       .sync_page              = block_sync_page,
        .write_begin            = ext4_da_write_begin,
        .write_end              = ext4_da_write_end,
        .bmap                   = ext4_bmap,
@@ -4177,6 +4105,9 @@ no_top:
  *
  * We release `count' blocks on disk, but (last - first) may be greater
  * than `count' because there can be holes in there.
+ *
+ * Return 0 on success, 1 on invalid block range
+ * and < 0 on fatal error.
  */
 static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                             struct buffer_head *bh,
@@ -4203,33 +4134,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
                if (bh) {
                        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
                        err = ext4_handle_dirty_metadata(handle, inode, bh);
-                       if (unlikely(err)) {
-                               ext4_std_error(inode->i_sb, err);
-                               return 1;
-                       }
+                       if (unlikely(err))
+                               goto out_err;
                }
                err = ext4_mark_inode_dirty(handle, inode);
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                err = ext4_truncate_restart_trans(handle, inode,
                                                  blocks_for_truncate(inode));
-               if (unlikely(err)) {
-                       ext4_std_error(inode->i_sb, err);
-                       return 1;
-               }
+               if (unlikely(err))
+                       goto out_err;
                if (bh) {
                        BUFFER_TRACE(bh, "retaking write access");
-                       ext4_journal_get_write_access(handle, bh);
+                       err = ext4_journal_get_write_access(handle, bh);
+                       if (unlikely(err))
+                               goto out_err;
                }
        }
 
        for (p = first; p < last; p++)
                *p = 0;
 
-       ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
+       ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags);
        return 0;
+out_err:
+       ext4_std_error(inode->i_sb, err);
+       return err;
 }
 
 /**
@@ -4263,7 +4193,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
        ext4_fsblk_t nr;                    /* Current block # */
        __le32 *p;                          /* Pointer into inode/ind
                                               for current block */
-       int err;
+       int err = 0;
 
        if (this_bh) {                          /* For indirect block */
                BUFFER_TRACE(this_bh, "get_write_access");
@@ -4285,9 +4215,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                        } else if (nr == block_to_free + count) {
                                count++;
                        } else {
-                               if (ext4_clear_blocks(handle, inode, this_bh,
-                                                     block_to_free, count,
-                                                     block_to_free_p, p))
+                               err = ext4_clear_blocks(handle, inode, this_bh,
+                                                       block_to_free, count,
+                                                       block_to_free_p, p);
+                               if (err)
                                        break;
                                block_to_free = nr;
                                block_to_free_p = p;
@@ -4296,9 +4227,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                }
        }
 
-       if (count > 0)
-               ext4_clear_blocks(handle, inode, this_bh, block_to_free,
-                                 count, block_to_free_p, p);
+       if (!err && count > 0)
+               err = ext4_clear_blocks(handle, inode, this_bh, block_to_free,
+                                       count, block_to_free_p, p);
+       if (err < 0)
+               /* fatal error */
+               return;
 
        if (this_bh) {
                BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata");
@@ -4416,7 +4350,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         * transaction where the data blocks are
                         * actually freed.
                         */
-                       ext4_free_blocks(handle, inode, 0, nr, 1,
+                       ext4_free_blocks(handle, inode, NULL, nr, 1,
                                         EXT4_FREE_BLOCKS_METADATA|
                                         EXT4_FREE_BLOCKS_FORGET);
 
@@ -4496,10 +4430,12 @@ void ext4_truncate(struct inode *inode)
        Indirect chain[4];
        Indirect *partial;
        __le32 nr = 0;
-       int n;
-       ext4_lblk_t last_block;
+       int n = 0;
+       ext4_lblk_t last_block, max_block;
        unsigned blocksize = inode->i_sb->s_blocksize;
 
+       trace_ext4_truncate_enter(inode);
+
        if (!ext4_can_truncate(inode))
                return;
 
@@ -4510,6 +4446,7 @@ void ext4_truncate(struct inode *inode)
 
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
+               trace_ext4_truncate_exit(inode);
                return;
        }
 
@@ -4519,14 +4456,18 @@ void ext4_truncate(struct inode *inode)
 
        last_block = (inode->i_size + blocksize-1)
                                        >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
+       max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1)
+                                       >> EXT4_BLOCK_SIZE_BITS(inode->i_sb);
 
        if (inode->i_size & (blocksize - 1))
                if (ext4_block_truncate_page(handle, mapping, inode->i_size))
                        goto out_stop;
 
-       n = ext4_block_to_path(inode, last_block, offsets, NULL);
-       if (n == 0)
-               goto out_stop;  /* error */
+       if (last_block != max_block) {
+               n = ext4_block_to_path(inode, last_block, offsets, NULL);
+               if (n == 0)
+                       goto out_stop;  /* error */
+       }
 
        /*
         * OK.  This truncate is going to happen.  We add the inode to the
@@ -4557,7 +4498,13 @@ void ext4_truncate(struct inode *inode)
         */
        ei->i_disksize = inode->i_size;
 
-       if (n == 1) {           /* direct blocks */
+       if (last_block == max_block) {
+               /*
+                * It is unnecessary to free any data blocks if last_block is
+                * equal to the indirect block limit.
+                */
+               goto out_unlock;
+       } else if (n == 1) {            /* direct blocks */
                ext4_free_data(handle, inode, NULL, i_data+offsets[0],
                               i_data + EXT4_NDIR_BLOCKS);
                goto do_indirects;
@@ -4617,6 +4564,7 @@ do_indirects:
                ;
        }
 
+out_unlock:
        up_write(&ei->i_data_sem);
        inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
        ext4_mark_inode_dirty(handle, inode);
@@ -4639,6 +4587,7 @@ out_stop:
                ext4_orphan_del(handle, inode);
 
        ext4_journal_stop(handle);
+       trace_ext4_truncate_exit(inode);
 }
 
 /*
@@ -4770,6 +4719,7 @@ make_io:
                 * has in-inode xattrs, or we don't have this inode in memory.
                 * Read the block from disk.
                 */
+               trace_ext4_load_inode(inode);
                get_bh(bh);
                bh->b_end_io = end_buffer_read_sync;
                submit_bh(READ_META, bh);
@@ -4875,7 +4825,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                return inode;
 
        ei = EXT4_I(inode);
-       iloc.bh = 0;
+       iloc.bh = NULL;
 
        ret = __ext4_get_inode_loc(inode, &iloc, 0);
        if (ret < 0)
@@ -5460,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
        /* if nrblocks are contiguous */
        if (chunk) {
                /*
-                * With N contiguous data blocks, it need at most
-                * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks
-                * 2 dindirect blocks
-                * 1 tindirect block
+                * With N contiguous data blocks, we need at most
+                * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks,
+                * 2 dindirect blocks, and 1 tindirect block
                 */
-               indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb);
-               return indirects + 3;
+               return DIV_ROUND_UP(nrblocks,
+                                   EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4;
        }
        /*
         * if nrblocks are not contiguous, worse case, each block touch