ext4: ext4_ext_convert_to_initialized bug found in extended FSX testing
[pandora-kernel.git] / fs / ext4 / extents.c
index ccce8a7..5f243da 100644 (file)
@@ -44,6 +44,8 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
                                            struct inode *inode,
                                            int needed)
@@ -131,7 +133,7 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                 * fragmenting the file system's free space.  Maybe we
                 * should have some hueristics or some way to allow
                 * userspace to pass a hint to file system,
-                * especiially if the latter case turns out to be
+                * especially if the latter case turns out to be
                 * common.
                 */
                ex = path[depth].p_ext;
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                if (unlikely(!bh))
                        goto err;
                if (!bh_uptodate_or_lock(bh)) {
+                       trace_ext4_ext_load_extent(inode, block,
+                                               path[ppos].p_block);
                        if (bh_submit_read(bh) < 0) {
                                put_bh(bh);
                                goto err;
@@ -1034,7 +1038,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
-                       ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+                       ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
                                         EXT4_FREE_BLOCKS_METADATA);
                }
        }
@@ -1559,7 +1563,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
  * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
  * 1 if they got merged.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static int ext4_ext_try_to_merge_right(struct inode *inode,
                                 struct ext4_ext_path *path,
                                 struct ext4_extent *ex)
 {
@@ -1598,6 +1602,31 @@ static int ext4_ext_try_to_merge(struct inode *inode,
        return merge_done;
 }
 
+/*
+ * This function tries to merge the @ex extent to neighbours in the tree.
+ * return 1 if merge left else 0.
+ */
+static int ext4_ext_try_to_merge(struct inode *inode,
+                                 struct ext4_ext_path *path,
+                                 struct ext4_extent *ex) {
+       struct ext4_extent_header *eh;
+       unsigned int depth;
+       int merge_done = 0;
+       int ret = 0;
+
+       depth = ext_depth(inode);
+       BUG_ON(path[depth].p_hdr == NULL);
+       eh = path[depth].p_hdr;
+
+       if (ex > EXT_FIRST_EXTENT(eh))
+               merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
+
+       if (!merge_done)
+               ret = ext4_ext_try_to_merge_right(inode, path, ex);
+
+       return ret;
+}
+
 /*
  * check if a portion of the "newext" extent overlaps with an
  * existing extent.
@@ -1725,7 +1754,7 @@ repeat:
                BUG_ON(npath->p_depth != path->p_depth);
                eh = npath[depth].p_hdr;
                if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
-                       ext_debug("next leaf isnt full(%d)\n",
+                       ext_debug("next leaf isn't full(%d)\n",
                                  le16_to_cpu(eh->eh_entries));
                        path = npath;
                        goto repeat;
@@ -2059,7 +2088,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
        if (err)
                return err;
        ext_debug("index is empty, remove it, free block %llu\n", leaf);
-       ext4_free_blocks(handle, inode, 0, leaf, 1,
+       ext4_free_blocks(handle, inode, NULL, leaf, 1,
                         EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
        return err;
 }
@@ -2156,7 +2185,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                num = le32_to_cpu(ex->ee_block) + ee_len - from;
                start = ext4_ext_pblock(ex) + ee_len - num;
                ext_debug("free last %u blocks starting %llu\n", num, start);
-               ext4_free_blocks(handle, inode, 0, start, num, flags);
+               ext4_free_blocks(handle, inode, NULL, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
                printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2525,11 +2554,198 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
        return ret;
 }
 
+/*
+ * used by extent splitting.
+ */
+#define EXT4_EXT_MAY_ZEROOUT   0x1  /* safe to zeroout if split fails \
+                                       due to ENOSPC */
+#define EXT4_EXT_MARK_UNINIT1  0x2  /* mark first half uninitialized */
+#define EXT4_EXT_MARK_UNINIT2  0x4  /* mark second half uninitialized */
+
+/*
+ * ext4_split_extent_at() splits an extent at given block.
+ *
+ * @handle: the journal handle
+ * @inode: the file inode
+ * @path: the path to the extent
+ * @split: the logical block where the extent is splitted.
+ * @split_flags: indicates if the extent could be zeroout if split fails, and
+ *              the states(init or uninit) of new extents.
+ * @flags: flags used to insert new extent to extent tree.
+ *
+ *
+ * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
+ * of which are deterimined by split_flag.
+ *
+ * There are two cases:
+ *  a> the extent are splitted into two extent.
+ *  b> split is not needed, and just mark the extent.
+ *
+ * return 0 on success.
+ */
+static int ext4_split_extent_at(handle_t *handle,
+                            struct inode *inode,
+                            struct ext4_ext_path *path,
+                            ext4_lblk_t split,
+                            int split_flag,
+                            int flags)
+{
+       ext4_fsblk_t newblock;
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex, newex, orig_ex;
+       struct ext4_extent *ex2 = NULL;
+       unsigned int ee_len, depth;
+       int err = 0;
+
+       ext_debug("ext4_split_extents_at: inode %lu, logical"
+               "block %llu\n", inode->i_ino, (unsigned long long)split);
+
+       ext4_ext_show_leaf(inode, path);
+
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+       newblock = split - ee_block + ext4_ext_pblock(ex);
+
+       BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+
+       err = ext4_ext_get_access(handle, inode, path + depth);
+       if (err)
+               goto out;
+
+       if (split == ee_block) {
+               /*
+                * case b: block @split is the block that the extent begins with
+                * then we just change the state of the extent, and splitting
+                * is not needed.
+                */
+               if (split_flag & EXT4_EXT_MARK_UNINIT2)
+                       ext4_ext_mark_uninitialized(ex);
+               else
+                       ext4_ext_mark_initialized(ex);
+
+               if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
+                       ext4_ext_try_to_merge(inode, path, ex);
+
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
+       }
+
+       /* case a */
+       memcpy(&orig_ex, ex, sizeof(orig_ex));
+       ex->ee_len = cpu_to_le16(split - ee_block);
+       if (split_flag & EXT4_EXT_MARK_UNINIT1)
+               ext4_ext_mark_uninitialized(ex);
+
+       /*
+        * path may lead to new leaf, not to original leaf any more
+        * after ext4_ext_insert_extent() returns,
+        */
+       err = ext4_ext_dirty(handle, inode, path + depth);
+       if (err)
+               goto fix_extent_len;
+
+       ex2 = &newex;
+       ex2->ee_block = cpu_to_le32(split);
+       ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
+       ext4_ext_store_pblock(ex2, newblock);
+       if (split_flag & EXT4_EXT_MARK_UNINIT2)
+               ext4_ext_mark_uninitialized(ex2);
+
+       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+       if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               err = ext4_ext_zeroout(inode, &orig_ex);
+               if (err)
+                       goto fix_extent_len;
+               /* update the extent length and mark as initialized */
+               ex->ee_len = cpu_to_le32(ee_len);
+               ext4_ext_try_to_merge(inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
+       } else if (err)
+               goto fix_extent_len;
+
+out:
+       ext4_ext_show_leaf(inode, path);
+       return err;
+
+fix_extent_len:
+       ex->ee_len = orig_ex.ee_len;
+       ext4_ext_dirty(handle, inode, path + depth);
+       return err;
+}
+
+/*
+ * ext4_split_extents() splits an extent and mark extent which is covered
+ * by @map as split_flags indicates
+ *
+ * It may result in splitting the extent into multiple extents (upto three)
+ * There are three possibilities:
+ *   a> There is no split required
+ *   b> Splits in two extents: Split is happening at either end of the extent
+ *   c> Splits in three extents: Somone is splitting in middle of the extent
+ *
+ */
+static int ext4_split_extent(handle_t *handle,
+                             struct inode *inode,
+                             struct ext4_ext_path *path,
+                             struct ext4_map_blocks *map,
+                             int split_flag,
+                             int flags)
+{
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex;
+       unsigned int ee_len, depth;
+       int err = 0;
+       int uninitialized;
+       int split_flag1, flags1;
+
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+       uninitialized = ext4_ext_is_uninitialized(ex);
+
+       if (map->m_lblk + map->m_len < ee_block + ee_len) {
+               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                             EXT4_EXT_MAY_ZEROOUT : 0;
+               flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
+               if (uninitialized)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
+                                      EXT4_EXT_MARK_UNINIT2;
+               err = ext4_split_extent_at(handle, inode, path,
+                               map->m_lblk + map->m_len, split_flag1, flags1);
+       }
+
+       ext4_ext_drop_refs(path);
+       path = ext4_ext_find_extent(inode, map->m_lblk, path);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       if (map->m_lblk >= ee_block) {
+               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                             EXT4_EXT_MAY_ZEROOUT : 0;
+               if (uninitialized)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT1;
+               if (split_flag & EXT4_EXT_MARK_UNINIT2)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+               err = ext4_split_extent_at(handle, inode, path,
+                               map->m_lblk, split_flag1, flags);
+               if (err)
+                       goto out;
+       }
+
+       ext4_ext_show_leaf(inode, path);
+out:
+       return err ? err : map->m_len;
+}
+
 #define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
- * extent into multiple extents (upto three - one initialized and two
+ * extent into multiple extents (up to three - one initialized and two
  * uninitialized).
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be initialized
@@ -2541,17 +2757,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       struct ext4_extent_header *eh;
+       struct ext4_map_blocks split_map;
+       struct ext4_extent zero_ex;
+       struct ext4_extent *ex;
        ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
        int err = 0;
-       int ret = 0;
-       int may_zeroout;
+       int split_flag = 0;
 
        ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2563,280 +2775,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
-       eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
 
+       WARN_ON(map->m_lblk < ee_block);
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
        /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
+       if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
+           (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               err = ext4_ext_zeroout(inode, ex);
                if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zeroed the full extent */
-               return allocated;
-       }
-
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-               if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
-                       /*
-                        * map->m_lblk == ee_block is handled by the zerouout
-                        * at the beginning.
-                        * Mark first half uninitialized.
-                        * Mark second half initialized and zero out the
-                        * initialized extent
-                        */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = cpu_to_le16(ee_len - allocated);
-                       ext4_ext_mark_uninitialized(ex);
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-
-                       ex3 = &newex;
-                       ex3->ee_block = cpu_to_le32(map->m_lblk);
-                       ext4_ext_store_pblock(ex3, newblock);
-                       ex3->ee_len = cpu_to_le16(allocated);
-                       err = ext4_ext_insert_extent(handle, inode, path,
-                                                       ex3, 0);
-                       if (err == -ENOSPC) {
-                               err =  ext4_ext_zeroout(inode, &orig_ex);
-                               if (err)
-                                       goto fix_extent_len;
-                               ex->ee_block = orig_ex.ee_block;
-                               ex->ee_len   = orig_ex.ee_len;
-                               ext4_ext_store_pblock(ex,
-                                       ext4_ext_pblock(&orig_ex));
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               /* blocks available from map->m_lblk */
-                               return allocated;
-
-                       } else if (err)
-                               goto fix_extent_len;
-
-                       /*
-                        * We need to zero out the second half because
-                        * an fallocate request can update file size and
-                        * converting the second half to initialized extent
-                        * implies that we can leak some junk data to user
-                        * space.
-                        */
-                       err =  ext4_ext_zeroout(inode, ex3);
-                       if (err) {
-                               /*
-                                * We should actually mark the
-                                * second half as uninit and return error
-                                * Insert would have changed the extent
-                                */
-                               depth = ext_depth(inode);
-                               ext4_ext_drop_refs(path);
-                               path = ext4_ext_find_extent(inode, map->m_lblk,
-                                                           path);
-                               if (IS_ERR(path)) {
-                                       err = PTR_ERR(path);
-                                       return err;
-                               }
-                               /* get the second half extent details */
-                               ex = path[depth].p_ext;
-                               err = ext4_ext_get_access(handle, inode,
-                                                               path + depth);
-                               if (err)
-                                       return err;
-                               ext4_ext_mark_uninitialized(ex);
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               return err;
-                       }
-
-                       /* zeroed the second half */
-                       return allocated;
-               }
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
                        goto out;
-               }
-               eh = path[depth].p_hdr;
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto out;
-
-               allocated = map->m_len;
-
-               /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
-                * to insert a extent in the middle zerout directly
-                * otherwise give the extent a chance to merge to left
-                */
-               if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-                       map->m_lblk != ee_block && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zero out the first half */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-               }
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       if (ex2 != ex)
-               goto insert;
-       /*
-        * New (initialized) extent starts from the first block
-        * in the current extent. i.e., ex2 == ex
-        * We have to see if it can be merged with the extent
-        * on the left.
-        */
-       if (ex2 > EXT_FIRST_EXTENT(eh)) {
-               /*
-                * To merge left, pass "ex2 - 1" to try_to_merge(),
-                * since it merges towards right _only_.
-                */
-               ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
-                       if (err)
-                               goto out;
-                       depth = ext_depth(inode);
-                       ex2--;
-               }
+               ext4_ext_mark_initialized(ex);
+               ext4_ext_try_to_merge(inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
        }
+
        /*
-        * Try to Merge towards right. This might be required
-        * only when the whole extent is being written to.
-        * i.e. ex2 == ex and ex3 == NULL.
+        * four cases:
+        * 1. split the extent into three extents.
+        * 2. split the extent into two extents, zeroout the first half.
+        * 3. split the extent into two extents, zeroout the second half.
+        * 4. split the extent into two extents with out zeroout.
         */
-       if (!ex3) {
-               ret = ext4_ext_try_to_merge(inode, path, ex2);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
+       split_map.m_lblk = map->m_lblk;
+       split_map.m_len = map->m_len;
+
+       if (allocated > map->m_len) {
+               if (allocated <= EXT4_EXT_ZERO_LEN &&
+                   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 3 */
+                       zero_ex.ee_block =
+                                        cpu_to_le32(map->m_lblk);
+                       zero_ex.ee_len = cpu_to_le16(allocated);
+                       ext4_ext_store_pblock(&zero_ex,
+                               ext4_ext_pblock(ex) + map->m_lblk - ee_block);
+                       err = ext4_ext_zeroout(inode, &zero_ex);
                        if (err)
                                goto out;
+                       split_map.m_lblk = map->m_lblk;
+                       split_map.m_len = allocated;
+               } else if ((map->m_lblk - ee_block + map->m_len <
+                          EXT4_EXT_ZERO_LEN) &&
+                          (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 2 */
+                       if (map->m_lblk != ee_block) {
+                               zero_ex.ee_block = ex->ee_block;
+                               zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+                                                       ee_block);
+                               ext4_ext_store_pblock(&zero_ex,
+                                                     ext4_ext_pblock(ex));
+                               err = ext4_ext_zeroout(inode, &zero_ex);
+                               if (err)
+                                       goto out;
+                       }
+
+                       split_map.m_lblk = ee_block;
+                       split_map.m_len = map->m_lblk - ee_block + map->m_len;
+                       allocated = map->m_len;
                }
        }
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
+
+       allocated = ext4_split_extent(handle, inode, path,
+                                      &split_map, split_flag, 0);
+       if (allocated < 0)
+               err = allocated;
+
 out:
-       ext4_ext_show_leaf(inode, path);
        return err ? err : allocated;
-
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
 }
 
 /*
@@ -2844,7 +2862,7 @@ fix_extent_len:
  * ext4_get_blocks_dio_write() when DIO to write
  * to an uninitialized extent.
  *
- * Writing to an uninitized extent may result in splitting the uninitialized
+ * Writing to an uninitialized extent may result in splitting the uninitialized
  * extent into multiple /initialized uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
@@ -2867,15 +2885,11 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct ext4_ext_path *path,
                                        int flags)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       ext4_lblk_t ee_block, eof_block;
-       unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
-       int err = 0;
-       int may_zeroout;
+       ext4_lblk_t eof_block;
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex;
+       unsigned int ee_len;
+       int split_flag = 0, depth;
 
        ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2885,156 +2899,22 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                inode->i_sb->s_blocksize_bits;
        if (eof_block < map->m_lblk + map->m_len)
                eof_block = map->m_lblk + map->m_len;
-
-       depth = ext_depth(inode);
-       ex = path[depth].p_ext;
-       ee_block = le32_to_cpu(ex->ee_block);
-       ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
-
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
-
-       /*
-        * If the uninitialized extent begins at the same logical
-        * block where the write begins, and the write completely
-        * covers the extent, then we don't need to split it.
-        */
-       if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
-               return allocated;
-
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
-
-               err = ext4_ext_get_access(handle, inode, path + depth);
-               if (err)
-                       goto out;
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
 
-               allocated = map->m_len;
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
-        * using direct I/O, uninitialised still.
-        */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       ext4_ext_mark_uninitialized(ex2);
-       if (ex2 != ex)
-               goto insert;
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       ext_debug("out here\n");
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
-out:
-       ext4_ext_show_leaf(inode, path);
-       return err ? err : allocated;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
+       split_flag |= EXT4_EXT_MARK_UNINIT2;
 
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
+       flags |= EXT4_GET_BLOCKS_PRE_IO;
+       return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
+
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                                              struct inode *inode,
                                              struct ext4_ext_path *path)
@@ -3043,46 +2923,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
        struct ext4_extent_header *eh;
        int depth;
        int err = 0;
-       int ret = 0;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
 
+       ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)le32_to_cpu(ex->ee_block),
+               ext4_ext_get_actual_len(ex));
+
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
        /* first mark the extent as initialized */
        ext4_ext_mark_initialized(ex);
 
-       /*
-        * We have to see if it can be merged with the extent
-        * on the left.
-        */
-       if (ex > EXT_FIRST_EXTENT(eh)) {
-               /*
-                * To merge left, pass "ex - 1" to try_to_merge(),
-                * since it merges towards right _only_.
-                */
-               ret = ext4_ext_try_to_merge(inode, path, ex - 1);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
-                       if (err)
-                               goto out;
-                       depth = ext_depth(inode);
-                       ex--;
-               }
-       }
-       /*
-        * Try to Merge towards right.
+       /* note: ext4_ext_correct_indexes() isn't needed here because
+        * borders are not changed
         */
-       ret = ext4_ext_try_to_merge(inode, path, ex);
-       if (ret) {
-               err = ext4_ext_correct_indexes(handle, inode, path);
-               if (err)
-                       goto out;
-               depth = ext_depth(inode);
-       }
+       ext4_ext_try_to_merge(inode, path, ex);
+
        /* Mark modified extent as dirty */
        err = ext4_ext_dirty(handle, inode, path + depth);
 out:
@@ -3108,14 +2969,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 {
        int i, depth;
        struct ext4_extent_header *eh;
-       struct ext4_extent *ex, *last_ex;
+       struct ext4_extent *last_ex;
 
        if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
                return 0;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
-       ex = path[depth].p_ext;
 
        if (unlikely(!eh->eh_entries)) {
                EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3171,7 +3031,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                                                   path, flags);
                /*
                 * Flag the inode(non aio case) or end_io struct (aio case)
-                * that this IO needs to convertion to written when IO is
+                * that this IO needs to conversion to written when IO is
                 * completed
                 */
                if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3295,9 +3155,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        struct ext4_map_blocks *map, int flags)
 {
        struct ext4_ext_path *path = NULL;
-       struct ext4_extent_header *eh;
        struct ext4_extent newex, *ex;
-       ext4_fsblk_t newblock;
+       ext4_fsblk_t newblock = 0;
        int err = 0, depth, ret;
        unsigned int allocated = 0;
        struct ext4_allocation_request ar;
@@ -3305,6 +3164,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
+       trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* check in cache */
        if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3212,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                err = -EIO;
                goto out2;
        }
-       eh = path[depth].p_hdr;
 
        ex = path[depth].p_ext;
        if (ex) {
@@ -3458,10 +3317,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ext4_ext_mark_uninitialized(&newex);
                /*
                 * io_end structure was created for every IO write to an
-                * uninitialized extent. To avoid unecessary conversion,
+                * uninitialized extent. To avoid unnecessary conversion,
                 * here we flag the IO that really needs the conversion.
                 * For non asycn direct IO case, flag the inode state
-                * that we need to perform convertion when IO is done.
+                * that we need to perform conversion when IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
                        if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3485,7 +3344,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-               ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+               ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
                                 ext4_ext_get_actual_len(&newex), 0);
                goto out2;
        }
@@ -3525,6 +3384,8 @@ out2:
                ext4_ext_drop_refs(path);
                kfree(path);
        }
+       trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+               newblock, map->m_len, err ? err : allocated);
        return err ? err : allocated;
 }
 
@@ -3658,6 +3519,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
+       trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
@@ -3673,6 +3535,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        ret = inode_newsize_ok(inode, (len + offset));
        if (ret) {
                mutex_unlock(&inode->i_mutex);
+               trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
                return ret;
        }
 retry:
@@ -3717,6 +3580,8 @@ retry:
                goto retry;
        }
        mutex_unlock(&inode->i_mutex);
+       trace_ext4_fallocate_exit(inode, offset, max_blocks,
+                               ret > 0 ? ret2 : ret);
        return ret > 0 ? ret2 : ret;
 }
 
@@ -3775,6 +3640,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
        }
        return ret > 0 ? ret2 : ret;
 }
+
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
@@ -3782,38 +3648,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                       struct ext4_ext_cache *newex, struct ext4_extent *ex,
                       void *data)
 {
-       struct fiemap_extent_info *fieinfo = data;
-       unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
        __u64   logical;
        __u64   physical;
        __u64   length;
+       loff_t  size;
        __u32   flags = 0;
-       int     error;
+       int             ret = 0;
+       struct fiemap_extent_info *fieinfo = data;
+       unsigned char blksize_bits;
 
-       logical =  (__u64)newex->ec_block << blksize_bits;
+       blksize_bits = inode->i_sb->s_blocksize_bits;
+       logical = (__u64)newex->ec_block << blksize_bits;
 
        if (newex->ec_start == 0) {
-               pgoff_t offset;
-               struct page *page;
+               /*
+                * No extent in extent-tree contains block @newex->ec_start,
+                * then the block may stay in 1)a hole or 2)delayed-extent.
+                *
+                * Holes or delayed-extents are processed as follows.
+                * 1. lookup dirty pages with specified range in pagecache.
+                *    If no page is got, then there is no delayed-extent and
+                *    return with EXT_CONTINUE.
+                * 2. find the 1st mapped buffer,
+                * 3. check if the mapped buffer is both in the request range
+                *    and a delayed buffer. If not, there is no delayed-extent,
+                *    then return.
+                * 4. a delayed-extent is found, the extent will be collected.
+                */
+               ext4_lblk_t     end = 0;
+               pgoff_t         last_offset;
+               pgoff_t         offset;
+               pgoff_t         index;
+               struct page     **pages = NULL;
                struct buffer_head *bh = NULL;
+               struct buffer_head *head = NULL;
+               unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+               pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+               if (pages == NULL)
+                       return -ENOMEM;
 
                offset = logical >> PAGE_SHIFT;
-               page = find_get_page(inode->i_mapping, offset);
-               if (!page || !page_has_buffers(page))
-                       return EXT_CONTINUE;
+repeat:
+               last_offset = offset;
+               head = NULL;
+               ret = find_get_pages_tag(inode->i_mapping, &offset,
+                                       PAGECACHE_TAG_DIRTY, nr_pages, pages);
+
+               if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                       /* First time, try to find a mapped buffer. */
+                       if (ret == 0) {
+out:
+                               for (index = 0; index < ret; index++)
+                                       page_cache_release(pages[index]);
+                               /* just a hole. */
+                               kfree(pages);
+                               return EXT_CONTINUE;
+                       }
 
-               bh = page_buffers(page);
+                       /* Try to find the 1st mapped buffer. */
+                       end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+                                 blksize_bits;
+                       if (!page_has_buffers(pages[0]))
+                               goto out;
+                       head = page_buffers(pages[0]);
+                       if (!head)
+                               goto out;
 
-               if (!bh)
-                       return EXT_CONTINUE;
+                       bh = head;
+                       do {
+                               if (buffer_mapped(bh)) {
+                                       /* get the 1st mapped buffer. */
+                                       if (end > newex->ec_block +
+                                               newex->ec_len)
+                                               /* The buffer is out of
+                                                * the request range.
+                                                */
+                                               goto out;
+                                       goto found_mapped_buffer;
+                               }
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
 
-               if (buffer_delay(bh)) {
-                       flags |= FIEMAP_EXTENT_DELALLOC;
-                       page_cache_release(page);
+                       /* No mapped buffer found. */
+                       goto out;
                } else {
-                       page_cache_release(page);
-                       return EXT_CONTINUE;
+                       /*Find contiguous delayed buffers. */
+                       if (ret > 0 && pages[0]->index == last_offset)
+                               head = page_buffers(pages[0]);
+                       bh = head;
+               }
+
+found_mapped_buffer:
+               if (bh != NULL && buffer_delay(bh)) {
+                       /* 1st or contiguous delayed buffer found. */
+                       if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+                               /*
+                                * 1st delayed buffer found, record
+                                * the start of extent.
+                                */
+                               flags |= FIEMAP_EXTENT_DELALLOC;
+                               newex->ec_block = end;
+                               logical = (__u64)end << blksize_bits;
+                       }
+                       /* Find contiguous delayed buffers. */
+                       do {
+                               if (!buffer_delay(bh))
+                                       goto found_delayed_extent;
+                               bh = bh->b_this_page;
+                               end++;
+                       } while (bh != head);
+
+                       for (index = 1; index < ret; index++) {
+                               if (!page_has_buffers(pages[index])) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               head = page_buffers(pages[index]);
+                               if (!head) {
+                                       bh = NULL;
+                                       break;
+                               }
+                               if (pages[index]->index !=
+                                       pages[0]->index + index) {
+                                       /* Blocks are not contiguous. */
+                                       bh = NULL;
+                                       break;
+                               }
+                               bh = head;
+                               do {
+                                       if (!buffer_delay(bh))
+                                               /* Delayed-extent ends. */
+                                               goto found_delayed_extent;
+                                       bh = bh->b_this_page;
+                                       end++;
+                               } while (bh != head);
+                       }
+               } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+                       /* a hole found. */
+                       goto out;
+
+found_delayed_extent:
+               newex->ec_len = min(end - newex->ec_block,
+                                               (ext4_lblk_t)EXT_INIT_MAX_LEN);
+               if (ret == nr_pages && bh != NULL &&
+                       newex->ec_len < EXT_INIT_MAX_LEN &&
+                       buffer_delay(bh)) {
+                       /* Have not collected an extent and continue. */
+                       for (index = 0; index < ret; index++)
+                               page_cache_release(pages[index]);
+                       goto repeat;
                }
+
+               for (index = 0; index < ret; index++)
+                       page_cache_release(pages[index]);
+               kfree(pages);
        }
 
        physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3812,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
        if (ex && ext4_ext_is_uninitialized(ex))
                flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-       /*
-        * If this extent reaches EXT_MAX_BLOCK, it must be last.
-        *
-        * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-        * this also indicates no more allocated blocks.
-        *
-        * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-        */
-       if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-           newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-               loff_t size = i_size_read(inode);
-               loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+       size = i_size_read(inode);
+       if (logical + length >= size)
                flags |= FIEMAP_EXTENT_LAST;
-               if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-                   logical+length > size)
-                       length = (size - logical + bs - 1) & ~(bs-1);
-       }
 
-       error = fiemap_fill_next_extent(fieinfo, logical, physical,
+       ret = fiemap_fill_next_extent(fieinfo, logical, physical,
                                        length, flags);
-       if (error < 0)
-               return error;
-       if (error == 1)
+       if (ret < 0)
+               return ret;
+       if (ret == 1)
                return EXT_BREAK;
-
        return EXT_CONTINUE;
 }