ext4: enable "punch hole" functionality
[pandora-kernel.git] / fs / ext4 / extents.c
index dd2cb50..88ff3a7 100644 (file)
 
 #include <trace/events/ext4.h>
 
+static int ext4_split_extent(handle_t *handle,
+                               struct inode *inode,
+                               struct ext4_ext_path *path,
+                               struct ext4_map_blocks *map,
+                               int split_flag,
+                               int flags);
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
                                            struct inode *inode,
                                            int needed)
@@ -192,12 +199,13 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 static ext4_fsblk_t
 ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
-                       struct ext4_extent *ex, int *err)
+                       struct ext4_extent *ex, int *err, unsigned int flags)
 {
        ext4_fsblk_t goal, newblock;
 
        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
-       newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
+       newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
+                                       NULL, err);
        return newblock;
 }
 
@@ -792,8 +800,9 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
  * - initializes subtree
  */
 static int ext4_ext_split(handle_t *handle, struct inode *inode,
-                               struct ext4_ext_path *path,
-                               struct ext4_extent *newext, int at)
+                         unsigned int flags,
+                         struct ext4_ext_path *path,
+                         struct ext4_extent *newext, int at)
 {
        struct buffer_head *bh = NULL;
        int depth = ext_depth(inode);
@@ -847,7 +856,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
                newblock = ext4_ext_new_meta_block(handle, inode, path,
-                                                  newext, &err);
+                                                  newext, &err, flags);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
@@ -1056,8 +1065,9 @@ cleanup:
  *   just created block
  */
 static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
-                                       struct ext4_ext_path *path,
-                                       struct ext4_extent *newext)
+                                unsigned int flags,
+                                struct ext4_ext_path *path,
+                                struct ext4_extent *newext)
 {
        struct ext4_ext_path *curp = path;
        struct ext4_extent_header *neh;
@@ -1065,7 +1075,8 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock;
        int err = 0;
 
-       newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
+       newblock = ext4_ext_new_meta_block(handle, inode, path,
+               newext, &err, flags);
        if (newblock == 0)
                return err;
 
@@ -1140,8 +1151,9 @@ out:
  * if no free index is found, then it requests in-depth growing.
  */
 static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
-                                       struct ext4_ext_path *path,
-                                       struct ext4_extent *newext)
+                                   unsigned int flags,
+                                   struct ext4_ext_path *path,
+                                   struct ext4_extent *newext)
 {
        struct ext4_ext_path *curp;
        int depth, i, err = 0;
@@ -1161,7 +1173,7 @@ repeat:
        if (EXT_HAS_FREE_INDEX(curp)) {
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
-               err = ext4_ext_split(handle, inode, path, newext, i);
+               err = ext4_ext_split(handle, inode, flags, path, newext, i);
                if (err)
                        goto out;
 
@@ -1174,7 +1186,8 @@ repeat:
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
-               err = ext4_ext_grow_indepth(handle, inode, path, newext);
+               err = ext4_ext_grow_indepth(handle, inode, flags,
+                                           path, newext);
                if (err)
                        goto out;
 
@@ -1563,7 +1576,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
  * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
  * 1 if they got merged.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static int ext4_ext_try_to_merge_right(struct inode *inode,
                                 struct ext4_ext_path *path,
                                 struct ext4_extent *ex)
 {
@@ -1602,6 +1615,31 @@ static int ext4_ext_try_to_merge(struct inode *inode,
        return merge_done;
 }
 
+/*
+ * Try to merge @ex with its neighbours in the leaf at path[ext_depth(inode)].
+ * The left neighbour (ex - 1) is tried first; only when that does not merge
+ * is @ex tried against its right neighbour.
+ * Returns the result of the right-hand attempt, 0 if the left merge was done.
+ */
+static int ext4_ext_try_to_merge(struct inode *inode,
+                                 struct ext4_ext_path *path,
+                                 struct ext4_extent *ex) {
+       unsigned int leaf = ext_depth(inode);
+       struct ext4_extent_header *hdr;
+
+       /* the leaf header must already be set up in @path */
+       BUG_ON(path[leaf].p_hdr == NULL);
+       hdr = path[leaf].p_hdr;
+
+       /* a left neighbour exists only if @ex is not the first extent */
+       if (ex > EXT_FIRST_EXTENT(hdr) &&
+           ext4_ext_try_to_merge_right(inode, path, ex - 1))
+               return 0;
+
+       /* nothing merged on the left: try the right-hand side */
+       return ext4_ext_try_to_merge_right(inode, path, ex);
+}
+
 /*
  * check if a portion of the "newext" extent overlaps with an
  * existing extent.
@@ -1668,6 +1706,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
        int depth, len, err;
        ext4_lblk_t next;
        unsigned uninitialized = 0;
+       int flags = 0;
 
        if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
                EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1729,7 +1768,7 @@ repeat:
                BUG_ON(npath->p_depth != path->p_depth);
                eh = npath[depth].p_hdr;
                if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
-                       ext_debug("next leaf isnt full(%d)\n",
+                       ext_debug("next leaf isn't full(%d)\n",
                                  le16_to_cpu(eh->eh_entries));
                        path = npath;
                        goto repeat;
@@ -1742,7 +1781,9 @@ repeat:
         * There is no free space in the found leaf.
         * We're gonna add a new leaf in the tree.
         */
-       err = ext4_ext_create_new_leaf(handle, inode, path, newext);
+       if (flag & EXT4_GET_BLOCKS_PUNCH_OUT_EXT)
+               flags = EXT4_MB_USE_ROOT_BLOCKS;
+       err = ext4_ext_create_new_leaf(handle, inode, flags, path, newext);
        if (err)
                goto cleanup;
        depth = ext_depth(inode);
@@ -2003,13 +2044,25 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
+ * ext4_ext_check_cache()
+ * Checks to see if the given block is in the cache.
+ * If it is, the cached extent is stored in the given
+ * cache extent pointer.  If the cached extent is a hole,
+ * this routine should be used instead of
+ * ext4_ext_in_cache if the calling function needs to
+ * know the size of the hole.
+ *
+ * @inode: The file's inode
+ * @block: The block to look for in the cache
+ * @ex:    Pointer where the cached extent will be stored
+ *         if it contains block
+ *
  * Return 0 if cache is invalid; 1 if the cache is valid
  */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-                       struct ext4_extent *ex)
-{
+static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
+       struct ext4_ext_cache *ex){
        struct ext4_ext_cache *cex;
+       struct ext4_sb_info *sbi;
        int ret = 0;
 
        /*
@@ -2017,25 +2070,59 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
         */
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        cex = &EXT4_I(inode)->i_cached_extent;
+       sbi = EXT4_SB(inode->i_sb);
 
        /* has cache valid data? */
        if (cex->ec_len == 0)
                goto errout;
 
        if (in_range(block, cex->ec_block, cex->ec_len)) {
-               ex->ee_block = cpu_to_le32(cex->ec_block);
-               ext4_ext_store_pblock(ex, cex->ec_start);
-               ex->ee_len = cpu_to_le16(cex->ec_len);
+               memcpy(ex, cex, sizeof(struct ext4_ext_cache));
                ext_debug("%u cached by %u:%u:%llu\n",
                                block,
                                cex->ec_block, cex->ec_len, cex->ec_start);
                ret = 1;
        }
 errout:
+       if (!ret)
+               sbi->extent_cache_misses++;
+       else
+               sbi->extent_cache_hits++;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        return ret;
 }
 
+/*
+ * ext4_ext_in_cache()
+ * Looks up @block through ext4_ext_check_cache() and, on a
+ * hit, copies the cached entry into @ex as a struct
+ * ext4_extent (block, physical start and length).
+ *
+ * @inode: The file's inode
+ * @block: The block to look for in the cache
+ * @ex:    Filled in with the cached extent when it contains
+ *         @block; left untouched otherwise
+ *
+ * Return 0 if cache is invalid; 1 if the cache is valid
+ */
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+                       struct ext4_extent *ex)
+{
+       struct ext4_ext_cache hit;
+
+       /* miss: nothing to report, @ex stays as the caller left it */
+       if (!ext4_ext_check_cache(inode, block, &hit))
+               return 0;
+
+       /* hit: translate the cache entry into extent fields */
+       ex->ee_block = cpu_to_le32(hit.ec_block);
+       ext4_ext_store_pblock(ex, hit.ec_start);
+       ex->ee_len = cpu_to_le16(hit.ec_len);
+       return 1;
+}
+
+
 /*
  * ext4_ext_rm_idx:
  * removes index from the index block.
@@ -2163,8 +2250,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
                ext4_free_blocks(handle, inode, NULL, start, num, flags);
        } else if (from == le32_to_cpu(ex->ee_block)
                   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
-               printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
-                       from, to, le32_to_cpu(ex->ee_block), ee_len);
+               /* head removal */
+               ext4_lblk_t num;
+               ext4_fsblk_t start;
+
+               num = to - from;
+               start = ext4_ext_pblock(ex);
+
+               ext_debug("free first %u blocks starting %llu\n", num, start);
+               ext4_free_blocks(handle, inode, 0, start, num, flags);
+
        } else {
                printk(KERN_INFO "strange request: removal(2) "
                                "%u-%u from %u:%u\n",
@@ -2173,9 +2268,22 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
        return 0;
 }
 
+
+/*
+ * ext4_ext_rm_leaf() Removes the extents associated with the
+ * blocks appearing between "start" and "end", and splits the extents
+ * if "start" and "end" appear in the same extent
+ *
+ * @handle: The journal handle
+ * @inode:  The file's inode
+ * @path:   The path to the leaf
+ * @start:  The first block to remove
+ * @end:   The last block to remove
+ */
 static int
 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
-               struct ext4_ext_path *path, ext4_lblk_t start)
+               struct ext4_ext_path *path, ext4_lblk_t start,
+               ext4_lblk_t end)
 {
        int err = 0, correct_index = 0;
        int depth = ext_depth(inode), credits;
@@ -2186,6 +2294,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
        unsigned short ex_ee_len;
        unsigned uninitialized = 0;
        struct ext4_extent *ex;
+       struct ext4_map_blocks map;
 
        /* the header must be checked already in ext4_ext_remove_space() */
        ext_debug("truncate since %u in leaf\n", start);
@@ -2215,31 +2324,95 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                path[depth].p_ext = ex;
 
                a = ex_ee_block > start ? ex_ee_block : start;
-               b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
-                       ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;
+               b = ex_ee_block+ex_ee_len - 1 < end ?
+                       ex_ee_block+ex_ee_len - 1 : end;
 
                ext_debug("  border %u:%u\n", a, b);
 
-               if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
-                       block = 0;
-                       num = 0;
-                       BUG();
+               /* If this extent is beyond the end of the hole, skip it */
+               if (end <= ex_ee_block) {
+                       ex--;
+                       ex_ee_block = le32_to_cpu(ex->ee_block);
+                       ex_ee_len = ext4_ext_get_actual_len(ex);
+                       continue;
+               } else if (a != ex_ee_block &&
+                       b != ex_ee_block + ex_ee_len - 1) {
+                       /*
+                        * If this is a truncate, then this condition should
+                        * never happen because at least one of the end points
+                        * needs to be on the edge of the extent.
+                        */
+                       if (end == EXT_MAX_BLOCK) {
+                               ext_debug("  bad truncate %u:%u\n",
+                                               start, end);
+                               block = 0;
+                               num = 0;
+                               err = -EIO;
+                               goto out;
+                       }
+                       /*
+                        * else this is a hole punch, so the extent needs to
+                        * be split since neither edge of the hole is on the
+                        * extent edge
+                        */
+                       else{
+                               map.m_pblk = ext4_ext_pblock(ex);
+                               map.m_lblk = ex_ee_block;
+                               map.m_len = b - ex_ee_block;
+
+                               err = ext4_split_extent(handle,
+                                       inode, path, &map, 0,
+                                       EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
+                                       EXT4_GET_BLOCKS_PRE_IO);
+
+                               if (err < 0)
+                                       goto out;
+
+                               ex_ee_len = ext4_ext_get_actual_len(ex);
+
+                               b = ex_ee_block+ex_ee_len - 1 < end ?
+                                       ex_ee_block+ex_ee_len - 1 : end;
+
+                               /* Then remove tail of this extent */
+                               block = ex_ee_block;
+                               num = a - block;
+                       }
                } else if (a != ex_ee_block) {
                        /* remove tail of the extent */
                        block = ex_ee_block;
                        num = a - block;
                } else if (b != ex_ee_block + ex_ee_len - 1) {
                        /* remove head of the extent */
-                       block = a;
-                       num = b - a;
-                       /* there is no "make a hole" API yet */
-                       BUG();
+                       block = b;
+                       num =  ex_ee_block + ex_ee_len - b;
+
+                       /*
+                        * If this is a truncate, this condition
+                        * should never happen
+                        */
+                       if (end == EXT_MAX_BLOCK) {
+                               ext_debug("  bad truncate %u:%u\n",
+                                       start, end);
+                               err = -EIO;
+                               goto out;
+                       }
                } else {
                        /* remove whole extent: excellent! */
                        block = ex_ee_block;
                        num = 0;
-                       BUG_ON(a != ex_ee_block);
-                       BUG_ON(b != ex_ee_block + ex_ee_len - 1);
+                       if (a != ex_ee_block) {
+                               ext_debug("  bad truncate %u:%u\n",
+                                       start, end);
+                               err = -EIO;
+                               goto out;
+                       }
+
+                       if (b != ex_ee_block + ex_ee_len - 1) {
+                               ext_debug("  bad truncate %u:%u\n",
+                                       start, end);
+                               err = -EIO;
+                               goto out;
+                       }
                }
 
                /*
@@ -2270,7 +2443,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (num == 0) {
                        /* this extent is removed; mark slot entirely unused */
                        ext4_ext_store_pblock(ex, 0);
-                       le16_add_cpu(&eh->eh_entries, -1);
+               } else if (block != ex_ee_block) {
+                       /*
+                        * If this was a head removal, then we need to update
+                        * the physical block since it is now at a different
+                        * location
+                        */
+                       ext4_ext_store_pblock(ex, ext4_ext_pblock(ex) + (b-a));
                }
 
                ex->ee_block = cpu_to_le32(block);
@@ -2286,6 +2465,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
                if (err)
                        goto out;
 
+               /*
+                * If the extent was completely released,
+                * we need to remove it from the leaf
+                */
+               if (num == 0) {
+                       if (end != EXT_MAX_BLOCK) {
+                               /*
+                                * For hole punching, we need to scoot all the
+                                * extents up when an extent is removed so that
+                                * we don't have blank extents in the middle
+                                */
+                               memmove(ex, ex+1, (EXT_LAST_EXTENT(eh) - ex) *
+                                       sizeof(struct ext4_extent));
+
+                               /* Now get rid of the one at the end */
+                               memset(EXT_LAST_EXTENT(eh), 0,
+                                       sizeof(struct ext4_extent));
+                       }
+                       le16_add_cpu(&eh->eh_entries, -1);
+               }
+
                ext_debug("new extent: %u:%u:%llu\n", block, num,
                                ext4_ext_pblock(ex));
                ex--;
@@ -2326,7 +2526,8 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path)
        return 1;
 }
 
-static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
+static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
+                               ext4_lblk_t end)
 {
        struct super_block *sb = inode->i_sb;
        int depth = ext_depth(inode);
@@ -2365,7 +2566,8 @@ again:
        while (i >= 0 && err == 0) {
                if (i == depth) {
                        /* this is leaf block */
-                       err = ext4_ext_rm_leaf(handle, inode, path, start);
+                       err = ext4_ext_rm_leaf(handle, inode, path,
+                                       start, end);
                        /* root level has p_bh == NULL, brelse() eats this */
                        brelse(path[i].p_bh);
                        path[i].p_bh = NULL;
@@ -2529,11 +2731,200 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
        return ret;
 }
 
+/*
+ * split_flag bits taken by ext4_split_extent_at() and ext4_split_extent().
+ */
+#define EXT4_EXT_MAY_ZEROOUT   0x1  /* safe to zeroout if split fails \
+                                       due to ENOSPC */
+#define EXT4_EXT_MARK_UNINIT1  0x2  /* mark first half uninitialized */
+#define EXT4_EXT_MARK_UNINIT2  0x4  /* mark second half uninitialized */
+
+/*
+ * ext4_split_extent_at() splits an extent at given block.
+ *
+ * @handle: the journal handle
+ * @inode: the file inode
+ * @path: the path to the extent
+ * @split: the logical block where the extent is split.
+ * @split_flag: indicates if the extent could be zeroed out if split fails,
+ *              and the states (init or uninit) of the new extents.
+ * @flags: flags used to insert new extent to extent tree.
+ *
+ *
+ * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
+ * of which are determined by split_flag.
+ *
+ * There are two cases:
+ *  a> the extent is split into two extents.
+ *  b> split is not needed, and just mark the extent.
+ *
+ * return 0 on success, otherwise the error from the failing step.
+ */
+static int ext4_split_extent_at(handle_t *handle,
+                            struct inode *inode,
+                            struct ext4_ext_path *path,
+                            ext4_lblk_t split,
+                            int split_flag,
+                            int flags)
+{
+       ext4_fsblk_t newblock;
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex, newex, orig_ex;
+       struct ext4_extent *ex2 = NULL;
+       unsigned int ee_len, depth;
+       int err = 0;
+
+       ext_debug("ext4_split_extents_at: inode %lu, logical"
+               "block %llu\n", inode->i_ino, (unsigned long long)split);
+
+       ext4_ext_show_leaf(inode, path);
+
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+       newblock = split - ee_block + ext4_ext_pblock(ex);
+
+       BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+
+       err = ext4_ext_get_access(handle, inode, path + depth);
+       if (err)
+               goto out;
+
+       if (split == ee_block) {
+               /*
+                * case b: block @split is the block that the extent begins with
+                * then we just change the state of the extent, and splitting
+                * is not needed.
+                */
+               if (split_flag & EXT4_EXT_MARK_UNINIT2)
+                       ext4_ext_mark_uninitialized(ex);
+               else
+                       ext4_ext_mark_initialized(ex);
+
+               if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
+                       ext4_ext_try_to_merge(inode, path, ex);
+
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
+       }
+
+       /* case a */
+       memcpy(&orig_ex, ex, sizeof(orig_ex));
+       ex->ee_len = cpu_to_le16(split - ee_block);
+       if (split_flag & EXT4_EXT_MARK_UNINIT1)
+               ext4_ext_mark_uninitialized(ex);
+
+       /*
+        * path may lead to new leaf, not to original leaf any more
+        * after ext4_ext_insert_extent() returns.
+        */
+       err = ext4_ext_dirty(handle, inode, path + depth);
+       if (err)
+               goto fix_extent_len;
+
+       ex2 = &newex;
+       ex2->ee_block = cpu_to_le32(split);
+       ex2->ee_len   = cpu_to_le16(ee_len - (split - ee_block));
+       ext4_ext_store_pblock(ex2, newblock);
+       if (split_flag & EXT4_EXT_MARK_UNINIT2)
+               ext4_ext_mark_uninitialized(ex2);
+
+       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+       if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               err = ext4_ext_zeroout(inode, &orig_ex);
+               if (err)
+                       goto fix_extent_len;
+               /* restore full length as __le16; this also marks it initialized */
+               ex->ee_len = cpu_to_le16(ee_len);
+               ext4_ext_try_to_merge(inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
+       } else if (err)
+               goto fix_extent_len;
+
+out:
+       ext4_ext_show_leaf(inode, path);
+       return err;
+
+fix_extent_len:
+       ex->ee_len = orig_ex.ee_len;
+       ext4_ext_dirty(handle, inode, path + depth);
+       return err;
+}
+
+/*
+ * ext4_split_extent() splits an extent and marks the extent which is covered
+ * by @map as @split_flag indicates
+ *
+ * It may result in splitting the extent into multiple extents (up to three)
+ * There are three possibilities:
+ *   a> There is no split required
+ *   b> Splits in two extents: Split is happening at either end of the extent
+ *   c> Splits in three extents: Someone is splitting in middle of the extent
+ * Returns map->m_len on success, otherwise the error from the split/lookup.
+ */
+static int ext4_split_extent(handle_t *handle,
+                             struct inode *inode,
+                             struct ext4_ext_path *path,
+                             struct ext4_map_blocks *map,
+                             int split_flag,
+                             int flags)
+{
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex;
+       unsigned int ee_len, depth;
+       int err = 0;
+       int uninitialized;
+       int split_flag1, flags1;
+
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
+       uninitialized = ext4_ext_is_uninitialized(ex);
+
+       if (map->m_lblk + map->m_len < ee_block + ee_len) {
+               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                             EXT4_EXT_MAY_ZEROOUT : 0;
+               flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
+               if (uninitialized)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
+                                      EXT4_EXT_MARK_UNINIT2;
+               err = ext4_split_extent_at(handle, inode, path,
+                               map->m_lblk + map->m_len, split_flag1, flags1);
+               if (err)
+                       goto out;
+       }
+
+       ext4_ext_drop_refs(path);
+       path = ext4_ext_find_extent(inode, map->m_lblk, path);
+       if (IS_ERR(path))
+               return PTR_ERR(path);
+
+       if (map->m_lblk >= ee_block) {
+               split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+                             EXT4_EXT_MAY_ZEROOUT : 0;
+               if (uninitialized)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT1;
+               if (split_flag & EXT4_EXT_MARK_UNINIT2)
+                       split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+               err = ext4_split_extent_at(handle, inode, path,
+                               map->m_lblk, split_flag1, flags);
+               if (err)
+                       goto out;
+       }
+
+       ext4_ext_show_leaf(inode, path);
+out:
+       return err ? err : map->m_len;
+}
+
 #define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
- * extent into multiple extents (upto three - one initialized and two
+ * extent into multiple extents (up to three - one initialized and two
  * uninitialized).
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be initialized
@@ -2545,17 +2936,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       struct ext4_extent_header *eh;
+       struct ext4_map_blocks split_map;
+       struct ext4_extent zero_ex;
+       struct ext4_extent *ex;
        ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
        int err = 0;
-       int ret = 0;
-       int may_zeroout;
+       int split_flag = 0;
 
        ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2567,280 +2954,86 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
-       eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
 
+       WARN_ON(map->m_lblk < ee_block);
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
        /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
+       if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
+           (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               err = ext4_ext_zeroout(inode, ex);
                if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zeroed the full extent */
-               return allocated;
-       }
-
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-               if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
-                       /*
-                        * map->m_lblk == ee_block is handled by the zerouout
-                        * at the beginning.
-                        * Mark first half uninitialized.
-                        * Mark second half initialized and zero out the
-                        * initialized extent
-                        */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = cpu_to_le16(ee_len - allocated);
-                       ext4_ext_mark_uninitialized(ex);
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-
-                       ex3 = &newex;
-                       ex3->ee_block = cpu_to_le32(map->m_lblk);
-                       ext4_ext_store_pblock(ex3, newblock);
-                       ex3->ee_len = cpu_to_le16(allocated);
-                       err = ext4_ext_insert_extent(handle, inode, path,
-                                                       ex3, 0);
-                       if (err == -ENOSPC) {
-                               err =  ext4_ext_zeroout(inode, &orig_ex);
-                               if (err)
-                                       goto fix_extent_len;
-                               ex->ee_block = orig_ex.ee_block;
-                               ex->ee_len   = orig_ex.ee_len;
-                               ext4_ext_store_pblock(ex,
-                                       ext4_ext_pblock(&orig_ex));
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               /* blocks available from map->m_lblk */
-                               return allocated;
-
-                       } else if (err)
-                               goto fix_extent_len;
-
-                       /*
-                        * We need to zero out the second half because
-                        * an fallocate request can update file size and
-                        * converting the second half to initialized extent
-                        * implies that we can leak some junk data to user
-                        * space.
-                        */
-                       err =  ext4_ext_zeroout(inode, ex3);
-                       if (err) {
-                               /*
-                                * We should actually mark the
-                                * second half as uninit and return error
-                                * Insert would have changed the extent
-                                */
-                               depth = ext_depth(inode);
-                               ext4_ext_drop_refs(path);
-                               path = ext4_ext_find_extent(inode, map->m_lblk,
-                                                           path);
-                               if (IS_ERR(path)) {
-                                       err = PTR_ERR(path);
-                                       return err;
-                               }
-                               /* get the second half extent details */
-                               ex = path[depth].p_ext;
-                               err = ext4_ext_get_access(handle, inode,
-                                                               path + depth);
-                               if (err)
-                                       return err;
-                               ext4_ext_mark_uninitialized(ex);
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               return err;
-                       }
-
-                       /* zeroed the second half */
-                       return allocated;
-               }
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
                        goto out;
-               }
-               eh = path[depth].p_hdr;
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto out;
-
-               allocated = map->m_len;
-
-               /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
-                * to insert a extent in the middle zerout directly
-                * otherwise give the extent a chance to merge to left
-                */
-               if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-                       map->m_lblk != ee_block && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zero out the first half */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-               }
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       if (ex2 != ex)
-               goto insert;
-       /*
-        * New (initialized) extent starts from the first block
-        * in the current extent. i.e., ex2 == ex
-        * We have to see if it can be merged with the extent
-        * on the left.
-        */
-       if (ex2 > EXT_FIRST_EXTENT(eh)) {
-               /*
-                * To merge left, pass "ex2 - 1" to try_to_merge(),
-                * since it merges towards right _only_.
-                */
-               ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
-                       if (err)
-                               goto out;
-                       depth = ext_depth(inode);
-                       ex2--;
-               }
+               ext4_ext_mark_initialized(ex);
+               ext4_ext_try_to_merge(inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
        }
+
        /*
-        * Try to Merge towards right. This might be required
-        * only when the whole extent is being written to.
-        * i.e. ex2 == ex and ex3 == NULL.
+        * four cases:
+        * 1. split the extent into three extents.
+        * 2. split the extent into two extents, zeroout the first half.
+        * 3. split the extent into two extents, zeroout the second half.
+        * 4. split the extent into two extents without zeroout.
         */
-       if (!ex3) {
-               ret = ext4_ext_try_to_merge(inode, path, ex2);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
+       split_map.m_lblk = map->m_lblk;
+       split_map.m_len = map->m_len;
+
+       if (allocated > map->m_len) {
+               if (allocated <= EXT4_EXT_ZERO_LEN &&
+                   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 3 */
+                       zero_ex.ee_block =
+                                        cpu_to_le32(map->m_lblk);
+                       zero_ex.ee_len = cpu_to_le16(allocated);
+                       ext4_ext_store_pblock(&zero_ex,
+                               ext4_ext_pblock(ex) + map->m_lblk - ee_block);
+                       err = ext4_ext_zeroout(inode, &zero_ex);
                        if (err)
                                goto out;
+                       split_map.m_lblk = map->m_lblk;
+                       split_map.m_len = allocated;
+               } else if ((map->m_lblk - ee_block + map->m_len <
+                          EXT4_EXT_ZERO_LEN) &&
+                          (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 2 */
+                       if (map->m_lblk != ee_block) {
+                               zero_ex.ee_block = ex->ee_block;
+                               zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+                                                       ee_block);
+                               ext4_ext_store_pblock(&zero_ex,
+                                                     ext4_ext_pblock(ex));
+                               err = ext4_ext_zeroout(inode, &zero_ex);
+                               if (err)
+                                       goto out;
+                       }
+
+                       split_map.m_lblk = ee_block;
+                       split_map.m_len = map->m_lblk - ee_block + map->m_len;
+                       allocated = map->m_len;
                }
        }
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
+
+       allocated = ext4_split_extent(handle, inode, path,
+                                      &split_map, split_flag, 0);
+       if ((int)allocated < 0) /* allocated is unsigned: test sign as int */
+               err = (int)allocated;
+
 out:
-       ext4_ext_show_leaf(inode, path);
        return err ? err : allocated;
-
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
 }
 
 /*
@@ -2871,15 +3064,11 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct ext4_ext_path *path,
                                        int flags)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       ext4_lblk_t ee_block, eof_block;
-       unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
-       int err = 0;
-       int may_zeroout;
+       ext4_lblk_t eof_block;
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex;
+       unsigned int ee_len;
+       int split_flag = 0, depth;
 
        ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2889,156 +3078,22 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                inode->i_sb->s_blocksize_bits;
        if (eof_block < map->m_lblk + map->m_len)
                eof_block = map->m_lblk + map->m_len;
-
-       depth = ext_depth(inode);
-       ex = path[depth].p_ext;
-       ee_block = le32_to_cpu(ex->ee_block);
-       ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
-
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
-
-       /*
-        * If the uninitialized extent begins at the same logical
-        * block where the write begins, and the write completely
-        * covers the extent, then we don't need to split it.
-        */
-       if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
-               return allocated;
-
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
 
-               err = ext4_ext_get_access(handle, inode, path + depth);
-               if (err)
-                       goto out;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
+       split_flag |= EXT4_EXT_MARK_UNINIT2;
 
-               allocated = map->m_len;
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
-        * using direct I/O, uninitialised still.
-        */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       ext4_ext_mark_uninitialized(ex2);
-       if (ex2 != ex)
-               goto insert;
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       ext_debug("out here\n");
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
-out:
-       ext4_ext_show_leaf(inode, path);
-       return err ? err : allocated;
-
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
+       flags |= EXT4_GET_BLOCKS_PRE_IO;
+       return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
+
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,
                                              struct inode *inode,
                                              struct ext4_ext_path *path)
@@ -3047,46 +3102,27 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
        struct ext4_extent_header *eh;
        int depth;
        int err = 0;
-       int ret = 0;
 
        depth = ext_depth(inode);
        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
 
+       ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
+               "block %llu, max_blocks %u\n", inode->i_ino,
+               (unsigned long long)le32_to_cpu(ex->ee_block),
+               ext4_ext_get_actual_len(ex));
+
        err = ext4_ext_get_access(handle, inode, path + depth);
        if (err)
                goto out;
        /* first mark the extent as initialized */
        ext4_ext_mark_initialized(ex);
 
-       /*
-        * We have to see if it can be merged with the extent
-        * on the left.
+       /* note: ext4_ext_correct_indexes() isn't needed here because
+        * borders are not changed
         */
-       if (ex > EXT_FIRST_EXTENT(eh)) {
-               /*
-                * To merge left, pass "ex - 1" to try_to_merge(),
-                * since it merges towards right _only_.
-                */
-               ret = ext4_ext_try_to_merge(inode, path, ex - 1);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
-                       if (err)
-                               goto out;
-                       depth = ext_depth(inode);
-                       ex--;
-               }
-       }
-       /*
-        * Try to Merge towards right.
-        */
-       ret = ext4_ext_try_to_merge(inode, path, ex);
-       if (ret) {
-               err = ext4_ext_correct_indexes(handle, inode, path);
-               if (err)
-                       goto out;
-               depth = ext_depth(inode);
-       }
+       ext4_ext_try_to_merge(inode, path, ex);
+
        /* Mark modified extent as dirty */
        err = ext4_ext_dirty(handle, inode, path + depth);
 out:
@@ -3174,7 +3210,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
                                                   path, flags);
                /*
                 * Flag the inode(non aio case) or end_io struct (aio case)
-                * that this IO needs to convertion to written when IO is
+                * that this IO needs conversion to written when IO is
                 * completed
                 */
                if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3302,15 +3338,19 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
        ext4_fsblk_t newblock = 0;
        int err = 0, depth, ret;
        unsigned int allocated = 0;
+       unsigned int punched_out = 0;
+       unsigned int result = 0;
        struct ext4_allocation_request ar;
        ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+       struct ext4_map_blocks punch_map;
 
        ext_debug("blocks %u/%u requested for inode %lu\n",
                  map->m_lblk, map->m_len, inode->i_ino);
        trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
        /* check in cache */
-       if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
+       if (ext4_ext_in_cache(inode, map->m_lblk, &newex) &&
+               ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0)) {
                if (!newex.ee_start_lo && !newex.ee_start_hi) {
                        if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
                                /*
@@ -3375,16 +3415,84 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                        ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk,
                                  ee_block, ee_len, newblock);
 
-                       /* Do not put uninitialized extent in the cache */
-                       if (!ext4_ext_is_uninitialized(ex)) {
-                               ext4_ext_put_in_cache(inode, ee_block,
-                                                       ee_len, ee_start);
-                               goto out;
+                       if ((flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) == 0) {
+                               /*
+                                * Do not put uninitialized extent
+                                * in the cache
+                                */
+                               if (!ext4_ext_is_uninitialized(ex)) {
+                                       ext4_ext_put_in_cache(inode, ee_block,
+                                               ee_len, ee_start);
+                                       goto out;
+                               }
+                               ret = ext4_ext_handle_uninitialized_extents(
+                                       handle, inode, map, path, flags,
+                                       allocated, newblock);
+                               return ret;
                        }
-                       ret = ext4_ext_handle_uninitialized_extents(handle,
-                                       inode, map, path, flags, allocated,
-                                       newblock);
-                       return ret;
+
+                       /*
+                        * Punch out the map length, but only to the
+                        * end of the extent
+                        */
+                       punched_out = allocated < map->m_len ?
+                               allocated : map->m_len;
+
+                       /*
+                        * Since extents need to be converted to
+                        * uninitialized, they must fit in an
+                        * uninitialized extent
+                        */
+                       if (punched_out > EXT_UNINIT_MAX_LEN)
+                               punched_out = EXT_UNINIT_MAX_LEN;
+
+                       punch_map.m_lblk = map->m_lblk;
+                       punch_map.m_pblk = newblock;
+                       punch_map.m_len = punched_out;
+                       punch_map.m_flags = 0;
+
+                       /* Check to see if the extent needs to be split */
+                       if (punch_map.m_len != ee_len ||
+                               punch_map.m_lblk != ee_block) {
+
+                               ret = ext4_split_extent(handle, inode,
+                               path, &punch_map, 0,
+                               EXT4_GET_BLOCKS_PUNCH_OUT_EXT |
+                               EXT4_GET_BLOCKS_PRE_IO);
+
+                               if (ret < 0) {
+                                       err = ret;
+                                       goto out2;
+                               }
+                               /*
+                                * find extent for the block at
+                                * the start of the hole
+                                */
+                               ext4_ext_drop_refs(path);
+                               kfree(path);
+
+                               path = ext4_ext_find_extent(inode,
+                               map->m_lblk, NULL);
+                               if (IS_ERR(path)) {
+                                       err = PTR_ERR(path);
+                                       path = NULL;
+                                       goto out2;
+                               }
+
+                               depth = ext_depth(inode);
+                               ex = path[depth].p_ext;
+                               ee_len = ext4_ext_get_actual_len(ex);
+                               ee_block = le32_to_cpu(ex->ee_block);
+                               ee_start = ext4_ext_pblock(ex);
+
+                       }
+
+                       ext4_ext_mark_uninitialized(ex);
+
+                       err = ext4_ext_remove_space(inode, map->m_lblk,
+                               map->m_lblk + punched_out);
+
+                       goto out2;
                }
        }
 
@@ -3460,10 +3568,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                ext4_ext_mark_uninitialized(&newex);
                /*
                 * io_end structure was created for every IO write to an
-                * uninitialized extent. To avoid unecessary conversion,
+                * uninitialized extent. To avoid unnecessary conversion,
                 * here we flag the IO that really needs the conversion.
                 * For non asycn direct IO case, flag the inode state
-                * that we need to perform convertion when IO is done.
+                * that we need to perform conversion when IO is done.
                 */
                if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
                        if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3529,7 +3637,11 @@ out2:
        }
        trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
                newblock, map->m_len, err ? err : allocated);
-       return err ? err : allocated;
+
+       result = (flags & EXT4_GET_BLOCKS_PUNCH_OUT_EXT) ?
+                       punched_out : allocated;
+
+       return err ? err : result;
 }
 
 void ext4_ext_truncate(struct inode *inode)
@@ -3577,7 +3689,7 @@ void ext4_ext_truncate(struct inode *inode)
 
        last_block = (inode->i_size + sb->s_blocksize - 1)
                        >> EXT4_BLOCK_SIZE_BITS(sb);
-       err = ext4_ext_remove_space(inode, last_block);
+       err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK);
 
        /* In a multi-transaction truncate, we only make the final
         * transaction synchronous.
@@ -3585,8 +3697,9 @@ void ext4_ext_truncate(struct inode *inode)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-out_stop:
        up_write(&EXT4_I(inode)->i_data_sem);
+
+out_stop:
        /*
         * If this was a simple ftruncate() and the file will remain alive,
         * then we need to clear up the orphan record which we created above.
@@ -3651,10 +3764,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        struct ext4_map_blocks map;
        unsigned int credits, blkbits = inode->i_blkbits;
 
-       /* We only support the FALLOC_FL_KEEP_SIZE mode */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
-               return -EOPNOTSUPP;
-
        /*
         * currently supporting (pre)allocate mode for extent-based
         * files _only_
@@ -3662,6 +3771,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
+       /* Return error if mode is not supported */
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return -EOPNOTSUPP;
+
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               return ext4_punch_hole(file, offset, len);
+
        trace_ext4_fallocate_enter(inode, offset, len, mode);
        map.m_lblk = offset >> blkbits;
        /*
@@ -3822,6 +3938,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
                pgoff_t         last_offset;
                pgoff_t         offset;
                pgoff_t         index;
+               pgoff_t         start_index = 0;
                struct page     **pages = NULL;
                struct buffer_head *bh = NULL;
                struct buffer_head *head = NULL;
@@ -3848,39 +3965,57 @@ out:
                                kfree(pages);
                                return EXT_CONTINUE;
                        }
+                       index = 0;
 
+next_page:
                        /* Try to find the 1st mapped buffer. */
-                       end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+                       end = ((__u64)pages[index]->index << PAGE_SHIFT) >>
                                  blksize_bits;
-                       if (!page_has_buffers(pages[0]))
+                       if (!page_has_buffers(pages[index]))
                                goto out;
-                       head = page_buffers(pages[0]);
+                       head = page_buffers(pages[index]);
                        if (!head)
                                goto out;
 
+                       index++;
                        bh = head;
                        do {
-                               if (buffer_mapped(bh)) {
+                               if (end >= newex->ec_block +
+                                       newex->ec_len)
+                                       /* The buffer is out of
+                                        * the request range.
+                                        */
+                                       goto out;
+
+                               if (buffer_mapped(bh) &&
+                                   end >= newex->ec_block) {
+                                       start_index = index - 1;
                                        /* get the 1st mapped buffer. */
-                                       if (end > newex->ec_block +
-                                               newex->ec_len)
-                                               /* The buffer is out of
-                                                * the request range.
-                                                */
-                                               goto out;
                                        goto found_mapped_buffer;
                                }
+
                                bh = bh->b_this_page;
                                end++;
                        } while (bh != head);
 
-                       /* No mapped buffer found. */
-                       goto out;
+                       /* No mapped buffer within the range was found in
+                        * this page, so we need to look at the next page.
+                        */
+                       if (index >= ret) {
+                               /* There is no page left, but we need to limit
+                                * newex->ec_len.
+                                */
+                               newex->ec_len = end - newex->ec_block;
+                               goto out;
+                       }
+                       goto next_page;
                } else {
                        /*Find contiguous delayed buffers. */
                        if (ret > 0 && pages[0]->index == last_offset)
                                head = page_buffers(pages[0]);
                        bh = head;
+                       index = 1;
+                       start_index = 0;
                }
 
 found_mapped_buffer:
@@ -3903,7 +4038,7 @@ found_mapped_buffer:
                                end++;
                        } while (bh != head);
 
-                       for (index = 1; index < ret; index++) {
+                       for (; index < ret; index++) {
                                if (!page_has_buffers(pages[index])) {
                                        bh = NULL;
                                        break;
@@ -3913,8 +4048,10 @@ found_mapped_buffer:
                                        bh = NULL;
                                        break;
                                }
+
                                if (pages[index]->index !=
-                                       pages[0]->index + index) {
+                                   pages[start_index]->index + index
+                                   - start_index) {
                                        /* Blocks are not contiguous. */
                                        bh = NULL;
                                        break;
@@ -4006,6 +4143,177 @@ static int ext4_xattr_fiemap(struct inode *inode,
        return (error < 0 ? error : 0);
 }
 
+/*
+ * ext4_ext_punch_hole
+ *
+ * Punches a hole of "length" bytes in a file starting at byte "offset".
+ * Partial blocks at either edge are zeroed; whole blocks are released.
+ *
+ * @file:   The file to punch a hole in
+ * @offset: The starting byte offset of the hole
+ * @length: The length of the hole in bytes
+ *
+ * Returns 0 on success or a negative error code on failure
+ */
+int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct super_block *sb = inode->i_sb;
+       struct ext4_ext_cache cache_ex;
+       ext4_lblk_t first_block, last_block, num_blocks, iblock, max_blocks;
+       struct address_space *mapping = inode->i_mapping;
+       struct ext4_map_blocks map;
+       handle_t *handle;
+       loff_t first_block_offset, last_block_offset, block_len;
+       loff_t first_page, last_page, first_page_offset, last_page_offset;
+       int ret, credits, blocks_released, err = 0;
+
+       first_block = (offset + sb->s_blocksize - 1) >>
+               EXT4_BLOCK_SIZE_BITS(sb);
+       last_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
+
+       /* ext4_lblk_t is 32 bits: widen before shifting back to bytes */
+       first_block_offset = (loff_t)first_block << EXT4_BLOCK_SIZE_BITS(sb);
+       last_block_offset = (loff_t)last_block << EXT4_BLOCK_SIZE_BITS(sb);
+
+       first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+       last_page = (offset + length) >> PAGE_CACHE_SHIFT;
+
+       first_page_offset = first_page << PAGE_CACHE_SHIFT;
+       last_page_offset = last_page << PAGE_CACHE_SHIFT;
+
+       /*
+        * Write out all dirty pages to avoid race conditions
+        * Then release them.
+        */
+       if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+               err = filemap_write_and_wait_range(mapping,
+                       first_page_offset == 0 ? 0 : first_page_offset-1,
+                       last_page_offset);
+               if (err)
+                       return err;
+       }
+
+       /* Now release the pages */
+       if (last_page_offset > first_page_offset) {
+               truncate_inode_pages_range(mapping, first_page_offset,
+                                          last_page_offset-1);
+       }
+
+       /* finish any pending end_io work */
+       ext4_flush_completed_IO(inode);
+
+       credits = ext4_writepage_trans_blocks(inode);
+       handle = ext4_journal_start(inode, credits);
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
+
+       err = ext4_orphan_add(handle, inode);
+       if (err)
+               goto out;
+
+       /*
+        * Now we need to zero out the non-block-aligned data.
+        * If the hole sits strictly inside one block, just
+        * zero out the middle
+        */
+       if (first_block > last_block)
+               ext4_block_zero_page_range(handle, mapping, offset, length);
+       else {
+               /* zero out the head of the hole before the first block */
+               block_len  = first_block_offset - offset;
+               if (block_len > 0)
+                       ext4_block_zero_page_range(handle, mapping,
+                                                  offset, block_len);
+
+               /* zero out the tail of the hole after the last block */
+               block_len = offset + length - last_block_offset;
+               if (block_len > 0) {
+                       ext4_block_zero_page_range(handle, mapping,
+                                       last_block_offset, block_len);
+               }
+       }
+
+       /* If there are no blocks to remove, return now */
+       if (first_block >= last_block)
+               goto out;
+
+       down_write(&EXT4_I(inode)->i_data_sem);
+       ext4_ext_invalidate_cache(inode);
+       ext4_discard_preallocations(inode);
+
+       /*
+        * Loop over all the blocks and identify blocks
+        * that need to be punched out
+        */
+       iblock = first_block;
+       blocks_released = 0;
+       while (iblock < last_block) {
+               max_blocks = last_block - iblock;
+               num_blocks = 1;
+               memset(&map, 0, sizeof(map));
+               map.m_lblk = iblock;
+               map.m_len = max_blocks;
+               ret = ext4_ext_map_blocks(handle, inode, &map,
+                       EXT4_GET_BLOCKS_PUNCH_OUT_EXT);
+
+               if (ret > 0) {
+                       blocks_released += ret;
+                       num_blocks = ret;
+               } else if (ret == 0) {
+                       /*
+                        * If map blocks could not find the block,
+                        * then it is in a hole.  If the hole was
+                        * not already cached, then map blocks should
+                        * put it in the cache.  So we can get the hole
+                        * out of the cache
+                        */
+                       memset(&cache_ex, 0, sizeof(cache_ex));
+                       if ((ext4_ext_check_cache(inode, iblock, &cache_ex)) &&
+                               !cache_ex.ec_start) {
+
+                               /* The hole is cached */
+                               num_blocks = cache_ex.ec_block +
+                               cache_ex.ec_len - iblock;
+
+                       } else {
+                               /* The block could not be identified */
+                               err = -EIO;
+                               break;
+                       }
+               } else {
+                       /* Map blocks error */
+                       err = ret;
+                       break;
+               }
+
+               if (num_blocks == 0) {
+                       /* This condition should never happen */
+                       ext_debug("Block lookup failed\n");
+                       err = -EIO;
+                       break;
+               }
+
+               iblock += num_blocks;
+       }
+
+       if (blocks_released > 0) {
+               ext4_ext_invalidate_cache(inode);
+               ext4_discard_preallocations(inode);
+       }
+
+       if (IS_SYNC(inode))
+               ext4_handle_sync(handle);
+
+       up_write(&EXT4_I(inode)->i_data_sem);
+
+out:
+       ext4_orphan_del(handle, inode);
+       inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+       ext4_mark_inode_dirty(handle, inode);
+       ext4_journal_stop(handle);
+       return err;
+}
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len)
 {
@@ -4042,4 +4350,3 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
        return error;
 }
-