ext4: reimplement convert and split_unwritten
authorYongqiang Yang <xiaoqiangnk@gmail.com>
Tue, 3 May 2011 16:25:07 +0000 (12:25 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 3 May 2011 16:25:07 +0000 (12:25 -0400)
Reimplement ext4_ext_convert_to_initialized() and
ext4_split_unwritten_extents() using ext4_split_extent()

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Tested-by: Allison Henderson <achender@linux.vnet.ibm.com>
fs/ext4/extents.c

index 10317eb..e363f21 100644 (file)
@@ -2757,17 +2757,13 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                                           struct ext4_map_blocks *map,
                                           struct ext4_ext_path *path)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       struct ext4_extent_header *eh;
+       struct ext4_map_blocks split_map;
+       struct ext4_extent zero_ex;
+       struct ext4_extent *ex;
        ext4_lblk_t ee_block, eof_block;
        unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
        int err = 0;
-       int ret = 0;
-       int may_zeroout;
+       int split_flag = 0;
 
        ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -2779,280 +2775,87 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
                eof_block = map->m_lblk + map->m_len;
 
        depth = ext_depth(inode);
-       eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        ee_block = le32_to_cpu(ex->ee_block);
        ee_len = ext4_ext_get_actual_len(ex);
        allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
 
+       WARN_ON(map->m_lblk < ee_block);
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
        /* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-       if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
+       if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
+           (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+               err = ext4_ext_zeroout(inode, ex);
                if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zeroed the full extent */
-               return allocated;
-       }
-
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
-               if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
-                       /*
-                        * map->m_lblk == ee_block is handled by the zerouout
-                        * at the beginning.
-                        * Mark first half uninitialized.
-                        * Mark second half initialized and zero out the
-                        * initialized extent
-                        */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = cpu_to_le16(ee_len - allocated);
-                       ext4_ext_mark_uninitialized(ex);
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-
-                       ex3 = &newex;
-                       ex3->ee_block = cpu_to_le32(map->m_lblk);
-                       ext4_ext_store_pblock(ex3, newblock);
-                       ex3->ee_len = cpu_to_le16(allocated);
-                       err = ext4_ext_insert_extent(handle, inode, path,
-                                                       ex3, 0);
-                       if (err == -ENOSPC) {
-                               err =  ext4_ext_zeroout(inode, &orig_ex);
-                               if (err)
-                                       goto fix_extent_len;
-                               ex->ee_block = orig_ex.ee_block;
-                               ex->ee_len   = orig_ex.ee_len;
-                               ext4_ext_store_pblock(ex,
-                                       ext4_ext_pblock(&orig_ex));
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               /* blocks available from map->m_lblk */
-                               return allocated;
-
-                       } else if (err)
-                               goto fix_extent_len;
-
-                       /*
-                        * We need to zero out the second half because
-                        * an fallocate request can update file size and
-                        * converting the second half to initialized extent
-                        * implies that we can leak some junk data to user
-                        * space.
-                        */
-                       err =  ext4_ext_zeroout(inode, ex3);
-                       if (err) {
-                               /*
-                                * We should actually mark the
-                                * second half as uninit and return error
-                                * Insert would have changed the extent
-                                */
-                               depth = ext_depth(inode);
-                               ext4_ext_drop_refs(path);
-                               path = ext4_ext_find_extent(inode, map->m_lblk,
-                                                           path);
-                               if (IS_ERR(path)) {
-                                       err = PTR_ERR(path);
-                                       return err;
-                               }
-                               /* get the second half extent details */
-                               ex = path[depth].p_ext;
-                               err = ext4_ext_get_access(handle, inode,
-                                                               path + depth);
-                               if (err)
-                                       return err;
-                               ext4_ext_mark_uninitialized(ex);
-                               ext4_ext_dirty(handle, inode, path + depth);
-                               return err;
-                       }
-
-                       /* zeroed the second half */
-                       return allocated;
-               }
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
                        goto out;
-               }
-               eh = path[depth].p_hdr;
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
 
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto out;
-
-               allocated = map->m_len;
-
-               /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
-                * to insert a extent in the middle zerout directly
-                * otherwise give the extent a chance to merge to left
-                */
-               if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
-                       map->m_lblk != ee_block && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zero out the first half */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-               }
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       if (ex2 != ex)
-               goto insert;
-       /*
-        * New (initialized) extent starts from the first block
-        * in the current extent. i.e., ex2 == ex
-        * We have to see if it can be merged with the extent
-        * on the left.
-        */
-       if (ex2 > EXT_FIRST_EXTENT(eh)) {
-               /*
-                * To merge left, pass "ex2 - 1" to try_to_merge(),
-                * since it merges towards right _only_.
-                */
-               ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
-                       if (err)
-                               goto out;
-                       depth = ext_depth(inode);
-                       ex2--;
-               }
+               ext4_ext_mark_initialized(ex);
+               ext4_ext_try_to_merge(inode, path, ex);
+               err = ext4_ext_dirty(handle, inode, path + depth);
+               goto out;
        }
+
        /*
-        * Try to Merge towards right. This might be required
-        * only when the whole extent is being written to.
-        * i.e. ex2 == ex and ex3 == NULL.
+        * four cases:
+        * 1. split the extent into three extents.
+        * 2. split the extent into two extents, zeroout the first half.
+        * 3. split the extent into two extents, zeroout the second half.
+        * 4. split the extent into two extents with out zeroout.
         */
-       if (!ex3) {
-               ret = ext4_ext_try_to_merge(inode, path, ex2);
-               if (ret) {
-                       err = ext4_ext_correct_indexes(handle, inode, path);
+       split_map.m_lblk = map->m_lblk;
+       split_map.m_len = map->m_len;
+
+       if (allocated > map->m_len) {
+               if (allocated <= EXT4_EXT_ZERO_LEN &&
+                   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 3 */
+                       zero_ex.ee_block =
+                                        cpu_to_le32(map->m_lblk + map->m_len);
+                       zero_ex.ee_len = cpu_to_le16(allocated - map->m_len);
+                       ext4_ext_store_pblock(&zero_ex,
+                               ext4_ext_pblock(ex) + map->m_lblk - ee_block);
+                       err = ext4_ext_zeroout(inode, &zero_ex);
                        if (err)
                                goto out;
+                       split_map.m_lblk = map->m_lblk;
+                       split_map.m_len = allocated;
+               } else if ((map->m_lblk - ee_block + map->m_len <
+                          EXT4_EXT_ZERO_LEN) &&
+                          (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+                       /* case 2 */
+                       if (map->m_lblk != ee_block) {
+                               zero_ex.ee_block = ex->ee_block;
+                               zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+                                                       ee_block);
+                               ext4_ext_store_pblock(&zero_ex,
+                                                     ext4_ext_pblock(ex));
+                               err = ext4_ext_zeroout(inode, &zero_ex);
+                               if (err)
+                                       goto out;
+                       }
+
+                       allocated = map->m_lblk - ee_block + map->m_len;
+
+                       split_map.m_lblk = ee_block;
+                       split_map.m_len = allocated;
                }
        }
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
+
+       allocated = ext4_split_extent(handle, inode, path,
+                                      &split_map, split_flag, 0);
+       if (allocated < 0)
+               err = allocated;
+
 out:
-       ext4_ext_show_leaf(inode, path);
        return err ? err : allocated;
-
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
 }
 
 /*
@@ -3083,15 +2886,11 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                                        struct ext4_ext_path *path,
                                        int flags)
 {
-       struct ext4_extent *ex, newex, orig_ex;
-       struct ext4_extent *ex1 = NULL;
-       struct ext4_extent *ex2 = NULL;
-       struct ext4_extent *ex3 = NULL;
-       ext4_lblk_t ee_block, eof_block;
-       unsigned int allocated, ee_len, depth;
-       ext4_fsblk_t newblock;
-       int err = 0;
-       int may_zeroout;
+       ext4_lblk_t eof_block;
+       ext4_lblk_t ee_block;
+       struct ext4_extent *ex;
+       unsigned int ee_len;
+       int split_flag = 0, depth;
 
        ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
                "block %llu, max_blocks %u\n", inode->i_ino,
@@ -3101,155 +2900,20 @@ static int ext4_split_unwritten_extents(handle_t *handle,
                inode->i_sb->s_blocksize_bits;
        if (eof_block < map->m_lblk + map->m_len)
                eof_block = map->m_lblk + map->m_len;
-
-       depth = ext_depth(inode);
-       ex = path[depth].p_ext;
-       ee_block = le32_to_cpu(ex->ee_block);
-       ee_len = ext4_ext_get_actual_len(ex);
-       allocated = ee_len - (map->m_lblk - ee_block);
-       newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
-       ex2 = ex;
-       orig_ex.ee_block = ex->ee_block;
-       orig_ex.ee_len   = cpu_to_le16(ee_len);
-       ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
-
        /*
         * It is safe to convert extent to initialized via explicit
         * zeroout only if extent is fully insde i_size or new_size.
         */
-       may_zeroout = ee_block + ee_len <= eof_block;
-
-       /*
-        * If the uninitialized extent begins at the same logical
-        * block where the write begins, and the write completely
-        * covers the extent, then we don't need to split it.
-        */
-       if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
-               return allocated;
-
-       err = ext4_ext_get_access(handle, inode, path + depth);
-       if (err)
-               goto out;
-       /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
-       if (map->m_lblk > ee_block) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * for sanity, update the length of the ex2 extent before
-        * we insert ex3, if ex1 is NULL. This is to avoid temporary
-        * overlap of blocks.
-        */
-       if (!ex1 && allocated > map->m_len)
-               ex2->ee_len = cpu_to_le16(map->m_len);
-       /* ex3: to ee_block + ee_len : uninitialised */
-       if (allocated > map->m_len) {
-               unsigned int newdepth;
-               ex3 = &newex;
-               ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
-               ext4_ext_store_pblock(ex3, newblock + map->m_len);
-               ex3->ee_len = cpu_to_le16(allocated - map->m_len);
-               ext4_ext_mark_uninitialized(ex3);
-               err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
-               if (err == -ENOSPC && may_zeroout) {
-                       err =  ext4_ext_zeroout(inode, &orig_ex);
-                       if (err)
-                               goto fix_extent_len;
-                       /* update the extent length and mark as initialized */
-                       ex->ee_block = orig_ex.ee_block;
-                       ex->ee_len   = orig_ex.ee_len;
-                       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-                       ext4_ext_dirty(handle, inode, path + depth);
-                       /* zeroed the full extent */
-                       /* blocks available from map->m_lblk */
-                       return allocated;
-
-               } else if (err)
-                       goto fix_extent_len;
-               /*
-                * The depth, and hence eh & ex might change
-                * as part of the insert above.
-                */
-               newdepth = ext_depth(inode);
-               /*
-                * update the extent length after successful insert of the
-                * split extent
-                */
-               ee_len -= ext4_ext_get_actual_len(ex3);
-               orig_ex.ee_len = cpu_to_le16(ee_len);
-               may_zeroout = ee_block + ee_len <= eof_block;
-
-               depth = newdepth;
-               ext4_ext_drop_refs(path);
-               path = ext4_ext_find_extent(inode, map->m_lblk, path);
-               if (IS_ERR(path)) {
-                       err = PTR_ERR(path);
-                       goto out;
-               }
-               ex = path[depth].p_ext;
-               if (ex2 != &newex)
-                       ex2 = ex;
+       depth = ext_depth(inode);
+       ex = path[depth].p_ext;
+       ee_block = le32_to_cpu(ex->ee_block);
+       ee_len = ext4_ext_get_actual_len(ex);
 
-               err = ext4_ext_get_access(handle, inode, path + depth);
-               if (err)
-                       goto out;
+       split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
+       split_flag |= EXT4_EXT_MARK_UNINIT2;
 
-               allocated = map->m_len;
-       }
-       /*
-        * If there was a change of depth as part of the
-        * insertion of ex3 above, we need to update the length
-        * of the ex1 extent again here
-        */
-       if (ex1 && ex1 != ex) {
-               ex1 = ex;
-               ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
-               ext4_ext_mark_uninitialized(ex1);
-               ex2 = &newex;
-       }
-       /*
-        * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
-        * using direct I/O, uninitialised still.
-        */
-       ex2->ee_block = cpu_to_le32(map->m_lblk);
-       ext4_ext_store_pblock(ex2, newblock);
-       ex2->ee_len = cpu_to_le16(allocated);
-       ext4_ext_mark_uninitialized(ex2);
-       if (ex2 != ex)
-               goto insert;
-       /* Mark modified extent as dirty */
-       err = ext4_ext_dirty(handle, inode, path + depth);
-       ext_debug("out here\n");
-       goto out;
-insert:
-       err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
-       if (err == -ENOSPC && may_zeroout) {
-               err =  ext4_ext_zeroout(inode, &orig_ex);
-               if (err)
-                       goto fix_extent_len;
-               /* update the extent length and mark as initialized */
-               ex->ee_block = orig_ex.ee_block;
-               ex->ee_len   = orig_ex.ee_len;
-               ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-               ext4_ext_dirty(handle, inode, path + depth);
-               /* zero out the first half */
-               return allocated;
-       } else if (err)
-               goto fix_extent_len;
-out:
-       ext4_ext_show_leaf(inode, path);
-       return err ? err : allocated;
-
-fix_extent_len:
-       ex->ee_block = orig_ex.ee_block;
-       ex->ee_len   = orig_ex.ee_len;
-       ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
-       ext4_ext_mark_uninitialized(ex);
-       ext4_ext_dirty(handle, inode, path + depth);
-       return err;
+       flags |= EXT4_GET_BLOCKS_PRE_IO;
+       return ext4_split_extent(handle, inode, path, map, split_flag, flags);
 }
 
 static int ext4_convert_unwritten_extents_endio(handle_t *handle,