#include "ext4_jbd2.h"
#include "ext4_extents.h"
+#include <trace/events/ext4.h>
+
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
* fragmenting the file system's free space. Maybe we
* should have some hueristics or some way to allow
* userspace to pass a hint to file system,
- * especiially if the latter case turns out to be
+ * especially if the latter case turns out to be
* common.
*/
ex = path[depth].p_ext;
if (unlikely(!bh))
goto err;
if (!bh_uptodate_or_lock(bh)) {
+ trace_ext4_ext_load_extent(inode, block,
+ path[ppos].p_block);
if (bh_submit_read(bh) < 0) {
put_bh(bh);
goto err;
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
- ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+ ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
EXT4_FREE_BLOCKS_METADATA);
}
}
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
-static int ext4_ext_try_to_merge(struct inode *inode,
+static int ext4_ext_try_to_merge_right(struct inode *inode,
struct ext4_ext_path *path,
struct ext4_extent *ex)
{
return merge_done;
}
+/*
+ * This function tries to merge the @ex extent to neighbours in the tree.
+ * return 1 if merge left else 0.
+ */
+static int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex) {
+ struct ext4_extent_header *eh;
+ unsigned int depth;
+ int merge_done = 0;
+ int ret = 0;
+
+ depth = ext_depth(inode);
+ BUG_ON(path[depth].p_hdr == NULL);
+ eh = path[depth].p_hdr;
+
+ if (ex > EXT_FIRST_EXTENT(eh))
+ merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
+
+ if (!merge_done)
+ ret = ext4_ext_try_to_merge_right(inode, path, ex);
+
+ return ret;
+}
+
/*
* check if a portion of the "newext" extent overlaps with an
* existing extent.
BUG_ON(npath->p_depth != path->p_depth);
eh = npath[depth].p_hdr;
if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
- ext_debug("next leaf isnt full(%d)\n",
+ ext_debug("next leaf isn't full(%d)\n",
le16_to_cpu(eh->eh_entries));
path = npath;
goto repeat;
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
- ext4_free_blocks(handle, inode, 0, leaf, 1,
+ ext4_free_blocks(handle, inode, NULL, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
- ext4_free_blocks(handle, inode, 0, start, num, flags);
+ ext4_free_blocks(handle, inode, NULL, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
return ret;
}
+/*
+ * used by extent splitting.
+ */
+#define EXT4_EXT_MAY_ZEROOUT 0x1 /* safe to zeroout if split fails \
+ due to ENOSPC */
+#define EXT4_EXT_MARK_UNINIT1 0x2 /* mark first half uninitialized */
+#define EXT4_EXT_MARK_UNINIT2 0x4 /* mark second half uninitialized */
+
+/*
+ * ext4_split_extent_at() splits an extent at given block.
+ *
+ * @handle: the journal handle
+ * @inode: the file inode
+ * @path: the path to the extent
+ * @split: the logical block where the extent is splitted.
+ * @split_flags: indicates if the extent could be zeroout if split fails, and
+ * the states(init or uninit) of new extents.
+ * @flags: flags used to insert new extent to extent tree.
+ *
+ *
+ * Splits extent [a, b] into two extents [a, @split) and [@split, b], states
+ * of which are deterimined by split_flag.
+ *
+ * There are two cases:
+ * a> the extent are splitted into two extent.
+ * b> split is not needed, and just mark the extent.
+ *
+ * return 0 on success.
+ */
+static int ext4_split_extent_at(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t split,
+ int split_flag,
+ int flags)
+{
+ ext4_fsblk_t newblock;
+ ext4_lblk_t ee_block;
+ struct ext4_extent *ex, newex, orig_ex;
+ struct ext4_extent *ex2 = NULL;
+ unsigned int ee_len, depth;
+ int err = 0;
+
+ ext_debug("ext4_split_extents_at: inode %lu, logical"
+ "block %llu\n", inode->i_ino, (unsigned long long)split);
+
+ ext4_ext_show_leaf(inode, path);
+
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ newblock = split - ee_block + ext4_ext_pblock(ex);
+
+ BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+
+ err = ext4_ext_get_access(handle, inode, path + depth);
+ if (err)
+ goto out;
+
+ if (split == ee_block) {
+ /*
+ * case b: block @split is the block that the extent begins with
+ * then we just change the state of the extent, and splitting
+ * is not needed.
+ */
+ if (split_flag & EXT4_EXT_MARK_UNINIT2)
+ ext4_ext_mark_uninitialized(ex);
+ else
+ ext4_ext_mark_initialized(ex);
+
+ if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
+ ext4_ext_try_to_merge(inode, path, ex);
+
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ goto out;
+ }
+
+ /* case a */
+ memcpy(&orig_ex, ex, sizeof(orig_ex));
+ ex->ee_len = cpu_to_le16(split - ee_block);
+ if (split_flag & EXT4_EXT_MARK_UNINIT1)
+ ext4_ext_mark_uninitialized(ex);
+
+ /*
+ * path may lead to new leaf, not to original leaf any more
+ * after ext4_ext_insert_extent() returns,
+ */
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ if (err)
+ goto fix_extent_len;
+
+ ex2 = &newex;
+ ex2->ee_block = cpu_to_le32(split);
+ ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
+ ext4_ext_store_pblock(ex2, newblock);
+ if (split_flag & EXT4_EXT_MARK_UNINIT2)
+ ext4_ext_mark_uninitialized(ex2);
+
+ err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
+ if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_len = cpu_to_le32(ee_len);
+ ext4_ext_try_to_merge(inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ goto out;
+ } else if (err)
+ goto fix_extent_len;
+
+out:
+ ext4_ext_show_leaf(inode, path);
+ return err;
+
+fix_extent_len:
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
+}
+
+/*
+ * ext4_split_extents() splits an extent and mark extent which is covered
+ * by @map as split_flags indicates
+ *
+ * It may result in splitting the extent into multiple extents (upto three)
+ * There are three possibilities:
+ * a> There is no split required
+ * b> Splits in two extents: Split is happening at either end of the extent
+ * c> Splits in three extents: Somone is splitting in middle of the extent
+ *
+ */
+static int ext4_split_extent(handle_t *handle,
+ struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_map_blocks *map,
+ int split_flag,
+ int flags)
+{
+ ext4_lblk_t ee_block;
+ struct ext4_extent *ex;
+ unsigned int ee_len, depth;
+ int err = 0;
+ int uninitialized;
+ int split_flag1, flags1;
+
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
+ uninitialized = ext4_ext_is_uninitialized(ex);
+
+ if (map->m_lblk + map->m_len < ee_block + ee_len) {
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+ EXT4_EXT_MAY_ZEROOUT : 0;
+ flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
+ if (uninitialized)
+ split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
+ EXT4_EXT_MARK_UNINIT2;
+ err = ext4_split_extent_at(handle, inode, path,
+ map->m_lblk + map->m_len, split_flag1, flags1);
+ }
+
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode, map->m_lblk, path);
+ if (IS_ERR(path))
+ return PTR_ERR(path);
+
+ if (map->m_lblk >= ee_block) {
+ split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT ?
+ EXT4_EXT_MAY_ZEROOUT : 0;
+ if (uninitialized)
+ split_flag1 |= EXT4_EXT_MARK_UNINIT1;
+ if (split_flag & EXT4_EXT_MARK_UNINIT2)
+ split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+ err = ext4_split_extent_at(handle, inode, path,
+ map->m_lblk, split_flag1, flags);
+ if (err)
+ goto out;
+ }
+
+ ext4_ext_show_leaf(inode, path);
+out:
+ return err ? err : map->m_len;
+}
+
#define EXT4_EXT_ZERO_LEN 7
/*
* This function is called by ext4_ext_map_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
- * extent into multiple extents (upto three - one initialized and two
+ * extent into multiple extents (up to three - one initialized and two
* uninitialized).
* There are three possibilities:
* a> There is no split required: Entire extent should be initialized
struct ext4_map_blocks *map,
struct ext4_ext_path *path)
{
- struct ext4_extent *ex, newex, orig_ex;
- struct ext4_extent *ex1 = NULL;
- struct ext4_extent *ex2 = NULL;
- struct ext4_extent *ex3 = NULL;
- struct ext4_extent_header *eh;
+ struct ext4_map_blocks split_map;
+ struct ext4_extent zero_ex;
+ struct ext4_extent *ex;
ext4_lblk_t ee_block, eof_block;
unsigned int allocated, ee_len, depth;
- ext4_fsblk_t newblock;
int err = 0;
- int ret = 0;
- int may_zeroout;
+ int split_flag = 0;
ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
eof_block = map->m_lblk + map->m_len;
depth = ext_depth(inode);
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
- ex2 = ex;
- orig_ex.ee_block = ex->ee_block;
- orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
+ WARN_ON(map->m_lblk < ee_block);
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if extent is fully insde i_size or new_size.
*/
- may_zeroout = ee_block + ee_len <= eof_block;
+ split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
- if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
+ (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ err = ext4_ext_zeroout(inode, ex);
if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
- return allocated;
- }
-
- /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
- if (map->m_lblk > ee_block) {
- ex1 = ex;
- ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
- ext4_ext_mark_uninitialized(ex1);
- ex2 = &newex;
- }
- /*
- * for sanity, update the length of the ex2 extent before
- * we insert ex3, if ex1 is NULL. This is to avoid temporary
- * overlap of blocks.
- */
- if (!ex1 && allocated > map->m_len)
- ex2->ee_len = cpu_to_le16(map->m_len);
- /* ex3: to ee_block + ee_len : uninitialised */
- if (allocated > map->m_len) {
- unsigned int newdepth;
- /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
- if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
- /*
- * map->m_lblk == ee_block is handled by the zerouout
- * at the beginning.
- * Mark first half uninitialized.
- * Mark second half initialized and zero out the
- * initialized extent
- */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = cpu_to_le16(ee_len - allocated);
- ext4_ext_mark_uninitialized(ex);
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
-
- ex3 = &newex;
- ex3->ee_block = cpu_to_le32(map->m_lblk);
- ext4_ext_store_pblock(ex3, newblock);
- ex3->ee_len = cpu_to_le16(allocated);
- err = ext4_ext_insert_extent(handle, inode, path,
- ex3, 0);
- if (err == -ENOSPC) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex,
- ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* blocks available from map->m_lblk */
- return allocated;
-
- } else if (err)
- goto fix_extent_len;
-
- /*
- * We need to zero out the second half because
- * an fallocate request can update file size and
- * converting the second half to initialized extent
- * implies that we can leak some junk data to user
- * space.
- */
- err = ext4_ext_zeroout(inode, ex3);
- if (err) {
- /*
- * We should actually mark the
- * second half as uninit and return error
- * Insert would have changed the extent
- */
- depth = ext_depth(inode);
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk,
- path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- return err;
- }
- /* get the second half extent details */
- ex = path[depth].p_ext;
- err = ext4_ext_get_access(handle, inode,
- path + depth);
- if (err)
- return err;
- ext4_ext_mark_uninitialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
- return err;
- }
-
- /* zeroed the second half */
- return allocated;
- }
- ex3 = &newex;
- ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
- ext4_ext_store_pblock(ex3, newblock + map->m_len);
- ex3->ee_len = cpu_to_le16(allocated - map->m_len);
- ext4_ext_mark_uninitialized(ex3);
- err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
- if (err == -ENOSPC && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
- /* blocks available from map->m_lblk */
- return allocated;
-
- } else if (err)
- goto fix_extent_len;
- /*
- * The depth, and hence eh & ex might change
- * as part of the insert above.
- */
- newdepth = ext_depth(inode);
- /*
- * update the extent length after successful insert of the
- * split extent
- */
- ee_len -= ext4_ext_get_actual_len(ex3);
- orig_ex.ee_len = cpu_to_le16(ee_len);
- may_zeroout = ee_block + ee_len <= eof_block;
-
- depth = newdepth;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
goto out;
- }
- eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
- if (ex2 != &newex)
- ex2 = ex;
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
-
- allocated = map->m_len;
-
- /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
- * to insert a extent in the middle zerout directly
- * otherwise give the extent a chance to merge to left
- */
- if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
- map->m_lblk != ee_block && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zero out the first half */
- /* blocks available from map->m_lblk */
- return allocated;
- }
- }
- /*
- * If there was a change of depth as part of the
- * insertion of ex3 above, we need to update the length
- * of the ex1 extent again here
- */
- if (ex1 && ex1 != ex) {
- ex1 = ex;
- ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
- ext4_ext_mark_uninitialized(ex1);
- ex2 = &newex;
- }
- /* ex2: map->m_lblk to map->m_lblk + maxblocks-1 : initialised */
- ex2->ee_block = cpu_to_le32(map->m_lblk);
- ext4_ext_store_pblock(ex2, newblock);
- ex2->ee_len = cpu_to_le16(allocated);
- if (ex2 != ex)
- goto insert;
- /*
- * New (initialized) extent starts from the first block
- * in the current extent. i.e., ex2 == ex
- * We have to see if it can be merged with the extent
- * on the left.
- */
- if (ex2 > EXT_FIRST_EXTENT(eh)) {
- /*
- * To merge left, pass "ex2 - 1" to try_to_merge(),
- * since it merges towards right _only_.
- */
- ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
- if (ret) {
- err = ext4_ext_correct_indexes(handle, inode, path);
- if (err)
- goto out;
- depth = ext_depth(inode);
- ex2--;
- }
+ ext4_ext_mark_initialized(ex);
+ ext4_ext_try_to_merge(inode, path, ex);
+ err = ext4_ext_dirty(handle, inode, path + depth);
+ goto out;
}
+
/*
- * Try to Merge towards right. This might be required
- * only when the whole extent is being written to.
- * i.e. ex2 == ex and ex3 == NULL.
+ * four cases:
+ * 1. split the extent into three extents.
+ * 2. split the extent into two extents, zeroout the first half.
+ * 3. split the extent into two extents, zeroout the second half.
+ * 4. split the extent into two extents with out zeroout.
*/
- if (!ex3) {
- ret = ext4_ext_try_to_merge(inode, path, ex2);
- if (ret) {
- err = ext4_ext_correct_indexes(handle, inode, path);
+ split_map.m_lblk = map->m_lblk;
+ split_map.m_len = map->m_len;
+
+ if (allocated > map->m_len) {
+ if (allocated <= EXT4_EXT_ZERO_LEN &&
+ (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ /* case 3 */
+ zero_ex.ee_block =
+ cpu_to_le32(map->m_lblk);
+ zero_ex.ee_len = cpu_to_le16(allocated);
+ ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_pblock(ex) + map->m_lblk - ee_block);
+ err = ext4_ext_zeroout(inode, &zero_ex);
if (err)
goto out;
+ split_map.m_lblk = map->m_lblk;
+ split_map.m_len = allocated;
+ } else if ((map->m_lblk - ee_block + map->m_len <
+ EXT4_EXT_ZERO_LEN) &&
+ (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+ /* case 2 */
+ if (map->m_lblk != ee_block) {
+ zero_ex.ee_block = ex->ee_block;
+ zero_ex.ee_len = cpu_to_le16(map->m_lblk -
+ ee_block);
+ ext4_ext_store_pblock(&zero_ex,
+ ext4_ext_pblock(ex));
+ err = ext4_ext_zeroout(inode, &zero_ex);
+ if (err)
+ goto out;
+ }
+
+ split_map.m_lblk = ee_block;
+ split_map.m_len = map->m_lblk - ee_block + map->m_len;
+ allocated = map->m_len;
}
}
- /* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + depth);
- goto out;
-insert:
- err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
- if (err == -ENOSPC && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zero out the first half */
- return allocated;
- } else if (err)
- goto fix_extent_len;
+
+ allocated = ext4_split_extent(handle, inode, path,
+ &split_map, split_flag, 0);
+ if (allocated < 0)
+ err = allocated;
+
out:
- ext4_ext_show_leaf(inode, path);
return err ? err : allocated;
-
-fix_extent_len:
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
- return err;
}
/*
* ext4_get_blocks_dio_write() when DIO to write
* to an uninitialized extent.
*
- * Writing to an uninitized extent may result in splitting the uninitialized
+ * Writing to an uninitialized extent may result in splitting the uninitialized
* extent into multiple /initialized uninitialized extents (up to three)
* There are three possibilities:
* a> There is no split required: Entire extent should be uninitialized
struct ext4_ext_path *path,
int flags)
{
- struct ext4_extent *ex, newex, orig_ex;
- struct ext4_extent *ex1 = NULL;
- struct ext4_extent *ex2 = NULL;
- struct ext4_extent *ex3 = NULL;
- ext4_lblk_t ee_block, eof_block;
- unsigned int allocated, ee_len, depth;
- ext4_fsblk_t newblock;
- int err = 0;
- int may_zeroout;
+ ext4_lblk_t eof_block;
+ ext4_lblk_t ee_block;
+ struct ext4_extent *ex;
+ unsigned int ee_len;
+ int split_flag = 0, depth;
ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
"block %llu, max_blocks %u\n", inode->i_ino,
inode->i_sb->s_blocksize_bits;
if (eof_block < map->m_lblk + map->m_len)
eof_block = map->m_lblk + map->m_len;
-
- depth = ext_depth(inode);
- ex = path[depth].p_ext;
- ee_block = le32_to_cpu(ex->ee_block);
- ee_len = ext4_ext_get_actual_len(ex);
- allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
-
- ex2 = ex;
- orig_ex.ee_block = ex->ee_block;
- orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
-
/*
* It is safe to convert extent to initialized via explicit
* zeroout only if extent is fully insde i_size or new_size.
*/
- may_zeroout = ee_block + ee_len <= eof_block;
-
- /*
- * If the uninitialized extent begins at the same logical
- * block where the write begins, and the write completely
- * covers the extent, then we don't need to split it.
- */
- if ((map->m_lblk == ee_block) && (allocated <= map->m_len))
- return allocated;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
- /* ex1: ee_block to map->m_lblk - 1 : uninitialized */
- if (map->m_lblk > ee_block) {
- ex1 = ex;
- ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
- ext4_ext_mark_uninitialized(ex1);
- ex2 = &newex;
- }
- /*
- * for sanity, update the length of the ex2 extent before
- * we insert ex3, if ex1 is NULL. This is to avoid temporary
- * overlap of blocks.
- */
- if (!ex1 && allocated > map->m_len)
- ex2->ee_len = cpu_to_le16(map->m_len);
- /* ex3: to ee_block + ee_len : uninitialised */
- if (allocated > map->m_len) {
- unsigned int newdepth;
- ex3 = &newex;
- ex3->ee_block = cpu_to_le32(map->m_lblk + map->m_len);
- ext4_ext_store_pblock(ex3, newblock + map->m_len);
- ex3->ee_len = cpu_to_le16(allocated - map->m_len);
- ext4_ext_mark_uninitialized(ex3);
- err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
- if (err == -ENOSPC && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zeroed the full extent */
- /* blocks available from map->m_lblk */
- return allocated;
-
- } else if (err)
- goto fix_extent_len;
- /*
- * The depth, and hence eh & ex might change
- * as part of the insert above.
- */
- newdepth = ext_depth(inode);
- /*
- * update the extent length after successful insert of the
- * split extent
- */
- ee_len -= ext4_ext_get_actual_len(ex3);
- orig_ex.ee_len = cpu_to_le16(ee_len);
- may_zeroout = ee_block + ee_len <= eof_block;
-
- depth = newdepth;
- ext4_ext_drop_refs(path);
- path = ext4_ext_find_extent(inode, map->m_lblk, path);
- if (IS_ERR(path)) {
- err = PTR_ERR(path);
- goto out;
- }
- ex = path[depth].p_ext;
- if (ex2 != &newex)
- ex2 = ex;
-
- err = ext4_ext_get_access(handle, inode, path + depth);
- if (err)
- goto out;
+ depth = ext_depth(inode);
+ ex = path[depth].p_ext;
+ ee_block = le32_to_cpu(ex->ee_block);
+ ee_len = ext4_ext_get_actual_len(ex);
- allocated = map->m_len;
- }
- /*
- * If there was a change of depth as part of the
- * insertion of ex3 above, we need to update the length
- * of the ex1 extent again here
- */
- if (ex1 && ex1 != ex) {
- ex1 = ex;
- ex1->ee_len = cpu_to_le16(map->m_lblk - ee_block);
- ext4_ext_mark_uninitialized(ex1);
- ex2 = &newex;
- }
- /*
- * ex2: map->m_lblk to map->m_lblk + map->m_len-1 : to be written
- * using direct I/O, uninitialised still.
- */
- ex2->ee_block = cpu_to_le32(map->m_lblk);
- ext4_ext_store_pblock(ex2, newblock);
- ex2->ee_len = cpu_to_le16(allocated);
- ext4_ext_mark_uninitialized(ex2);
- if (ex2 != ex)
- goto insert;
- /* Mark modified extent as dirty */
- err = ext4_ext_dirty(handle, inode, path + depth);
- ext_debug("out here\n");
- goto out;
-insert:
- err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
- if (err == -ENOSPC && may_zeroout) {
- err = ext4_ext_zeroout(inode, &orig_ex);
- if (err)
- goto fix_extent_len;
- /* update the extent length and mark as initialized */
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_dirty(handle, inode, path + depth);
- /* zero out the first half */
- return allocated;
- } else if (err)
- goto fix_extent_len;
-out:
- ext4_ext_show_leaf(inode, path);
- return err ? err : allocated;
+ split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
+ split_flag |= EXT4_EXT_MARK_UNINIT2;
-fix_extent_len:
- ex->ee_block = orig_ex.ee_block;
- ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
- ext4_ext_mark_uninitialized(ex);
- ext4_ext_dirty(handle, inode, path + depth);
- return err;
+ flags |= EXT4_GET_BLOCKS_PRE_IO;
+ return ext4_split_extent(handle, inode, path, map, split_flag, flags);
}
+
static int ext4_convert_unwritten_extents_endio(handle_t *handle,
struct inode *inode,
struct ext4_ext_path *path)
struct ext4_extent_header *eh;
int depth;
int err = 0;
- int ret = 0;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
+ ext_debug("ext4_convert_unwritten_extents_endio: inode %lu, logical"
+ "block %llu, max_blocks %u\n", inode->i_ino,
+ (unsigned long long)le32_to_cpu(ex->ee_block),
+ ext4_ext_get_actual_len(ex));
+
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
/* first mark the extent as initialized */
ext4_ext_mark_initialized(ex);
- /*
- * We have to see if it can be merged with the extent
- * on the left.
- */
- if (ex > EXT_FIRST_EXTENT(eh)) {
- /*
- * To merge left, pass "ex - 1" to try_to_merge(),
- * since it merges towards right _only_.
- */
- ret = ext4_ext_try_to_merge(inode, path, ex - 1);
- if (ret) {
- err = ext4_ext_correct_indexes(handle, inode, path);
- if (err)
- goto out;
- depth = ext_depth(inode);
- ex--;
- }
- }
- /*
- * Try to Merge towards right.
+ /* note: ext4_ext_correct_indexes() isn't needed here because
+ * borders are not changed
*/
- ret = ext4_ext_try_to_merge(inode, path, ex);
- if (ret) {
- err = ext4_ext_correct_indexes(handle, inode, path);
- if (err)
- goto out;
- depth = ext_depth(inode);
- }
+ ext4_ext_try_to_merge(inode, path, ex);
+
/* Mark modified extent as dirty */
err = ext4_ext_dirty(handle, inode, path + depth);
out:
{
int i, depth;
struct ext4_extent_header *eh;
- struct ext4_extent *ex, *last_ex;
+ struct ext4_extent *last_ex;
if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
return 0;
depth = ext_depth(inode);
eh = path[depth].p_hdr;
- ex = path[depth].p_ext;
if (unlikely(!eh->eh_entries)) {
EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
path, flags);
/*
* Flag the inode(non aio case) or end_io struct (aio case)
- * that this IO needs to convertion to written when IO is
+ * that this IO needs to conversion to written when IO is
* completed
*/
if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
struct ext4_map_blocks *map, int flags)
{
struct ext4_ext_path *path = NULL;
- struct ext4_extent_header *eh;
struct ext4_extent newex, *ex;
- ext4_fsblk_t newblock;
+ ext4_fsblk_t newblock = 0;
int err = 0, depth, ret;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext_debug("blocks %u/%u requested for inode %lu\n",
map->m_lblk, map->m_len, inode->i_ino);
+ trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
/* check in cache */
if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
err = -EIO;
goto out2;
}
- eh = path[depth].p_hdr;
ex = path[depth].p_ext;
if (ex) {
ext4_ext_mark_uninitialized(&newex);
/*
* io_end structure was created for every IO write to an
- * uninitialized extent. To avoid unecessary conversion,
+ * uninitialized extent. To avoid unnecessary conversion,
* here we flag the IO that really needs the conversion.
* For non asycn direct IO case, flag the inode state
- * that we need to perform convertion when IO is done.
+ * that we need to perform conversion when IO is done.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+ ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
ext4_ext_drop_refs(path);
kfree(path);
}
+ trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+ newblock, map->m_len, err ? err : allocated);
return err ? err : allocated;
}
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
return -EOPNOTSUPP;
+ trace_ext4_fallocate_enter(inode, offset, len, mode);
map.m_lblk = offset >> blkbits;
/*
* We can't just convert len to max_blocks because
ret = inode_newsize_ok(inode, (len + offset));
if (ret) {
mutex_unlock(&inode->i_mutex);
+ trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
return ret;
}
retry:
goto retry;
}
mutex_unlock(&inode->i_mutex);
+ trace_ext4_fallocate_exit(inode, offset, max_blocks,
+ ret > 0 ? ret2 : ret);
return ret > 0 ? ret2 : ret;
}
}
return ret > 0 ? ret2 : ret;
}
+
/*
* Callback function called for each extent to gather FIEMAP information.
*/
struct ext4_ext_cache *newex, struct ext4_extent *ex,
void *data)
{
- struct fiemap_extent_info *fieinfo = data;
- unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
__u64 logical;
__u64 physical;
__u64 length;
+ loff_t size;
__u32 flags = 0;
- int error;
+ int ret = 0;
+ struct fiemap_extent_info *fieinfo = data;
+ unsigned char blksize_bits;
- logical = (__u64)newex->ec_block << blksize_bits;
+ blksize_bits = inode->i_sb->s_blocksize_bits;
+ logical = (__u64)newex->ec_block << blksize_bits;
if (newex->ec_start == 0) {
- pgoff_t offset;
- struct page *page;
+ /*
+ * No extent in extent-tree contains block @newex->ec_start,
+ * then the block may stay in 1)a hole or 2)delayed-extent.
+ *
+ * Holes or delayed-extents are processed as follows.
+ * 1. lookup dirty pages with specified range in pagecache.
+ * If no page is got, then there is no delayed-extent and
+ * return with EXT_CONTINUE.
+ * 2. find the 1st mapped buffer,
+ * 3. check if the mapped buffer is both in the request range
+ * and a delayed buffer. If not, there is no delayed-extent,
+ * then return.
+ * 4. a delayed-extent is found, the extent will be collected.
+ */
+ ext4_lblk_t end = 0;
+ pgoff_t last_offset;
+ pgoff_t offset;
+ pgoff_t index;
+ struct page **pages = NULL;
struct buffer_head *bh = NULL;
+ struct buffer_head *head = NULL;
+ unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+ pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (pages == NULL)
+ return -ENOMEM;
offset = logical >> PAGE_SHIFT;
- page = find_get_page(inode->i_mapping, offset);
- if (!page || !page_has_buffers(page))
- return EXT_CONTINUE;
+repeat:
+ last_offset = offset;
+ head = NULL;
+ ret = find_get_pages_tag(inode->i_mapping, &offset,
+ PAGECACHE_TAG_DIRTY, nr_pages, pages);
+
+ if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+ /* First time, try to find a mapped buffer. */
+ if (ret == 0) {
+out:
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ /* just a hole. */
+ kfree(pages);
+ return EXT_CONTINUE;
+ }
- bh = page_buffers(page);
+ /* Try to find the 1st mapped buffer. */
+ end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+ blksize_bits;
+ if (!page_has_buffers(pages[0]))
+ goto out;
+ head = page_buffers(pages[0]);
+ if (!head)
+ goto out;
- if (!bh)
- return EXT_CONTINUE;
+ bh = head;
+ do {
+ if (buffer_mapped(bh)) {
+ /* get the 1st mapped buffer. */
+ if (end > newex->ec_block +
+ newex->ec_len)
+ /* The buffer is out of
+ * the request range.
+ */
+ goto out;
+ goto found_mapped_buffer;
+ }
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
- if (buffer_delay(bh)) {
- flags |= FIEMAP_EXTENT_DELALLOC;
- page_cache_release(page);
+ /* No mapped buffer found. */
+ goto out;
} else {
- page_cache_release(page);
- return EXT_CONTINUE;
+ /*Find contiguous delayed buffers. */
+ if (ret > 0 && pages[0]->index == last_offset)
+ head = page_buffers(pages[0]);
+ bh = head;
+ }
+
+found_mapped_buffer:
+ if (bh != NULL && buffer_delay(bh)) {
+ /* 1st or contiguous delayed buffer found. */
+ if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+ /*
+ * 1st delayed buffer found, record
+ * the start of extent.
+ */
+ flags |= FIEMAP_EXTENT_DELALLOC;
+ newex->ec_block = end;
+ logical = (__u64)end << blksize_bits;
+ }
+ /* Find contiguous delayed buffers. */
+ do {
+ if (!buffer_delay(bh))
+ goto found_delayed_extent;
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
+
+ for (index = 1; index < ret; index++) {
+ if (!page_has_buffers(pages[index])) {
+ bh = NULL;
+ break;
+ }
+ head = page_buffers(pages[index]);
+ if (!head) {
+ bh = NULL;
+ break;
+ }
+ if (pages[index]->index !=
+ pages[0]->index + index) {
+ /* Blocks are not contiguous. */
+ bh = NULL;
+ break;
+ }
+ bh = head;
+ do {
+ if (!buffer_delay(bh))
+ /* Delayed-extent ends. */
+ goto found_delayed_extent;
+ bh = bh->b_this_page;
+ end++;
+ } while (bh != head);
+ }
+ } else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+ /* a hole found. */
+ goto out;
+
+found_delayed_extent:
+ newex->ec_len = min(end - newex->ec_block,
+ (ext4_lblk_t)EXT_INIT_MAX_LEN);
+ if (ret == nr_pages && bh != NULL &&
+ newex->ec_len < EXT_INIT_MAX_LEN &&
+ buffer_delay(bh)) {
+ /* Have not collected an extent and continue. */
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ goto repeat;
}
+
+ for (index = 0; index < ret; index++)
+ page_cache_release(pages[index]);
+ kfree(pages);
}
physical = (__u64)newex->ec_start << blksize_bits;
if (ex && ext4_ext_is_uninitialized(ex))
flags |= FIEMAP_EXTENT_UNWRITTEN;
- /*
- * If this extent reaches EXT_MAX_BLOCK, it must be last.
- *
- * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
- * this also indicates no more allocated blocks.
- *
- * XXX this might miss a single-block extent at EXT_MAX_BLOCK
- */
- if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
- newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
- loff_t size = i_size_read(inode);
- loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+ size = i_size_read(inode);
+ if (logical + length >= size)
flags |= FIEMAP_EXTENT_LAST;
- if ((flags & FIEMAP_EXTENT_DELALLOC) &&
- logical+length > size)
- length = (size - logical + bs - 1) & ~(bs-1);
- }
- error = fiemap_fill_next_extent(fieinfo, logical, physical,
+ ret = fiemap_fill_next_extent(fieinfo, logical, physical,
length, flags);
- if (error < 0)
- return error;
- if (error == 1)
+ if (ret < 0)
+ return ret;
+ if (ret == 1)
return EXT_BREAK;
-
return EXT_CONTINUE;
}