*
* The inode preallocation space is used looking at the _logical_ start
* block. If only the logical file block falls within the range of prealloc
- * space we will consume the particular prealloc space. This make sure that
- * that the we have contiguous physical blocks representing the file blocks
+ * space we will consume the particular prealloc space. This makes sure that
+ * we have contiguous physical blocks representing the file blocks
*
* The important thing to be noted in case of inode prealloc space is that
* we don't modify the values associated to inode prealloc space except
*
* If we are not able to find blocks in the inode prealloc space and if we
* have the group allocation flag set then we look at the locality group
- * prealloc space. These are per CPU prealloc list repreasented as
+ * prealloc space. These are per CPU prealloc list represented as
*
* ext4_sb_info.s_locality_groups[smp_processor_id()]
*
* we are doing a group prealloc we try to normalize the request to
* sbi->s_mb_group_prealloc. Default value of s_mb_group_prealloc is
* 512 blocks. This can be tuned via
- * /sys/fs/ext4/<partition/mb_group_prealloc. The value is represented in
+ * /sys/fs/ext4/<partition>/mb_group_prealloc. The value is represented in
* terms of number of blocks. If we have mounted the file system with -O
* stripe=<value> option the group prealloc request is normalized to the
- * stripe value (sbi->s_stripe)
+ * the smallest multiple of the stripe value (sbi->s_stripe) which is
+ * greater than the default mb_group_prealloc.
*
- * The regular allocator(using the buddy cache) supports few tunables.
+ * The regular allocator (using the buddy cache) supports a few tunables.
*
* /sys/fs/ext4/<partition>/mb_min_to_scan
* /sys/fs/ext4/<partition>/mb_max_to_scan
* best extent in the found extents. Searching for the blocks starts with
* the group specified as the goal value in allocation context via
* ac_g_ex. Each group is first checked based on the criteria whether it
- * can used for allocation. ext4_mb_good_group explains how the groups are
+ * can be used for allocation. ext4_mb_good_group explains how the groups are
* checked.
*
* Both the prealloc space are getting populated as above. So for the first
b2 = (unsigned char *) bitmap;
for (i = 0; i < e4b->bd_sb->s_blocksize; i++) {
if (b1[i] != b2[i]) {
- printk(KERN_ERR "corruption in group %u "
- "at byte %u(%u): %x in copy != %x "
- "on disk/prealloc\n",
- e4b->bd_group, i, i * 8, b1[i], b2[i]);
+ ext4_msg(e4b->bd_sb, KERN_ERR,
+ "corruption in group %u "
+ "at byte %u(%u): %x in copy != %x "
+ "on disk/prealloc",
+ e4b->bd_group, i, i * 8, b1[i], b2[i]);
BUG();
}
}
grp = ext4_get_group_info(sb, group);
e4b->bd_blkbits = sb->s_blocksize_bits;
- e4b->bd_info = ext4_get_group_info(sb, group);
+ e4b->bd_info = grp;
e4b->bd_sb = sb;
e4b->bd_group = group;
e4b->bd_buddy_page = NULL;
}
}
-static void mb_set_bits(void *bm, int cur, int len)
+void ext4_set_bits(void *bm, int cur, int len)
{
__u32 *addr;
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
- mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
+ ext4_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
mb_check_buddy(e4b);
return ret;
EXT4_DESC_PER_BLOCK_BITS(sb);
meta_group_info = kmalloc(metalen, GFP_KERNEL);
if (meta_group_info == NULL) {
- printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
- "buddy group\n");
+ ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate mem "
+ "for a buddy group");
goto exit_meta_group_info;
}
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] =
meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
- printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
+ ext4_msg(sb, KERN_ERR, "EXT4-fs: can't allocate buddy mem");
goto exit_group_info;
}
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
exit_group_info:
/* If a meta_group_info table has been allocated, release it now */
- if (group % EXT4_DESC_PER_BLOCK(sb) == 0)
+ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) {
kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]);
+ sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL;
+ }
exit_meta_group_info:
return -ENOMEM;
} /* ext4_mb_add_groupinfo */
/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
* kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
* So a two level scheme suffices for now. */
- sbi->s_group_info = kzalloc(array_size, GFP_KERNEL);
+ sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
if (sbi->s_group_info == NULL) {
- printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n");
+ ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
return -ENOMEM;
}
sbi->s_buddy_cache = new_inode(sb);
if (sbi->s_buddy_cache == NULL) {
- printk(KERN_ERR "EXT4-fs: can't get new inode\n");
+ ext4_msg(sb, KERN_ERR, "can't get new inode");
goto err_freesgi;
}
- sbi->s_buddy_cache->i_ino = get_next_ino();
+ /* To avoid potentially colliding with an valid on-disk inode number,
+ * use EXT4_BAD_INO for the buddy cache inode number. This inode is
+ * not in the inode hash, so it should never be found by iget(), but
+ * this will avoid confusion if it ever shows up during debugging. */
+ sbi->s_buddy_cache->i_ino = EXT4_BAD_INO;
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
if (desc == NULL) {
- printk(KERN_ERR
- "EXT4-fs: can't read descriptor %u\n", i);
+ ext4_msg(sb, KERN_ERR, "can't read descriptor %u", i);
goto err_freebuddy;
}
if (ext4_mb_add_groupinfo(sb, i, desc) != 0)
kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache);
err_freesgi:
- kfree(sbi->s_group_info);
+ ext4_kvfree(sbi->s_group_info);
return -ENOMEM;
}
slab_size, 0, SLAB_RECLAIM_ACCOUNT,
NULL);
+ ext4_groupinfo_caches[cache_index] = cachep;
+
mutex_unlock(&ext4_grpinfo_slab_create_mutex);
if (!cachep) {
- printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+ printk(KERN_EMERG
+ "EXT4-fs: no memory for groupinfo slab cache\n");
return -ENOMEM;
}
- ext4_groupinfo_caches[cache_index] = cachep;
-
return 0;
}
i++;
} while (i <= sb->s_blocksize_bits + 1);
- /* init file for buddy data */
- ret = ext4_mb_init_backend(sb);
- if (ret != 0) {
- goto out;
- }
-
spin_lock_init(&sbi->s_md_lock);
spin_lock_init(&sbi->s_bal_lock);
sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
+ /*
+ * If there is a s_stripe > 1, then we set the s_mb_group_prealloc
+ * to the lowest multiple of s_stripe which is bigger than
+ * the s_mb_group_prealloc as determined above. We want
+ * the preallocation size to be an exact multiple of the
+ * RAID stripe size so that preallocations don't fragment
+ * the stripes.
+ */
+ if (sbi->s_stripe > 1) {
+ sbi->s_mb_group_prealloc = roundup(
+ sbi->s_mb_group_prealloc, sbi->s_stripe);
+ }
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
spin_lock_init(&lg->lg_prealloc_lock);
}
+ /* init file for buddy data */
+ ret = ext4_mb_init_backend(sb);
+ if (ret != 0) {
+ goto out;
+ }
+
if (sbi->s_proc)
proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
&ext4_mb_seq_groups_fops, sb);
EXT4_DESC_PER_BLOCK_BITS(sb);
for (i = 0; i < num_meta_group_infos; i++)
kfree(sbi->s_group_info[i]);
- kfree(sbi->s_group_info);
+ ext4_kvfree(sbi->s_group_info);
}
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
if (sbi->s_buddy_cache)
iput(sbi->s_buddy_cache);
if (sbi->s_mb_stats) {
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u blocks %u reqs (%u success)\n",
+ ext4_msg(sb, KERN_INFO,
+ "mballoc: %u blocks %u reqs (%u success)",
atomic_read(&sbi->s_bal_allocated),
atomic_read(&sbi->s_bal_reqs),
atomic_read(&sbi->s_bal_success));
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u extents scanned, %u goal hits, "
- "%u 2^N hits, %u breaks, %u lost\n",
+ ext4_msg(sb, KERN_INFO,
+ "mballoc: %u extents scanned, %u goal hits, "
+ "%u 2^N hits, %u breaks, %u lost",
atomic_read(&sbi->s_bal_ex_scanned),
atomic_read(&sbi->s_bal_goals),
atomic_read(&sbi->s_bal_2orders),
atomic_read(&sbi->s_bal_breaks),
atomic_read(&sbi->s_mb_lost_chunks));
- printk(KERN_INFO
- "EXT4-fs: mballoc: %lu generated and it took %Lu\n",
- sbi->s_mb_buddies_generated++,
+ ext4_msg(sb, KERN_INFO,
+ "mballoc: %lu generated and it took %Lu",
+ sbi->s_mb_buddies_generated,
sbi->s_mb_generation_time);
- printk(KERN_INFO
- "EXT4-fs: mballoc: %u preallocated, %u discarded\n",
+ ext4_msg(sb, KERN_INFO,
+ "mballoc: %u preallocated, %u discarded",
atomic_read(&sbi->s_mb_preallocated),
atomic_read(&sbi->s_mb_discarded));
}
rb_erase(&entry->node, &(db->bb_free_root));
mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+ /*
+ * Clear the trimmed flag for the group so that the next
+ * ext4_trim_fs can trim it.
+ * If the volume is mounted with -o discard, online discard
+ * is supported and the free blocks will be trimmed online.
+ */
+ if (!test_opt(sb, DISCARD))
+ EXT4_MB_GRP_CLEAR_TRIMMED(db);
+
if (!db->bb_free_root.rb_node) {
/* No more items in the per group rb tree
* balance refcounts from ext4_mb_free_metadata()
* We leak some of the blocks here.
*/
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
- mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
- ac->ac_b_ex.fe_len);
+ ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+ ac->ac_b_ex.fe_len);
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
}
}
#endif
- mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len);
+ ext4_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+ ac->ac_b_ex.fe_len);
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
ext4_free_blks_set(sb, gdp,
/*
* here we normalize request for locality group
- * Group request are normalized to s_strip size if we set the same via mount
- * option. If not we set it to s_mb_group_prealloc which can be configured via
+ * Group request are normalized to s_mb_group_prealloc, which goes to
+ * s_strip if we set the same via mount option.
+ * s_mb_group_prealloc can be configured via
* /sys/fs/ext4/<partition>/mb_group_prealloc
*
* XXX: should we try to preallocate more than the group has now?
struct ext4_locality_group *lg = ac->ac_lg;
BUG_ON(lg == NULL);
- if (EXT4_SB(sb)->s_stripe)
- ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
- else
- ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
+ ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
mb_debug(1, "#%u: goal %u blocks for locality group\n",
current->pid, ac->ac_g_ex.fe_len);
}
if (start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical) {
- printk(KERN_ERR "start %lu, size %lu, fe_logical %lu\n",
- (unsigned long) start, (unsigned long) size,
- (unsigned long) ac->ac_o_ex.fe_logical);
+ ext4_msg(ac->ac_sb, KERN_ERR,
+ "start %lu, size %lu, fe_logical %lu",
+ (unsigned long) start, (unsigned long) size,
+ (unsigned long) ac->ac_o_ex.fe_logical);
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
while (n) {
entry = rb_entry(n, struct ext4_free_data, node);
- mb_set_bits(bitmap, entry->start_blk, entry->count);
+ ext4_set_bits(bitmap, entry->start_blk, entry->count);
n = rb_next(n);
}
return;
if (unlikely(len == 0))
continue;
BUG_ON(groupnr != group);
- mb_set_bits(bitmap, start, len);
+ ext4_set_bits(bitmap, start, len);
preallocated += len;
count++;
}
bit = next + 1;
}
if (free != pa->pa_free) {
- printk(KERN_CRIT "pa %p: logic %lu, phys. %lu, len %lu\n",
- pa, (unsigned long) pa->pa_lstart,
- (unsigned long) pa->pa_pstart,
- (unsigned long) pa->pa_len);
+ ext4_msg(e4b->bd_sb, KERN_CRIT,
+ "pa %p: logic %lu, phys. %lu, len %lu",
+ pa, (unsigned long) pa->pa_lstart,
+ (unsigned long) pa->pa_pstart,
+ (unsigned long) pa->pa_len);
ext4_grp_locked_error(sb, group, 0, 0, "free %u, pa_free %u",
free, pa->pa_free);
/*
* use preallocation while we're discarding it */
spin_unlock(&pa->pa_lock);
spin_unlock(&ei->i_prealloc_lock);
- printk(KERN_ERR "uh-oh! used pa while discarding\n");
+ ext4_msg(sb, KERN_ERR,
+ "uh-oh! used pa while discarding");
WARN_ON(1);
schedule_timeout_uninterruptible(HZ);
goto repeat;
(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED))
return;
- printk(KERN_ERR "EXT4-fs: Can't allocate:"
- " Allocation context details:\n");
- printk(KERN_ERR "EXT4-fs: status %d flags %d\n",
+ ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: Can't allocate:"
+ " Allocation context details:");
+ ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: status %d flags %d",
ac->ac_status, ac->ac_flags);
- printk(KERN_ERR "EXT4-fs: orig %lu/%lu/%lu@%lu, goal %lu/%lu/%lu@%lu, "
- "best %lu/%lu/%lu@%lu cr %d\n",
+ ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: orig %lu/%lu/%lu@%lu, "
+ "goal %lu/%lu/%lu@%lu, "
+ "best %lu/%lu/%lu@%lu cr %d",
(unsigned long)ac->ac_o_ex.fe_group,
(unsigned long)ac->ac_o_ex.fe_start,
(unsigned long)ac->ac_o_ex.fe_len,
(unsigned long)ac->ac_b_ex.fe_len,
(unsigned long)ac->ac_b_ex.fe_logical,
(int)ac->ac_criteria);
- printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned,
- ac->ac_found);
- printk(KERN_ERR "EXT4-fs: groups: \n");
+ ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: %lu scanned, %d found",
+ ac->ac_ex_scanned, ac->ac_found);
+ ext4_msg(ac->ac_sb, KERN_ERR, "EXT4-fs: groups: ");
ngroups = ext4_get_groups_count(sb);
for (i = 0; i < ngroups; i++) {
struct ext4_group_info *grp = ext4_get_group_info(sb, i);
}
ext4_mark_super_dirty(sb);
error_return:
- if (freed)
+ if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
}
/**
- * ext4_add_groupblocks() -- Add given blocks to an existing group
+ * ext4_group_add_blocks() -- Add given blocks to an existing group
* @handle: handle to this transaction
* @sb: super block
* @block: start physcial block to add to the block group
*
* This marks the blocks as free in the bitmap and buddy.
*/
-void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count)
{
struct buffer_head *bitmap_bh = NULL;
struct ext4_buddy e4b;
int err = 0, ret, blk_free_count;
ext4_grpblk_t blocks_freed;
- struct ext4_group_info *grp;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
+ if (count == 0)
+ return 0;
+
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
- grp = ext4_get_group_info(sb, block_group);
/*
* Check to see if we are freeing blocks across a group
* boundary.
*/
- if (bit + count > EXT4_BLOCKS_PER_GROUP(sb))
+ if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) {
+ ext4_warning(sb, "too much blocks added to group %u\n",
+ block_group);
+ err = -EINVAL;
goto error_return;
+ }
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
- if (!bitmap_bh)
+ if (!bitmap_bh) {
+ err = -EIO;
goto error_return;
+ }
+
desc = ext4_get_group_desc(sb, block_group, &gd_bh);
- if (!desc)
+ if (!desc) {
+ err = -EIO;
goto error_return;
+ }
if (in_range(ext4_block_bitmap(sb, desc), block, count) ||
in_range(ext4_inode_bitmap(sb, desc), block, count) ||
ext4_error(sb, "Adding blocks in system zones - "
"Block = %llu, count = %lu",
block, count);
+ err = -EINVAL;
goto error_return;
}
error_return:
brelse(bitmap_bh);
ext4_std_error(sb, err);
- return;
+ return err;
}
/**
{
struct ext4_free_extent ex;
+ trace_ext4_trim_extent(sb, group, start, count);
+
assert_spin_locked(ext4_group_lock_ptr(sb, group));
ex.fe_start = start;
/**
* ext4_trim_all_free -- function to trim all free space in alloc. group
* @sb: super block for file system
- * @e4b: ext4 buddy
+ * @group: group to be trimmed
* @start: first group block to examine
* @max: last group block to examine
* @minblocks: minimum extent block count
ext4_grpblk_t minblocks)
{
void *bitmap;
- ext4_grpblk_t next, count = 0;
+ ext4_grpblk_t next, count = 0, free_count = 0;
struct ext4_buddy e4b;
int ret;
+ trace_ext4_trim_all_free(sb, group, start, max);
+
ret = ext4_mb_load_buddy(sb, group, &e4b);
if (ret) {
ext4_error(sb, "Error in loading buddy "
bitmap = e4b.bd_bitmap;
ext4_lock_group(sb, group);
+ if (EXT4_MB_GRP_WAS_TRIMMED(e4b.bd_info) &&
+ minblocks >= atomic_read(&EXT4_SB(sb)->s_last_trim_minblks))
+ goto out;
+
start = (e4b.bd_info->bb_first_free > start) ?
e4b.bd_info->bb_first_free : start;
next - start, group, &e4b);
count += next - start;
}
+ free_count += next - start;
start = next + 1;
if (fatal_signal_pending(current)) {
ext4_lock_group(sb, group);
}
- if ((e4b.bd_info->bb_free - count) < minblocks)
+ if ((e4b.bd_info->bb_free - free_count) < minblocks)
break;
}
+
+ if (!ret)
+ EXT4_MB_GRP_SET_TRIMMED(e4b.bd_info);
+out:
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
+ if (start + len <= first_data_blk)
+ goto out;
if (start < first_data_blk) {
len -= first_data_blk - start;
start = first_data_blk;
}
range->len = trimmed * sb->s_blocksize;
+ if (!ret)
+ atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
+
+out:
return ret;
}