X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=fs%2Fext4%2Fmballoc.c;h=5efe721c15324087bbaa5e11278495b6cd6a22a1;hp=e2d8be8f28bfb8555644bef5100b2f9b8c2cbe9a;hb=f213db429b883a2d5403de0b1ce92fb7d7ee979e;hpb=b4949b84567f3ae1227d076fc95bbd8efea06506 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e2d8be8f28bf..5efe721c1532 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -652,7 +652,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, ext4_grpblk_t min; ext4_grpblk_t max; ext4_grpblk_t chunk; - unsigned short border; + unsigned int border; BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); @@ -1312,6 +1312,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, void *buddy2; struct super_block *sb = e4b->bd_sb; + if (WARN_ON(count == 0)) + return; BUG_ON(first + count > (sb->s_blocksize << 3)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); mb_check_buddy(e4b); @@ -2027,7 +2029,11 @@ repeat: group = ac->ac_g_ex.fe_group; for (i = 0; i < ngroups; group++, i++) { - if (group == ngroups) + /* + * Artificially restricted ngroups for non-extent + * files makes group > ngroups possible on first loop. + */ + if (group >= ngroups) group = 0; /* This now checks without needing the buddy page */ @@ -2128,7 +2134,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) struct ext4_buddy e4b; struct sg { struct ext4_group_info info; - ext4_grpblk_t counters[16]; + ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2]; } sg; group--; @@ -2567,6 +2573,9 @@ int ext4_mb_release(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); + if (sbi->s_proc) + remove_proc_entry("mb_groups", sbi->s_proc); + if (sbi->s_group_info) { for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); @@ -2614,8 +2623,6 @@ int ext4_mb_release(struct super_block *sb) } free_percpu(sbi->s_locality_groups); - if (sbi->s_proc) - remove_proc_entry("mb_groups", sbi->s_proc); return 0; } @@ -2817,7 +2824,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_error(sb, "Allocating blocks %llu-%llu which overlap " "fs metadata\n", block, block+len); /* File system mounted not to panic on error - * Fix the bitmap and repeat the block allocation + * Fix the bitmap and return EIO * We leak some of the blocks here. */ ext4_lock_group(sb, ac->ac_b_ex.fe_group); @@ -2826,7 +2833,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) - err = -EAGAIN; + err = -EIO; goto out_err; } @@ -2865,8 +2872,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); - atomic_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_sub(ac->ac_b_ex.fe_len, + &sbi->s_flex_groups[flex_group].free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -2996,6 +3003,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (ar->pright && start + size - 1 >= ar->lright) size -= start + size - ar->lright; + /* + * Trim allocation request for filesystems with artificially small + * groups. + */ + if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)) + size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb); + end = start + size; /* check we don't cross already preallocated blocks */ @@ -3062,7 +3076,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, } BUG_ON(start + size <= ac->ac_o_ex.fe_logical && start > ac->ac_o_ex.fe_logical); - BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); + BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); /* now prepare goal request */ @@ -3123,13 +3137,31 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac) { struct ext4_prealloc_space *pa = ac->ac_pa; - int len; + struct ext4_buddy e4b; + int err; - if (pa && pa->pa_type == MB_INODE_PA) { - len = ac->ac_b_ex.fe_len; - pa->pa_free += len; + if (pa == NULL) { + if (ac->ac_f_ex.fe_len == 0) + return; + err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); + if (err) { + /* + * This should never happen since we pin the + * pages in the ext4_allocation_context so + * ext4_mb_load_buddy() should never fail. + */ + WARN(1, "mb_load_buddy failed (%d)", err); + return; + } + ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, + ac->ac_f_ex.fe_len); + ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); + ext4_mb_unload_buddy(&e4b); + return; } - + if (pa->pa_type == MB_INODE_PA) + pa->pa_free += ac->ac_b_ex.fe_len; } /* @@ -3373,6 +3405,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3386,11 +3421,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -3484,7 +3521,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac) win = offs; ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical - - EXT4_B2C(sbi, win); + EXT4_NUM_B2C(sbi, win); BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical); BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len); } @@ -4057,7 +4094,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, /* set up allocation goals */ memset(ac, 0, sizeof(struct ext4_allocation_context)); - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4177,7 +4214,7 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) /* The max size of hash table is PREALLOC_TB_SIZE */ order = PREALLOC_TB_SIZE - 1; /* Add the prealloc space to lg */ - rcu_read_lock(); + spin_lock(&lg->lg_prealloc_lock); list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], pa_inode_list) { spin_lock(&tmp_pa->pa_lock); @@ -4201,12 +4238,12 @@ static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) if (!added) list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list[order]); - rcu_read_unlock(); + spin_unlock(&lg->lg_prealloc_lock); /* Now trim the list to be not more than 8 elements */ if (lg_prealloc_count > 8) { ext4_mb_discard_lg_preallocations(sb, lg, - order, lg_prealloc_count); + order, lg_prealloc_count); return; } return ; @@ -4371,18 +4408,7 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); - if (*errp == -EAGAIN) { - /* - * drop the reference that we took - * in ext4_mb_use_best_found - */ - ext4_mb_release_context(ac); - ac->ac_b_ex.fe_group = 0; - ac->ac_b_ex.fe_start = 0; - ac->ac_b_ex.fe_len = 0; - ac->ac_status = AC_STATUS_CONTINUE; - goto repeat; - } else if (*errp) + if (*errp) errout: ext4_discard_allocated_blocks(ac); else { @@ -4595,7 +4621,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4609,7 +4635,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) @@ -4633,7 +4659,7 @@ do_more: EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; } - count_clusters = EXT4_B2C(sbi, count); + count_clusters = EXT4_NUM_B2C(sbi, count); bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) { err = -EIO; @@ -4690,12 +4716,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); - if (!new_entry) { - err = -ENOMEM; - goto error_return; - } + new_entry = kmem_cache_alloc(ext4_free_ext_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->start_cluster = bit; new_entry->group = block_group; new_entry->count = count_clusters; @@ -4722,8 +4748,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(count_clusters, + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b); @@ -4863,12 +4889,12 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeclusters_counter, - EXT4_B2C(sbi, blocks_freed)); + EXT4_NUM_B2C(sbi, blocks_freed)); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); - atomic_add(EXT4_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), + &sbi->s_flex_groups[flex_group].free_clusters); } ext4_mb_unload_buddy(&e4b);