ext4: fix trimming starting with block 0 with small blocksize
[pandora-kernel.git] / fs / ext4 / mballoc.c
index 19aa0d4..851f49b 100644 (file)
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
 static struct kmem_cache *ext4_free_ext_cachep;
+
+/* We create slab caches for groupinfo data structures based on the
+ * superblock block size.  There will be one per mounted filesystem for
+ * each unique s_blocksize_bits */
+#define NR_GRPINFO_CACHES      \
+       (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                        ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -938,6 +946,85 @@ out:
        return err;
 }
 
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen  whild holding the buddy cache
+ * lock
+ */
+static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+                                       ext4_group_t group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       int groups_per_page;
+       ext4_group_t ngroups = ext4_get_groups_count(sb);
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+
+       groups_per_page = blocks_per_page >> 1;
+       if (groups_per_page == 0)
+               groups_per_page = 1;
+       /* read all groups the page covers into the cache */
+       for (i = 0; i < groups_per_page; i++) {
+
+               if ((first_group + i) >= ngroups)
+                       break;
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               down_write_nested(&grp->alloc_sem, i);
+       }
+       return i;
+}
+
+static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+                                        ext4_group_t group, int locked_group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+       /* release locks on all the groups */
+       for (i = 0; i < locked_group; i++) {
+
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               up_write(&grp->alloc_sem);
+       }
+
+}
+
 /*
  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
  * block group lock of all groups for this page; do not hold the BG lock when
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
-/*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen  whild holding the buddy cache
- * lock
- */
-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
-{
-       int i;
-       int block, pnum;
-       int blocks_per_page;
-       int groups_per_page;
-       ext4_group_t ngroups = ext4_get_groups_count(sb);
-       ext4_group_t first_group;
-       struct ext4_group_info *grp;
-
-       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-       /*
-        * the buddy cache inode stores the block bitmap
-        * and buddy information in consecutive blocks.
-        * So for each group we need two blocks.
-        */
-       block = group * 2;
-       pnum = block / blocks_per_page;
-       first_group = pnum * blocks_per_page / 2;
-
-       groups_per_page = blocks_per_page >> 1;
-       if (groups_per_page == 0)
-               groups_per_page = 1;
-       /* read all groups the page covers into the cache */
-       for (i = 0; i < groups_per_page; i++) {
-
-               if ((first_group + i) >= ngroups)
-                       break;
-               grp = ext4_get_group_info(sb, first_group + i);
-               /* take all groups write allocation
-                * semaphore. This make sure there is
-                * no block allocation going on in any
-                * of that groups
-                */
-               down_write_nested(&grp->alloc_sem, i);
-       }
-       return i;
-}
-
-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
-                                       ext4_group_t group, int locked_group)
-{
-       int i;
-       int block, pnum;
-       int blocks_per_page;
-       ext4_group_t first_group;
-       struct ext4_group_info *grp;
-
-       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-       /*
-        * the buddy cache inode stores the block bitmap
-        * and buddy information in consecutive blocks.
-        * So for each group we need two blocks.
-        */
-       block = group * 2;
-       pnum = block / blocks_per_page;
-       first_group = pnum * blocks_per_page / 2;
-       /* release locks on all the groups */
-       for (i = 0; i < locked_group; i++) {
-
-               grp = ext4_get_group_info(sb, first_group + i);
-               /* take all groups write allocation
-                * semaphore. This make sure there is
-                * no block allocation going on in any
-                * of that groups
-                */
-               up_write(&grp->alloc_sem);
-       }
-
-}
-
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
        .release        = seq_release,
 };
 
+static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+{
+       int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+       struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
+
+       BUG_ON(!cachep);
+       return cachep;
+}
 
 /* Create and initialize ext4_group_info data for the given group. */
 int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                          struct ext4_group_desc *desc)
 {
-       int i, len;
+       int i;
        int metalen = 0;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_group_info **meta_group_info;
+       struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
        /*
         * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                        meta_group_info;
        }
 
-       /*
-        * calculate needed size. if change bb_counters size,
-        * don't forget about ext4_mb_generate_buddy()
-        */
-       len = offsetof(typeof(**meta_group_info),
-                      bb_counters[sb->s_blocksize_bits + 2]);
-
        meta_group_info =
                sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
        i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 
-       meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+       meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
        if (meta_group_info[i] == NULL) {
                printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
                goto exit_group_info;
        }
+       memset(meta_group_info[i], 0, kmem_cache_size(cachep));
        set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
                &(meta_group_info[i]->bb_state));
 
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
        int num_meta_group_infos_max;
        int array_size;
        struct ext4_group_desc *desc;
+       struct kmem_cache *cachep;
 
        /* This is the number of blocks used by GDT */
        num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2373,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
                printk(KERN_ERR "EXT4-fs: can't get new inode\n");
                goto err_freesgi;
        }
+       sbi->s_buddy_cache->i_ino = get_next_ino();
        EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
        for (i = 0; i < ngroups; i++) {
                desc = ext4_get_group_desc(sb, i, NULL);
@@ -2388,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
        return 0;
 
 err_freebuddy:
+       cachep = get_groupinfo_cache(sb->s_blocksize_bits);
        while (i-- > 0)
-               kfree(ext4_get_group_info(sb, i));
+               kmem_cache_free(cachep, ext4_get_group_info(sb, i));
        i = num_meta_group_infos;
        while (i-- > 0)
                kfree(sbi->s_group_info[i]);
@@ -2406,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        unsigned offset;
        unsigned max;
        int ret;
+       int cache_index;
+       struct kmem_cache *cachep;
+       char *namep = NULL;
 
        i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
 
        sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
        if (sbi->s_mb_offsets == NULL) {
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
        }
 
        i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
        sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
        if (sbi->s_mb_maxs == NULL) {
-               kfree(sbi->s_mb_offsets);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+       cachep = ext4_groupinfo_caches[cache_index];
+       if (!cachep) {
+               char name[32];
+               int len = offsetof(struct ext4_group_info,
+                                       bb_counters[sb->s_blocksize_bits + 2]);
+
+               sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
+               namep = kstrdup(name, GFP_KERNEL);
+               if (!namep) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+
+               /* Need to free the kmem_cache_name() when we
+                * destroy the slab */
+               cachep = kmem_cache_create(namep, len, 0,
+                                            SLAB_RECLAIM_ACCOUNT, NULL);
+               if (!cachep) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ext4_groupinfo_caches[cache_index] = cachep;
        }
 
        /* order 0 is regular bitmap */
@@ -2439,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        /* init file for buddy data */
        ret = ext4_mb_init_backend(sb);
        if (ret != 0) {
-               kfree(sbi->s_mb_offsets);
-               kfree(sbi->s_mb_maxs);
-               return ret;
+               goto out;
        }
 
        spin_lock_init(&sbi->s_md_lock);
@@ -2456,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
        sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
        if (sbi->s_locality_groups == NULL) {
-               kfree(sbi->s_mb_offsets);
-               kfree(sbi->s_mb_maxs);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out;
        }
        for_each_possible_cpu(i) {
                struct ext4_locality_group *lg;
@@ -2475,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
        if (sbi->s_journal)
                sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-       return 0;
+out:
+       if (ret) {
+               kfree(sbi->s_mb_offsets);
+               kfree(sbi->s_mb_maxs);
+               kfree(namep);
+       }
+       return ret;
 }
 
 /* need to called with the ext4 group lock held */
@@ -2503,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
        int num_meta_group_infos;
        struct ext4_group_info *grinfo;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
        if (sbi->s_group_info) {
                for (i = 0; i < ngroups; i++) {
@@ -2513,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
                        ext4_lock_group(sb, i);
                        ext4_mb_cleanup_pa(grinfo);
                        ext4_unlock_group(sb, i);
-                       kfree(grinfo);
+                       kmem_cache_free(cachep, grinfo);
                }
                num_meta_group_infos = (ngroups +
                                EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2557,20 +2605,15 @@ int ext4_mb_release(struct super_block *sb)
        return 0;
 }
 
-static inline void ext4_issue_discard(struct super_block *sb,
+static inline int ext4_issue_discard(struct super_block *sb,
                ext4_group_t block_group, ext4_grpblk_t block, int count)
 {
-       int ret;
        ext4_fsblk_t discard_block;
 
        discard_block = block + ext4_group_first_block_no(sb, block_group);
        trace_ext4_discard_blocks(sb,
                        (unsigned long long) discard_block, count);
-       ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
-       if (ret == EOPNOTSUPP) {
-               ext4_warning(sb, "discard not supported, disabling");
-               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
-       }
+       return sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
 }
 
 /*
@@ -2582,7 +2625,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
        struct super_block *sb = journal->j_private;
        struct ext4_buddy e4b;
        struct ext4_group_info *db;
-       int err, count = 0, count2 = 0;
+       int err, ret, count = 0, count2 = 0;
        struct ext4_free_data *entry;
        struct list_head *l, *ltmp;
 
@@ -2592,9 +2635,15 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
                         entry->count, entry->group, entry);
 
-               if (test_opt(sb, DISCARD))
-                       ext4_issue_discard(sb, entry->group,
+               if (test_opt(sb, DISCARD)) {
+                       ret = ext4_issue_discard(sb, entry->group,
                                        entry->start_blk, entry->count);
+                       if (unlikely(ret == -EOPNOTSUPP)) {
+                               ext4_warning(sb, "discard not supported, "
+                                                "disabling");
+                               clear_opt(sb, DISCARD);
+                       }
+               }
 
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                /* we expect to find existing buddy because it's pinned */
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
 
 #endif
 
-int __init init_ext4_mballoc(void)
+int __init ext4_init_mballoc(void)
 {
-       ext4_pspace_cachep =
-               kmem_cache_create("ext4_prealloc_space",
-                                    sizeof(struct ext4_prealloc_space),
-                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+                                       SLAB_RECLAIM_ACCOUNT);
        if (ext4_pspace_cachep == NULL)
                return -ENOMEM;
 
-       ext4_ac_cachep =
-               kmem_cache_create("ext4_alloc_context",
-                                    sizeof(struct ext4_allocation_context),
-                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+                                   SLAB_RECLAIM_ACCOUNT);
        if (ext4_ac_cachep == NULL) {
                kmem_cache_destroy(ext4_pspace_cachep);
                return -ENOMEM;
        }
 
-       ext4_free_ext_cachep =
-               kmem_cache_create("ext4_free_block_extents",
-                                    sizeof(struct ext4_free_data),
-                                    0, SLAB_RECLAIM_ACCOUNT, NULL);
+       ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+                                         SLAB_RECLAIM_ACCOUNT);
        if (ext4_free_ext_cachep == NULL) {
                kmem_cache_destroy(ext4_pspace_cachep);
                kmem_cache_destroy(ext4_ac_cachep);
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void)
        return 0;
 }
 
-void exit_ext4_mballoc(void)
+void ext4_exit_mballoc(void)
 {
+       int i;
        /*
         * Wait for completion of call_rcu()'s on ext4_pspace_cachep
         * before destroying the slab cache.
@@ -2699,6 +2743,15 @@ void exit_ext4_mballoc(void)
        kmem_cache_destroy(ext4_pspace_cachep);
        kmem_cache_destroy(ext4_ac_cachep);
        kmem_cache_destroy(ext4_free_ext_cachep);
+
+       for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+               struct kmem_cache *cachep = ext4_groupinfo_caches[i];
+               if (cachep) {
+                       char *name = (char *)kmem_cache_name(cachep);
+                       kmem_cache_destroy(cachep);
+                       kfree(name);
+               }
+       }
        ext4_remove_debugfs_entry();
 }
 
@@ -3535,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  */
 static noinline_for_stack int
 ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
-                       struct ext4_prealloc_space *pa,
-                       struct ext4_allocation_context *ac)
+                       struct ext4_prealloc_space *pa)
 {
        struct super_block *sb = e4b->bd_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3554,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
 
-       if (ac) {
-               ac->ac_sb = sb;
-               ac->ac_inode = pa->pa_inode;
-       }
-
        while (bit < end) {
                bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
                if (bit >= end)
@@ -3569,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
                         (unsigned) next - bit, (unsigned) group);
                free += next - bit;
 
-               if (ac) {
-                       ac->ac_b_ex.fe_group = group;
-                       ac->ac_b_ex.fe_start = bit;
-                       ac->ac_b_ex.fe_len = next - bit;
-                       ac->ac_b_ex.fe_logical = 0;
-                       trace_ext4_mballoc_discard(ac);
-               }
-
-               trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
-                                              next - bit);
+               trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+               trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+                                              grp_blk_start + bit, next - bit);
                mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
                bit = next + 1;
        }
@@ -3601,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 
 static noinline_for_stack int
 ext4_mb_release_group_pa(struct ext4_buddy *e4b,
-                               struct ext4_prealloc_space *pa,
-                               struct ext4_allocation_context *ac)
+                               struct ext4_prealloc_space *pa)
 {
        struct super_block *sb = e4b->bd_sb;
        ext4_group_t group;
        ext4_grpblk_t bit;
 
-       trace_ext4_mb_release_group_pa(sb, ac, pa);
+       trace_ext4_mb_release_group_pa(sb, pa);
        BUG_ON(pa->pa_deleted == 0);
        ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
        atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
-       if (ac) {
-               ac->ac_sb = sb;
-               ac->ac_inode = NULL;
-               ac->ac_b_ex.fe_group = group;
-               ac->ac_b_ex.fe_start = bit;
-               ac->ac_b_ex.fe_len = pa->pa_len;
-               ac->ac_b_ex.fe_logical = 0;
-               trace_ext4_mballoc_discard(ac);
-       }
+       trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
 
        return 0;
 }
@@ -3644,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
-       struct ext4_allocation_context *ac;
        struct list_head list;
        struct ext4_buddy e4b;
        int err;
@@ -3673,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
                needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
 
        INIT_LIST_HEAD(&list);
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-       if (ac)
-               ac->ac_sb = sb;
 repeat:
        ext4_lock_group(sb, group);
        list_for_each_entry_safe(pa, tmp,
@@ -3730,9 +3756,9 @@ repeat:
                spin_unlock(pa->pa_obj_lock);
 
                if (pa->pa_type == MB_GROUP_PA)
-                       ext4_mb_release_group_pa(&e4b, pa, ac);
+                       ext4_mb_release_group_pa(&e4b, pa);
                else
-                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3740,8 +3766,6 @@ repeat:
 
 out:
        ext4_unlock_group(sb, group);
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
        ext4_mb_unload_buddy(&e4b);
        put_bh(bitmap_bh);
        return free;
@@ -3762,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
-       struct ext4_allocation_context *ac;
        ext4_group_t group = 0;
        struct list_head list;
        struct ext4_buddy e4b;
@@ -3778,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
 
        INIT_LIST_HEAD(&list);
 
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-       if (ac) {
-               ac->ac_sb = sb;
-               ac->ac_inode = inode;
-       }
 repeat:
        /* first, collect all pa's in the inode */
        spin_lock(&ei->i_prealloc_lock);
@@ -3852,7 +3870,7 @@ repeat:
 
                ext4_lock_group(sb, group);
                list_del(&pa->pa_group_list);
-               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
                ext4_unlock_group(sb, group);
 
                ext4_mb_unload_buddy(&e4b);
@@ -3861,23 +3879,8 @@ repeat:
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
 }
 
-/*
- * finds all preallocated spaces and return blocks being freed to them
- * if preallocated space becomes full (no block is used from the space)
- * then the function frees space in buddy
- * XXX: at the moment, truncate (which is the only way to free blocks)
- * discards all preallocations
- */
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-                                       struct ext4_buddy *e4b,
-                                       sector_t block, int count)
-{
-       BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
-}
 #ifdef CONFIG_EXT4_DEBUG
 static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
 {
@@ -4060,14 +4063,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
        struct ext4_buddy e4b;
        struct list_head discard_list;
        struct ext4_prealloc_space *pa, *tmp;
-       struct ext4_allocation_context *ac;
 
        mb_debug(1, "discard locality group preallocation\n");
 
        INIT_LIST_HEAD(&discard_list);
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-       if (ac)
-               ac->ac_sb = sb;
 
        spin_lock(&lg->lg_prealloc_lock);
        list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4119,15 +4118,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
                }
                ext4_lock_group(sb, group);
                list_del(&pa->pa_group_list);
-               ext4_mb_release_group_pa(&e4b, pa, ac);
+               ext4_mb_release_group_pa(&e4b, pa);
                ext4_unlock_group(sb, group);
 
                ext4_mb_unload_buddy(&e4b);
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
 }
 
 /*
@@ -4273,7 +4270,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
         * EDQUOT check, as blocks and quotas have been already
         * reserved when data being copied into pagecache.
         */
-       if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+       if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
                ar->flags |= EXT4_MB_DELALLOC_RESERVED;
        else {
                /* Without delayed allocation we need to verify
@@ -4370,7 +4367,8 @@ out:
        if (inquota && ar->len < inquota)
                dquot_free_block(ar->inode, inquota - ar->len);
        if (!ar->len) {
-               if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+               if (!ext4_test_inode_state(ar->inode,
+                                          EXT4_STATE_DELALLOC_RESERVED))
                        /* release all the reserved blocks if non delalloc */
                        percpu_counter_sub(&sbi->s_dirtyblocks_counter,
                                                reserv_blks);
@@ -4491,7 +4489,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 {
        struct buffer_head *bitmap_bh = NULL;
        struct super_block *sb = inode->i_sb;
-       struct ext4_allocation_context *ac = NULL;
        struct ext4_group_desc *gdp;
        unsigned long freed = 0;
        unsigned int overflow;
@@ -4531,6 +4528,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        if (!bh)
                                tbh = sb_find_get_block(inode->i_sb,
                                                        block + i);
+                       if (unlikely(!tbh))
+                               continue;
                        ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
                                    inode, tbh, block + i);
                }
@@ -4546,12 +4545,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
        if (!ext4_should_writeback_data(inode))
                flags |= EXT4_FREE_BLOCKS_METADATA;
 
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-       if (ac) {
-               ac->ac_inode = inode;
-               ac->ac_sb = sb;
-       }
-
 do_more:
        overflow = 0;
        ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4609,12 +4602,7 @@ do_more:
                        BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
        }
 #endif
-       if (ac) {
-               ac->ac_b_ex.fe_group = block_group;
-               ac->ac_b_ex.fe_start = bit;
-               ac->ac_b_ex.fe_len = count;
-               trace_ext4_mballoc_free(ac);
-       }
+       trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
 
        err = ext4_mb_load_buddy(sb, block_group, &e4b);
        if (err)
@@ -4626,7 +4614,11 @@ do_more:
                 * blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed
                 */
-               new_entry  = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+               new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+               if (!new_entry) {
+                       err = -ENOMEM;
+                       goto error_return;
+               }
                new_entry->start_blk = bit;
                new_entry->group  = block_group;
                new_entry->count = count;
@@ -4643,9 +4635,6 @@ do_more:
                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count);
                mb_free_blocks(inode, &e4b, bit, count);
-               ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-               if (test_opt(sb, DISCARD))
-                       ext4_issue_discard(sb, block_group, bit, count);
        }
 
        ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4685,7 +4674,194 @@ error_return:
                dquot_free_block(inode, freed);
        brelse(bitmap_bh);
        ext4_std_error(sb, err);
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
        return;
 }
+
+/**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb:                super block for the file system
+ * @start:     starting block of the free extent in the alloc. group
+ * @count:     number of blocks to TRIM
+ * @group:     alloc. group we are working with
+ * @e4b:       ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To assure that no
+ * one will allocate those blocks, mark it as used in buddy bitmap. This must
+ * be called with under the group lock.
+ */
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
+               ext4_group_t group, struct ext4_buddy *e4b)
+{
+       struct ext4_free_extent ex;
+       int ret = 0;
+
+       assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+       ex.fe_start = start;
+       ex.fe_group = group;
+       ex.fe_len = count;
+
+       /*
+        * Mark blocks used, so no one can reuse them while
+        * being trimmed.
+        */
+       mb_mark_used(e4b, &ex);
+       ext4_unlock_group(sb, group);
+
+       ret = ext4_issue_discard(sb, group, start, count);
+
+       ext4_lock_group(sb, group);
+       mb_free_blocks(NULL, e4b, start, ex.fe_len);
+       return ret;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb:                        super block for file system
+ * @e4b:               ext4 buddy
+ * @start:             first group block to examine
+ * @max:               last group block to examine
+ * @minblocks:         minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's buddy bitmap searching for free
+ * extents. When the free block is found, ext4_trim_extent is called to TRIM
+ * the extent.
+ *
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap. This is done until whole group is scanned.
+ */
+ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+               ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+       void *bitmap;
+       ext4_grpblk_t next, count = 0;
+       ext4_group_t group;
+       int ret = 0;
+
+       BUG_ON(e4b == NULL);
+
+       bitmap = e4b->bd_bitmap;
+       group = e4b->bd_group;
+       start = (e4b->bd_info->bb_first_free > start) ?
+               e4b->bd_info->bb_first_free : start;
+       ext4_lock_group(sb, group);
+
+       while (start < max) {
+               start = mb_find_next_zero_bit(bitmap, max, start);
+               if (start >= max)
+                       break;
+               next = mb_find_next_bit(bitmap, max, start);
+
+               if ((next - start) >= minblocks) {
+                       ret = ext4_trim_extent(sb, start,
+                               next - start, group, e4b);
+                       if (ret < 0)
+                               break;
+                       count += next - start;
+               }
+               start = next + 1;
+
+               if (fatal_signal_pending(current)) {
+                       count = -ERESTARTSYS;
+                       break;
+               }
+
+               if (need_resched()) {
+                       ext4_unlock_group(sb, group);
+                       cond_resched();
+                       ext4_lock_group(sb, group);
+               }
+
+               if ((e4b->bd_info->bb_free - count) < minblocks)
+                       break;
+       }
+       ext4_unlock_group(sb, group);
+
+       ext4_debug("trimmed %d blocks in the group %d\n",
+               count, group);
+
+       if (ret < 0)
+               count = ret;
+
+       return count;
+}
+
+/**
+ * ext4_trim_fs() -- trim ioctl handle function
+ * @sb:                        superblock for filesystem
+ * @range:             fstrim_range structure
+ *
+ * start:      First Byte to trim
+ * len:                number of Bytes to trim from start
+ * minlen:     minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+       struct ext4_buddy e4b;
+       ext4_group_t first_group, last_group;
+       ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+       ext4_grpblk_t cnt = 0, first_block, last_block;
+       uint64_t start, len, minlen, trimmed;
+       ext4_fsblk_t first_data_blk =
+                       le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+       int ret = 0;
+
+       start = range->start >> sb->s_blocksize_bits;
+       len = range->len >> sb->s_blocksize_bits;
+       minlen = range->minlen >> sb->s_blocksize_bits;
+       trimmed = 0;
+
+       if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+               return -EINVAL;
+       if (start < first_data_blk) {
+               len -= first_data_blk - start;
+               start = first_data_blk;
+       }
+
+       /* Determine first and last group to examine based on start and len */
+       ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+                                    &first_group, &first_block);
+       ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+                                    &last_group, &last_block);
+       last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+       last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+       if (first_group > last_group)
+               return -EINVAL;
+
+       for (group = first_group; group <= last_group; group++) {
+               ret = ext4_mb_load_buddy(sb, group, &e4b);
+               if (ret) {
+                       ext4_error(sb, "Error in loading buddy "
+                                       "information for %u", group);
+                       break;
+               }
+
+               if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+                       len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+               else
+                       last_block = first_block + len;
+
+               if (e4b.bd_info->bb_free >= minlen) {
+                       cnt = ext4_trim_all_free(sb, &e4b, first_block,
+                                               last_block, minlen);
+                       if (cnt < 0) {
+                               ret = cnt;
+                               ext4_mb_unload_buddy(&e4b);
+                               break;
+                       }
+               }
+               ext4_mb_unload_buddy(&e4b);
+               trimmed += cnt;
+               first_block = 0;
+       }
+       range->len = trimmed * sb->s_blocksize;
+
+       return ret;
+}