ext4: trim allocation requests to group size
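
ext4_mb_normalize_request() can inflate an allocation request beyond
the size of a block group, tripping the BUG_ON() on the goal size later
in that function. This can happen on filesystems with artificially
small groups (e.g. created with a low blocks-per-group setting). Trim
the normalized request to EXT4_BLOCKS_PER_GROUP() before computing the
goal extent.

This patch also carries several related mballoc fixes visible in the
hunks below:

 - mb_free_blocks(): warn and bail out on a zero-length free instead
   of proceeding.
 - ext4_mb_mark_diskspace_used(): when an allocation overlaps fs
   metadata, fix the bitmap and fail with -EIO instead of -EAGAIN, and
   drop the matching retry loop in ext4_mb_new_blocks().
 - ext4_discard_allocated_blocks(): with no preallocation context,
   free the blocks recorded in ac->ac_f_ex instead of leaking them.
 - ext4_mb_seq_groups_show(): size the on-stack counters[] array from
   EXT4_MAX_BLOCK_LOG_SIZE instead of hard-coding 16, which is too
   small for 64k block sizes.
 - Replace open-coded cluster masking with the EXT4_LBLK_CMASK(),
   EXT4_PBLK_COFF() and EXT4_LBLK_COFF() helpers.
 - Allocate the free-extent tracking entry with __GFP_NOFAIL instead
   of an open-coded retry loop, since ext4_free_blocks() is not
   allowed to fail.
 - mb_mark_free_simple(): widen 'border' from unsigned short to
   unsigned int.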
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e5dc9a6..5efe721 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -652,7 +652,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        ext4_grpblk_t min;
        ext4_grpblk_t max;
        ext4_grpblk_t chunk;
-       unsigned short border;
+       unsigned int border;
 
        BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
 
@@ -1312,6 +1312,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
        void *buddy2;
        struct super_block *sb = e4b->bd_sb;
 
+       if (WARN_ON(count == 0))
+               return;
        BUG_ON(first + count > (sb->s_blocksize << 3));
        assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
        mb_check_buddy(e4b);
@@ -2132,7 +2134,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        struct ext4_buddy e4b;
        struct sg {
                struct ext4_group_info info;
-               ext4_grpblk_t counters[16];
+               ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
        } sg;
 
        group--;
@@ -2822,7 +2824,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
                           "fs metadata\n", block, block+len);
                /* File system mounted not to panic on error
-                * Fix the bitmap and repeat the block allocation
+                * Fix the bitmap and return EIO
                 * We leak some of the blocks here.
                 */
                ext4_lock_group(sb, ac->ac_b_ex.fe_group);
@@ -2831,7 +2833,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
                err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
                if (!err)
-                       err = -EAGAIN;
+                       err = -EIO;
                goto out_err;
        }
 
@@ -3001,6 +3003,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        if (ar->pright && start + size - 1 >= ar->lright)
                size -= start + size - ar->lright;
 
+       /*
+        * Trim allocation request for filesystems with artificially small
+        * groups.
+        */
+       if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+               size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
+
        end = start + size;
 
        /* check we don't cross already preallocated blocks */
@@ -3067,7 +3076,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        }
        BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
                        start > ac->ac_o_ex.fe_logical);
-       BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+       BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 
        /* now prepare goal request */
 
@@ -3128,13 +3137,31 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
 static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
 {
        struct ext4_prealloc_space *pa = ac->ac_pa;
-       int len;
+       struct ext4_buddy e4b;
+       int err;
 
-       if (pa && pa->pa_type == MB_INODE_PA) {
-               len = ac->ac_b_ex.fe_len;
-               pa->pa_free += len;
+       if (pa == NULL) {
+               if (ac->ac_f_ex.fe_len == 0)
+                       return;
+               err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+               if (err) {
+                       /*
+                        * This should never happen since we pin the
+                        * pages in the ext4_allocation_context so
+                        * ext4_mb_load_buddy() should never fail.
+                        */
+                       WARN(1, "mb_load_buddy failed (%d)", err);
+                       return;
+               }
+               ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+               mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+                              ac->ac_f_ex.fe_len);
+               ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+               ext4_mb_unload_buddy(&e4b);
+               return;
        }
-
+       if (pa->pa_type == MB_INODE_PA)
+               pa->pa_free += ac->ac_b_ex.fe_len;
 }
 
 /*
@@ -4067,7 +4094,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 
        /* set up allocation goals */
        memset(ac, 0, sizeof(struct ext4_allocation_context));
-       ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+       ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
        ac->ac_status = AC_STATUS_CONTINUE;
        ac->ac_sb = sb;
        ac->ac_inode = ar->inode;
@@ -4381,18 +4408,7 @@ repeat:
        }
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
                *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
-               if (*errp == -EAGAIN) {
-                       /*
-                        * drop the reference that we took
-                        * in ext4_mb_use_best_found
-                        */
-                       ext4_mb_release_context(ac);
-                       ac->ac_b_ex.fe_group = 0;
-                       ac->ac_b_ex.fe_start = 0;
-                       ac->ac_b_ex.fe_len = 0;
-                       ac->ac_status = AC_STATUS_CONTINUE;
-                       goto repeat;
-               } else if (*errp)
+               if (*errp)
                errout:
                        ext4_discard_allocated_blocks(ac);
                else {
@@ -4605,7 +4621,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
         * blocks at the beginning or the end unless we are explicitly
         * requested to avoid doing so.
         */
-       overflow = block & (sbi->s_cluster_ratio - 1);
+       overflow = EXT4_PBLK_COFF(sbi, block);
        if (overflow) {
                if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
                        overflow = sbi->s_cluster_ratio - overflow;
@@ -4619,7 +4635,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        count += overflow;
                }
        }
-       overflow = count & (sbi->s_cluster_ratio - 1);
+       overflow = EXT4_LBLK_COFF(sbi, count);
        if (overflow) {
                if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
                        if (count > overflow)
@@ -4700,18 +4716,12 @@ do_more:
                /*
                 * blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed
+                *
+                * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
+                * to fail.
                 */
-       retry:
-               new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
-               if (!new_entry) {
-                       /*
-                        * We use a retry loop because
-                        * ext4_free_blocks() is not allowed to fail.
-                        */
-                       cond_resched();
-                       congestion_wait(BLK_RW_ASYNC, HZ/50);
-                       goto retry;
-               }
+               new_entry = kmem_cache_alloc(ext4_free_ext_cachep,
+                               GFP_NOFS|__GFP_NOFAIL);
                new_entry->start_cluster = bit;
                new_entry->group  = block_group;
                new_entry->count = count_clusters;
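
For reference, the EXT4_LBLK_CMASK(), EXT4_PBLK_COFF() and
EXT4_LBLK_COFF() helpers used above are defined in fs/ext4/ext4.h. A
sketch of their definitions (exact formatting may vary between trees;
the point is the explicit cast, which keeps the mask as wide as the
block number being masked):

	/* Mask out the low bits to get the start of the cluster */
	#define EXT4_LBLK_CMASK(s, lblk) ((lblk) &			\
				  ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
	/* Get the offset of a block within its cluster */
	#define EXT4_PBLK_COFF(s, pblk) ((pblk) &			\
				 ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
	#define EXT4_LBLK_COFF(s, lblk) ((lblk) &			\
				 ((ext4_lblk_t) (s)->s_cluster_ratio - 1))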