ext4: fix error processing in mb_free_blocks
[pandora-kernel.git] / fs / ext4 / mballoc.c
index 9d57695..b882868 100644 (file)
  * mballoc.c contains the multiblocks allocation routines
  */
 
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/proc_fs.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <linux/version.h>
-#include "group.h"
-
+#include "mballoc.h"
 /*
  * MUSTDO:
  *   - test ext4_ext_search_left() and ext4_ext_search_right()
  *
  */
 
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
- */
-#define MB_DEBUG__
-#ifdef MB_DEBUG
-#define mb_debug(fmt, a...)    printk(fmt, ##a)
-#else
-#define mb_debug(fmt, a...)
-#endif
-
-/*
- * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
- * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
- */
-#define EXT4_MB_HISTORY
-#define EXT4_MB_HISTORY_ALLOC          1       /* allocation */
-#define EXT4_MB_HISTORY_PREALLOC       2       /* preallocated blocks used */
-#define EXT4_MB_HISTORY_DISCARD                4       /* preallocation discarded */
-#define EXT4_MB_HISTORY_FREE           8       /* free */
-
-#define EXT4_MB_HISTORY_DEFAULT                (EXT4_MB_HISTORY_ALLOC | \
-                                        EXT4_MB_HISTORY_PREALLOC)
-
-/*
- * How long mballoc can look for a best extent (in found extents)
- */
-#define MB_DEFAULT_MAX_TO_SCAN         200
-
-/*
- * How long mballoc must look for a best extent
- */
-#define MB_DEFAULT_MIN_TO_SCAN         10
-
-/*
- * How many groups mballoc will scan looking for the best chunk
- */
-#define MB_DEFAULT_MAX_GROUPS_TO_SCAN  5
-
-/*
- * with 'ext4_mb_stats' allocator will collect stats that will be
- * shown at umount. The collecting costs though!
- */
-#define MB_DEFAULT_STATS               1
-
-/*
- * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
- * by the stream allocator, which purpose is to pack requests
- * as close each to other as possible to produce smooth I/O traffic
- * We use locality group prealloc space for stream request.
- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
- */
-#define MB_DEFAULT_STREAM_THRESHOLD    16      /* 64K */
-
-/*
- * for which requests use 2^N search using buddies
- */
-#define MB_DEFAULT_ORDER2_REQS         2
-
-/*
- * default group prealloc size 512 blocks
- */
-#define MB_DEFAULT_GROUP_PREALLOC      512
-
-static struct kmem_cache *ext4_pspace_cachep;
-static struct kmem_cache *ext4_ac_cachep;
-
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS     30
-
-struct ext4_free_metadata {
-       ext4_group_t group;
-       unsigned short num;
-       ext4_grpblk_t  blocks[EXT4_BB_MAX_BLOCKS];
-       struct list_head list;
-};
-
-struct ext4_group_info {
-       unsigned long   bb_state;
-       unsigned long   bb_tid;
-       struct ext4_free_metadata *bb_md_cur;
-       unsigned short  bb_first_free;
-       unsigned short  bb_free;
-       unsigned short  bb_fragments;
-       struct          list_head bb_prealloc_list;
-#ifdef DOUBLE_CHECK
-       void            *bb_bitmap;
-#endif
-       unsigned short  bb_counters[];
-};
-
-#define EXT4_GROUP_INFO_NEED_INIT_BIT  0
-#define EXT4_GROUP_INFO_LOCKED_BIT     1
-
-#define EXT4_MB_GRP_NEED_INIT(grp)     \
-       (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-
-
-struct ext4_prealloc_space {
-       struct list_head        pa_inode_list;
-       struct list_head        pa_group_list;
-       union {
-               struct list_head pa_tmp_list;
-               struct rcu_head pa_rcu;
-       } u;
-       spinlock_t              pa_lock;
-       atomic_t                pa_count;
-       unsigned                pa_deleted;
-       ext4_fsblk_t            pa_pstart;      /* phys. block */
-       ext4_lblk_t             pa_lstart;      /* log. block */
-       unsigned short          pa_len;         /* len of preallocated chunk */
-       unsigned short          pa_free;        /* how many blocks are free */
-       unsigned short          pa_linear;      /* consumed in one direction
-                                                * strictly, for grp prealloc */
-       spinlock_t              *pa_obj_lock;
-       struct inode            *pa_inode;      /* hack, for history only */
-};
-
-
-struct ext4_free_extent {
-       ext4_lblk_t fe_logical;
-       ext4_grpblk_t fe_start;
-       ext4_group_t fe_group;
-       int fe_len;
-};
-
-/*
- * Locality group:
- *   we try to group all related changes together
- *   so that writeback can flush/allocate them together as well
- */
-struct ext4_locality_group {
-       /* for allocator */
-       struct mutex            lg_mutex;       /* to serialize allocates */
-       struct list_head        lg_prealloc_list;/* list of preallocations */
-       spinlock_t              lg_prealloc_lock;
-};
-
-struct ext4_allocation_context {
-       struct inode *ac_inode;
-       struct super_block *ac_sb;
-
-       /* original request */
-       struct ext4_free_extent ac_o_ex;
-
-       /* goal request (after normalization) */
-       struct ext4_free_extent ac_g_ex;
-
-       /* the best found extent */
-       struct ext4_free_extent ac_b_ex;
-
-       /* copy of the bext found extent taken before preallocation efforts */
-       struct ext4_free_extent ac_f_ex;
-
-       /* number of iterations done. we have to track to limit searching */
-       unsigned long ac_ex_scanned;
-       __u16 ac_groups_scanned;
-       __u16 ac_found;
-       __u16 ac_tail;
-       __u16 ac_buddy;
-       __u16 ac_flags;         /* allocation hints */
-       __u8 ac_status;
-       __u8 ac_criteria;
-       __u8 ac_repeats;
-       __u8 ac_2order;         /* if request is to allocate 2^N blocks and
-                                * N > 0, the field stores N, otherwise 0 */
-       __u8 ac_op;             /* operation, for history only */
-       struct page *ac_bitmap_page;
-       struct page *ac_buddy_page;
-       struct ext4_prealloc_space *ac_pa;
-       struct ext4_locality_group *ac_lg;
-};
-
-#define AC_STATUS_CONTINUE     1
-#define AC_STATUS_FOUND                2
-#define AC_STATUS_BREAK                3
-
-struct ext4_mb_history {
-       struct ext4_free_extent orig;   /* orig allocation */
-       struct ext4_free_extent goal;   /* goal allocation */
-       struct ext4_free_extent result; /* result allocation */
-       unsigned pid;
-       unsigned ino;
-       __u16 found;    /* how many extents have been found */
-       __u16 groups;   /* how many groups have been scanned */
-       __u16 tail;     /* what tail broke some buddy */
-       __u16 buddy;    /* buddy the tail ^^^ broke */
-       __u16 flags;
-       __u8 cr:3;      /* which phase the result extent was found at */
-       __u8 op:4;
-       __u8 merged:1;
-};
-
-struct ext4_buddy {
-       struct page *bd_buddy_page;
-       void *bd_buddy;
-       struct page *bd_bitmap_page;
-       void *bd_bitmap;
-       struct ext4_group_info *bd_info;
-       struct super_block *bd_sb;
-       __u16 bd_blkbits;
-       ext4_group_t bd_group;
-};
-#define EXT4_MB_BITMAP(e4b)    ((e4b)->bd_bitmap)
-#define EXT4_MB_BUDDY(e4b)     ((e4b)->bd_buddy)
-
-#ifndef EXT4_MB_HISTORY
-static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
-{
-       return;
-}
-#else
-static void ext4_mb_store_history(struct ext4_allocation_context *ac);
-#endif
-
-#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
-
-static struct proc_dir_entry *proc_root_ext4;
-struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
-                       ext4_fsblk_t goal, unsigned long *count, int *errp);
-
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
-                                       ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
-static void ext4_mb_return_to_preallocation(struct inode *inode,
-                                       struct ext4_buddy *e4b, sector_t block,
-                                       int count);
-static void ext4_mb_put_pa(struct ext4_allocation_context *,
-                       struct super_block *, struct ext4_prealloc_space *pa);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
-
-
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline void ext4_unlock_group(struct super_block *sb,
-                                       ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline int ext4_is_group_locked(struct super_block *sb,
-                                       ext4_group_t group)
-{
-       struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
-       return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
-                                               &(grinfo->bb_state));
-}
-
-static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
-                                       struct ext4_free_extent *fex)
-{
-       ext4_fsblk_t block;
-
-       block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
-                       + fex->fe_start
-                       + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-       return block;
-}
-
 static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
 {
 #if BITS_PER_LONG == 64
@@ -677,22 +381,28 @@ static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr)
 
 static inline int mb_find_next_zero_bit(void *addr, int max, int start)
 {
-       int fix = 0;
+       int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
-       max += fix;
+       tmpmax = max + fix;
        start += fix;
 
-       return ext4_find_next_zero_bit(addr, max, start) - fix;
+       ret = ext4_find_next_zero_bit(addr, tmpmax, start) - fix;
+       if (ret > max)
+               return max;
+       return ret;
 }
 
 static inline int mb_find_next_bit(void *addr, int max, int start)
 {
-       int fix = 0;
+       int fix = 0, ret, tmpmax;
        addr = mb_correct_addr_and_bit(&fix, addr);
-       max += fix;
+       tmpmax = max + fix;
        start += fix;
 
-       return ext4_find_next_bit(addr, max, start) - fix;
+       ret = ext4_find_next_bit(addr, tmpmax, start) - fix;
+       if (ret > max)
+               return max;
+       return ret;
 }
 
 static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max)
@@ -736,7 +446,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
 
-                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                       ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr,
                                   first + i, e4b->bd_group);
@@ -898,17 +608,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
        list_for_each(cur, &grp->bb_prealloc_list) {
                ext4_group_t groupnr;
                struct ext4_prealloc_space *pa;
-               pa = list_entry(cur, struct ext4_prealloc_space, group_list);
-               ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
+               pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
+               ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
                MB_CHECK_ASSERT(groupnr == e4b->bd_group);
-               for (i = 0; i < pa->len; i++)
+               for (i = 0; i < pa->pa_len; i++)
                        MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
        }
        return 0;
 }
 #undef MB_CHECK_ASSERT
 #define mb_check_buddy(e4b) __mb_check_buddy(e4b,      \
-                                       __FILE__, __FUNCTION__, __LINE__)
+                                       __FILE__, __func__, __LINE__)
 #else
 #define mb_check_buddy(e4b)
 #endif
@@ -982,7 +692,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
        grp->bb_fragments = fragments;
 
        if (free != grp->bb_free) {
-               ext4_error(sb, __FUNCTION__,
+               ext4_error(sb, __func__,
                        "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
                        group, free, grp->bb_free);
                /*
@@ -1099,6 +809,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                if (!buffer_uptodate(bh[i]))
                        goto out;
 
+       err = 0;
        first_block = page->index * blocks_per_page;
        for (i = 0; i < blocks_per_page; i++) {
                int group;
@@ -1168,8 +879,9 @@ out:
        return err;
 }
 
-static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
-               struct ext4_buddy *e4b)
+static noinline_for_stack int
+ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                                       struct ext4_buddy *e4b)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct inode *inode = sbi->s_buddy_cache;
@@ -1178,6 +890,7 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        int pnum;
        int poff;
        struct page *page;
+       int ret;
 
        mb_debug("load group %lu\n", group);
 
@@ -1209,15 +922,21 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
                        if (!PageUptodate(page)) {
-                               ext4_mb_init_cache(page, NULL);
+                               ret = ext4_mb_init_cache(page, NULL);
+                               if (ret) {
+                                       unlock_page(page);
+                                       goto err;
+                               }
                                mb_cmp_bitmaps(e4b, page_address(page) +
                                               (poff * sb->s_blocksize));
                        }
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page))
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
                goto err;
+       }
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
        mark_page_accessed(page);
@@ -1233,14 +952,20 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
                if (page) {
                        BUG_ON(page->mapping != inode->i_mapping);
-                       if (!PageUptodate(page))
-                               ext4_mb_init_cache(page, e4b->bd_bitmap);
-
+                       if (!PageUptodate(page)) {
+                               ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+                               if (ret) {
+                                       unlock_page(page);
+                                       goto err;
+                               }
+                       }
                        unlock_page(page);
                }
        }
-       if (page == NULL || !PageUptodate(page))
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
                goto err;
+       }
        e4b->bd_buddy_page = page;
        e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
        mark_page_accessed(page);
@@ -1257,7 +982,7 @@ err:
                page_cache_release(e4b->bd_buddy_page);
        e4b->bd_buddy = NULL;
        e4b->bd_bitmap = NULL;
-       return -EIO;
+       return ret;
 }
 
 static void ext4_mb_release_desc(struct ext4_buddy *e4b)
@@ -1326,7 +1051,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len)
        }
 }
 
-static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                          int first, int count)
 {
        int block = 0;
@@ -1366,11 +1091,12 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        blocknr += block;
                        blocknr +=
                            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
-
-                       ext4_error(sb, __FUNCTION__, "double-free of inode"
+                       ext4_unlock_group(sb, e4b->bd_group);
+                       ext4_error(sb, __func__, "double-free of inode"
                                   " %lu's block %llu(bit %u in group %lu)\n",
                                   inode ? inode->i_ino : 0, blocknr, block,
                                   e4b->bd_group);
+                       ext4_lock_group(sb, e4b->bd_group);
                }
                mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
                e4b->bd_info->bb_counters[order]++;
@@ -1408,8 +1134,6 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                } while (1);
        }
        mb_check_buddy(e4b);
-
-       return 0;
 }
 
 static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
@@ -1848,7 +1572,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                         * free blocks even though group info says we
                         * we have free blocks
                         */
-                       ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+                       ext4_error(sb, __func__, "%d free blocks as per "
                                        "group info. But bitmap says 0\n",
                                        free);
                        break;
@@ -1857,7 +1581,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
                mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
                BUG_ON(ex.fe_len <= 0);
                if (free < ex.fe_len) {
-                       ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+                       ext4_error(sb, __func__, "%d free blocks as per "
                                        "group info. But got %d blocks\n",
                                        free, ex.fe_len);
                        /*
@@ -1965,7 +1689,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
        ext4_group_t group;
        ext4_group_t i;
@@ -2024,10 +1749,6 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
                ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
                spin_unlock(&sbi->s_md_lock);
        }
-
-       /* searching for the right group start from the goal value specified */
-       group = ac->ac_g_ex.fe_group;
-
        /* Let's just scan groups to find more-less suitable blocks */
        cr = ac->ac_2order ? 0 : 1;
        /*
@@ -2037,6 +1758,12 @@ static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 repeat:
        for (; cr < 4 && ac->ac_status == AC_STATUS_CONTINUE; cr++) {
                ac->ac_criteria = cr;
+               /*
+                * searching for the right group start
+                * from the goal value specified
+                */
+               group = ac->ac_g_ex.fe_group;
+
                for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) {
                        struct ext4_group_info *grp;
                        struct ext4_group_desc *desc;
@@ -2257,6 +1984,8 @@ static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
        int rc;
        int size;
 
+       if (unlikely(sbi->s_mb_history == NULL))
+               return -ENOMEM;
        s = kmalloc(sizeof(*s), GFP_KERNEL);
        if (s == NULL)
                return -ENOMEM;
@@ -2459,13 +2188,12 @@ static void ext4_mb_history_init(struct super_block *sb)
        sbi->s_mb_history_cur = 0;
        spin_lock_init(&sbi->s_mb_history_lock);
        i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
-       sbi->s_mb_history = kmalloc(i, GFP_KERNEL);
-       if (likely(sbi->s_mb_history != NULL))
-               memset(sbi->s_mb_history, 0, i);
+       sbi->s_mb_history = kzalloc(i, GFP_KERNEL);
        /* if we can't allocate history, then we simple won't use it */
 }
 
-static void ext4_mb_store_history(struct ext4_allocation_context *ac)
+static noinline_for_stack void
+ext4_mb_store_history(struct ext4_allocation_context *ac)
 {
        struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
        struct ext4_mb_history h;
@@ -2565,16 +2293,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
                meta_group_info[j] = kzalloc(len, GFP_KERNEL);
                if (meta_group_info[j] == NULL) {
                        printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
-                       i--;
                        goto err_freebuddy;
                }
                desc = ext4_get_group_desc(sb, i, NULL);
                if (desc == NULL) {
                        printk(KERN_ERR
                                "EXT4-fs: can't read descriptor %lu\n", i);
+                       i++;
                        goto err_freebuddy;
                }
-               memset(meta_group_info[j], 0, len);
                set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
                        &(meta_group_info[j]->bb_state));
 
@@ -2598,7 +2325,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
                        meta_group_info[j]->bb_bitmap =
                                kmalloc(sb->s_blocksize, GFP_KERNEL);
                        BUG_ON(meta_group_info[j]->bb_bitmap == NULL);
-                       bh = read_block_bitmap(sb, i);
+                       bh = ext4_read_block_bitmap(sb, i);
                        BUG_ON(bh == NULL);
                        memcpy(meta_group_info[j]->bb_bitmap, bh->b_data,
                                        sb->s_blocksize);
@@ -2611,13 +2338,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
        return 0;
 
 err_freebuddy:
-       while (i >= 0) {
+       while (i-- > 0)
                kfree(ext4_get_group_info(sb, i));
-               i--;
-       }
        i = num_meta_group_infos;
 err_freemeta:
-       while (--i >= 0)
+       while (i-- > 0)
                kfree(sbi->s_group_info[i]);
        iput(sbi->s_buddy_cache);
 err_freesgi:
@@ -2631,6 +2356,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        unsigned i;
        unsigned offset;
        unsigned max;
+       int ret;
 
        if (!test_opt(sb, MBALLOC))
                return 0;
@@ -2665,12 +2391,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
        } while (i <= sb->s_blocksize_bits + 1);
 
        /* init file for buddy data */
-       i = ext4_mb_init_backend(sb);
-       if (i) {
+       ret = ext4_mb_init_backend(sb);
+       if (ret != 0) {
                clear_opt(sbi->s_mount_opt, MBALLOC);
                kfree(sbi->s_mb_offsets);
                kfree(sbi->s_mb_maxs);
-               return i;
+               return ret;
        }
 
        spin_lock_init(&sbi->s_md_lock);
@@ -2801,7 +2527,8 @@ int ext4_mb_release(struct super_block *sb)
        return 0;
 }
 
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
+static noinline_for_stack void
+ext4_mb_free_committed_blocks(struct super_block *sb)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        int err;
@@ -2842,8 +2569,7 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
                ext4_lock_group(sb, md->group);
                for (i = 0; i < md->num; i++) {
                        mb_debug(" %u", md->blocks[i]);
-                       err = mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
-                       BUG_ON(err != 0);
+                       mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
                }
                mb_debug("\n");
                ext4_unlock_group(sb, md->group);
@@ -2869,25 +2595,24 @@ static void ext4_mb_free_committed_blocks(struct super_block *sb)
 
 
 
-#define MB_PROC_VALUE_READ(name)                               \
-static int ext4_mb_read_##name(char *page, char **start,       \
-               off_t off, int count, int *eof, void *data)     \
+#define MB_PROC_FOPS(name)                                     \
+static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v)     \
 {                                                              \
-       struct ext4_sb_info *sbi = data;                        \
-       int len;                                                \
-       *eof = 1;                                               \
-       if (off != 0)                                           \
-               return 0;                                       \
-       len = sprintf(page, "%ld\n", sbi->s_mb_##name);         \
-       *start = page;                                          \
-       return len;                                             \
-}
-
-#define MB_PROC_VALUE_WRITE(name)                              \
-static int ext4_mb_write_##name(struct file *file,             \
-               const char __user *buf, unsigned long cnt, void *data)  \
+       struct ext4_sb_info *sbi = m->private;                  \
+                                                               \
+       seq_printf(m, "%ld\n", sbi->s_mb_##name);               \
+       return 0;                                               \
+}                                                              \
+                                                               \
+static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\
+{                                                              \
+       return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\
+}                                                              \
+                                                               \
+static ssize_t ext4_mb_##name##_proc_write(struct file *file,  \
+               const char __user *buf, size_t cnt, loff_t *ppos)       \
 {                                                              \
-       struct ext4_sb_info *sbi = data;                        \
+       struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\
        char str[32];                                           \
        long value;                                             \
        if (cnt >= sizeof(str))                                 \
@@ -2899,31 +2624,32 @@ static int ext4_mb_write_##name(struct file *file,              \
                return -ERANGE;                                 \
        sbi->s_mb_##name = value;                               \
        return cnt;                                             \
-}
+}                                                              \
+                                                               \
+static const struct file_operations ext4_mb_##name##_proc_fops = {     \
+       .owner          = THIS_MODULE,                          \
+       .open           = ext4_mb_##name##_proc_open,           \
+       .read           = seq_read,                             \
+       .llseek         = seq_lseek,                            \
+       .release        = single_release,                       \
+       .write          = ext4_mb_##name##_proc_write,          \
+};
 
-MB_PROC_VALUE_READ(stats);
-MB_PROC_VALUE_WRITE(stats);
-MB_PROC_VALUE_READ(max_to_scan);
-MB_PROC_VALUE_WRITE(max_to_scan);
-MB_PROC_VALUE_READ(min_to_scan);
-MB_PROC_VALUE_WRITE(min_to_scan);
-MB_PROC_VALUE_READ(order2_reqs);
-MB_PROC_VALUE_WRITE(order2_reqs);
-MB_PROC_VALUE_READ(stream_request);
-MB_PROC_VALUE_WRITE(stream_request);
-MB_PROC_VALUE_READ(group_prealloc);
-MB_PROC_VALUE_WRITE(group_prealloc);
+MB_PROC_FOPS(stats);
+MB_PROC_FOPS(max_to_scan);
+MB_PROC_FOPS(min_to_scan);
+MB_PROC_FOPS(order2_reqs);
+MB_PROC_FOPS(stream_request);
+MB_PROC_FOPS(group_prealloc);
 
 #define        MB_PROC_HANDLER(name, var)                                      \
 do {                                                                   \
-       proc = create_proc_entry(name, mode, sbi->s_mb_proc);           \
+       proc = proc_create_data(name, mode, sbi->s_mb_proc,             \
+                               &ext4_mb_##var##_proc_fops, sbi);       \
        if (proc == NULL) {                                             \
                printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \
                goto err_out;                                           \
        }                                                               \
-       proc->data = sbi;                                               \
-       proc->read_proc  = ext4_mb_read_##var ;                         \
-       proc->write_proc = ext4_mb_write_##var;                         \
 } while (0)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
@@ -2933,8 +2659,11 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb)
        struct proc_dir_entry *proc;
        char devname[64];
 
-       snprintf(devname, sizeof(devname) - 1, "%s",
-               bdevname(sb->s_bdev, devname));
+       if (proc_root_ext4 == NULL) {
+               sbi->s_mb_proc = NULL;
+               return -EINVAL;
+       }
+       bdevname(sb->s_bdev, devname);
        sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4);
 
        MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats);
@@ -2968,8 +2697,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
        if (sbi->s_mb_proc == NULL)
                return -EINVAL;
 
-       snprintf(devname, sizeof(devname) - 1, "%s",
-               bdevname(sb->s_bdev, devname));
+       bdevname(sb->s_bdev, devname);
        remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc);
        remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc);
@@ -3021,7 +2749,8 @@ void exit_ext4_mballoc(void)
  * Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
  * Returns 0 if success or error code
  */
-static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                handle_t *handle)
 {
        struct buffer_head *bitmap_bh = NULL;
@@ -3031,7 +2760,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        struct ext4_sb_info *sbi;
        struct super_block *sb;
        ext4_fsblk_t block;
-       int err;
+       int err, len;
 
        BUG_ON(ac->ac_status != AC_STATUS_FOUND);
        BUG_ON(ac->ac_b_ex.fe_len <= 0);
@@ -3040,11 +2769,9 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        sbi = EXT4_SB(sb);
        es = sbi->s_es;
 
-       ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
-                       gdp->bg_free_blocks_count);
 
        err = -EIO;
-       bitmap_bh = read_block_bitmap(sb, ac->ac_b_ex.fe_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, ac->ac_b_ex.fe_group);
        if (!bitmap_bh)
                goto out_err;
 
@@ -3057,6 +2784,9 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
        if (!gdp)
                goto out_err;
 
+       ext4_debug("using block group %lu(%d)\n", ac->ac_b_ex.fe_group,
+                       gdp->bg_free_blocks_count);
+
        err = ext4_journal_get_write_access(handle, gdp_bh);
        if (err)
                goto out_err;
@@ -3065,14 +2795,27 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                + ac->ac_b_ex.fe_start
                + le32_to_cpu(es->s_first_data_block);
 
-       if (block == ext4_block_bitmap(sb, gdp) ||
-                       block == ext4_inode_bitmap(sb, gdp) ||
-                       in_range(block, ext4_inode_table(sb, gdp),
-                               EXT4_SB(sb)->s_itb_per_group)) {
-
-               ext4_error(sb, __FUNCTION__,
+       len = ac->ac_b_ex.fe_len;
+       if (in_range(ext4_block_bitmap(sb, gdp), block, len) ||
+           in_range(ext4_inode_bitmap(sb, gdp), block, len) ||
+           in_range(block, ext4_inode_table(sb, gdp),
+                    EXT4_SB(sb)->s_itb_per_group) ||
+           in_range(block + len - 1, ext4_inode_table(sb, gdp),
+                    EXT4_SB(sb)->s_itb_per_group)) {
+               ext4_error(sb, __func__,
                           "Allocating block in system zone - block = %llu",
                           block);
+               /* File system mounted not to panic on error
+                * Fix the bitmap and repeat the block allocation
+                * We leak some of the blocks here.
+                */
+               mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group),
+                               bitmap_bh->b_data, ac->ac_b_ex.fe_start,
+                               ac->ac_b_ex.fe_len);
+               err = ext4_journal_dirty_metadata(handle, bitmap_bh);
+               if (!err)
+                       err = -EAGAIN;
+               goto out_err;
        }
 #ifdef AGGRESSIVE_CHECK
        {
@@ -3094,9 +2837,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
                                                ac->ac_b_ex.fe_group,
                                                gdp));
        }
-       gdp->bg_free_blocks_count =
-               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
-                               - ac->ac_b_ex.fe_len);
+       le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
        percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3130,7 +2871,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
        else
                ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
-       mb_debug("#%u: goal %lu blocks for locality group\n",
+       mb_debug("#%u: goal %u blocks for locality group\n",
                current->pid, ac->ac_g_ex.fe_len);
 }
 
@@ -3138,15 +2879,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
  * Normalization means making request better in terms of
  * size and alignment
  */
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static noinline_for_stack void
+ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        int bsbits, max;
        ext4_lblk_t end;
-       struct list_head *cur;
        loff_t size, orig_size, start_off;
        ext4_lblk_t start, orig_start;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+       struct ext4_prealloc_space *pa;
 
        /* do normalize only data requests, metadata requests
           do not need preallocation */
@@ -3176,12 +2918,11 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
        if (size < i_size_read(ac->ac_inode))
                size = i_size_read(ac->ac_inode);
 
-       /* max available blocks in a free group */
-       max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 -
-                               EXT4_SB(ac->ac_sb)->s_itb_per_group;
+       /* max size of free chunks */
+       max = 2 << bsbits;
 
-#define NRL_CHECK_SIZE(req, size, max,bits)    \
-               (req <= (size) || max <= ((size) >> bits))
+#define NRL_CHECK_SIZE(req, size, max, chunk_size)     \
+               (req <= (size) || max <= (chunk_size))
 
        /* first, try to predict filesize */
        /* XXX: should this table be tunable? */
@@ -3200,16 +2941,16 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
                size = 512 * 1024;
        } else if (size <= 1024 * 1024) {
                size = 1024 * 1024;
-       } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) {
+       } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
-                                               (20 - bsbits)) << 20;
-               size = 1024 * 1024;
-       } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) {
+                                               (21 - bsbits)) << 21;
+               size = 2 * 1024 * 1024;
+       } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
                                                        (22 - bsbits)) << 22;
                size = 4 * 1024 * 1024;
        } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len,
-                                       (8<<20)>>bsbits, max, bsbits)) {
+                                       (8<<20)>>bsbits, max, 8 * 1024)) {
                start_off = ((loff_t)ac->ac_o_ex.fe_logical >>
                                                        (23 - bsbits)) << 23;
                size = 8 * 1024 * 1024;
@@ -3232,12 +2973,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
        /* check we don't cross already preallocated blocks */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               struct ext4_prealloc_space *pa;
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
                unsigned long pa_end;
 
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-
                if (pa->pa_deleted)
                        continue;
                spin_lock(&pa->pa_lock);
@@ -3279,10 +3017,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 
        /* XXX: extra loop to check we really don't overlap preallocations */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               struct ext4_prealloc_space *pa;
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
                unsigned long pa_end;
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0) {
                        pa_end = pa->pa_lstart + pa->pa_len;
@@ -3374,7 +3110,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
        BUG_ON(pa->pa_free < len);
        pa->pa_free -= len;
 
-       mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa);
+       mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
 }
 
 /*
@@ -3383,8 +3119,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
 static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
                                struct ext4_prealloc_space *pa)
 {
-       unsigned len = ac->ac_o_ex.fe_len;
-
+       unsigned int len = ac->ac_o_ex.fe_len;
        ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
                                        &ac->ac_b_ex.fe_group,
                                        &ac->ac_b_ex.fe_start);
@@ -3404,12 +3139,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
 /*
  * search goal blocks in preallocated space
  */
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 {
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
        struct ext4_prealloc_space *pa;
-       struct list_head *cur;
 
        /* only data can be preallocated */
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3417,8 +3152,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
 
        /* first, try per-file preallocation */
        rcu_read_lock();
-       list_for_each_rcu(cur, &ei->i_prealloc_list) {
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+       list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
 
                /* all fields in this condition don't change,
                 * so we can skip locking for them */
@@ -3450,8 +3184,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                return 0;
 
        rcu_read_lock();
-       list_for_each_rcu(cur, &lg->lg_prealloc_list) {
-               pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+       list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
                spin_lock(&pa->pa_lock);
                if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
                        atomic_inc(&pa->pa_count);
@@ -3571,7 +3304,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
 /*
  * creates new preallocated space for given inode
  */
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_prealloc_space *pa;
@@ -3658,7 +3392,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
 /*
  * creates new preallocated space for locality group inodes belongs to
  */
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
 {
        struct super_block *sb = ac->ac_sb;
        struct ext4_locality_group *lg;
@@ -3731,11 +3466,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  * the caller MUST hold group/inode locks.
  * TODO: optimize the case when there are no in-core structures yet
  */
-static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
-                               struct buffer_head *bitmap_bh,
-                               struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
+                       struct ext4_prealloc_space *pa,
+                       struct ext4_allocation_context *ac)
 {
-       struct ext4_allocation_context *ac;
        struct super_block *sb = e4b->bd_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        unsigned long end;
@@ -3751,8 +3486,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
        BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
        end = bit + pa->pa_len;
 
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-
        if (ac) {
                ac->ac_sb = sb;
                ac->ac_inode = pa->pa_inode;
@@ -3764,8 +3497,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                if (bit >= end)
                        break;
                next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
-               if (next > end)
-                       next = end;
                start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
                                le32_to_cpu(sbi->s_es->s_first_data_block);
                mb_debug("    free preallocated %u/%u in group %u\n",
@@ -3789,7 +3520,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                        pa, (unsigned long) pa->pa_lstart,
                        (unsigned long) pa->pa_pstart,
                        (unsigned long) pa->pa_len);
-               ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
+               ext4_error(sb, __func__, "free %u, pa_free %u\n",
                                                free, pa->pa_free);
                /*
                 * pa is already deleted so we use the value obtained
@@ -3797,22 +3528,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
                 */
        }
        atomic_add(free, &sbi->s_mb_discarded);
-       if (ac)
-               kmem_cache_free(ext4_ac_cachep, ac);
 
        return err;
 }
 
-static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
-                               struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+                               struct ext4_prealloc_space *pa,
+                               struct ext4_allocation_context *ac)
 {
-       struct ext4_allocation_context *ac;
        struct super_block *sb = e4b->bd_sb;
        ext4_group_t group;
        ext4_grpblk_t bit;
 
-       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-
        if (ac)
                ac->ac_op = EXT4_MB_HISTORY_DISCARD;
 
@@ -3830,7 +3558,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
                ac->ac_b_ex.fe_len = pa->pa_len;
                ac->ac_b_ex.fe_logical = 0;
                ext4_mb_store_history(ac);
-               kmem_cache_free(ext4_ac_cachep, ac);
        }
 
        return 0;
@@ -3845,12 +3572,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
  * - how many do we discard
  *   1) how many requested
  */
-static int ext4_mb_discard_group_preallocations(struct super_block *sb,
+static noinline_for_stack int
+ext4_mb_discard_group_preallocations(struct super_block *sb,
                                        ext4_group_t group, int needed)
 {
        struct ext4_group_info *grp = ext4_get_group_info(sb, group);
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+       struct ext4_allocation_context *ac;
        struct list_head list;
        struct ext4_buddy e4b;
        int err;
@@ -3862,7 +3591,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
        if (list_empty(&grp->bb_prealloc_list))
                return 0;
 
-       bitmap_bh = read_block_bitmap(sb, group);
+       bitmap_bh = ext4_read_block_bitmap(sb, group);
        if (bitmap_bh == NULL) {
                /* error handling here */
                ext4_mb_release_desc(&e4b);
@@ -3878,6 +3607,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
        grp = ext4_get_group_info(sb, group);
        INIT_LIST_HEAD(&list);
 
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        ext4_lock_group(sb, group);
        list_for_each_entry_safe(pa, tmp,
@@ -3932,9 +3662,9 @@ repeat:
                spin_unlock(pa->pa_obj_lock);
 
                if (pa->pa_linear)
-                       ext4_mb_release_group_pa(&e4b, pa);
+                       ext4_mb_release_group_pa(&e4b, pa, ac);
                else
-                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+                       ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
 
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3942,6 +3672,8 @@ repeat:
 
 out:
        ext4_unlock_group(sb, group);
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
        ext4_mb_release_desc(&e4b);
        put_bh(bitmap_bh);
        return free;
@@ -3962,6 +3694,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        struct buffer_head *bitmap_bh = NULL;
        struct ext4_prealloc_space *pa, *tmp;
+       struct ext4_allocation_context *ac;
        ext4_group_t group = 0;
        struct list_head list;
        struct ext4_buddy e4b;
@@ -3976,6 +3709,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
 
        INIT_LIST_HEAD(&list);
 
+       ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
 repeat:
        /* first, collect all pa's in the inode */
        spin_lock(&ei->i_prealloc_lock);
@@ -4031,7 +3765,7 @@ repeat:
                err = ext4_mb_load_buddy(sb, group, &e4b);
                BUG_ON(err != 0); /* error handling here */
 
-               bitmap_bh = read_block_bitmap(sb, group);
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
                if (bitmap_bh == NULL) {
                        /* error handling here */
                        ext4_mb_release_desc(&e4b);
@@ -4040,7 +3774,7 @@ repeat:
 
                ext4_lock_group(sb, group);
                list_del(&pa->pa_group_list);
-               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
 
                ext4_mb_release_desc(&e4b);
@@ -4049,6 +3783,8 @@ repeat:
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
+       if (ac)
+               kmem_cache_free(ext4_ac_cachep, ac);
 }
 
 /*
@@ -4108,7 +3844,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
                        printk(KERN_ERR "PA:%lu:%d:%u \n", i,
                                                        start, pa->pa_len);
                }
-               ext4_lock_group(sb, i);
+               ext4_unlock_group(sb, i);
 
                if (grp->bb_free == 0)
                        continue;
@@ -4167,7 +3903,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
        mutex_lock(&ac->ac_lg->lg_mutex);
 }
 
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_initialize_context(struct ext4_allocation_context *ac,
                                struct ext4_allocation_request *ar)
 {
        struct super_block *sb = ar->inode->i_sb;
@@ -4330,7 +4067,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 
                ac->ac_op = EXT4_MB_HISTORY_ALLOC;
                ext4_mb_normalize_request(ac, ar);
-
 repeat:
                /* allocate space in core */
                ext4_mb_regular_allocator(ac);
@@ -4344,10 +4080,21 @@ repeat:
        }
 
        if (likely(ac->ac_status == AC_STATUS_FOUND)) {
-               ext4_mb_mark_diskspace_used(ac, handle);
-               *errp = 0;
-               block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
-               ar->len = ac->ac_b_ex.fe_len;
+               *errp = ext4_mb_mark_diskspace_used(ac, handle);
+               if (*errp ==  -EAGAIN) {
+                       ac->ac_b_ex.fe_group = 0;
+                       ac->ac_b_ex.fe_start = 0;
+                       ac->ac_b_ex.fe_len = 0;
+                       ac->ac_status = AC_STATUS_CONTINUE;
+                       goto repeat;
+               } else if (*errp) {
+                       ac->ac_b_ex.fe_len = 0;
+                       ar->len = 0;
+                       ext4_mb_show_ac(ac);
+               } else {
+                       block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex);
+                       ar->len = ac->ac_b_ex.fe_len;
+               }
        } else {
                freed  = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len);
                if (freed)
@@ -4398,7 +4145,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
        ext4_mb_free_committed_blocks(sb);
 }
 
-static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
+static noinline_for_stack int
+ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
                          ext4_group_t group, ext4_grpblk_t block, int count)
 {
        struct ext4_group_info *db = e4b->bd_info;
@@ -4489,7 +4237,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
        if (block < le32_to_cpu(es->s_first_data_block) ||
            block + count < block ||
            block + count > ext4_blocks_count(es)) {
-               ext4_error(sb, __FUNCTION__,
+               ext4_error(sb, __func__,
                            "Freeing blocks not in datazone - "
                            "block = %lu, count = %lu", block, count);
                goto error_return;
@@ -4516,7 +4264,7 @@ do_more:
                overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb);
                count -= overflow;
        }
-       bitmap_bh = read_block_bitmap(sb, block_group);
+       bitmap_bh = ext4_read_block_bitmap(sb, block_group);
        if (!bitmap_bh)
                goto error_return;
        gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
@@ -4530,9 +4278,11 @@ do_more:
            in_range(block + count - 1, ext4_inode_table(sb, gdp),
                      EXT4_SB(sb)->s_itb_per_group)) {
 
-               ext4_error(sb, __FUNCTION__,
+               ext4_error(sb, __func__,
                           "Freeing blocks in system zone - "
                           "Block = %lu, count = %lu", block, count);
+               /* err = 0. ext4_std_error should be a no op */
+               goto error_return;
        }
 
        BUFFER_TRACE(bitmap_bh, "getting write access");
@@ -4581,15 +4331,13 @@ do_more:
                ext4_mb_free_metadata(handle, &e4b, block_group, bit, count);
        } else {
                ext4_lock_group(sb, block_group);
-               err = mb_free_blocks(inode, &e4b, bit, count);
+               mb_free_blocks(inode, &e4b, bit, count);
                ext4_mb_return_to_preallocation(inode, &e4b, block, count);
                ext4_unlock_group(sb, block_group);
-               BUG_ON(err != 0);
        }
 
        spin_lock(sb_bgl_lock(sbi, block_group));
-       gdp->bg_free_blocks_count =
-               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
+       le16_add_cpu(&gdp->bg_free_blocks_count, count);
        gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
        spin_unlock(sb_bgl_lock(sbi, block_group));
        percpu_counter_add(&sbi->s_freeblocks_counter, count);