Merge branch 'for-linus' of git://git.o-hand.com/linux-rpurdie-leds
[pandora-kernel.git] / fs / ext4 / mballoc.c
index b423a36..12b3bc0 100644 (file)
@@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
        }
 }
 
+/*
+ * Cache the order of the largest free extent available in this block group
+ * in grp->bb_largest_free_order, or -1 if no bb_counters[] entry is non-zero.
+ */
+static void
+mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp)
+{
+       int order;
+
+       /*
+        * Walk down from the top order (s_blocksize_bits + 1) and stop at the
+        * first populated counter; falling off the bottom leaves order == -1.
+        */
+       for (order = sb->s_blocksize_bits + 1; order >= 0; order--)
+               if (grp->bb_counters[order] > 0)
+                       break;
+
+       /* Record the outcome with a single assignment. */
+       grp->bb_largest_free_order = order;
+}
+
 static noinline_for_stack
 void ext4_mb_generate_buddy(struct super_block *sb,
                                void *buddy, void *bitmap, ext4_group_t group)
@@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb,
                 */
                grp->bb_free = free;
        }
+       mb_set_largest_free_order(sb, grp);
 
        clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
 
@@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb,
  * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize)  blocks.
  * So it can have information regarding groups_per_page which
  * is blocks_per_page/2
+ *
+ * Locking note:  This routine takes the block group lock of all groups
+ * for this page; do not hold this lock when calling this routine!
  */
 
 static int ext4_mb_init_cache(struct page *page, char *incore)
@@ -865,6 +890,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore == NULL);
                        mb_debug(1, "put buddy for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_buddy_bitmap_load(sb, group);
                        grinfo = ext4_get_group_info(sb, group);
                        grinfo->bb_fragments = 0;
                        memset(grinfo->bb_counters, 0,
@@ -882,6 +908,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
                        BUG_ON(incore != NULL);
                        mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
                                group, page->index, i * blocksize);
+                       trace_ext4_mb_bitmap_load(sb, group);
 
                        /* see comments in ext4_mb_put_pa() */
                        ext4_lock_group(sb, group);
@@ -910,6 +937,11 @@ out:
        return err;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack
 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
@@ -1004,6 +1036,11 @@ err:
        return ret;
 }
 
+/*
+ * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
+ * block group lock of all groups for this page; do not hold the BG lock when
+ * calling this routine!
+ */
 static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                                        struct ext4_buddy *e4b)
@@ -1150,7 +1187,7 @@ err:
        return ret;
 }
 
-static void ext4_mb_release_desc(struct ext4_buddy *e4b)
+static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
 {
        if (e4b->bd_bitmap_page)
                page_cache_release(e4b->bd_bitmap_page);
@@ -1299,6 +1336,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
                        buddy = buddy2;
                } while (1);
        }
+       mb_set_largest_free_order(sb, e4b->bd_info);
        mb_check_buddy(e4b);
 }
 
@@ -1427,6 +1465,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
                e4b->bd_info->bb_counters[ord]++;
                e4b->bd_info->bb_counters[ord]++;
        }
+       mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
 
        mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0);
        mb_check_buddy(e4b);
@@ -1617,7 +1656,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
        }
 
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1672,7 +1711,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
                ext4_mb_use_best_found(ac, e4b);
        }
        ext4_unlock_group(ac->ac_sb, group);
-       ext4_mb_release_desc(e4b);
+       ext4_mb_unload_buddy(e4b);
 
        return 0;
 }
@@ -1821,16 +1860,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
        }
 }
 
+/* This is now called BEFORE we load the buddy bitmap. */
 static int ext4_mb_good_group(struct ext4_allocation_context *ac,
                                ext4_group_t group, int cr)
 {
        unsigned free, fragments;
-       unsigned i, bits;
        int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb));
        struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
 
        BUG_ON(cr < 0 || cr >= 4);
-       BUG_ON(EXT4_MB_GRP_NEED_INIT(grp));
+
+       /* We only do this if the grp has never been initialized */
+       if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
+               int ret = ext4_mb_init_group(ac->ac_sb, group);
+               if (ret)
+                       return 0;
+       }
 
        free = grp->bb_free;
        fragments = grp->bb_fragments;
@@ -1843,17 +1888,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        case 0:
                BUG_ON(ac->ac_2order == 0);
 
+               if (grp->bb_largest_free_order < ac->ac_2order)
+                       return 0;
+
                /* Avoid using the first bg of a flexgroup for data files */
                if ((ac->ac_flags & EXT4_MB_HINT_DATA) &&
                    (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) &&
                    ((group % flex_size) == 0))
                        return 0;
 
-               bits = ac->ac_sb->s_blocksize_bits + 1;
-               for (i = ac->ac_2order; i <= bits; i++)
-                       if (grp->bb_counters[i] > 0)
-                               return 1;
-               break;
+               return 1;
        case 1:
                if ((free / fragments) >= ac->ac_g_ex.fe_len)
                        return 1;
@@ -1964,7 +2008,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
        sbi = EXT4_SB(sb);
        ngroups = ext4_get_groups_count(sb);
        /* non-extent files are limited to low blocks/groups */
-       if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)))
                ngroups = sbi->s_blockfile_groups;
 
        BUG_ON(ac->ac_status == AC_STATUS_FOUND);
@@ -2024,15 +2068,11 @@ repeat:
                group = ac->ac_g_ex.fe_group;
 
                for (i = 0; i < ngroups; group++, i++) {
-                       struct ext4_group_info *grp;
-                       struct ext4_group_desc *desc;
-
                        if (group == ngroups)
                                group = 0;
 
-                       /* quick check to skip empty groups */
-                       grp = ext4_get_group_info(sb, group);
-                       if (grp->bb_free == 0)
+                       /* This now checks without needing the buddy page */
+                       if (!ext4_mb_good_group(ac, group, cr))
                                continue;
 
                        err = ext4_mb_load_buddy(sb, group, &e4b);
@@ -2040,15 +2080,18 @@ repeat:
                                goto out;
 
                        ext4_lock_group(sb, group);
+
+                       /*
+                        * We need to check again after locking the
+                        * block group
+                        */
                        if (!ext4_mb_good_group(ac, group, cr)) {
-                               /* someone did allocation from this group */
                                ext4_unlock_group(sb, group);
-                               ext4_mb_release_desc(&e4b);
+                               ext4_mb_unload_buddy(&e4b);
                                continue;
                        }
 
                        ac->ac_groups_scanned++;
-                       desc = ext4_get_group_desc(sb, group, NULL);
                        if (cr == 0)
                                ext4_mb_simple_scan_group(ac, &e4b);
                        else if (cr == 1 &&
@@ -2058,7 +2101,7 @@ repeat:
                                ext4_mb_complex_scan_group(ac, &e4b);
 
                        ext4_unlock_group(sb, group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
 
                        if (ac->ac_status != AC_STATUS_CONTINUE)
                                break;
@@ -2148,7 +2191,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        ext4_lock_group(sb, group);
        memcpy(&sg, ext4_get_group_info(sb, group), i);
        ext4_unlock_group(sb, group);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
                        sg.info.bb_fragments, sg.info.bb_first_free);
@@ -2255,6 +2298,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
        init_rwsem(&meta_group_info[i]->alloc_sem);
        meta_group_info[i]->bb_free_root = RB_ROOT;
+       meta_group_info[i]->bb_largest_free_order = -1;  /* uninit */
 
 #ifdef DOUBLE_CHECK
        {
@@ -2536,6 +2580,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                         entry->count, entry->group, entry);
 
                if (test_opt(sb, DISCARD)) {
+                       int ret;
                        ext4_fsblk_t discard_block;
 
                        discard_block = entry->start_blk +
@@ -2543,7 +2588,12 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                        trace_ext4_discard_blocks(sb,
                                        (unsigned long long)discard_block,
                                        entry->count);
-                       sb_issue_discard(sb, discard_block, entry->count);
+                       ret = sb_issue_discard(sb, discard_block, entry->count);
+                       if (ret == -EOPNOTSUPP) { /* negative errno */
+                               ext4_warning(sb,
+                                       "discard not supported, disabling");
+                               clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
+                       }
                }
 
                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
@@ -2568,7 +2618,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
                }
                ext4_unlock_group(sb, entry->group);
                kmem_cache_free(ext4_free_ext_cachep, entry);
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
        }
 
        mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
@@ -2641,7 +2691,7 @@ int __init init_ext4_mballoc(void)
 
 void exit_ext4_mballoc(void)
 {
-       /* 
+       /*
         * Wait for completion of call_rcu()'s on ext4_pspace_cachep
         * before destroying the slab cache.
         */
@@ -2981,7 +3031,7 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
        if (sbi->s_mb_stats && ac->ac_g_ex.fe_len > 1) {
                atomic_inc(&sbi->s_bal_reqs);
                atomic_add(ac->ac_b_ex.fe_len, &sbi->s_bal_allocated);
-               if (ac->ac_o_ex.fe_len >= ac->ac_g_ex.fe_len)
+               if (ac->ac_b_ex.fe_len >= ac->ac_o_ex.fe_len)
                        atomic_inc(&sbi->s_bal_success);
                atomic_add(ac->ac_found, &sbi->s_bal_ex_scanned);
                if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
@@ -3123,7 +3173,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
                        continue;
 
                /* non-extent files can't have physical blocks past 2^32 */
-               if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
+               if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
                        pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
                        continue;
 
@@ -3280,7 +3330,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
        spin_unlock(&pa->pa_lock);
 
        grp_blk = pa->pa_pstart;
-       /* 
+       /*
         * If doing group-based preallocation, pa_pstart may be in the
         * next group when pa is used up
         */
@@ -3697,7 +3747,7 @@ out:
        ext4_unlock_group(sb, group);
        if (ac)
                kmem_cache_free(ext4_ac_cachep, ac);
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
        put_bh(bitmap_bh);
        return free;
 }
@@ -3801,7 +3851,7 @@ repeat:
                if (bitmap_bh == NULL) {
                        ext4_error(sb, "Error reading block bitmap for %u",
                                        group);
-                       ext4_mb_release_desc(&e4b);
+                       ext4_mb_unload_buddy(&e4b);
                        continue;
                }
 
@@ -3810,7 +3860,7 @@ repeat:
                ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                put_bh(bitmap_bh);
 
                list_del(&pa->u.pa_tmp_list);
@@ -4074,7 +4124,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
                ext4_mb_release_group_pa(&e4b, pa, ac);
                ext4_unlock_group(sb, group);
 
-               ext4_mb_release_desc(&e4b);
+               ext4_mb_unload_buddy(&e4b);
                list_del(&pa->u.pa_tmp_list);
                call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
        }
@@ -4484,12 +4534,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
                        if (!bh)
                                tbh = sb_find_get_block(inode->i_sb,
                                                        block + i);
-                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, 
+                       ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
                                    inode, tbh, block + i);
                }
        }
 
-       /* 
+       /*
         * We need to make sure we don't reuse the freed block until
         * after the transaction is committed, which we can do by
         * treating the block as metadata, below.  We make an
@@ -4610,7 +4660,7 @@ do_more:
                atomic_add(count, &sbi->s_flex_groups[flex_group].free_blocks);
        }
 
-       ext4_mb_release_desc(&e4b);
+       ext4_mb_unload_buddy(&e4b);
 
        freed += count;