Merge commit 'v2.6.37-rc8' into x86/apic
[pandora-kernel.git] / fs / ext4 / super.c
index 158d1bc..fb15c9c 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/parser.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
 #include <linux/vfs.h>
@@ -74,8 +73,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
 static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
-                      const char *dev_name, void *data, struct vfsmount *mnt);
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+                      const char *dev_name, void *data);
 static void ext4_destroy_lazyinit_thread(void);
 static void ext4_unregister_li_request(struct super_block *sb);
 
@@ -83,7 +82,7 @@ static void ext4_unregister_li_request(struct super_block *sb);
 static struct file_system_type ext3_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext3",
-       .get_sb         = ext4_get_sb,
+       .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
@@ -717,7 +716,6 @@ static void ext4_put_super(struct super_block *sb)
        destroy_workqueue(sbi->dio_unwritten_wq);
 
        lock_super(sb);
-       lock_kernel();
        if (sb->s_dirt)
                ext4_commit_super(sb, 1);
 
@@ -785,7 +783,6 @@ static void ext4_put_super(struct super_block *sb)
         * Now that we are completely done shutting down the
         * superblock, we need to actually destroy the kobject.
         */
-       unlock_kernel();
        unlock_super(sb);
        kobject_put(&sbi->s_kobj);
        wait_for_completion(&sbi->s_kobj_unregister);
@@ -831,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->cur_aio_dio = NULL;
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
+       atomic_set(&ei->i_ioend_count, 0);
 
        return &ei->vfs_inode;
 }
 
+static int ext4_drop_inode(struct inode *inode)
+{      /* .drop_inode op: wraps generic_drop_inode() so the result is traced */
+       int drop = generic_drop_inode(inode);   /* generic helper decides */
+
+       trace_ext4_drop_inode(inode, drop);     /* report inode + decision */
+       return drop;                            /* passed through unchanged */
+}
+
 static void ext4_destroy_inode(struct inode *inode)
 {
+       ext4_ioend_wait(inode);
        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
                ext4_msg(inode->i_sb, KERN_ERR,
                         "Inode %lu (%p): orphan list check failed!",
@@ -1019,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
            !(def_mount_opts & EXT4_DEFM_NODELALLOC))
                seq_puts(seq, ",nodelalloc");
 
+       if (test_opt(sb, MBLK_IO_SUBMIT))
+               seq_puts(seq, ",mblk_io_submit");
        if (sbi->s_stripe)
                seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
        /*
@@ -1176,6 +1185,7 @@ static const struct super_operations ext4_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+       .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
@@ -1189,7 +1199,6 @@ static const struct super_operations ext4_sops = {
        .quota_write    = ext4_quota_write,
 #endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
-       .trim_fs        = ext4_trim_fs
 };
 
 static const struct super_operations ext4_nojournal_sops = {
@@ -1197,6 +1206,7 @@ static const struct super_operations ext4_nojournal_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+       .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .write_super    = ext4_write_super,
        .put_super      = ext4_put_super,
@@ -1231,8 +1241,8 @@ enum {
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
        Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
        Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
-       Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-       Opt_block_validity, Opt_noblock_validity,
+       Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+       Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
        Opt_inode_readahead_blks, Opt_journal_ioprio,
        Opt_dioread_nolock, Opt_dioread_lock,
        Opt_discard, Opt_nodiscard,
@@ -1296,6 +1306,8 @@ static const match_table_t tokens = {
        {Opt_resize, "resize"},
        {Opt_delalloc, "delalloc"},
        {Opt_nodelalloc, "nodelalloc"},
+       {Opt_mblk_io_submit, "mblk_io_submit"},
+       {Opt_nomblk_io_submit, "nomblk_io_submit"},
        {Opt_block_validity, "block_validity"},
        {Opt_noblock_validity, "noblock_validity"},
        {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1717,6 +1729,12 @@ set_qf_format:
                case Opt_nodelalloc:
                        clear_opt(sbi->s_mount_opt, DELALLOC);
                        break;
+               case Opt_mblk_io_submit:
+                       set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+                       break;
+               case Opt_nomblk_io_submit:
+                       clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+                       break;
                case Opt_stripe:
                        if (match_int(&args[0], &option))
                                return 0;
@@ -2702,7 +2720,6 @@ static int ext4_lazyinit_thread(void *arg)
        struct ext4_li_request *elr;
        unsigned long next_wakeup;
        DEFINE_WAIT(wait);
-       int ret;
 
        BUG_ON(NULL == eli);
 
@@ -2726,13 +2743,12 @@ cont_thread:
                        elr = list_entry(pos, struct ext4_li_request,
                                         lr_request);
 
-                       if (time_after_eq(jiffies, elr->lr_next_sched))
-                               ret = ext4_run_li_request(elr);
-
-                       if (ret) {
-                               ret = 0;
-                               ext4_remove_li_request(elr);
-                               continue;
+                       if (time_after_eq(jiffies, elr->lr_next_sched)) {
+                               if (ext4_run_li_request(elr) != 0) {
+                                       /* error, remove the lazy_init job */
+                                       ext4_remove_li_request(elr);
+                                       continue;
+                               }
                        }
 
                        if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2743,7 +2759,8 @@ cont_thread:
                if (freezing(current))
                        refrigerator();
 
-               if (time_after_eq(jiffies, next_wakeup)) {
+               if ((time_after_eq(jiffies, next_wakeup)) ||
+                   (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }
@@ -2791,9 +2808,6 @@ static void ext4_clear_request_list(void)
        struct ext4_li_request *elr;
 
        mutex_lock(&ext4_li_info->li_list_mtx);
-       if (list_empty(&ext4_li_info->li_request_list))
-               return;
-
        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
                elr = list_entry(pos, struct ext4_li_request,
                                 lr_request);
@@ -2902,28 +2916,26 @@ static int ext4_register_li_request(struct super_block *sb,
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_li_request *elr;
        ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
-       int ret = 0;
+       int ret;
 
        if (sbi->s_li_request != NULL)
-               goto out;
+               return 0;
 
        if (first_not_zeroed == ngroups ||
            (sb->s_flags & MS_RDONLY) ||
            !test_opt(sb, INIT_INODE_TABLE)) {
                sbi->s_li_request = NULL;
-               goto out;
+               return 0;
        }
 
        if (first_not_zeroed == ngroups) {
                sbi->s_li_request = NULL;
-               goto out;
+               return 0;
        }
 
        elr = ext4_li_request_new(sb, first_not_zeroed);
-       if (!elr) {
-               ret = -ENOMEM;
-               goto out;
-       }
+       if (!elr)
+               return -ENOMEM;
 
        mutex_lock(&ext4_li_mtx);
 
@@ -2944,14 +2956,10 @@ static int ext4_register_li_request(struct super_block *sb,
                if (ret)
                        goto out;
        }
-
-       mutex_unlock(&ext4_li_mtx);
-
 out:
-       if (ret) {
-               mutex_unlock(&ext4_li_mtx);
+       mutex_unlock(&ext4_li_mtx);
+       if (ret)
                kfree(elr);
-       }
        return ret;
 }
 
@@ -3024,8 +3032,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                sbi->s_sectors_written_start =
                        part_stat_read(sb->s_bdev->bd_part, sectors[1]);
 
-       unlock_kernel();
-
        /* Cleanup superblock name */
        for (cp = sb->s_id; (cp = strchr(cp, '/'));)
                *cp = '!';
@@ -3268,15 +3274,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         * Test whether we have more sectors than will fit in sector_t,
         * and whether the max offset is addressable by the page cache.
         */
-       if ((ext4_blocks_count(es) >
-            (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
-           (ext4_blocks_count(es) >
-            (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
+       err = generic_check_addressable(sb->s_blocksize_bits,
+                                       ext4_blocks_count(es));
+       if (err) {
                ext4_msg(sb, KERN_ERR, "filesystem"
                         " too large to mount safely on this system");
                if (sizeof(sector_t) < 8)
                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
-               ret = -EFBIG;
+               ret = err;
                goto failed_mount;
        }
 
@@ -3361,6 +3366,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
 
+       err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                       ext4_count_free_blocks(sb));
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                               ext4_count_free_inodes(sb));
+       }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_dirs_counter,
+                               ext4_count_dirs(sb));
+       }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+       }
+       if (err) {
+               ext4_msg(sb, KERN_ERR, "insufficient memory");
+               goto failed_mount3;
+       }
+
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
 
@@ -3459,22 +3482,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
-no_journal:
-       err = percpu_counter_init(&sbi->s_freeblocks_counter,
-                                 ext4_count_free_blocks(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_freeinodes_counter,
-                                         ext4_count_free_inodes(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_dirs_counter,
-                                         ext4_count_dirs(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "insufficient memory");
-               goto failed_mount_wq;
-       }
+       /*
+        * The journal may have updated the bg summary counts, so we
+        * need to update the global counters.
+        */
+       percpu_counter_set(&sbi->s_freeblocks_counter,
+                          ext4_count_free_blocks(sb));
+       percpu_counter_set(&sbi->s_freeinodes_counter,
+                          ext4_count_free_inodes(sb));
+       percpu_counter_set(&sbi->s_dirs_counter,
+                          ext4_count_dirs(sb));
+       percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
 
+no_journal:
        EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3607,7 +3627,6 @@ no_journal:
        if (es->s_error_count)
                mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
 
-       lock_kernel();
        kfree(orig_data);
        return 0;
 
@@ -3625,10 +3644,6 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
-       percpu_counter_destroy(&sbi->s_freeinodes_counter);
-       percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3636,6 +3651,10 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
+       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeinodes_counter);
+       percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -3654,7 +3673,6 @@ out_fail:
        sb->s_fs_info = NULL;
        kfree(sbi->s_blockgroup_lock);
        kfree(sbi);
-       lock_kernel();
 out_free_orig:
        kfree(orig_data);
        return ret;
@@ -3964,13 +3982,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        else
                es->s_kbytes_written =
                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
-               ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-                                       &EXT4_SB(sb)->s_freeblocks_counter));
-       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
-               es->s_free_inodes_count =
-                       cpu_to_le32(percpu_counter_sum_positive(
-                                       &EXT4_SB(sb)->s_freeinodes_counter));
+       ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                          &EXT4_SB(sb)->s_freeblocks_counter));
+       es->s_free_inodes_count =
+               cpu_to_le32(percpu_counter_sum_positive(
+                               &EXT4_SB(sb)->s_freeinodes_counter));
        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
@@ -4166,8 +4182,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 #endif
        char *orig_data = kstrdup(data, GFP_KERNEL);
 
-       lock_kernel();
-
        /* Store the original options */
        lock_super(sb);
        old_sb_flags = sb->s_flags;
@@ -4315,7 +4329,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                        kfree(old_opts.s_qf_names[i]);
 #endif
        unlock_super(sb);
-       unlock_kernel();
        if (enable_quota)
                dquot_resume(sb, -1);
 
@@ -4341,7 +4354,6 @@ restore_opts:
        }
 #endif
        unlock_super(sb);
-       unlock_kernel();
        kfree(orig_data);
        return err;
 }
@@ -4575,12 +4587,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 static int ext4_quota_off(struct super_block *sb, int type)
 {
-       /* Force all delayed allocation blocks to be allocated */
-       if (test_opt(sb, DELALLOC)) {
-               down_read(&sb->s_umount);
+       /* Force all delayed allocation blocks to be allocated.
+        * Caller already holds s_umount sem */
+       if (test_opt(sb, DELALLOC))
                sync_filesystem(sb);
-               up_read(&sb->s_umount);
-       }
 
        return dquot_quota_off(sb, type);
 }
@@ -4686,17 +4696,17 @@ out:
 
 #endif
 
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
-                      const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+                      const char *dev_name, void *data)
 {
-       return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+       return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
 }
 
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext2_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext2",
-       .get_sb         = ext4_get_sb,
+       .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };
@@ -4741,7 +4751,7 @@ static inline void unregister_as_ext3(void) { }
 static struct file_system_type ext4_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "ext4",
-       .get_sb         = ext4_get_sb,
+       .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
 };