ext4: sanity check the block and cluster size at mount time
[pandora-kernel.git] / fs / ext4 / super.c
index a93486e..e3ccddb 100644 (file)
@@ -463,9 +463,13 @@ static void ext4_handle_error(struct super_block *sb)
                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                sb->s_flags |= MS_RDONLY;
        }
-       if (test_opt(sb, ERRORS_PANIC))
+       if (test_opt(sb, ERRORS_PANIC)) {
+               if (EXT4_SB(sb)->s_journal &&
+                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+                       return;
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
+       }
 }
 
 void __ext4_error(struct super_block *sb, const char *function,
@@ -628,8 +632,12 @@ void __ext4_abort(struct super_block *sb, const char *function,
                        jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
                save_error_info(sb, function, line);
        }
-       if (test_opt(sb, ERRORS_PANIC))
+       if (test_opt(sb, ERRORS_PANIC)) {
+               if (EXT4_SB(sb)->s_journal &&
+                 !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+                       return;
                panic("EXT4-fs panic from previous error\n");
+       }
 }
 
 void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...)
@@ -819,7 +827,7 @@ static void ext4_put_super(struct super_block *sb)
                        ext4_abort(sb, "Couldn't clean up the journal");
        }
 
-       del_timer(&sbi->s_err_report);
+       del_timer_sync(&sbi->s_err_report);
        ext4_release_system_zone(sb);
        ext4_mb_release(sb);
        ext4_ext_release(sb);
@@ -857,6 +865,7 @@ static void ext4_put_super(struct super_block *sb)
                dump_orphan_list(sb, sbi);
        J_ASSERT(list_empty(&sbi->s_orphan));
 
+       sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
                /*
@@ -904,6 +913,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->i_reserved_meta_blocks = 0;
        ei->i_allocated_meta_blocks = 0;
        ei->i_da_metadata_calc_len = 0;
+       ei->i_da_metadata_calc_last_lblock = 0;
        spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
        ei->i_reserved_quota = 0;
@@ -1016,10 +1026,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
        }
 
        if (sbi->s_qf_names[USRQUOTA])
-               seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+               seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
 
        if (sbi->s_qf_names[GRPQUOTA])
-               seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+               seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
 
        if (test_opt(sb, USRQUOTA))
                seq_puts(seq, ",usrquota");
@@ -1184,7 +1194,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
         * Currently we don't know the generation for parent directory, so
         * a generation of 0 means "accept any"
         */
-       inode = ext4_iget(sb, ino);
+       inode = ext4_iget_normal(sb, ino);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        if (generation && inode->i_generation != generation) {
@@ -1662,8 +1672,6 @@ static int parse_options(char *options, struct super_block *sb,
                                return 0;
                        if (option < 0)
                                return 0;
-                       if (option == 0)
-                               option = EXT4_DEF_MAX_BATCH_TIME;
                        sbi->s_max_batch_time = option;
                        break;
                case Opt_min_batch_time:
@@ -1932,15 +1940,18 @@ set_qf_format:
                                        "not specified");
                        return 0;
                }
-       } else {
-               if (sbi->s_jquota_fmt) {
-                       ext4_msg(sb, KERN_ERR, "journaled quota format "
-                                       "specified with no journaling "
-                                       "enabled");
+       }
+#endif
+       if (test_opt(sb, DIOREAD_NOLOCK)) {
+               int blocksize =
+                       BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+
+               if (blocksize < PAGE_CACHE_SIZE) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "dioread_nolock if block size != PAGE_SIZE");
                        return 0;
                }
        }
-#endif
        return 1;
 }
 
@@ -2036,8 +2047,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
                flex_group = ext4_flex_group(sbi, i);
                atomic_add(ext4_free_inodes_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].free_inodes);
-               atomic_add(ext4_free_group_clusters(sb, gdp),
-                          &sbi->s_flex_groups[flex_group].free_clusters);
+               atomic64_add(ext4_free_group_clusters(sb, gdp),
+                            &sbi->s_flex_groups[flex_group].free_clusters);
                atomic_add(ext4_used_dirs_count(sb, gdp),
                           &sbi->s_flex_groups[flex_group].used_dirs);
        }
@@ -2086,6 +2097,7 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
 
 /* Called at mount-time, super-block is locked */
 static int ext4_check_descriptors(struct super_block *sb,
+                                 ext4_fsblk_t sb_block,
                                  ext4_group_t *first_not_zeroed)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2116,6 +2128,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        grp = i;
 
                block_bitmap = ext4_block_bitmap(sb, gdp);
+               if (block_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Block bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (block_bitmap < first_block || block_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Block bitmap for group %u not in group "
@@ -2123,6 +2140,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_bitmap = ext4_inode_bitmap(sb, gdp);
+               if (inode_bitmap == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode bitmap for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_bitmap < first_block || inode_bitmap > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
                               "Inode bitmap for group %u not in group "
@@ -2130,6 +2152,11 @@ static int ext4_check_descriptors(struct super_block *sb,
                        return 0;
                }
                inode_table = ext4_inode_table(sb, gdp);
+               if (inode_table == sb_block) {
+                       ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+                                "Inode table for group %u overlaps "
+                                "superblock", i);
+               }
                if (inode_table < first_block ||
                    inode_table + sbi->s_itb_per_group - 1 > last_block) {
                        ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2235,6 +2262,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
        while (es->s_last_orphan) {
                struct inode *inode;
 
+               /*
+                * We may have encountered an error during cleanup; if
+                * so, skip the rest.
+                */
+               if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+                       jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+                       es->s_last_orphan = 0;
+                       break;
+               }
+
                inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
                if (IS_ERR(inode)) {
                        es->s_last_orphan = 0;
@@ -2249,7 +2286,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
                                __func__, inode->i_ino, inode->i_size);
                        jbd_debug(2, "truncating inode %lu to %lld bytes\n",
                                  inode->i_ino, inode->i_size);
+                       mutex_lock(&inode->i_mutex);
                        ext4_truncate(inode);
+                       mutex_unlock(&inode->i_mutex);
                        nr_truncates++;
                } else {
                        ext4_msg(sb, KERN_DEBUG,
@@ -2713,10 +2752,11 @@ static void print_daily_error_info(unsigned long arg)
        es = sbi->s_es;
 
        if (es->s_error_count)
-               ext4_msg(sb, KERN_NOTICE, "error count: %u",
+               /* fsck newer than v1.41.13 is needed to clean this condition. */
+               ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
                         le32_to_cpu(es->s_error_count));
        if (es->s_first_error_time) {
-               printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+               printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
                       sb->s_id, le32_to_cpu(es->s_first_error_time),
                       (int) sizeof(es->s_first_error_func),
                       es->s_first_error_func,
@@ -2730,7 +2770,7 @@ static void print_daily_error_info(unsigned long arg)
                printk("\n");
        }
        if (es->s_last_error_time) {
-               printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+               printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
                       sb->s_id, le32_to_cpu(es->s_last_error_time),
                       (int) sizeof(es->s_last_error_func),
                       es->s_last_error_func,
@@ -3083,6 +3123,118 @@ static void ext4_destroy_lazyinit_thread(void)
        kthread_stop(ext4_lazyinit_task);
 }
 
+/*
+ * Note: calculating the overhead so we can be compatible with
+ * historical BSD practice is quite difficult in the face of
+ * clusters/bigalloc.  This is because multiple metadata blocks from
+ * different block group can end up in the same allocation cluster.
+ * Calculating the exact overhead in the face of clustered allocation
+ * requires either O(all block bitmaps) in memory or O(number of block
+ * groups**2) in time.  We will still calculate the superblock for
+ * older file systems --- and if we come across with a bigalloc file
+ * system with zero in s_overhead_clusters the estimate will be close to
+ * correct especially for very large cluster sizes --- but for newer
+ * file systems, it's better to calculate this figure once at mkfs
+ * time, and store it in the superblock.  If the superblock value is
+ * present (even for non-bigalloc file systems), we will use it.
+ */
+static int count_overhead(struct super_block *sb, ext4_group_t grp,
+                         char *buf)
+{
+       struct ext4_sb_info     *sbi = EXT4_SB(sb);
+       struct ext4_group_desc  *gdp;
+       ext4_fsblk_t            first_block, last_block, b;
+       ext4_group_t            i, ngroups = ext4_get_groups_count(sb);
+       int                     s, j, count = 0;
+
+       if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
+               return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+                       sbi->s_itb_per_group + 2);
+
+       first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
+               (grp * EXT4_BLOCKS_PER_GROUP(sb));
+       last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
+       for (i = 0; i < ngroups; i++) {
+               gdp = ext4_get_group_desc(sb, i, NULL);
+               b = ext4_block_bitmap(sb, gdp);
+               if (b >= first_block && b <= last_block) {
+                       ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+                       count++;
+               }
+               b = ext4_inode_bitmap(sb, gdp);
+               if (b >= first_block && b <= last_block) {
+                       ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
+                       count++;
+               }
+               b = ext4_inode_table(sb, gdp);
+               if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
+                       for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
+                               int c = EXT4_B2C(sbi, b - first_block);
+                               ext4_set_bit(c, buf);
+                               count++;
+                       }
+               if (i != grp)
+                       continue;
+               s = 0;
+               if (ext4_bg_has_super(sb, grp)) {
+                       ext4_set_bit(s++, buf);
+                       count++;
+               }
+               for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
+                       ext4_set_bit(EXT4_B2C(sbi, s++), buf);
+                       count++;
+               }
+       }
+       if (!count)
+               return 0;
+       return EXT4_CLUSTERS_PER_GROUP(sb) -
+               ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
+}
+
+/*
+ * Compute the overhead and stash it in sbi->s_overhead
+ */
+int ext4_calculate_overhead(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct ext4_super_block *es = sbi->s_es;
+       ext4_group_t i, ngroups = ext4_get_groups_count(sb);
+       ext4_fsblk_t overhead = 0;
+       char *buf = (char *) get_zeroed_page(GFP_KERNEL);
+
+       memset(buf, 0, PAGE_SIZE);
+       if (!buf)
+               return -ENOMEM;
+
+       /*
+        * Compute the overhead (FS structures).  This is constant
+        * for a given filesystem unless the number of block groups
+        * changes so we cache the previous value until it does.
+        */
+
+       /*
+        * All of the blocks before first_data_block are overhead
+        */
+       overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
+
+       /*
+        * Add the overhead found in each block group
+        */
+       for (i = 0; i < ngroups; i++) {
+               int blks;
+
+               blks = count_overhead(sb, i, buf);
+               overhead += blks;
+               if (blks)
+                       memset(buf, 0, PAGE_SIZE);
+               cond_resched();
+       }
+       sbi->s_overhead = overhead;
+       smp_wmb();
+       free_page((unsigned long) buf);
+       return 0;
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 {
        char *orig_data = kstrdup(data, GFP_KERNEL);
@@ -3247,22 +3399,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                }
                if (test_opt(sb, DIOREAD_NOLOCK)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "both data=journal and delalloc");
+                                "both data=journal and dioread_nolock");
                        goto failed_mount;
                }
                if (test_opt(sb, DELALLOC))
                        clear_opt(sb, DELALLOC);
        }
 
-       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
-       if (test_opt(sb, DIOREAD_NOLOCK)) {
-               if (blocksize < PAGE_SIZE) {
-                       ext4_msg(sb, KERN_ERR, "can't mount with "
-                                "dioread_nolock if block size != PAGE_SIZE");
-                       goto failed_mount;
-               }
-       }
-
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
                (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
 
@@ -3304,10 +3447,26 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (!ext4_feature_set_ok(sb, (sb->s_flags & MS_RDONLY)))
                goto failed_mount;
 
+       blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
        if (blocksize < EXT4_MIN_BLOCK_SIZE ||
            blocksize > EXT4_MAX_BLOCK_SIZE) {
                ext4_msg(sb, KERN_ERR,
-                      "Unsupported filesystem blocksize %d", blocksize);
+                      "Unsupported filesystem blocksize %d (%d log_block_size)",
+                        blocksize, le32_to_cpu(es->s_log_block_size));
+               goto failed_mount;
+       }
+       if (le32_to_cpu(es->s_log_block_size) >
+           (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+               ext4_msg(sb, KERN_ERR,
+                        "Invalid log block size: %u",
+                        le32_to_cpu(es->s_log_block_size));
+               goto failed_mount;
+       }
+
+       if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
+               ext4_msg(sb, KERN_ERR,
+                        "Number of reserved GDT blocks insanely large: %d",
+                        le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
                goto failed_mount;
        }
 
@@ -3417,6 +3576,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                 "block size (%d)", clustersize, blocksize);
                        goto failed_mount;
                }
+               if (le32_to_cpu(es->s_log_cluster_size) >
+                   (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+                       ext4_msg(sb, KERN_ERR,
+                                "Invalid log cluster size: %u",
+                                le32_to_cpu(es->s_log_cluster_size));
+                       goto failed_mount;
+               }
                sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
                        le32_to_cpu(es->s_log_block_size);
                sbi->s_clusters_per_group =
@@ -3539,7 +3705,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
-       if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+       if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
                goto failed_mount2;
        }
@@ -3694,6 +3860,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);
 
 no_journal:
+       /*
+        * Get the # of file system overhead blocks from the
+        * superblock if present.
+        */
+       if (es->s_overhead_clusters)
+               sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
+       else {
+               ret = ext4_calculate_overhead(sb);
+               if (ret)
+                       goto failed_mount_wq;
+       }
+
        /*
         * The maximum number of concurrent works can be high and
         * concurrency isn't really necessary.  Limit it to 1.
@@ -3832,7 +4010,7 @@ failed_mount_wq:
                sbi->s_journal = NULL;
        }
 failed_mount3:
-       del_timer(&sbi->s_err_report);
+       del_timer_sync(&sbi->s_err_report);
        if (sbi->s_flex_groups)
                ext4_kvfree(sbi->s_flex_groups);
        percpu_counter_destroy(&sbi->s_freeclusters_counter);
@@ -4410,6 +4588,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
 
+       if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+               if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and delalloc");
+                       err = -EINVAL;
+                       goto restore_opts;
+               }
+               if (test_opt(sb, DIOREAD_NOLOCK)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "both data=journal and dioread_nolock");
+                       err = -EINVAL;
+                       goto restore_opts;
+               }
+       }
+
        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
                ext4_abort(sb, "Abort forced by user");
 
@@ -4527,7 +4720,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
        }
 
        ext4_setup_system_zone(sb);
-       if (sbi->s_journal == NULL)
+       if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
                ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
@@ -4568,67 +4761,21 @@ restore_opts:
        return err;
 }
 
-/*
- * Note: calculating the overhead so we can be compatible with
- * historical BSD practice is quite difficult in the face of
- * clusters/bigalloc.  This is because multiple metadata blocks from
- * different block group can end up in the same allocation cluster.
- * Calculating the exact overhead in the face of clustered allocation
- * requires either O(all block bitmaps) in memory or O(number of block
- * groups**2) in time.  We will still calculate the superblock for
- * older file systems --- and if we come across with a bigalloc file
- * system with zero in s_overhead_clusters the estimate will be close to
- * correct especially for very large cluster sizes --- but for newer
- * file systems, it's better to calculate this figure once at mkfs
- * time, and store it in the superblock.  If the superblock value is
- * present (even for non-bigalloc file systems), we will use it.
- */
 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_super_block *es = sbi->s_es;
-       struct ext4_group_desc *gdp;
+       ext4_fsblk_t overhead = 0;
        u64 fsid;
        s64 bfree;
 
-       if (test_opt(sb, MINIX_DF)) {
-               sbi->s_overhead_last = 0;
-       } else if (es->s_overhead_clusters) {
-               sbi->s_overhead_last = le32_to_cpu(es->s_overhead_clusters);
-       } else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
-               ext4_group_t i, ngroups = ext4_get_groups_count(sb);
-               ext4_fsblk_t overhead = 0;
-
-               /*
-                * Compute the overhead (FS structures).  This is constant
-                * for a given filesystem unless the number of block groups
-                * changes so we cache the previous value until it does.
-                */
-
-               /*
-                * All of the blocks before first_data_block are
-                * overhead
-                */
-               overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
-
-               /*
-                * Add the overhead found in each block group
-                */
-               for (i = 0; i < ngroups; i++) {
-                       gdp = ext4_get_group_desc(sb, i, NULL);
-                       overhead += ext4_num_overhead_clusters(sb, i, gdp);
-                       cond_resched();
-               }
-               sbi->s_overhead_last = overhead;
-               smp_wmb();
-               sbi->s_blocks_last = ext4_blocks_count(es);
-       }
+       if (!test_opt(sb, MINIX_DF))
+               overhead = sbi->s_overhead;
 
        buf->f_type = EXT4_SUPER_MAGIC;
        buf->f_bsize = sb->s_blocksize;
-       buf->f_blocks = (ext4_blocks_count(es) -
-                        EXT4_C2B(sbi, sbi->s_overhead_last));
+       buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, sbi->s_overhead);
        bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
                percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
        /* prevent underflow in case that few free space is available */