ext4: Save error information to the superblock for analysis
authorTheodore Ts'o <tytso@mit.edu>
Tue, 27 Jul 2010 15:56:03 +0000 (11:56 -0400)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 27 Jul 2010 15:56:03 +0000 (11:56 -0400)
Save number of file system errors, and the time function name, line
number, block number, and inode number of the first and most recent
errors reported on the file system in the superblock.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
fs/ext4/block_validity.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/inode.c
fs/ext4/super.c

index 5b6973f..3db5084 100644 (file)
@@ -229,16 +229,20 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
 
        if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
            (start_blk + count < start_blk) ||
-           (start_blk + count > ext4_blocks_count(sbi->s_es)))
+           (start_blk + count > ext4_blocks_count(sbi->s_es))) {
+               sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                return 0;
+       }
        while (n) {
                entry = rb_entry(n, struct ext4_system_zone, node);
                if (start_blk + count - 1 < entry->start_blk)
                        n = n->rb_left;
                else if (start_blk >= (entry->start_blk + entry->count))
                        n = n->rb_right;
-               else
+               else {
+                       sbi->s_es->s_last_error_block = cpu_to_le64(start_blk);
                        return 0;
+               }
        }
        return 1;
 }
index 0889381..6b96125 100644 (file)
@@ -1011,9 +1011,24 @@ struct ext4_super_block {
                                              snapshot's future use */
        __le32  s_snapshot_list;        /* inode number of the head of the
                                           on-disk snapshot list */
-       __u32   s_reserved[155];        /* Padding to the end of the block */
+#define EXT4_S_ERR_START offsetof(struct ext4_super_block, s_error_count)
+       __le32  s_error_count;          /* number of fs errors */
+       __le32  s_first_error_time;     /* first time an error happened */
+       __le32  s_first_error_ino;      /* inode involved in first error */
+       __le64  s_first_error_block;    /* block involved of first error */
+       __u8    s_first_error_func[32]; /* function where the error happened */
+       __le32  s_first_error_line;     /* line number where error happened */
+       __le32  s_last_error_time;      /* most recent time of an error */
+       __le32  s_last_error_ino;       /* inode involved in last error */
+       __le32  s_last_error_line;      /* line number where error happened */
+       __le64  s_last_error_block;     /* block involved of last error */
+       __u8    s_last_error_func[32];  /* function where the error happened */
+#define EXT4_S_ERR_END offsetof(struct ext4_super_block, s_reserved)
+       __le32   s_reserved[128];        /* Padding to the end of the block */
 };
 
+#define EXT4_S_ERR_LEN (EXT4_S_ERR_END - EXT4_S_ERR_START)
+
 #ifdef __KERNEL__
 
 /*
index 23425cd..6e272ef 100644 (file)
@@ -134,6 +134,11 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
                if (inode && inode_needs_sync(inode)) {
                        sync_dirty_buffer(bh);
                        if (buffer_req(bh) && !buffer_uptodate(bh)) {
+                               struct ext4_super_block *es;
+
+                               es = EXT4_SB(inode->i_sb)->s_es;
+                               es->s_last_error_block =
+                                       cpu_to_le64(bh->b_blocknr);
                                ext4_error_inode(inode, where, line,
                                                 bh->b_blocknr,
                                        "IO error syncing itable block");
index 69ea663..755ba86 100644 (file)
@@ -341,6 +341,7 @@ static int __ext4_check_blockref(const char *function, unsigned int line,
                                 struct inode *inode,
                                 __le32 *p, unsigned int max)
 {
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
        __le32 *bref = p;
        unsigned int blk;
 
@@ -349,6 +350,7 @@ static int __ext4_check_blockref(const char *function, unsigned int line,
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
+                       es->s_last_error_block = cpu_to_le64(blk);
                        ext4_error_inode(inode, function, line, blk,
                                         "invalid block");
                        return -EIO;
index bcf74b3..a94d3f5 100644 (file)
@@ -307,6 +307,35 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
        jbd2_journal_abort_handle(handle);
 }
 
+static void __save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line)
+{
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+       es->s_last_error_time = cpu_to_le32(get_seconds());
+       strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+       es->s_last_error_line = cpu_to_le32(line);
+       if (!es->s_first_error_time) {
+               es->s_first_error_time = es->s_last_error_time;
+               strncpy(es->s_first_error_func, func,
+                       sizeof(es->s_first_error_func));
+               es->s_first_error_line = cpu_to_le32(line);
+               es->s_first_error_ino = es->s_last_error_ino;
+               es->s_first_error_block = es->s_last_error_block;
+       }
+       es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
+}
+
+static void save_error_info(struct super_block *sb, const char *func,
+                           unsigned int line)
+{
+       __save_error_info(sb, func, line);
+       ext4_commit_super(sb, 1);
+}
+
+
 /* Deal with the reporting of failure conditions on a filesystem such as
  * inconsistencies detected or read IO failures.
  *
@@ -324,11 +353,6 @@ void ext4_journal_abort_handle(const char *caller, unsigned int line,
 
 static void ext4_handle_error(struct super_block *sb)
 {
-       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
-
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
-
        if (sb->s_flags & MS_RDONLY)
                return;
 
@@ -343,7 +367,6 @@ static void ext4_handle_error(struct super_block *sb)
                ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
                sb->s_flags |= MS_RDONLY;
        }
-       ext4_commit_super(sb, 1);
        if (test_opt(sb, ERRORS_PANIC))
                panic("EXT4-fs (device %s): panic forced after error\n",
                        sb->s_id);
@@ -369,7 +392,11 @@ void ext4_error_inode(struct inode *inode, const char *function,
                      const char *fmt, ...)
 {
        va_list args;
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
 
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       save_error_info(inode->i_sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ",
               inode->i_sb->s_id, function, line, inode->i_ino);
@@ -387,9 +414,13 @@ void ext4_error_file(struct file *file, const char *function,
                     unsigned int line, const char *fmt, ...)
 {
        va_list args;
+       struct ext4_super_block *es;
        struct inode *inode = file->f_dentry->d_inode;
        char pathname[80], *path;
 
+       es = EXT4_SB(inode->i_sb)->s_es;
+       es->s_last_error_ino = cpu_to_le32(inode->i_ino);
+       save_error_info(inode->i_sb, function, line);
        va_start(args, fmt);
        path = d_path(&(file->f_path), pathname, sizeof(pathname));
        if (!path)
@@ -459,6 +490,7 @@ void __ext4_std_error(struct super_block *sb, const char *function,
        errstr = ext4_decode_error(sb, errno, nbuf);
        printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
               sb->s_id, function, line, errstr);
+       save_error_info(sb, function, line);
 
        ext4_handle_error(sb);
 }
@@ -478,6 +510,7 @@ void __ext4_abort(struct super_block *sb, const char *function,
 {
        va_list args;
 
+       save_error_info(sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id,
               function, line);
@@ -485,18 +518,16 @@ void __ext4_abort(struct super_block *sb, const char *function,
        printk("\n");
        va_end(args);
 
+       if ((sb->s_flags & MS_RDONLY) == 0) {
+               ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
+               sb->s_flags |= MS_RDONLY;
+               EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
+               if (EXT4_SB(sb)->s_journal)
+                       jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
+               save_error_info(sb, function, line);
+       }
        if (test_opt(sb, ERRORS_PANIC))
                panic("EXT4-fs panic from previous error\n");
-
-       if (sb->s_flags & MS_RDONLY)
-               return;
-
-       ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
-       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-       sb->s_flags |= MS_RDONLY;
-       EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED;
-       if (EXT4_SB(sb)->s_journal)
-               jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 }
 
 void ext4_msg (struct super_block * sb, const char *prefix,
@@ -534,6 +565,9 @@ __acquires(bitlock)
        va_list args;
        struct ext4_super_block *es = EXT4_SB(sb)->s_es;
 
+       es->s_last_error_ino = cpu_to_le32(ino);
+       es->s_last_error_block = cpu_to_le64(block);
+       __save_error_info(sb, function, line);
        va_start(args, fmt);
        printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u",
               sb->s_id, function, line, grp);
@@ -546,11 +580,10 @@ __acquires(bitlock)
        va_end(args);
 
        if (test_opt(sb, ERRORS_CONT)) {
-               EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
-               es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
                ext4_commit_super(sb, 0);
                return;
        }
+
        ext4_unlock_group(sb, grp);
        ext4_handle_error(sb);
        /*
@@ -3332,8 +3365,17 @@ static int ext4_load_journal(struct super_block *sb,
 
        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
                err = jbd2_journal_wipe(journal, !really_read_only);
-       if (!err)
+       if (!err) {
+               char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
+               if (save)
+                       memcpy(save, ((char *) es) +
+                              EXT4_S_ERR_START, EXT4_S_ERR_LEN);
                err = jbd2_journal_load(journal);
+               if (save)
+                       memcpy(((char *) es) + EXT4_S_ERR_START,
+                              save, EXT4_S_ERR_LEN);
+               kfree(save);
+       }
 
        if (err) {
                ext4_msg(sb, KERN_ERR, "error loading journal");