Btrfs: deal with read errors on extent buffers differently
author Josef Bacik <josef@redhat.com>
Tue, 27 Mar 2012 01:57:36 +0000 (21:57 -0400)
committer Chris Mason <chris.mason@oracle.com>
Tue, 27 Mar 2012 01:57:36 +0000 (21:57 -0400)
Since we need to read and write extent buffers in their entirety, we can't use
the normal bio_readpage_error stuff, because it only works on a per-page basis.
So instead, if we see an IO error in endio, we just mark the eb as having an IO
error, and then in btree_read_extent_buffer_pages we manually try the other
mirrors and overwrite the bad mirror if we find a good copy.
This works with larger-than-page-size blocks.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/disk-io.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h

index 53c5ea7..6107b69 100644 (file)
@@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                                          u64 start, u64 parent_transid)
 {
        struct extent_io_tree *io_tree;
+       int failed = 0;
        int ret;
        int num_copies = 0;
        int mirror_num = 0;
+       int failed_mirror = 0;
 
        clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
        io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
@@ -371,7 +373,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                                               WAIT_COMPLETE,
                                               btree_get_extent, mirror_num);
                if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
-                       return ret;
+                       break;
 
                /*
                 * This buffer's crc is fine, but its contents are corrupted, so
@@ -379,18 +381,31 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                 * any less wrong.
                 */
                if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
-                       return ret;
+                       break;
+
+               if (!failed_mirror) {
+                       failed = 1;
+                       printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
+                       failed_mirror = eb->failed_mirror;
+               }
 
                num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
                                              eb->start, eb->len);
                if (num_copies == 1)
-                       return ret;
+                       break;
 
                mirror_num++;
+               if (mirror_num == failed_mirror)
+                       mirror_num++;
+
                if (mirror_num > num_copies)
-                       return ret;
+                       break;
        }
-       return -EIO;
+
+       if (failed && !ret)
+               repair_eb_io_failure(root, eb, failed_mirror);
+
+       return ret;
 }
 
 /*
@@ -575,6 +590,11 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        if (!reads_done)
                goto err;
 
+       if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
+               ret = -EIO;
+               goto err;
+       }
+
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
                printk_ratelimited(KERN_INFO "btrfs bad tree block start "
@@ -626,21 +646,16 @@ out:
        return ret;
 }
 
-static int btree_io_failed_hook(struct bio *failed_bio,
-                        struct page *page, u64 start, u64 end,
-                        int mirror_num, struct extent_state *state)
+static int btree_io_failed_hook(struct page *page, int failed_mirror)
 {
        struct extent_buffer *eb;
        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 
        eb = (struct extent_buffer *)page->private;
-       if (page != eb->pages[0])
-               return -EIO;
-
-       if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
-               clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+       set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+       eb->failed_mirror = failed_mirror;
+       if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
                btree_readahead_hook(root, eb, eb->start, -EIO);
-       }
        return -EIO;    /* we fixed nothing */
 }
 
index b71cc45..49a3685 100644 (file)
@@ -1915,6 +1915,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
        return 0;
 }
 
+int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
+                        int mirror_num)
+{
+       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+       u64 start = eb->start;
+       unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
+       int ret;
+
+       for (i = 0; i < num_pages; i++) {
+               struct page *p = extent_buffer_page(eb, i);
+               ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
+                                       start, p, mirror_num);
+               if (ret)
+                       break;
+               start += PAGE_CACHE_SIZE;
+       }
+
+       return ret;
+}
+
 /*
  * each time an IO finishes, we do a fast check in the IO failure tree
  * to see if we need to process or clean up an io_failure_record
@@ -2261,6 +2281,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        u64 start;
        u64 end;
        int whole_page;
+       int failed_mirror;
        int ret;
 
        if (err)
@@ -2307,9 +2328,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        else
                                clean_io_failure(start, page);
                }
-               if (!uptodate) {
-                       int failed_mirror;
+
+               if (!uptodate)
                        failed_mirror = (int)(unsigned long)bio->bi_bdev;
+
+               if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+                       ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
+                       if (!ret && !err &&
+                           test_bit(BIO_UPTODATE, &bio->bi_flags))
+                               uptodate = 1;
+               } else if (!uptodate) {
                        /*
                         * The generic bio_readpage_error handles errors the
                         * following way: If possible, new read requests are
@@ -2323,7 +2351,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        ret = bio_readpage_error(bio, page, start, end,
                                                        failed_mirror, NULL);
                        if (ret == 0) {
-error_handled:
                                uptodate =
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
                                if (err)
@@ -2331,13 +2358,6 @@ error_handled:
                                uncache_state(&cached);
                                continue;
                        }
-                       if (tree->ops && tree->ops->readpage_io_failed_hook) {
-                               ret = tree->ops->readpage_io_failed_hook(
-                                                       bio, page, start, end,
-                                                       failed_mirror, state);
-                               if (ret == 0)
-                                       goto error_handled;
-                       }
                }
 
                if (uptodate && tree->track_uptodate) {
@@ -4396,6 +4416,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
                goto unlock_exit;
        }
 
+       clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
+       eb->failed_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
        for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
index 489d794..38c1af7 100644 (file)
@@ -58,6 +58,7 @@
 #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
 
 struct extent_state;
+struct btrfs_root;
 
 typedef        int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
                                       struct bio *bio, int mirror_num,
@@ -73,9 +74,7 @@ struct extent_io_ops {
                              size_t size, struct bio *bio,
                              unsigned long bio_flags);
        int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
-       int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
-                                      u64 start, u64 end, int failed_mirror,
-                                      struct extent_state *state);
+       int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
        int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
                                        u64 start, u64 end,
                                       struct extent_state *state);
@@ -136,6 +135,7 @@ struct extent_buffer {
        spinlock_t refs_lock;
        atomic_t refs;
        atomic_t io_pages;
+       int failed_mirror;
        struct list_head leak_list;
        struct rcu_head rcu_head;
        pid_t lock_owner;
@@ -327,4 +327,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
                        u64 length, u64 logical, struct page *page,
                        int mirror_num);
 int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
+int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
+                        int mirror_num);
 #endif