[PATCH] md: fix various bugs with aligned reads in RAID5
author Neil Brown <neilb@suse.de>
Thu, 8 Feb 2007 22:20:29 +0000 (14:20 -0800)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 9 Feb 2007 17:25:46 +0000 (09:25 -0800)
It is possible for raid5 to be sent a bio that is too big for an underlying
device.  So if it is a READ that we pass straight down to a device, it will
fail and confuse RAID5.

So in 'chunk_aligned_read' we check that the bio fits within the parameters
for the target device and if it doesn't fit, fall back on reading through
the stripe cache and making lots of one-page requests.

Note that this is the earliest time we can check against the device because
earlier we don't have a lock on the device, so it could change underneath
us.

Also, the code for handling a retry through the cache when a read fails has
not been tested and was badly broken.  This patch fixes that code.

Signed-off-by: Neil Brown <neilb@suse.de>
Cc: "Kai" <epimetreus@fastmail.fm>
Cc: <stable@suse.de>
Cc: <org@suse.de>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
block/ll_rw_blk.c
drivers/md/raid5.c

index fb67897..38c293b 100644 (file)
@@ -1264,7 +1264,7 @@ new_hw_segment:
        bio->bi_hw_segments = nr_hw_segs;
        bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
-
+EXPORT_SYMBOL(blk_recount_segments);
 
 static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
                                   struct bio *nxt)
index 467c169..11c3d7b 100644 (file)
@@ -2620,7 +2620,7 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
        }
        bi = conf->retry_read_aligned_list;
        if(bi) {
-               conf->retry_read_aligned = bi->bi_next;
+               conf->retry_read_aligned_list = bi->bi_next;
                bi->bi_next = NULL;
                bi->bi_phys_segments = 1; /* biased count of active stripes */
                bi->bi_hw_segments = 0; /* count of processed stripes */
@@ -2669,6 +2669,27 @@ static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error)
        return 0;
 }
 
+static int bio_fits_rdev(struct bio *bi)
+{
+       request_queue_t *q = bdev_get_queue(bi->bi_bdev);
+
+       if ((bi->bi_size>>9) > q->max_sectors)
+               return 0;
+       blk_recount_segments(q, bi);
+       if (bi->bi_phys_segments > q->max_phys_segments ||
+           bi->bi_hw_segments > q->max_hw_segments)
+               return 0;
+
+       if (q->merge_bvec_fn)
+               /* it's too hard to apply the merge_bvec_fn at this stage,
+                * just give up
+                */
+               return 0;
+
+       return 1;
+}
+
+
 static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
 {
        mddev_t *mddev = q->queuedata;
@@ -2715,6 +2736,13 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
                align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
                align_bi->bi_sector += rdev->data_offset;
 
+               if (!bio_fits_rdev(align_bi)) {
+                       /* too big in some way */
+                       bio_put(align_bi);
+                       rdev_dec_pending(rdev, mddev);
+                       return 0;
+               }
+
                spin_lock_irq(&conf->device_lock);
                wait_event_lock_irq(conf->wait_for_stripe,
                                    conf->quiesce == 0,
@@ -3107,7 +3135,9 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
        last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
 
        for (; logical_sector < last_sector;
-            logical_sector += STRIPE_SECTORS, scnt++) {
+            logical_sector += STRIPE_SECTORS,
+                    sector += STRIPE_SECTORS,
+                    scnt++) {
 
                if (scnt < raid_bio->bi_hw_segments)
                        /* already done this stripe */
@@ -3123,7 +3153,13 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                }
 
                set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
-               add_stripe_bio(sh, raid_bio, dd_idx, 0);
+               if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
+                       release_stripe(sh);
+                       raid_bio->bi_hw_segments = scnt;
+                       conf->retry_read_aligned = raid_bio;
+                       return handled;
+               }
+
                handle_stripe(sh, NULL);
                release_stripe(sh);
                handled++;