md/raid5: Add support for new layouts for raid5 and raid6.
author NeilBrown <neilb@suse.de>
Tue, 31 Mar 2009 03:39:38 +0000 (14:39 +1100)
committer NeilBrown <neilb@suse.de>
Tue, 31 Mar 2009 03:39:38 +0000 (14:39 +1100)
DDF uses different layouts for P and Q blocks than current md/raid6
so add those that are missing.
Also add support for RAID6 layouts that are identical to various
raid5 layouts with the simple addition of one device to hold all of
the 'Q' blocks.
Finally add 'raid5' layouts to match raid4.
These last two will allow online level conversion.

Note that this does not provide correct support for DDF/raid6 yet
as the order in which data blocks are summed to produce the Q block
is significant and different between current md code and DDF
requirements.

Signed-off-by: NeilBrown <neilb@suse.de>
drivers/md/raid5.c
drivers/md/raid5.h

index 2e2e64f..c1d94ed 100644 (file)
@@ -1098,7 +1098,7 @@ static void shrink_stripes(raid5_conf_t *conf)
 
 static void raid5_end_read_request(struct bio * bi, int error)
 {
-       struct stripe_head *sh = bi->bi_private;
+       struct stripe_head *sh = bi->bi_private;
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks, i;
        int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1180,7 +1180,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 
 static void raid5_end_write_request(struct bio *bi, int error)
 {
-       struct stripe_head *sh = bi->bi_private;
+       struct stripe_head *sh = bi->bi_private;
        raid5_conf_t *conf = sh->raid_conf;
        int disks = sh->disks, i;
        int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags);
@@ -1320,20 +1320,27 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
                        pd_idx = stripe % raid_disks;
                        *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
                        break;
+               case ALGORITHM_PARITY_0:
+                       pd_idx = 0;
+                       (*dd_idx)++;
+                       break;
+               case ALGORITHM_PARITY_N:
+                       pd_idx = data_disks;
+                       break;
                default:
                        printk(KERN_ERR "raid5: unsupported algorithm %d\n",
                                conf->algorithm);
+                       BUG();
                }
                break;
        case 6:
 
-               /**** FIX THIS ****/
                switch (conf->algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                        pd_idx = raid_disks - 1 - (stripe % raid_disks);
                        qd_idx = pd_idx + 1;
                        if (pd_idx == raid_disks-1) {
-                               (*dd_idx)++;    /* Q D D D P */
+                               (*dd_idx)++;    /* Q D D D P */
                                qd_idx = 0;
                        } else if (*dd_idx >= pd_idx)
                                (*dd_idx) += 2; /* D D P Q D */
@@ -1342,7 +1349,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
                        pd_idx = stripe % raid_disks;
                        qd_idx = pd_idx + 1;
                        if (pd_idx == raid_disks-1) {
-                               (*dd_idx)++;    /* Q D D D P */
+                               (*dd_idx)++;    /* Q D D D P */
                                qd_idx = 0;
                        } else if (*dd_idx >= pd_idx)
                                (*dd_idx) += 2; /* D D P Q D */
@@ -1357,9 +1364,89 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector,
                        qd_idx = (pd_idx + 1) % raid_disks;
                        *dd_idx = (pd_idx + 2 + *dd_idx) % raid_disks;
                        break;
+
+               case ALGORITHM_PARITY_0:
+                       pd_idx = 0;
+                       qd_idx = 1;
+                       (*dd_idx) += 2;
+                       break;
+               case ALGORITHM_PARITY_N:
+                       pd_idx = data_disks;
+                       qd_idx = data_disks + 1;
+                       break;
+
+               case ALGORITHM_ROTATING_ZERO_RESTART:
+                       /* Exactly the same as RIGHT_ASYMMETRIC, but order
+                        * of blocks for computing Q is different.
+                        */
+                       pd_idx = stripe % raid_disks;
+                       qd_idx = pd_idx + 1;
+                       if (pd_idx == raid_disks-1) {
+                               (*dd_idx)++;    /* Q D D D P */
+                               qd_idx = 0;
+                       } else if (*dd_idx >= pd_idx)
+                               (*dd_idx) += 2; /* D D P Q D */
+                       break;
+
+               case ALGORITHM_ROTATING_N_RESTART:
+                       /* Same as left_asymmetric, but first stripe is
+                        * D D D P Q  rather than
+                        * Q D D D P
+                        */
+                       pd_idx = raid_disks - 1 - ((stripe + 1) % raid_disks);
+                       qd_idx = pd_idx + 1;
+                       if (pd_idx == raid_disks-1) {
+                               (*dd_idx)++;    /* Q D D D P */
+                               qd_idx = 0;
+                       } else if (*dd_idx >= pd_idx)
+                               (*dd_idx) += 2; /* D D P Q D */
+                       break;
+
+               case ALGORITHM_ROTATING_N_CONTINUE:
+                       /* Same as left_symmetric but Q is before P */
+                       pd_idx = raid_disks - 1 - (stripe % raid_disks);
+                       qd_idx = (pd_idx + raid_disks - 1) % raid_disks;
+                       *dd_idx = (pd_idx + 1 + *dd_idx) % raid_disks;
+                       break;
+
+               case ALGORITHM_LEFT_ASYMMETRIC_6:
+                       /* RAID5 left_asymmetric, with Q on last device */
+                       pd_idx = data_disks - stripe % (raid_disks-1);
+                       if (*dd_idx >= pd_idx)
+                               (*dd_idx)++;
+                       qd_idx = raid_disks - 1;
+                       break;
+
+               case ALGORITHM_RIGHT_ASYMMETRIC_6:
+                       pd_idx = stripe % (raid_disks-1);
+                       if (*dd_idx >= pd_idx)
+                               (*dd_idx)++;
+                       qd_idx = raid_disks - 1;
+                       break;
+
+               case ALGORITHM_LEFT_SYMMETRIC_6:
+                       pd_idx = data_disks - stripe % (raid_disks-1);
+                       *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+                       qd_idx = raid_disks - 1;
+                       break;
+
+               case ALGORITHM_RIGHT_SYMMETRIC_6:
+                       pd_idx = stripe % (raid_disks-1);
+                       *dd_idx = (pd_idx + 1 + *dd_idx) % (raid_disks-1);
+                       qd_idx = raid_disks - 1;
+                       break;
+
+               case ALGORITHM_PARITY_0_6:
+                       pd_idx = 0;
+                       (*dd_idx)++;
+                       qd_idx = raid_disks - 1;
+                       break;
+
+
                default:
                        printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
                               conf->algorithm);
+                       BUG();
                }
                break;
        }
@@ -1411,9 +1498,15 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                                i += raid_disks;
                        i -= (sh->pd_idx + 1);
                        break;
+               case ALGORITHM_PARITY_0:
+                       i -= 1;
+                       break;
+               case ALGORITHM_PARITY_N:
+                       break;
                default:
                        printk(KERN_ERR "raid5: unsupported algorithm %d\n",
                               conf->algorithm);
+                       BUG();
                }
                break;
        case 6:
@@ -1422,8 +1515,10 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                switch (conf->algorithm) {
                case ALGORITHM_LEFT_ASYMMETRIC:
                case ALGORITHM_RIGHT_ASYMMETRIC:
-                       if (sh->pd_idx == raid_disks-1)
-                               i--;    /* Q D D D P */
+               case ALGORITHM_ROTATING_ZERO_RESTART:
+               case ALGORITHM_ROTATING_N_RESTART:
+                       if (sh->pd_idx == raid_disks-1)
+                               i--;    /* Q D D D P */
                        else if (i > sh->pd_idx)
                                i -= 2; /* D D P Q D */
                        break;
@@ -1438,9 +1533,35 @@ static sector_t compute_blocknr(struct stripe_head *sh, int i)
                                i -= (sh->pd_idx + 2);
                        }
                        break;
+               case ALGORITHM_PARITY_0:
+                       i -= 2;
+                       break;
+               case ALGORITHM_PARITY_N:
+                       break;
+               case ALGORITHM_ROTATING_N_CONTINUE:
+                       if (sh->pd_idx == 0)
+                               i--;    /* P D D D Q */
+                       else if (i > sh->pd_idx)
+                               i -= 2; /* D D Q P D */
+                       break;
+               case ALGORITHM_LEFT_ASYMMETRIC_6:
+               case ALGORITHM_RIGHT_ASYMMETRIC_6:
+                       if (i > sh->pd_idx)
+                               i--;
+                       break;
+               case ALGORITHM_LEFT_SYMMETRIC_6:
+               case ALGORITHM_RIGHT_SYMMETRIC_6:
+                       if (i < sh->pd_idx)
+                               i += data_disks + 1;
+                       i -= (sh->pd_idx + 1);
+                       break;
+               case ALGORITHM_PARITY_0_6:
+                       i -= 1;
+                       break;
                default:
                        printk(KERN_CRIT "raid6: unsupported algorithm %d\n",
                               conf->algorithm);
+                       BUG();
                }
                break;
        }
@@ -3308,7 +3429,7 @@ static int chunk_aligned_read(struct request_queue *q, struct bio * raid_bio)
                return 0;
        }
        /*
-        * use bio_clone to make a copy of the bio
+        * use bio_clone to make a copy of the bio
         */
        align_bi = bio_clone(raid_bio, GFP_NOIO);
        if (!align_bi)
@@ -3439,7 +3560,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
        if (rw == READ &&
             mddev->reshape_position == MaxSector &&
             chunk_aligned_read(q,bi))
-               return 0;
+               return 0;
 
        logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
        last_sector = bi->bi_sector + (bi->bi_size>>9);
@@ -4034,6 +4155,12 @@ static int run(mddev_t *mddev)
                       mdname(mddev), mddev->level);
                return -EIO;
        }
+       if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) ||
+           (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) {
+               printk(KERN_ERR "raid5: %s: layout %d not supported\n",
+                      mdname(mddev), mddev->layout);
+               return -EIO;
+       }
 
        if (mddev->chunk_size < PAGE_SIZE) {
                printk(KERN_ERR "md/raid5: chunk_size must be at least "
@@ -4185,12 +4312,6 @@ static int run(mddev_t *mddev)
                        conf->chunk_size, mdname(mddev));
                goto abort;
        }
-       if (conf->algorithm > ALGORITHM_RIGHT_SYMMETRIC) {
-               printk(KERN_ERR 
-                       "raid5: unsupported parity algorithm %d for %s\n",
-                       conf->algorithm, mdname(mddev));
-               goto abort;
-       }
        if (mddev->degraded > conf->max_degraded) {
                printk(KERN_ERR "raid5: not enough operational devices for %s"
                        " (%d/%d failed)\n",
index 0c7375a..633d792 100644 (file)
@@ -394,9 +394,62 @@ typedef struct raid5_private_data raid5_conf_t;
 /*
  * Our supported algorithms
  */
-#define ALGORITHM_LEFT_ASYMMETRIC      0
-#define ALGORITHM_RIGHT_ASYMMETRIC     1
-#define ALGORITHM_LEFT_SYMMETRIC       2
-#define ALGORITHM_RIGHT_SYMMETRIC      3
+#define ALGORITHM_LEFT_ASYMMETRIC      0 /* Rotating Parity N with Data Restart */
+#define ALGORITHM_RIGHT_ASYMMETRIC     1 /* Rotating Parity 0 with Data Restart */
+#define ALGORITHM_LEFT_SYMMETRIC       2 /* Rotating Parity N with Data Continuation */
+#define ALGORITHM_RIGHT_SYMMETRIC      3 /* Rotating Parity 0 with Data Continuation */
 
+/* Define non-rotating (raid4) algorithms.  These allow
+ * conversion of raid4 to raid5.
+ */
+#define ALGORITHM_PARITY_0             4 /* P or P,Q are initial devices */
+#define ALGORITHM_PARITY_N             5 /* P or P,Q are final devices. */
+
+/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
+ * Firstly, the exact positioning of the parity block is slightly
+ * different between the 'LEFT_*' modes of md and the "_N_*" modes
+ * of DDF.
+ * Secondly, the order of datablocks over which the Q syndrome is computed
+ * is different.
+ * Consequently we have different layouts for DDF/raid6 than md/raid6.
+ * These layouts are from the DDFv1.2 spec.
+ * Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
+ * leaves RLQ=3 as 'Vendor Specific'
+ */
+
+#define ALGORITHM_ROTATING_ZERO_RESTART        8 /* DDF PRL=6 RLQ=1 */
+#define ALGORITHM_ROTATING_N_RESTART   9 /* DDF PRL=6 RLQ=2 */
+#define ALGORITHM_ROTATING_N_CONTINUE  10 /*DDF PRL=6 RLQ=3 */
+
+
+/* For every RAID5 algorithm we define a RAID6 algorithm
+ * with exactly the same layout for data and parity, and
+ * with the Q block always on the last device (N-1).
+ * This allows trivial conversion from RAID5 to RAID6
+ */
+#define ALGORITHM_LEFT_ASYMMETRIC_6    16
+#define ALGORITHM_RIGHT_ASYMMETRIC_6   17
+#define ALGORITHM_LEFT_SYMMETRIC_6     18
+#define ALGORITHM_RIGHT_SYMMETRIC_6    19
+#define ALGORITHM_PARITY_0_6           20
+#define ALGORITHM_PARITY_N_6           ALGORITHM_PARITY_N
+
+static inline int algorithm_valid_raid5(int layout)
+{
+       /* Layouts 0..5: the four rotating-parity ones plus PARITY_0 and PARITY_N. */
+       return !(layout < 0 || layout > 5);
+}
+/* Nonzero iff 'layout' is one of the ALGORITHM_* values usable for RAID6. */
+static inline int algorithm_valid_raid6(int layout)
+{
+       /* 0-5: md + raid4-style; 8-10: DDF (9 included); 16-20: "_6" layouts. */
+       return (layout >= 0 && layout <= 5) ||
+               (layout >= 8 && layout <= 10) ||
+               (layout >= 16 && layout <= 20);
+}
+
+static inline int algorithm_is_DDF(int layout)
+{
+       return !(layout < 8 || layout > 10); /* 8..10 are the DDF RLQ=1..3 layouts */
+}
 #endif