/*
   raid0.c : Multiple Devices driver for Linux
             Copyright (C) 1994-96 Marc ZYNGIER
	     <zyngier@ufr-info-p7.ibp.fr> or
	     <maz@gloups.fdn.fr>
             Copyright (C) 1999, 2000 Ingo Molnar, Red Hat

   RAID-0 management functions.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   You should have received a copy of the GNU General Public License
   (for example /usr/src/linux/COPYING); if not, write to the Free
   Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include "md.h"
#include "raid0.h"
static void raid0_unplug(struct request_queue *q)
{
	mddev_t *mddev = q->queuedata;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i;

	for (i = 0; i < raid_disks; i++) {
		struct request_queue *r_queue = bdev_get_queue(devlist[i]->bdev);

		blk_unplug(r_queue);
	}
}
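/*
 * Note: raid0 reports congestion if md itself is congested or if the
 * backing device of any member is; zone 0 always holds every member,
 * so its nb_dev is the total device count.
 */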
static int raid0_congested(void *data, int bits)
{
	mddev_t *mddev = data;
	raid0_conf_t *conf = mddev->private;
	mdk_rdev_t **devlist = conf->devlist;
	int raid_disks = conf->strip_zone[0].nb_dev;
	int i, ret = 0;

	if (mddev_congested(mddev, bits))
		return 1;

	for (i = 0; i < raid_disks && !ret ; i++) {
		struct request_queue *q = bdev_get_queue(devlist[i]->bdev);

		ret |= bdi_congested(&q->backing_dev_info, bits);
	}
	return ret;
}
/*
 * inform the user of the raid configuration
 */
static void dump_zones(mddev_t *mddev)
{
	int j, k;
	sector_t zone_size = 0;
	sector_t zone_start = 0;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;

	printk(KERN_INFO "******* %s configuration *********\n",
		mdname(mddev));
	for (j = 0; j < conf->nr_strip_zones; j++) {
		printk(KERN_INFO "zone%d=[", j);
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			printk(KERN_CONT "%s/",
			       bdevname(conf->devlist[j*raid_disks
						      + k]->bdev, b));
		printk(KERN_CONT "]\n");

		zone_size = conf->strip_zone[j].zone_end - zone_start;
		printk(KERN_INFO "        zone offset=%llukb "
				"device offset=%llukb size=%llukb\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
	printk(KERN_INFO "**********************************\n\n");
}
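/*
 * Strip zones: members of equal (chunk-rounded) size share a zone; each
 * zone is striped across every device large enough to reach it.
 * Illustrative example (numbers not from the code): two 100GiB disks plus
 * one 200GiB disk yield zone 0 striped 3-wide over the first 100GiB of
 * each disk, and zone 1 covering the last 100GiB of the big disk alone.
 */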
static int create_strip_zones(mddev_t *mddev, raid0_conf_t **private_conf)
{
	int i, c, err;
	sector_t curr_zone_end, sectors;
	mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev, **dev;
	struct strip_zone *zone;
	int cnt;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = kzalloc(sizeof(*conf), GFP_KERNEL);

	if (!conf)
		return -ENOMEM;
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		printk(KERN_INFO "md/raid0:%s: looking at %s\n",
		       mdname(mddev),
		       bdevname(rdev1->bdev, b));
		c = 0;

		/* round size to chunk_size */
		sectors = rdev1->sectors;
		sector_div(sectors, mddev->chunk_sectors);
		rdev1->sectors = sectors * mddev->chunk_sectors;

		list_for_each_entry(rdev2, &mddev->disks, same_set) {
			printk(KERN_INFO "md/raid0:%s: comparing %s(%llu)",
			       mdname(mddev),
			       bdevname(rdev1->bdev, b),
			       (unsigned long long)rdev1->sectors);
			printk(KERN_CONT " with %s(%llu)\n",
			       bdevname(rdev2->bdev, b),
			       (unsigned long long)rdev2->sectors);
			if (rdev2 == rdev1) {
				printk(KERN_INFO "md/raid0:%s: END\n",
				       mdname(mddev));
				break;
			}
			if (rdev2->sectors == rdev1->sectors) {
				/*
				 * Not unique, don't count it as a new
				 * group
				 */
				printk(KERN_INFO "md/raid0:%s: EQUAL\n",
				       mdname(mddev));
				c = 1;
				break;
			}
			printk(KERN_INFO "md/raid0:%s: NOT EQUAL\n",
			       mdname(mddev));
		}
		if (!c) {
			printk(KERN_INFO "md/raid0:%s: ==> UNIQUE\n",
			       mdname(mddev));
			conf->nr_strip_zones++;
			printk(KERN_INFO "md/raid0:%s: %d zones\n",
			       mdname(mddev), conf->nr_strip_zones);
		}
	}
	printk(KERN_INFO "md/raid0:%s: FINAL %d zones\n",
	       mdname(mddev), conf->nr_strip_zones);
	err = -ENOMEM;
	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
				conf->nr_strip_zones, GFP_KERNEL);
	if (!conf->strip_zone)
		goto abort;
	conf->devlist = kzalloc(sizeof(mdk_rdev_t*)*
				conf->nr_strip_zones*mddev->raid_disks,
				GFP_KERNEL);
	if (!conf->devlist)
		goto abort;

	/* The first zone must contain all devices, so here we check that
	 * there is a proper alignment of slots to devices and find them all
	 */
	zone = &conf->strip_zone[0];
	cnt = 0;
	smallest = NULL;
	dev = conf->devlist;
	err = -EINVAL;
	list_for_each_entry(rdev1, &mddev->disks, same_set) {
		int j = rdev1->raid_disk;

		if (mddev->level == 10)
			/* taking over a raid10-n2 array */
			j /= 2;

		if (j < 0 || j >= mddev->raid_disks) {
			printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
			       "aborting!\n", mdname(mddev), j);
			goto abort;
		}
		if (dev[j]) {
			printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
			       "aborting!\n", mdname(mddev), j);
			goto abort;
		}
		dev[j] = rdev1;

		disk_stack_limits(mddev->gendisk, rdev1->bdev,
				  rdev1->data_offset << 9);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_segments to 1, lying within
		 * a single page.
		 */
		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn) {
			blk_queue_max_segments(mddev->queue, 1);
			blk_queue_segment_boundary(mddev->queue,
						   PAGE_CACHE_SIZE - 1);
		}
		if (!smallest || (rdev1->sectors < smallest->sectors))
			smallest = rdev1;
		cnt++;
	}
	if (cnt != mddev->raid_disks) {
		printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
		       "aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
		goto abort;
	}
	zone->nb_dev = cnt;
	zone->zone_end = smallest->sectors * cnt;

	curr_zone_end = zone->zone_end;
	/* now do the other zones */
	for (i = 1; i < conf->nr_strip_zones; i++)
	{
		int j;

		zone = conf->strip_zone + i;
		dev = conf->devlist + i * mddev->raid_disks;

		printk(KERN_INFO "md/raid0:%s: zone %d\n",
		       mdname(mddev), i);
		zone->dev_start = smallest->sectors;
		smallest = NULL;
		c = 0;

		for (j = 0; j < cnt; j++) {
			rdev = conf->devlist[j];
			printk(KERN_INFO "md/raid0:%s: checking %s ...",
			       mdname(mddev),
			       bdevname(rdev->bdev, b));
			if (rdev->sectors <= zone->dev_start) {
				printk(KERN_CONT " nope.\n");
				continue;
			}
			printk(KERN_CONT " contained as device %d\n", c);
			dev[c] = rdev;
			c++;
			if (!smallest || rdev->sectors < smallest->sectors) {
				smallest = rdev;
				printk(KERN_INFO "md/raid0:%s: (%llu) is smallest!\n",
				       mdname(mddev),
				       (unsigned long long)rdev->sectors);
			}
		}

		zone->nb_dev = c;
		sectors = (smallest->sectors - zone->dev_start) * c;
		printk(KERN_INFO "md/raid0:%s: zone->nb_dev: %d, sectors: %llu\n",
		       mdname(mddev),
		       zone->nb_dev, (unsigned long long)sectors);

		curr_zone_end += sectors;
		zone->zone_end = curr_zone_end;

		printk(KERN_INFO "md/raid0:%s: current zone start: %llu\n",
		       mdname(mddev),
		       (unsigned long long)smallest->sectors);
	}
	mddev->queue->unplug_fn = raid0_unplug;
	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
	mddev->queue->backing_dev_info.congested_data = mddev;
	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
	 */
	if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
		       mdname(mddev),
		       mddev->chunk_sectors << 9);
		goto abort;
	}

	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
	blk_queue_io_opt(mddev->queue,
			 (mddev->chunk_sectors << 9) * mddev->raid_disks);

	printk(KERN_INFO "md/raid0:%s: done.\n", mdname(mddev));
	*private_conf = conf;
	return 0;
abort:
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	*private_conf = NULL;
	return err;
}
/**
 *	raid0_mergeable_bvec -- tell bio layer if two requests can be merged
 *	@q: request queue
 *	@bvm: properties of new bio
 *	@biovec: the request that could be merged to it.
 *
 *	Return amount of bytes we can accept at this offset
 */
static int raid0_mergeable_bvec(struct request_queue *q,
				struct bvec_merge_data *bvm,
				struct bio_vec *biovec)
{
	mddev_t *mddev = q->queuedata;
	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
	int max;
	unsigned int chunk_sectors = mddev->chunk_sectors;
	unsigned int bio_sectors = bvm->bi_size >> 9;

	if (is_power_of_2(chunk_sectors))
		max = (chunk_sectors - ((sector & (chunk_sectors-1))
					+ bio_sectors)) << 9;
	else
		max = (chunk_sectors - (sector_div(sector, chunk_sectors)
					+ bio_sectors)) << 9;
	if (max < 0)
		max = 0; /* bio_add cannot handle a negative return */
	if (max <= biovec->bv_len && bio_sectors == 0)
		return biovec->bv_len;
	else
		return max;
}
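/*
 * raid0's size is simply the sum of the members' chunk-rounded sizes;
 * the sectors/raid_disks arguments only matter for personalities that
 * support reshape, which raid0 does not (hence the WARN_ONCE).
 */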
static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
	sector_t array_sectors = 0;
	mdk_rdev_t *rdev;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	list_for_each_entry(rdev, &mddev->disks, same_set)
		array_sectors += rdev->sectors;

	return array_sectors;
}
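/*
 * 'run' personality hook: validate the chunk size, build the strip zones
 * (unless a takeover already attached a configuration), then publish the
 * array size, read-ahead window and queue limits.
 */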
static int raid0_run(mddev_t *mddev)
{
	raid0_conf_t *conf;
	int ret;

	if (mddev->chunk_sectors == 0) {
		printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
		       mdname(mddev));
		return -EINVAL;
	}
	if (md_check_no_bitmap(mddev))
		return -EINVAL;
	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
	mddev->queue->queue_lock = &mddev->queue->__queue_lock;

	/* if private is not null, we are here after takeover */
	if (mddev->private == NULL) {
		ret = create_strip_zones(mddev, &conf);
		if (ret < 0)
			return ret;
		mddev->private = conf;
	}
	conf = mddev->private;
	if (conf->scale_raid_disks) {
		int i;
		for (i = 0; i < conf->strip_zone[0].nb_dev; i++)
			conf->devlist[i]->raid_disk /= conf->scale_raid_disks;
		/* FIXME update sysfs rd links */
	}

	/* calculate array device size */
	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));

	printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
	       mdname(mddev),
	       (unsigned long long)mddev->array_sectors);
	/* calculate the max read-ahead size.
	 * For read-ahead of large files to be effective, we need to
	 * readahead at least twice a whole stripe. i.e. number of devices
	 * multiplied by chunk size times 2.
	 * If an individual device has an ra_pages greater than the
	 * chunk size, then we will not drive that device as hard as it
	 * wants.  We consider this a configuration error: a larger
	 * chunksize should be used in that case.
	 */
	{
		int stripe = mddev->raid_disks *
			(mddev->chunk_sectors << 9) / PAGE_SIZE;
		if (mddev->queue->backing_dev_info.ra_pages < 2 * stripe)
			mddev->queue->backing_dev_info.ra_pages = 2 * stripe;
	}

	blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
	dump_zones(mddev);
	md_integrity_register(mddev);
	return 0;
}
static int raid0_stop(mddev_t *mddev)
{
	raid0_conf_t *conf = mddev->private;

	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
	kfree(conf->strip_zone);
	kfree(conf->devlist);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}
/* Find the zone which holds a particular offset
 * Update *sectorp to be an offset in that zone
 */
static struct strip_zone *find_zone(struct raid0_private_data *conf,
				    sector_t *sectorp)
{
	int i;
	struct strip_zone *z = conf->strip_zone;
	sector_t sector = *sectorp;

	for (i = 0; i < conf->nr_strip_zones; i++)
		if (sector < z[i].zone_end) {
			if (i)
				*sectorp = sector - z[i-1].zone_end;
			return z + i;
		}
	BUG();
}

/*
 * remaps the bio to the target device. we separate two flows,
 * a power-of-2 flow and a general flow, for the sake of performance
 */
static mdk_rdev_t *map_sector(mddev_t *mddev, struct strip_zone *zone,
				sector_t sector, sector_t *sector_offset)
{
	unsigned int sect_in_chunk;
	sector_t chunk;
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	unsigned int chunk_sects = mddev->chunk_sectors;

	if (is_power_of_2(chunk_sects)) {
		int chunksect_bits = ffz(~chunk_sects);
		/* find the sector offset inside the chunk */
		sect_in_chunk = sector & (chunk_sects - 1);
		sector >>= chunksect_bits;
		/* chunk in zone */
		chunk = *sector_offset;
		/* quotient is the chunk in real device */
		sector_div(chunk, zone->nb_dev << chunksect_bits);
	} else {
		sect_in_chunk = sector_div(sector, chunk_sects);
		chunk = *sector_offset;
		sector_div(chunk, chunk_sects * zone->nb_dev);
	}
	/*
	 * position the bio over the real device:
	 * real sector = chunk in device + start of zone
	 *	+ the position in the chunk
	 */
	*sector_offset = (chunk * chunk_sects) + sect_in_chunk;
	return conf->devlist[(zone - conf->strip_zone)*raid_disks
			     + sector_div(sector, zone->nb_dev)];
}
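/*
 * Worked example for map_sector() above (illustrative numbers): with
 * chunk_sects = 128 (64KiB chunks) and a 3-device zone 0 (dev_start = 0),
 * array sector 1000 maps as
 *	sect_in_chunk = 1000 & 127       = 104	(offset inside the chunk)
 *	chunk number  = 1000 >> 7        = 7
 *	device index  = 7 % 3            = 1
 *	chunk on dev  = 1000 / (3 * 128) = 2
 * so the bio goes to device 1 at sector 2 * 128 + 104 = 360.
 */
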
/*
 * Is io distributed over 1 or more chunks ?
 */
static inline int is_io_in_chunk_boundary(mddev_t *mddev,
			unsigned int chunk_sects, struct bio *bio)
{
	if (likely(is_power_of_2(chunk_sects))) {
		return chunk_sects >= ((bio->bi_sector & (chunk_sects-1))
					+ (bio->bi_size >> 9));
	} else {
		sector_t sector = bio->bi_sector;
		return chunk_sects >= (sector_div(sector, chunk_sects)
					+ (bio->bi_size >> 9));
	}
}
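/*
 * Main I/O entry point: barriers are handed to md, a bio that straddles a
 * chunk boundary is split in two and resubmitted, and everything else is
 * remapped onto the owning member device.
 */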
static int raid0_make_request(mddev_t *mddev, struct bio *bio)
{
	unsigned int chunk_sects;
	sector_t sector_offset;
	struct strip_zone *zone;
	mdk_rdev_t *tmp_dev;

	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
		md_barrier_request(mddev, bio);
		return 0;
	}

	chunk_sects = mddev->chunk_sectors;
	if (unlikely(!is_io_in_chunk_boundary(mddev, chunk_sects, bio))) {
		sector_t sector = bio->bi_sector;
		struct bio_pair *bp;
		/* Sanity check -- queue functions should prevent this happening */
		if (bio->bi_vcnt != 1 ||
		    bio->bi_idx != 0)
			goto bad_map;
		/* This is a one page bio that upper layers
		 * refuse to split for us, so we need to split it.
		 */
		if (likely(is_power_of_2(chunk_sects)))
			bp = bio_split(bio, chunk_sects - (sector &
							   (chunk_sects-1)));
		else
			bp = bio_split(bio, chunk_sects -
				       sector_div(sector, chunk_sects));
		if (raid0_make_request(mddev, &bp->bio1))
			generic_make_request(&bp->bio1);
		if (raid0_make_request(mddev, &bp->bio2))
			generic_make_request(&bp->bio2);

		bio_pair_release(bp);
		return 0;
	}

	sector_offset = bio->bi_sector;
	zone = find_zone(mddev->private, &sector_offset);
	tmp_dev = map_sector(mddev, zone, bio->bi_sector,
			     &sector_offset);
	bio->bi_bdev = tmp_dev->bdev;
	bio->bi_sector = sector_offset + zone->dev_start +
		tmp_dev->data_offset;
	/*
	 * Let the main block layer submit the IO and resolve recursion:
	 */
	return 1;

bad_map:
	printk("md/raid0:%s: make_request bug: can't convert block across chunks"
	       " or bigger than %dk %llu %d\n",
	       mdname(mddev), chunk_sects / 2,
	       (unsigned long long)bio->bi_sector, bio->bi_size >> 10);

	bio_io_error(bio);
	return 0;
}
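/*
 * The zone dump below is compiled out unless MD_DEBUG is defined, so
 * normally only the chunk size is appended to the /proc/mdstat line.
 */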
static void raid0_status(struct seq_file *seq, mddev_t *mddev)
{
#undef MD_DEBUG
#ifdef MD_DEBUG
	int j, k;
	char b[BDEVNAME_SIZE];
	raid0_conf_t *conf = mddev->private;
	int raid_disks = conf->strip_zone[0].nb_dev;
	sector_t zone_size;
	sector_t zone_start = 0;

	for (j = 0; j < conf->nr_strip_zones; j++) {
		seq_printf(seq, "      z%d", j);
		seq_printf(seq, "=[");
		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
			seq_printf(seq, "%s/", bdevname(
				conf->devlist[j*raid_disks + k]->bdev, b));

		zone_size = conf->strip_zone[j].zone_end - zone_start;
		seq_printf(seq, "] ze=%lld ds=%lld s=%lld\n",
			(unsigned long long)zone_start>>1,
			(unsigned long long)conf->strip_zone[j].dev_start>>1,
			(unsigned long long)zone_size>>1);
		zone_start = conf->strip_zone[j].zone_end;
	}
#endif
	seq_printf(seq, " %dk chunks", mddev->chunk_sectors / 2);
}
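/*
 * Takeover rationale: with the raid4-like PARITY_N layout all parity
 * lives on the last disk; once that disk is missing, the surviving disks
 * already hold a plain raid0 stripe, so no data has to move.
 */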
static void *raid0_takeover_raid5(mddev_t *mddev)
{
	mdk_rdev_t *rdev;
	raid0_conf_t *priv_conf;

	if (mddev->degraded != 1) {
		printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
		       mdname(mddev),
		       mddev->degraded);
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(rdev, &mddev->disks, same_set) {
		/* check slot number for a disk */
		if (rdev->raid_disk == mddev->raid_disks-1) {
			printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
			       mdname(mddev));
			return ERR_PTR(-EINVAL);
		}
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->raid_disks--;
	mddev->delta_disks = -1;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	return priv_conf;
}
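/*
 * Takeover rationale: in a raid10 "near-2" layout every chunk is mirrored
 * on an adjacent pair of devices, so once exactly one device of each pair
 * has failed the survivors already form a raid0 stripe over half the
 * original disks.
 */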
static void *raid0_takeover_raid10(mddev_t *mddev)
{
	raid0_conf_t *priv_conf;

	/* Check layout:
	 *  - far_copies must be 1
	 *  - near_copies must be 2
	 *  - disks number must be even
	 *  - all mirrors must be already degraded
	 */
	if (mddev->layout != ((1 << 8) + 2)) {
		printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover layout: 0x%x\n",
		       mdname(mddev),
		       mddev->layout);
		return ERR_PTR(-EINVAL);
	}
	if (mddev->raid_disks & 1) {
		printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}
	if (mddev->degraded != (mddev->raid_disks>>1)) {
		printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
		       mdname(mddev));
		return ERR_PTR(-EINVAL);
	}

	/* Set new parameters */
	mddev->new_level = 0;
	mddev->new_chunk_sectors = mddev->chunk_sectors;
	mddev->delta_disks = - mddev->raid_disks / 2;
	mddev->raid_disks += mddev->delta_disks;
	mddev->degraded = 0;
	/* make sure it will be not marked as dirty */
	mddev->recovery_cp = MaxSector;

	create_strip_zones(mddev, &priv_conf);
	priv_conf->scale_raid_disks = 2;
	return priv_conf;
}
static void *raid0_takeover(mddev_t *mddev)
{
	/* raid0 can take over:
	 *  raid5 - providing it is Raid4 layout and one disk is faulty
	 *  raid10 - assuming we have all necessary active disks
	 */
	if (mddev->level == 5) {
		if (mddev->layout == ALGORITHM_PARITY_N)
			return raid0_takeover_raid5(mddev);

		printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
		       mdname(mddev), ALGORITHM_PARITY_N);
	}

	if (mddev->level == 10)
		return raid0_takeover_raid10(mddev);

	return ERR_PTR(-EINVAL);
}

static void raid0_quiesce(mddev_t *mddev, int state)
{
}
static struct mdk_personality raid0_personality =
{
	.name		= "raid0",
	.level		= 0,
	.owner		= THIS_MODULE,
	.make_request	= raid0_make_request,
	.run		= raid0_run,
	.stop		= raid0_stop,
	.status		= raid0_status,
	.size		= raid0_size,
	.takeover	= raid0_takeover,
	.quiesce	= raid0_quiesce,
};

static int __init raid0_init (void)
{
	return register_md_personality (&raid0_personality);
}

static void raid0_exit (void)
{
	unregister_md_personality (&raid0_personality);
}

module_init(raid0_init);
module_exit(raid0_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("RAID0 (striping) personality for MD");
MODULE_ALIAS("md-personality-2"); /* RAID0 */
MODULE_ALIAS("md-raid0");
MODULE_ALIAS("md-level-0");