drivers/block/rd.c

   1 /*
   2  * ramdisk.c - Multiple RAM disk driver - gzip-loading version - v. 0.8 beta.
   3  *
    4  * (C) Chad Page, Theodore Ts'o, et al., 1995.
   5  *
   6  * This RAM disk is designed to have filesystems created on it and mounted
   7  * just like a regular floppy disk.
   8  *
   9  * It also does something suggested by Linus: use the buffer cache as the
  10  * RAM disk data.  This makes it possible to dynamically allocate the RAM disk
  11  * buffer - with some consequences I have to deal with as I write this.
  12  *
  13  * This code is based on the original ramdisk.c, written mostly by
  14  * Theodore Ts'o (TYT) in 1991.  The code was largely rewritten by
  15  * Chad Page to use the buffer cache to store the RAM disk data in
  16  * 1995; Theodore then took over the driver again, and cleaned it up
  17  * for inclusion in the mainline kernel.
  18  *
  19  * The original CRAMDISK code was written by Richard Lyons, and
  20  * adapted by Chad Page to use the new RAM disk interface.  Theodore
  21  * Ts'o rewrote it so that both the compressed RAM disk loader and the
   22  * kernel decompressor use the same inflate.c codebase.  The RAM disk
  23  * loader now also loads into a dynamic (buffer cache based) RAM disk,
  24  * not the old static RAM disk.  Support for the old static RAM disk has
  25  * been completely removed.
  26  *
  27  * Loadable module support added by Tom Dyas.
  28  *
  29  * Further cleanups by Chad Page (page0588@sundance.sjsu.edu):
  30  *      Cosmetic changes in #ifdef MODULE, code movement, etc.
  31  *      When the RAM disk module is removed, free the protected buffers
  32  *      Default RAM disk size changed to 2.88 MB
  33  *
  34  *  Added initrd: Werner Almesberger & Hans Lermen, Feb '96
  35  *
  36  * 4/25/96 : Made RAM disk size a parameter (default is now 4 MB)
  37  *              - Chad Page
  38  *
  39  * Add support for fs images split across >1 disk, Paul Gortmaker, Mar '98
  40  *
  41  * Make block size and block size shift for RAM disks a global macro
  42  * and set blk_size for -ENOSPC,     Werner Fink <werner@suse.de>, Apr '99
  43  */
  44
  45 #include <linux/string.h>
  46 #include <linux/slab.h>
  47 #include <asm/atomic.h>
  48 #include <linux/bio.h>
  49 #include <linux/module.h>
  50 #include <linux/moduleparam.h>
  51 #include <linux/init.h>
  52 #include <linux/pagemap.h>
  53 #include <linux/blkdev.h>
  54 #include <linux/genhd.h>
  55 #include <linux/buffer_head.h>          /* for invalidate_bdev() */
  56 #include <linux/backing-dev.h>
  57 #include <linux/blkpg.h>
  58 #include <linux/writeback.h>
  59 #include <linux/log2.h>
  60
  61 #include <asm/uaccess.h>
  62
  63 /* Various static variables go here.  Most are used only in the RAM disk code.
  64  */
  65
   66 static struct gendisk *rd_disks[CONFIG_BLK_DEV_RAM_COUNT];	/* one gendisk per RAM disk, allocated in rd_init() */
   67 static struct block_device *rd_bdev[CONFIG_BLK_DEV_RAM_COUNT];/* Protected device data; filled in by the first rd_open() of each minor */
   68 static struct request_queue *rd_queue[CONFIG_BLK_DEV_RAM_COUNT];	/* one request queue per RAM disk, allocated in rd_init() */
  69
  70 /*
  71  * Parameters for the boot-loading of the RAM disk.  These are set by
  72  * init/main.c (from arguments to the kernel command line) or from the
  73  * architecture-specific setup routine (from the stored boot sector
  74  * information).
  75  */
   76 int rd_size = CONFIG_BLK_DEV_RAM_SIZE;          /* Size of each RAM disk in kB; rd_init() derives the disk capacity from it */
  77 /*
  78  * It would be very desirable to have a soft-blocksize (that in the case
  79  * of the ramdisk driver is also the hardblocksize ;) of PAGE_SIZE because
  80  * doing that we'll achieve a far better MM footprint. Using a rd_blocksize of
  81  * BLOCK_SIZE in the worst case we'll make PAGE_SIZE/BLOCK_SIZE buffer-pages
  82  * unfreeable. With a rd_blocksize of PAGE_SIZE instead we are sure that only
  83  * 1 page will be protected. Depending on the size of the ramdisk you
  84  * may want to change the ramdisk blocksize to achieve a better or worse MM
  85  * behaviour. The default is still BLOCK_SIZE (needed by rd_load_image that
  86  * supposes the filesystem in the image uses a BLOCK_SIZE blocksize).
  87  */
   88 static int rd_blocksize = CONFIG_BLK_DEV_RAM_BLOCKSIZE;	/* in bytes; rd_init() falls back to BLOCK_SIZE if it is not a power of two within 512..PAGE_SIZE */
  89
  90 /*
  91  * Copyright (C) 2000 Linus Torvalds.
  92  *               2000 Transmeta Corp.
  93  * aops copied from ramfs.
  94  */
  95
  96 /*
  97  * If a ramdisk page has buffers, some may be uptodate and some may be not.
  98  * To bring the page uptodate we zero out the non-uptodate buffers.  The
  99  * page must be locked.
 100  */
 101 static void make_page_uptodate(struct page *page)
 102 {
 103         if (page_has_buffers(page)) {
 104                 struct buffer_head *bh = page_buffers(page);
 105                 struct buffer_head *head = bh;
 106
 107                 do {
 108                         if (!buffer_uptodate(bh)) {
 109                                 memset(bh->b_data, 0, bh->b_size);
 110                                 /*
 111                                  * akpm: I'm totally undecided about this.  The
 112                                  * buffer has just been magically brought "up to
 113                                  * date", but nobody should want to be reading
 114                                  * it anyway, because it hasn't been used for
 115                                  * anything yet.  It is still in a "not read
 116                                  * from disk yet" state.
 117                                  *
 118                                  * But non-uptodate buffers against an uptodate
 119                                  * page are against the rules.  So do it anyway.
 120                                  */
 121                                  set_buffer_uptodate(bh);
 122                         }
 123                 } while ((bh = bh->b_this_page) != head);
 124         } else {
 125                 memset(page_address(page), 0, PAGE_CACHE_SIZE);
 126         }
 127         flush_dcache_page(page);
 128         SetPageUptodate(page);
 129 }
 130
 131 static int ramdisk_readpage(struct file *file, struct page *page)
 132 {
 133         if (!PageUptodate(page))
 134                 make_page_uptodate(page);
 135         unlock_page(page);
 136         return 0;
 137 }
 138
 139 static int ramdisk_prepare_write(struct file *file, struct page *page,
 140                                 unsigned offset, unsigned to)
 141 {
 142         if (!PageUptodate(page))
 143                 make_page_uptodate(page);
 144         return 0;
 145 }
 146
 147 static int ramdisk_commit_write(struct file *file, struct page *page,
 148                                 unsigned offset, unsigned to)
 149 {
 150         set_page_dirty(page);
 151         return 0;
 152 }
 153
 154 /*
 155  * ->writepage to the blockdev's mapping has to redirty the page so that the
 156  * VM doesn't go and steal it.  We return AOP_WRITEPAGE_ACTIVATE so that the VM
 157  * won't try to (pointlessly) write the page again for a while.
 158  *
 159  * Really, these pages should not be on the LRU at all.
 160  */
 161 static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
 162 {
 163         if (!PageUptodate(page))
 164                 make_page_uptodate(page);
 165         SetPageDirty(page);
 166         if (wbc->for_reclaim)
 167                 return AOP_WRITEPAGE_ACTIVATE;
 168         unlock_page(page);
 169         return 0;
 170 }
 171
 172 /*
 173  * This is a little speedup thing: short-circuit attempts to write back the
 174  * ramdisk blockdev inode to its non-existent backing store.
 175  */
 176 static int ramdisk_writepages(struct address_space *mapping,
 177                                 struct writeback_control *wbc)
 178 {
 179         return 0;
 180 }
 181
 182 /*
 183  * ramdisk blockdev pages have their own ->set_page_dirty() because we don't
 184  * want them to contribute to dirty memory accounting.
 185  */
 186 static int ramdisk_set_page_dirty(struct page *page)
 187 {
 188         if (!TestSetPageDirty(page))
 189                 return 1;
 190         return 0;
 191 }
 192
 193 /*
 194  * releasepage is called by pagevec_strip/try_to_release_page if
 195  * buffers_heads_over_limit is true. Without a releasepage function
 196  * try_to_free_buffers is called instead. That can unset the dirty
 197  * bit of our ram disk pages, which will be eventually freed, even
 198  * if the page is still in use.
 199  */
 200 static int ramdisk_releasepage(struct page *page, gfp_t dummy)
 201 {
 202         return 0;
 203 }
 204
 205 static const struct address_space_operations ramdisk_aops = {
 206         .readpage       = ramdisk_readpage,
 207         .prepare_write  = ramdisk_prepare_write,
 208         .commit_write   = ramdisk_commit_write,
 209         .writepage      = ramdisk_writepage,
 210         .set_page_dirty = ramdisk_set_page_dirty,
 211         .writepages     = ramdisk_writepages,
 212         .releasepage    = ramdisk_releasepage,
 213 };
 214
 215 static int rd_blkdev_pagecache_IO(int rw, struct bio_vec *vec, sector_t sector,
 216                                 struct address_space *mapping)
 217 {
 218         pgoff_t index = sector >> (PAGE_CACHE_SHIFT - 9);
 219         unsigned int vec_offset = vec->bv_offset;
 220         int offset = (sector << 9) & ~PAGE_CACHE_MASK;
 221         int size = vec->bv_len;
 222         int err = 0;
 223
 224         do {
 225                 int count;
 226                 struct page *page;
 227                 char *src;
 228                 char *dst;
 229
 230                 count = PAGE_CACHE_SIZE - offset;
 231                 if (count > size)
 232                         count = size;
 233                 size -= count;
 234
 235                 page = grab_cache_page(mapping, index);
 236                 if (!page) {
 237                         err = -ENOMEM;
 238                         goto out;
 239                 }
 240
 241                 if (!PageUptodate(page))
 242                         make_page_uptodate(page);
 243
 244                 index++;
 245
 246                 if (rw == READ) {
 247                         src = kmap_atomic(page, KM_USER0) + offset;
 248                         dst = kmap_atomic(vec->bv_page, KM_USER1) + vec_offset;
 249                 } else {
 250                         src = kmap_atomic(vec->bv_page, KM_USER0) + vec_offset;
 251                         dst = kmap_atomic(page, KM_USER1) + offset;
 252                 }
 253                 offset = 0;
 254                 vec_offset += count;
 255
 256                 memcpy(dst, src, count);
 257
 258                 kunmap_atomic(src, KM_USER0);
 259                 kunmap_atomic(dst, KM_USER1);
 260
 261                 if (rw == READ)
 262                         flush_dcache_page(vec->bv_page);
 263                 else
 264                         set_page_dirty(page);
 265                 unlock_page(page);
 266                 put_page(page);
 267         } while (size);
 268
 269  out:
 270         return err;
 271 }
 272
 273 /*
 274  *  Basically, my strategy here is to set up a buffer-head which can't be
 275  *  deleted, and make that my Ramdisk.  If the request is outside of the
 276  *  allocated size, we must get rid of it...
 277  *
 278  * 19-JAN-1998  Richard Gooch <rgooch@atnf.csiro.au>  Added devfs support
 279  *
 280  */
 281 static int rd_make_request(struct request_queue *q, struct bio *bio)
 282 {
 283         struct block_device *bdev = bio->bi_bdev;
 284         struct address_space * mapping = bdev->bd_inode->i_mapping;
 285         sector_t sector = bio->bi_sector;
 286         unsigned long len = bio->bi_size >> 9;
 287         int rw = bio_data_dir(bio);
 288         struct bio_vec *bvec;
 289         int ret = 0, i;
 290
 291         if (sector + len > get_capacity(bdev->bd_disk))
 292                 goto fail;
 293
 294         if (rw==READA)
 295                 rw=READ;
 296
 297         bio_for_each_segment(bvec, bio, i) {
 298                 ret |= rd_blkdev_pagecache_IO(rw, bvec, sector, mapping);
 299                 sector += bvec->bv_len >> 9;
 300         }
 301         if (ret)
 302                 goto fail;
 303
 304         bio_endio(bio, 0);
 305         return 0;
 306 fail:
 307         bio_io_error(bio);
 308         return 0;
 309 }
 310
 311 static int rd_ioctl(struct inode *inode, struct file *file,
 312                         unsigned int cmd, unsigned long arg)
 313 {
 314         int error;
 315         struct block_device *bdev = inode->i_bdev;
 316
 317         if (cmd != BLKFLSBUF)
 318                 return -ENOTTY;
 319
 320         /*
 321          * special: we want to release the ramdisk memory, it's not like with
 322          * the other blockdevices where this ioctl only flushes away the buffer
 323          * cache
 324          */
 325         error = -EBUSY;
 326         mutex_lock(&bdev->bd_mutex);
 327         if (bdev->bd_openers <= 2) {
 328                 truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
 329                 error = 0;
 330         }
 331         mutex_unlock(&bdev->bd_mutex);
 332         return error;
 333 }
 334
 335 /*
 336  * This is the backing_dev_info for the blockdev inode itself.  It doesn't need
 337  * writeback and it does not contribute to dirty memory accounting.
 338  */
 339 static struct backing_dev_info rd_backing_dev_info = {
 340         .ra_pages       = 0,    /* No readahead */
 341         .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK | BDI_CAP_MAP_COPY,
 342         .unplug_io_fn   = default_unplug_io_fn,
 343 };
 344
 345 /*
 346  * This is the backing_dev_info for the files which live atop the ramdisk
 347  * "device".  These files do need writeback and they do contribute to dirty
 348  * memory accounting.
 349  */
 350 static struct backing_dev_info rd_file_backing_dev_info = {
 351         .ra_pages       = 0,    /* No readahead */
 352         .capabilities   = BDI_CAP_MAP_COPY,     /* Does contribute to dirty memory */
 353         .unplug_io_fn   = default_unplug_io_fn,
 354 };
 355
 356 static int rd_open(struct inode *inode, struct file *filp)
 357 {
 358         unsigned unit = iminor(inode);
 359
 360         if (rd_bdev[unit] == NULL) {
 361                 struct block_device *bdev = inode->i_bdev;
 362                 struct address_space *mapping;
 363                 unsigned bsize;
 364                 gfp_t gfp_mask;
 365
 366                 inode = igrab(bdev->bd_inode);
 367                 rd_bdev[unit] = bdev;
 368                 bdev->bd_openers++;
 369                 bsize = bdev_hardsect_size(bdev);
 370                 bdev->bd_block_size = bsize;
 371                 inode->i_blkbits = blksize_bits(bsize);
 372                 inode->i_size = get_capacity(bdev->bd_disk)<<9;
 373
 374                 mapping = inode->i_mapping;
 375                 mapping->a_ops = &ramdisk_aops;
 376                 mapping->backing_dev_info = &rd_backing_dev_info;
 377                 bdev->bd_inode_backing_dev_info = &rd_file_backing_dev_info;
 378
 379                 /*
 380                  * Deep badness.  rd_blkdev_pagecache_IO() needs to allocate
 381                  * pagecache pages within a request_fn.  We cannot recur back
 382                  * into the filesystem which is mounted atop the ramdisk, because
 383                  * that would deadlock on fs locks.  And we really don't want
 384                  * to reenter rd_blkdev_pagecache_IO when we're already within
 385                  * that function.
 386                  *
 387                  * So we turn off __GFP_FS and __GFP_IO.
 388                  *
 389                  * And to give this thing a hope of working, turn on __GFP_HIGH.
 390                  * Hopefully, there's enough regular memory allocation going on
 391                  * for the page allocator emergency pools to keep the ramdisk
 392                  * driver happy.
 393                  */
 394                 gfp_mask = mapping_gfp_mask(mapping);
 395                 gfp_mask &= ~(__GFP_FS|__GFP_IO);
 396                 gfp_mask |= __GFP_HIGH;
 397                 mapping_set_gfp_mask(mapping, gfp_mask);
 398         }
 399
 400         return 0;
 401 }
 402
 403 static struct block_device_operations rd_bd_op = {
 404         .owner =        THIS_MODULE,
 405         .open =         rd_open,
 406         .ioctl =        rd_ioctl,
 407 };
 408
 409 /*
 410  * Before freeing the module, invalidate all of the protected buffers!
 411  */
 412 static void __exit rd_cleanup(void)
 413 {
 414         int i;
 415
 416         for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
 417                 struct block_device *bdev = rd_bdev[i];
 418                 rd_bdev[i] = NULL;
 419                 if (bdev) {
 420                         invalidate_bdev(bdev);
 421                         blkdev_put(bdev);
 422                 }
 423                 del_gendisk(rd_disks[i]);
 424                 put_disk(rd_disks[i]);
 425                 blk_cleanup_queue(rd_queue[i]);
 426         }
 427         unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
 428
 429         bdi_destroy(&rd_file_backing_dev_info);
 430         bdi_destroy(&rd_backing_dev_info);
 431 }
 432
 433 /*
 434  * This is the registration and initialization section of the RAM disk driver
 435  */
 436 static int __init rd_init(void)
 437 {
 438         int i;
 439         int err;
 440
 441         err = bdi_init(&rd_backing_dev_info);
 442         if (err)
 443                 goto out2;
 444
 445         err = bdi_init(&rd_file_backing_dev_info);
 446         if (err) {
 447                 bdi_destroy(&rd_backing_dev_info);
 448                 goto out2;
 449         }
 450
 451         err = -ENOMEM;
 452
 453         if (rd_blocksize > PAGE_SIZE || rd_blocksize < 512 ||
 454                         !is_power_of_2(rd_blocksize)) {
 455                 printk("RAMDISK: wrong blocksize %d, reverting to defaults\n",
 456                        rd_blocksize);
 457                 rd_blocksize = BLOCK_SIZE;
 458         }
 459
 460         for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
 461                 rd_disks[i] = alloc_disk(1);
 462                 if (!rd_disks[i])
 463                         goto out;
 464
 465                 rd_queue[i] = blk_alloc_queue(GFP_KERNEL);
 466                 if (!rd_queue[i]) {
 467                         put_disk(rd_disks[i]);
 468                         goto out;
 469                 }
 470         }
 471
 472         if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) {
 473                 err = -EIO;
 474                 goto out;
 475         }
 476
 477         for (i = 0; i < CONFIG_BLK_DEV_RAM_COUNT; i++) {
 478                 struct gendisk *disk = rd_disks[i];
 479
 480                 blk_queue_make_request(rd_queue[i], &rd_make_request);
 481                 blk_queue_hardsect_size(rd_queue[i], rd_blocksize);
 482
 483                 /* rd_size is given in kB */
 484                 disk->major = RAMDISK_MAJOR;
 485                 disk->first_minor = i;
 486                 disk->fops = &rd_bd_op;
 487                 disk->queue = rd_queue[i];
 488                 disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
 489                 sprintf(disk->disk_name, "ram%d", i);
 490                 set_capacity(disk, rd_size * 2);
 491                 add_disk(rd_disks[i]);
 492         }
 493
 494         /* rd_size is given in kB */
 495         printk("RAMDISK driver initialized: "
 496                 "%d RAM disks of %dK size %d blocksize\n",
 497                 CONFIG_BLK_DEV_RAM_COUNT, rd_size, rd_blocksize);
 498
 499         return 0;
 500 out:
 501         while (i--) {
 502                 put_disk(rd_disks[i]);
 503                 blk_cleanup_queue(rd_queue[i]);
 504         }
 505         bdi_destroy(&rd_backing_dev_info);
 506         bdi_destroy(&rd_file_backing_dev_info);
 507 out2:
 508         return err;
 509 }
 510
  511 module_init(rd_init);	/* rd_init() sets the driver up */
  512 module_exit(rd_cleanup);	/* rd_cleanup() tears the driver down */
 513
 514 /* options - nonmodular */
 515 #ifndef MODULE
 516 static int __init ramdisk_size(char *str)
 517 {
 518         rd_size = simple_strtol(str,NULL,0);
 519         return 1;
 520 }
 521 static int __init ramdisk_blocksize(char *str)
 522 {
 523         rd_blocksize = simple_strtol(str,NULL,0);
 524         return 1;
 525 }
 526 __setup("ramdisk_size=", ramdisk_size);
 527 __setup("ramdisk_blocksize=", ramdisk_blocksize);
 528 #endif
 529
 530 /* options - modular */
  531 module_param(rd_size, int, 0);	/* overrides the CONFIG_BLK_DEV_RAM_SIZE default of rd_size */
  532 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
  533 module_param(rd_blocksize, int, 0);	/* overrides the CONFIG_BLK_DEV_RAM_BLOCKSIZE default of rd_blocksize */
  534 MODULE_PARM_DESC(rd_blocksize, "Blocksize of each RAM disk in bytes.");
  535 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
  536
  537 MODULE_LICENSE("GPL");