/*
 * Authors: Dave Boutcher <boutcher@us.ibm.com>
 *          Ryan Arnold <ryanarn@us.ibm.com>
 *          Colin Devilbiss <devilbis@us.ibm.com>
 *
 * (C) Copyright 2000-2004 IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * This driver provides access to disk space (termed "DASD" in historical
 * IBM terms) owned and managed by an OS/400 partition running on the
 * same box as this Linux partition.
 *
 * All disk operations are performed by sending messages back and forth to
 * the OS/400 partition.
 */
#define pr_fmt(fmt) "viod: " fmt
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/hdreg.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/dma-mapping.h>
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/scatterlist.h>
#include <asm/uaccess.h>
#include <asm/vio.h>

#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/iseries/hv_lp_config.h>
#include <asm/iseries/vio.h>
#include <asm/firmware.h>
MODULE_DESCRIPTION("iSeries Virtual DASD");
MODULE_AUTHOR("Dave Boutcher");
MODULE_LICENSE("GPL");
/*
 * We only support 7 partitions per physical disk, so with the whole-disk
 * node that is 8 minor numbers per disk; minors 0-255 then give us a
 * maximum of 32 disks.
 */
#define VIOD_GENHD_NAME		"iseries/vd"

#define VIOD_VERS		"1.64"

enum {
	PARTITION_SHIFT = 3,
	MAX_DISKNO = HVMAXARCHITECTEDVIRTUALDISKS,
	MAX_DISK_NAME = FIELD_SIZEOF(struct gendisk, disk_name)
};
static DEFINE_MUTEX(viodasd_mutex);
static DEFINE_SPINLOCK(viodasd_spinlock);

#define VIOMAXREQ		16
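/*
 * DEVICE_NO() recovers a disk's index by pointer arithmetic: subtracting
 * the base of the viodasd_devices[] array from a pointer to one of its
 * elements yields that element's array index (0..MAX_DISKNO-1).
 */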
#define DEVICE_NO(cell)	((struct viodasd_device *)(cell) - &viodasd_devices[0])
struct viodasd_waitevent {
	struct completion	com;
	int			rc;
	u16			sub_result;
	int			max_disk;	/* open */
};
static const struct vio_error_entry viodasd_err_table[] = {
	{ 0x0201, EINVAL, "Invalid Range" },
	{ 0x0202, EINVAL, "Invalid Token" },
	{ 0x0203, EIO, "DMA Error" },
	{ 0x0204, EIO, "Use Error" },
	{ 0x0205, EIO, "Release Error" },
	{ 0x0206, EINVAL, "Invalid Disk" },
	{ 0x0207, EBUSY, "Can't Lock" },
	{ 0x0208, EIO, "Already Locked" },
	{ 0x0209, EIO, "Already Unlocked" },
	{ 0x020A, EIO, "Invalid Arg" },
	{ 0x020B, EIO, "Bad IFS File" },
	{ 0x020C, EROFS, "Read Only Device" },
	{ 0x02FF, EIO, "Internal Error" },
	{ 0x0000, 0, NULL },
};
/*
 * Figure out the biggest I/O request (in 512-byte sectors) we can accept:
 * each DMA descriptor covers at most one 4096-byte block, and a single
 * event carries at most VIOMAXBLOCKDMA descriptors.
 */
#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)
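/*
 * For scale: assuming VIOMAXBLOCKDMA is 12, as defined in the iSeries
 * <asm/iseries/vio.h> header, this works out to 8 * 12 = 96 sectors,
 * i.e. 48KB per request.
 */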
/*
 * Number of disk I/O requests we've sent to OS/400
 */
static int num_req_outstanding;
/*
 * This is our internal structure for keeping track of disk devices
 */
struct viodasd_device {
	u16		cylinders;
	u16		tracks;
	u16		sectors;
	u16		bytes_per_sector;
	u64		size;
	int		read_only;
	spinlock_t	q_lock;
	struct gendisk	*disk;
	struct device	*dev;
} viodasd_devices[MAX_DISKNO];
/*
 * External open entry point.
 */
static int viodasd_open(struct block_device *bdev, fmode_t mode)
{
	struct viodasd_device *d = bdev->bd_disk->private_data;
	HvLpEvent_Rc hvrc;
	struct viodasd_waitevent we;
	u16 flags = 0;

	if (d->read_only) {
		if (mode & FMODE_WRITE)
			return -EROFS;
		flags = vioblockflags_ro;
	}

	init_completion(&we.com);

	/* Send the open event to OS/400 */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockopen,
			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			(u64)(unsigned long)&we, VIOVERSION << 16,
			((u64)DEVICE_NO(d) << 48) | ((u64)flags << 32),
			0, 0, 0);
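	/*
	 * Payload layout, visible in the shifts above: the correlation
	 * token carries &we so the response handler can find us, the
	 * first data word holds VIOVERSION in its high half, and the
	 * second packs the device number into bits 48-63 with the open
	 * flags in bits 32-47.
	 */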
161 pr_warning("HV open failed %d\n", (int)hvrc);
165 wait_for_completion(&we.com);
167 /* Check the return code */
169 const struct vio_error_entry *err =
170 vio_lookup_rc(viodasd_err_table, we.sub_result);
172 pr_warning("bad rc opening disk: %d:0x%04x (%s)\n",
173 (int)we.rc, we.sub_result, err->msg);
static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
{
	int ret;

	mutex_lock(&viodasd_mutex);
	ret = viodasd_open(bdev, mode);
	mutex_unlock(&viodasd_mutex);

	return ret;
}
/*
 * External release entry point.
 */
static int viodasd_release(struct gendisk *disk, fmode_t mode)
{
	struct viodasd_device *d = disk->private_data;
	HvLpEvent_Rc hvrc;

	mutex_lock(&viodasd_mutex);
	/* Send the event to OS/400. We DON'T expect a response */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockclose,
			HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			0, VIOVERSION << 16,
			((u64)DEVICE_NO(d) << 48) /* | ((u64)flags << 32) */,
			0, 0, 0);
	if (hvrc != 0)
		pr_warning("HV close call failed %d\n", (int)hvrc);

	mutex_unlock(&viodasd_mutex);

	return 0;
}
/*
 * External getgeo entry point, used by the HDIO_GETGEO ioctl.
 */
static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
	struct gendisk *disk = bdev->bd_disk;
	struct viodasd_device *d = disk->private_data;

	geo->sectors = d->sectors ? d->sectors : 32;
	geo->heads = d->tracks ? d->tracks : 64;
	geo->cylinders = d->cylinders ? d->cylinders :
		get_capacity(disk) / (geo->sectors * geo->heads);

	return 0;
}
/*
 * Our file operations table
 */
static const struct block_device_operations viodasd_fops = {
	.owner = THIS_MODULE,
	.open = viodasd_unlocked_open,
	.release = viodasd_release,
	.getgeo = viodasd_getgeo,
};
/*
 * End a request; num_sectors is converted to bytes for __blk_end_request()
 */
static void viodasd_end_request(struct request *req, int error,
		int num_sectors)
{
	__blk_end_request(req, error, num_sectors << 9);
}
/*
 * Send an actual I/O request to OS/400
 */
static int send_request(struct request *req)
{
	u64 start;
	int direction;
	int nsg;
	u16 viocmd;
	HvLpEvent_Rc hvrc;
	struct vioblocklpevent *bevent;
	struct HvLpEvent *hev;
	struct scatterlist sg[VIOMAXBLOCKDMA];
	int sgindex;
	struct viodasd_device *d;
	unsigned long flags;

	start = (u64)blk_rq_pos(req) << 9;

	if (rq_data_dir(req) == READ) {
		direction = DMA_FROM_DEVICE;
		viocmd = viomajorsubtype_blockio | vioblockread;
	} else {
		direction = DMA_TO_DEVICE;
		viocmd = viomajorsubtype_blockio | vioblockwrite;
	}

	d = req->rq_disk->private_data;

	/* Now build the scatter-gather list */
	sg_init_table(sg, VIOMAXBLOCKDMA);
	nsg = blk_rq_map_sg(req->q, req, sg);
	nsg = dma_map_sg(d->dev, sg, nsg, direction);

	spin_lock_irqsave(&viodasd_spinlock, flags);
	num_req_outstanding++;
	/* This optimization handles a single DMA block */
	if (nsg == 1)
		hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
				HvLpEvent_Type_VirtualIo, viocmd,
				HvLpEvent_AckInd_DoAck,
				HvLpEvent_AckType_ImmediateAck,
				viopath_sourceinst(viopath_hostLp),
				viopath_targetinst(viopath_hostLp),
				(u64)(unsigned long)req, VIOVERSION << 16,
				((u64)DEVICE_NO(d) << 48), start,
				((u64)sg_dma_address(&sg[0])) << 32,
				sg_dma_len(&sg[0]));
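	/*
	 * With a single DMA segment the whole request fits in the data
	 * words of a "fast" event, so no event buffer needs to be
	 * allocated; multi-segment requests fall through to the slower
	 * buffer-based path below.
	 */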
	else {
		bevent = (struct vioblocklpevent *)
			vio_get_event_buffer(viomajorsubtype_blockio);
		if (bevent == NULL) {
			pr_warning("error allocating disk event buffer\n");
			goto error_ret;
		}
		/*
		 * Now build up the actual request. Note that we store
		 * the pointer to the request in the correlation
		 * token so we can match the response up later
		 */
		memset(bevent, 0, sizeof(struct vioblocklpevent));
		hev = &bevent->event;
		hev->flags = HV_LP_EVENT_VALID | HV_LP_EVENT_DO_ACK |
			HV_LP_EVENT_INT;
		hev->xType = HvLpEvent_Type_VirtualIo;
		hev->xSubtype = viocmd;
		hev->xSourceLp = HvLpConfig_getLpIndex();
		hev->xTargetLp = viopath_hostLp;
		hev->xSizeMinus1 =
			offsetof(struct vioblocklpevent, u.rw_data.dma_info) +
			(sizeof(bevent->u.rw_data.dma_info[0]) * nsg) - 1;
		hev->xSourceInstanceId = viopath_sourceinst(viopath_hostLp);
		hev->xTargetInstanceId = viopath_targetinst(viopath_hostLp);
		hev->xCorrelationToken = (u64)req;
		bevent->version = VIOVERSION;
		bevent->disk = DEVICE_NO(d);
		bevent->u.rw_data.offset = start;

		/*
		 * Copy just the dma information from the sg list
		 * into the request
		 */
		for (sgindex = 0; sgindex < nsg; sgindex++) {
			bevent->u.rw_data.dma_info[sgindex].token =
				sg_dma_address(&sg[sgindex]);
			bevent->u.rw_data.dma_info[sgindex].len =
				sg_dma_len(&sg[sgindex]);
		}
		/* Send the request */
		hvrc = HvCallEvent_signalLpEvent(&bevent->event);
		vio_free_event_buffer(viomajorsubtype_blockio, bevent);
	}
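	/*
	 * The event buffer is freed as soon as signalLpEvent() returns;
	 * this relies on the hypervisor copying the event to the target
	 * partition's queue when it is signalled, so the buffer is not
	 * referenced afterwards.
	 */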
	if (hvrc != HvLpEvent_Rc_Good) {
		pr_warning("error sending disk event to OS/400 (rc %d)\n",
			   (int)hvrc);
		goto error_ret;
	}
	spin_unlock_irqrestore(&viodasd_spinlock, flags);
	return 0;

error_ret:
	num_req_outstanding--;
	spin_unlock_irqrestore(&viodasd_spinlock, flags);
	dma_unmap_sg(d->dev, sg, nsg, direction);
	return -1;
}
/*
 * This is the external request processing routine
 */
static void do_viodasd_request(struct request_queue *q)
{
	struct request *req;

	/*
	 * If we already have the maximum number of requests
	 * outstanding to OS/400 just bail out. We'll come
	 * back later.
	 */
	while (num_req_outstanding < VIOMAXREQ) {
		req = blk_fetch_request(q);
		if (req == NULL)
			return;
		/* check that request contains a valid command */
		if (req->cmd_type != REQ_TYPE_FS) {
			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
			continue;
		}
		/* Try sending the request */
		if (send_request(req) != 0)
			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
	}
}
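/*
 * Note that when the loop above stops because VIOMAXREQ requests are
 * already in flight, nothing re-runs this queue directly; the completion
 * handler restarts the queues once responses arrive (see
 * viodasd_restart_all_queues_starting_from() below).
 */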
/*
 * Probe a single disk and fill in the viodasd_device structure
 * for it.
 */
static int probe_disk(struct viodasd_device *d)
{
	HvLpEvent_Rc hvrc;
	struct viodasd_waitevent we;
	int dev_no = DEVICE_NO(d);
	struct gendisk *g;
	struct request_queue *q;
	u16 flags = 0;

retry:
	init_completion(&we.com);

	/* Send the open event to OS/400 */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockopen,
			HvLpEvent_AckInd_DoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			(u64)(unsigned long)&we, VIOVERSION << 16,
			((u64)dev_no << 48) | ((u64)flags << 32),
			0, 0, 0);
	if (hvrc != 0) {
		pr_warning("bad rc on HV open %d\n", (int)hvrc);
		return 0;
	}

	wait_for_completion(&we.com);
	if (we.rc != 0) {
		/* bail out if the read-only retry also failed */
		if (flags != 0)
			return 0;
		/* try again with read only flag set */
		flags = vioblockflags_ro;
		goto retry;
	}

	if (we.max_disk > (MAX_DISKNO - 1)) {
		printk_once(KERN_INFO pr_fmt("Only examining the first %d of %d disks connected\n"),
			    MAX_DISKNO, we.max_disk + 1);
	}
	/* Send the close event to OS/400. We DON'T expect a response */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio | vioblockclose,
			HvLpEvent_AckInd_NoAck, HvLpEvent_AckType_ImmediateAck,
			viopath_sourceinst(viopath_hostLp),
			viopath_targetinst(viopath_hostLp),
			0, VIOVERSION << 16,
			((u64)dev_no << 48) | ((u64)flags << 32),
			0, 0, 0);
	if (hvrc != 0) {
		pr_warning("bad rc sending event to OS/400 %d\n", (int)hvrc);
		return 0;
	}
	if (d->dev == NULL) {
		/* this is when we reprobe for new disks */
		if (vio_create_viodasd(dev_no) == NULL) {
			pr_warning("cannot allocate virtual device for disk %d\n",
				   dev_no);
			return 0;
		}
		/*
		 * The vio_create_viodasd will have recursed into this
		 * routine with d->dev set to the new vio device and
		 * will finish the setup of the disk below.
		 */
		return 1;
	}
	/* create the request queue for the disk */
	spin_lock_init(&d->q_lock);
	q = blk_init_queue(do_viodasd_request, &d->q_lock);
	if (q == NULL) {
		pr_warning("cannot allocate queue for disk %d\n", dev_no);
		return 0;
	}
	g = alloc_disk(1 << PARTITION_SHIFT);
	if (g == NULL) {
		pr_warning("cannot allocate disk structure for disk %d\n",
			   dev_no);
		blk_cleanup_queue(q);
		return 0;
	}
	d->disk = g;
	blk_queue_max_segments(q, VIOMAXBLOCKDMA);
	blk_queue_max_hw_sectors(q, VIODASD_MAXSECTORS);
	g->major = VIODASD_MAJOR;
	g->first_minor = dev_no << PARTITION_SHIFT;
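	/*
	 * Disk names follow the hd/sd convention: "iseries/vda" through
	 * "iseries/vdz" for disks 0-25, then two-letter suffixes, e.g.
	 * disk 26 becomes "iseries/vdaa".
	 */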
	if (dev_no >= 26)
		snprintf(g->disk_name, sizeof(g->disk_name),
				VIOD_GENHD_NAME "%c%c",
				'a' + (dev_no / 26) - 1, 'a' + (dev_no % 26));
	else
		snprintf(g->disk_name, sizeof(g->disk_name),
				VIOD_GENHD_NAME "%c", 'a' + (dev_no % 26));
	g->fops = &viodasd_fops;
	g->queue = q;
	g->private_data = d;
	g->driverfs_dev = d->dev;
	set_capacity(g, d->size >> 9);
500 pr_info("disk %d: %lu sectors (%lu MB) CHS=%d/%d/%d sector size %d%s\n",
501 dev_no, (unsigned long)(d->size >> 9),
502 (unsigned long)(d->size >> 20),
503 (int)d->cylinders, (int)d->tracks,
504 (int)d->sectors, (int)d->bytes_per_sector,
505 d->read_only ? " (RO)" : "");
	/* register us in the global list */
	add_disk(g);
	return 1;
}
/* returns the total number of scatterlist elements converted */
static int block_event_to_scatterlist(const struct vioblocklpevent *bevent,
		struct scatterlist *sg, int *total_len)
{
	int i, numsg;
	const struct rw_data *rw_data = &bevent->u.rw_data;
	static const int offset =
		offsetof(struct vioblocklpevent, u.rw_data.dma_info);
	static const int element_size = sizeof(rw_data->dma_info[0]);
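	/*
	 * xSizeMinus1 is the event length minus one, so the descriptor
	 * count is recovered by stripping the fixed header (offset) from
	 * the event length and dividing by the size of one descriptor.
	 */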
	numsg = ((bevent->event.xSizeMinus1 + 1) - offset) / element_size;
	if (numsg > VIOMAXBLOCKDMA)
		numsg = VIOMAXBLOCKDMA;

	*total_len = 0;
	sg_init_table(sg, VIOMAXBLOCKDMA);
	for (i = 0; (i < numsg) && (rw_data->dma_info[i].len > 0); ++i) {
		sg_dma_address(&sg[i]) = rw_data->dma_info[i].token;
		sg_dma_len(&sg[i]) = rw_data->dma_info[i].len;
		*total_len += rw_data->dma_info[i].len;
	}
	return i;
}
/*
 * Restart all queues, starting with the one _after_ the disk given,
 * thus reducing the chance of starvation of higher numbered disks.
 */
static void viodasd_restart_all_queues_starting_from(int first_index)
{
	int i;
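	/*
	 * Example: with first_index == 2 and MAX_DISKNO == 32, the queues
	 * are run in the order 3, 4, ..., 31, then 0, 1, 2.
	 */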
	for (i = first_index + 1; i < MAX_DISKNO; ++i)
		if (viodasd_devices[i].disk)
			blk_run_queue(viodasd_devices[i].disk->queue);
	for (i = 0; i <= first_index; ++i)
		if (viodasd_devices[i].disk)
			blk_run_queue(viodasd_devices[i].disk->queue);
}
/*
 * For read and write requests, decrement the number of outstanding
 * requests and free the DMA buffers we allocated.
 */
static int viodasd_handle_read_write(struct vioblocklpevent *bevent)
{
	int num_sg, num_sect, pci_direction, total_len;
	struct request *req;
	struct scatterlist sg[VIOMAXBLOCKDMA];
	struct HvLpEvent *event = &bevent->event;
	unsigned long irq_flags;
	struct viodasd_device *d;
	int error;
	spinlock_t *qlock;
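	/*
	 * The response event echoes the DMA descriptors we packed in
	 * send_request(); block_event_to_scatterlist() below rebuilds a
	 * scatterlist from them solely so dma_unmap_sg() can unmap the
	 * right addresses.
	 */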
	num_sg = block_event_to_scatterlist(bevent, sg, &total_len);
	num_sect = total_len >> 9;
	if (event->xSubtype == (viomajorsubtype_blockio | vioblockread))
		pci_direction = DMA_FROM_DEVICE;
	else
		pci_direction = DMA_TO_DEVICE;
	req = (struct request *)bevent->event.xCorrelationToken;
	d = req->rq_disk->private_data;

	dma_unmap_sg(d->dev, sg, num_sg, pci_direction);
	/*
	 * Since this is running in interrupt mode, we need to make sure
	 * we're not stepping on any global I/O operations
	 */
	spin_lock_irqsave(&viodasd_spinlock, irq_flags);
	num_req_outstanding--;
	spin_unlock_irqrestore(&viodasd_spinlock, irq_flags);
	error = (event->xRc == HvLpEvent_Rc_Good) ? 0 : -EIO;
	if (error) {
		const struct vio_error_entry *err;

		err = vio_lookup_rc(viodasd_err_table, bevent->sub_result);
		pr_warning("read/write error %d:0x%04x (%s)\n",
			   event->xRc, bevent->sub_result, err->msg);
		num_sect = blk_rq_sectors(req);
	}
	qlock = req->q->queue_lock;
	spin_lock_irqsave(qlock, irq_flags);
	viodasd_end_request(req, error, num_sect);
	spin_unlock_irqrestore(qlock, irq_flags);

	/* Finally, try to get more requests off of this device's queue */
	viodasd_restart_all_queues_starting_from(DEVICE_NO(d));

	return 0;
}
/* This routine handles incoming block LP events */
static void handle_block_event(struct HvLpEvent *event)
{
	struct vioblocklpevent *bevent = (struct vioblocklpevent *)event;
	struct viodasd_waitevent *pwe;

	if (event == NULL)
		/* Notification that a partition went away! */
		return;
	/* First, we should NEVER get an int here...only acks */
	if (hvlpevent_is_int(event)) {
		pr_warning("Yikes! got an int in viodasd event handler!\n");
		if (hvlpevent_need_ack(event)) {
			event->xRc = HvLpEvent_Rc_InvalidSubtype;
			HvCallEvent_ackLpEvent(event);
		}
	}
	switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
	case vioblockopen:
		/*
		 * Handle a response to an open request. We get all the
		 * disk information in the response, so update it. The
		 * correlation token contains a pointer to a waitevent
		 * structure that has a completion in it. Update the
		 * return code in the waitevent structure and post the
		 * completion to wake up the sender of the request.
		 */
		pwe = (struct viodasd_waitevent *)event->xCorrelationToken;
		pwe->rc = event->xRc;
		pwe->sub_result = bevent->sub_result;
		if (event->xRc == HvLpEvent_Rc_Good) {
			const struct open_data *data = &bevent->u.open_data;
			struct viodasd_device *device =
				&viodasd_devices[bevent->disk];

			device->read_only =
				bevent->flags & vioblockflags_ro;
			device->size = data->disk_size;
			device->cylinders = data->cylinders;
			device->tracks = data->tracks;
			device->sectors = data->sectors;
			device->bytes_per_sector = data->bytes_per_sector;
			pwe->max_disk = data->max_disk;
		}
		complete(&pwe->com);
		break;
	case vioblockclose:
		break;
	case vioblockread:
	case vioblockwrite:
		viodasd_handle_read_write(bevent);
		break;
659 pr_warning("invalid subtype!");
660 if (hvlpevent_need_ack(event)) {
661 event->xRc = HvLpEvent_Rc_InvalidSubtype;
662 HvCallEvent_ackLpEvent(event);
/*
 * Get the driver to reprobe for more disks.
 */
static ssize_t probe_disks(struct device_driver *drv, const char *buf,
		size_t count)
{
	struct viodasd_device *d;

	for (d = viodasd_devices; d < &viodasd_devices[MAX_DISKNO]; d++) {
		if (d->disk == NULL)
			probe_disk(d);
	}
	return count;
}
static DRIVER_ATTR(probe, S_IWUSR, NULL, probe_disks);
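/*
 * Writing anything to the resulting sysfs attribute (presumably
 * /sys/bus/vio/drivers/viodasd/probe on this platform) rescans for
 * disks that were attached after the driver loaded.
 */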
static int viodasd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct viodasd_device *d = &viodasd_devices[vdev->unit_address];

	d->dev = &vdev->dev;
	if (!probe_disk(d))
		return -ENODEV;
	return 0;
}
static int viodasd_remove(struct vio_dev *vdev)
{
	struct viodasd_device *d;

	d = &viodasd_devices[vdev->unit_address];
	if (d->disk) {
		del_gendisk(d->disk);
		blk_cleanup_queue(d->disk->queue);
		put_disk(d->disk);
		d->disk = NULL;
	}
	d->dev = NULL;
	return 0;
}
/*
 * viodasd_device_table: Used by vio.c to match devices that we
 * support.
 */
static struct vio_device_id viodasd_device_table[] __devinitdata = {
	{ "block", "IBM,iSeries-viodasd" },
	{ "", "" }
};
MODULE_DEVICE_TABLE(vio, viodasd_device_table);
static struct vio_driver viodasd_driver = {
	.id_table = viodasd_device_table,
	.probe = viodasd_probe,
	.remove = viodasd_remove,
	.driver = {
		.name = "viodasd",
		.owner = THIS_MODULE,
	}
};
static int need_delete_probe;
/*
 * Initialize the whole device driver. Handle module and non-module
 * versions.
 */
static int __init viodasd_init(void)
{
	int rc;

	if (!firmware_has_feature(FW_FEATURE_ISERIES)) {
		rc = -ENODEV;
		goto early_fail;
	}

	/* Try to open to our host lp */
	if (viopath_hostLp == HvLpIndexInvalid)
		vio_set_hostlp();

	if (viopath_hostLp == HvLpIndexInvalid) {
		pr_warning("invalid hosting partition\n");
		rc = -EIO;
		goto early_fail;
	}

	pr_info("vers " VIOD_VERS ", hosting partition %d\n", viopath_hostLp);
	/* register the block device */
	rc = register_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
	if (rc) {
		pr_warning("Unable to get major number %d for %s\n",
			   VIODASD_MAJOR, VIOD_GENHD_NAME);
		goto early_fail;
	}
	/* Actually open the path to the hosting partition */
	rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio,
			VIOMAXREQ + 2);
	if (rc) {
		pr_warning("error opening path to host partition %d\n",
			   viopath_hostLp);
		goto unregister_blk;
	}
	/* Initialize our request handler */
	vio_setHandler(viomajorsubtype_blockio, handle_block_event);

	rc = vio_register_driver(&viodasd_driver);
	if (rc) {
		pr_warning("vio_register_driver failed\n");
		goto unset_handler;
	}
	/*
	 * If this call fails, it just means that we cannot dynamically
	 * add virtual disks, but the driver will still work fine for
	 * all existing disks, so ignore the failure.
	 */
	if (!driver_create_file(&viodasd_driver.driver, &driver_attr_probe))
		need_delete_probe = 1;

	return 0;
unset_handler:
	vio_clearHandler(viomajorsubtype_blockio);
	viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
unregister_blk:
	unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
early_fail:
	return rc;
}
module_init(viodasd_init);
void __exit viodasd_exit(void)
{
	if (need_delete_probe)
		driver_remove_file(&viodasd_driver.driver, &driver_attr_probe);
	vio_unregister_driver(&viodasd_driver);
	vio_clearHandler(viomajorsubtype_blockio);
	viopath_close(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ + 2);
	unregister_blkdev(VIODASD_MAJOR, VIOD_GENHD_NAME);
}
module_exit(viodasd_exit);