Staging: hv: Move the sector size check into blkvsc_drv_init
[pandora-kernel.git] drivers/staging/hv/blkvsc_drv.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/blkdev.h>
#include <linux/major.h>
#include <linux/delay.h>
#include <linux/hdreg.h>
#include <linux/slab.h>
#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_dbg.h>
#include "hv_api.h"
#include "logging.h"
#include "version_info.h"
#include "vmbus.h"
#include "storvsc_api.h"


#define BLKVSC_MINORS   64

enum blkvsc_device_type {
        UNKNOWN_DEV_TYPE,
        HARDDISK_TYPE,
        DVD_TYPE,
};

enum blkvsc_op_type {
        DO_INQUIRY,
        DO_CAPACITY,
        DO_FLUSH,
};

/*
 * This structure ties a struct request to the struct
 * blkvsc_request/hv_storvsc_request instances that carry it.  A struct
 * request may be represented by one or more struct blkvsc_request.
 */
struct blkvsc_request_group {
        int outstanding;
        int status;
        struct list_head blkvsc_req_list;       /* list of blkvsc_requests */
};

struct blkvsc_request {
        /* blkvsc_request_group.blkvsc_req_list */
        struct list_head req_entry;

        /* block_device_context.pending_list */
        struct list_head pend_entry;

        /* This may be NULL if the request was generated internally */
        struct request *req;

        struct block_device_context *dev;

        /* The group this request is part of; may be NULL */
        struct blkvsc_request_group *group;

        int write;
        sector_t sector_start;
        unsigned long sector_count;

        unsigned char sense_buffer[SCSI_SENSE_BUFFERSIZE];
        unsigned char cmd_len;
        unsigned char cmnd[MAX_COMMAND_SIZE];

        struct hv_storvsc_request request;
};

/* Per-device structure */
struct block_device_context {
        /* points back to our device context */
        struct hv_device *device_ctx;
        struct kmem_cache *request_pool;
        spinlock_t lock;
        struct gendisk *gd;
        enum blkvsc_device_type device_type;
        struct list_head pending_list;

        unsigned char device_id[64];
        unsigned int device_id_len;
        int num_outstanding_reqs;
        int shutting_down;
        unsigned int sector_size;
        sector_t capacity;
        unsigned int port;
        unsigned char path;
        unsigned char target;
        int users;
};

static const char *drv_name = "blkvsc";

/* {32412632-86cb-44a2-9b5c-50d1417354f5} */
static const struct hv_guid dev_type = {
        .data = {
                0x32, 0x26, 0x41, 0x32, 0xcb, 0x86, 0xa2, 0x44,
                0x9b, 0x5c, 0x50, 0xd1, 0x41, 0x73, 0x54, 0xf5
        }
};

/*
 * There is a circular dependency involving blkvsc_request_completion()
 * and blkvsc_do_request().
 */
static void blkvsc_request_completion(struct hv_storvsc_request *request);

static int blkvsc_ringbuffer_size = BLKVSC_RING_BUFFER_SIZE;

module_param(blkvsc_ringbuffer_size, int, S_IRUGO);
MODULE_PARM_DESC(blkvsc_ringbuffer_size, "Ring buffer size (in bytes)");

/*
 * There is a circular dependency involving blkvsc_probe()
 * and block_ops.
 */
static int blkvsc_probe(struct hv_device *dev);

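/*
 * blkvsc_device_add() - Register the device with the storvsc layer and
 * derive the IDE path/target ids from the device instance GUID.
 */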
static int blkvsc_device_add(struct hv_device *device,
                                void *additional_info)
{
        struct storvsc_device_info *device_info;
        int ret = 0;

        device_info = (struct storvsc_device_info *)additional_info;

        ret = storvsc_dev_add(device, additional_info);
        if (ret != 0)
                return ret;

        /*
         * We need to use the device instance guid to set the path and target
         * id. For IDE devices, the device instance id is formatted as
         * <bus id> - <device id> - 8899 - 000000000000.
         */
        device_info->path_id = device->dev_instance.data[3] << 24 |
                             device->dev_instance.data[2] << 16 |
                             device->dev_instance.data[1] << 8  |
                             device->dev_instance.data[0];

        device_info->target_id = device->dev_instance.data[5] << 8 |
                               device->dev_instance.data[4];

        return ret;
}

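/*
 * Fill in the vstor packet for this blkvsc_request and hand it to the
 * storvsc layer; request_completion runs when the host completes the I/O.
 * Called with the device lock held.
 */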
static int blkvsc_submit_request(struct blkvsc_request *blkvsc_req,
                        void (*request_completion)(struct hv_storvsc_request *))
{
        struct block_device_context *blkdev = blkvsc_req->dev;
        struct hv_storvsc_request *storvsc_req;
        struct vmscsi_request *vm_srb;
        int ret;

        storvsc_req = &blkvsc_req->request;
        vm_srb = &storvsc_req->vstor_packet.vm_srb;

        vm_srb->data_in = blkvsc_req->write ? WRITE_TYPE : READ_TYPE;

        storvsc_req->on_io_completion = request_completion;
        storvsc_req->context = blkvsc_req;

        vm_srb->port_number = blkdev->port;
        vm_srb->path_id = blkdev->path;
        vm_srb->target_id = blkdev->target;
        vm_srb->lun = 0;         /* this is not really used at all */

        vm_srb->cdb_length = blkvsc_req->cmd_len;

        memcpy(vm_srb->cdb, blkvsc_req->cmnd, vm_srb->cdb_length);

        storvsc_req->sense_buffer = blkvsc_req->sense_buffer;

        ret = storvsc_do_io(blkdev->device_ctx,
                            &blkvsc_req->request);
        if (ret == 0)
                blkdev->num_outstanding_reqs++;

        return ret;
}

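/* Block-device open: just count the opener under the device lock. */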
static int blkvsc_open(struct block_device *bdev, fmode_t mode)
{
        struct block_device_context *blkdev = bdev->bd_disk->private_data;
        unsigned long flags;

        spin_lock_irqsave(&blkdev->lock, flags);

        blkdev->users++;

        spin_unlock_irqrestore(&blkdev->lock, flags);

        return 0;
}

static int blkvsc_getgeo(struct block_device *bd, struct hd_geometry *hg)
{
        sector_t nsect = get_capacity(bd->bd_disk);
        sector_t cylinders = nsect;

        /*
         * We are making up these values; let us keep it simple.
         */
        hg->heads = 0xff;
        hg->sectors = 0x3f;
        sector_div(cylinders, hg->heads * hg->sectors);
        hg->cylinders = cylinders;
        if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
                hg->cylinders = 0xffff;
        return 0;
}

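/*
 * Build a 16-byte READ(16)/WRITE(16) CDB from the request; REQ_FUA is
 * translated into the FUA bit of the CDB.
 */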
static void blkvsc_init_rw(struct blkvsc_request *blkvsc_req)
{

        blkvsc_req->cmd_len = 16;

        if (rq_data_dir(blkvsc_req->req)) {
                blkvsc_req->write = 1;
                blkvsc_req->cmnd[0] = WRITE_16;
        } else {
                blkvsc_req->write = 0;
                blkvsc_req->cmnd[0] = READ_16;
        }

        blkvsc_req->cmnd[1] |=
        (blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;

        *(unsigned long long *)&blkvsc_req->cmnd[2] =
        cpu_to_be64(blkvsc_req->sector_start);
        *(unsigned int *)&blkvsc_req->cmnd[10] =
        cpu_to_be32(blkvsc_req->sector_count);
}

static int blkvsc_ioctl(struct block_device *bd, fmode_t mode,
                        unsigned cmd, unsigned long arg)
{
        struct block_device_context *blkdev = bd->bd_disk->private_data;
        int ret = 0;

        switch (cmd) {
        case HDIO_GET_IDENTITY:
                if (copy_to_user((void __user *)arg, blkdev->device_id,
                                 blkdev->device_id_len))
                        ret = -EFAULT;
                break;
        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

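/*
 * Completion handler for the internally generated commands issued by
 * blkvsc_do_operation(); wakes up the waiter.
 */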
static void blkvsc_cmd_completion(struct hv_storvsc_request *request)
{
        struct blkvsc_request *blkvsc_req =
                        (struct blkvsc_request *)request->context;
        struct block_device_context *blkdev =
                        (struct block_device_context *)blkvsc_req->dev;
        struct scsi_sense_hdr sense_hdr;
        struct vmscsi_request *vm_srb;
        unsigned long flags;

        vm_srb = &blkvsc_req->request.vstor_packet.vm_srb;

        spin_lock_irqsave(&blkdev->lock, flags);
        blkdev->num_outstanding_reqs--;
        spin_unlock_irqrestore(&blkdev->lock, flags);

        if (vm_srb->scsi_status)
                if (scsi_normalize_sense(blkvsc_req->sense_buffer,
                                         SCSI_SENSE_BUFFERSIZE, &sense_hdr))
                        scsi_print_sense_hdr("blkvsc", &sense_hdr);

        complete(&blkvsc_req->request.wait_event);
}

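/*
 * Synchronously issue one of the internal operations (INQUIRY, READ
 * CAPACITY or SYNCHRONIZE CACHE) and, on success, parse the response
 * into the block_device_context.
 */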
static int blkvsc_do_operation(struct block_device_context *blkdev,
                                enum blkvsc_op_type op)
{
        struct blkvsc_request *blkvsc_req;
        struct page *page_buf;
        unsigned char *buf;
        unsigned char device_type;
        struct scsi_sense_hdr sense_hdr;
        struct vmscsi_request *vm_srb;
        unsigned long flags;

        int ret = 0;

        blkvsc_req = kmem_cache_zalloc(blkdev->request_pool, GFP_KERNEL);
        if (!blkvsc_req)
                return -ENOMEM;

        page_buf = alloc_page(GFP_KERNEL);
        if (!page_buf) {
                kmem_cache_free(blkvsc_req->dev->request_pool, blkvsc_req);
                return -ENOMEM;
        }

        vm_srb = &blkvsc_req->request.vstor_packet.vm_srb;
        init_completion(&blkvsc_req->request.wait_event);
        blkvsc_req->dev = blkdev;
        blkvsc_req->req = NULL;
        blkvsc_req->write = 0;

        blkvsc_req->request.data_buffer.pfn_array[0] =
        page_to_pfn(page_buf);
        blkvsc_req->request.data_buffer.offset = 0;

        switch (op) {
        case DO_INQUIRY:
                blkvsc_req->cmnd[0] = INQUIRY;
                blkvsc_req->cmnd[1] = 0x1;              /* enable VPD */
                blkvsc_req->cmnd[2] = 0x83;             /* device id VPD page */
                blkvsc_req->cmnd[4] = 64;
                blkvsc_req->cmd_len = 6;
                blkvsc_req->request.data_buffer.len = 64;
                break;

        case DO_CAPACITY:
                blkdev->sector_size = 0;
                blkdev->capacity = 0;

                blkvsc_req->cmnd[0] = READ_CAPACITY;
                blkvsc_req->cmd_len = 16;
                blkvsc_req->request.data_buffer.len = 8;
                break;

        case DO_FLUSH:
                blkvsc_req->cmnd[0] = SYNCHRONIZE_CACHE;
                blkvsc_req->cmd_len = 10;
                blkvsc_req->request.data_buffer.pfn_array[0] = 0;
                blkvsc_req->request.data_buffer.len = 0;
                break;
        default:
                ret = -EINVAL;
                goto cleanup;
        }

        spin_lock_irqsave(&blkdev->lock, flags);
        blkvsc_submit_request(blkvsc_req, blkvsc_cmd_completion);
        spin_unlock_irqrestore(&blkdev->lock, flags);

        wait_for_completion_interruptible(&blkvsc_req->request.wait_event);

        /* check error */
        if (vm_srb->scsi_status) {
                scsi_normalize_sense(blkvsc_req->sense_buffer,
                                     SCSI_SENSE_BUFFERSIZE, &sense_hdr);
                ret = -EIO;
                goto cleanup;
        }

        buf = kmap(page_buf);

        switch (op) {
        case DO_INQUIRY:
                device_type = buf[0] & 0x1F;

                if (device_type == 0x0)
                        blkdev->device_type = HARDDISK_TYPE;
                else
                        blkdev->device_type = UNKNOWN_DEV_TYPE;

                blkdev->device_id_len = buf[7];
                if (blkdev->device_id_len > 64)
                        blkdev->device_id_len = 64;

                memcpy(blkdev->device_id, &buf[8], blkdev->device_id_len);
                break;

        case DO_CAPACITY:
                /* fields are big-endian on the wire */
                blkdev->capacity =
                ((buf[0] << 24) | (buf[1] << 16) |
                (buf[2] << 8) | buf[3]) + 1;

                blkdev->sector_size =
                (buf[4] << 24) | (buf[5] << 16) |
                (buf[6] << 8) | buf[7];
                break;
        default:
                break;
        }

        kunmap(page_buf);

cleanup:

        __free_page(page_buf);

        kmem_cache_free(blkvsc_req->dev->request_pool, blkvsc_req);

        return ret;
}

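/*
 * Fail every request still sitting on the pending list.  Called with
 * blkdev->lock held from the remove and shutdown paths.
 */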
static int blkvsc_cancel_pending_reqs(struct block_device_context *blkdev)
{
        struct blkvsc_request *pend_req, *tmp;
        struct blkvsc_request *comp_req, *tmp2;
        struct vmscsi_request *vm_srb;

        int ret = 0;

        /* Flush the pending list first */
        list_for_each_entry_safe(pend_req, tmp, &blkdev->pending_list,
                                 pend_entry) {
                /*
                 * The pend_req could be part of a partially completed
                 * request. If so, complete those requests first until we
                 * hit the pend_req
                 */
                list_for_each_entry_safe(comp_req, tmp2,
                                         &pend_req->group->blkvsc_req_list,
                                         req_entry) {

                        if (comp_req == pend_req)
                                break;

                        list_del(&comp_req->req_entry);

                        if (comp_req->req) {
                                vm_srb =
                                &comp_req->request.vstor_packet.
                                vm_srb;
                                ret = __blk_end_request(comp_req->req,
                                        (!vm_srb->scsi_status ? 0 : -EIO),
                                        comp_req->sector_count *
                                        blkdev->sector_size);

                                /* FIXME: shouldn't this do more than return? */
                                if (ret)
                                        goto out;
                        }

                        kmem_cache_free(blkdev->request_pool, comp_req);
                }

                list_del(&pend_req->pend_entry);

                list_del(&pend_req->req_entry);

                if (pend_req->req) {
                        if (!__blk_end_request(pend_req->req, -EIO,
                                               pend_req->sector_count *
                                               blkdev->sector_size)) {
                                /*
                                 * All the sectors have been transferred, i.e.
                                 * the request is done
                                 */
                                kmem_cache_free(blkdev->request_pool,
                                                pend_req->group);
                        }
                }

                kmem_cache_free(blkdev->request_pool, pend_req);
        }

out:
        return ret;
}

/*
 * blkvsc_remove() - Callback when our device is removed
 */
static int blkvsc_remove(struct hv_device *dev)
{
        struct block_device_context *blkdev = dev_get_drvdata(&dev->device);
        unsigned long flags;

        /* Get to a known state */
        spin_lock_irqsave(&blkdev->lock, flags);

        blkdev->shutting_down = 1;

        blk_stop_queue(blkdev->gd->queue);

        blkvsc_cancel_pending_reqs(blkdev);

        spin_unlock_irqrestore(&blkdev->lock, flags);

        blkvsc_do_operation(blkdev, DO_FLUSH);

        blk_cleanup_queue(blkdev->gd->queue);

        /*
         * Call to the vsc driver to let it know that the device is being
         * removed
         */
        storvsc_dev_remove(dev);

        del_gendisk(blkdev->gd);

        kmem_cache_destroy(blkdev->request_pool);

        kfree(blkdev);

        return 0;
}

static void blkvsc_shutdown(struct hv_device *dev)
{
        struct block_device_context *blkdev = dev_get_drvdata(&dev->device);
        unsigned long flags;

        if (!blkdev)
                return;

        spin_lock_irqsave(&blkdev->lock, flags);

        blkdev->shutting_down = 1;

        blk_stop_queue(blkdev->gd->queue);

        blkvsc_cancel_pending_reqs(blkdev);

        spin_unlock_irqrestore(&blkdev->lock, flags);

        blkvsc_do_operation(blkdev, DO_FLUSH);

        /*
         * Now wait for all outgoing I/O to be drained.
         */
        storvsc_wait_to_drain((struct storvsc_device *)dev->ext);
}

static int blkvsc_release(struct gendisk *disk, fmode_t mode)
{
        struct block_device_context *blkdev = disk->private_data;
        unsigned long flags;

        if (blkdev->users == 1)
                blkvsc_do_operation(blkdev, DO_FLUSH);

        spin_lock_irqsave(&blkdev->lock, flags);
        blkdev->users--;
        spin_unlock_irqrestore(&blkdev->lock, flags);

        return 0;
}

/*
 * We break the request into one or more blkvsc_requests and submit
 * them.  If we can't submit them all, we put them on the
 * pending_list. blkvsc_request() will work on the pending_list.
 */
static int blkvsc_do_request(struct block_device_context *blkdev,
                             struct request *req)
{
        struct bio *bio = NULL;
        struct bio_vec *bvec = NULL;
        struct bio_vec *prev_bvec = NULL;
        struct blkvsc_request *blkvsc_req = NULL;
        struct blkvsc_request *tmp;
        int databuf_idx = 0;
        int seg_idx = 0;
        sector_t start_sector;
        unsigned long num_sectors = 0;
        int ret = 0;
        int pending = 0;
        struct blkvsc_request_group *group = NULL;

        /* Create a group to tie req to list of blkvsc_reqs */
        group = kmem_cache_zalloc(blkdev->request_pool, GFP_ATOMIC);
        if (!group)
                return -ENOMEM;

        INIT_LIST_HEAD(&group->blkvsc_req_list);
        group->outstanding = group->status = 0;

        start_sector = blk_rq_pos(req);

        /* foreach bio in the request */
        if (req->bio) {
                for (bio = req->bio; bio; bio = bio->bi_next) {
                        /*
                         * Map this bio into an existing or new storvsc request
                         */
                        bio_for_each_segment(bvec, bio, seg_idx) {
                                /* Get a new storvsc request */
                                /* 1st-time */
                                if ((!blkvsc_req) ||
                                    (databuf_idx >= MAX_MULTIPAGE_BUFFER_COUNT)
                                    /* hole at the beginning of the page */
                                    || (bvec->bv_offset != 0) ||
                                    /* hole at the end of the page */
                                    (prev_bvec &&
                                     (prev_bvec->bv_len != PAGE_SIZE))) {
                                        /* submit the prev one */
                                        if (blkvsc_req) {
                                                blkvsc_req->sector_start =
                                                start_sector;
                                                sector_div(
                                                blkvsc_req->sector_start,
                                                (blkdev->sector_size >> 9));

                                                blkvsc_req->sector_count =
                                                num_sectors /
                                                (blkdev->sector_size >> 9);
                                                blkvsc_init_rw(blkvsc_req);
                                        }

                                        /*
                                         * Create new blkvsc_req to represent
                                         * the current bvec
                                         */
                                        blkvsc_req =
                                        kmem_cache_zalloc(
                                        blkdev->request_pool, GFP_ATOMIC);
                                        if (!blkvsc_req) {
                                                /* free up everything */
                                                list_for_each_entry_safe(
                                                        blkvsc_req, tmp,
                                                        &group->blkvsc_req_list,
                                                        req_entry) {
                                                        list_del(
                                                        &blkvsc_req->req_entry);
                                                        kmem_cache_free(
                                                        blkdev->request_pool,
                                                        blkvsc_req);
                                                }

                                                kmem_cache_free(
                                                blkdev->request_pool, group);
                                                return -ENOMEM;
                                        }

                                        memset(blkvsc_req, 0,
                                               sizeof(struct blkvsc_request));

                                        blkvsc_req->dev = blkdev;
                                        blkvsc_req->req = req;
                                        blkvsc_req->request.
                                        data_buffer.offset
                                        = bvec->bv_offset;
                                        blkvsc_req->request.
                                        data_buffer.len = 0;

                                        /* Add to the group */
                                        blkvsc_req->group = group;
                                        blkvsc_req->group->outstanding++;
                                        list_add_tail(&blkvsc_req->req_entry,
                                        &blkvsc_req->group->blkvsc_req_list);

                                        start_sector += num_sectors;
                                        num_sectors = 0;
                                        databuf_idx = 0;
                                }

                                /*
                                 * Add the curr bvec/segment to the curr
                                 * blkvsc_req
                                 */
                                blkvsc_req->request.data_buffer.
                                        pfn_array[databuf_idx]
                                                = page_to_pfn(bvec->bv_page);
                                blkvsc_req->request.data_buffer.len
                                        += bvec->bv_len;

                                prev_bvec = bvec;

                                databuf_idx++;
                                num_sectors += bvec->bv_len >> 9;

                        } /* bio_for_each_segment */

                } /* for each bio */
        }

        /* Handle the last one */
        if (blkvsc_req) {
                blkvsc_req->sector_start = start_sector;
                sector_div(blkvsc_req->sector_start,
                           (blkdev->sector_size >> 9));

                blkvsc_req->sector_count = num_sectors /
                                           (blkdev->sector_size >> 9);

                blkvsc_init_rw(blkvsc_req);
        }

        list_for_each_entry(blkvsc_req, &group->blkvsc_req_list, req_entry) {
                if (pending) {
                        list_add_tail(&blkvsc_req->pend_entry,
                                      &blkdev->pending_list);
                } else {
                        ret = blkvsc_submit_request(blkvsc_req,
                                                    blkvsc_request_completion);
                        if (ret == -1) {
                                pending = 1;
                                list_add_tail(&blkvsc_req->pend_entry,
                                              &blkdev->pending_list);
                        }
                }
        }

        return pending;
}

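/*
 * Try to resubmit the requests queued on the pending list; stop on the
 * first submission failure.  Called with blkdev->lock held.
 */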
static int blkvsc_do_pending_reqs(struct block_device_context *blkdev)
{
        struct blkvsc_request *pend_req, *tmp;
        int ret = 0;

        /* Flush the pending list first */
        list_for_each_entry_safe(pend_req, tmp, &blkdev->pending_list,
                                 pend_entry) {

                ret = blkvsc_submit_request(pend_req,
                                            blkvsc_request_completion);
                if (ret != 0)
                        break;
                else
                        list_del(&pend_req->pend_entry);
        }

        return ret;
}

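/*
 * Request-queue callback.  Drain the pending list first, then map each
 * new request; the queue is stopped whenever a submission fails.
 */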
static void blkvsc_request(struct request_queue *queue)
{
        struct block_device_context *blkdev = NULL;
        struct request *req;
        int ret = 0;

        while ((req = blk_peek_request(queue)) != NULL) {

                blkdev = req->rq_disk->private_data;
                if (blkdev->shutting_down || req->cmd_type != REQ_TYPE_FS) {
                        __blk_end_request_cur(req, 0);
                        continue;
                }

                ret = blkvsc_do_pending_reqs(blkdev);

                if (ret != 0) {
                        blk_stop_queue(queue);
                        break;
                }

                blk_start_request(req);

                ret = blkvsc_do_request(blkdev, req);
                if (ret > 0) {
                        blk_stop_queue(queue);
                        break;
                } else if (ret < 0) {
                        blk_requeue_request(queue, req);
                        blk_stop_queue(queue);
                        break;
                }
        }
}

/* The one and only one */
static struct storvsc_driver blkvsc_drv = {
        .base.probe = blkvsc_probe,
        .base.remove = blkvsc_remove,
        .base.shutdown = blkvsc_shutdown,
};

static const struct block_device_operations block_ops = {
        .owner = THIS_MODULE,
        .open = blkvsc_open,
        .release = blkvsc_release,
        .getgeo = blkvsc_getgeo,
        .ioctl  = blkvsc_ioctl,
};

/*
 * blkvsc_drv_init - BlkVsc driver initialization.
 */
static int blkvsc_drv_init(void)
{
        struct storvsc_driver *storvsc_drv = &blkvsc_drv;
        struct hv_driver *drv = &blkvsc_drv.base;
        int ret;

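        /* This driver assumes a 64-bit sector_t; refuse to build otherwise. */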
        BUILD_BUG_ON(sizeof(sector_t) != 8);

        storvsc_drv->ring_buffer_size = blkvsc_ringbuffer_size;

        memcpy(&drv->dev_type, &dev_type, sizeof(struct hv_guid));
        drv->name = drv_name;
        drv->driver.name = drv_name;

        /* The driver belongs to vmbus */
        ret = vmbus_child_driver_register(&drv->driver);

        return ret;
}

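/*
 * driver_for_each_device() callback used by blkvsc_drv_exit(): remember
 * the first device found and stop the iteration.
 */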
static int blkvsc_drv_exit_cb(struct device *dev, void *data)
{
        struct device **curr = (struct device **)data;
        *curr = dev;
        return 1; /* stop iterating */
}

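/*
 * Unregister the driver, removing every bound device from the top down
 * first.
 */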
static void blkvsc_drv_exit(void)
{
        struct hv_driver *drv = &blkvsc_drv.base;
        struct device *current_dev;
        int ret;

        while (1) {
                current_dev = NULL;

                /* Get the device */
                ret = driver_for_each_device(&drv->driver, NULL,
                                             (void *)&current_dev,
                                             blkvsc_drv_exit_cb);

                if (ret)
                        DPRINT_WARN(BLKVSC_DRV,
                                    "driver_for_each_device returned %d", ret);

                if (current_dev == NULL)
                        break;

                /* Initiate removal from the top-down */
                device_unregister(current_dev);
        }

        vmbus_child_driver_unregister(&drv->driver);
}

/*
 * blkvsc_probe - Add a new device for this driver
 */
static int blkvsc_probe(struct hv_device *dev)
{
        struct block_device_context *blkdev = NULL;
        struct storvsc_device_info device_info;
        struct storvsc_major_info major_info;
        int ret = 0;

        blkdev = kzalloc(sizeof(struct block_device_context), GFP_KERNEL);
        if (!blkdev) {
                ret = -ENOMEM;
                goto cleanup;
        }

        INIT_LIST_HEAD(&blkdev->pending_list);

        /* Initialize what we can here */
        spin_lock_init(&blkdev->lock);

        blkdev->request_pool = kmem_cache_create(dev_name(&dev->device),
                                        sizeof(struct blkvsc_request), 0,
                                        SLAB_HWCACHE_ALIGN, NULL);
        if (!blkdev->request_pool) {
                ret = -ENOMEM;
                goto cleanup;
        }

        ret = blkvsc_device_add(dev, &device_info);
        if (ret != 0)
                goto cleanup;

        blkdev->device_ctx = dev;
        /* this identifies the device, 0 or 1 */
        blkdev->target = device_info.target_id;
        /* this identifies the IDE controller, 0 or 1 */
        blkdev->path = device_info.path_id;

        dev_set_drvdata(&dev->device, blkdev);

        ret = storvsc_get_major_info(&device_info, &major_info);

        if (ret)
                goto cleanup;

        if (major_info.do_register) {
                ret = register_blkdev(major_info.major, major_info.devname);

                if (ret != 0) {
                        DPRINT_ERR(BLKVSC_DRV,
                                   "register_blkdev() failed! ret %d", ret);
                        goto remove;
                }
        }

        DPRINT_INFO(BLKVSC_DRV, "blkvsc registered for major %d!!",
                        major_info.major);

        blkdev->gd = alloc_disk(BLKVSC_MINORS);
        if (!blkdev->gd) {
                ret = -ENOMEM;
                goto cleanup;
        }

        blkdev->gd->queue = blk_init_queue(blkvsc_request, &blkdev->lock);

        blk_queue_max_segment_size(blkdev->gd->queue, PAGE_SIZE);
        blk_queue_max_segments(blkdev->gd->queue, MAX_MULTIPAGE_BUFFER_COUNT);
        blk_queue_segment_boundary(blkdev->gd->queue, PAGE_SIZE-1);
        blk_queue_bounce_limit(blkdev->gd->queue, BLK_BOUNCE_ANY);
        blk_queue_dma_alignment(blkdev->gd->queue, 511);

        blkdev->gd->major = major_info.major;
        if (major_info.index == 1 || major_info.index == 3)
                blkdev->gd->first_minor = BLKVSC_MINORS;
        else
                blkdev->gd->first_minor = 0;
        blkdev->gd->fops = &block_ops;
        blkdev->gd->events = DISK_EVENT_MEDIA_CHANGE;
        blkdev->gd->private_data = blkdev;
        blkdev->gd->driverfs_dev = &(blkdev->device_ctx->device);
        sprintf(blkdev->gd->disk_name, "hd%c", 'a' + major_info.index);

        blkvsc_do_operation(blkdev, DO_INQUIRY);
        blkvsc_do_operation(blkdev, DO_CAPACITY);

        set_capacity(blkdev->gd, blkdev->capacity * (blkdev->sector_size/512));
        blk_queue_logical_block_size(blkdev->gd->queue, blkdev->sector_size);
        /* go! */
        add_disk(blkdev->gd);

        DPRINT_INFO(BLKVSC_DRV, "%s added!! capacity %lu sector_size %d",
                    blkdev->gd->disk_name, (unsigned long)blkdev->capacity,
                    blkdev->sector_size);

        return ret;

remove:
        storvsc_dev_remove(dev);

cleanup:
        if (blkdev) {
                if (blkdev->request_pool) {
                        kmem_cache_destroy(blkdev->request_pool);
                        blkdev->request_pool = NULL;
                }
                kfree(blkdev);
                blkdev = NULL;
        }

        return ret;
}

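/*
 * I/O completion path for regular block requests.  The struct request is
 * only completed once every blkvsc_request in its group has come back,
 * which keeps the completions in order.
 */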
static void blkvsc_request_completion(struct hv_storvsc_request *request)
{
        struct blkvsc_request *blkvsc_req =
                        (struct blkvsc_request *)request->context;
        struct block_device_context *blkdev =
                        (struct block_device_context *)blkvsc_req->dev;
        unsigned long flags;
        struct blkvsc_request *comp_req, *tmp;
        struct vmscsi_request *vm_srb;

        spin_lock_irqsave(&blkdev->lock, flags);

        blkdev->num_outstanding_reqs--;
        blkvsc_req->group->outstanding--;

        /*
         * Only start processing when all the blkvsc_reqs are
         * completed. This guarantees no out-of-order blkvsc_req
         * completion when calling __blk_end_request()
         */
        if (blkvsc_req->group->outstanding == 0) {
                list_for_each_entry_safe(comp_req, tmp,
                                         &blkvsc_req->group->blkvsc_req_list,
                                         req_entry) {

                        list_del(&comp_req->req_entry);

                        vm_srb =
                        &comp_req->request.vstor_packet.vm_srb;
                        if (!__blk_end_request(comp_req->req,
                                (!vm_srb->scsi_status ? 0 : -EIO),
                                comp_req->sector_count * blkdev->sector_size)) {
                                /*
                                 * All the sectors have been transferred, i.e.
                                 * the request is done
                                 */
                                kmem_cache_free(blkdev->request_pool,
                                                comp_req->group);
                        }

                        kmem_cache_free(blkdev->request_pool, comp_req);
                }

                if (!blkdev->shutting_down) {
                        blkvsc_do_pending_reqs(blkdev);
                        blk_start_queue(blkdev->gd->queue);
                        blkvsc_request(blkdev->gd->queue);
                }
        }

        spin_unlock_irqrestore(&blkdev->lock, flags);
}

static int __init blkvsc_init(void)
{
        int ret;

        ret = blkvsc_drv_init();

        return ret;
}

static void __exit blkvsc_exit(void)
{
        blkvsc_drv_exit();
}

MODULE_LICENSE("GPL");
MODULE_VERSION(HV_DRV_VERSION);
MODULE_DESCRIPTION("Microsoft Hyper-V virtual block driver");
module_init(blkvsc_init);
module_exit(blkvsc_exit);