drivers/virtio/virtio_pci.c

   1 /*
   2  * Virtio PCI driver
   3  *
   4  * This module allows virtio devices to be used over a virtual PCI device.
   5  * This can be used with QEMU based VMMs like KVM or Xen.
   6  *
   7  * Copyright IBM Corp. 2007
   8  *
   9  * Authors:
  10  *  Anthony Liguori  <aliguori@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <linux/module.h>
  18 #include <linux/list.h>
  19 #include <linux/pci.h>
  20 #include <linux/slab.h>
  21 #include <linux/interrupt.h>
  22 #include <linux/virtio.h>
  23 #include <linux/virtio_config.h>
  24 #include <linux/virtio_ring.h>
  25 #include <linux/virtio_pci.h>
  26 #include <linux/highmem.h>
  27 #include <linux/spinlock.h>
  28
/* Module metadata: author, description, license and version strings. */
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");
  33
  34 struct virtio_pci_vq_info {
  35         /* the actual virtqueue */
  36         struct virtqueue *vq;
  37
  38         /* the number of entries in the queue */
  39         int num;
  40
  41         /* the virtual address of the ring queue */
  42         void *queue;
  43
  44         /* the list node for the virtqueues list */
  45         struct list_head node;
  46
  47         /* MSI-X vector (or none) */
  48         unsigned msix_vector;
  49 };
  50
  51 /* Our device structure */
  52 struct virtio_pci_device {
  53         struct virtio_device vdev;
  54         struct pci_dev *pci_dev;
  55
  56         /* the IO mapping for the PCI config space */
  57         void __iomem *ioaddr;
  58
  59         /* the IO mapping for ISR operation */
  60         void __iomem *isr;
  61
  62         /* a list of queues so we can dispatch IRQs */
  63         spinlock_t lock;
  64         struct list_head virtqueues;
  65
  66         /* array of all queues for house-keeping */
  67         struct virtio_pci_vq_info **vqs;
  68
  69         /* MSI-X support */
  70         int msix_enabled;
  71         int intx_enabled;
  72         struct msix_entry *msix_entries;
  73         cpumask_var_t *msix_affinity_masks;
  74         /* Name strings for interrupts. This size should be enough,
  75          * and I'm too lazy to allocate each name separately. */
  76         char (*msix_names)[256];
  77         /* Number of available vectors */
  78         unsigned msix_vectors;
  79         /* Vectors allocated, excluding per-vq vectors if any */
  80         unsigned msix_used_vectors;
  81
  82         /* Whether we have vector per vq */
  83         bool per_vq_vectors;
  84
  85         struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
  86                                       struct virtio_pci_vq_info *info,
  87                                       unsigned idx,
  88                                       void (*callback)(struct virtqueue *vq),
  89                                       const char *name,
  90                                       u16 msix_vec);
  91         void (*del_vq)(struct virtio_pci_vq_info *info);
  92 };
  93
  94 /* Constants for MSI-X */
  95 /* Use first vector for configuration changes, second and the rest for
  96  * virtqueues Thus, we need at least 2 vectors for MSI. */
  97 enum {
  98         VP_MSIX_CONFIG_VECTOR = 0,
  99         VP_MSIX_VQ_VECTOR = 1,
 100 };
 101
/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static const struct pci_device_id virtio_pci_id_table[] = {
        /* Match every device ID under vendor 0x1af4; the probe function
         * rejects the device IDs this driver does not own. */
        { PCI_DEVICE(0x1af4, PCI_ANY_ID) },
        /* All-zero entry ends the table. */
        { 0 }
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
 109
 110 /* Convert a generic virtio device to our structure */
 111 static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 112 {
 113         return container_of(vdev, struct virtio_pci_device, vdev);
 114 }
 115
 116 /* virtio config->get_features() implementation */
 117 static u64 vp_get_features(struct virtio_device *vdev)
 118 {
 119         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 120
 121         /* When someone needs more than 32 feature bits, we'll need to
 122          * steal a bit to indicate that the rest are somewhere else. */
 123         return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 124 }
 125
 126 /* virtio config->finalize_features() implementation */
 127 static int vp_finalize_features(struct virtio_device *vdev)
 128 {
 129         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 130
 131         /* Give virtio_ring a chance to accept features. */
 132         vring_transport_features(vdev);
 133
 134         /* Make sure we don't have any features > 32 bits! */
 135         BUG_ON((u32)vdev->features != vdev->features);
 136
 137         /* We only support 32 feature bits. */
 138         iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
 139
 140         return 0;
 141 }
 142
 143 /* virtio config->get() implementation */
 144 static void vp_get(struct virtio_device *vdev, unsigned offset,
 145                    void *buf, unsigned len)
 146 {
 147         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 148         void __iomem *ioaddr = vp_dev->ioaddr +
 149                                 VIRTIO_PCI_CONFIG(vp_dev) + offset;
 150         u8 *ptr = buf;
 151         int i;
 152
 153         for (i = 0; i < len; i++)
 154                 ptr[i] = ioread8(ioaddr + i);
 155 }
 156
 157 /* the config->set() implementation.  it's symmetric to the config->get()
 158  * implementation */
 159 static void vp_set(struct virtio_device *vdev, unsigned offset,
 160                    const void *buf, unsigned len)
 161 {
 162         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 163         void __iomem *ioaddr = vp_dev->ioaddr +
 164                                 VIRTIO_PCI_CONFIG(vp_dev) + offset;
 165         const u8 *ptr = buf;
 166         int i;
 167
 168         for (i = 0; i < len; i++)
 169                 iowrite8(ptr[i], ioaddr + i);
 170 }
 171
 172 /* config->{get,set}_status() implementations */
 173 static u8 vp_get_status(struct virtio_device *vdev)
 174 {
 175         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 176         return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 177 }
 178
 179 static void vp_set_status(struct virtio_device *vdev, u8 status)
 180 {
 181         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 182         /* We should never be setting status to 0. */
 183         BUG_ON(status == 0);
 184         iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 185 }
 186
 187 /* wait for pending irq handlers */
 188 static void vp_synchronize_vectors(struct virtio_device *vdev)
 189 {
 190         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 191         int i;
 192
 193         if (vp_dev->intx_enabled)
 194                 synchronize_irq(vp_dev->pci_dev->irq);
 195
 196         for (i = 0; i < vp_dev->msix_vectors; ++i)
 197                 synchronize_irq(vp_dev->msix_entries[i].vector);
 198 }
 199
 200 static void vp_reset(struct virtio_device *vdev)
 201 {
 202         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 203         /* 0 status means a reset. */
 204         iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 205         /* Flush out the status write, and flush in device writes,
 206          * including MSi-X interrupts, if any. */
 207         ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 208         /* Flush pending VQ/configuration callbacks. */
 209         vp_synchronize_vectors(vdev);
 210 }
 211
 212 /* the notify function used when creating a virt queue */
 213 static bool vp_notify(struct virtqueue *vq)
 214 {
 215         /* we write the queue's selector into the notification register to
 216          * signal the other end */
 217         iowrite16(vq->index, (void __iomem *)vq->priv);
 218         return true;
 219 }
 220
 221 /* Handle a configuration change: Tell driver if it wants to know. */
 222 static irqreturn_t vp_config_changed(int irq, void *opaque)
 223 {
 224         struct virtio_pci_device *vp_dev = opaque;
 225
 226         virtio_config_changed(&vp_dev->vdev);
 227         return IRQ_HANDLED;
 228 }
 229
 230 /* Notify all virtqueues on an interrupt. */
 231 static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
 232 {
 233         struct virtio_pci_device *vp_dev = opaque;
 234         struct virtio_pci_vq_info *info;
 235         irqreturn_t ret = IRQ_NONE;
 236         unsigned long flags;
 237
 238         spin_lock_irqsave(&vp_dev->lock, flags);
 239         list_for_each_entry(info, &vp_dev->virtqueues, node) {
 240                 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
 241                         ret = IRQ_HANDLED;
 242         }
 243         spin_unlock_irqrestore(&vp_dev->lock, flags);
 244
 245         return ret;
 246 }
 247
 248 /* A small wrapper to also acknowledge the interrupt when it's handled.
 249  * I really need an EIO hook for the vring so I can ack the interrupt once we
 250  * know that we'll be handling the IRQ but before we invoke the callback since
 251  * the callback may notify the host which results in the host attempting to
 252  * raise an interrupt that we would then mask once we acknowledged the
 253  * interrupt. */
 254 static irqreturn_t vp_interrupt(int irq, void *opaque)
 255 {
 256         struct virtio_pci_device *vp_dev = opaque;
 257         u8 isr;
 258
 259         /* reading the ISR has the effect of also clearing it so it's very
 260          * important to save off the value. */
 261         isr = ioread8(vp_dev->isr);
 262
 263         /* It's definitely not us if the ISR was not high */
 264         if (!isr)
 265                 return IRQ_NONE;
 266
 267         /* Configuration change?  Tell driver if it wants to know. */
 268         if (isr & VIRTIO_PCI_ISR_CONFIG)
 269                 vp_config_changed(irq, opaque);
 270
 271         return vp_vring_interrupt(irq, opaque);
 272 }
 273
 274 static void vp_free_vectors(struct virtio_device *vdev)
 275 {
 276         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 277         int i;
 278
 279         if (vp_dev->intx_enabled) {
 280                 free_irq(vp_dev->pci_dev->irq, vp_dev);
 281                 vp_dev->intx_enabled = 0;
 282         }
 283
 284         for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 285                 free_irq(vp_dev->msix_entries[i].vector, vp_dev);
 286
 287         for (i = 0; i < vp_dev->msix_vectors; i++)
 288                 if (vp_dev->msix_affinity_masks[i])
 289                         free_cpumask_var(vp_dev->msix_affinity_masks[i]);
 290
 291         if (vp_dev->msix_enabled) {
 292                 /* Disable the vector used for configuration */
 293                 iowrite16(VIRTIO_MSI_NO_VECTOR,
 294                           vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 295                 /* Flush the write out to device */
 296                 ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 297
 298                 pci_disable_msix(vp_dev->pci_dev);
 299                 vp_dev->msix_enabled = 0;
 300         }
 301
 302         vp_dev->msix_vectors = 0;
 303         vp_dev->msix_used_vectors = 0;
 304         kfree(vp_dev->msix_names);
 305         vp_dev->msix_names = NULL;
 306         kfree(vp_dev->msix_entries);
 307         vp_dev->msix_entries = NULL;
 308         kfree(vp_dev->msix_affinity_masks);
 309         vp_dev->msix_affinity_masks = NULL;
 310 }
 311
 312 static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 313                                    bool per_vq_vectors)
 314 {
 315         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 316         const char *name = dev_name(&vp_dev->vdev.dev);
 317         unsigned i, v;
 318         int err = -ENOMEM;
 319
 320         vp_dev->msix_vectors = nvectors;
 321
 322         vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
 323                                        GFP_KERNEL);
 324         if (!vp_dev->msix_entries)
 325                 goto error;
 326         vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
 327                                      GFP_KERNEL);
 328         if (!vp_dev->msix_names)
 329                 goto error;
 330         vp_dev->msix_affinity_masks
 331                 = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
 332                           GFP_KERNEL);
 333         if (!vp_dev->msix_affinity_masks)
 334                 goto error;
 335         for (i = 0; i < nvectors; ++i)
 336                 if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
 337                                         GFP_KERNEL))
 338                         goto error;
 339
 340         for (i = 0; i < nvectors; ++i)
 341                 vp_dev->msix_entries[i].entry = i;
 342
 343         err = pci_enable_msix_exact(vp_dev->pci_dev,
 344                                     vp_dev->msix_entries, nvectors);
 345         if (err)
 346                 goto error;
 347         vp_dev->msix_enabled = 1;
 348
 349         /* Set the vector used for configuration */
 350         v = vp_dev->msix_used_vectors;
 351         snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 352                  "%s-config", name);
 353         err = request_irq(vp_dev->msix_entries[v].vector,
 354                           vp_config_changed, 0, vp_dev->msix_names[v],
 355                           vp_dev);
 356         if (err)
 357                 goto error;
 358         ++vp_dev->msix_used_vectors;
 359
 360         iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 361         /* Verify we had enough resources to assign the vector */
 362         v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 363         if (v == VIRTIO_MSI_NO_VECTOR) {
 364                 err = -EBUSY;
 365                 goto error;
 366         }
 367
 368         if (!per_vq_vectors) {
 369                 /* Shared vector for all VQs */
 370                 v = vp_dev->msix_used_vectors;
 371                 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 372                          "%s-virtqueues", name);
 373                 err = request_irq(vp_dev->msix_entries[v].vector,
 374                                   vp_vring_interrupt, 0, vp_dev->msix_names[v],
 375                                   vp_dev);
 376                 if (err)
 377                         goto error;
 378                 ++vp_dev->msix_used_vectors;
 379         }
 380         return 0;
 381 error:
 382         vp_free_vectors(vdev);
 383         return err;
 384 }
 385
 386 static int vp_request_intx(struct virtio_device *vdev)
 387 {
 388         int err;
 389         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 390
 391         err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
 392                           IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
 393         if (!err)
 394                 vp_dev->intx_enabled = 1;
 395         return err;
 396 }
 397
 398 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
 399                                   struct virtio_pci_vq_info *info,
 400                                   unsigned index,
 401                                   void (*callback)(struct virtqueue *vq),
 402                                   const char *name,
 403                                   u16 msix_vec)
 404 {
 405         struct virtqueue *vq;
 406         unsigned long size;
 407         u16 num;
 408         int err;
 409
 410         /* Select the queue we're interested in */
 411         iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 412
 413         /* Check if queue is either not available or already active. */
 414         num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
 415         if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
 416                 return ERR_PTR(-ENOENT);
 417
 418         info->num = num;
 419         info->msix_vector = msix_vec;
 420
 421         size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 422         info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
 423         if (info->queue == NULL)
 424                 return ERR_PTR(-ENOMEM);
 425
 426         /* activate the queue */
 427         iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 428                   vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 429
 430         /* create the vring */
 431         vq = vring_new_virtqueue(index, info->num,
 432                                  VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
 433                                  true, info->queue, vp_notify, callback, name);
 434         if (!vq) {
 435                 err = -ENOMEM;
 436                 goto out_activate_queue;
 437         }
 438
 439         vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
 440
 441         if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
 442                 iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 443                 msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 444                 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 445                         err = -EBUSY;
 446                         goto out_assign;
 447                 }
 448         }
 449
 450         return vq;
 451
 452 out_assign:
 453         vring_del_virtqueue(vq);
 454 out_activate_queue:
 455         iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 456         free_pages_exact(info->queue, size);
 457         return ERR_PTR(err);
 458 }
 459
 460 static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
 461                                      void (*callback)(struct virtqueue *vq),
 462                                      const char *name,
 463                                      u16 msix_vec)
 464 {
 465         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 466         struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
 467         struct virtqueue *vq;
 468         unsigned long flags;
 469
 470         /* fill out our structure that represents an active queue */
 471         if (!info)
 472                 return ERR_PTR(-ENOMEM);
 473
 474         vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec);
 475         if (IS_ERR(vq))
 476                 goto out_info;
 477
 478         info->vq = vq;
 479         if (callback) {
 480                 spin_lock_irqsave(&vp_dev->lock, flags);
 481                 list_add(&info->node, &vp_dev->virtqueues);
 482                 spin_unlock_irqrestore(&vp_dev->lock, flags);
 483         } else {
 484                 INIT_LIST_HEAD(&info->node);
 485         }
 486
 487         vp_dev->vqs[index] = info;
 488         return vq;
 489
 490 out_info:
 491         kfree(info);
 492         return vq;
 493 }
 494
 495 static void del_vq(struct virtio_pci_vq_info *info)
 496 {
 497         struct virtqueue *vq = info->vq;
 498         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 499         unsigned long size;
 500
 501         iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 502
 503         if (vp_dev->msix_enabled) {
 504                 iowrite16(VIRTIO_MSI_NO_VECTOR,
 505                           vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 506                 /* Flush the write out to device */
 507                 ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
 508         }
 509
 510         vring_del_virtqueue(vq);
 511
 512         /* Select and deactivate the queue */
 513         iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 514
 515         size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
 516         free_pages_exact(info->queue, size);
 517 }
 518
 519 static void vp_del_vq(struct virtqueue *vq)
 520 {
 521         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 522         struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
 523         unsigned long flags;
 524
 525         spin_lock_irqsave(&vp_dev->lock, flags);
 526         list_del(&info->node);
 527         spin_unlock_irqrestore(&vp_dev->lock, flags);
 528
 529         vp_dev->del_vq(info);
 530         kfree(info);
 531 }
 532
 533 /* the config->del_vqs() implementation */
 534 static void vp_del_vqs(struct virtio_device *vdev)
 535 {
 536         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 537         struct virtqueue *vq, *n;
 538         struct virtio_pci_vq_info *info;
 539
 540         list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
 541                 info = vp_dev->vqs[vq->index];
 542                 if (vp_dev->per_vq_vectors &&
 543                         info->msix_vector != VIRTIO_MSI_NO_VECTOR)
 544                         free_irq(vp_dev->msix_entries[info->msix_vector].vector,
 545                                  vq);
 546                 vp_del_vq(vq);
 547         }
 548         vp_dev->per_vq_vectors = false;
 549
 550         vp_free_vectors(vdev);
 551         kfree(vp_dev->vqs);
 552 }
 553
 554 static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 555                               struct virtqueue *vqs[],
 556                               vq_callback_t *callbacks[],
 557                               const char *names[],
 558                               bool use_msix,
 559                               bool per_vq_vectors)
 560 {
 561         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 562         u16 msix_vec;
 563         int i, err, nvectors, allocated_vectors;
 564
 565         vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL);
 566         if (!vp_dev->vqs)
 567                 return -ENOMEM;
 568
 569         if (!use_msix) {
 570                 /* Old style: one normal interrupt for change and all vqs. */
 571                 err = vp_request_intx(vdev);
 572                 if (err)
 573                         goto error_find;
 574         } else {
 575                 if (per_vq_vectors) {
 576                         /* Best option: one for change interrupt, one per vq. */
 577                         nvectors = 1;
 578                         for (i = 0; i < nvqs; ++i)
 579                                 if (callbacks[i])
 580                                         ++nvectors;
 581                 } else {
 582                         /* Second best: one for change, shared for all vqs. */
 583                         nvectors = 2;
 584                 }
 585
 586                 err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
 587                 if (err)
 588                         goto error_find;
 589         }
 590
 591         vp_dev->per_vq_vectors = per_vq_vectors;
 592         allocated_vectors = vp_dev->msix_used_vectors;
 593         for (i = 0; i < nvqs; ++i) {
 594                 if (!names[i]) {
 595                         vqs[i] = NULL;
 596                         continue;
 597                 } else if (!callbacks[i] || !vp_dev->msix_enabled)
 598                         msix_vec = VIRTIO_MSI_NO_VECTOR;
 599                 else if (vp_dev->per_vq_vectors)
 600                         msix_vec = allocated_vectors++;
 601                 else
 602                         msix_vec = VP_MSIX_VQ_VECTOR;
 603                 vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
 604                 if (IS_ERR(vqs[i])) {
 605                         err = PTR_ERR(vqs[i]);
 606                         goto error_find;
 607                 }
 608
 609                 if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
 610                         continue;
 611
 612                 /* allocate per-vq irq if available and necessary */
 613                 snprintf(vp_dev->msix_names[msix_vec],
 614                          sizeof *vp_dev->msix_names,
 615                          "%s-%s",
 616                          dev_name(&vp_dev->vdev.dev), names[i]);
 617                 err = request_irq(vp_dev->msix_entries[msix_vec].vector,
 618                                   vring_interrupt, 0,
 619                                   vp_dev->msix_names[msix_vec],
 620                                   vqs[i]);
 621                 if (err) {
 622                         vp_del_vq(vqs[i]);
 623                         goto error_find;
 624                 }
 625         }
 626         return 0;
 627
 628 error_find:
 629         vp_del_vqs(vdev);
 630         return err;
 631 }
 632
 633 /* the config->find_vqs() implementation */
 634 static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 635                        struct virtqueue *vqs[],
 636                        vq_callback_t *callbacks[],
 637                        const char *names[])
 638 {
 639         int err;
 640
 641         /* Try MSI-X with one vector per queue. */
 642         err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
 643         if (!err)
 644                 return 0;
 645         /* Fallback: MSI-X with one vector for config, one shared for queues. */
 646         err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
 647                                  true, false);
 648         if (!err)
 649                 return 0;
 650         /* Finally fall back to regular interrupts. */
 651         return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
 652                                   false, false);
 653 }
 654
 655 static const char *vp_bus_name(struct virtio_device *vdev)
 656 {
 657         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 658
 659         return pci_name(vp_dev->pci_dev);
 660 }
 661
 662 /* Setup the affinity for a virtqueue:
 663  * - force the affinity for per vq vector
 664  * - OR over all affinities for shared MSI
 665  * - ignore the affinity request if we're using INTX
 666  */
 667 static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 668 {
 669         struct virtio_device *vdev = vq->vdev;
 670         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 671         struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
 672         struct cpumask *mask;
 673         unsigned int irq;
 674
 675         if (!vq->callback)
 676                 return -EINVAL;
 677
 678         if (vp_dev->msix_enabled) {
 679                 mask = vp_dev->msix_affinity_masks[info->msix_vector];
 680                 irq = vp_dev->msix_entries[info->msix_vector].vector;
 681                 if (cpu == -1)
 682                         irq_set_affinity_hint(irq, NULL);
 683                 else {
 684                         cpumask_set_cpu(cpu, mask);
 685                         irq_set_affinity_hint(irq, mask);
 686                 }
 687         }
 688         return 0;
 689 }
 690
 691 static const struct virtio_config_ops virtio_pci_config_ops = {
 692         .get            = vp_get,
 693         .set            = vp_set,
 694         .get_status     = vp_get_status,
 695         .set_status     = vp_set_status,
 696         .reset          = vp_reset,
 697         .find_vqs       = vp_find_vqs,
 698         .del_vqs        = vp_del_vqs,
 699         .get_features   = vp_get_features,
 700         .finalize_features = vp_finalize_features,
 701         .bus_name       = vp_bus_name,
 702         .set_vq_affinity = vp_set_vq_affinity,
 703 };
 704
 705 static void virtio_pci_release_dev(struct device *_d)
 706 {
 707         /*
 708          * No need for a release method as we allocate/free
 709          * all devices together with the pci devices.
 710          * Provide an empty one to avoid getting a warning from core.
 711          */
 712 }
 713
 714 /* the PCI probing function */
 715 static int virtio_pci_probe(struct pci_dev *pci_dev,
 716                             const struct pci_device_id *id)
 717 {
 718         struct virtio_pci_device *vp_dev;
 719         int err;
 720
 721         /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
 722         if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
 723                 return -ENODEV;
 724
 725         if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
 726                 printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
 727                        VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
 728                 return -ENODEV;
 729         }
 730
 731         /* allocate our structure and fill it out */
 732         vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
 733         if (vp_dev == NULL)
 734                 return -ENOMEM;
 735
 736         vp_dev->vdev.dev.parent = &pci_dev->dev;
 737         vp_dev->vdev.dev.release = virtio_pci_release_dev;
 738         vp_dev->vdev.config = &virtio_pci_config_ops;
 739         vp_dev->pci_dev = pci_dev;
 740         INIT_LIST_HEAD(&vp_dev->virtqueues);
 741         spin_lock_init(&vp_dev->lock);
 742
 743         /* Disable MSI/MSIX to bring device to a known good state. */
 744         pci_msi_off(pci_dev);
 745
 746         /* enable the device */
 747         err = pci_enable_device(pci_dev);
 748         if (err)
 749                 goto out;
 750
 751         err = pci_request_regions(pci_dev, "virtio-pci");
 752         if (err)
 753                 goto out_enable_device;
 754
 755         vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
 756         if (vp_dev->ioaddr == NULL) {
 757                 err = -ENOMEM;
 758                 goto out_req_regions;
 759         }
 760
 761         vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR;
 762
 763         pci_set_drvdata(pci_dev, vp_dev);
 764         pci_set_master(pci_dev);
 765
 766         /* we use the subsystem vendor/device id as the virtio vendor/device
 767          * id.  this allows us to use the same PCI vendor/device id for all
 768          * virtio devices and to identify the particular virtio driver by
 769          * the subsystem ids */
 770         vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 771         vp_dev->vdev.id.device = pci_dev->subsystem_device;
 772
 773         vp_dev->setup_vq = setup_vq;
 774         vp_dev->del_vq = del_vq;
 775
 776         /* finally register the virtio device */
 777         err = register_virtio_device(&vp_dev->vdev);
 778         if (err)
 779                 goto out_set_drvdata;
 780
 781         return 0;
 782
 783 out_set_drvdata:
 784         pci_iounmap(pci_dev, vp_dev->ioaddr);
 785 out_req_regions:
 786         pci_release_regions(pci_dev);
 787 out_enable_device:
 788         pci_disable_device(pci_dev);
 789 out:
 790         kfree(vp_dev);
 791         return err;
 792 }
 793
 794 static void virtio_pci_remove(struct pci_dev *pci_dev)
 795 {
 796         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 797
 798         unregister_virtio_device(&vp_dev->vdev);
 799
 800         vp_del_vqs(&vp_dev->vdev);
 801         pci_iounmap(pci_dev, vp_dev->ioaddr);
 802         pci_release_regions(pci_dev);
 803         pci_disable_device(pci_dev);
 804         kfree(vp_dev);
 805 }
 806
 807 #ifdef CONFIG_PM_SLEEP
 808 static int virtio_pci_freeze(struct device *dev)
 809 {
 810         struct pci_dev *pci_dev = to_pci_dev(dev);
 811         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 812         int ret;
 813
 814         ret = virtio_device_freeze(&vp_dev->vdev);
 815
 816         if (!ret)
 817                 pci_disable_device(pci_dev);
 818         return ret;
 819 }
 820
 821 static int virtio_pci_restore(struct device *dev)
 822 {
 823         struct pci_dev *pci_dev = to_pci_dev(dev);
 824         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 825         int ret;
 826
 827         ret = pci_enable_device(pci_dev);
 828         if (ret)
 829                 return ret;
 830
 831         pci_set_master(pci_dev);
 832         return virtio_device_restore(&vp_dev->vdev);
 833 }
 834
/* Power-management ops: virtio_pci_freeze() and virtio_pci_restore() are
 * installed as the system-sleep callbacks. */
static const struct dev_pm_ops virtio_pci_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
 838 #endif
 839
/* PCI driver description tying the id table to the probe/remove
 * callbacks; the PM ops are only hooked up when CONFIG_PM_SLEEP is set. */
static struct pci_driver virtio_pci_driver = {
        .name           = "virtio-pci",
        .id_table       = virtio_pci_id_table,
        .probe          = virtio_pci_probe,
        .remove         = virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
        .driver.pm      = &virtio_pci_pm_ops,
#endif
};

/* NOTE(review): presumably generates the module init/exit that
 * register and unregister virtio_pci_driver - confirm against the
 * module_pci_driver() definition. */
module_pci_driver(virtio_pci_driver);