drivers/virtio/virtio_pci.c

   1 /*
   2  * Virtio PCI driver
   3  *
   4  * This module allows virtio devices to be used over a virtual PCI device.
   5  * This can be used with QEMU based VMMs like KVM or Xen.
   6  *
   7  * Copyright IBM Corp. 2007
   8  *
   9  * Authors:
  10  *  Anthony Liguori  <aliguori@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <linux/module.h>
  18 #include <linux/list.h>
  19 #include <linux/pci.h>
  20 #include <linux/slab.h>
  21 #include <linux/interrupt.h>
  22 #include <linux/virtio.h>
  23 #include <linux/virtio_config.h>
  24 #include <linux/virtio_ring.h>
  25 #include <linux/virtio_pci.h>
  26 #include <linux/highmem.h>
  27 #include <linux/spinlock.h>
  28
  29 MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
  30 MODULE_DESCRIPTION("virtio-pci");
  31 MODULE_LICENSE("GPL");
  32 MODULE_VERSION("1");
  33
  34 /* Our device structure */
  35 struct virtio_pci_device
  36 {
  37         struct virtio_device vdev;
  38         struct pci_dev *pci_dev;
  39
  40         /* the IO mapping for the PCI config space */
  41         void __iomem *ioaddr;
  42
  43         /* a list of queues so we can dispatch IRQs */
  44         spinlock_t lock;
  45         struct list_head virtqueues;
  46
  47         /* MSI-X support */
  48         int msix_enabled;
  49         int intx_enabled;
  50         struct msix_entry *msix_entries;
  51         cpumask_var_t *msix_affinity_masks;
  52         /* Name strings for interrupts. This size should be enough,
  53          * and I'm too lazy to allocate each name separately. */
  54         char (*msix_names)[256];
  55         /* Number of available vectors */
  56         unsigned msix_vectors;
  57         /* Vectors allocated, excluding per-vq vectors if any */
  58         unsigned msix_used_vectors;
  59
  60         /* Whether we have vector per vq */
  61         bool per_vq_vectors;
  62 };
  63
  64 /* Constants for MSI-X */
  65 /* Use first vector for configuration changes, second and the rest for
  66  * virtqueues Thus, we need at least 2 vectors for MSI. */
  67 enum {
  68         VP_MSIX_CONFIG_VECTOR = 0,
  69         VP_MSIX_VQ_VECTOR = 1,
  70 };
  71
  72 struct virtio_pci_vq_info
  73 {
  74         /* the actual virtqueue */
  75         struct virtqueue *vq;
  76
  77         /* the number of entries in the queue */
  78         int num;
  79
  80         /* the virtual address of the ring queue */
  81         void *queue;
  82
  83         /* the list node for the virtqueues list */
  84         struct list_head node;
  85
  86         /* MSI-X vector (or none) */
  87         unsigned msix_vector;
  88 };
  89
  90 /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
  91 static const struct pci_device_id virtio_pci_id_table[] = {
  92         { PCI_DEVICE(0x1af4, PCI_ANY_ID) },
  93         { 0 }
  94 };
  95
  96 MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
  97
  98 /* Convert a generic virtio device to our structure */
  99 static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
 100 {
 101         return container_of(vdev, struct virtio_pci_device, vdev);
 102 }
 103
 104 /* virtio config->get_features() implementation */
 105 static u64 vp_get_features(struct virtio_device *vdev)
 106 {
 107         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 108
 109         /* When someone needs more than 32 feature bits, we'll need to
 110          * steal a bit to indicate that the rest are somewhere else. */
 111         return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
 112 }
 113
 114 /* virtio config->finalize_features() implementation */
 115 static void vp_finalize_features(struct virtio_device *vdev)
 116 {
 117         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 118
 119         /* Give virtio_ring a chance to accept features. */
 120         vring_transport_features(vdev);
 121
 122         /* We only support 32 feature bits. */
 123         iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
 124 }
 125
 126 /* virtio config->get() implementation */
 127 static void vp_get(struct virtio_device *vdev, unsigned offset,
 128                    void *buf, unsigned len)
 129 {
 130         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 131         void __iomem *ioaddr = vp_dev->ioaddr +
 132                                 VIRTIO_PCI_CONFIG(vp_dev) + offset;
 133         u8 *ptr = buf;
 134         int i;
 135
 136         for (i = 0; i < len; i++)
 137                 ptr[i] = ioread8(ioaddr + i);
 138 }
 139
 140 /* the config->set() implementation.  it's symmetric to the config->get()
 141  * implementation */
 142 static void vp_set(struct virtio_device *vdev, unsigned offset,
 143                    const void *buf, unsigned len)
 144 {
 145         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 146         void __iomem *ioaddr = vp_dev->ioaddr +
 147                                 VIRTIO_PCI_CONFIG(vp_dev) + offset;
 148         const u8 *ptr = buf;
 149         int i;
 150
 151         for (i = 0; i < len; i++)
 152                 iowrite8(ptr[i], ioaddr + i);
 153 }
 154
 155 /* config->{get,set}_status() implementations */
 156 static u8 vp_get_status(struct virtio_device *vdev)
 157 {
 158         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 159         return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 160 }
 161
 162 static void vp_set_status(struct virtio_device *vdev, u8 status)
 163 {
 164         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 165         /* We should never be setting status to 0. */
 166         BUG_ON(status == 0);
 167         iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 168 }
 169
 170 /* wait for pending irq handlers */
 171 static void vp_synchronize_vectors(struct virtio_device *vdev)
 172 {
 173         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 174         int i;
 175
 176         if (vp_dev->intx_enabled)
 177                 synchronize_irq(vp_dev->pci_dev->irq);
 178
 179         for (i = 0; i < vp_dev->msix_vectors; ++i)
 180                 synchronize_irq(vp_dev->msix_entries[i].vector);
 181 }
 182
 183 static void vp_reset(struct virtio_device *vdev)
 184 {
 185         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 186         /* 0 status means a reset. */
 187         iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 188         /* Flush out the status write, and flush in device writes,
 189          * including MSi-X interrupts, if any. */
 190         ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
 191         /* Flush pending VQ/configuration callbacks. */
 192         vp_synchronize_vectors(vdev);
 193 }
 194
 195 /* the notify function used when creating a virt queue */
 196 static bool vp_notify(struct virtqueue *vq)
 197 {
 198         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 199
 200         /* we write the queue's selector into the notification register to
 201          * signal the other end */
 202         iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
 203         return true;
 204 }
 205
 206 /* Handle a configuration change: Tell driver if it wants to know. */
 207 static irqreturn_t vp_config_changed(int irq, void *opaque)
 208 {
 209         struct virtio_pci_device *vp_dev = opaque;
 210
 211         virtio_config_changed(&vp_dev->vdev);
 212         return IRQ_HANDLED;
 213 }
 214
 215 /* Notify all virtqueues on an interrupt. */
 216 static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
 217 {
 218         struct virtio_pci_device *vp_dev = opaque;
 219         struct virtio_pci_vq_info *info;
 220         irqreturn_t ret = IRQ_NONE;
 221         unsigned long flags;
 222
 223         spin_lock_irqsave(&vp_dev->lock, flags);
 224         list_for_each_entry(info, &vp_dev->virtqueues, node) {
 225                 if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
 226                         ret = IRQ_HANDLED;
 227         }
 228         spin_unlock_irqrestore(&vp_dev->lock, flags);
 229
 230         return ret;
 231 }
 232
 233 /* A small wrapper to also acknowledge the interrupt when it's handled.
 234  * I really need an EIO hook for the vring so I can ack the interrupt once we
 235  * know that we'll be handling the IRQ but before we invoke the callback since
 236  * the callback may notify the host which results in the host attempting to
 237  * raise an interrupt that we would then mask once we acknowledged the
 238  * interrupt. */
 239 static irqreturn_t vp_interrupt(int irq, void *opaque)
 240 {
 241         struct virtio_pci_device *vp_dev = opaque;
 242         u8 isr;
 243
 244         /* reading the ISR has the effect of also clearing it so it's very
 245          * important to save off the value. */
 246         isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
 247
 248         /* It's definitely not us if the ISR was not high */
 249         if (!isr)
 250                 return IRQ_NONE;
 251
 252         /* Configuration change?  Tell driver if it wants to know. */
 253         if (isr & VIRTIO_PCI_ISR_CONFIG)
 254                 vp_config_changed(irq, opaque);
 255
 256         return vp_vring_interrupt(irq, opaque);
 257 }
 258
 259 static void vp_free_vectors(struct virtio_device *vdev)
 260 {
 261         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 262         int i;
 263
 264         if (vp_dev->intx_enabled) {
 265                 free_irq(vp_dev->pci_dev->irq, vp_dev);
 266                 vp_dev->intx_enabled = 0;
 267         }
 268
 269         for (i = 0; i < vp_dev->msix_used_vectors; ++i)
 270                 free_irq(vp_dev->msix_entries[i].vector, vp_dev);
 271
 272         for (i = 0; i < vp_dev->msix_vectors; i++)
 273                 if (vp_dev->msix_affinity_masks[i])
 274                         free_cpumask_var(vp_dev->msix_affinity_masks[i]);
 275
 276         if (vp_dev->msix_enabled) {
 277                 /* Disable the vector used for configuration */
 278                 iowrite16(VIRTIO_MSI_NO_VECTOR,
 279                           vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 280                 /* Flush the write out to device */
 281                 ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 282
 283                 pci_disable_msix(vp_dev->pci_dev);
 284                 vp_dev->msix_enabled = 0;
 285         }
 286
 287         vp_dev->msix_vectors = 0;
 288         vp_dev->msix_used_vectors = 0;
 289         kfree(vp_dev->msix_names);
 290         vp_dev->msix_names = NULL;
 291         kfree(vp_dev->msix_entries);
 292         vp_dev->msix_entries = NULL;
 293         kfree(vp_dev->msix_affinity_masks);
 294         vp_dev->msix_affinity_masks = NULL;
 295 }
 296
 297 static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
 298                                    bool per_vq_vectors)
 299 {
 300         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 301         const char *name = dev_name(&vp_dev->vdev.dev);
 302         unsigned i, v;
 303         int err = -ENOMEM;
 304
 305         vp_dev->msix_vectors = nvectors;
 306
 307         vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
 308                                        GFP_KERNEL);
 309         if (!vp_dev->msix_entries)
 310                 goto error;
 311         vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
 312                                      GFP_KERNEL);
 313         if (!vp_dev->msix_names)
 314                 goto error;
 315         vp_dev->msix_affinity_masks
 316                 = kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
 317                           GFP_KERNEL);
 318         if (!vp_dev->msix_affinity_masks)
 319                 goto error;
 320         for (i = 0; i < nvectors; ++i)
 321                 if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
 322                                         GFP_KERNEL))
 323                         goto error;
 324
 325         for (i = 0; i < nvectors; ++i)
 326                 vp_dev->msix_entries[i].entry = i;
 327
 328         err = pci_enable_msix_exact(vp_dev->pci_dev,
 329                                     vp_dev->msix_entries, nvectors);
 330         if (err)
 331                 goto error;
 332         vp_dev->msix_enabled = 1;
 333
 334         /* Set the vector used for configuration */
 335         v = vp_dev->msix_used_vectors;
 336         snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 337                  "%s-config", name);
 338         err = request_irq(vp_dev->msix_entries[v].vector,
 339                           vp_config_changed, 0, vp_dev->msix_names[v],
 340                           vp_dev);
 341         if (err)
 342                 goto error;
 343         ++vp_dev->msix_used_vectors;
 344
 345         iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 346         /* Verify we had enough resources to assign the vector */
 347         v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
 348         if (v == VIRTIO_MSI_NO_VECTOR) {
 349                 err = -EBUSY;
 350                 goto error;
 351         }
 352
 353         if (!per_vq_vectors) {
 354                 /* Shared vector for all VQs */
 355                 v = vp_dev->msix_used_vectors;
 356                 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
 357                          "%s-virtqueues", name);
 358                 err = request_irq(vp_dev->msix_entries[v].vector,
 359                                   vp_vring_interrupt, 0, vp_dev->msix_names[v],
 360                                   vp_dev);
 361                 if (err)
 362                         goto error;
 363                 ++vp_dev->msix_used_vectors;
 364         }
 365         return 0;
 366 error:
 367         vp_free_vectors(vdev);
 368         return err;
 369 }
 370
 371 static int vp_request_intx(struct virtio_device *vdev)
 372 {
 373         int err;
 374         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 375
 376         err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
 377                           IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
 378         if (!err)
 379                 vp_dev->intx_enabled = 1;
 380         return err;
 381 }
 382
 383 static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
 384                                   void (*callback)(struct virtqueue *vq),
 385                                   const char *name,
 386                                   u16 msix_vec)
 387 {
 388         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 389         struct virtio_pci_vq_info *info;
 390         struct virtqueue *vq;
 391         unsigned long flags, size;
 392         u16 num;
 393         int err;
 394
 395         /* Select the queue we're interested in */
 396         iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 397
 398         /* Check if queue is either not available or already active. */
 399         num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
 400         if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
 401                 return ERR_PTR(-ENOENT);
 402
 403         /* allocate and fill out our structure the represents an active
 404          * queue */
 405         info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
 406         if (!info)
 407                 return ERR_PTR(-ENOMEM);
 408
 409         info->num = num;
 410         info->msix_vector = msix_vec;
 411
 412         size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
 413         info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
 414         if (info->queue == NULL) {
 415                 err = -ENOMEM;
 416                 goto out_info;
 417         }
 418
 419         /* activate the queue */
 420         iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
 421                   vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 422
 423         /* create the vring */
 424         vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
 425                                  true, info->queue, vp_notify, callback, name);
 426         if (!vq) {
 427                 err = -ENOMEM;
 428                 goto out_activate_queue;
 429         }
 430
 431         vq->priv = info;
 432         info->vq = vq;
 433
 434         if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
 435                 iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 436                 msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 437                 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
 438                         err = -EBUSY;
 439                         goto out_assign;
 440                 }
 441         }
 442
 443         if (callback) {
 444                 spin_lock_irqsave(&vp_dev->lock, flags);
 445                 list_add(&info->node, &vp_dev->virtqueues);
 446                 spin_unlock_irqrestore(&vp_dev->lock, flags);
 447         } else {
 448                 INIT_LIST_HEAD(&info->node);
 449         }
 450
 451         return vq;
 452
 453 out_assign:
 454         vring_del_virtqueue(vq);
 455 out_activate_queue:
 456         iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 457         free_pages_exact(info->queue, size);
 458 out_info:
 459         kfree(info);
 460         return ERR_PTR(err);
 461 }
 462
 463 static void vp_del_vq(struct virtqueue *vq)
 464 {
 465         struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
 466         struct virtio_pci_vq_info *info = vq->priv;
 467         unsigned long flags, size;
 468
 469         spin_lock_irqsave(&vp_dev->lock, flags);
 470         list_del(&info->node);
 471         spin_unlock_irqrestore(&vp_dev->lock, flags);
 472
 473         iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
 474
 475         if (vp_dev->msix_enabled) {
 476                 iowrite16(VIRTIO_MSI_NO_VECTOR,
 477                           vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
 478                 /* Flush the write out to device */
 479                 ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
 480         }
 481
 482         vring_del_virtqueue(vq);
 483
 484         /* Select and deactivate the queue */
 485         iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
 486
 487         size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
 488         free_pages_exact(info->queue, size);
 489         kfree(info);
 490 }
 491
 492 /* the config->del_vqs() implementation */
 493 static void vp_del_vqs(struct virtio_device *vdev)
 494 {
 495         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 496         struct virtqueue *vq, *n;
 497         struct virtio_pci_vq_info *info;
 498
 499         list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
 500                 info = vq->priv;
 501                 if (vp_dev->per_vq_vectors &&
 502                         info->msix_vector != VIRTIO_MSI_NO_VECTOR)
 503                         free_irq(vp_dev->msix_entries[info->msix_vector].vector,
 504                                  vq);
 505                 vp_del_vq(vq);
 506         }
 507         vp_dev->per_vq_vectors = false;
 508
 509         vp_free_vectors(vdev);
 510 }
 511
 512 static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 513                               struct virtqueue *vqs[],
 514                               vq_callback_t *callbacks[],
 515                               const char *names[],
 516                               bool use_msix,
 517                               bool per_vq_vectors)
 518 {
 519         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 520         u16 msix_vec;
 521         int i, err, nvectors, allocated_vectors;
 522
 523         if (!use_msix) {
 524                 /* Old style: one normal interrupt for change and all vqs. */
 525                 err = vp_request_intx(vdev);
 526                 if (err)
 527                         goto error_request;
 528         } else {
 529                 if (per_vq_vectors) {
 530                         /* Best option: one for change interrupt, one per vq. */
 531                         nvectors = 1;
 532                         for (i = 0; i < nvqs; ++i)
 533                                 if (callbacks[i])
 534                                         ++nvectors;
 535                 } else {
 536                         /* Second best: one for change, shared for all vqs. */
 537                         nvectors = 2;
 538                 }
 539
 540                 err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
 541                 if (err)
 542                         goto error_request;
 543         }
 544
 545         vp_dev->per_vq_vectors = per_vq_vectors;
 546         allocated_vectors = vp_dev->msix_used_vectors;
 547         for (i = 0; i < nvqs; ++i) {
 548                 if (!names[i]) {
 549                         vqs[i] = NULL;
 550                         continue;
 551                 } else if (!callbacks[i] || !vp_dev->msix_enabled)
 552                         msix_vec = VIRTIO_MSI_NO_VECTOR;
 553                 else if (vp_dev->per_vq_vectors)
 554                         msix_vec = allocated_vectors++;
 555                 else
 556                         msix_vec = VP_MSIX_VQ_VECTOR;
 557                 vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
 558                 if (IS_ERR(vqs[i])) {
 559                         err = PTR_ERR(vqs[i]);
 560                         goto error_find;
 561                 }
 562
 563                 if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
 564                         continue;
 565
 566                 /* allocate per-vq irq if available and necessary */
 567                 snprintf(vp_dev->msix_names[msix_vec],
 568                          sizeof *vp_dev->msix_names,
 569                          "%s-%s",
 570                          dev_name(&vp_dev->vdev.dev), names[i]);
 571                 err = request_irq(vp_dev->msix_entries[msix_vec].vector,
 572                                   vring_interrupt, 0,
 573                                   vp_dev->msix_names[msix_vec],
 574                                   vqs[i]);
 575                 if (err) {
 576                         vp_del_vq(vqs[i]);
 577                         goto error_find;
 578                 }
 579         }
 580         return 0;
 581
 582 error_find:
 583         vp_del_vqs(vdev);
 584
 585 error_request:
 586         return err;
 587 }
 588
 589 /* the config->find_vqs() implementation */
 590 static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 591                        struct virtqueue *vqs[],
 592                        vq_callback_t *callbacks[],
 593                        const char *names[])
 594 {
 595         int err;
 596
 597         /* Try MSI-X with one vector per queue. */
 598         err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
 599         if (!err)
 600                 return 0;
 601         /* Fallback: MSI-X with one vector for config, one shared for queues. */
 602         err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
 603                                  true, false);
 604         if (!err)
 605                 return 0;
 606         /* Finally fall back to regular interrupts. */
 607         return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
 608                                   false, false);
 609 }
 610
 611 static const char *vp_bus_name(struct virtio_device *vdev)
 612 {
 613         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 614
 615         return pci_name(vp_dev->pci_dev);
 616 }
 617
 618 /* Setup the affinity for a virtqueue:
 619  * - force the affinity for per vq vector
 620  * - OR over all affinities for shared MSI
 621  * - ignore the affinity request if we're using INTX
 622  */
 623 static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
 624 {
 625         struct virtio_device *vdev = vq->vdev;
 626         struct virtio_pci_device *vp_dev = to_vp_device(vdev);
 627         struct virtio_pci_vq_info *info = vq->priv;
 628         struct cpumask *mask;
 629         unsigned int irq;
 630
 631         if (!vq->callback)
 632                 return -EINVAL;
 633
 634         if (vp_dev->msix_enabled) {
 635                 mask = vp_dev->msix_affinity_masks[info->msix_vector];
 636                 irq = vp_dev->msix_entries[info->msix_vector].vector;
 637                 if (cpu == -1)
 638                         irq_set_affinity_hint(irq, NULL);
 639                 else {
 640                         cpumask_set_cpu(cpu, mask);
 641                         irq_set_affinity_hint(irq, mask);
 642                 }
 643         }
 644         return 0;
 645 }
 646
 647 static const struct virtio_config_ops virtio_pci_config_ops = {
 648         .get            = vp_get,
 649         .set            = vp_set,
 650         .get_status     = vp_get_status,
 651         .set_status     = vp_set_status,
 652         .reset          = vp_reset,
 653         .find_vqs       = vp_find_vqs,
 654         .del_vqs        = vp_del_vqs,
 655         .get_features   = vp_get_features,
 656         .finalize_features = vp_finalize_features,
 657         .bus_name       = vp_bus_name,
 658         .set_vq_affinity = vp_set_vq_affinity,
 659 };
 660
 661 static void virtio_pci_release_dev(struct device *_d)
 662 {
 663         /*
 664          * No need for a release method as we allocate/free
 665          * all devices together with the pci devices.
 666          * Provide an empty one to avoid getting a warning from core.
 667          */
 668 }
 669
 670 /* the PCI probing function */
 671 static int virtio_pci_probe(struct pci_dev *pci_dev,
 672                             const struct pci_device_id *id)
 673 {
 674         struct virtio_pci_device *vp_dev;
 675         int err;
 676
 677         /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
 678         if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
 679                 return -ENODEV;
 680
 681         if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
 682                 printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
 683                        VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
 684                 return -ENODEV;
 685         }
 686
 687         /* allocate our structure and fill it out */
 688         vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
 689         if (vp_dev == NULL)
 690                 return -ENOMEM;
 691
 692         vp_dev->vdev.dev.parent = &pci_dev->dev;
 693         vp_dev->vdev.dev.release = virtio_pci_release_dev;
 694         vp_dev->vdev.config = &virtio_pci_config_ops;
 695         vp_dev->pci_dev = pci_dev;
 696         INIT_LIST_HEAD(&vp_dev->virtqueues);
 697         spin_lock_init(&vp_dev->lock);
 698
 699         /* Disable MSI/MSIX to bring device to a known good state. */
 700         pci_msi_off(pci_dev);
 701
 702         /* enable the device */
 703         err = pci_enable_device(pci_dev);
 704         if (err)
 705                 goto out;
 706
 707         err = pci_request_regions(pci_dev, "virtio-pci");
 708         if (err)
 709                 goto out_enable_device;
 710
 711         vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
 712         if (vp_dev->ioaddr == NULL) {
 713                 err = -ENOMEM;
 714                 goto out_req_regions;
 715         }
 716
 717         pci_set_drvdata(pci_dev, vp_dev);
 718         pci_set_master(pci_dev);
 719
 720         /* we use the subsystem vendor/device id as the virtio vendor/device
 721          * id.  this allows us to use the same PCI vendor/device id for all
 722          * virtio devices and to identify the particular virtio driver by
 723          * the subsystem ids */
 724         vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
 725         vp_dev->vdev.id.device = pci_dev->subsystem_device;
 726
 727         /* finally register the virtio device */
 728         err = register_virtio_device(&vp_dev->vdev);
 729         if (err)
 730                 goto out_set_drvdata;
 731
 732         return 0;
 733
 734 out_set_drvdata:
 735         pci_iounmap(pci_dev, vp_dev->ioaddr);
 736 out_req_regions:
 737         pci_release_regions(pci_dev);
 738 out_enable_device:
 739         pci_disable_device(pci_dev);
 740 out:
 741         kfree(vp_dev);
 742         return err;
 743 }
 744
 745 static void virtio_pci_remove(struct pci_dev *pci_dev)
 746 {
 747         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 748
 749         unregister_virtio_device(&vp_dev->vdev);
 750
 751         vp_del_vqs(&vp_dev->vdev);
 752         pci_iounmap(pci_dev, vp_dev->ioaddr);
 753         pci_release_regions(pci_dev);
 754         pci_disable_device(pci_dev);
 755         kfree(vp_dev);
 756 }
 757
 758 #ifdef CONFIG_PM_SLEEP
 759 static int virtio_pci_freeze(struct device *dev)
 760 {
 761         struct pci_dev *pci_dev = to_pci_dev(dev);
 762         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 763         int ret;
 764
 765         ret = virtio_device_freeze(&vp_dev->vdev);
 766
 767         if (!ret)
 768                 pci_disable_device(pci_dev);
 769         return ret;
 770 }
 771
 772 static int virtio_pci_restore(struct device *dev)
 773 {
 774         struct pci_dev *pci_dev = to_pci_dev(dev);
 775         struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 776         int ret;
 777
 778         ret = pci_enable_device(pci_dev);
 779         if (ret)
 780                 return ret;
 781
 782         pci_set_master(pci_dev);
 783         return virtio_device_restore(&vp_dev->vdev);
 784 }
 785
 786 static const struct dev_pm_ops virtio_pci_pm_ops = {
 787         SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
 788 };
 789 #endif
 790
 791 static struct pci_driver virtio_pci_driver = {
 792         .name           = "virtio-pci",
 793         .id_table       = virtio_pci_id_table,
 794         .probe          = virtio_pci_probe,
 795         .remove         = virtio_pci_remove,
 796 #ifdef CONFIG_PM_SLEEP
 797         .driver.pm      = &virtio_pci_pm_ops,
 798 #endif
 799 };
 800
 801 module_pci_driver(virtio_pci_driver);