2 * Virtio PCI driver - modern (virtio 1.0) device support
4 * This module allows virtio devices to be used over a virtual PCI device.
5 * This can be used with QEMU based VMMs like KVM or Xen.
7 * Copyright IBM Corp. 2007
8 * Copyright Red Hat, Inc. 2014
11 * Anthony Liguori <aliguori@us.ibm.com>
12 * Rusty Russell <rusty@rustcorp.com.au>
13 * Michael S. Tsirkin <mst@redhat.com>
15 * This work is licensed under the terms of the GNU GPL, version 2 or later.
16 * See the COPYING file in the top-level directory.
20 #define VIRTIO_PCI_NO_LEGACY
21 #include "virtio_pci_common.h"
24 * Type-safe wrappers for io accesses.
25 * Use these to enforce at compile time the following spec requirement:
27 * The driver MUST access each field using the “natural” access
28 * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
29 * for 16-bit fields and 8-bit accesses for 8-bit fields.
/*
 * vp_ioread8 - 8-bit read accessor; takes a u8 __iomem pointer, returns u8.
 *
 * NOTE(review): only the signature is visible in this listing; the body
 * lines are missing. Presumably it forwards to ioread8() in the same way
 * the 16- and 32-bit accessors forward to ioread16()/ioread32() - confirm
 * against the complete file.
 */
31 static inline u8 vp_ioread8(u8 __iomem *addr)
35 static inline u16 vp_ioread16 (u16 __iomem *addr)
37 return ioread16(addr);
40 static inline u32 vp_ioread32(u32 __iomem *addr)
42 return ioread32(addr);
45 static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
47 iowrite8(value, addr);
50 static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
52 iowrite16(value, addr);
55 static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
57 iowrite32(value, addr);
/*
 * map_capability - validate a virtio PCI capability and map the region it
 * describes.
 * @dev: PCI device owning the capability
 * @off: config-space offset of the struct virtio_pci_cap
 *
 * Visible behaviour, in order:
 *  - reads one byte field plus the 'offset' and 'length' dwords of the
 *    capability from PCI config space;
 *  - rejects a capability whose length is not greater than 'start', or
 *    whose length past 'start' is smaller than 'minlen';
 *  - rejects start + offset if the 32-bit sum wraps;
 *  - rejects an offset that has bits set in (align - 1); this mask test is
 *    only meaningful when 'align' is a power of two;
 *  - rejects a window where minlen + offset wraps or exceeds
 *    pci_resource_len() of the BAR;
 *  - maps the window with pci_iomap_range() and logs if that fails.
 *
 * NOTE(review): the rest of the parameter list, the local declarations,
 * the dev_err() call heads and every return statement are missing from
 * this listing. 'minlen', 'align', 'start', 'bar', 'length', 'offset' and
 * 'p' are used below but declared on lines not shown; the byte read at the
 * top is presumably the 'bar' field - confirm against the complete file.
 * NOTE(review): the BAR range check validates 'minlen' bytes while the
 * mapping request asks for 'length' bytes - confirm that a request longer
 * than the BAR is handled by pci_iomap_range().
 */
60 static void __iomem *map_capability(struct pci_dev *dev, int off,
70 pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
73 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
75 pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
78 if (length <= start) {
80 "virtio_pci: bad capability len %u (>%u expected)\n",
85 if (length - start < minlen) {
87 "virtio_pci: bad capability len %u (>=%zu expected)\n",
94 if (start + offset < offset) {
96 "virtio_pci: map wrap-around %u+%u\n",
103 if (offset & (align - 1)) {
105 "virtio_pci: offset %u not aligned to %u\n",
116 if (minlen + offset < minlen ||
117 minlen + offset > pci_resource_len(dev, bar)) {
119 "virtio_pci: map virtio %zu@%u "
120 "out of range on bar %i length %lu\n",
122 bar, (unsigned long)pci_resource_len(dev, bar));
126 p = pci_iomap_range(dev, bar, offset, length);
129 "virtio_pci: unable to map virtio %u@%u on bar %i\n",
130 length, offset, bar);
134 static void iowrite64_twopart(u64 val, __le32 __iomem *lo, __le32 __iomem *hi)
136 iowrite32((u32)val, lo);
137 iowrite32(val >> 32, hi);
140 /* virtio config->get_features() implementation */
141 static u64 vp_get_features(struct virtio_device *vdev)
143 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
146 iowrite32(0, &vp_dev->common->device_feature_select);
147 features = ioread32(&vp_dev->common->device_feature);
148 iowrite32(1, &vp_dev->common->device_feature_select);
149 features |= ((u64)ioread32(&vp_dev->common->device_feature) << 32);
/*
 * vp_finalize_features - publish the negotiated feature set to the device.
 * @vdev: the virtio device
 *
 * Visible steps: vring_transport_features() is called first; a feature set
 * without VIRTIO_F_VERSION_1 triggers a dev_err(); vdev->features is then
 * written to guest_feature as two 32-bit words, selecting window 0 for
 * bits 0-31 and window 1 for bits 32-63 through guest_feature_select.
 *
 * NOTE(review): the statements that leave this function (inside the
 * VIRTIO_F_VERSION_1 check and at the end) are missing from this listing -
 * confirm the returned error code against the complete file.
 */
154 /* virtio config->finalize_features() implementation */
155 static int vp_finalize_features(struct virtio_device *vdev)
157 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
159 /* Give virtio_ring a chance to accept features. */
160 vring_transport_features(vdev);
162 if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
163 dev_err(&vdev->dev, "virtio: device uses modern interface "
164 "but does not have VIRTIO_F_VERSION_1\n");
168 iowrite32(0, &vp_dev->common->guest_feature_select);
169 iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
170 iowrite32(1, &vp_dev->common->guest_feature_select);
171 iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
/*
 * vp_get - copy a field of the device-specific configuration into @buf.
 * @vdev: the virtio device
 * @offset: byte offset of the field inside the device config region
 * @buf: destination buffer
 * @len: size of the field in bytes
 *
 * The access is bounds-checked against vp_dev->device_len with BUG_ON().
 * Four access shapes are visible: a single ioread8(), a single ioread16(),
 * a single ioread32(), and two consecutive ioread32() calls covering eight
 * bytes (low dword at @offset, next dword at @offset + 4). The 16- and
 * 32-bit values pass through cpu_to_le16()/cpu_to_le32() before being
 * copied out with memcpy(), so @buf may be unaligned.
 *
 * NOTE(review): the declarations of b/w/l and the construct that selects
 * an access shape (presumably a switch on @len) are missing from this
 * listing.
 * NOTE(review): offset + len is evaluated in unsigned arithmetic, so the
 * BUG_ON() cannot catch a sum that wraps around.
 */
176 /* virtio config->get() implementation */
177 static void vp_get(struct virtio_device *vdev, unsigned offset,
178 void *buf, unsigned len)
180 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
185 BUG_ON(offset + len > vp_dev->device_len);
189 b = ioread8(vp_dev->device + offset);
190 memcpy(buf, &b, sizeof b);
193 w = cpu_to_le16(ioread16(vp_dev->device + offset));
194 memcpy(buf, &w, sizeof w);
197 l = cpu_to_le32(ioread32(vp_dev->device + offset));
198 memcpy(buf, &l, sizeof l);
201 l = cpu_to_le32(ioread32(vp_dev->device + offset));
202 memcpy(buf, &l, sizeof l);
203 l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
204 memcpy(buf + sizeof l, &l, sizeof l);
/*
 * vp_set - write a field of the device-specific configuration from @buf.
 * @vdev: the virtio device
 * @offset: byte offset of the field inside the device config region
 * @buf: source buffer
 * @len: size of the field in bytes
 *
 * The access is bounds-checked against vp_dev->device_len with BUG_ON().
 * Four access shapes are visible: a single iowrite8(), a single
 * iowrite16(), a single iowrite32(), and two consecutive iowrite32() calls
 * covering eight bytes (low dword at @offset, next dword at @offset + 4).
 * Source bytes are first copied into a local with memcpy(), then converted
 * with le16_to_cpu()/le32_to_cpu() before the register write.
 *
 * NOTE(review): the declarations of b/w/l and the construct that selects
 * an access shape (presumably a switch on @len) are missing from this
 * listing, as is the line terminating the original comment below.
 * NOTE(review): offset + len is evaluated in unsigned arithmetic, so the
 * BUG_ON() cannot catch a sum that wraps around.
 */
211 /* the config->set() implementation. it's symmetric to the config->get()
213 static void vp_set(struct virtio_device *vdev, unsigned offset,
214 const void *buf, unsigned len)
216 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
221 BUG_ON(offset + len > vp_dev->device_len);
225 memcpy(&b, buf, sizeof b);
226 iowrite8(b, vp_dev->device + offset);
229 memcpy(&w, buf, sizeof w);
230 iowrite16(le16_to_cpu(w), vp_dev->device + offset);
233 memcpy(&l, buf, sizeof l);
234 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
237 memcpy(&l, buf, sizeof l);
238 iowrite32(le32_to_cpu(l), vp_dev->device + offset);
239 memcpy(&l, buf + sizeof l, sizeof l);
240 iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
247 static u32 vp_generation(struct virtio_device *vdev)
249 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
250 return ioread8(&vp_dev->common->config_generation);
253 /* config->{get,set}_status() implementations */
254 static u8 vp_get_status(struct virtio_device *vdev)
256 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
257 return ioread8(&vp_dev->common->device_status);
/*
 * vp_set_status - virtio config->set_status() implementation.
 * @vdev: the virtio device
 * @status: new value for the device_status register
 *
 * Writes @status to device_status with an 8-bit access.
 *
 * NOTE(review): the statement that followed the "never be setting status
 * to 0" comment is missing from this listing - presumably an assertion on
 * @status; confirm against the complete file. A reset (status 0) is issued
 * through vp_reset() instead.
 */
260 static void vp_set_status(struct virtio_device *vdev, u8 status)
262 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
263 /* We should never be setting status to 0. */
265 iowrite8(status, &vp_dev->common->device_status);
268 static void vp_reset(struct virtio_device *vdev)
270 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
271 /* 0 status means a reset. */
272 iowrite8(0, &vp_dev->common->device_status);
273 /* Flush out the status write, and flush in device writes,
274 * including MSI-X interrupts, if any. */
275 ioread8(&vp_dev->common->device_status);
276 /* Flush pending VQ/configuration callbacks. */
277 vp_synchronize_vectors(vdev);
280 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
282 /* Setup the vector used for configuration events */
283 iowrite16(vector, &vp_dev->common->msix_config);
284 /* Verify we had enough resources to assign the vector */
285 /* Will also flush the write out to device */
286 return ioread16(&vp_dev->common->msix_config);
289 static size_t vring_pci_size(u16 num)
291 /* We only need a cacheline separation. */
292 return PAGE_ALIGN(vring_size(num, SMP_CACHE_BYTES));
/*
 * alloc_virtqueue_pages - allocate zeroed memory for a virtqueue ring.
 * @num: in/out ring size; halved by the loop below on each iteration
 *
 * While *num is non-zero and the ring would need more than one page, an
 * allocation of vring_pci_size(*num) bytes is attempted with __GFP_NOWARN
 * and *num is halved for the next round. The final attempt is made without
 * __GFP_NOWARN and its result is returned directly.
 *
 * NOTE(review): the declaration of 'pages', the handling of a successful
 * allocation inside the loop, and whatever sits between the loop and the
 * final attempt are missing from this listing - confirm how *num == 0 is
 * handled against the complete file.
 */
295 static void *alloc_virtqueue_pages(int *num)
299 /* TODO: allocate each queue chunk individually */
300 for (; *num && vring_pci_size(*num) > PAGE_SIZE; *num /= 2) {
301 pages = alloc_pages_exact(vring_pci_size(*num),
302 GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
310 /* Try to get a single page. You are my only hope! */
311 return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
/*
 * setup_vq - create one virtqueue and program it into the device.
 * @vp_dev: the virtio PCI device
 * @info: per-queue bookkeeping; msix_vector, num and queue are filled here
 * @callback: passed through to vring_new_virtqueue()
 *
 * Visible flow:
 *  - fail with -ENOENT when the index is not below num_queues, or when the
 *    selected queue reports size 0 or is already enabled;
 *  - fail with -EINVAL when the reported size is not a power of two;
 *  - read the queue's notification offset;
 *  - allocate ring memory (-ENOMEM on failure) and build the vring;
 *  - write the queue size and the physical addresses of the descriptor,
 *    available and used areas;
 *  - set vq->priv to the notification address: inside the pre-mapped
 *    notify window when notify_base is set, otherwise a 2-byte window
 *    mapped for this queue with map_capability();
 *  - when an MSI-X vector was requested, program it and read it back;
 *    VIRTIO_MSI_NO_VECTOR on read-back is treated as failure;
 *  - on the error path: unmap the per-queue window if one was mapped,
 *    delete the vring, free the ring memory.
 *
 * NOTE(review): the remaining parameters (index, name, msix_vec), the
 * local declarations, several error assignments, the labels and the return
 * statements are missing from this listing.
 */
314 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
315 struct virtio_pci_vq_info *info,
317 void (*callback)(struct virtqueue *vq),
321 struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
322 struct virtqueue *vq;
326 if (index >= ioread16(&cfg->num_queues))
327 return ERR_PTR(-ENOENT);
329 /* Select the queue we're interested in */
330 iowrite16(index, &cfg->queue_select);
332 /* Check if queue is either not available or already active. */
333 num = ioread16(&cfg->queue_size);
334 if (!num || ioread16(&cfg->queue_enable))
335 return ERR_PTR(-ENOENT);
/* NOTE(review): the message below has no trailing "\n", unlike the other log messages in this file. */
337 if (num & (num - 1)) {
338 dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u", num);
339 return ERR_PTR(-EINVAL);
342 /* get offset of notification word for this vq */
343 off = ioread16(&cfg->queue_notify_off);
346 info->msix_vector = msix_vec;
348 info->queue = alloc_virtqueue_pages(&info->num);
349 if (info->queue == NULL)
350 return ERR_PTR(-ENOMEM);
352 /* create the vring */
353 vq = vring_new_virtqueue(index, info->num,
354 SMP_CACHE_BYTES, &vp_dev->vdev,
355 true, info->queue, vp_notify, callback, name);
/*
 * NOTE(review): the vring above is created with info->num, which
 * alloc_virtqueue_pages() may have halved, yet the size written to the
 * device below is 'num' as originally read from the device. If the
 * allocator shrank the ring, the device is told a different size than the
 * ring that was built - confirm whether info->num should be written here.
 */
361 /* activate the queue */
362 iowrite16(num, &cfg->queue_size);
363 iowrite64_twopart(virt_to_phys(info->queue),
364 &cfg->queue_desc_lo, &cfg->queue_desc_hi);
365 iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
366 &cfg->queue_avail_lo, &cfg->queue_avail_hi);
367 iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
368 &cfg->queue_used_lo, &cfg->queue_used_hi);
370 if (vp_dev->notify_base) {
371 /* offset should not wrap */
372 if ((u64)off * vp_dev->notify_offset_multiplier + 2
373 > vp_dev->notify_len) {
374 dev_warn(&vp_dev->pci_dev->dev,
375 "bad notification offset %u (x %u) "
376 "for queue %u > %zd",
377 off, vp_dev->notify_offset_multiplier,
378 index, vp_dev->notify_len);
382 vq->priv = (void __force *)vp_dev->notify_base +
383 off * vp_dev->notify_offset_multiplier;
385 vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
386 vp_dev->notify_map_cap, 2, 2,
387 off * vp_dev->notify_offset_multiplier, 2,
396 if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
397 iowrite16(msix_vec, &cfg->queue_msix_vector);
398 msix_vec = ioread16(&cfg->queue_msix_vector);
399 if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
401 goto err_assign_vector;
408 if (!vp_dev->notify_base)
409 pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
411 vring_del_virtqueue(vq);
413 free_pages_exact(info->queue, vring_pci_size(info->num));
/*
 * vp_modern_find_vqs - virtio config->find_vqs() implementation.
 * @vdev: the virtio device
 * @nvqs: number of virtqueues requested
 * @vqs: output array of virtqueues
 * @callbacks: per-queue callbacks
 *
 * Delegates queue creation to vp_find_vqs(), then walks vdev->vqs and, for
 * each queue, selects it through queue_select and writes 1 to
 * queue_enable. Enabling is done last because it cannot be undone short
 * of a device reset.
 *
 * NOTE(review): the last parameter ('names' is passed to vp_find_vqs()),
 * the handling of 'rc' after vp_find_vqs() and the return statement are
 * missing from this listing.
 */
417 static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
418 struct virtqueue *vqs[],
419 vq_callback_t *callbacks[],
422 struct virtio_pci_device *vp_dev = to_vp_device(vdev);
423 struct virtqueue *vq;
424 int rc = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);
429 /* Select and activate all queues. Has to be done last: once we do
430 * this, there's no way to go back except reset.
432 list_for_each_entry(vq, &vdev->vqs, list) {
433 iowrite16(vq->index, &vp_dev->common->queue_select);
434 iowrite16(1, &vp_dev->common->queue_enable);
440 static void del_vq(struct virtio_pci_vq_info *info)
442 struct virtqueue *vq = info->vq;
443 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
445 iowrite16(vq->index, &vp_dev->common->queue_select);
447 if (vp_dev->msix_enabled) {
448 iowrite16(VIRTIO_MSI_NO_VECTOR,
449 &vp_dev->common->queue_msix_vector);
450 /* Flush the write out to device */
451 ioread16(&vp_dev->common->queue_msix_vector);
454 if (!vp_dev->notify_base)
455 pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
457 vring_del_virtqueue(vq);
459 free_pages_exact(info->queue, vring_pci_size(info->num));
/*
 * Config operation table, "nodev" variant.
 *
 * NOTE(review): presumably installed by the probe function when the device
 * has no device-specific configuration capability (the name and the
 * assignment in virtio_pci_modern_probe() suggest so) - confirm. Entries
 * that sat before .generation and between .set_status and .find_vqs, and
 * the closing brace, are missing from this listing.
 */
462 static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
465 .generation = vp_generation,
466 .get_status = vp_get_status,
467 .set_status = vp_set_status,
469 .find_vqs = vp_modern_find_vqs,
470 .del_vqs = vp_del_vqs,
471 .get_features = vp_get_features,
472 .finalize_features = vp_finalize_features,
473 .bus_name = vp_bus_name,
474 .set_vq_affinity = vp_set_vq_affinity,
/*
 * Config operation table, regular variant.
 *
 * NOTE(review): presumably installed by the probe function when a
 * device-specific configuration region was mapped - confirm. Entries that
 * sat before .generation (presumably the get/set hooks) and between
 * .set_status and .find_vqs, and the closing brace, are missing from this
 * listing.
 */
477 static const struct virtio_config_ops virtio_pci_config_ops = {
480 .generation = vp_generation,
481 .get_status = vp_get_status,
482 .set_status = vp_set_status,
484 .find_vqs = vp_modern_find_vqs,
485 .del_vqs = vp_del_vqs,
486 .get_features = vp_get_features,
487 .finalize_features = vp_finalize_features,
488 .bus_name = vp_bus_name,
489 .set_vq_affinity = vp_set_vq_affinity,
/*
 * Reviewer summary of the visible logic: the function walks the
 * vendor-specific PCI capabilities (PCI_CAP_ID_VNDR) using
 * pci_find_capability()/pci_find_next_capability(), reads two byte fields
 * of each struct virtio_pci_cap, skips structures with reserved BAR
 * values, and accepts a capability of the requested type only when its
 * BAR has a non-zero length and resource flags matching
 * @ioresource_types.
 *
 * NOTE(review): the local declarations, the loop condition, the names of
 * the two fields read (presumably cfg_type and bar), the reserved-BAR test
 * and the return statements are missing from this listing, as are the
 * delimiters of the original kernel-doc comment below.
 */
493 * virtio_pci_find_capability - walk capabilities to find device info.
494 * @dev: the pci device
495 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
496 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
498 * Returns offset of the capability, or 0.
500 static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
501 u32 ioresource_types)
505 for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
507 pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
509 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
512 pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
516 /* Ignore structures with reserved BAR values */
520 if (type == cfg_type) {
521 if (pci_resource_len(dev, bar) &&
522 pci_resource_flags(dev, bar) & ioresource_types)
529 /* This is part of the ABI. Don't screw with it. */
530 static inline void check_offsets(void)
532 /* Note: disk space was harmed in compilation of this function. */
533 BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
534 offsetof(struct virtio_pci_cap, cap_vndr));
535 BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
536 offsetof(struct virtio_pci_cap, cap_next));
537 BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
538 offsetof(struct virtio_pci_cap, cap_len));
539 BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
540 offsetof(struct virtio_pci_cap, cfg_type));
541 BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
542 offsetof(struct virtio_pci_cap, bar));
543 BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
544 offsetof(struct virtio_pci_cap, offset));
545 BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
546 offsetof(struct virtio_pci_cap, length));
547 BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
548 offsetof(struct virtio_pci_notify_cap,
549 notify_off_multiplier));
550 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
551 offsetof(struct virtio_pci_common_cfg,
552 device_feature_select));
553 BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
554 offsetof(struct virtio_pci_common_cfg, device_feature));
555 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
556 offsetof(struct virtio_pci_common_cfg,
557 guest_feature_select));
558 BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
559 offsetof(struct virtio_pci_common_cfg, guest_feature));
560 BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
561 offsetof(struct virtio_pci_common_cfg, msix_config));
562 BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
563 offsetof(struct virtio_pci_common_cfg, num_queues));
564 BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
565 offsetof(struct virtio_pci_common_cfg, device_status));
566 BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
567 offsetof(struct virtio_pci_common_cfg, config_generation));
568 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
569 offsetof(struct virtio_pci_common_cfg, queue_select));
570 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
571 offsetof(struct virtio_pci_common_cfg, queue_size));
572 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
573 offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
574 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
575 offsetof(struct virtio_pci_common_cfg, queue_enable));
576 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
577 offsetof(struct virtio_pci_common_cfg, queue_notify_off));
578 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
579 offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
580 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
581 offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
582 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
583 offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
584 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
585 offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
586 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
587 offsetof(struct virtio_pci_common_cfg, queue_used_lo));
588 BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
589 offsetof(struct virtio_pci_common_cfg, queue_used_hi));
/*
 * virtio_pci_modern_probe - set up a device through the modern interface.
 * @vp_dev: the virtio PCI device being probed
 *
 * Visible flow:
 *  - only PCI device IDs 0x1000..0x107f are considered;
 *  - IDs below 0x1040 are transitional devices and take the virtio device
 *    id from the PCI subsystem device id; other IDs use (device - 0x1040);
 *    the virtio vendor id is the PCI subsystem vendor id;
 *  - the common configuration capability is looked up; the "leaving for
 *    legacy driver" message belongs to the case where it is absent;
 *  - the ISR and notify capabilities are mandatory once common exists;
 *    the device-specific capability is optional;
 *  - the common and ISR regions are mapped, then the notify multiplier,
 *    offset and length are read from config space;
 *  - when the notify region plus its offset within a page fits in one
 *    page it is mapped immediately (notify_base); otherwise the capability
 *    offset is saved in notify_map_cap and each queue maps its own window;
 *  - the device-specific region is mapped and one of the two config
 *    operation tables is installed, followed by the config_vector,
 *    setup_vq and del_vq hooks;
 *  - the error path unmaps notify_base (if set), then isr, then common.
 *
 * NOTE(review): declarations, several conditions, the goto labels, some
 * call arguments and every return statement are missing from this listing;
 * the description above covers only what the remaining lines show.
 */
592 /* the PCI probing function */
593 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
595 struct pci_dev *pci_dev = vp_dev->pci_dev;
596 int err, common, isr, notify, device;
602 /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
603 if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
606 if (pci_dev->device < 0x1040) {
607 /* Transitional devices: use the PCI subsystem device id as
608 * virtio device id, same as legacy driver always did.
610 vp_dev->vdev.id.device = pci_dev->subsystem_device;
612 /* Modern devices: simply use PCI device id, but start from 0x1040. */
613 vp_dev->vdev.id.device = pci_dev->device - 0x1040;
615 vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
617 if (virtio_device_is_legacy_only(vp_dev->vdev.id))
620 /* check for a common config: if not, use legacy mode (bar 0). */
621 common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
622 IORESOURCE_IO | IORESOURCE_MEM);
624 dev_info(&pci_dev->dev,
625 "virtio_pci: leaving for legacy driver\n");
629 /* If common is there, these should be too... */
630 isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
631 IORESOURCE_IO | IORESOURCE_MEM);
632 notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
633 IORESOURCE_IO | IORESOURCE_MEM);
634 if (!isr || !notify) {
635 dev_err(&pci_dev->dev,
636 "virtio_pci: missing capabilities %i/%i/%i\n",
637 common, isr, notify);
641 /* Device capability is only mandatory for devices that have
642 * device-specific configuration.
644 device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
645 IORESOURCE_IO | IORESOURCE_MEM);
648 vp_dev->common = map_capability(pci_dev, common,
649 sizeof(struct virtio_pci_common_cfg), 4,
650 0, sizeof(struct virtio_pci_common_cfg),
654 vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
660 /* Read notify_off_multiplier from config space. */
661 pci_read_config_dword(pci_dev,
662 notify + offsetof(struct virtio_pci_notify_cap,
663 notify_off_multiplier),
664 &vp_dev->notify_offset_multiplier);
665 /* Read notify length and offset from config space. */
666 pci_read_config_dword(pci_dev,
667 notify + offsetof(struct virtio_pci_notify_cap,
671 pci_read_config_dword(pci_dev,
672 notify + offsetof(struct virtio_pci_notify_cap,
676 /* We don't know how many VQs we'll map, ahead of the time.
677 * If notify length is small, map it all now.
678 * Otherwise, map each VQ individually later.
680 if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
681 vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
683 &vp_dev->notify_len);
684 if (!vp_dev->notify_base)
687 vp_dev->notify_map_cap = notify;
690 /* Again, we don't know how much we should map, but PAGE_SIZE
691 * is more than enough for all existing devices.
694 vp_dev->device = map_capability(pci_dev, device, 0, 4,
696 &vp_dev->device_len);
700 vp_dev->vdev.config = &virtio_pci_config_ops;
702 vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
705 vp_dev->config_vector = vp_config_vector;
706 vp_dev->setup_vq = setup_vq;
707 vp_dev->del_vq = del_vq;
712 if (vp_dev->notify_base)
713 pci_iounmap(pci_dev, vp_dev->notify_base);
715 pci_iounmap(pci_dev, vp_dev->isr);
717 pci_iounmap(pci_dev, vp_dev->common);
/*
 * virtio_pci_modern_remove - release the mappings created at probe time.
 * @vp_dev: the virtio PCI device being removed
 *
 * Unmaps the device-specific region, the shared notify window (only when
 * notify_base is set), the ISR region and the common configuration
 * region, in that order.
 *
 * NOTE(review): a line just before the first pci_iounmap() is missing from
 * this listing - presumably a guard for devices without a device-specific
 * region; confirm against the complete file.
 */
722 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
724 struct pci_dev *pci_dev = vp_dev->pci_dev;
727 pci_iounmap(pci_dev, vp_dev->device);
728 if (vp_dev->notify_base)
729 pci_iounmap(pci_dev, vp_dev->notify_base);
730 pci_iounmap(pci_dev, vp_dev->isr);
731 pci_iounmap(pci_dev, vp_dev->common);