2 * Copyright 2016,2017 IBM Corporation.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
10 #define pr_fmt(fmt) "xive: " fmt
12 #include <linux/types.h>
13 #include <linux/irq.h>
14 #include <linux/debugfs.h>
15 #include <linux/smp.h>
16 #include <linux/interrupt.h>
17 #include <linux/seq_file.h>
18 #include <linux/init.h>
20 #include <linux/slab.h>
21 #include <linux/spinlock.h>
22 #include <linux/delay.h>
23 #include <linux/cpumask.h>
30 #include <asm/errno.h>
32 #include <asm/xive-regs.h>
34 #include <asm/kvm_ppc.h>
36 #include "xive-internal.h"
/* Value of the ibm,xive-provision-page-size device-tree property */
39 static u32 xive_provision_size;
/* Chip IDs read from the ibm,xive-provision-chips device-tree property */
40 static u32 *xive_provision_chips;
/* Number of entries in xive_provision_chips */
41 static u32 xive_provision_chip_count;
/* Queue size shift picked from the ibm,xive-eq-sizes device-tree property */
42 static u32 xive_queue_shift;
/* Base VP of the pool VP block (indexed by CPU), or XIVE_INVALID_VP if none */
43 static u32 xive_pool_vps = XIVE_INVALID_VP;
/* Slab cache from which pages donated to OPAL are allocated */
44 static struct kmem_cache *xive_provision_cache;
/*
 * xive_native_populate_irq_data - fill a xive_irq_data from OPAL
 * @hw_irq: hardware interrupt number passed to opal_xive_get_irq_info()
 * @data:   structure to fill; it is zeroed before anything else is done
 *
 * Translates the OPAL_XIVE_IRQ_* flag bits into XIVE_IRQ_FLAG_* bits,
 * converts the big-endian page addresses, ESB shift and source chip to
 * CPU endianness, then maps the EOI page and the trigger page. When the
 * trigger page equals the EOI page the EOI mapping is reused for both.
 *
 * Failures are logged with pr_err().
 * NOTE(review): confirm the exact error code returned on each failure.
 */
46 int xive_native_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
/* Raw big-endian values as written by OPAL */
48 __be64 flags, eoi_page, trig_page;
49 __be32 esb_shift, src_chip;
53 memset(data, 0, sizeof(*data));
55 rc = opal_xive_get_irq_info(hw_irq, &flags, &eoi_page, &trig_page,
56 &esb_shift, &src_chip);
58 pr_err("opal_xive_get_irq_info(0x%x) returned %lld\n",
/* Map each OPAL flag bit onto the matching kernel-side flag bit */
63 opal_flags = be64_to_cpu(flags);
64 if (opal_flags & OPAL_XIVE_IRQ_STORE_EOI)
65 data->flags |= XIVE_IRQ_FLAG_STORE_EOI;
66 if (opal_flags & OPAL_XIVE_IRQ_LSI)
67 data->flags |= XIVE_IRQ_FLAG_LSI;
68 if (opal_flags & OPAL_XIVE_IRQ_SHIFT_BUG)
69 data->flags |= XIVE_IRQ_FLAG_SHIFT_BUG;
70 if (opal_flags & OPAL_XIVE_IRQ_MASK_VIA_FW)
71 data->flags |= XIVE_IRQ_FLAG_MASK_FW;
72 if (opal_flags & OPAL_XIVE_IRQ_EOI_VIA_FW)
73 data->flags |= XIVE_IRQ_FLAG_EOI_FW;
/* Convert the remaining OPAL outputs to CPU endianness */
74 data->eoi_page = be64_to_cpu(eoi_page);
75 data->trig_page = be64_to_cpu(trig_page);
76 data->esb_shift = be32_to_cpu(esb_shift);
77 data->src_chip = be32_to_cpu(src_chip);
/* Map the EOI page; the mapping covers 2^esb_shift bytes */
79 data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift);
80 if (!data->eoi_mmio) {
81 pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq);
/* Same physical page for trigger and EOI: share the existing mapping */
87 if (data->trig_page == data->eoi_page) {
88 data->trig_mmio = data->eoi_mmio;
/* Distinct trigger page: map it separately with the same size */
92 data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
93 if (!data->trig_mmio) {
94 pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
99 EXPORT_SYMBOL_GPL(xive_native_populate_irq_data);
/*
 * xive_native_configure_irq - set the routing of an interrupt through OPAL
 * @hw_irq: hardware interrupt number
 * @target: target passed unchanged to opal_xive_set_irq_config()
 * @prio:   priority passed unchanged to opal_xive_set_irq_config()
 * @sw_irq: software interrupt number passed unchanged to OPAL
 *
 * Returns 0 when OPAL reports success, -ENXIO for any other OPAL result.
 */
101 int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq)
106 rc = opal_xive_set_irq_config(hw_irq, target, prio, sw_irq);
111 return rc == 0 ? 0 : -ENXIO;
113 EXPORT_SYMBOL_GPL(xive_native_configure_irq);
/*
 * xive_native_configure_queue - configure and enable an event queue
 * @vp_id:        VP identifier handed to the OPAL queue calls
 * @q:            queue descriptor updated with mask, EOI page and
 *                escalation interrupt
 * @prio:         priority of the queue within the VP
 * @qpage:        queue page, converted to a physical address with __pa()
 * @order:        queue size shift passed to OPAL; 0 yields a zero mask
 * @can_escalate: NOTE(review): presumably selects the escalation branch
 *                that sets OPAL_XIVE_EQ_ESCALATE - confirm
 *
 * The queue is always configured with OPAL_XIVE_EQ_ALWAYS_NOTIFY and
 * OPAL_XIVE_EQ_ENABLED. OPAL errors are logged with pr_err().
 */
116 /* This can be called multiple times to change a queue configuration */
117 int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
118 __be32 *qpage, u32 order, bool can_escalate)
123 u64 flags, qpage_phys;
125 /* If there's an actual queue page, clean it */
129 qpage_phys = __pa(qpage);
133 /* Initialize the rest of the fields */
/*
 * Index mask for the queue: 2^(order - 2) - 1.
 * NOTE(review): the -2 presumably accounts for 4-byte (__be32) entries.
 */
134 q->msk = order ? ((1u << (order - 2)) - 1) : 0;
/* Ask OPAL for the queue EOI page and escalation interrupt */
138 rc = opal_xive_get_queue_info(vp_id, prio, NULL, NULL,
143 pr_err("Error %lld getting queue info prio %d\n", rc, prio);
147 q->eoi_phys = be64_to_cpu(qeoi_page_be);
150 flags = OPAL_XIVE_EQ_ALWAYS_NOTIFY | OPAL_XIVE_EQ_ENABLED;
152 /* Escalation needed ? */
154 q->esc_irq = be32_to_cpu(esc_irq_be);
155 flags |= OPAL_XIVE_EQ_ESCALATE;
158 /* Configure and enable the queue in HW */
160 rc = opal_xive_set_queue_info(vp_id, prio, qpage_phys, order, flags);
166 pr_err("Error %lld setting queue for prio %d\n", rc, prio);
170 * KVM code requires all of the above to be visible before
171 * q->qpage is set due to how it manages IPI EOIs
179 EXPORT_SYMBOL_GPL(xive_native_configure_queue);
/*
 * Disable the queue of priority @prio on VP @vp_id by setting its queue
 * info in OPAL with a zero page, zero order and no flags. An OPAL error
 * is logged with pr_err().
 */
181 static void __xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
185 /* Disable the queue in HW */
187 rc = opal_xive_set_queue_info(vp_id, prio, 0, 0, 0);
192 pr_err("Error %lld disabling queue for prio %d\n", rc, prio);
/*
 * xive_native_disable_queue - exported entry point to disable a queue
 * @vp_id: VP identifier
 * @q:     queue descriptor
 * @prio:  priority of the queue
 *
 * Forwards its arguments to __xive_native_disable_queue().
 */
195 void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio)
197 __xive_native_disable_queue(vp_id, q, prio);
199 EXPORT_SYMBOL_GPL(xive_native_disable_queue);
/*
 * Allocate, zero and configure the queue of priority @prio for @cpu.
 *
 * The pages come from the memory node of @cpu. The queue is then
 * configured through xive_native_configure_queue() using the hard SMP
 * processor id of @cpu as the VP id, xive_queue_shift as the order and
 * escalation disabled. Returns the result of that call.
 */
201 static int xive_native_setup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
203 struct xive_q *q = &xc->queue[prio];
204 unsigned int alloc_order;
/* Page allocation order: 0 unless the queue is larger than one page */
208 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
209 (xive_queue_shift - PAGE_SHIFT) : 0;
210 pages = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, alloc_order);
213 qpage = (__be32 *)page_address(pages);
/* Clear the whole queue (2^xive_queue_shift bytes) before use */
214 memset(qpage, 0, 1 << xive_queue_shift);
215 return xive_native_configure_queue(get_hard_smp_processor_id(cpu),
216 q, prio, qpage, xive_queue_shift, false);
/*
 * Disable the queue of priority @prio for @cpu in OPAL, then free its
 * queue pages. The free uses the same allocation order computation as
 * xive_native_setup_queue().
 */
219 static void xive_native_cleanup_queue(unsigned int cpu, struct xive_cpu *xc, u8 prio)
221 struct xive_q *q = &xc->queue[prio];
222 unsigned int alloc_order;
225 * We use the variant with no iounmap as this is called on exec
226 * from an IPI and iounmap isn't safe
228 __xive_native_disable_queue(get_hard_smp_processor_id(cpu), q, prio);
229 alloc_order = (xive_queue_shift > PAGE_SHIFT) ?
230 (xive_queue_shift - PAGE_SHIFT) : 0;
231 free_pages((unsigned long)q->qpage, alloc_order);
/*
 * Report whether device-tree node @node is compatible with
 * ibm,opal-xive-vc.
 */
235 static bool xive_native_match(struct device_node *node)
237 return of_device_is_compatible(node, "ibm,opal-xive-vc");
/*
 * Allocate an IPI for @cpu through OPAL.
 *
 * The chip ID is looked up in the ibm,chip-id property of the device-tree
 * node of @cpu and passed to opal_xive_allocate_irq(). An OPAL_BUSY
 * result is handled separately from other results; an allocation failure
 * is logged with pr_err().
 * NOTE(review): confirm the fallback chip ID when the property is absent
 * and where the allocated interrupt is stored in @xc.
 */
241 static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
243 struct device_node *np;
244 unsigned int chip_id;
247 /* Find the chip ID */
248 np = of_get_cpu_node(cpu, NULL);
250 if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
254 /* Allocate an IPI and populate info about it */
256 irq = opal_xive_allocate_irq(chip_id);
257 if (irq == OPAL_BUSY) {
262 pr_err("Failed to allocate IPI on CPU %d\n", cpu);
270 #endif /* CONFIG_SMP */
/*
 * xive_native_alloc_irq - allocate an interrupt from OPAL on any chip
 *
 * Calls opal_xive_allocate_irq() with OPAL_XIVE_ANY_CHIP.
 * NOTE(review): confirm the value returned when the allocation fails.
 */
272 u32 xive_native_alloc_irq(void)
277 rc = opal_xive_allocate_irq(OPAL_XIVE_ANY_CHIP);
286 EXPORT_SYMBOL_GPL(xive_native_alloc_irq);
/*
 * xive_native_free_irq - hand an interrupt back to OPAL
 * @irq: interrupt number passed to opal_xive_free_irq()
 */
288 void xive_native_free_irq(u32 irq)
291 s64 rc = opal_xive_free_irq(irq);
297 EXPORT_SYMBOL_GPL(xive_native_free_irq);
/*
 * Release the IPI recorded in xc->hw_ipi back to OPAL. An OPAL_BUSY
 * result from opal_xive_free_irq() is handled explicitly.
 * NOTE(review): presumably the call is retried while OPAL is busy - confirm.
 */
300 static void xive_native_put_ipi(unsigned int cpu, struct xive_cpu *xc)
308 rc = opal_xive_free_irq(xc->hw_ipi);
309 if (rc == OPAL_BUSY) {
317 #endif /* CONFIG_SMP */
/* Shutdown hook: reset the XIVE through OPAL into OPAL_XIVE_MODE_EMU. */
319 static void xive_native_shutdown(void)
321 /* Switch the XIVE to emulation mode */
322 opal_xive_reset(OPAL_XIVE_MODE_EMU);
/*
 * Acknowledge cycle for the current thread.
 *
 * Reads the 16-bit TM_SPC_ACK_HV_REG word from the thread management
 * area, extracts the HE field from its upper byte, and dispatches on it:
 * a physical thread interrupt marks the acknowledged priority as pending
 * in @xc, while pool and LSI types are reported as unexpected.
 */
326 * Perform an "ack" cycle on the current thread, thus
327 * grabbing the pending active priorities and updating
328 * the CPPR to the most favored one.
330 static void xive_native_update_pending(struct xive_cpu *xc)
335 /* Perform the acknowledge hypervisor to register cycle */
336 ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_HV_REG));
338 /* Synchronize subsequent queue accesses */
342 * Grab the CPPR and the "HE" field which indicates the source
343 * of the hypervisor interrupt (if any)
346 he = GETFIELD(TM_QW3_NSR_HE, (ack >> 8));
348 case TM_QW3_NSR_HE_NONE: /* Nothing to see here */
350 case TM_QW3_NSR_HE_PHYS: /* Physical thread interrupt */
353 /* Mark the priority pending */
354 xc->pending_prio |= 1 << cppr;
357 * A new interrupt should never have a CPPR less favored
358 * than our current one.
/* Diagnostic only: log when the acked CPPR is not below the current one */
360 if (cppr >= xc->cppr)
361 pr_err("CPU %d odd ack CPPR, got %d at %d\n",
362 smp_processor_id(), cppr, xc->cppr);
364 /* Update our idea of what the CPPR is */
367 case TM_QW3_NSR_HE_POOL: /* HV Pool interrupt (unused) */
368 case TM_QW3_NSR_HE_LSI: /* Legacy FW LSI (unused) */
369 pr_err("CPU %d got unexpected interrupt type HE=%d\n",
370 smp_processor_id(), he);
/* EOI hook: signal end of interrupt for @hw_irq through opal_int_eoi(). */
375 static void xive_native_eoi(u32 hw_irq)
378 * Not normally used except if specific interrupts need
379 * a workaround on EOI.
381 opal_int_eoi(hw_irq);
/*
 * Per-CPU setup hook: enable the pool VP of @cpu and push it on the CPU.
 *
 * The pool VP is xive_pool_vps + @cpu. It is enabled through
 * opal_xive_set_vp_info(), its CAM value is fetched with
 * opal_xive_get_vp_info(), and that value is written together with
 * TM_QW2W2_VP into the HV pool words of the thread management area.
 * OPAL failures are logged with pr_err().
 * NOTE(review): presumably nothing is done when no pool VP block was
 * allocated (xive_pool_vps == XIVE_INVALID_VP) - confirm.
 */
384 static void xive_native_setup_cpu(unsigned int cpu, struct xive_cpu *xc)
391 if (xive_pool_vps == XIVE_INVALID_VP)
394 /* Enable the pool VP */
395 vp = xive_pool_vps + cpu;
396 pr_debug("CPU %d setting up pool VP 0x%x\n", cpu, vp);
398 rc = opal_xive_set_vp_info(vp, OPAL_XIVE_VP_ENABLED, 0);
404 pr_err("Failed to enable pool VP on CPU %d\n", cpu);
408 /* Grab its CAM value */
409 rc = opal_xive_get_vp_info(vp, NULL, &vp_cam_be, NULL, NULL);
411 pr_err("Failed to get pool VP info CPU %d\n", cpu);
414 vp_cam = be64_to_cpu(vp_cam_be);
416 pr_debug("VP CAM = %llx\n", vp_cam);
418 /* Push it on the CPU (set LSMFB to 0xff to skip backlog scan) */
419 pr_debug("(Old HW value: %08x)\n",
420 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
421 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD0, 0xff);
422 out_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2,
423 TM_QW2W2_VP | vp_cam);
424 pr_debug("(New HW value: %08x)\n",
425 in_be32(xive_tima + TM_QW2_HV_POOL + TM_WORD2));
/*
 * Per-CPU teardown hook: pull the pool VP context from the CPU by reading
 * TM_SPC_PULL_POOL_CTX, then clear the flags of the pool VP of @cpu
 * (xive_pool_vps + @cpu) through opal_xive_set_vp_info().
 * NOTE(review): presumably nothing is done when no pool VP block was
 * allocated (xive_pool_vps == XIVE_INVALID_VP) - confirm.
 */
428 static void xive_native_teardown_cpu(unsigned int cpu, struct xive_cpu *xc)
433 if (xive_pool_vps == XIVE_INVALID_VP)
436 /* Pull the pool VP from the CPU */
437 in_be64(xive_tima + TM_SPC_PULL_POOL_CTX);
440 vp = xive_pool_vps + cpu;
442 rc = opal_xive_set_vp_info(vp, 0, 0);
/*
 * xive_native_sync_source - issue an OPAL sync for an interrupt source
 * @hw_irq: hardware interrupt number
 *
 * Calls opal_xive_sync() with the XIVE_SYNC_EAS type for @hw_irq.
 */
449 void xive_native_sync_source(u32 hw_irq)
451 opal_xive_sync(XIVE_SYNC_EAS, hw_irq);
453 EXPORT_SYMBOL_GPL(xive_native_sync_source);
/*
 * Backend operations table handed to xive_core_init() by
 * xive_native_init(). Every hook points at the native (OPAL) function
 * of the same role defined in this file.
 * NOTE(review): get_ipi and put_ipi are presumably only present under
 * CONFIG_SMP - confirm against the matching preprocessor guard.
 */
455 static const struct xive_ops xive_native_ops = {
456 .populate_irq_data = xive_native_populate_irq_data,
457 .configure_irq = xive_native_configure_irq,
458 .setup_queue = xive_native_setup_queue,
459 .cleanup_queue = xive_native_cleanup_queue,
460 .match = xive_native_match,
461 .shutdown = xive_native_shutdown,
462 .update_pending = xive_native_update_pending,
463 .eoi = xive_native_eoi,
464 .setup_cpu = xive_native_setup_cpu,
465 .teardown_cpu = xive_native_teardown_cpu,
466 .sync_source = xive_native_sync_source,
468 .get_ipi = xive_native_get_ipi,
469 .put_ipi = xive_native_put_ipi,
470 #endif /* CONFIG_SMP */
/*
 * Read the provisioning description from device-tree node @np.
 *
 * Fills xive_provision_size from ibm,xive-provision-page-size, counts and
 * reads the 4-byte entries of ibm,xive-provision-chips into a freshly
 * allocated xive_provision_chips array, and creates the xive-provision
 * slab cache. Property and cache failures are logged with pr_err().
 * NOTE(review): confirm which failures make the function return false.
 */
474 static bool xive_parse_provisioning(struct device_node *np)
478 if (of_property_read_u32(np, "ibm,xive-provision-page-size",
479 &xive_provision_size) < 0)
/* Number of 4-byte elements in the chip list property */
481 rc = of_property_count_elems_of_size(np, "ibm,xive-provision-chips", 4);
483 pr_err("Error %d getting provision chips array\n", rc);
486 xive_provision_chip_count = rc;
/* One u32 (4 bytes) per chip */
490 xive_provision_chips = kzalloc(4 * xive_provision_chip_count,
492 if (WARN_ON(!xive_provision_chips))
495 rc = of_property_read_u32_array(np, "ibm,xive-provision-chips",
496 xive_provision_chips,
497 xive_provision_chip_count);
499 pr_err("Error %d reading provision chips array\n", rc);
503 xive_provision_cache = kmem_cache_create("xive-provision",
507 if (!xive_provision_cache) {
508 pr_err("Failed to allocate provision cache\n");
/*
 * Allocate one pool VP per possible CPU id (nr_cpu_ids) and record the
 * base of the block in xive_pool_vps. A failed allocation triggers a
 * warning and an error message.
 */
514 static void xive_native_setup_pools(void)
516 /* Allocate a pool big enough */
517 pr_debug("XIVE: Allocating VP block for pool size %d\n", nr_cpu_ids);
519 xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
520 if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
521 pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
523 pr_debug("XIVE: Pool VPs allocated at 0x%x for %d max CPUs\n",
524 xive_pool_vps, nr_cpu_ids);
/*
 * xive_native_default_eq_shift - return the queue size shift in use
 *
 * Returns xive_queue_shift, the value selected in xive_native_init().
 */
527 u32 xive_native_default_eq_shift(void)
529 return xive_queue_shift;
531 EXPORT_SYMBOL_GPL(xive_native_default_eq_shift);
/*
 * xive_native_init - probe and initialize the native XIVE backend
 *
 * Visible steps, in order:
 *  - check xive_cmdline_disabled;
 *  - find the ibm,opal-xive-pe device-tree node;
 *  - map resource 1 of that node as the thread management area;
 *  - read ibm,xive-#priorities;
 *  - pick a queue size from ibm,xive-eq-sizes;
 *  - publish the thread management area to KVM for every possible CPU;
 *  - parse the provisioning properties;
 *  - reset the XIVE into OPAL_XIVE_MODE_EXPL;
 *  - allocate the pool VPs;
 *  - register xive_native_ops with xive_core_init(), resetting the XIVE
 *    back to OPAL_XIVE_MODE_EMU if that fails.
 *
 * NOTE(review): confirm the boolean returned on each early-exit path.
 */
533 bool xive_native_init(void)
535 struct device_node *np;
538 struct property *prop;
544 if (xive_cmdline_disabled)
547 pr_devel("xive_native_init()\n");
548 np = of_find_compatible_node(NULL, NULL, "ibm,opal-xive-pe");
550 pr_devel("not found !\n");
553 pr_devel("Found %s\n", np->full_name);
555 /* Resource 1 is HV window */
556 if (of_address_to_resource(np, 1, &r)) {
557 pr_err("Failed to get thread mgmnt area resource\n");
560 tima = ioremap(r.start, resource_size(&r));
562 pr_err("Failed to map thread mgmnt area\n");
566 /* Read number of priorities */
567 if (of_property_read_u32(np, "ibm,xive-#priorities", &val) == 0)
570 /* Iterate the EQ sizes and pick one */
/*
 * Each listed size overwrites xive_queue_shift in turn.
 * NOTE(review): presumably the loop stops at a size equal to PAGE_SHIFT.
 */
571 of_property_for_each_u32(np, "ibm,xive-eq-sizes", prop, p, val) {
572 xive_queue_shift = val;
573 if (val == PAGE_SHIFT)
577 /* Configure Thread Management areas for KVM */
578 for_each_possible_cpu(cpu)
579 kvmppc_set_xive_tima(cpu, r.start, tima);
581 /* Grab size of provisioning pages */
582 xive_parse_provisioning(np);
584 /* Switch the XIVE to exploitation mode */
585 rc = opal_xive_reset(OPAL_XIVE_MODE_EXPL);
587 pr_err("Switch to exploitation mode failed with error %lld\n", rc);
591 /* Setup some dummy HV pool VPs */
592 xive_native_setup_pools();
594 /* Initialize XIVE core with our backend */
595 if (!xive_core_init(&xive_native_ops, tima, TM_QW3_HV_PHYS,
597 opal_xive_reset(OPAL_XIVE_MODE_EMU);
/* Queue size in kB is 2^(xive_queue_shift - 10) */
600 pr_info("Using %dkB queues\n", 1 << (xive_queue_shift - 10));
/*
 * Donate one provisioning page to OPAL for every chip listed in
 * xive_provision_chips. Each page is allocated from xive_provision_cache
 * and passed by physical address to opal_xive_donate_page(). An
 * allocation failure is logged with pr_err().
 * NOTE(review): presumably returns false on allocation failure and true
 * otherwise - confirm.
 */
604 static bool xive_native_provision_pages(void)
609 for (i = 0; i < xive_provision_chip_count; i++) {
610 u32 chip = xive_provision_chips[i];
613 * XXX TODO: Try to make the allocation local to the node where
616 p = kmem_cache_alloc(xive_provision_cache, GFP_KERNEL);
618 pr_err("Failed to allocate provisioning page\n");
621 opal_xive_donate_page(chip, __pa(p));
/*
 * xive_native_alloc_vp_block - allocate a block of VPs from OPAL
 * @max_vcpus: number of VPs the block must be able to hold
 *
 * The request is expressed to OPAL as a power-of-two order derived from
 * @max_vcpus. When OPAL answers OPAL_XIVE_PROVISIONING, pages are donated
 * with xive_native_provision_pages(); if that fails, or on any other
 * OPAL error, XIVE_INVALID_VP is returned.
 * NOTE(review): presumably the allocation is retried after a successful
 * donation and the base VP of the block is returned on success - confirm.
 */
626 u32 xive_native_alloc_vp_block(u32 max_vcpus)
/*
 * Start from floor(log2(max_vcpus)).
 * NOTE(review): presumably order is bumped by one when max_vcpus is not
 * an exact power of two.
 */
631 order = fls(max_vcpus) - 1;
632 if (max_vcpus > (1 << order))
635 pr_info("VP block alloc, for max VCPUs %d use order %d\n",
639 rc = opal_xive_alloc_vp_block(order);
644 case OPAL_XIVE_PROVISIONING:
645 if (!xive_native_provision_pages())
646 return XIVE_INVALID_VP;
650 pr_err("OPAL failed to allocate VCPUs order %d, err %lld\n",
652 return XIVE_INVALID_VP;
658 EXPORT_SYMBOL_GPL(xive_native_alloc_vp_block);
/*
 * xive_native_free_vp_block - release a VP block back to OPAL
 * @vp_base: base VP of the block; XIVE_INVALID_VP is checked for first
 *
 * An OPAL error is reported with pr_warn().
 */
660 void xive_native_free_vp_block(u32 vp_base)
664 if (vp_base == XIVE_INVALID_VP)
667 rc = opal_xive_free_vp_block(vp_base);
669 pr_warn("OPAL error %lld freeing VP block\n", rc);
671 EXPORT_SYMBOL_GPL(xive_native_free_vp_block);
/*
 * xive_native_enable_vp - enable a VP through OPAL
 * @vp_id: VP identifier
 *
 * Sets OPAL_XIVE_VP_ENABLED with opal_xive_set_vp_info().
 * Returns 0 on success, -EIO on any OPAL error.
 */
673 int xive_native_enable_vp(u32 vp_id)
678 rc = opal_xive_set_vp_info(vp_id, OPAL_XIVE_VP_ENABLED, 0);
683 return rc ? -EIO : 0;
685 EXPORT_SYMBOL_GPL(xive_native_enable_vp);
/*
 * xive_native_disable_vp - disable a VP through OPAL
 * @vp_id: VP identifier
 *
 * Clears the VP flags with opal_xive_set_vp_info().
 * Returns 0 on success, -EIO on any OPAL error.
 */
687 int xive_native_disable_vp(u32 vp_id)
692 rc = opal_xive_set_vp_info(vp_id, 0, 0);
697 return rc ? -EIO : 0;
699 EXPORT_SYMBOL_GPL(xive_native_disable_vp);
/*
 * xive_native_get_vp_info - fetch the CAM and chip id of a VP from OPAL
 * @vp_id:       VP identifier
 * @out_cam_id:  receives the low 32 bits of the CAM value
 * @out_chip_id: receives the chip id
 *
 * Both outputs are converted from big-endian to CPU endianness.
 * NOTE(review): confirm the return value on success and on OPAL error.
 */
701 int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id)
704 __be32 vp_chip_id_be;
707 rc = opal_xive_get_vp_info(vp_id, NULL, &vp_cam_be, NULL, &vp_chip_id_be);
/* OPAL returns a 64-bit CAM; only its low 32 bits are reported */
710 *out_cam_id = be64_to_cpu(vp_cam_be) & 0xffffffffu;
711 *out_chip_id = be32_to_cpu(vp_chip_id_be);
715 EXPORT_SYMBOL_GPL(xive_native_get_vp_info);