/*
 * Local APIC virtualization
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright (C) 2007 Novell
 * Copyright (C) 2007 Intel
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Dor Laor <dor.laor@qumranet.com>
 *   Gregory Haskins <ghaskins@novell.com>
 *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
 *
 * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/smp.h>
#include <linux/hrtimer.h>
#include <linux/io.h>
#include <linux/export.h>
#include <linux/math64.h>
#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/apicdef.h>
#include <asm/delay.h>
#include <linux/atomic.h>
#include <linux/jump_label.h>
#include "kvm_cache_regs.h"
#include "irq.h"
#include "trace.h"
#include "x86.h"
#include "cpuid.h"
#include "hyperv.h"

#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif

#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"

#define APIC_BUS_CYCLE_NS 1

/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
#define apic_debug(fmt, arg...)

/* 14 is the version for Xeon and Pentium 8.4.8 */
#define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
#define LAPIC_MMIO_LENGTH               (1 << 12)
/* the following defines are not in apicdef.h */
#define APIC_SHORT_MASK                 0xc0000
#define APIC_DEST_NOSHORT               0x0
#define APIC_DEST_MASK                  0x800
#define MAX_APIC_VECTOR                 256
#define APIC_VECTORS_PER_REG            32

#define APIC_BROADCAST                  0xFF
#define X2APIC_BROADCAST                0xFFFFFFFFul

static inline int apic_test_vector(int vec, void *bitmap)
{
        return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return apic_test_vector(vector, apic->regs + APIC_ISR) ||
                apic_test_vector(vector, apic->regs + APIC_IRR);
}

static inline void apic_clear_vector(int vec, void *bitmap)
{
        clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
        return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
        return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}

struct static_key_deferred apic_hw_disabled __read_mostly;
struct static_key_deferred apic_sw_disabled __read_mostly;

static inline int apic_enabled(struct kvm_lapic *apic)
{
        return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
}

#define LVT_MASK        \
        (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)

#define LINT_MASK       \
        (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
         APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)

static inline u8 kvm_xapic_id(struct kvm_lapic *apic)
{
        return kvm_lapic_get_reg(apic, APIC_ID) >> 24;
}

static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
{
        return apic->vcpu->vcpu_id;
}

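/*
 * Decode a logical destination into a slice of the optimized map.  In
 * x2APIC mode the MDA is (cluster ID << 16 | 16-bit member bitmask);
 * because the x2APIC LDR is derived from the APIC ID, each cluster
 * occupies a contiguous 16-entry block of phys_map, so the cluster ID
 * times 16 indexes straight into it.
 */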
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
                u32 dest_id, struct kvm_lapic ***cluster, u16 *mask)
{
        switch (map->mode) {
        case KVM_APIC_MODE_X2APIC: {
                u32 offset = (dest_id >> 16) * 16;
                u32 max_apic_id = map->max_apic_id;

                if (offset <= max_apic_id) {
                        u8 cluster_size = min(max_apic_id - offset + 1, 16U);

                        *cluster = &map->phys_map[offset];
                        *mask = dest_id & (0xffff >> (16 - cluster_size));
                } else {
                        *mask = 0;
                }

                return true;
                }
        case KVM_APIC_MODE_XAPIC_FLAT:
                *cluster = map->xapic_flat_map;
                *mask = dest_id & 0xff;
                return true;
        case KVM_APIC_MODE_XAPIC_CLUSTER:
                *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
                *mask = dest_id & 0xf;
                return true;
        default:
                /* Not optimized. */
                return false;
        }
}

static void kvm_apic_map_free(struct rcu_head *rcu)
{
        struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);

        kvfree(map);
}

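/*
 * Rebuild the ID-to-LAPIC lookup table.  The new map is constructed
 * under apic_map_lock, published with rcu_assign_pointer(), and the
 * old one is freed after a grace period, so lookups only need
 * rcu_read_lock().
 */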
static void recalculate_apic_map(struct kvm *kvm)
{
        struct kvm_apic_map *new, *old = NULL;
        struct kvm_vcpu *vcpu;
        int i;
        u32 max_id = 255; /* enough space for any xAPIC ID */

        mutex_lock(&kvm->arch.apic_map_lock);

        kvm_for_each_vcpu(i, vcpu, kvm)
                if (kvm_apic_present(vcpu))
                        max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));

        new = kvm_kvzalloc(sizeof(struct kvm_apic_map) +
                           sizeof(struct kvm_lapic *) * ((u64)max_id + 1));

        if (!new)
                goto out;

        new->max_apic_id = max_id;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
                struct kvm_lapic **cluster;
                u16 mask;
                u32 ldr;
                u8 xapic_id;
                u32 x2apic_id;

                if (!kvm_apic_present(vcpu))
                        continue;

                xapic_id = kvm_xapic_id(apic);
                x2apic_id = kvm_x2apic_id(apic);

                /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
                if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
                                x2apic_id <= new->max_apic_id)
                        new->phys_map[x2apic_id] = apic;
                /*
                 * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
                 * prevent them from masking VCPUs with APIC ID <= 0xff.
                 */
                if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
                        new->phys_map[xapic_id] = apic;

                ldr = kvm_lapic_get_reg(apic, APIC_LDR);

                if (apic_x2apic_mode(apic)) {
                        new->mode |= KVM_APIC_MODE_X2APIC;
                } else if (ldr) {
                        ldr = GET_APIC_LOGICAL_ID(ldr);
                        if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
                                new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
                        else
                                new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
                }

                if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
                        continue;

                if (mask)
                        cluster[ffs(mask) - 1] = apic;
        }
out:
        old = rcu_dereference_protected(kvm->arch.apic_map,
                        lockdep_is_held(&kvm->arch.apic_map_lock));
        rcu_assign_pointer(kvm->arch.apic_map, new);
        mutex_unlock(&kvm->arch.apic_map_lock);

        if (old)
                call_rcu(&old->rcu, kvm_apic_map_free);

        kvm_make_scan_ioapic_request(kvm);
}

static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
{
        bool enabled = val & APIC_SPIV_APIC_ENABLED;

        kvm_lapic_set_reg(apic, APIC_SPIV, val);

        if (enabled != apic->sw_enabled) {
                apic->sw_enabled = enabled;
                if (enabled) {
                        static_key_slow_dec_deferred(&apic_sw_disabled);
                        recalculate_apic_map(apic->vcpu->kvm);
                } else
                        static_key_slow_inc(&apic_sw_disabled.key);
        }
}

static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
{
        kvm_lapic_set_reg(apic, APIC_ID, id << 24);
        recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
{
        kvm_lapic_set_reg(apic, APIC_LDR, id);
        recalculate_apic_map(apic->vcpu->kvm);
}

static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
        u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));

        WARN_ON_ONCE(id != apic->vcpu->vcpu_id);

        kvm_lapic_set_reg(apic, APIC_ID, id);
        kvm_lapic_set_reg(apic, APIC_LDR, ldr);
        recalculate_apic_map(apic->vcpu->kvm);
}

static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
{
        return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
}

static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
{
        return kvm_lapic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
}

static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
}

static inline int apic_lvtt_period(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
}

static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
{
        return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
}

static inline int apic_lvt_nmi_mode(u32 lvt_val)
{
        return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
}

void kvm_apic_set_version(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        struct kvm_cpuid_entry2 *feat;
        u32 v = APIC_VERSION;

        if (!lapic_in_kernel(vcpu))
                return;

        feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
        if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
                v |= APIC_LVR_DIRECTED_EOI;
        kvm_lapic_set_reg(apic, APIC_LVR, v);
}

static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
        LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
        LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
        LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
        LINT_MASK, LINT_MASK,   /* LVT0-1 */
        LVT_MASK                /* LVTERR */
};

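/*
 * The IRR/ISR/TMR "registers" are 256-bit bitmaps laid out as eight
 * 32-bit words spaced 0x10 apart.  Scan from the top word down and use
 * __fls() within the first nonzero word to find the highest set bit,
 * i.e. the highest pending vector.
 */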
static int find_highest_vector(void *bitmap)
{
        int vec;
        u32 *reg;

        for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
             vec >= 0; vec -= APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                if (*reg)
                        return __fls(*reg) + vec;
        }

        return -1;
}

static u8 count_vectors(void *bitmap)
{
        int vec;
        u32 *reg;
        u8 count = 0;

        for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
                reg = bitmap + REG_POS(vec);
                count += hweight32(*reg);
        }

        return count;
}

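/*
 * Sync the 256-bit Posted-Interrupt Request descriptor into the vIRR.
 * xchg() atomically claims each nonzero PIR word, so a bit set by a
 * concurrent poster is never lost, and ORs it into the corresponding
 * IRR word.  Returns the highest vector now pending in the IRR, or -1
 * if none.
 */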
int __kvm_apic_update_irr(u32 *pir, void *regs)
{
        u32 i, vec;
        u32 pir_val, irr_val;
        int max_irr = -1;

        for (i = vec = 0; i <= 7; i++, vec += 32) {
                pir_val = READ_ONCE(pir[i]);
                irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
                if (pir_val) {
                        irr_val |= xchg(&pir[i], 0);
                        *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
                }
                if (irr_val)
                        max_irr = __fls(irr_val) + vec;
        }

        return max_irr;
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);

int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        int max_irr;

        max_irr = __kvm_apic_update_irr(pir, apic->regs);

        kvm_make_request(KVM_REQ_EVENT, vcpu);
        return max_irr;
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);

static inline int apic_search_irr(struct kvm_lapic *apic)
{
        return find_highest_vector(apic->regs + APIC_IRR);
}

static inline int apic_find_highest_irr(struct kvm_lapic *apic)
{
        int result;

        /*
         * Note that irr_pending is just a hint. It will always be
         * true with virtual interrupt delivery enabled.
         */
        if (!apic->irr_pending)
                return -1;

        result = apic_search_irr(apic);
        ASSERT(result == -1 || result >= 16);

        return result;
}

static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        vcpu = apic->vcpu;

        if (unlikely(vcpu->arch.apicv_active)) {
                /* try to update RVI */
                apic_clear_vector(vec, apic->regs + APIC_IRR);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
        } else {
                apic->irr_pending = false;
                apic_clear_vector(vec, apic->regs + APIC_IRR);
                if (apic_search_irr(apic) != -1)
                        apic->irr_pending = true;
        }
}

static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
                return;

        vcpu = apic->vcpu;

        /*
         * With APIC virtualization enabled, all caching is disabled
         * because the processor can modify ISR under the hood.  Instead
         * just set SVI.
         */
        if (unlikely(vcpu->arch.apicv_active))
                kvm_x86_ops->hwapic_isr_update(vcpu, vec);
        else {
                ++apic->isr_count;
                BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
                /*
                 * The ISR (in-service register) bit is set when an
                 * interrupt is injected, and the injected vector is
                 * always the highest pending one, so the most recently
                 * set bit matches the highest bit in the ISR.
                 */
                apic->highest_isr_cache = vec;
        }
}

static inline int apic_find_highest_isr(struct kvm_lapic *apic)
{
        int result;

        /*
         * Note that isr_count is always 1, and highest_isr_cache
         * is always -1, with APIC virtualization enabled.
         */
        if (!apic->isr_count)
                return -1;
        if (likely(apic->highest_isr_cache != -1))
                return apic->highest_isr_cache;

        result = find_highest_vector(apic->regs + APIC_ISR);
        ASSERT(result == -1 || result >= 16);

        return result;
}

static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu;

        if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
                return;

        vcpu = apic->vcpu;

        /*
         * We do get here for APIC virtualization enabled if the guest
         * uses the Hyper-V APIC enlightenment.  In this case we may need
         * to trigger a new interrupt delivery by writing the SVI field;
         * on the other hand isr_count and highest_isr_cache are unused
         * and must be left alone.
         */
        if (unlikely(vcpu->arch.apicv_active))
                kvm_x86_ops->hwapic_isr_update(vcpu,
                                               apic_find_highest_isr(apic));
        else {
                --apic->isr_count;
                BUG_ON(apic->isr_count < 0);
                apic->highest_isr_cache = -1;
        }
}

int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
{
        /*
         * This may race with setting of irr in __apic_accept_irq() and
         * the value returned may be stale, but kvm_vcpu_kick() in
         * __apic_accept_irq() will cause an immediate vmexit and the
         * value will be recalculated on the next vmentry.
         */
        return apic_find_highest_irr(vcpu->arch.apic);
}

static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode,
                             struct dest_map *dest_map);

int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                     struct dest_map *dest_map)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
                        irq->level, irq->trig_mode, dest_map);
}

static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
{
        return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
                                      sizeof(val));
}

static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
{
        return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
                                      sizeof(*val));
}

static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
{
        return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
}

static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
{
        u8 val;

        if (pv_eoi_get_user(vcpu, &val) < 0)
                apic_debug("Can't read EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
        return val & 0x1;
}

static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
                apic_debug("Can't set EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
{
        if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
                apic_debug("Can't clear EOI MSR value: 0x%llx\n",
                           (unsigned long long)vcpu->arch.pv_eoi.msr_val);
                return;
        }
        __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
}

static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
{
        int highest_irr;

        if (apic->vcpu->arch.apicv_active)
                kvm_x86_ops->sync_pir_to_irr(apic->vcpu);
        highest_irr = apic_find_highest_irr(apic);
        if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
                return -1;
        return highest_irr;
}

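/*
 * Recompute the Processor Priority Register as the SDM specifies: if
 * the TPR's priority class (bits 7:4) is at least that of the highest
 * in-service vector, PPR is the TPR; otherwise it is that vector's
 * class with the low nibble cleared.  E.g. TPR 0x30 with highest ISR
 * vector 0x41 yields PPR 0x40.
 */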
static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
{
        u32 tpr, isrv, ppr, old_ppr;
        int isr;

        old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
        tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
        isr = apic_find_highest_isr(apic);
        isrv = (isr != -1) ? isr : 0;

        if ((tpr & 0xf0) >= (isrv & 0xf0))
                ppr = tpr & 0xff;
        else
                ppr = isrv & 0xf0;

        apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
                   apic, ppr, isr, isrv);

        *new_ppr = ppr;
        if (old_ppr != ppr)
                kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);

        return ppr < old_ppr;
}

static void apic_update_ppr(struct kvm_lapic *apic)
{
        u32 ppr;

        if (__apic_update_ppr(apic, &ppr) &&
            apic_has_interrupt_for_ppr(apic, ppr) != -1)
                kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}

void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
{
        apic_update_ppr(vcpu->arch.apic);
}
EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);

static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
{
        kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
        apic_update_ppr(apic);
}

static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
        return mda == (apic_x2apic_mode(apic) ?
                        X2APIC_BROADCAST : APIC_BROADCAST);
}

static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
        if (kvm_apic_broadcast(apic, mda))
                return true;

        if (apic_x2apic_mode(apic))
                return mda == kvm_x2apic_id(apic);

        /*
         * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
         * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
         * this allows unique addressing of VCPUs with APIC ID over 0xff.
         * The 0xff check is needed because the xAPIC ID is writeable.
         */
        if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
                return true;

        return mda == kvm_xapic_id(apic);
}

static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
{
        u32 logical_id;

        if (kvm_apic_broadcast(apic, mda))
                return true;

        logical_id = kvm_lapic_get_reg(apic, APIC_LDR);

        if (apic_x2apic_mode(apic))
                return ((logical_id >> 16) == (mda >> 16))
                       && (logical_id & mda & 0xffff) != 0;

        logical_id = GET_APIC_LOGICAL_ID(logical_id);

        switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
                return (logical_id & mda) != 0;
        case APIC_DFR_CLUSTER:
                return ((logical_id >> 4) == (mda >> 4))
                       && (logical_id & mda & 0xf) != 0;
        default:
                apic_debug("Bad DFR vcpu %d: %08x\n",
                           apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
                return false;
        }
}

/* The KVM local APIC implementation has two quirks:
 *
 *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
 *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
 *    KVM doesn't do that aliasing.
 *
 *  - in-kernel IOAPIC messages have to be delivered directly to
 *    x2APIC, because the kernel does not support interrupt remapping.
 *    In order to support broadcast without interrupt remapping, x2APIC
 *    rewrites the destination of non-IPI messages from APIC_BROADCAST
 *    to X2APIC_BROADCAST.
 *
 * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
 * important when userspace wants to use x2APIC-format MSIs, because
 * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
 */
static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
                struct kvm_lapic *source, struct kvm_lapic *target)
{
        bool ipi = source != NULL;

        if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
            !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
                return X2APIC_BROADCAST;

        return dest_id;
}

bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                           int short_hand, unsigned int dest, int dest_mode)
{
        struct kvm_lapic *target = vcpu->arch.apic;
        u32 mda = kvm_apic_mda(vcpu, dest, source, target);

        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x\n",
                   target, source, dest, dest_mode, short_hand);

        ASSERT(target);
        switch (short_hand) {
        case APIC_DEST_NOSHORT:
                if (dest_mode == APIC_DEST_PHYSICAL)
                        return kvm_apic_match_physical_addr(target, mda);
                else
                        return kvm_apic_match_logical_addr(target, mda);
        case APIC_DEST_SELF:
                return target == source;
        case APIC_DEST_ALLINC:
                return true;
        case APIC_DEST_ALLBUT:
                return target != source;
        default:
                apic_debug("kvm: apic: Bad dest shorthand value %x\n",
                           short_hand);
                return false;
        }
}
EXPORT_SYMBOL_GPL(kvm_apic_match_dest);

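/*
 * Vector hashing: deterministically spread lowest-priority interrupts
 * across the destination set by choosing the (vector % dest_vcpus)-th
 * set bit of *bitmap (zero-based) as the target's index.
 */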
int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
                       const unsigned long *bitmap, u32 bitmap_size)
{
        u32 mod;
        int i, idx = -1;

        mod = vector % dest_vcpus;

        for (i = 0; i <= mod; i++) {
                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
                BUG_ON(idx == bitmap_size);
        }

        return idx;
}

static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
{
        if (!kvm->arch.disabled_lapic_found) {
                kvm->arch.disabled_lapic_found = true;
                printk(KERN_INFO
                       "Disabled LAPIC found during irq injection\n");
        }
}

static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
                struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
{
        if (kvm->arch.x2apic_broadcast_quirk_disabled) {
                if ((irq->dest_id == APIC_BROADCAST &&
                                map->mode != KVM_APIC_MODE_X2APIC))
                        return true;
                if (irq->dest_id == X2APIC_BROADCAST)
                        return true;
        } else {
                bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
                if (irq->dest_id == (x2apic_ipi ?
                                     X2APIC_BROADCAST : APIC_BROADCAST))
                        return true;
        }

        return false;
}

/* Return true if the interrupt can be handled by using *bitmap as index mask
 * for valid destinations in *dst array.
 * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
 * Note: we may have zero kvm_lapic destinations when we return true, which
 * means that the interrupt should be dropped.  In this case, *bitmap would be
 * zero and *dst undefined.
 */
static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
                struct kvm_lapic **src, struct kvm_lapic_irq *irq,
                struct kvm_apic_map *map, struct kvm_lapic ***dst,
                unsigned long *bitmap)
{
        int i, lowest;

        if (irq->shorthand == APIC_DEST_SELF && src) {
                *dst = src;
                *bitmap = 1;
                return true;
        } else if (irq->shorthand)
                return false;

        if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
                return false;

        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
                if (irq->dest_id > map->max_apic_id) {
                        *bitmap = 0;
                } else {
                        *dst = &map->phys_map[irq->dest_id];
                        *bitmap = 1;
                }
                return true;
        }

        *bitmap = 0;
        if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
                                (u16 *)bitmap))
                return false;

        if (!kvm_lowest_prio_delivery(irq))
                return true;

        if (!kvm_vector_hashing_enabled()) {
                lowest = -1;
                for_each_set_bit(i, bitmap, 16) {
                        if (!(*dst)[i])
                                continue;
                        if (lowest < 0)
                                lowest = i;
                        else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
                                                (*dst)[lowest]->vcpu) < 0)
                                lowest = i;
                }
        } else {
                if (!*bitmap)
                        return true;

                lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
                                bitmap, 16);

                if (!(*dst)[lowest]) {
                        kvm_apic_disabled_lapic_found(kvm);
                        *bitmap = 0;
                        return true;
                }
        }

        *bitmap = (lowest >= 0) ? 1 << lowest : 0;

        return true;
}

bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
                struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
{
        struct kvm_apic_map *map;
        unsigned long bitmap;
        struct kvm_lapic **dst = NULL;
        int i;
        bool ret;

        *r = -1;

        if (irq->shorthand == APIC_DEST_SELF) {
                *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
                return true;
        }

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
        if (ret)
                for_each_set_bit(i, &bitmap, 16) {
                        if (!dst[i])
                                continue;
                        if (*r < 0)
                                *r = 0;
                        *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
                }

        rcu_read_unlock();
        return ret;
}

/*
 * This routine tries to handle interrupts in posted mode, here is how
 * it deals with different cases:
 * - For single-destination interrupts, handle it in posted mode
 * - Else if vector hashing is enabled and it is a lowest-priority
 *   interrupt, handle it in posted mode and use the following mechanism
 *   to find the destination vCPU.
 *      1. For lowest-priority interrupts, store all the possible
 *         destination vCPUs in an array.
 *      2. Use "guest vector % max number of destination vCPUs" to find
 *         the right destination vCPU in the array for the lowest-priority
 *         interrupt.
 * - Otherwise, use remapped mode to inject the interrupt.
 */
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
                        struct kvm_vcpu **dest_vcpu)
{
        struct kvm_apic_map *map;
        unsigned long bitmap;
        struct kvm_lapic **dst = NULL;
        bool ret = false;

        if (irq->shorthand)
                return false;

        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);

        if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
                        hweight16(bitmap) == 1) {
                unsigned long i = find_first_bit(&bitmap, 16);

                if (dst[i]) {
                        *dest_vcpu = dst[i]->vcpu;
                        ret = true;
                }
        }

        rcu_read_unlock();
        return ret;
}

/*
 * Add a pending IRQ into lapic.
 * Return 1 if successfully added and 0 if discarded.
 */
static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
                             int vector, int level, int trig_mode,
                             struct dest_map *dest_map)
{
        int result = 0;
        struct kvm_vcpu *vcpu = apic->vcpu;

        trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
                                  trig_mode, vector);
        switch (delivery_mode) {
        case APIC_DM_LOWEST:
                vcpu->arch.apic_arb_prio++;
                /* fall through */
        case APIC_DM_FIXED:
                if (unlikely(trig_mode && !level))
                        break;

                /* FIXME add logic for vcpu on reset */
                if (unlikely(!apic_enabled(apic)))
                        break;

                result = 1;

                if (dest_map) {
                        __set_bit(vcpu->vcpu_id, dest_map->map);
                        dest_map->vectors[vcpu->vcpu_id] = vector;
                }

                if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
                        if (trig_mode)
                                kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
                        else
                                apic_clear_vector(vector, apic->regs + APIC_TMR);
                }

                if (vcpu->arch.apicv_active)
                        kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
                else {
                        kvm_lapic_set_irr(vector, apic);

                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                }
                break;

        case APIC_DM_REMRD:
                result = 1;
                vcpu->arch.pv.pv_unhalted = 1;
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_SMI:
                result = 1;
                kvm_make_request(KVM_REQ_SMI, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_NMI:
                result = 1;
                kvm_inject_nmi(vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_INIT:
                if (!trig_mode || level) {
                        result = 1;
                        /* assumes that there are only KVM_APIC_INIT/SIPI */
                        apic->pending_events = (1UL << KVM_APIC_INIT);
                        /*
                         * make sure pending_events is visible before sending
                         * the request
                         */
                        smp_wmb();
                        kvm_make_request(KVM_REQ_EVENT, vcpu);
                        kvm_vcpu_kick(vcpu);
                } else {
                        apic_debug("Ignoring de-assert INIT to vcpu %d\n",
                                   vcpu->vcpu_id);
                }
                break;

        case APIC_DM_STARTUP:
                apic_debug("SIPI to vcpu %d vector 0x%02x\n",
                           vcpu->vcpu_id, vector);
                result = 1;
                apic->sipi_vector = vector;
                /* make sure sipi_vector is visible for the receiver */
                smp_wmb();
                set_bit(KVM_APIC_SIPI, &apic->pending_events);
                kvm_make_request(KVM_REQ_EVENT, vcpu);
                kvm_vcpu_kick(vcpu);
                break;

        case APIC_DM_EXTINT:
                /*
                 * Should only be called by kvm_apic_local_deliver() with LVT0,
                 * before NMI watchdog was enabled. Already handled by
                 * kvm_apic_accept_pic_intr().
                 */
                break;

        default:
                printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
                       delivery_mode);
                break;
        }
        return result;
}

int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
{
        return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
}

static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
{
        return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
}

static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
        int trigger_mode;

        /* Notify the IOAPIC only if it owns the vector. */
        if (!kvm_ioapic_handles_vector(apic, vector))
                return;

        /* Request a KVM exit to inform the userspace IOAPIC. */
        if (irqchip_split(apic->vcpu->kvm)) {
                apic->vcpu->arch.pending_ioapic_eoi = vector;
                kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
                return;
        }

        if (apic_test_vector(vector, apic->regs + APIC_TMR))
                trigger_mode = IOAPIC_LEVEL_TRIG;
        else
                trigger_mode = IOAPIC_EDGE_TRIG;

        kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
}

static int apic_set_eoi(struct kvm_lapic *apic)
{
        int vector = apic_find_highest_isr(apic);

        trace_kvm_eoi(apic, vector);

        /*
         * Not every EOI write has a corresponding ISR bit set; one
         * example is when the kernel checks the timer in setup_IO_APIC.
         */
        if (vector == -1)
                return vector;

        apic_clear_isr(vector, apic);
        apic_update_ppr(apic);

        if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap))
                kvm_hv_synic_send_eoi(apic->vcpu, vector);

        kvm_ioapic_send_eoi(apic, vector);
        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
        return vector;
}

/*
 * This interface assumes a trap-like exit, which has already performed
 * the desired side effects, including the vISR and vPPR updates.
 */
void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
{
        struct kvm_lapic *apic = vcpu->arch.apic;

        trace_kvm_eoi(apic, vector);

        kvm_ioapic_send_eoi(apic, vector);
        kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);

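/*
 * Decode ICR/ICR2 into a struct kvm_lapic_irq and hand it to the
 * common delivery path.  In x2APIC mode ICR2 holds the full 32-bit
 * destination ID; in xAPIC mode the destination sits in its top byte.
 */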
static void apic_send_ipi(struct kvm_lapic *apic)
{
        u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
        u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
        struct kvm_lapic_irq irq;

        irq.vector = icr_low & APIC_VECTOR_MASK;
        irq.delivery_mode = icr_low & APIC_MODE_MASK;
        irq.dest_mode = icr_low & APIC_DEST_MASK;
        irq.level = (icr_low & APIC_INT_ASSERT) != 0;
        irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
        irq.shorthand = icr_low & APIC_SHORT_MASK;
        irq.msi_redir_hint = false;
        if (apic_x2apic_mode(apic))
                irq.dest_id = icr_high;
        else
                irq.dest_id = GET_APIC_DEST_FIELD(icr_high);

        trace_kvm_apic_ipi(icr_low, irq.dest_id);

        apic_debug("icr_high 0x%x, icr_low 0x%x, "
                   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
                   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
                   "msi_redir_hint 0x%x\n",
                   icr_high, icr_low, irq.shorthand, irq.dest_id,
                   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
                   irq.vector, irq.msi_redir_hint);

        kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
}

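/*
 * Current-count register: the remaining time until target_expiration,
 * taken modulo the period and converted back to timer ticks, i.e.
 * remaining_ns / (APIC_BUS_CYCLE_NS * divide_count).
 */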
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
        ktime_t remaining, now;
        s64 ns;
        u32 tmcct;

        ASSERT(apic != NULL);

        /* if initial count is 0, current count should also be 0 */
        if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
                apic->lapic_timer.period == 0)
                return 0;

        now = ktime_get();
        remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
        if (ktime_to_ns(remaining) < 0)
                remaining = 0;

        ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
        tmcct = div64_u64(ns,
                         (APIC_BUS_CYCLE_NS * apic->divide_count));

        return tmcct;
}

static void __report_tpr_access(struct kvm_lapic *apic, bool write)
{
        struct kvm_vcpu *vcpu = apic->vcpu;
        struct kvm_run *run = vcpu->run;

        kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
        run->tpr_access.rip = kvm_rip_read(vcpu);
        run->tpr_access.is_write = write;
}

static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
{
        if (apic->vcpu->arch.tpr_access_reporting)
                __report_tpr_access(apic, write);
}

static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
{
        u32 val = 0;

        if (offset >= LAPIC_MMIO_LENGTH)
                return 0;

        switch (offset) {
        case APIC_ARBPRI:
                apic_debug("Access APIC ARBPRI register which is for P6\n");
                break;

        case APIC_TMCCT:        /* Timer CCR */
                if (apic_lvtt_tscdeadline(apic))
                        return 0;

                val = apic_get_tmcct(apic);
                break;
        case APIC_PROCPRI:
                apic_update_ppr(apic);
                val = kvm_lapic_get_reg(apic, offset);
                break;
        case APIC_TASKPRI:
                report_tpr_access(apic, false);
                /* fall thru */
        default:
                val = kvm_lapic_get_reg(apic, offset);
                break;
        }

        return val;
}

static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
{
        return container_of(dev, struct kvm_lapic, dev);
}

int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
                void *data)
{
        unsigned char alignment = offset & 0xf;
        u32 result;
        /* this bitmask has a bit cleared for each reserved register */
        static const u64 rmask = 0x43ff01ffffffe70cULL;

        if ((alignment + len) > 4) {
                apic_debug("KVM_APIC_READ: alignment error %x %d\n",
                           offset, len);
                return 1;
        }

        if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
                apic_debug("KVM_APIC_READ: read reserved register %x\n",
                           offset);
                return 1;
        }

        result = __apic_read(apic, offset & ~0xf);

        trace_kvm_apic_read(offset, result);

        switch (len) {
        case 1:
        case 2:
        case 4:
                memcpy(data, (char *)&result + alignment, len);
                break;
        default:
                printk(KERN_ERR "Local APIC read with len = %x, "
                       "should be 1,2, or 4 instead\n", len);
                break;
        }
        return 0;
}
EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);

static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
        return kvm_apic_hw_enabled(apic) &&
            addr >= apic->base_address &&
            addr < apic->base_address + LAPIC_MMIO_LENGTH;
}

static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                           gpa_t address, int len, void *data)
{
        struct kvm_lapic *apic = to_lapic(this);
        u32 offset = address - apic->base_address;

        if (!apic_mmio_in_range(apic, address))
                return -EOPNOTSUPP;

        kvm_lapic_reg_read(apic, offset, len, data);

        return 0;
}

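/*
 * TDCR encodes the timer divisor in bits 0, 1 and 3.  Folding bit 3
 * down to bit 2 gives a 3-bit value v, and divide_count becomes
 * 1 << ((v + 1) & 7): e.g. TDCR 0x0 divides by 2, TDCR 0xb (0b1011,
 * v = 7) divides by 1.
 */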
static void update_divide_count(struct kvm_lapic *apic)
{
        u32 tmp1, tmp2, tdcr;

        tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
        tmp1 = tdcr & 0xf;
        tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
        apic->divide_count = 0x1 << (tmp2 & 0x7);

        apic_debug("timer divide count is 0x%x\n",
                                   apic->divide_count);
}

static void apic_update_lvtt(struct kvm_lapic *apic)
{
        u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
                        apic->lapic_timer.timer_mode_mask;

        if (apic->lapic_timer.timer_mode != timer_mode) {
                apic->lapic_timer.timer_mode = timer_mode;
                hrtimer_cancel(&apic->lapic_timer.timer);
        }
}

static void apic_timer_expired(struct kvm_lapic *apic)
{
        struct kvm_vcpu *vcpu = apic->vcpu;
        struct swait_queue_head *q = &vcpu->wq;
        struct kvm_timer *ktimer = &apic->lapic_timer;

        if (atomic_read(&apic->lapic_timer.pending))
                return;

        atomic_inc(&apic->lapic_timer.pending);
        kvm_set_pending_timer(vcpu);

        if (swait_active(q))
                swake_up(q);

        if (apic_lvtt_tscdeadline(apic))
                ktimer->expired_tscdeadline = ktimer->tscdeadline;
}

/*
 * On APICv, this test will cause a busy wait
 * during a higher-priority task.
 */

static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);

        if (kvm_apic_hw_enabled(apic)) {
                int vec = reg & APIC_VECTOR_MASK;
                void *bitmap = apic->regs + APIC_ISR;

                if (vcpu->arch.apicv_active)
                        bitmap = apic->regs + APIC_IRR;

                if (apic_test_vector(vec, bitmap))
                        return true;
        }
        return false;
}

void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
        struct kvm_lapic *apic = vcpu->arch.apic;
        u64 guest_tsc, tsc_deadline;

        if (!lapic_in_kernel(vcpu))
                return;

        if (apic->lapic_timer.expired_tscdeadline == 0)
                return;

        if (!lapic_timer_int_injected(vcpu))
                return;

        tsc_deadline = apic->lapic_timer.expired_tscdeadline;
        apic->lapic_timer.expired_tscdeadline = 0;
        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
        trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);

        /* __delay is delay_tsc whenever the hardware has TSC, thus always.  */
        if (guest_tsc < tsc_deadline)
                __delay(min(tsc_deadline - guest_tsc,
                        nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
}

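/*
 * Program an hrtimer for TSC-deadline mode: the guest TSC delta is
 * converted to nanoseconds via virtual_tsc_khz
 * (ns = cycles * 1000000 / khz), and the timer is armed early by
 * lapic_timer_advance_ns so that wait_lapic_expire() can busy-wait
 * the residue for precise delivery.
 */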
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
        u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
        u64 ns = 0;
        ktime_t expire;
        struct kvm_vcpu *vcpu = apic->vcpu;
        unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
        unsigned long flags;
        ktime_t now;

        if (unlikely(!tscdeadline || !this_tsc_khz))
                return;

        local_irq_save(flags);

        now = ktime_get();
        guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
        if (likely(tscdeadline > guest_tsc)) {
                ns = (tscdeadline - guest_tsc) * 1000000ULL;
                do_div(ns, this_tsc_khz);
                expire = ktime_add_ns(now, ns);
                expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
                hrtimer_start(&apic->lapic_timer.timer,
                                expire, HRTIMER_MODE_ABS_PINNED);
        } else
                apic_timer_expired(apic);

        local_irq_restore(flags);
}

static void start_sw_period(struct kvm_lapic *apic)
{
        if (!apic->lapic_timer.period)
                return;

        if (apic_lvtt_oneshot(apic) &&
            ktime_after(ktime_get(),
                        apic->lapic_timer.target_expiration)) {
                apic_timer_expired(apic);
                return;
        }

        hrtimer_start(&apic->lapic_timer.timer,
                apic->lapic_timer.target_expiration,
                HRTIMER_MODE_ABS_PINNED);
}

static bool set_target_expiration(struct kvm_lapic *apic)
{
        ktime_t now;
        u64 tscl = rdtsc();

        now = ktime_get();
        apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
                * APIC_BUS_CYCLE_NS * apic->divide_count;

        if (!apic->lapic_timer.period)
                return false;

        /*
         * Do not allow the guest to program periodic timers with small
         * interval, since the hrtimers are not throttled by the host
         * scheduler.
         */
        if (apic_lvtt_period(apic)) {
                s64 min_period = min_timer_period_us * 1000LL;

                if (apic->lapic_timer.period < min_period) {
                        pr_info_ratelimited(
                            "kvm: vcpu %i: requested %lld ns "
                            "lapic timer period limited to %lld ns\n",
                            apic->vcpu->vcpu_id,
                            apic->lapic_timer.period, min_period);
                        apic->lapic_timer.period = min_period;
                }
        }

        apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
                   PRIx64 ", "
                   "timer initial count 0x%x, period %lldns, "
                   "expire @ 0x%016" PRIx64 ".\n", __func__,
                   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
                   kvm_lapic_get_reg(apic, APIC_TMICT),
                   apic->lapic_timer.period,
                   ktime_to_ns(ktime_add_ns(now,
                                apic->lapic_timer.period)));

        apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
                nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
        apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);

        return true;
}

static void advance_periodic_target_expiration(struct kvm_lapic *apic)
{
        apic->lapic_timer.tscdeadline +=
                nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
        apic->lapic_timer.target_expiration =
                ktime_add_ns(apic->lapic_timer.target_expiration,
                                apic->lapic_timer.period);
}

bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
        if (!lapic_in_kernel(vcpu))
                return false;

        return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);

static void cancel_hv_timer(struct kvm_lapic *apic)
{
        kvm_x86_ops->cancel_hv_timer(apic->vcpu);
        apic->lapic_timer.hv_timer_in_use = false;
}

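/*
 * Try to arm the hardware timer exposed via kvm_x86_ops->set_hv_timer
 * (on Intel, the VMX preemption timer) instead of an hrtimer.  Bail
 * out, returning false, if a one-shot expiry is already pending or if
 * programming fails; the re-check afterwards closes the race with a
 * software timer that fired while we were switching over.
 */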
1503 static bool start_hv_timer(struct kvm_lapic *apic)
1504 {
1505         u64 tscdeadline = apic->lapic_timer.tscdeadline;
1506
1507         if ((atomic_read(&apic->lapic_timer.pending) &&
1508                 !apic_lvtt_period(apic)) ||
1509                 kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
1510                 if (apic->lapic_timer.hv_timer_in_use)
1511                         cancel_hv_timer(apic);
1512         } else {
1513                 apic->lapic_timer.hv_timer_in_use = true;
1514                 hrtimer_cancel(&apic->lapic_timer.timer);
1515
1516                 /* In case the sw timer triggered in the window */
1517                 if (atomic_read(&apic->lapic_timer.pending) &&
1518                         !apic_lvtt_period(apic))
1519                         cancel_hv_timer(apic);
1520         }
1521         trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
1522                         apic->lapic_timer.hv_timer_in_use);
1523         return apic->lapic_timer.hv_timer_in_use;
1524 }
1525
1526 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1527 {
1528         struct kvm_lapic *apic = vcpu->arch.apic;
1529
1530         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1531         WARN_ON(swait_active(&vcpu->wq));
1532         cancel_hv_timer(apic);
1533         apic_timer_expired(apic);
1534
1535         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1536                 advance_periodic_target_expiration(apic);
1537                 if (!start_hv_timer(apic))
1538                         start_sw_period(apic);
1539         }
1540 }
1541 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1542
1543 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1544 {
1545         struct kvm_lapic *apic = vcpu->arch.apic;
1546
1547         WARN_ON(apic->lapic_timer.hv_timer_in_use);
1548
1549         start_hv_timer(apic);
1550 }
1551 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
1552
1553 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1554 {
1555         struct kvm_lapic *apic = vcpu->arch.apic;
1556
1557         /* Possibly the TSC deadline timer is not enabled yet */
1558         if (!apic->lapic_timer.hv_timer_in_use)
1559                 return;
1560
1561         cancel_hv_timer(apic);
1562
1563         if (atomic_read(&apic->lapic_timer.pending))
1564                 return;
1565
1566         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1567                 start_sw_period(apic);
1568         else if (apic_lvtt_tscdeadline(apic))
1569                 start_sw_tscdeadline(apic);
1570 }
1571 EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
1572
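/*
 * (Re)arm the emulated timer.  Periodic and one-shot modes first compute
 * a target expiration; all modes then prefer the hv timer when the
 * backend provides one (kvm_x86_ops->set_hv_timer), falling back to the
 * sw timer otherwise.
 */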
1573 static void start_apic_timer(struct kvm_lapic *apic)
1574 {
1575         atomic_set(&apic->lapic_timer.pending, 0);
1576
1577         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1578                 if (set_target_expiration(apic) &&
1579                         !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
1580                         start_sw_period(apic);
1581         } else if (apic_lvtt_tscdeadline(apic)) {
1582                 if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
1583                         start_sw_tscdeadline(apic);
1584         }
1585 }
1586
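/*
 * Track transitions of LVT0 into and out of NMI mode and keep the
 * VM-wide vapics_in_nmi_mode count in sync; the in-kernel PIT checks
 * this count before fanning out watchdog NMIs to vCPUs via
 * kvm_apic_nmi_wd_deliver() (see below).
 */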
1587 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1588 {
1589         bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
1590
1591         if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
1592                 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
1593                 if (lvt0_in_nmi_mode) {
1594                         apic_debug("Received NMI setting on APIC_LVT0 "
1595                                    "for cpu %d\n", apic->vcpu->vcpu_id);
1596                         atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1597                 } else
1598                         atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
1599         }
1600 }
1601
1602 int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1603 {
1604         int ret = 0;
1605
1606         trace_kvm_apic_write(reg, val);
1607
1608         switch (reg) {
1609         case APIC_ID:           /* Local APIC ID */
1610                 if (!apic_x2apic_mode(apic))
1611                         kvm_apic_set_xapic_id(apic, val >> 24);
1612                 else
1613                         ret = 1;
1614                 break;
1615
1616         case APIC_TASKPRI:
1617                 report_tpr_access(apic, true);
1618                 apic_set_tpr(apic, val & 0xff);
1619                 break;
1620
1621         case APIC_EOI:
1622                 apic_set_eoi(apic);
1623                 break;
1624
1625         case APIC_LDR:
1626                 if (!apic_x2apic_mode(apic))
1627                         kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1628                 else
1629                         ret = 1;
1630                 break;
1631
1632         case APIC_DFR:
1633                 if (!apic_x2apic_mode(apic)) {
1634                         kvm_lapic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1635                         recalculate_apic_map(apic->vcpu->kvm);
1636                 } else
1637                         ret = 1;
1638                 break;
1639
1640         case APIC_SPIV: {
1641                 u32 mask = 0x3ff;
1642                 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1643                         mask |= APIC_SPIV_DIRECTED_EOI;
1644                 apic_set_spiv(apic, val & mask);
1645                 if (!(val & APIC_SPIV_APIC_ENABLED)) {
1646                         int i;
1647                         u32 lvt_val;
1648
1649                         for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
1650                                 lvt_val = kvm_lapic_get_reg(apic,
1651                                                        APIC_LVTT + 0x10 * i);
1652                                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
1653                                              lvt_val | APIC_LVT_MASKED);
1654                         }
1655                         apic_update_lvtt(apic);
1656                         atomic_set(&apic->lapic_timer.pending, 0);
1657
1658                 }
1659                 break;
1660         }
1661         case APIC_ICR:
1662                 /* No delay here, so we always clear the pending bit */
1663                 kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1664                 apic_send_ipi(apic);
1665                 break;
1666
1667         case APIC_ICR2:
1668                 if (!apic_x2apic_mode(apic))
1669                         val &= 0xff000000;
1670                 kvm_lapic_set_reg(apic, APIC_ICR2, val);
1671                 break;
1672
1673         case APIC_LVT0:
1674                 apic_manage_nmi_watchdog(apic, val);
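                /* fall through: LVT0 shares the common LVT handling below */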
1675         case APIC_LVTTHMR:
1676         case APIC_LVTPC:
1677         case APIC_LVT1:
1678         case APIC_LVTERR:
1679                 /* TODO: Check vector */
1680                 if (!kvm_apic_sw_enabled(apic))
1681                         val |= APIC_LVT_MASKED;
1682
1683                 val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1684                 kvm_lapic_set_reg(apic, reg, val);
1685
1686                 break;
1687
1688         case APIC_LVTT:
1689                 if (!kvm_apic_sw_enabled(apic))
1690                         val |= APIC_LVT_MASKED;
1691                 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1692                 kvm_lapic_set_reg(apic, APIC_LVTT, val);
1693                 apic_update_lvtt(apic);
1694                 break;
1695
1696         case APIC_TMICT:
1697                 if (apic_lvtt_tscdeadline(apic))
1698                         break;
1699
1700                 hrtimer_cancel(&apic->lapic_timer.timer);
1701                 kvm_lapic_set_reg(apic, APIC_TMICT, val);
1702                 start_apic_timer(apic);
1703                 break;
1704
1705         case APIC_TDCR:
1706                 if (val & 4)
1707                         apic_debug("KVM_WRITE:TDCR %x\n", val);
1708                 kvm_lapic_set_reg(apic, APIC_TDCR, val);
1709                 update_divide_count(apic);
1710                 break;
1711
1712         case APIC_ESR:
1713                 if (apic_x2apic_mode(apic) && val != 0) {
1714                         apic_debug("KVM_WRITE:ESR not zero %x\n", val);
1715                         ret = 1;
1716                 }
1717                 break;
1718
1719         case APIC_SELF_IPI:
1720                 if (apic_x2apic_mode(apic)) {
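                        /* 0x40000 is the APIC_DEST_SELF shorthand: a fixed IPI to this vCPU */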
1721                         kvm_lapic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1722                 } else
1723                         ret = 1;
1724                 break;
1725         default:
1726                 ret = 1;
1727                 break;
1728         }
1729         if (ret)
1730                 apic_debug("Local APIC Write to read-only register %x\n", reg);
1731         return ret;
1732 }
1733 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
1734
1735 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1736                             gpa_t address, int len, const void *data)
1737 {
1738         struct kvm_lapic *apic = to_lapic(this);
1739         unsigned int offset = address - apic->base_address;
1740         u32 val;
1741
1742         if (!apic_mmio_in_range(apic, address))
1743                 return -EOPNOTSUPP;
1744
1745         /*
1746          * APIC registers must be aligned on a 128-bit boundary.
1747          * 32-, 64- and 128-bit registers must be accessed through
1748          * 32-bit loads and stores.  See SDM 8.4.1.
1749          */
1750         if (len != 4 || (offset & 0xf)) {
1751                 /* Don't shout too loudly; $infamous_os would only cause noise. */
1752                 apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
1753                 return 0;
1754         }
1755
1756         val = *(u32*)data;
1757
1758         /* EOI writes are too frequent to be worth logging */
1759         if (offset != APIC_EOI)
1760                 apic_debug("%s: offset 0x%x with length 0x%x, and value is "
1761                            "0x%x\n", __func__, offset, len, val);
1762
1763         kvm_lapic_reg_write(apic, offset & 0xff0, val);
1764
1765         return 0;
1766 }
1767
1768 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1769 {
1770         kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1771 }
1772 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
1773
1774 /* emulate APIC access in a trap manner */
1775 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
1776 {
1777         u32 val = 0;
1778
1779         /* hardware has already done the conditional check and instruction decode */
1780         offset &= 0xff0;
1781
1782         kvm_lapic_reg_read(vcpu->arch.apic, offset, 4, &val);
1783
1784         /* TODO: optimize to just emulate the side effects without one more register write */
1785         kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
1786 }
1787 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
1788
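/*
 * Free the per-vCPU APIC.  An APIC freed while hw- or sw-disabled must
 * drop its reference on the corresponding deferred static key, which
 * counts how many APICs are currently disabled.
 */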
1789 void kvm_free_lapic(struct kvm_vcpu *vcpu)
1790 {
1791         struct kvm_lapic *apic = vcpu->arch.apic;
1792
1793         if (!vcpu->arch.apic)
1794                 return;
1795
1796         hrtimer_cancel(&apic->lapic_timer.timer);
1797
1798         if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
1799                 static_key_slow_dec_deferred(&apic_hw_disabled);
1800
1801         if (!apic->sw_enabled)
1802                 static_key_slow_dec_deferred(&apic_sw_disabled);
1803
1804         if (apic->regs)
1805                 free_page((unsigned long)apic->regs);
1806
1807         kfree(apic);
1808 }
1809
1810 /*
1811  *----------------------------------------------------------------------
1812  * LAPIC interface
1813  *----------------------------------------------------------------------
1814  */
1815 u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
1816 {
1817         struct kvm_lapic *apic = vcpu->arch.apic;
1818
1819         if (!lapic_in_kernel(vcpu))
1820                 return 0;
1821
1822         return apic->lapic_timer.tscdeadline;
1823 }
1824
1825 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
1826 {
1827         struct kvm_lapic *apic = vcpu->arch.apic;
1828
1829         if (!lapic_in_kernel(vcpu) ||
1830                 !apic_lvtt_tscdeadline(apic))
1831                 return 0;
1832
1833         return apic->lapic_timer.tscdeadline;
1834 }
1835
1836 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
1837 {
1838         struct kvm_lapic *apic = vcpu->arch.apic;
1839
1840         if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
1841                         apic_lvtt_period(apic))
1842                 return;
1843
1844         hrtimer_cancel(&apic->lapic_timer.timer);
1845         apic->lapic_timer.tscdeadline = data;
1846         start_apic_timer(apic);
1847 }
1848
1849 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1850 {
1851         struct kvm_lapic *apic = vcpu->arch.apic;
1852
1853         apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1854                      | (kvm_lapic_get_reg(apic, APIC_TASKPRI) & 4));
1855 }
1856
1857 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
1858 {
1859         u64 tpr;
1860
1861         tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1862
1863         return (tpr & 0xf0) >> 4;
1864 }
1865
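/*
 * Handle a write to the APIC base MSR: keep the hw-disabled jump label
 * in sync with the enable bit, reset the xAPIC ID on re-enable, notify
 * the backend of x2APIC mode changes, and recompute the MMIO base
 * (warning once if the guest tries to relocate it).
 */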
1866 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
1867 {
1868         u64 old_value = vcpu->arch.apic_base;
1869         struct kvm_lapic *apic = vcpu->arch.apic;
1870
1871         if (!apic)
1872                 value |= MSR_IA32_APICBASE_BSP;
1873
1874         vcpu->arch.apic_base = value;
1875
1876         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
1877                 kvm_update_cpuid(vcpu);
1878
1879         if (!apic)
1880                 return;
1881
1882         /* update jump label if enable bit changes */
1883         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
1884                 if (value & MSR_IA32_APICBASE_ENABLE) {
1885                         kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
1886                         static_key_slow_dec_deferred(&apic_hw_disabled);
1887                 } else {
1888                         static_key_slow_inc(&apic_hw_disabled.key);
1889                         recalculate_apic_map(vcpu->kvm);
1890                 }
1891         }
1892
1893         if ((old_value ^ value) & X2APIC_ENABLE) {
1894                 if (value & X2APIC_ENABLE) {
1895                         kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
1896                         kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
1897                 } else
1898                         kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
1899         }
1900
1901         apic->base_address = apic->vcpu->arch.apic_base &
1902                              MSR_IA32_APICBASE_BASE;
1903
1904         if ((value & MSR_IA32_APICBASE_ENABLE) &&
1905              apic->base_address != APIC_DEFAULT_PHYS_BASE)
1906                 pr_warn_once("APIC base relocation is unsupported by KVM");
1907
1908         /* with FSB-delivered interrupts, APIC functionality can be restarted */
1909         apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
1910                    "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
1911
1912 }
1913
1914 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
1915 {
1916         struct kvm_lapic *apic;
1917         int i;
1918
1919         apic_debug("%s\n", __func__);
1920
1921         ASSERT(vcpu);
1922         apic = vcpu->arch.apic;
1923         ASSERT(apic != NULL);
1924
1925         /* Stop the timer in case it's a reset to an active apic */
1926         hrtimer_cancel(&apic->lapic_timer.timer);
1927
1928         if (!init_event) {
1929                 kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE |
1930                                          MSR_IA32_APICBASE_ENABLE);
1931                 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
1932         }
1933         kvm_apic_set_version(apic->vcpu);
1934
1935         for (i = 0; i < KVM_APIC_LVT_NUM; i++)
1936                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1937         apic_update_lvtt(apic);
1938         if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
1939                 kvm_lapic_set_reg(apic, APIC_LVT0,
1940                              SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
1941         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
1942
1943         kvm_lapic_set_reg(apic, APIC_DFR, 0xffffffffU);
1944         apic_set_spiv(apic, 0xff);
1945         kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
1946         if (!apic_x2apic_mode(apic))
1947                 kvm_apic_set_ldr(apic, 0);
1948         kvm_lapic_set_reg(apic, APIC_ESR, 0);
1949         kvm_lapic_set_reg(apic, APIC_ICR, 0);
1950         kvm_lapic_set_reg(apic, APIC_ICR2, 0);
1951         kvm_lapic_set_reg(apic, APIC_TDCR, 0);
1952         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1953         for (i = 0; i < 8; i++) {
1954                 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
1955                 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1956                 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1957         }
1958         apic->irr_pending = vcpu->arch.apicv_active;
1959         apic->isr_count = vcpu->arch.apicv_active ? 1 : 0;
1960         apic->highest_isr_cache = -1;
1961         update_divide_count(apic);
1962         atomic_set(&apic->lapic_timer.pending, 0);
1963         if (kvm_vcpu_is_bsp(vcpu))
1964                 kvm_lapic_set_base(vcpu,
1965                                 vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
1966         vcpu->arch.pv_eoi.msr_val = 0;
1967         apic_update_ppr(apic);
1968
1969         vcpu->arch.apic_arb_prio = 0;
1970         vcpu->arch.apic_attention = 0;
1971
1972         apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
1973                    "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
1974                    vcpu, kvm_lapic_get_reg(apic, APIC_ID),
1975                    vcpu->arch.apic_base, apic->base_address);
1976 }
1977
1978 /*
1979  *----------------------------------------------------------------------
1980  * timer interface
1981  *----------------------------------------------------------------------
1982  */
1983
1984 static bool lapic_is_periodic(struct kvm_lapic *apic)
1985 {
1986         return apic_lvtt_period(apic);
1987 }
1988
1989 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
1990 {
1991         struct kvm_lapic *apic = vcpu->arch.apic;
1992
1993         if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
1994                 return atomic_read(&apic->lapic_timer.pending);
1995
1996         return 0;
1997 }
1998
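/*
 * Deliver a local interrupt described by an LVT entry (timer, LINT0/1,
 * etc.), provided the APIC is hw-enabled and the entry is not masked.
 */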
1999 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2000 {
2001         u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2002         int vector, mode, trig_mode;
2003
2004         if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2005                 vector = reg & APIC_VECTOR_MASK;
2006                 mode = reg & APIC_MODE_MASK;
2007                 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2008                 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2009                                         NULL);
2010         }
2011         return 0;
2012 }
2013
2014 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2015 {
2016         struct kvm_lapic *apic = vcpu->arch.apic;
2017
2018         if (apic)
2019                 kvm_apic_local_deliver(apic, APIC_LVT0);
2020 }
2021
2022 static const struct kvm_io_device_ops apic_mmio_ops = {
2023         .read     = apic_mmio_read,
2024         .write    = apic_mmio_write,
2025 };
2026
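/*
 * hrtimer callback for the sw timer: signal expiration and, for
 * periodic mode, push the expiration forward one period and restart.
 */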
2027 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2028 {
2029         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2030         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2031
2032         apic_timer_expired(apic);
2033
2034         if (lapic_is_periodic(apic)) {
2035                 advance_periodic_target_expiration(apic);
2036                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2037                 return HRTIMER_RESTART;
2038         } else
2039                 return HRTIMER_NORESTART;
2040 }
2041
2042 int kvm_create_lapic(struct kvm_vcpu *vcpu)
2043 {
2044         struct kvm_lapic *apic;
2045
2046         ASSERT(vcpu != NULL);
2047         apic_debug("apic_init %d\n", vcpu->vcpu_id);
2048
2049         apic = kzalloc(sizeof(*apic), GFP_KERNEL);
2050         if (!apic)
2051                 goto nomem;
2052
2053         vcpu->arch.apic = apic;
2054
2055         apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
2056         if (!apic->regs) {
2057                 printk(KERN_ERR "failed to allocate APIC register page for vcpu %x\n",
2058                        vcpu->vcpu_id);
2059                 goto nomem_free_apic;
2060         }
2061         apic->vcpu = vcpu;
2062
2063         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2064                      HRTIMER_MODE_ABS_PINNED);
2065         apic->lapic_timer.timer.function = apic_timer_fn;
2066
2067         /*
2068          * APIC is created enabled. This will prevent kvm_lapic_set_base from
2069          * thinking that APIC state has changed.
2070          */
2071         vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2072         static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2073         kvm_lapic_reset(vcpu, false);
2074         kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2075
2076         return 0;
2077 nomem_free_apic:
2078         kfree(apic);
2079 nomem:
2080         return -ENOMEM;
2081 }
2082
2083 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2084 {
2085         struct kvm_lapic *apic = vcpu->arch.apic;
2086         u32 ppr;
2087
2088         if (!apic_enabled(apic))
2089                 return -1;
2090
2091         __apic_update_ppr(apic, &ppr);
2092         return apic_has_interrupt_for_ppr(apic, ppr);
2093 }
2094
2095 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2096 {
2097         u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2098         int r = 0;
2099
2100         if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2101                 r = 1;
2102         if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2103             GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2104                 r = 1;
2105         return r;
2106 }
2107
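/*
 * Deliver a pending timer interrupt (if any) through LVTT, reset the
 * stale one-shot/tscdeadline state, and clear the pending count.
 */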
2108 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2109 {
2110         struct kvm_lapic *apic = vcpu->arch.apic;
2111
2112         if (atomic_read(&apic->lapic_timer.pending) > 0) {
2113                 kvm_apic_local_deliver(apic, APIC_LVTT);
2114                 if (apic_lvtt_tscdeadline(apic))
2115                         apic->lapic_timer.tscdeadline = 0;
2116                 if (apic_lvtt_oneshot(apic)) {
2117                         apic->lapic_timer.tscdeadline = 0;
2118                         apic->lapic_timer.target_expiration = 0;
2119                 }
2120                 atomic_set(&apic->lapic_timer.pending, 0);
2121         }
2122 }
2123
2124 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2125 {
2126         int vector = kvm_apic_has_interrupt(vcpu);
2127         struct kvm_lapic *apic = vcpu->arch.apic;
2128         u32 ppr;
2129
2130         if (vector == -1)
2131                 return -1;
2132
2133         /*
2134          * We get here even with APIC virtualization enabled, if doing
2135          * nested virtualization and L1 runs with the "acknowledge interrupt
2136          * on exit" mode.  Then we cannot inject the interrupt via RVI,
2137          * because the process would deliver it through the IDT.
2138          */
2139
2140         apic_clear_irr(vector, apic);
2141         if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) {
2142                 /*
2143                  * For auto-EOI interrupts, there might be another pending
2144                  * interrupt above PPR, so check whether to raise another
2145                  * KVM_REQ_EVENT.
2146                  */
2147                 apic_update_ppr(apic);
2148         } else {
2149                 /*
2150                  * For normal interrupts, PPR has been raised and there cannot
2151                  * be a higher-priority pending interrupt---except if there was
2152                  * a concurrent interrupt injection, but that would have
2153                  * triggered KVM_REQ_EVENT already.
2154                  */
2155                 apic_set_isr(vector, apic);
2156                 __apic_update_ppr(apic, &ppr);
2157         }
2158
2159         return vector;
2160 }
2161
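/*
 * Userspace's kvm_lapic_state historically keeps the APIC ID in xAPIC
 * format (bits 31-24) even in x2APIC mode, so convert on get/set unless
 * the VM opted into the x2APIC format, in which case the full 32-bit ID
 * is kept verbatim and must match the vcpu id.
 */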
2162 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2163                 struct kvm_lapic_state *s, bool set)
2164 {
2165         if (apic_x2apic_mode(vcpu->arch.apic)) {
2166                 u32 *id = (u32 *)(s->regs + APIC_ID);
2167
2168                 if (vcpu->kvm->arch.x2apic_format) {
2169                         if (*id != vcpu->vcpu_id)
2170                                 return -EINVAL;
2171                 } else {
2172                         if (set)
2173                                 *id >>= 24;
2174                         else
2175                                 *id <<= 24;
2176                 }
2177         }
2178
2179         return 0;
2180 }
2181
2182 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2183 {
2184         memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2185         return kvm_apic_state_fixup(vcpu, s, false);
2186 }
2187
2188 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2189 {
2190         struct kvm_lapic *apic = vcpu->arch.apic;
2191         int r;
2192
2193
2194         kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2195         /* set SPIV separately to get count of SW disabled APICs right */
2196         apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2197
2198         r = kvm_apic_state_fixup(vcpu, s, true);
2199         if (r)
2200                 return r;
2201         memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2202
2203         recalculate_apic_map(vcpu->kvm);
2204         kvm_apic_set_version(vcpu);
2205
2206         apic_update_ppr(apic);
2207         hrtimer_cancel(&apic->lapic_timer.timer);
2208         apic_update_lvtt(apic);
2209         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2210         update_divide_count(apic);
2211         start_apic_timer(apic);
2212         apic->irr_pending = true;
2213         apic->isr_count = vcpu->arch.apicv_active ?
2214                                 1 : count_vectors(apic->regs + APIC_ISR);
2215         apic->highest_isr_cache = -1;
2216         if (vcpu->arch.apicv_active) {
2217                 kvm_x86_ops->apicv_post_state_restore(vcpu);
2218                 kvm_x86_ops->hwapic_irr_update(vcpu,
2219                                 apic_find_highest_irr(apic));
2220                 kvm_x86_ops->hwapic_isr_update(vcpu,
2221                                 apic_find_highest_isr(apic));
2222         }
2223         kvm_make_request(KVM_REQ_EVENT, vcpu);
2224         if (ioapic_in_kernel(vcpu->kvm))
2225                 kvm_rtc_eoi_tracking_restore_one(vcpu);
2226
2227         vcpu->arch.apic_arb_prio = 0;
2228
2229         return 0;
2230 }
2231
2232 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2233 {
2234         struct hrtimer *timer;
2235
2236         if (!lapic_in_kernel(vcpu))
2237                 return;
2238
2239         timer = &vcpu->arch.apic->lapic_timer.timer;
2240         if (hrtimer_cancel(timer))
2241                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
2242 }
2243
2244 /*
2245  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2246  *
2247  * Detect whether the guest triggered PV EOI since the
2248  * last entry. If yes, perform the EOI on the guest's behalf.
2249  * Clear PV EOI in guest memory in any case.
2250  */
2251 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2252                                         struct kvm_lapic *apic)
2253 {
2254         bool pending;
2255         int vector;
2256         /*
2257          * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2258          * and KVM_PV_EOI_ENABLED in guest memory as follows:
2259          *
2260          * KVM_APIC_PV_EOI_PENDING is unset:
2261          *      -> host disabled PV EOI.
2262          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2263          *      -> host enabled PV EOI, guest did not execute EOI yet.
2264          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2265          *      -> host enabled PV EOI, guest executed EOI.
2266          */
2267         BUG_ON(!pv_eoi_enabled(vcpu));
2268         pending = pv_eoi_get_pending(vcpu);
2269         /*
2270          * Clear pending bit in any case: it will be set again on vmentry.
2271          * While this might not be ideal from a performance point of view,
2272          * this makes sure pv eoi is only enabled when we know it's safe.
2273          */
2274         pv_eoi_clr_pending(vcpu);
2275         if (pending)
2276                 return;
2277         vector = apic_set_eoi(apic);
2278         trace_kvm_pv_eoi(apic, vector);
2279 }
2280
2281 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2282 {
2283         u32 data;
2284
2285         if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2286                 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2287
2288         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2289                 return;
2290
2291         if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2292                                   sizeof(u32)))
2293                 return;
2294
2295         apic_set_tpr(vcpu->arch.apic, data & 0xff);
2296 }
2297
2298 /*
2299  * apic_sync_pv_eoi_to_guest - called before vmentry
2300  *
2301  * Detect whether it's safe to enable PV EOI and
2302  * if yes do so.
2303  */
2304 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2305                                         struct kvm_lapic *apic)
2306 {
2307         if (!pv_eoi_enabled(vcpu) ||
2308             /* IRR set or many bits in ISR: could be nested. */
2309             apic->irr_pending ||
2310             /* Cache not set: could be safe but we don't bother. */
2311             apic->highest_isr_cache == -1 ||
2312             /* Need EOI to update ioapic. */
2313             kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2314                 /*
2315                  * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2316                  * so we need not do anything here.
2317                  */
2318                 return;
2319         }
2320
2321         pv_eoi_set_pending(apic->vcpu);
2322 }
2323
2324 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2325 {
2326         u32 data, tpr;
2327         int max_irr, max_isr;
2328         struct kvm_lapic *apic = vcpu->arch.apic;
2329
2330         apic_sync_pv_eoi_to_guest(vcpu, apic);
2331
2332         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2333                 return;
2334
2335         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2336         max_irr = apic_find_highest_irr(apic);
2337         if (max_irr < 0)
2338                 max_irr = 0;
2339         max_isr = apic_find_highest_isr(apic);
2340         if (max_isr < 0)
2341                 max_isr = 0;
2342         data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2343
2344         kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2345                                 sizeof(u32));
2346 }
2347
2348 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2349 {
2350         if (vapic_addr) {
2351                 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2352                                         &vcpu->arch.apic->vapic_cache,
2353                                         vapic_addr, sizeof(u32)))
2354                         return -EINVAL;
2355                 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2356         } else {
2357                 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2358         }
2359
2360         vcpu->arch.apic->vapic_addr = vapic_addr;
2361         return 0;
2362 }
2363
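/*
 * x2APIC MSR accesses: MSR 0x800 + N (APIC_BASE_MSR + N) maps to the
 * xAPIC MMIO offset N << 4.  ICR is a single 64-bit MSR in x2APIC mode,
 * so its high dword is routed through ICR2 and the halves are
 * split/joined here.
 */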
2364 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2365 {
2366         struct kvm_lapic *apic = vcpu->arch.apic;
2367         u32 reg = (msr - APIC_BASE_MSR) << 4;
2368
2369         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2370                 return 1;
2371
2372         if (reg == APIC_ICR2)
2373                 return 1;
2374
2375         /* an ICR write must store the high dword in ICR2 before the low dword triggers the IPI */
2376         if (reg == APIC_ICR)
2377                 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2378         return kvm_lapic_reg_write(apic, reg, (u32)data);
2379 }
2380
2381 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2382 {
2383         struct kvm_lapic *apic = vcpu->arch.apic;
2384         u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
2385
2386         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2387                 return 1;
2388
2389         if (reg == APIC_DFR || reg == APIC_ICR2) {
2390                 apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
2391                            reg);
2392                 return 1;
2393         }
2394
2395         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2396                 return 1;
2397         if (reg == APIC_ICR)
2398                 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2399
2400         *data = (((u64)high) << 32) | low;
2401
2402         return 0;
2403 }
2404
2405 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2406 {
2407         struct kvm_lapic *apic = vcpu->arch.apic;
2408
2409         if (!lapic_in_kernel(vcpu))
2410                 return 1;
2411
2412         /* an ICR write must store the high dword in ICR2 before the low dword triggers the IPI */
2413         if (reg == APIC_ICR)
2414                 kvm_lapic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
2415         return kvm_lapic_reg_write(apic, reg, (u32)data);
2416 }
2417
2418 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2419 {
2420         struct kvm_lapic *apic = vcpu->arch.apic;
2421         u32 low, high = 0;
2422
2423         if (!lapic_in_kernel(vcpu))
2424                 return 1;
2425
2426         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2427                 return 1;
2428         if (reg == APIC_ICR)
2429                 kvm_lapic_reg_read(apic, APIC_ICR2, 4, &high);
2430
2431         *data = (((u64)high) << 32) | low;
2432
2433         return 0;
2434 }
2435
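/*
 * MSR_KVM_PV_EOI_EN: bit 0 (KVM_MSR_ENABLED) turns the feature on and
 * the remaining bits hold the guest address of the PV EOI flag byte,
 * which must be 4-byte aligned.
 */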
2436 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
2437 {
2438         u64 addr = data & ~KVM_MSR_ENABLED;
2439         if (!IS_ALIGNED(addr, 4))
2440                 return 1;
2441
2442         vcpu->arch.pv_eoi.msr_val = data;
2443         if (!pv_eoi_enabled(vcpu))
2444                 return 0;
2445         return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
2446                                          addr, sizeof(u8));
2447 }
2448
2449 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2450 {
2451         struct kvm_lapic *apic = vcpu->arch.apic;
2452         u8 sipi_vector;
2453         unsigned long pe;
2454
2455         if (!lapic_in_kernel(vcpu) || !apic->pending_events)
2456                 return;
2457
2458         /*
2459          * INITs are latched while in SMM.  Because an SMM CPU cannot
2460          * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
2461          * and delay processing of INIT until the next RSM.
2462          */
2463         if (is_smm(vcpu)) {
2464                 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
2465                 if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
2466                         clear_bit(KVM_APIC_SIPI, &apic->pending_events);
2467                 return;
2468         }
2469
2470         pe = xchg(&apic->pending_events, 0);
2471         if (test_bit(KVM_APIC_INIT, &pe)) {
2472                 kvm_lapic_reset(vcpu, true);
2473                 kvm_vcpu_reset(vcpu, true);
2474                 if (kvm_vcpu_is_bsp(apic->vcpu))
2475                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2476                 else
2477                         vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
2478         }
2479         if (test_bit(KVM_APIC_SIPI, &pe) &&
2480             vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
2481                 /* evaluate pending_events before reading the vector */
2482                 smp_rmb();
2483                 sipi_vector = apic->sipi_vector;
2484                 apic_debug("vcpu %d received sipi with vector # %x\n",
2485                          vcpu->vcpu_id, sipi_vector);
2486                 kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
2487                 vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
2488         }
2489 }
2490
2491 void kvm_lapic_init(void)
2492 {
2493         /* do not patch jump label more than once per second */
2494         jump_label_rate_limit(&apic_hw_disabled, HZ);
2495         jump_label_rate_limit(&apic_sw_disabled, HZ);
2496 }
2497
2498 void kvm_lapic_exit(void)
2499 {
2500         static_key_deferred_flush(&apic_hw_disabled);
2501         static_key_deferred_flush(&apic_sw_disabled);
2502 }