Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
lapic.c
Go to the documentation of this file.
1 
2 /*
3  * Local APIC virtualization
4  *
5  * Copyright (C) 2006 Qumranet, Inc.
6  * Copyright (C) 2007 Novell
7  * Copyright (C) 2007 Intel
8  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  * Dor Laor <[email protected]>
12  * Gregory Haskins <[email protected]>
13  * Yaozu (Eddie) Dong <[email protected]>
14  *
15  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
16  *
17  * This work is licensed under the terms of the GNU GPL, version 2. See
18  * the COPYING file in the top-level directory.
19  */
20 
21 #include <linux/kvm_host.h>
22 #include <linux/kvm.h>
23 #include <linux/mm.h>
24 #include <linux/highmem.h>
25 #include <linux/smp.h>
26 #include <linux/hrtimer.h>
27 #include <linux/io.h>
28 #include <linux/module.h>
29 #include <linux/math64.h>
30 #include <linux/slab.h>
31 #include <asm/processor.h>
32 #include <asm/msr.h>
33 #include <asm/page.h>
34 #include <asm/current.h>
35 #include <asm/apicdef.h>
36 #include <linux/atomic.h>
37 #include <linux/jump_label.h>
38 #include "kvm_cache_regs.h"
39 #include "irq.h"
40 #include "trace.h"
41 #include "x86.h"
42 #include "cpuid.h"
43 
44 #ifndef CONFIG_X86_64
45 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
46 #else
47 #define mod_64(x, y) ((x) % (y))
48 #endif
49 
50 #define PRId64 "d"
51 #define PRIx64 "llx"
52 #define PRIu64 "u"
53 #define PRIo64 "o"
54 
55 #define APIC_BUS_CYCLE_NS 1
56 
57 /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
58 #define apic_debug(fmt, arg...)
59 
60 #define APIC_LVT_NUM 6
61 /* 0x14 is the APIC version reported for Xeon and Pentium, see SDM 8.4.8 */
62 #define APIC_VERSION (0x14UL | ((APIC_LVT_NUM - 1) << 16))
63 #define LAPIC_MMIO_LENGTH (1 << 12)
64 /* The following defines are not in apicdef.h */
65 #define APIC_SHORT_MASK 0xc0000
66 #define APIC_DEST_NOSHORT 0x0
67 #define APIC_DEST_MASK 0x800
68 #define MAX_APIC_VECTOR 256
69 #define APIC_VECTORS_PER_REG 32
70 
71 #define VEC_POS(v) ((v) & (32 - 1))
72 #define REG_POS(v) (((v) >> 5) << 4)
73 
74 static unsigned int min_timer_period_us = 500;
75 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
76 
77 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
78 {
79  *((u32 *) (apic->regs + reg_off)) = val;
80 }
81 
/*
 * Atomically set the bit for vector @vec in the 256-bit register bank at
 * @bitmap and return its previous value.
 */
static inline int apic_test_and_set_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	return test_and_set_bit(VEC_POS(vec), word);
}
86 
/*
 * Atomically clear the bit for vector @vec in the register bank at @bitmap
 * and return its previous value.
 */
static inline int apic_test_and_clear_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	return test_and_clear_bit(VEC_POS(vec), word);
}
91 
/* Return non-zero when the bit for vector @vec is set in the bank at @bitmap. */
static inline int apic_test_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	return test_bit(VEC_POS(vec), word);
}
96 
/* Atomically set the bit for vector @vec in the register bank at @bitmap. */
static inline void apic_set_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	set_bit(VEC_POS(vec), word);
}
101 
/* Atomically clear the bit for vector @vec in the register bank at @bitmap. */
static inline void apic_clear_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	clear_bit(VEC_POS(vec), word);
}
106 
/*
 * Non-atomic variant of apic_test_and_set_vector(): set the bit for @vec
 * and return its previous value.
 */
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	return __test_and_set_bit(VEC_POS(vec), word);
}
111 
/*
 * Non-atomic variant of apic_test_and_clear_vector(): clear the bit for
 * @vec and return its previous value.
 */
static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
{
	void *word = bitmap + REG_POS(vec);

	return __test_and_clear_bit(VEC_POS(vec), word);
}
116 
119 
120 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
121 {
122  if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
123  if (val & APIC_SPIV_APIC_ENABLED)
124  static_key_slow_dec_deferred(&apic_sw_disabled);
125  else
126  static_key_slow_inc(&apic_sw_disabled.key);
127  }
128  apic_set_reg(apic, APIC_SPIV, val);
129 }
130 
/* Return 1 only when the APIC is both software- and hardware-enabled. */
static inline int apic_enabled(struct kvm_lapic *apic)
{
	if (!kvm_apic_sw_enabled(apic))
		return 0;

	return kvm_apic_hw_enabled(apic) ? 1 : 0;
}
135 
136 #define LVT_MASK \
137  (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
138 
139 #define LINT_MASK \
140  (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
141  APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
142 
143 static inline int apic_x2apic_mode(struct kvm_lapic *apic)
144 {
145  return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
146 }
147 
148 static inline int kvm_apic_id(struct kvm_lapic *apic)
149 {
150  return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
151 }
152 
153 static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
154 {
155  u16 cid;
156  ldr >>= 32 - map->ldr_bits;
157  cid = (ldr >> map->cid_shift) & map->cid_mask;
158 
159  BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
160 
161  return cid;
162 }
163 
164 static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
165 {
166  ldr >>= (32 - map->ldr_bits);
167  return ldr & map->lid_mask;
168 }
169 
170 static void recalculate_apic_map(struct kvm *kvm)
171 {
172  struct kvm_apic_map *new, *old = NULL;
173  struct kvm_vcpu *vcpu;
174  int i;
175 
176  new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
177 
178  mutex_lock(&kvm->arch.apic_map_lock);
179 
180  if (!new)
181  goto out;
182 
183  new->ldr_bits = 8;
184  /* flat mode is default */
185  new->cid_shift = 8;
186  new->cid_mask = 0;
187  new->lid_mask = 0xff;
188 
189  kvm_for_each_vcpu(i, vcpu, kvm) {
190  struct kvm_lapic *apic = vcpu->arch.apic;
191  u16 cid, lid;
192  u32 ldr;
193 
194  if (!kvm_apic_present(vcpu))
195  continue;
196 
197  /*
198  * All APICs have to be configured in the same mode by an OS.
199  * We take advatage of this while building logical id loockup
200  * table. After reset APICs are in xapic/flat mode, so if we
201  * find apic with different setting we assume this is the mode
202  * OS wants all apics to be in; build lookup table accordingly.
203  */
204  if (apic_x2apic_mode(apic)) {
205  new->ldr_bits = 32;
206  new->cid_shift = 16;
207  new->cid_mask = new->lid_mask = 0xffff;
208  } else if (kvm_apic_sw_enabled(apic) &&
209  !new->cid_mask /* flat mode */ &&
210  kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
211  new->cid_shift = 4;
212  new->cid_mask = 0xf;
213  new->lid_mask = 0xf;
214  }
215 
216  new->phys_map[kvm_apic_id(apic)] = apic;
217 
218  ldr = kvm_apic_get_reg(apic, APIC_LDR);
219  cid = apic_cluster_id(new, ldr);
220  lid = apic_logical_id(new, ldr);
221 
222  if (lid)
223  new->logical_map[cid][ffs(lid) - 1] = apic;
224  }
225 out:
226  old = rcu_dereference_protected(kvm->arch.apic_map,
227  lockdep_is_held(&kvm->arch.apic_map_lock));
228  rcu_assign_pointer(kvm->arch.apic_map, new);
229  mutex_unlock(&kvm->arch.apic_map_lock);
230 
231  if (old)
232  kfree_rcu(old, rcu);
233 }
234 
235 static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
236 {
237  apic_set_reg(apic, APIC_ID, id << 24);
238  recalculate_apic_map(apic->vcpu->kvm);
239 }
240 
241 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
242 {
243  apic_set_reg(apic, APIC_LDR, id);
244  recalculate_apic_map(apic->vcpu->kvm);
245 }
246 
247 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
248 {
249  return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
250 }
251 
252 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
253 {
254  return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
255 }
256 
257 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
258 {
259  return ((kvm_apic_get_reg(apic, APIC_LVTT) &
260  apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
261 }
262 
263 static inline int apic_lvtt_period(struct kvm_lapic *apic)
264 {
265  return ((kvm_apic_get_reg(apic, APIC_LVTT) &
266  apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
267 }
268 
269 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
270 {
271  return ((kvm_apic_get_reg(apic, APIC_LVTT) &
272  apic->lapic_timer.timer_mode_mask) ==
274 }
275 
276 static inline int apic_lvt_nmi_mode(u32 lvt_val)
277 {
278  return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
279 }
280 
281 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
282 {
283  struct kvm_lapic *apic = vcpu->arch.apic;
284  struct kvm_cpuid_entry2 *feat;
285  u32 v = APIC_VERSION;
286 
287  if (!kvm_vcpu_has_lapic(vcpu))
288  return;
289 
290  feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
291  if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
293  apic_set_reg(apic, APIC_LVR, v);
294 }
295 
296 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
297  LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */
298  LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */
299  LVT_MASK | APIC_MODE_MASK, /* LVTPC */
300  LINT_MASK, LINT_MASK, /* LVT0-1 */
301  LVT_MASK /* LVTERR */
302 };
303 
304 static int find_highest_vector(void *bitmap)
305 {
306  int vec;
307  u32 *reg;
308 
310  vec >= 0; vec -= APIC_VECTORS_PER_REG) {
311  reg = bitmap + REG_POS(vec);
312  if (*reg)
313  return fls(*reg) - 1 + vec;
314  }
315 
316  return -1;
317 }
318 
319 static u8 count_vectors(void *bitmap)
320 {
321  int vec;
322  u32 *reg;
323  u8 count = 0;
324 
325  for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
326  reg = bitmap + REG_POS(vec);
327  count += hweight32(*reg);
328  }
329 
330  return count;
331 }
332 
333 static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
334 {
335  apic->irr_pending = true;
336  return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
337 }
338 
339 static inline int apic_search_irr(struct kvm_lapic *apic)
340 {
341  return find_highest_vector(apic->regs + APIC_IRR);
342 }
343 
344 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
345 {
346  int result;
347 
348  if (!apic->irr_pending)
349  return -1;
350 
351  result = apic_search_irr(apic);
352  ASSERT(result == -1 || result >= 16);
353 
354  return result;
355 }
356 
357 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
358 {
359  apic->irr_pending = false;
360  apic_clear_vector(vec, apic->regs + APIC_IRR);
361  if (apic_search_irr(apic) != -1)
362  apic->irr_pending = true;
363 }
364 
365 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
366 {
367  if (!__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
368  ++apic->isr_count;
370  /*
371  * ISR (in service register) bit is set when injecting an interrupt.
372  * The highest vector is injected. Thus the latest bit set matches
373  * the highest bit in ISR.
374  */
375  apic->highest_isr_cache = vec;
376 }
377 
378 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
379 {
380  if (__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
381  --apic->isr_count;
382  BUG_ON(apic->isr_count < 0);
383  apic->highest_isr_cache = -1;
384 }
385 
387 {
388  int highest_irr;
389 
390  /* This may race with setting of irr in __apic_accept_irq() and
391  * value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
392  * will cause vmexit immediately and the value will be recalculated
393  * on the next vmentry.
394  */
395  if (!kvm_vcpu_has_lapic(vcpu))
396  return 0;
397  highest_irr = apic_find_highest_irr(vcpu->arch.apic);
398 
399  return highest_irr;
400 }
401 
402 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
403  int vector, int level, int trig_mode);
404 
405 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
406 {
407  struct kvm_lapic *apic = vcpu->arch.apic;
408 
409  return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
410  irq->level, irq->trig_mode);
411 }
412 
413 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
414 {
415 
416  return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
417  sizeof(val));
418 }
419 
420 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
421 {
422 
423  return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
424  sizeof(*val));
425 }
426 
427 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
428 {
429  return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
430 }
431 
432 static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
433 {
434  u8 val;
435  if (pv_eoi_get_user(vcpu, &val) < 0)
436  apic_debug("Can't read EOI MSR value: 0x%llx\n",
437  (unsigned long long)vcpi->arch.pv_eoi.msr_val);
438  return val & 0x1;
439 }
440 
441 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
442 {
443  if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
444  apic_debug("Can't set EOI MSR value: 0x%llx\n",
445  (unsigned long long)vcpi->arch.pv_eoi.msr_val);
446  return;
447  }
448  __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
449 }
450 
451 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
452 {
453  if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
454  apic_debug("Can't clear EOI MSR value: 0x%llx\n",
455  (unsigned long long)vcpi->arch.pv_eoi.msr_val);
456  return;
457  }
458  __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
459 }
460 
461 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
462 {
463  int result;
464  if (!apic->isr_count)
465  return -1;
466  if (likely(apic->highest_isr_cache != -1))
467  return apic->highest_isr_cache;
468 
469  result = find_highest_vector(apic->regs + APIC_ISR);
470  ASSERT(result == -1 || result >= 16);
471 
472  return result;
473 }
474 
475 static void apic_update_ppr(struct kvm_lapic *apic)
476 {
477  u32 tpr, isrv, ppr, old_ppr;
478  int isr;
479 
480  old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
481  tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
482  isr = apic_find_highest_isr(apic);
483  isrv = (isr != -1) ? isr : 0;
484 
485  if ((tpr & 0xf0) >= (isrv & 0xf0))
486  ppr = tpr & 0xff;
487  else
488  ppr = isrv & 0xf0;
489 
490  apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
491  apic, ppr, isr, isrv);
492 
493  if (old_ppr != ppr) {
494  apic_set_reg(apic, APIC_PROCPRI, ppr);
495  if (ppr < old_ppr)
496  kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
497  }
498 }
499 
500 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
501 {
502  apic_set_reg(apic, APIC_TASKPRI, tpr);
503  apic_update_ppr(apic);
504 }
505 
507 {
508  return dest == 0xff || kvm_apic_id(apic) == dest;
509 }
510 
512 {
513  int result = 0;
514  u32 logical_id;
515 
516  if (apic_x2apic_mode(apic)) {
517  logical_id = kvm_apic_get_reg(apic, APIC_LDR);
518  return logical_id & mda;
519  }
520 
521  logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
522 
523  switch (kvm_apic_get_reg(apic, APIC_DFR)) {
524  case APIC_DFR_FLAT:
525  if (logical_id & mda)
526  result = 1;
527  break;
528  case APIC_DFR_CLUSTER:
529  if (((logical_id >> 4) == (mda >> 0x4))
530  && (logical_id & mda & 0xf))
531  result = 1;
532  break;
533  default:
534  apic_debug("Bad DFR vcpu %d: %08x\n",
535  apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
536  break;
537  }
538 
539  return result;
540 }
541 
542 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
543  int short_hand, int dest, int dest_mode)
544 {
545  int result = 0;
546  struct kvm_lapic *target = vcpu->arch.apic;
547 
548  apic_debug("target %p, source %p, dest 0x%x, "
549  "dest_mode 0x%x, short_hand 0x%x\n",
550  target, source, dest, dest_mode, short_hand);
551 
552  ASSERT(target);
553  switch (short_hand) {
554  case APIC_DEST_NOSHORT:
555  if (dest_mode == 0)
556  /* Physical mode. */
557  result = kvm_apic_match_physical_addr(target, dest);
558  else
559  /* Logical mode. */
560  result = kvm_apic_match_logical_addr(target, dest);
561  break;
562  case APIC_DEST_SELF:
563  result = (target == source);
564  break;
565  case APIC_DEST_ALLINC:
566  result = 1;
567  break;
568  case APIC_DEST_ALLBUT:
569  result = (target != source);
570  break;
571  default:
572  apic_debug("kvm: apic: Bad dest shorthand value %x\n",
573  short_hand);
574  break;
575  }
576 
577  return result;
578 }
579 
580 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
581  struct kvm_lapic_irq *irq, int *r)
582 {
583  struct kvm_apic_map *map;
584  unsigned long bitmap = 1;
585  struct kvm_lapic **dst;
586  int i;
587  bool ret = false;
588 
589  *r = -1;
590 
591  if (irq->shorthand == APIC_DEST_SELF) {
592  *r = kvm_apic_set_irq(src->vcpu, irq);
593  return true;
594  }
595 
596  if (irq->shorthand)
597  return false;
598 
599  rcu_read_lock();
600  map = rcu_dereference(kvm->arch.apic_map);
601 
602  if (!map)
603  goto out;
604 
605  if (irq->dest_mode == 0) { /* physical mode */
606  if (irq->delivery_mode == APIC_DM_LOWEST ||
607  irq->dest_id == 0xff)
608  goto out;
609  dst = &map->phys_map[irq->dest_id & 0xff];
610  } else {
611  u32 mda = irq->dest_id << (32 - map->ldr_bits);
612 
613  dst = map->logical_map[apic_cluster_id(map, mda)];
614 
615  bitmap = apic_logical_id(map, mda);
616 
617  if (irq->delivery_mode == APIC_DM_LOWEST) {
618  int l = -1;
619  for_each_set_bit(i, &bitmap, 16) {
620  if (!dst[i])
621  continue;
622  if (l < 0)
623  l = i;
624  else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
625  l = i;
626  }
627 
628  bitmap = (l >= 0) ? 1 << l : 0;
629  }
630  }
631 
632  for_each_set_bit(i, &bitmap, 16) {
633  if (!dst[i])
634  continue;
635  if (*r < 0)
636  *r = 0;
637  *r += kvm_apic_set_irq(dst[i]->vcpu, irq);
638  }
639 
640  ret = true;
641 out:
642  rcu_read_unlock();
643  return ret;
644 }
645 
646 /*
647  * Add a pending IRQ into lapic.
648  * Return 1 if successfully added and 0 if discarded.
649  */
650 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
651  int vector, int level, int trig_mode)
652 {
653  int result = 0;
654  struct kvm_vcpu *vcpu = apic->vcpu;
655 
656  switch (delivery_mode) {
657  case APIC_DM_LOWEST:
658  vcpu->arch.apic_arb_prio++;
659  case APIC_DM_FIXED:
660  /* FIXME add logic for vcpu on reset */
661  if (unlikely(!apic_enabled(apic)))
662  break;
663 
664  if (trig_mode) {
665  apic_debug("level trig mode for vector %d", vector);
666  apic_set_vector(vector, apic->regs + APIC_TMR);
667  } else
668  apic_clear_vector(vector, apic->regs + APIC_TMR);
669 
670  result = !apic_test_and_set_irr(vector, apic);
671  trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
672  trig_mode, vector, !result);
673  if (!result) {
674  if (trig_mode)
675  apic_debug("level trig mode repeatedly for "
676  "vector %d", vector);
677  break;
678  }
679 
680  kvm_make_request(KVM_REQ_EVENT, vcpu);
681  kvm_vcpu_kick(vcpu);
682  break;
683 
684  case APIC_DM_REMRD:
685  apic_debug("Ignoring delivery mode 3\n");
686  break;
687 
688  case APIC_DM_SMI:
689  apic_debug("Ignoring guest SMI\n");
690  break;
691 
692  case APIC_DM_NMI:
693  result = 1;
694  kvm_inject_nmi(vcpu);
695  kvm_vcpu_kick(vcpu);
696  break;
697 
698  case APIC_DM_INIT:
699  if (!trig_mode || level) {
700  result = 1;
701  vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
702  kvm_make_request(KVM_REQ_EVENT, vcpu);
703  kvm_vcpu_kick(vcpu);
704  } else {
705  apic_debug("Ignoring de-assert INIT to vcpu %d\n",
706  vcpu->vcpu_id);
707  }
708  break;
709 
710  case APIC_DM_STARTUP:
711  apic_debug("SIPI to vcpu %d vector 0x%02x\n",
712  vcpu->vcpu_id, vector);
713  if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
714  result = 1;
715  vcpu->arch.sipi_vector = vector;
716  vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
717  kvm_make_request(KVM_REQ_EVENT, vcpu);
718  kvm_vcpu_kick(vcpu);
719  }
720  break;
721 
722  case APIC_DM_EXTINT:
723  /*
724  * Should only be called by kvm_apic_local_deliver() with LVT0,
725  * before NMI watchdog was enabled. Already handled by
726  * kvm_apic_accept_pic_intr().
727  */
728  break;
729 
730  default:
731  printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
732  delivery_mode);
733  break;
734  }
735  return result;
736 }
737 
738 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
739 {
740  return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
741 }
742 
743 static int apic_set_eoi(struct kvm_lapic *apic)
744 {
745  int vector = apic_find_highest_isr(apic);
746 
747  trace_kvm_eoi(apic, vector);
748 
749  /*
750  * Not every write EOI will has corresponding ISR,
751  * one example is when Kernel check timer on setup_IO_APIC
752  */
753  if (vector == -1)
754  return vector;
755 
756  apic_clear_isr(vector, apic);
757  apic_update_ppr(apic);
758 
759  if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
760  kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
761  int trigger_mode;
762  if (apic_test_vector(vector, apic->regs + APIC_TMR))
763  trigger_mode = IOAPIC_LEVEL_TRIG;
764  else
765  trigger_mode = IOAPIC_EDGE_TRIG;
766  kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
767  }
768  kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
769  return vector;
770 }
771 
772 static void apic_send_ipi(struct kvm_lapic *apic)
773 {
774  u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
775  u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
776  struct kvm_lapic_irq irq;
777 
778  irq.vector = icr_low & APIC_VECTOR_MASK;
779  irq.delivery_mode = icr_low & APIC_MODE_MASK;
780  irq.dest_mode = icr_low & APIC_DEST_MASK;
781  irq.level = icr_low & APIC_INT_ASSERT;
782  irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
783  irq.shorthand = icr_low & APIC_SHORT_MASK;
784  if (apic_x2apic_mode(apic))
785  irq.dest_id = icr_high;
786  else
787  irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
788 
789  trace_kvm_apic_ipi(icr_low, irq.dest_id);
790 
791  apic_debug("icr_high 0x%x, icr_low 0x%x, "
792  "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
793  "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n",
794  icr_high, icr_low, irq.shorthand, irq.dest_id,
795  irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
796  irq.vector);
797 
798  kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq);
799 }
800 
801 static u32 apic_get_tmcct(struct kvm_lapic *apic)
802 {
803  ktime_t remaining;
804  s64 ns;
805  u32 tmcct;
806 
807  ASSERT(apic != NULL);
808 
809  /* if initial count is 0, current count should also be 0 */
810  if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
811  return 0;
812 
813  remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
814  if (ktime_to_ns(remaining) < 0)
815  remaining = ktime_set(0, 0);
816 
817  ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
818  tmcct = div64_u64(ns,
819  (APIC_BUS_CYCLE_NS * apic->divide_count));
820 
821  return tmcct;
822 }
823 
824 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
825 {
826  struct kvm_vcpu *vcpu = apic->vcpu;
827  struct kvm_run *run = vcpu->run;
828 
829  kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
830  run->tpr_access.rip = kvm_rip_read(vcpu);
831  run->tpr_access.is_write = write;
832 }
833 
834 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
835 {
836  if (apic->vcpu->arch.tpr_access_reporting)
837  __report_tpr_access(apic, write);
838 }
839 
840 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
841 {
842  u32 val = 0;
843 
844  if (offset >= LAPIC_MMIO_LENGTH)
845  return 0;
846 
847  switch (offset) {
848  case APIC_ID:
849  if (apic_x2apic_mode(apic))
850  val = kvm_apic_id(apic);
851  else
852  val = kvm_apic_id(apic) << 24;
853  break;
854  case APIC_ARBPRI:
855  apic_debug("Access APIC ARBPRI register which is for P6\n");
856  break;
857 
858  case APIC_TMCCT: /* Timer CCR */
859  if (apic_lvtt_tscdeadline(apic))
860  return 0;
861 
862  val = apic_get_tmcct(apic);
863  break;
864  case APIC_PROCPRI:
865  apic_update_ppr(apic);
866  val = kvm_apic_get_reg(apic, offset);
867  break;
868  case APIC_TASKPRI:
869  report_tpr_access(apic, false);
870  /* fall thru */
871  default:
872  val = kvm_apic_get_reg(apic, offset);
873  break;
874  }
875 
876  return val;
877 }
878 
879 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
880 {
881  return container_of(dev, struct kvm_lapic, dev);
882 }
883 
884 static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
885  void *data)
886 {
887  unsigned char alignment = offset & 0xf;
888  u32 result;
889  /* this bitmask has a bit cleared for each reserved register */
890  static const u64 rmask = 0x43ff01ffffffe70cULL;
891 
892  if ((alignment + len) > 4) {
893  apic_debug("KVM_APIC_READ: alignment error %x %d\n",
894  offset, len);
895  return 1;
896  }
897 
898  if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
899  apic_debug("KVM_APIC_READ: read reserved register %x\n",
900  offset);
901  return 1;
902  }
903 
904  result = __apic_read(apic, offset & ~0xf);
905 
906  trace_kvm_apic_read(offset, result);
907 
908  switch (len) {
909  case 1:
910  case 2:
911  case 4:
912  memcpy(data, (char *)&result + alignment, len);
913  break;
914  default:
915  printk(KERN_ERR "Local APIC read with len = %x, "
916  "should be 1,2, or 4 instead\n", len);
917  break;
918  }
919  return 0;
920 }
921 
922 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
923 {
924  return kvm_apic_hw_enabled(apic) &&
925  addr >= apic->base_address &&
926  addr < apic->base_address + LAPIC_MMIO_LENGTH;
927 }
928 
929 static int apic_mmio_read(struct kvm_io_device *this,
930  gpa_t address, int len, void *data)
931 {
932  struct kvm_lapic *apic = to_lapic(this);
933  u32 offset = address - apic->base_address;
934 
935  if (!apic_mmio_in_range(apic, address))
936  return -EOPNOTSUPP;
937 
938  apic_reg_read(apic, offset, len, data);
939 
940  return 0;
941 }
942 
943 static void update_divide_count(struct kvm_lapic *apic)
944 {
945  u32 tmp1, tmp2, tdcr;
946 
947  tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
948  tmp1 = tdcr & 0xf;
949  tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
950  apic->divide_count = 0x1 << (tmp2 & 0x7);
951 
952  apic_debug("timer divide count is 0x%x\n",
953  apic->divide_count);
954 }
955 
956 static void start_apic_timer(struct kvm_lapic *apic)
957 {
958  ktime_t now;
959  atomic_set(&apic->lapic_timer.pending, 0);
960 
961  if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
962  /* lapic timer in oneshot or periodic mode */
963  now = apic->lapic_timer.timer.base->get_time();
964  apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
966 
967  if (!apic->lapic_timer.period)
968  return;
969  /*
970  * Do not allow the guest to program periodic timers with small
971  * interval, since the hrtimers are not throttled by the host
972  * scheduler.
973  */
974  if (apic_lvtt_period(apic)) {
975  s64 min_period = min_timer_period_us * 1000LL;
976 
977  if (apic->lapic_timer.period < min_period) {
979  "kvm: vcpu %i: requested %lld ns "
980  "lapic timer period limited to %lld ns\n",
981  apic->vcpu->vcpu_id,
982  apic->lapic_timer.period, min_period);
983  apic->lapic_timer.period = min_period;
984  }
985  }
986 
987  hrtimer_start(&apic->lapic_timer.timer,
988  ktime_add_ns(now, apic->lapic_timer.period),
990 
991  apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
992  PRIx64 ", "
993  "timer initial count 0x%x, period %lldns, "
994  "expire @ 0x%016" PRIx64 ".\n", __func__,
995  APIC_BUS_CYCLE_NS, ktime_to_ns(now),
996  kvm_apic_get_reg(apic, APIC_TMICT),
997  apic->lapic_timer.period,
998  ktime_to_ns(ktime_add_ns(now,
999  apic->lapic_timer.period)));
1000  } else if (apic_lvtt_tscdeadline(apic)) {
1001  /* lapic timer in tsc deadline mode */
1002  u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
1003  u64 ns = 0;
1004  struct kvm_vcpu *vcpu = apic->vcpu;
1005  unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1006  unsigned long flags;
1007 
1008  if (unlikely(!tscdeadline || !this_tsc_khz))
1009  return;
1010 
1011  local_irq_save(flags);
1012 
1013  now = apic->lapic_timer.timer.base->get_time();
1014  guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
1015  if (likely(tscdeadline > guest_tsc)) {
1016  ns = (tscdeadline - guest_tsc) * 1000000ULL;
1017  do_div(ns, this_tsc_khz);
1018  }
1019  hrtimer_start(&apic->lapic_timer.timer,
1020  ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
1021 
1022  local_irq_restore(flags);
1023  }
1024 }
1025 
1026 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
1027 {
1028  int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));
1029 
1030  if (apic_lvt_nmi_mode(lvt0_val)) {
1031  if (!nmi_wd_enabled) {
1032  apic_debug("Receive NMI setting on APIC_LVT0 "
1033  "for cpu %d\n", apic->vcpu->vcpu_id);
1034  apic->vcpu->kvm->arch.vapics_in_nmi_mode++;
1035  }
1036  } else if (nmi_wd_enabled)
1037  apic->vcpu->kvm->arch.vapics_in_nmi_mode--;
1038 }
1039 
1040 static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
1041 {
1042  int ret = 0;
1043 
1044  trace_kvm_apic_write(reg, val);
1045 
1046  switch (reg) {
1047  case APIC_ID: /* Local APIC ID */
1048  if (!apic_x2apic_mode(apic))
1049  kvm_apic_set_id(apic, val >> 24);
1050  else
1051  ret = 1;
1052  break;
1053 
1054  case APIC_TASKPRI:
1055  report_tpr_access(apic, true);
1056  apic_set_tpr(apic, val & 0xff);
1057  break;
1058 
1059  case APIC_EOI:
1060  apic_set_eoi(apic);
1061  break;
1062 
1063  case APIC_LDR:
1064  if (!apic_x2apic_mode(apic))
1065  kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
1066  else
1067  ret = 1;
1068  break;
1069 
1070  case APIC_DFR:
1071  if (!apic_x2apic_mode(apic)) {
1072  apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
1073  recalculate_apic_map(apic->vcpu->kvm);
1074  } else
1075  ret = 1;
1076  break;
1077 
1078  case APIC_SPIV: {
1079  u32 mask = 0x3ff;
1080  if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
1081  mask |= APIC_SPIV_DIRECTED_EOI;
1082  apic_set_spiv(apic, val & mask);
1083  if (!(val & APIC_SPIV_APIC_ENABLED)) {
1084  int i;
1085  u32 lvt_val;
1086 
1087  for (i = 0; i < APIC_LVT_NUM; i++) {
1088  lvt_val = kvm_apic_get_reg(apic,
1089  APIC_LVTT + 0x10 * i);
1090  apic_set_reg(apic, APIC_LVTT + 0x10 * i,
1091  lvt_val | APIC_LVT_MASKED);
1092  }
1093  atomic_set(&apic->lapic_timer.pending, 0);
1094 
1095  }
1096  break;
1097  }
1098  case APIC_ICR:
1099  /* No delay here, so we always clear the pending bit */
1100  apic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
1101  apic_send_ipi(apic);
1102  break;
1103 
1104  case APIC_ICR2:
1105  if (!apic_x2apic_mode(apic))
1106  val &= 0xff000000;
1107  apic_set_reg(apic, APIC_ICR2, val);
1108  break;
1109 
1110  case APIC_LVT0:
1111  apic_manage_nmi_watchdog(apic, val);
1112  case APIC_LVTTHMR:
1113  case APIC_LVTPC:
1114  case APIC_LVT1:
1115  case APIC_LVTERR:
1116  /* TODO: Check vector */
1117  if (!kvm_apic_sw_enabled(apic))
1118  val |= APIC_LVT_MASKED;
1119 
1120  val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
1121  apic_set_reg(apic, reg, val);
1122 
1123  break;
1124 
1125  case APIC_LVTT:
1126  if ((kvm_apic_get_reg(apic, APIC_LVTT) &
1127  apic->lapic_timer.timer_mode_mask) !=
1128  (val & apic->lapic_timer.timer_mode_mask))
1129  hrtimer_cancel(&apic->lapic_timer.timer);
1130 
1131  if (!kvm_apic_sw_enabled(apic))
1132  val |= APIC_LVT_MASKED;
1133  val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
1134  apic_set_reg(apic, APIC_LVTT, val);
1135  break;
1136 
1137  case APIC_TMICT:
1138  if (apic_lvtt_tscdeadline(apic))
1139  break;
1140 
1141  hrtimer_cancel(&apic->lapic_timer.timer);
1142  apic_set_reg(apic, APIC_TMICT, val);
1143  start_apic_timer(apic);
1144  break;
1145 
1146  case APIC_TDCR:
1147  if (val & 4)
1148  apic_debug("KVM_WRITE:TDCR %x\n", val);
1149  apic_set_reg(apic, APIC_TDCR, val);
1150  update_divide_count(apic);
1151  break;
1152 
1153  case APIC_ESR:
1154  if (apic_x2apic_mode(apic) && val != 0) {
1155  apic_debug("KVM_WRITE:ESR not zero %x\n", val);
1156  ret = 1;
1157  }
1158  break;
1159 
1160  case APIC_SELF_IPI:
1161  if (apic_x2apic_mode(apic)) {
1162  apic_reg_write(apic, APIC_ICR, 0x40000 | (val & 0xff));
1163  } else
1164  ret = 1;
1165  break;
1166  default:
1167  ret = 1;
1168  break;
1169  }
1170  if (ret)
1171  apic_debug("Local APIC Write to read-only register %x\n", reg);
1172  return ret;
1173 }
1174 
1175 static int apic_mmio_write(struct kvm_io_device *this,
1176  gpa_t address, int len, const void *data)
1177 {
1178  struct kvm_lapic *apic = to_lapic(this);
1179  unsigned int offset = address - apic->base_address;
1180  u32 val;
1181 
1182  if (!apic_mmio_in_range(apic, address))
1183  return -EOPNOTSUPP;
1184 
1185  /*
1186  * APIC register must be aligned on 128-bits boundary.
1187  * 32/64/128 bits registers must be accessed thru 32 bits.
1188  * Refer SDM 8.4.1
1189  */
1190  if (len != 4 || (offset & 0xf)) {
1191  /* Don't shout loud, $infamous_os would cause only noise. */
1192  apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
1193  return 0;
1194  }
1195 
1196  val = *(u32*)data;
1197 
1198  /* too common printing */
1199  if (offset != APIC_EOI)
1200  apic_debug("%s: offset 0x%x with length 0x%x, and value is "
1201  "0x%x\n", __func__, offset, len, val);
1202 
1203  apic_reg_write(apic, offset & 0xff0, val);
1204 
1205  return 0;
1206 }
1207 
1208 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
1209 {
1210  if (kvm_vcpu_has_lapic(vcpu))
1211  apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
1212 }
1214 
1215 void kvm_free_lapic(struct kvm_vcpu *vcpu)
1216 {
1217  struct kvm_lapic *apic = vcpu->arch.apic;
1218 
1219  if (!vcpu->arch.apic)
1220  return;
1221 
1222  hrtimer_cancel(&apic->lapic_timer.timer);
1223 
1224  if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
1225  static_key_slow_dec_deferred(&apic_hw_disabled);
1226 
1227  if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
1228  static_key_slow_dec_deferred(&apic_sw_disabled);
1229 
1230  if (apic->regs)
1231  free_page((unsigned long)apic->regs);
1232 
1233  kfree(apic);
1234 }
1235 
1236 /*
1237  *----------------------------------------------------------------------
1238  * LAPIC interface
1239  *----------------------------------------------------------------------
1240  */
1241 
/*
 * NOTE(review): the signature line for this body (listing line 1242) is
 * absent from this extract.  Presumably this is
 * kvm_get_lapic_tscdeadline_msr(), taking a vcpu pointer -- confirm
 * against the original file.
 *
 * Returns the stored lapic_timer.tscdeadline value, or 0 when the vcpu has
 * no local APIC or the LVT timer is in one-shot or periodic mode.
 */
1243 {
1244  struct kvm_lapic *apic = vcpu->arch.apic;
1245 
1246  if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
1247  apic_lvtt_period(apic))
1248  return 0;
1249 
1250  return apic->lapic_timer.tscdeadline;
1251 }
1252 
/*
 * NOTE(review): the signature line for this body (listing line 1253) is
 * absent from this extract.  Presumably this is
 * kvm_set_lapic_tscdeadline_msr(), taking a vcpu pointer and the new
 * deadline in `data` -- confirm against the original file.
 *
 * Ignored when the vcpu has no local APIC or the LVT timer is in one-shot
 * or periodic mode.  Otherwise the running timer is cancelled, the new
 * deadline is stored and the timer is started again.
 */
1254 {
1255  struct kvm_lapic *apic = vcpu->arch.apic;
1256 
1257  if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
1258  apic_lvtt_period(apic))
1259  return;
1260 
/* Cancel before updating so the old deadline cannot fire afterwards. */
1261  hrtimer_cancel(&apic->lapic_timer.timer);
1262  apic->lapic_timer.tscdeadline = data;
1263  start_apic_timer(apic);
1264 }
1265 
1266 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
1267 {
1268  struct kvm_lapic *apic = vcpu->arch.apic;
1269 
1270  if (!kvm_vcpu_has_lapic(vcpu))
1271  return;
1272 
1273  apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
1274  | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
1275 }
1276 
/*
 * NOTE(review): the signature line for this body (listing line 1277) is
 * absent from this extract.  Presumably this is kvm_lapic_get_cr8(),
 * taking a vcpu pointer -- confirm against the original file.
 *
 * Returns bits 7:4 of APIC_TASKPRI shifted down to bits 3:0, or 0 when the
 * vcpu has no local APIC.
 */
1278 {
1279  u64 tpr;
1280 
1281  if (!kvm_vcpu_has_lapic(vcpu))
1282  return 0;
1283 
1284  tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
1285 
1286  return (tpr & 0xf0) >> 4;
1287 }
1288 
/*
 * NOTE(review): the signature line for this body (listing line 1289) is
 * absent from this extract.  Presumably this is kvm_lapic_set_base(),
 * taking a vcpu pointer and the new APIC base MSR value in `value` (it is
 * called that way elsewhere in this file) -- confirm against the original.
 *
 * Stores `value` as the vcpu's APIC base MSR, keeps the apic_hw_disabled
 * static key in step with the enable bit, and refreshes state derived
 * from the base value.
 */
1290 {
1291  struct kvm_lapic *apic = vcpu->arch.apic;
1292 
/* No local APIC: just record the value with the BSP flag forced on. */
1293  if (!apic) {
1294  value |= MSR_IA32_APICBASE_BSP;
1295  vcpu->arch.apic_base = value;
1296  return;
1297  }
1298 
1299  /* update jump label if enable bit changes */
1300  if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
1301  if (value & MSR_IA32_APICBASE_ENABLE)
1302  static_key_slow_dec_deferred(&apic_hw_disabled);
1303  else
1304  static_key_slow_inc(&apic_hw_disabled.key);
1305  recalculate_apic_map(vcpu->kvm);
1306  }
1307 
/* Only the vcpu reported by kvm_vcpu_is_bsp() may keep the BSP flag. */
1308  if (!kvm_vcpu_is_bsp(apic->vcpu))
1309  value &= ~MSR_IA32_APICBASE_BSP;
1310 
1311  vcpu->arch.apic_base = value;
/*
 * In x2APIC mode the LDR is computed from the APIC ID: the ID's upper
 * bits (id >> 4) go to bits 16 and up, and a single bit selected by the
 * low four ID bits is set in the low half.
 */
1312  if (apic_x2apic_mode(apic)) {
1313  u32 id = kvm_apic_id(apic);
1314  u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
1315  kvm_apic_set_ldr(apic, ldr);
1316  }
1317  apic->base_address = apic->vcpu->arch.apic_base &
/*
 * NOTE(review): the right-hand operand of the '&' above (listing line
 * 1318) is absent from this extract, so the assignment is incomplete as
 * shown.  Presumably it is a mask selecting the base-address bits of the
 * MSR -- restore it from the original file.
 */
1319 
1320  /* with FSB delivery interrupt, we can restart APIC functionality */
1321  apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
1322  "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
1323 
1324 }
1325 
/*
 * Put @vcpu's local APIC into its reset state: stop the timer, reprogram
 * the ID and version, mask every LVT entry, clear the register set and
 * the cached interrupt bookkeeping, and reset the per-vcpu PV EOI,
 * arbitration priority and attention state.
 *
 * The vcpu and its APIC must be non-NULL (both are ASSERTed).
 */
1326 void kvm_lapic_reset(struct kvm_vcpu *vcpu)
1327 {
1328  struct kvm_lapic *apic;
1329  int i;
1330 
1331  apic_debug("%s\n", __func__);
1332 
1333  ASSERT(vcpu);
1334  apic = vcpu->arch.apic;
1335  ASSERT(apic != NULL);
1336 
1337  /* Stop the timer in case it's a reset to an active apic */
1338  hrtimer_cancel(&apic->lapic_timer.timer);
1339 
1340  kvm_apic_set_id(apic, vcpu->vcpu_id);
1341  kvm_apic_set_version(apic->vcpu);
1342 
/* LVT registers are laid out 0x10 apart starting at APIC_LVTT. */
1343  for (i = 0; i < APIC_LVT_NUM; i++)
1344  apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
1345  apic_set_reg(apic, APIC_LVT0,
/*
 * NOTE(review): the value argument and closing of the call above (listing
 * line 1346) are absent from this extract, so the statement is incomplete
 * as shown -- restore it from the original file.
 */
1347 
1348  apic_set_reg(apic, APIC_DFR, 0xffffffffU);
1349  apic_set_spiv(apic, 0xff);
1350  apic_set_reg(apic, APIC_TASKPRI, 0);
1351  kvm_apic_set_ldr(apic, 0);
1352  apic_set_reg(apic, APIC_ESR, 0);
1353  apic_set_reg(apic, APIC_ICR, 0);
1354  apic_set_reg(apic, APIC_ICR2, 0);
1355  apic_set_reg(apic, APIC_TDCR, 0);
1356  apic_set_reg(apic, APIC_TMICT, 0);
/* IRR, ISR and TMR each span eight registers spaced 0x10 apart. */
1357  for (i = 0; i < 8; i++) {
1358  apic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
1359  apic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
1360  apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
1361  }
/* Cached summaries of the IRR/ISR contents that were just cleared. */
1362  apic->irr_pending = false;
1363  apic->isr_count = 0;
1364  apic->highest_isr_cache = -1;
1365  update_divide_count(apic);
1366  atomic_set(&apic->lapic_timer.pending, 0);
1367  if (kvm_vcpu_is_bsp(vcpu))
1368  kvm_lapic_set_base(vcpu,
1369  vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
1370  vcpu->arch.pv_eoi.msr_val = 0;
1371  apic_update_ppr(apic);
1372 
1373  vcpu->arch.apic_arb_prio = 0;
1374  vcpu->arch.apic_attention = 0;
1375 
1376  apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
1377  "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
1378  vcpu, kvm_apic_id(apic),
1379  vcpu->arch.apic_base, apic->base_address);
1380 }
1381 
1382 /*
1383  *----------------------------------------------------------------------
1384  * timer interface
1385  *----------------------------------------------------------------------
1386  */
1387 
1388 static bool lapic_is_periodic(struct kvm_lapic *apic)
1389 {
1390  return apic_lvtt_period(apic);
1391 }
1392 
/*
 * NOTE(review): the signature line for this body (listing line 1393) is
 * absent from this extract.  Presumably this is apic_has_pending_timer(),
 * taking a vcpu pointer -- confirm against the original file.
 *
 * Returns the current lapic_timer.pending count when the vcpu has a local
 * APIC, the APIC is enabled and the LVTT entry is enabled; otherwise 0.
 */
1394 {
1395  struct kvm_lapic *apic = vcpu->arch.apic;
1396 
1397  if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
1398  apic_lvt_enabled(apic, APIC_LVTT))
1399  return atomic_read(&apic->lapic_timer.pending);
1400 
1401  return 0;
1402 }
1403 
1404 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
1405 {
1406  u32 reg = kvm_apic_get_reg(apic, lvt_type);
1407  int vector, mode, trig_mode;
1408 
1409  if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
1410  vector = reg & APIC_VECTOR_MASK;
1411  mode = reg & APIC_MODE_MASK;
1412  trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
1413  return __apic_accept_irq(apic, mode, vector, 1, trig_mode);
1414  }
1415  return 0;
1416 }
1417 
/*
 * NOTE(review): the signature line for this body (listing line 1418) is
 * absent from this extract.  Presumably this is kvm_apic_nmi_wd_deliver(),
 * taking a vcpu pointer -- confirm against the original file.
 */
1419 {
1420  struct kvm_lapic *apic = vcpu->arch.apic;
1421 
1422  if (apic)
/*
 * NOTE(review): the statement guarded by the `if` above (listing line
 * 1423) is absent from this extract, so the body is incomplete as shown
 * -- restore it from the original file.
 */
1424 }
1425 
/*
 * I/O device callbacks for the local APIC: reads go to apic_mmio_read(),
 * writes to apic_mmio_write().
 */
1426 static const struct kvm_io_device_ops apic_mmio_ops = {
1427  .read = apic_mmio_read,
1428  .write = apic_mmio_write,
1429 };
1430 
/*
 * hrtimer callback for the local APIC timer.
 *
 * Marks a timer interrupt as pending (at most one at a time) and raises
 * KVM_REQ_PENDING_TIMER on the owning vcpu.  In periodic mode the expiry
 * is advanced by one period and HRTIMER_RESTART is returned; otherwise
 * HRTIMER_NORESTART.
 */
1431 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
1432 {
/* Walk back from the embedded hrtimer to the kvm_timer and then the APIC. */
1433  struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
1434  struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
1435  struct kvm_vcpu *vcpu = apic->vcpu;
1436  wait_queue_head_t *q = &vcpu->wq;
1437 
1438  /*
1439  * There is a race window between reading and incrementing, but we do
1440  * not care about potentially losing timer events in the !reinject
1441  * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
1442  * in vcpu_enter_guest.
1443  */
1444  if (!atomic_read(&ktimer->pending)) {
1445  atomic_inc(&ktimer->pending);
1446  /* FIXME: this code should not know anything about vcpus */
1447  kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
1448  }
1449 
1450  if (waitqueue_active(q))
/*
 * NOTE(review): the statement guarded by the `if` above (listing line
 * 1451) is absent from this extract; presumably it wakes the waiters on
 * `q`.  As shown, the `if` would instead govern the periodic check below
 * -- restore the missing line from the original file.
 */
1452 
1453  if (lapic_is_periodic(apic)) {
1454  hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
1455  return HRTIMER_RESTART;
1456  } else
1457  return HRTIMER_NORESTART;
1458 }
1459 
/*
 * Allocate and initialise the local APIC for @vcpu.
 *
 * Allocates the kvm_lapic structure and a zeroed register page, hooks up
 * the timer callback, sets the initial APIC base, resets the APIC and
 * registers the MMIO callbacks.
 *
 * Returns 0 on success or -ENOMEM when either allocation fails.
 */
1460 int kvm_create_lapic(struct kvm_vcpu *vcpu)
1461 {
1462  struct kvm_lapic *apic;
1463 
1464  ASSERT(vcpu != NULL);
1465  apic_debug("apic_init %d\n", vcpu->vcpu_id);
1466 
1467  apic = kzalloc(sizeof(*apic), GFP_KERNEL);
1468  if (!apic)
1469  goto nomem;
1470 
1471  vcpu->arch.apic = apic;
1472 
1473  apic->regs = (void *)get_zeroed_page(GFP_KERNEL);
1474  if (!apic->regs) {
1475  printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
1476  vcpu->vcpu_id);
1477  goto nomem_free_apic;
1478  }
1479  apic->vcpu = vcpu;
1480 
/*
 * NOTE(review): listing lines 1481-1482 are absent from this extract;
 * presumably they initialise lapic_timer.timer before its callback is
 * assigned below -- restore them from the original file.
 */
1483  apic->lapic_timer.timer.function = apic_timer_fn;
1484 
1485  /*
1486  * APIC is created enabled. This will prevent kvm_lapic_set_base from
1487  * thinking that APIC state has changed.
1488  */
1489  vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
1490  kvm_lapic_set_base(vcpu,
/*
 * NOTE(review): the second argument and closing of the call above
 * (listing line 1491) are absent from this extract, so the statement is
 * incomplete as shown -- restore it from the original file.
 */
1492 
1493  static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
1494  kvm_lapic_reset(vcpu);
1495  kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
1496 
1497  return 0;
/*
 * NOTE(review): vcpu->arch.apic was already set to `apic` above and is
 * not cleared on this path, so it is left pointing at freed memory after
 * the kfree().  Consider resetting it to NULL here.
 */
1498 nomem_free_apic:
1499  kfree(apic);
1500 nomem:
1501  return -ENOMEM;
1502 }
1503 
/*
 * NOTE(review): the signature line for this body (listing line 1504) is
 * absent from this extract.  Presumably this is kvm_apic_has_interrupt(),
 * taking a vcpu pointer (it is called that way elsewhere in this file)
 * -- confirm against the original file.
 *
 * Returns the highest vector found in the IRR, or -1 when the vcpu has no
 * local APIC, the APIC is not enabled, no vector is found, or the found
 * vector's upper nibble does not exceed the APIC_PROCPRI register value.
 */
1505 {
1506  struct kvm_lapic *apic = vcpu->arch.apic;
1507  int highest_irr;
1508 
1509  if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
1510  return -1;
1511 
/* Refresh the processor priority before comparing against it. */
1512  apic_update_ppr(apic);
1513  highest_irr = apic_find_highest_irr(apic);
1514  if ((highest_irr == -1) ||
1515  ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
1516  return -1;
1517  return highest_irr;
1518 }
1519 
/*
 * NOTE(review): the signature line for this body (listing line 1520) is
 * absent from this extract.  Presumably this is
 * kvm_apic_accept_pic_intr(), taking a vcpu pointer -- confirm against the
 * original file.
 *
 * Returns 1 when the APIC is hardware-disabled, or when LVT0 is unmasked
 * and a further condition (missing below) holds; otherwise 0.
 */
1521 {
1522  u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
1523  int r = 0;
1524 
1525  if (!kvm_apic_hw_enabled(vcpu->arch.apic))
1526  r = 1;
1527  if ((lvt0 & APIC_LVT_MASKED) == 0 &&
/*
 * NOTE(review): the second half of the condition above (listing line
 * 1528) is absent from this extract, so the `if` is incomplete as shown
 * -- restore it from the original file.
 */
1529  r = 1;
1530  return r;
1531 }
1532 
/*
 * NOTE(review): the signature line for this body (listing line 1533) is
 * absent from this extract.  Presumably this is
 * kvm_inject_apic_timer_irqs(), taking a vcpu pointer -- confirm against
 * the original file.
 *
 * When a timer interrupt is pending, tries to deliver it through the LVTT
 * entry and decrements the pending count only if delivery reports success.
 * No-op when the vcpu has no local APIC.
 */
1534 {
1535  struct kvm_lapic *apic = vcpu->arch.apic;
1536 
1537  if (!kvm_vcpu_has_lapic(vcpu))
1538  return;
1539 
1540  if (atomic_read(&apic->lapic_timer.pending) > 0) {
1541  if (kvm_apic_local_deliver(apic, APIC_LVTT))
1542  atomic_dec(&apic->lapic_timer.pending);
1543  }
1544 }
1545 
/*
 * NOTE(review): the signature line for this body (listing line 1546) is
 * absent from this extract.  Presumably this is kvm_get_apic_interrupt(),
 * taking a vcpu pointer -- confirm against the original file.
 *
 * Takes the vector reported by kvm_apic_has_interrupt(), marks it
 * in-service, updates the processor priority and clears it from the IRR.
 * Returns that vector, or -1 when none is deliverable.
 */
1547 {
1548  int vector = kvm_apic_has_interrupt(vcpu);
1549  struct kvm_lapic *apic = vcpu->arch.apic;
1550 
1551  if (vector == -1)
1552  return -1;
1553 
1554  apic_set_isr(vector, apic);
1555  apic_update_ppr(apic);
1556  apic_clear_irr(vector, apic);
1557  return vector;
1558 }
1559 
/*
 * NOTE(review): the first line of this function's signature (listing line
 * 1560) is absent from this extract; only the trailing parameter line
 * survives.  Presumably this is kvm_apic_post_state_restore(), taking a
 * vcpu pointer followed by the saved state `s` -- confirm against the
 * original file.
 *
 * Loads the register image in `s` into the vcpu's local APIC and rebuilds
 * the state derived from it (timer, cached ISR count, priority).
 */
1561  struct kvm_lapic_state *s)
1562 {
1563  struct kvm_lapic *apic = vcpu->arch.apic;
1564 
1565  kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
1566  /* set SPIV separately to get count of SW disabled APICs right */
1567  apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
1568  memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
1569  /* call kvm_apic_set_id() to put apic into apic_map */
1570  kvm_apic_set_id(apic, kvm_apic_id(apic));
1571  kvm_apic_set_version(vcpu);
1572 
1573  apic_update_ppr(apic);
/* Restart the timer from the freshly loaded register values. */
1574  hrtimer_cancel(&apic->lapic_timer.timer);
1575  update_divide_count(apic);
1576  start_apic_timer(apic);
/* irr_pending is set unconditionally; the ISR count is recomputed from the image. */
1577  apic->irr_pending = true;
1578  apic->isr_count = count_vectors(apic->regs + APIC_ISR);
1579  apic->highest_isr_cache = -1;
1580  kvm_make_request(KVM_REQ_EVENT, vcpu);
1581 }
1582 
/*
 * NOTE(review): the signature line for this body (listing line 1583) is
 * absent from this extract.  Presumably this is
 * __kvm_migrate_apic_timer(), taking a vcpu pointer -- confirm against the
 * original file.
 *
 * Cancels the local APIC timer and, only if hrtimer_cancel() returns
 * non-zero, starts it again with its existing expiry in absolute mode.
 * No-op when the vcpu has no local APIC.
 */
1584 {
1585  struct hrtimer *timer;
1586 
1587  if (!kvm_vcpu_has_lapic(vcpu))
1588  return;
1589 
1590  timer = &vcpu->arch.apic->lapic_timer.timer;
1591  if (hrtimer_cancel(timer))
1592  hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
1593 }
1594 
1595 /*
1596  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
1597  *
1598  * Detect whether guest triggered PV EOI since the
1599  * last entry. If yes, set EOI on guests's behalf.
1600  * Clear PV EOI in guest memory in any case.
1601  */
1602 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
1603  struct kvm_lapic *apic)
1604 {
1605  bool pending;
1606  int vector;
1607  /*
1608  * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
1609  * and KVM_PV_EOI_ENABLED in guest memory as follows:
1610  *
1611  * KVM_APIC_PV_EOI_PENDING is unset:
1612  * -> host disabled PV EOI.
1613  * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
1614  * -> host enabled PV EOI, guest did not execute EOI yet.
1615  * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
1616  * -> host enabled PV EOI, guest executed EOI.
1617  */
1618  BUG_ON(!pv_eoi_enabled(vcpu));
1619  pending = pv_eoi_get_pending(vcpu);
1620  /*
1621  * Clear pending bit in any case: it will be set again on vmentry.
1622  * While this might not be ideal from performance point of view,
1623  * this makes sure pv eoi is only enabled when we know it's safe.
1624  */
1625  pv_eoi_clr_pending(vcpu);
1626  if (pending)
1627  return;
1628  vector = apic_set_eoi(apic);
1629  trace_kvm_pv_eoi(apic, vector);
1630 }
1631 
/*
 * NOTE(review): the signature line for this body (listing line 1632) is
 * absent from this extract.  Presumably this is
 * kvm_lapic_sync_from_vapic(), taking a vcpu pointer -- confirm against
 * the original file.
 *
 * First syncs PV EOI state from the guest if KVM_APIC_PV_EOI_PENDING is
 * set.  Then, if KVM_APIC_CHECK_VAPIC is set, reads a 32-bit word from the
 * vapic page at the offset given by vapic_addr and uses its low byte as
 * the new TPR.
 */
1633 {
1634  u32 data;
1635  void *vapic;
1636 
1637  if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
1638  apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
1639 
1640  if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1641  return;
1642 
1643  vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1644  data = *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr));
1645  kunmap_atomic(vapic);
1646 
1647  apic_set_tpr(vcpu->arch.apic, data & 0xff);
1648 }
1649 
1650 /*
1651  * apic_sync_pv_eoi_to_guest - called before vmentry
1652  *
1653  * Detect whether it's safe to enable PV EOI and
1654  * if yes do so.
1655  */
/*
 * Sets the PV EOI pending flag for the vcpu unless one of the conditions
 * below rules it out, in which case nothing is done.
 */
1656 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
1657  struct kvm_lapic *apic)
1658 {
1659  if (!pv_eoi_enabled(vcpu) ||
1660  /* IRR set or many bits in ISR: could be nested. */
1661  apic->irr_pending ||
1662  /* Cache not set: could be safe but we don't bother. */
1663  apic->highest_isr_cache == -1 ||
1664  /* Need EOI to update ioapic. */
/*
 * NOTE(review): the final operand of this condition, together with the
 * closing ") {" (listing line 1665), is absent from this extract, so the
 * `if` is incomplete as shown -- restore it from the original file.
 */
1666  /*
1667  * PV EOI was disabled by apic_sync_pv_eoi_from_guest
1668  * so we need not do anything here.
1669  */
1670  return;
1671  }
1672 
1673  pv_eoi_set_pending(apic->vcpu);
1674 }
1675 
/*
 * NOTE(review): the signature line for this body (listing line 1676) is
 * absent from this extract.  Presumably this is kvm_lapic_sync_to_vapic(),
 * taking a vcpu pointer -- confirm against the original file.
 *
 * First syncs PV EOI state to the guest.  Then, if KVM_APIC_CHECK_VAPIC is
 * set, packs the TPR and the highest ISR/IRR vectors into one 32-bit word
 * and stores it in the vapic page at the offset given by vapic_addr.
 */
1677 {
1678  u32 data, tpr;
1679  int max_irr, max_isr;
1680  struct kvm_lapic *apic = vcpu->arch.apic;
1681  void *vapic;
1682 
1683  apic_sync_pv_eoi_to_guest(vcpu, apic);
1684 
1685  if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
1686  return;
1687 
1688  tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
/* A negative result from either lookup is reported as 0. */
1689  max_irr = apic_find_highest_irr(apic);
1690  if (max_irr < 0)
1691  max_irr = 0;
1692  max_isr = apic_find_highest_isr(apic);
1693  if (max_isr < 0)
1694  max_isr = 0;
/* Layout: bits 7:0 TPR, bits 15:12 upper nibble of max ISR, bits 31:24 max IRR. */
1695  data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
1696 
1697  vapic = kmap_atomic(vcpu->arch.apic->vapic_page);
1698  *(u32 *)(vapic + offset_in_page(vcpu->arch.apic->vapic_addr)) = data;
1699  kunmap_atomic(vapic);
1700 }
1701 
/*
 * NOTE(review): the signature line for this body (listing line 1702) is
 * absent from this extract.  Presumably this is
 * kvm_lapic_set_vapic_addr(), taking a vcpu pointer and the address in
 * `vapic_addr` -- confirm against the original file.
 *
 * Records the vapic address and sets KVM_APIC_CHECK_VAPIC in
 * apic_attention when the address is non-zero, clearing it otherwise.
 */
1703 {
1704  vcpu->arch.apic->vapic_addr = vapic_addr;
1705  if (vapic_addr)
1706  __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
1707  else
1708  __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
1709 }
1710 
1711 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
1712 {
1713  struct kvm_lapic *apic = vcpu->arch.apic;
1714  u32 reg = (msr - APIC_BASE_MSR) << 4;
1715 
1716  if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1717  return 1;
1718 
1719  /* if this is ICR write vector before command */
1720  if (msr == 0x830)
1721  apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1722  return apic_reg_write(apic, reg, (u32)data);
1723 }
1724 
1725 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
1726 {
1727  struct kvm_lapic *apic = vcpu->arch.apic;
1728  u32 reg = (msr - APIC_BASE_MSR) << 4, low, high = 0;
1729 
1730  if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
1731  return 1;
1732 
1733  if (apic_reg_read(apic, reg, 4, &low))
1734  return 1;
1735  if (msr == 0x830)
1736  apic_reg_read(apic, APIC_ICR2, 4, &high);
1737 
1738  *data = (((u64)high) << 32) | low;
1739 
1740  return 0;
1741 }
1742 
1743 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
1744 {
1745  struct kvm_lapic *apic = vcpu->arch.apic;
1746 
1747  if (!kvm_vcpu_has_lapic(vcpu))
1748  return 1;
1749 
1750  /* if this is ICR write vector before command */
1751  if (reg == APIC_ICR)
1752  apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
1753  return apic_reg_write(apic, reg, (u32)data);
1754 }
1755 
1756 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
1757 {
1758  struct kvm_lapic *apic = vcpu->arch.apic;
1759  u32 low, high = 0;
1760 
1761  if (!kvm_vcpu_has_lapic(vcpu))
1762  return 1;
1763 
1764  if (apic_reg_read(apic, reg, 4, &low))
1765  return 1;
1766  if (reg == APIC_ICR)
1767  apic_reg_read(apic, APIC_ICR2, 4, &high);
1768 
1769  *data = (((u64)high) << 32) | low;
1770 
1771  return 0;
1772 }
1773 
1774 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
1775 {
1776  u64 addr = data & ~KVM_MSR_ENABLED;
1777  if (!IS_ALIGNED(addr, 4))
1778  return 1;
1779 
1780  vcpu->arch.pv_eoi.msr_val = data;
1781  if (!pv_eoi_enabled(vcpu))
1782  return 0;
1783  return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
1784  addr);
1785 }
1786 
1787 void kvm_lapic_init(void)
1788 {
1789  /* do not patch jump label more than once per second */
1790  jump_label_rate_limit(&apic_hw_disabled, HZ);
1791  jump_label_rate_limit(&apic_sw_disabled, HZ);
1792 }