/******************************************************************************
 * x86_emulate.c
 *
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#ifndef __KERNEL__
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be emulated.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)	/* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)	/* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)	/* Register operand. */
#define DstMem      (3<<1)	/* Memory operand. */
#define DstAcc      (4<<1)	/* Destination Accumulator */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)	/* No source operand. */
#define SrcImplicit (0<<4)	/* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)	/* Register operand. */
#define SrcMem      (2<<4)	/* Memory operand. */
#define SrcMem16    (3<<4)	/* Memory operand (16-bit). */
#define SrcMem32    (4<<4)	/* Memory operand (32-bit). */
#define SrcImm      (5<<4)	/* Immediate operand. */
#define SrcImmByte  (6<<4)	/* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)	/* Implied '1' */
#define SrcImmUByte (8<<4)	/* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)	/* Immediate operand, unsigned */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)	/* Memory operand is absolute displacement */
#define String      (1<<12)	/* String instruction (rep capable) */
#define Stack       (1<<13)	/* Stack instruction (push/pop) */
#define Group       (1<<14)	/* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)	/* Alternate decoding of mod == 3 */
#define GroupMask   0xff	/* Group number stored in bits 0:7 */
/* Misc flags */
#define Lock        (1<<26)	/* lock prefix is allowed for the instruction */
#define Priv        (1<<27)	/* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mem16   (5<<29)	/* Used for Ep encoding. First argument has to be
				   in memory and second argument is located
				   immediately after the first one in memory. */
#define Src2Mask    (7<<29)
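/*
 * Worked example (illustrative, not from the original source): the one-byte
 * opcode 0x01 ("add r/m, r") is described below as
 * DstMem | SrcReg | ModRM | Lock -- the destination comes from the ModRM
 * r/m field, the source from the ModRM reg field, a ModRM byte follows the
 * opcode, and a LOCK prefix is legal for it.
 */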
enum {
	Group1_80, Group1_81, Group1_82, Group1_83,
	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
	Group8, Group9,
};
static u32 opcode_table[256] = {
	/* 0x00 - 0x07 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x08 - 0x0F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, 0,
	/* 0x10 - 0x17 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x18 - 0x1F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	/* 0x20 - 0x27 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
	/* 0x28 - 0x2F */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
	/* 0x30 - 0x37 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0,
	/* 0x38 - 0x3F */
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
	ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
	0, 0,
	/* 0x40 - 0x47 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x48 - 0x4F */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x50 - 0x57 */
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
	/* 0x58 - 0x5F */
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
	/* 0x60 - 0x67 */
	ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
	0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */,
	0, 0, 0, 0,
	/* 0x68 - 0x6F */
	SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */
	/* 0x70 - 0x77 */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x78 - 0x7F */
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
	/* 0x80 - 0x87 */
	Group | Group1_80, Group | Group1_81,
	Group | Group1_82, Group | Group1_83,
	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	/* 0x88 - 0x8F */
	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
	DstReg | SrcMem | ModRM | Mov, Group | Group1A,
	/* 0x90 - 0x97 */
	DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
	/* 0x98 - 0x9F */
	0, 0, SrcImm | Src2Imm16 | No64, 0,
	ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
	/* 0xA0 - 0xA7 */
	ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
	ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | String, ImplicitOps | String,
	/* 0xA8 - 0xAF */
	0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String,
	ByteOp | ImplicitOps | String, ImplicitOps | String,
	/* 0xB0 - 0xB7 */
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
	/* 0xB8 - 0xBF */
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
	/* 0xC0 - 0xC7 */
	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
	0, ImplicitOps | Stack, 0, 0,
	ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
	/* 0xC8 - 0xCF */
	0, 0, 0, ImplicitOps | Stack,
	ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
	/* 0xD0 - 0xD7 */
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
	0, 0, 0, 0,
	/* 0xD8 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xE7 */
	0, 0, 0, 0,
	ByteOp | SrcImmUByte, SrcImmUByte,
	ByteOp | SrcImmUByte, SrcImmUByte,
	/* 0xE8 - 0xEF */
	SrcImm | Stack, SrcImm | ImplicitOps,
	SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
	/* 0xF0 - 0xF7 */
	0, 0, 0, 0,
	ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
	/* 0xF8 - 0xFF */
	ImplicitOps, 0, ImplicitOps, ImplicitOps,
	ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
static u32 twobyte_table[256] = {
	/* 0x00 - 0x0F */
	0, Group | GroupDual | Group7, 0, 0,
	0, ImplicitOps, ImplicitOps | Priv, 0,
	ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
	0, ImplicitOps | ModRM, 0, 0,
	/* 0x10 - 0x1F */
	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 - 0x2F */
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	ModRM | ImplicitOps | Priv, ModRM | Priv,
	0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x30 - 0x3F */
	ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
	ImplicitOps, ImplicitOps | Priv, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x40 - 0x47 */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x48 - 0x4F */
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
	/* 0x50 - 0x5F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x60 - 0x6F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x70 - 0x7F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x80 - 0x8F */
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
	/* 0x90 - 0x9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xA0 - 0xA7 */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM, 0, 0,
	/* 0xA8 - 0xAF */
	ImplicitOps | Stack, ImplicitOps | Stack,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	DstMem | SrcReg | Src2ImmByte | ModRM,
	DstMem | SrcReg | Src2CL | ModRM,
	/* 0xB0 - 0xB7 */
	ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
	0, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem16 | ModRM | Mov,
	/* 0xB8 - 0xBF */
	0, 0,
	Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
	0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
	DstReg | SrcMem16 | ModRM | Mov,
	/* 0xC0 - 0xC7 */
	0, 0, 0, DstMem | SrcReg | ModRM | Mov,
	0, 0, 0, Group | GroupDual | Group9,
	0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xD0 - 0xDF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xE0 - 0xEF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xF0 - 0xFF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static u32 group_table[] = {
	[Group1_80*8] =
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM | Lock,
	ByteOp | DstMem | SrcImm | ModRM,
	[Group1_81*8] =
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM | Lock,
	DstMem | SrcImm | ModRM,
	[Group1_82*8] =
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
	ByteOp | DstMem | SrcImm | ModRM | No64,
	[Group1_83*8] =
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM,
	[Group1A*8] =
	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
	[Group3_Byte*8] =
	ByteOp | SrcImm | DstMem | ModRM, 0,
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group3*8] =
	DstMem | SrcImm | ModRM, 0,
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	0, 0, 0, 0,
	[Group4*8] =
	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
	0, 0, 0, 0, 0, 0,
	[Group5*8] =
	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
	SrcMem | ModRM | Stack, 0,
	SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
	SrcMem | ModRM | Stack, 0,
	[Group7*8] =
	0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
	[Group8*8] =
	0, 0, 0, 0,
	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
	DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
	[Group9*8] =
	0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};

static u32 group2_table[] = {
	[Group7*8] =
	SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
	SrcNone | ModRM | DstMem | Mov, 0,
	SrcMem16 | ModRM | Mov | Priv, 0,
	[Group9*8] =
	0, 0, 0, 0, 0, 0, 0, 0,
};
/* EFLAGS bit definitions. */
#define EFLG_ID   (1<<21)
#define EFLG_VIP  (1<<20)
#define EFLG_VIF  (1<<19)
#define EFLG_AC   (1<<18)
#define EFLG_VM   (1<<17)
#define EFLG_RF   (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT   (1<<14)
#define EFLG_OF   (1<<11)
#define EFLG_DF   (1<<10)
#define EFLG_IF   (1<<9)
#define EFLG_TF   (1<<8)
#define EFLG_SF   (1<<7)
#define EFLG_ZF   (1<<6)
#define EFLG_AF   (1<<4)
#define EFLG_PF   (1<<2)
#define EFLG_CF   (1<<0)
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */

#if defined(CONFIG_X86_64)
#define _LO32 "k"		/* force 32-bit operand */
#define _STK  "%%rsp"		/* stack pointer */
#elif defined(__i386__)
#define _LO32 ""		/* force 32-bit operand */
#define _STK  "%%esp"		/* stack pointer */
#endif

/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp)					\
	/* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */	\
	"movl %"_sav",%"_LO32 _tmp"; "					\
	"push %"_tmp"; "						\
	"push %"_tmp"; "						\
	"movl %"_msk",%"_LO32 _tmp"; "					\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"pushf; "							\
	"notl %"_LO32 _tmp"; "						\
	"andl %"_LO32 _tmp",("_STK"); "					\
	"andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); "	\
	"pop  %"_tmp"; "						\
	"orl  %"_LO32 _tmp",("_STK"); "					\
	"popf; "							\
	"pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp)		\
	/* _sav |= EFLAGS & _msk; */		\
	"pushf; "				\
	"pop  %"_LO32 _tmp"; "			\
	"andl %"_msk",%"_LO32 _tmp"; "		\
	"orl  %"_LO32 _tmp",%"_sav"; "
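/*
 * Sketch of how these pieces combine (illustrative, not a literal macro
 * expansion): emulate_2op_SrcV("add", src, dst, eflags) with 4-byte
 * operands emits roughly
 *	_PRE_EFLAGS   - load the guest's saved EFLAGS into the host flags
 *	"addl %src,%dst" - let the host ALU compute both result and flags
 *	_POST_EFLAGS  - copy the EFLAGS_MASK bits back to the saved value
 * so the arithmetic flags never have to be recomputed by hand.
 */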
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix)	\
	do {								\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "4", "2")			\
			_op _suffix " %"_x"3,%1; "			\
			_POST_EFLAGS("0", "4", "2")			\
			: "=m" (_eflags), "=m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: _y ((_src).val), "i" (EFLAGS_MASK));		\
	} while (0)

/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
									\
		switch ((_dst).bytes) {					\
		case 2:							\
			____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
			break;						\
		case 4:							\
			____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
			break;						\
		case 8:							\
			ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
			break;						\
		}							\
	} while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
	do {								\
		unsigned long _tmp;					\
		switch ((_dst).bytes) {					\
		case 1:							\
			____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
			break;						\
		default:						\
			__emulate_2op_nobyte(_op, _src, _dst, _eflags,	\
					     _wx, _wy, _lx, _ly, _qx, _qy); \
			break;						\
		}							\
	} while (0)
/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags)			\
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags)			\
	__emulate_2op(_op, _src, _dst, _eflags,				\
		      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)		\
	__emulate_2op_nobyte(_op, _src, _dst, _eflags,			\
			     "w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
	do {								\
		unsigned long _tmp;					\
		_type _clv  = (_cl).val;				\
		_type _srcv = (_src).val;				\
		_type _dstv = (_dst).val;				\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "5", "2")			\
			_op _suffix " %4,%1 \n"				\
			_POST_EFLAGS("0", "5", "2")			\
			: "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp)	\
			: "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK)	\
			);						\
									\
		(_cl).val  = (unsigned long) _clv;			\
		(_src).val = (unsigned long) _srcv;			\
		(_dst).val = (unsigned long) _dstv;			\
	} while (0)

#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags)			\
	do {								\
		switch ((_dst).bytes) {					\
		case 2:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "w", unsigned short);		\
			break;						\
		case 4:							\
			__emulate_2op_cl(_op, _cl, _src, _dst, _eflags,	\
					 "l", unsigned int);		\
			break;						\
		case 8:							\
			ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
					      "q", unsigned long));	\
			break;						\
		}							\
	} while (0)
#define __emulate_1op(_op, _dst, _eflags, _suffix)			\
	do {								\
		unsigned long _tmp;					\
									\
		__asm__ __volatile__ (					\
			_PRE_EFLAGS("0", "3", "2")			\
			_op _suffix " %1; "				\
			_POST_EFLAGS("0", "3", "2")			\
			: "=m" (_eflags), "+m" ((_dst).val),		\
			  "=&r" (_tmp)					\
			: "i" (EFLAGS_MASK));				\
	} while (0)

/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags)					\
	do {								\
		switch ((_dst).bytes) {					\
		case 1:	__emulate_1op(_op, _dst, _eflags, "b"); break;	\
		case 2:	__emulate_1op(_op, _dst, _eflags, "w"); break;	\
		case 4:	__emulate_1op(_op, _dst, _eflags, "l"); break;	\
		case 8:	ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
		}							\
	} while (0)
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip)					\
({	unsigned long _x;						\
	rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size));		\
	if (rc != X86EMUL_CONTINUE)					\
		goto done;						\
	(_eip) += (_size);						\
	(_type)_x;							\
})
static inline unsigned long ad_mask(struct decode_cache *c)
{
	return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
	if (c->ad_bytes == sizeof(unsigned long))
		return reg;
	else
		return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
	return base + address_mask(c, reg);
}

static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
	if (c->ad_bytes == sizeof(unsigned long))
		*reg += inc;
	else
		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}
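/*
 * Example (illustrative): with c->ad_bytes == 2, incrementing a register
 * holding 0xffff by 1 yields 0x0000 -- the carry is masked off by
 * ad_mask(), while bits above the address size are left untouched,
 * matching 16-bit address wraparound.
 */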
static inline void jmp_rel(struct decode_cache *c, int rel)
{
	register_address_increment(c, &c->eip, rel);
}

static void set_seg_override(struct decode_cache *c, int seg)
{
	c->has_seg_override = true;
	c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
	if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
		return 0;

	return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
				       struct decode_cache *c)
{
	if (!c->has_seg_override)
		return 0;

	return seg_base(ctxt, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
{
	return seg_base(ctxt, VCPU_SREG_SS);
}
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops,
			      unsigned long linear, u8 *dest)
{
	struct fetch_cache *fc = &ctxt->decode.fetch;
	int rc;
	int size;

	if (linear < fc->start || linear >= fc->end) {
		size = min(15UL, PAGE_SIZE - offset_in_page(linear));
		rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		fc->start = linear;
		fc->end = linear + size;
	}
	*dest = fc->data[linear - fc->start];
	return X86EMUL_CONTINUE;
}
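/*
 * Note: the fetch cache batches guest memory reads -- up to 15 bytes (the
 * architectural instruction length limit), clipped at a page boundary --
 * so decoding one instruction costs at most two ops->fetch() calls
 * instead of one call per byte.
 */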
static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 unsigned long eip, void *dest, unsigned size)
{
	int rc = X86EMUL_CONTINUE;

	/* x86 instructions are limited to 15 bytes. */
	if (eip + size - ctxt->eip > 15)
		return X86EMUL_UNHANDLEABLE;
	eip += ctxt->cs_base;
	while (size--) {
		rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
		if (rc != X86EMUL_CONTINUE)
			return rc;
	}
	return X86EMUL_CONTINUE;
}
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
			     int highbyte_regs)
{
	void *p;

	p = &regs[modrm_reg];
	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
	return p;
}
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops,
			   void *ptr,
			   u16 *size, unsigned long *address, int op_bytes)
{
	int rc;

	if (op_bytes == 2)
		op_bytes = 3;
	*address = 0;
	rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
			   ctxt->vcpu, NULL);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
			   ctxt->vcpu, NULL);
	return rc;
}
static int test_cc(unsigned int condition, unsigned int flags)
{
	int rc = 0;

	switch ((condition & 15) >> 1) {
	case 0: /* o */
		rc |= (flags & EFLG_OF);
		break;
	case 1: /* b/c/nae */
		rc |= (flags & EFLG_CF);
		break;
	case 2: /* z/e */
		rc |= (flags & EFLG_ZF);
		break;
	case 3: /* be/na */
		rc |= (flags & (EFLG_CF|EFLG_ZF));
		break;
	case 4: /* s */
		rc |= (flags & EFLG_SF);
		break;
	case 5: /* p/pe */
		rc |= (flags & EFLG_PF);
		break;
	case 7: /* le/ng */
		rc |= (flags & EFLG_ZF);
		/* fall through */
	case 6: /* l/nge */
		rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
		break;
	}

	/* Odd condition identifiers (lsb == 1) have inverted sense. */
	return (!!rc ^ (condition & 1));
}
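/*
 * Example: "je" encodes condition 0x4, so (4 & 15) >> 1 == 2 selects the
 * ZF test above; "jne" encodes 0x5, whose set low bit inverts the result
 * of the same test.
 */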
static void decode_register_operand(struct operand *op,
				    struct decode_cache *c,
				    int inhibit_bytereg)
{
	unsigned reg = c->modrm_reg;
	int highbyte_regs = c->rex_prefix == 0;

	if (!(c->d & ModRM))
		reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
	op->type = OP_REG;
	if ((c->d & ByteOp) && !inhibit_bytereg) {
		op->ptr = decode_register(reg, c->regs, highbyte_regs);
		op->val = *(u8 *)op->ptr;
		op->bytes = 1;
	} else {
		op->ptr = decode_register(reg, c->regs, 0);
		op->bytes = c->op_bytes;
		switch (op->bytes) {
		case 2:
			op->val = *(u16 *)op->ptr;
			break;
		case 4:
			op->val = *(u32 *)op->ptr;
			break;
		case 8:
			op->val = *(u64 *)op->ptr;
			break;
		}
	}
	op->orig_val = op->val;
}
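/*
 * Example: without a REX prefix, byte-register encodings 4-7 are the
 * legacy high bytes AH, CH, DH, BH -- which is why decode_register() is
 * told to use highbyte_regs; any REX prefix switches those encodings to
 * SPL, BPL, SIL and DIL instead.
 */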
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	u8 sib;
	int index_reg = 0, base_reg = 0, scale;
	int rc = X86EMUL_CONTINUE;

	if (c->rex_prefix) {
		c->modrm_reg = (c->rex_prefix & 4) << 1;	/* REX.R */
		index_reg = (c->rex_prefix & 2) << 2;		/* REX.X */
		c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
	}

	c->modrm = insn_fetch(u8, 1, c->eip);
	c->modrm_mod |= (c->modrm & 0xc0) >> 6;
	c->modrm_reg |= (c->modrm & 0x38) >> 3;
	c->modrm_rm |= (c->modrm & 0x07);
	c->modrm_ea = 0;
	c->use_modrm_ea = 1;

	if (c->modrm_mod == 3) {
		c->modrm_ptr = decode_register(c->modrm_rm,
					       c->regs, c->d & ByteOp);
		c->modrm_val = *(unsigned long *)c->modrm_ptr;
		return rc;
	}

	if (c->ad_bytes == 2) {
		unsigned bx = c->regs[VCPU_REGS_RBX];
		unsigned bp = c->regs[VCPU_REGS_RBP];
		unsigned si = c->regs[VCPU_REGS_RSI];
		unsigned di = c->regs[VCPU_REGS_RDI];

		/* 16-bit ModR/M decode. */
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 6)
				c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(u16, 2, c->eip);
			break;
		}
		switch (c->modrm_rm) {
		case 0:
			c->modrm_ea += bx + si;
			break;
		case 1:
			c->modrm_ea += bx + di;
			break;
		case 2:
			c->modrm_ea += bp + si;
			break;
		case 3:
			c->modrm_ea += bp + di;
			break;
		case 4:
			c->modrm_ea += si;
			break;
		case 5:
			c->modrm_ea += di;
			break;
		case 6:
			if (c->modrm_mod != 0)
				c->modrm_ea += bp;
			break;
		case 7:
			c->modrm_ea += bx;
			break;
		}
		if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
		    (c->modrm_rm == 6 && c->modrm_mod != 0))
			if (!c->has_seg_override)
				set_seg_override(c, VCPU_SREG_SS);
		c->modrm_ea = (u16)c->modrm_ea;
	} else {
		/* 32/64-bit ModR/M decode. */
		if ((c->modrm_rm & 7) == 4) {
			sib = insn_fetch(u8, 1, c->eip);
			index_reg |= (sib >> 3) & 7;
			base_reg |= sib & 7;
			scale = sib >> 6;

			if ((base_reg & 7) == 5 && c->modrm_mod == 0)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			else
				c->modrm_ea += c->regs[base_reg];
			if (index_reg != 4)
				c->modrm_ea += c->regs[index_reg] << scale;
		} else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
			if (ctxt->mode == X86EMUL_MODE_PROT64)
				c->rip_relative = 1;
		} else
			c->modrm_ea += c->regs[c->modrm_rm];
		switch (c->modrm_mod) {
		case 0:
			if (c->modrm_rm == 5)
				c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		case 1:
			c->modrm_ea += insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->modrm_ea += insn_fetch(s32, 4, c->eip);
			break;
		}
	}
done:
	return rc;
}
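/*
 * Worked example (illustrative): modrm 0x44, sib 0x58, disp8 0x10 in
 * 32/64-bit mode decodes as mod=01, rm=100 -> a SIB byte follows;
 * base=000 (rAX), index=011 (rBX), scale=1, so the effective address is
 * rAX + rBX*2 + 0x10.
 */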
static int decode_abs(struct x86_emulate_ctxt *ctxt,
		      struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;

	switch (c->ad_bytes) {
	case 2:
		c->modrm_ea = insn_fetch(u16, 2, c->eip);
		break;
	case 4:
		c->modrm_ea = insn_fetch(u32, 4, c->eip);
		break;
	case 8:
		c->modrm_ea = insn_fetch(u64, 8, c->eip);
		break;
	}
done:
	return rc;
}
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int mode = ctxt->mode;
	int def_op_bytes, def_ad_bytes, group;

	/* Shadow copy of register state. Committed on successful emulation. */

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	switch (mode) {
	case X86EMUL_MODE_REAL:
	case X86EMUL_MODE_VM86:
	case X86EMUL_MODE_PROT16:
		def_op_bytes = def_ad_bytes = 2;
		break;
	case X86EMUL_MODE_PROT32:
		def_op_bytes = def_ad_bytes = 4;
		break;
#ifdef CONFIG_X86_64
	case X86EMUL_MODE_PROT64:
		def_op_bytes = 4;
		def_ad_bytes = 8;
		break;
#endif
	default:
		return -1;
	}

	c->op_bytes = def_op_bytes;
	c->ad_bytes = def_ad_bytes;

	/* Legacy prefixes. */
	for (;;) {
		switch (c->b = insn_fetch(u8, 1, c->eip)) {
		case 0x66:	/* operand-size override */
			/* switch between 2/4 bytes */
			c->op_bytes = def_op_bytes ^ 6;
			break;
		case 0x67:	/* address-size override */
			if (mode == X86EMUL_MODE_PROT64)
				/* switch between 4/8 bytes */
				c->ad_bytes = def_ad_bytes ^ 12;
			else
				/* switch between 2/4 bytes */
				c->ad_bytes = def_ad_bytes ^ 6;
			break;
		case 0x26:	/* ES override */
		case 0x2e:	/* CS override */
		case 0x36:	/* SS override */
		case 0x3e:	/* DS override */
			set_seg_override(c, (c->b >> 3) & 3);
			break;
		case 0x64:	/* FS override */
		case 0x65:	/* GS override */
			set_seg_override(c, c->b & 7);
			break;
		case 0x40 ... 0x4f: /* REX */
			if (mode != X86EMUL_MODE_PROT64)
				goto done_prefixes;
			c->rex_prefix = c->b;
			continue;
		case 0xf0:	/* LOCK */
			c->lock_prefix = 1;
			break;
		case 0xf2:	/* REPNE/REPNZ */
			c->rep_prefix = REPNE_PREFIX;
			break;
		case 0xf3:	/* REP/REPE/REPZ */
			c->rep_prefix = REPE_PREFIX;
			break;
		default:
			goto done_prefixes;
		}

		/* Any legacy prefix after a REX prefix nullifies its effect. */

		c->rex_prefix = 0;
	}

done_prefixes:

	/* REX prefix. */
	if (c->rex_prefix)
		if (c->rex_prefix & 8)
			c->op_bytes = 8;	/* REX.W */

	/* Opcode byte(s). */
	c->d = opcode_table[c->b];
	if (c->d == 0) {
		/* Two-byte opcode? */
		if (c->b == 0x0f) {
			c->twobyte = 1;
			c->b = insn_fetch(u8, 1, c->eip);
			c->d = twobyte_table[c->b];
		}
	}

	if (c->d & Group) {
		group = c->d & GroupMask;
		c->modrm = insn_fetch(u8, 1, c->eip);
		--c->eip;

		group = (group << 3) + ((c->modrm >> 3) & 7);
		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
			c->d = group2_table[group];
		else
			c->d = group_table[group];
	}
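	/*
	 * Example: opcode 0xf7 is Group | Group3, so for "f7 /3" the reg
	 * field (3) picks the "neg" row of group_table[Group3*8 + 3]
	 * rather than a distinct top-level opcode entry.
	 */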
	/* Unrecognised? */
	if (c->d == 0) {
		DPRINTF("Cannot emulate %02x\n", c->b);
		return -1;
	}

	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
		c->op_bytes = 8;

	/* ModRM and SIB bytes. */
	if (c->d & ModRM)
		rc = decode_modrm(ctxt, ops);
	else if (c->d & MemAbs)
		rc = decode_abs(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	if (!c->has_seg_override)
		set_seg_override(c, VCPU_SREG_DS);

	if (!(!c->twobyte && c->b == 0x8d))
		c->modrm_ea += seg_override_base(ctxt, c);

	if (c->ad_bytes != 8)
		c->modrm_ea = (u32)c->modrm_ea;
	/*
	 * Decode and fetch the source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & SrcMask) {
	case SrcNone:
		break;
	case SrcReg:
		decode_register_operand(&c->src, c, 0);
		break;
	case SrcMem16:
		c->src.bytes = 2;
		goto srcmem_common;
	case SrcMem32:
		c->src.bytes = 4;
		goto srcmem_common;
	case SrcMem:
		c->src.bytes = (c->d & ByteOp) ? 1 :
						 c->op_bytes;
		/* Don't fetch the address for invlpg: it could be unmapped. */
		if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
			break;
	srcmem_common:
		/*
		 * For instructions with a ModR/M byte, switch to register
		 * access if Mod = 3.
		 */
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->src.type = OP_REG;
			c->src.val = c->modrm_val;
			c->src.ptr = c->modrm_ptr;
			break;
		}
		c->src.type = OP_MEM;
		break;
	case SrcImm:
	case SrcImmU:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		if (c->src.bytes == 8)
			c->src.bytes = 4;
		/* NB. Immediates are sign-extended as necessary. */
		switch (c->src.bytes) {
		case 1:
			c->src.val = insn_fetch(s8, 1, c->eip);
			break;
		case 2:
			c->src.val = insn_fetch(s16, 2, c->eip);
			break;
		case 4:
			c->src.val = insn_fetch(s32, 4, c->eip);
			break;
		}
		if ((c->d & SrcMask) == SrcImmU) {
			switch (c->src.bytes) {
			case 1:
				c->src.val &= 0xff;
				break;
			case 2:
				c->src.val &= 0xffff;
				break;
			case 4:
				c->src.val &= 0xffffffff;
				break;
			}
		}
		break;
	case SrcImmByte:
	case SrcImmUByte:
		c->src.type = OP_IMM;
		c->src.ptr = (unsigned long *)c->eip;
		c->src.bytes = 1;
		if ((c->d & SrcMask) == SrcImmByte)
			c->src.val = insn_fetch(s8, 1, c->eip);
		else
			c->src.val = insn_fetch(u8, 1, c->eip);
		break;
	case SrcOne:
		c->src.bytes = 1;
		c->src.val = 1;
		break;
	}

	/*
	 * Decode and fetch the second source operand: register, memory
	 * or immediate.
	 */
	switch (c->d & Src2Mask) {
	case Src2None:
		break;
	case Src2CL:
		c->src2.bytes = 1;
		c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;
		break;
	case Src2ImmByte:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 1;
		c->src2.val = insn_fetch(u8, 1, c->eip);
		break;
	case Src2Imm16:
		c->src2.type = OP_IMM;
		c->src2.ptr = (unsigned long *)c->eip;
		c->src2.bytes = 2;
		c->src2.val = insn_fetch(u16, 2, c->eip);
		break;
	case Src2One:
		c->src2.bytes = 1;
		c->src2.val = 1;
		break;
	case Src2Mem16:
		c->src2.type = OP_MEM;
		c->src2.bytes = 2;
		c->src2.val = 0;
		break;
	}

	/* Decode and fetch the destination operand: register or memory. */
	switch (c->d & DstMask) {
	case ImplicitOps:
		/* Special instructions do their own operand decoding. */
		return 0;
	case DstReg:
		decode_register_operand(&c->dst, c,
			 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
		break;
	case DstMem:
		if ((c->d & ModRM) && c->modrm_mod == 3) {
			c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
			c->dst.type = OP_REG;
			c->dst.val = c->dst.orig_val = c->modrm_val;
			c->dst.ptr = c->modrm_ptr;
			break;
		}
		c->dst.type = OP_MEM;
		break;
	case DstAcc:
		c->dst.type = OP_REG;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = &c->regs[VCPU_REGS_RAX];
		switch (c->dst.bytes) {
		case 1:
			c->dst.val = *(u8 *)c->dst.ptr;
			break;
		case 2:
			c->dst.val = *(u16 *)c->dst.ptr;
			break;
		case 4:
			c->dst.val = *(u32 *)c->dst.ptr;
			break;
		case 8:
			c->dst.val = *(u64 *)c->dst.ptr;
			break;
		}
		c->dst.orig_val = c->dst.val;
		break;
	}

	if (c->rip_relative)
		c->modrm_ea += c->eip;

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
	u32 limit = get_desc_limit(desc);

	return desc->g ? (limit << 12) | 0xfff : limit;
}
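/*
 * Example: a descriptor with g=1 and a raw limit of 0x3 scales to
 * (0x3 << 12) | 0xfff == 0x3fff, i.e. four 4K pages, as the SDM
 * specifies for page-granular segments.
 */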
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
				     struct x86_emulate_ops *ops,
				     u16 selector, struct desc_ptr *dt)
{
	if (selector & 1 << 2) {
		struct desc_struct desc;
		memset(dt, 0, sizeof *dt);
		if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
			return;

		dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
		dt->address = get_desc_base(&desc);
	} else
		ops->get_gdt(dt, ctxt->vcpu);
}
/* allowed just for 8-byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	ulong addr;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	addr = dt.address + index * 8;
	ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_page_fault(ctxt->vcpu, addr, err);

	return ret;
}

/* allowed just for 8-byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				    struct x86_emulate_ops *ops,
				    u16 selector, struct desc_struct *desc)
{
	struct desc_ptr dt;
	u16 index = selector >> 3;
	int ret;
	u32 err;
	ulong addr;

	get_descriptor_table_ptr(ctxt, ops, selector, &dt);

	if (dt.size < index * 8 + 7) {
		kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	addr = dt.address + index * 8;
	ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
	if (ret == X86EMUL_PROPAGATE_FAULT)
		kvm_inject_page_fault(ctxt->vcpu, addr, err);

	return ret;
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 selector, int seg)
{
	struct desc_struct seg_desc;
	u8 dpl, rpl, cpl;
	unsigned err_vec = GP_VECTOR;
	u32 err_code = 0;
	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
	int ret;

	memset(&seg_desc, 0, sizeof seg_desc);

	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
	    || ctxt->mode == X86EMUL_MODE_REAL) {
		/* set real mode segment descriptor */
		set_desc_base(&seg_desc, selector << 4);
		set_desc_limit(&seg_desc, 0xffff);
		seg_desc.type = 3;
		seg_desc.p = 1;
		seg_desc.s = 1;
		goto load;
	}

	/* NULL selector is not valid for TR, CS and SS */
	if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
	    && null_selector)
		goto exception;

	/* TR should be in GDT only */
	if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
		goto exception;

	if (null_selector) /* for NULL selector skip all following checks */
		goto load;

	ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	err_code = selector & 0xfffc;
	err_vec = GP_VECTOR;

	/* can't load system descriptor into segment selector */
	if (seg <= VCPU_SREG_GS && !seg_desc.s)
		goto exception;

	if (!seg_desc.p) {
		err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
		goto exception;
	}

	rpl = selector & 3;
	dpl = seg_desc.dpl;
	cpl = ops->cpl(ctxt->vcpu);

	switch (seg) {
	case VCPU_SREG_SS:
		/*
		 * segment is not a writable data segment, or segment
		 * selector's RPL != CPL, or segment descriptor's DPL != CPL
		 */
		if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
			goto exception;
		break;
	case VCPU_SREG_CS:
		if (!(seg_desc.type & 8))
			goto exception;

		if (seg_desc.type & 4) {
			/* conforming */
			if (dpl > cpl)
				goto exception;
		} else {
			/* nonconforming */
			if (rpl > cpl || dpl != cpl)
				goto exception;
		}
		/* CS(RPL) <- CPL */
		selector = (selector & 0xfffc) | cpl;
		break;
	case VCPU_SREG_TR:
		if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
			goto exception;
		break;
	case VCPU_SREG_LDTR:
		if (seg_desc.s || seg_desc.type != 2)
			goto exception;
		break;
	default: /* DS, ES, FS, or GS */
		/*
		 * segment is not a data or readable code segment, or
		 * ((segment is a data or nonconforming code segment)
		 * and (both RPL and CPL > DPL))
		 */
		if ((seg_desc.type & 0xa) == 0x8 ||
		    (((seg_desc.type & 0xc) != 0xc) &&
		     (rpl > dpl && cpl > dpl)))
			goto exception;
		break;
	}

	if (seg_desc.s) {
		/* mark segment as accessed */
		seg_desc.type |= 1;
		ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
		if (ret != X86EMUL_CONTINUE)
			return ret;
	}
load:
	ops->set_segment_selector(selector, seg, ctxt->vcpu);
	ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
	return X86EMUL_CONTINUE;
exception:
	kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
	return X86EMUL_PROPAGATE_FAULT;
}
static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;

	c->dst.type  = OP_MEM;
	c->dst.bytes = c->op_bytes;
	c->dst.val = c->src.val;
	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
	c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
					       c->regs[VCPU_REGS_RSP]);
}
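/*
 * Note the ordering above: RSP is decremented first, then dst.ptr is
 * aimed at the new SS:RSP. The actual memory store is deferred to
 * writeback(), so the shadow register state is only committed once the
 * whole instruction has emulated successfully.
 */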
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
		       struct x86_emulate_ops *ops,
		       void *dest, int len)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	rc = ops->read_emulated(register_address(c, ss_base(ctxt),
						 c->regs[VCPU_REGS_RSP]),
				dest, len, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
	return rc;
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops,
			void *dest, int len)
{
	int rc;
	unsigned long val, change_mask;
	int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	int cpl = ops->cpl(ctxt->vcpu);

	rc = emulate_pop(ctxt, ops, &val, len);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
		| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT64:
	case X86EMUL_MODE_PROT32:
	case X86EMUL_MODE_PROT16:
		if (cpl == 0)
			change_mask |= EFLG_IOPL;
		if (cpl <= iopl)
			change_mask |= EFLG_IF;
		break;
	case X86EMUL_MODE_VM86:
		if (iopl < 3) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		change_mask |= EFLG_IF;
		break;
	default: /* real mode */
		change_mask |= (EFLG_IOPL | EFLG_IF);
		break;
	}

	*(unsigned long *)dest =
		(ctxt->eflags & ~change_mask) | (val & change_mask);

	return rc;
}
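/*
 * Example: at CPL 3 with IOPL 0 in protected mode, neither branch above
 * is taken, so popf silently preserves IF and IOPL -- only the
 * arithmetic, direction and status bits in change_mask are replaced from
 * the popped value.
 */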
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment segment;

	kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);

	c->src.val = segment.selector;
	emulate_push(ctxt);
}

static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops, int seg)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long selector;
	int rc;

	rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
	return rc;
}
static void emulate_pusha(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	unsigned long old_esp = c->regs[VCPU_REGS_RSP];
	int reg = VCPU_REGS_RAX;

	while (reg <= VCPU_REGS_RDI) {
		(reg == VCPU_REGS_RSP) ?
		(c->src.val = old_esp) : (c->src.val = c->regs[reg]);

		emulate_push(ctxt);
		++reg;
	}
}

static int emulate_popa(struct x86_emulate_ctxt *ctxt,
			struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int reg = VCPU_REGS_RDI;

	while (reg >= VCPU_REGS_RAX) {
		if (reg == VCPU_REGS_RSP) {
			register_address_increment(c, &c->regs[VCPU_REGS_RSP],
						   c->op_bytes);
			--reg;
			continue;
		}

		rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			break;
		--reg;
	}
	return rc;
}
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
}

static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	switch (c->modrm_reg) {
	case 0:	/* rol */
		emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
		break;
	case 1:	/* ror */
		emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* rcl */
		emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
		break;
	case 3:	/* rcr */
		emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
		break;
	case 4:	/* sal/shl */
	case 6:	/* sal/shl */
		emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
		break;
	case 5:	/* shr */
		emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
		break;
	case 7:	/* sar */
		emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
		break;
	}
}
static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0 ... 1:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 2:	/* not */
		c->dst.val = ~c->dst.val;
		break;
	case 3:	/* neg */
		emulate_1op("neg", c->dst, ctxt->eflags);
		break;
	default:
		return 0;
	}
	return 1;
}

static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;

	switch (c->modrm_reg) {
	case 0:	/* inc */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 1:	/* dec */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 2: /* call near abs */ {
		long int old_eip;
		old_eip = c->eip;
		c->eip = c->src.val;
		c->src.val = old_eip;
		emulate_push(ctxt);
		break;
	}
	case 4: /* jmp abs */
		c->eip = c->src.val;
		break;
	case 6:	/* push */
		emulate_push(ctxt);
		break;
	}
	return X86EMUL_CONTINUE;
}
static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
			       struct x86_emulate_ops *ops,
			       unsigned long memop)
{
	struct decode_cache *c = &ctxt->decode;
	u64 old, new;
	int rc;

	rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
	if (rc != X86EMUL_CONTINUE)
		return rc;

	if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
	    ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {

		c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
		c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
		ctxt->eflags &= ~EFLG_ZF;

	} else {
		new = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
		      (u32) c->regs[VCPU_REGS_RBX];

		rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		ctxt->eflags |= EFLG_ZF;
	}
	return X86EMUL_CONTINUE;
}
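/*
 * This implements cmpxchg8b semantics: if EDX:EAX matches the old 64-bit
 * value, ECX:EBX is written and ZF is set; otherwise the old value is
 * loaded into EDX:EAX and ZF is cleared. Note the read and the
 * cmpxchg_emulated() call are two separate guest-memory operations, not
 * one atomic step.
 */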
static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
			   struct x86_emulate_ops *ops)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;
	unsigned long cs;

	rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	if (c->op_bytes == 4)
		c->eip = (u32)c->eip;
	rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
	if (rc != X86EMUL_CONTINUE)
		return rc;
	rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
	return rc;
}
static inline int writeback(struct x86_emulate_ctxt *ctxt,
			    struct x86_emulate_ops *ops)
{
	int rc;
	struct decode_cache *c = &ctxt->decode;

	switch (c->dst.type) {
	case OP_REG:
		/* The 4-byte case *is* correct:
		 * in 64-bit mode we zero-extend.
		 */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *)c->dst.ptr = (u8)c->dst.val;
			break;
		case 2:
			*(u16 *)c->dst.ptr = (u16)c->dst.val;
			break;
		case 4:
			*c->dst.ptr = (u32)c->dst.val;
			break;	/* 64b: zero-ext */
		case 8:
			*c->dst.ptr = c->dst.val;
			break;
		}
		break;
	case OP_MEM:
		if (c->lock_prefix)
			rc = ops->cmpxchg_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.orig_val,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		else
			rc = ops->write_emulated(
					(unsigned long)c->dst.ptr,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			return rc;
		break;
	case OP_NONE:
		/* no writeback */
		break;
	default:
		break;
	}
	return X86EMUL_CONTINUE;
}
static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
	/*
	 * an "sti; sti" sequence only disables interrupts for the first
	 * instruction. So, if the last instruction, be it emulated or
	 * not, left the system with the INT_STI flag enabled, it
	 * means that the last instruction was an sti. We should not
	 * leave the flag on in this case. The same goes for mov ss.
	 */
	if (!(int_shadow & mask))
		ctxt->interruptibility = mask;
}
static inline void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
			struct kvm_segment *cs, struct kvm_segment *ss)
{
	memset(cs, 0, sizeof(struct kvm_segment));
	kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
	memset(ss, 0, sizeof(struct kvm_segment));

	cs->l = 0;		/* will be adjusted later */
	cs->base = 0;		/* flat segment */
	cs->g = 1;		/* 4kb granularity */
	cs->limit = 0xffffffff;	/* 4GB limit */
	cs->type = 0x0b;	/* Read, Execute, Accessed */
	cs->s = 1;
	cs->dpl = 0;		/* will be adjusted later */
	cs->present = 1;
	cs->db = 1;

	ss->unusable = 0;
	ss->base = 0;		/* flat segment */
	ss->limit = 0xffffffff;	/* 4GB limit */
	ss->g = 1;		/* 4kb granularity */
	ss->s = 1;
	ss->type = 0x03;	/* Read/Write, Accessed */
	ss->db = 1;		/* 32bit stack segment */
	ss->dpl = 0;
	ss->present = 1;
}
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* syscall is not available in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
	msr_data >>= 32;
	cs.selector = (u16)(msr_data & 0xfffc);
	ss.selector = (u16)(msr_data + 8);

	if (is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}
	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->regs[VCPU_REGS_RCX] = c->eip;
	if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
		c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

		kvm_x86_ops->get_msr(ctxt->vcpu,
				     ctxt->mode == X86EMUL_MODE_PROT64 ?
				     MSR_LSTAR : MSR_CSTAR, &msr_data);
		c->eip = msr_data;

		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
		ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
	} else {
		/* legacy mode */
		kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
		c->eip = (u32)msr_data;

		ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	}

	return X86EMUL_CONTINUE;
}
static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;

	/* inject #GP if in real mode */
	if (ctxt->mode == X86EMUL_MODE_REAL) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	/* XXX sysenter/sysexit have not been tested in 64-bit mode.
	 * Therefore, we inject an #UD.
	 */
	if (ctxt->mode == X86EMUL_MODE_PROT64) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (ctxt->mode) {
	case X86EMUL_MODE_PROT32:
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	case X86EMUL_MODE_PROT64:
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		break;
	}

	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
	cs.selector = (u16)msr_data;
	cs.selector &= ~SELECTOR_RPL_MASK;
	ss.selector = cs.selector + 8;
	ss.selector &= ~SELECTOR_RPL_MASK;
	if (ctxt->mode == X86EMUL_MODE_PROT64
	    || is_long_mode(ctxt->vcpu)) {
		cs.db = 0;
		cs.l = 1;
	}

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
	c->eip = msr_data;

	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
	c->regs[VCPU_REGS_RSP] = msr_data;

	return X86EMUL_CONTINUE;
}
static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
	struct decode_cache *c = &ctxt->decode;
	struct kvm_segment cs, ss;
	u64 msr_data;
	int usermode;

	/* inject #GP if in real mode or Virtual 8086 mode */
	if (ctxt->mode == X86EMUL_MODE_REAL ||
	    ctxt->mode == X86EMUL_MODE_VM86) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return X86EMUL_PROPAGATE_FAULT;
	}

	setup_syscalls_segments(ctxt, &cs, &ss);

	if ((c->rex_prefix & 0x8) != 0x0)
		usermode = X86EMUL_MODE_PROT64;
	else
		usermode = X86EMUL_MODE_PROT32;

	cs.dpl = 3;
	ss.dpl = 3;
	kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
	switch (usermode) {
	case X86EMUL_MODE_PROT32:
		cs.selector = (u16)(msr_data + 16);
		if ((msr_data & 0xfffc) == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = (u16)(msr_data + 24);
		break;
	case X86EMUL_MODE_PROT64:
		cs.selector = (u16)(msr_data + 32);
		if (msr_data == 0x0) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
		ss.selector = cs.selector + 8;
		cs.db = 0;
		cs.l = 1;
		break;
	}
	cs.selector |= SELECTOR_RPL_MASK;
	ss.selector |= SELECTOR_RPL_MASK;

	kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
	kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

	c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
	c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

	return X86EMUL_CONTINUE;
}
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
			      struct x86_emulate_ops *ops)
{
	int iopl;
	if (ctxt->mode == X86EMUL_MODE_REAL)
		return false;
	if (ctxt->mode == X86EMUL_MODE_VM86)
		return true;
	iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
	return ops->cpl(ctxt->vcpu) > iopl;
}
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
					    struct x86_emulate_ops *ops,
					    u16 port, u16 len)
{
	struct kvm_segment tr_seg;
	int r;
	u16 io_bitmap_ptr;
	u8 perm, bit_idx = port & 0x7;
	unsigned mask = (1 << len) - 1;

	kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
	if (tr_seg.unusable)
		return false;
	if (tr_seg.limit < 103)
		return false;
	r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
			  NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if (io_bitmap_ptr + port/8 > tr_seg.limit)
		return false;
	r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
			  ctxt->vcpu, NULL);
	if (r != X86EMUL_CONTINUE)
		return false;
	if ((perm >> bit_idx) & mask)
		return false;
	return true;
}
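/*
 * Example: for a 1-byte access to port 0x3f9, the byte at offset
 * io_bitmap_ptr + 0x7f (0x3f9 / 8) in the TSS is fetched and bit 1
 * (0x3f9 & 7) is tested; any set bit within the access width denies the
 * access.
 */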
static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 u16 port, u16 len)
{
	if (emulator_bad_iopl(ctxt, ops))
		if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
			return false;
	return true;
}

static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
				      struct x86_emulate_ops *ops,
				      int seg)
{
	struct desc_struct desc;

	if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
		return get_desc_base(&desc);
	else
		return ~0;
}
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->ip = c->eip;
	tss->flag = ctxt->eflags;
	tss->ax = c->regs[VCPU_REGS_RAX];
	tss->cx = c->regs[VCPU_REGS_RCX];
	tss->dx = c->regs[VCPU_REGS_RDX];
	tss->bx = c->regs[VCPU_REGS_RBX];
	tss->sp = c->regs[VCPU_REGS_RSP];
	tss->bp = c->regs[VCPU_REGS_RBP];
	tss->si = c->regs[VCPU_REGS_RSI];
	tss->di = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}

static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_16 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	c->eip = tss->ip;
	ctxt->eflags = tss->flag | 2;
	c->regs[VCPU_REGS_RAX] = tss->ax;
	c->regs[VCPU_REGS_RCX] = tss->cx;
	c->regs[VCPU_REGS_RDX] = tss->dx;
	c->regs[VCPU_REGS_RBX] = tss->bx;
	c->regs[VCPU_REGS_RSP] = tss->sp;
	c->regs[VCPU_REGS_RBP] = tss->bp;
	c->regs[VCPU_REGS_RSI] = tss->si;
	c->regs[VCPU_REGS_RDI] = tss->di;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors.
	 */
	ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);

	/*
	 * Now load segment descriptors. If a fault happens at this stage,
	 * it is handled in the context of the new task.
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}
static int task_switch_16(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_16 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss16(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss16(ctxt, ops, &tss_seg);
}
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
				struct x86_emulate_ops *ops,
				struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;

	tss->cr3 = ops->get_cr(3, ctxt->vcpu);
	tss->eip = c->eip;
	tss->eflags = ctxt->eflags;
	tss->eax = c->regs[VCPU_REGS_RAX];
	tss->ecx = c->regs[VCPU_REGS_RCX];
	tss->edx = c->regs[VCPU_REGS_RDX];
	tss->ebx = c->regs[VCPU_REGS_RBX];
	tss->esp = c->regs[VCPU_REGS_RSP];
	tss->ebp = c->regs[VCPU_REGS_RBP];
	tss->esi = c->regs[VCPU_REGS_RSI];
	tss->edi = c->regs[VCPU_REGS_RDI];

	tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
	tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
	tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
	tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
	tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
	tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
	tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}

static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
				 struct x86_emulate_ops *ops,
				 struct tss_segment_32 *tss)
{
	struct decode_cache *c = &ctxt->decode;
	int ret;

	ops->set_cr(3, tss->cr3, ctxt->vcpu);
	c->eip = tss->eip;
	ctxt->eflags = tss->eflags | 2;
	c->regs[VCPU_REGS_RAX] = tss->eax;
	c->regs[VCPU_REGS_RCX] = tss->ecx;
	c->regs[VCPU_REGS_RDX] = tss->edx;
	c->regs[VCPU_REGS_RBX] = tss->ebx;
	c->regs[VCPU_REGS_RSP] = tss->esp;
	c->regs[VCPU_REGS_RBP] = tss->ebp;
	c->regs[VCPU_REGS_RSI] = tss->esi;
	c->regs[VCPU_REGS_RDI] = tss->edi;

	/*
	 * SDM says that segment selectors are loaded before segment
	 * descriptors.
	 */
	ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
	ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
	ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
	ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
	ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
	ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
	ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);

	/*
	 * Now load segment descriptors. If a fault happens at this stage,
	 * it is handled in the context of the new task.
	 */
	ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	return X86EMUL_CONTINUE;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss32(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss32(ctxt, ops, &tss_seg);
}
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 tss_selector, int reason)
{
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
	ulong old_tss_base =
		get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
	}

	if (!next_tss_desc.p || desc_limit_scaled(&next_tss_desc) < 0x67) {
		kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
				      tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}
	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, ops, old_tss_sel,
					 &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* Set the back link to the previous task only if the NT bit is set
	   in eflags; note that old_tss_sel is not used after this point. */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, ops, tss_selector,
					 &next_tss_desc);
	}

	ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);

	return ret;
}
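
/*
 * Editor's note (illustrative, not from the original source): the busy
 * bit (bit 1 of the TSS descriptor type) and EFLAGS.NT together encode
 * the nesting protocol.  For a CALL-style switch the old TSS stays busy,
 * the new TSS is marked busy, NT is set and the back link is written, so
 * a subsequent IRET can unwind to the previous task.
 */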
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 u16 tss_selector, int reason)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason);

	if (rc == X86EMUL_CONTINUE) {
		memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
		kvm_rip_write(ctxt->vcpu, c->eip);
	}

	return rc;
}
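
/*
 * Editor's note (assumption, not from the original source): this is the
 * external entry point; KVM proper is expected to call it (e.g. from
 * kvm_task_switch()) when a VMX/SVM exit reports a guest task switch.
 */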
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	unsigned long memop = 0;
	u64 msr_data;
	unsigned long saved_eip = 0;
	struct decode_cache *c = &ctxt->decode;
	unsigned int port;
	int io_dir_in;
	int rc = X86EMUL_CONTINUE;

	ctxt->interruptibility = 0;

	/* Shadow copy of register state. Committed on successful emulation.
	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
	 * modify them.
	 */

	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
	saved_eip = c->eip;
	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* The LOCK prefix is allowed only with certain instructions. */
	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* Privileged instructions can be executed only at CPL 0. */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		goto done;
	}

	if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
		memop = c->modrm_ea;
	if (c->rep_prefix && (c->d & String)) {
		/* All REP prefixes have the same first termination condition */
		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
			kvm_rip_write(ctxt->vcpu, c->eip);
			goto done;
		}
		/* The second termination condition only applies to REPE
		 * and REPNE. Test if the repeat string operation prefix is
		 * REPE/REPZ or REPNE/REPNZ and, if it is, test the
		 * corresponding termination condition according to:
		 *	- if REPE/REPZ and ZF = 0 then done
		 *	- if REPNE/REPNZ and ZF = 1 then done
		 */
		if ((c->b == 0xa6) || (c->b == 0xa7) ||
		    (c->b == 0xae) || (c->b == 0xaf)) {
			if ((c->rep_prefix == REPE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == 0)) {
				kvm_rip_write(ctxt->vcpu, c->eip);
				goto done;
			}
			if ((c->rep_prefix == REPNE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) {
				kvm_rip_write(ctxt->vcpu, c->eip);
				goto done;
			}
		}
		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
		c->eip = kvm_rip_read(ctxt->vcpu);
	}
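
	/*
	 * Editor's note (illustrative, not from the original source): for
	 *
	 *	repe cmpsb		; opcode 0xa6 with REPE_PREFIX
	 *
	 * each trip through the emulator performs one element; the loop
	 * terminates when (e/r)cx reaches zero or when ZF clears, exactly
	 * the two checks above.
	 */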
	if (c->src.type == OP_MEM) {
		c->src.ptr = (unsigned long *)memop;
		c->src.val = 0;
		rc = ops->read_emulated((unsigned long)c->src.ptr,
					&c->src.val,
					c->src.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->src.orig_val = c->src.val;
	}

	if (c->src2.type == OP_MEM) {
		c->src2.ptr = (unsigned long *)(memop + c->src.bytes);
		c->src2.val = 0;
		rc = ops->read_emulated((unsigned long)c->src2.ptr,
					&c->src2.val,
					c->src2.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((c->d & DstMask) == ImplicitOps)
		goto special_insn;

	if (c->dst.type == OP_MEM) {
		c->dst.ptr = (unsigned long *)memop;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.val = 0;
		if (c->d & BitOp) {
			unsigned long mask = ~(c->dst.bytes * 8 - 1);

			c->dst.ptr = (void *)c->dst.ptr +
					(c->src.val & mask) / 8;
		}
		if (!(c->d & Mov)) {
			/* optimisation - avoid slow emulated read */
			rc = ops->read_emulated((unsigned long)c->dst.ptr,
						&c->dst.val,
						c->dst.bytes,
						ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;
		}
	}
	c->dst.orig_val = c->dst.val;

special_insn:

	if (c->twobyte)
		goto twobyte_insn;

	switch (c->b) {
	case 0x00 ... 0x05:
	      add:		/* add */
		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
		break;
	case 0x06:		/* push es */
		emulate_push_sreg(ctxt, VCPU_SREG_ES);
		break;
	case 0x07:		/* pop es */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x08 ... 0x0d:
	      or:		/* or */
		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
		break;
	case 0x0e:		/* push cs */
		emulate_push_sreg(ctxt, VCPU_SREG_CS);
		break;
	case 0x10 ... 0x15:
	      adc:		/* adc */
		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
		break;
	case 0x16:		/* push ss */
		emulate_push_sreg(ctxt, VCPU_SREG_SS);
		break;
	case 0x17:		/* pop ss */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x18 ... 0x1d:
	      sbb:		/* sbb */
		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
		break;
	case 0x1e:		/* push ds */
		emulate_push_sreg(ctxt, VCPU_SREG_DS);
		break;
	case 0x1f:		/* pop ds */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x20 ... 0x25:
	      and:		/* and */
		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
		break;
	case 0x28 ... 0x2d:
	      sub:		/* sub */
		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
		break;
	case 0x30 ... 0x35:
	      xor:		/* xor */
		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
		break;
	case 0x38 ... 0x3d:
	      cmp:		/* cmp */
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		break;
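
	/*
	 * Editor's note (illustrative, not from the original source): the
	 * add/or/adc/sbb/and/sub/xor/cmp labels above double as dispatch
	 * targets for the Grp1 immediate forms (opcodes 0x80-0x83) below,
	 * so e.g. "addl $4, (%eax)" (0x83 /0) reuses the same ALU path.
	 */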
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 0x50 ... 0x57: /* push reg */
		emulate_push(ctxt);
		break;
	case 0x58 ... 0x5f: /* pop reg */
	pop_instruction:
		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x60: /* pusha */
		emulate_pusha(ctxt);
		break;
	case 0x61: /* popa */
		rc = emulate_popa(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		c->dst.val = (s32) c->src.val;
		break;
	case 0x68: /* push imm */
	case 0x6a: /* push imm8 */
		emulate_push(ctxt);
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio_string(ctxt->vcpu,
				1,		/* direction: in */
				(c->d & ByteOp) ? 1 : c->op_bytes,
				c->rep_prefix ?
				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
				(ctxt->eflags & EFLG_DF),
				register_address(c, es_base(ctxt),
						 c->regs[VCPU_REGS_RDI]),
				c->rep_prefix,
				c->regs[VCPU_REGS_RDX]) == 0) {
			c->eip = saved_eip;
			return -1;
		}
		return 0;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio_string(ctxt->vcpu,
				0,		/* direction: out */
				(c->d & ByteOp) ? 1 : c->op_bytes,
				c->rep_prefix ?
				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
				(ctxt->eflags & EFLG_DF),
				register_address(c,
					seg_override_base(ctxt, c),
					c->regs[VCPU_REGS_RSI]),
				c->rep_prefix,
				c->regs[VCPU_REGS_RDX]) == 0) {
			c->eip = saved_eip;
			return -1;
		}
		return 0;
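
	/*
	 * Editor's note (illustrative, not from the original source):
	 * string port I/O such as
	 *
	 *	rep insw	; read (e/r)cx words from port DX to ES:(e/r)di
	 *
	 * is handed to kvm_emulate_pio_string() as a whole, including the
	 * repeat count, rather than being stepped one element at a time
	 * like the movs/stos cases further down.
	 */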
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83:	/* Grp1 */
		switch (c->modrm_reg) {
		case 0:
			goto add;
		case 1:
			goto or;
		case 2:
			goto adc;
		case 3:
			goto sbb;
		case 4:
			goto and;
		case 5:
			goto sub;
		case 6:
			goto xor;
		case 7:
			goto cmp;
		}
		break;
	case 0x84 ... 0x85:	/* test */
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 0x86 ... 0x87:	/* xchg */
	xchg:
		/* Write back the register source. */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *) c->src.ptr = (u8) c->dst.val;
			break;
		case 2:
			*(u16 *) c->src.ptr = (u16) c->dst.val;
			break;
		case 4:
			*c->src.ptr = (u32) c->dst.val;
			break;	/* 64b reg: zero-extend */
		case 8:
			*c->src.ptr = c->dst.val;
			break;
		}
		/*
		 * Write back the memory destination with implicit LOCK
		 * prefix.
		 */
		c->dst.val = c->src.val;
		c->lock_prefix = 1;
		break;
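
	/*
	 * Editor's note (illustrative, not from the original source): xchg
	 * with a memory operand is atomic even without an explicit prefix,
	 * e.g.
	 *
	 *	xchg %eax, (%ebx)	; locked as if "lock xchg"
	 *
	 * which is why c->lock_prefix is forced on before writeback.
	 */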
	case 0x88 ... 0x8b:	/* mov */
		goto mov;
	case 0x8c: { /* mov r/m, sreg */
		struct kvm_segment segreg;

		if (c->modrm_reg <= VCPU_SREG_GS)
			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
		else {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->dst.val = segreg.selector;
		break;
	}
	case 0x8d: /* lea r16/r32, m */
		c->dst.val = c->modrm_ea;
		break;
	case 0x8e: { /* mov seg, r/m16 */
		uint16_t sel;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_CS ||
		    c->modrm_reg > VCPU_SREG_GS) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}

		if (c->modrm_reg == VCPU_SREG_SS)
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);

		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);

		c->dst.type = OP_NONE;  /* Disable writeback. */
		break;
	}
	case 0x8f:		/* pop (sole member of Grp1a) */
		rc = emulate_grp1a(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
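
	/*
	 * Editor's note (illustrative, not from the original source):
	 * "mov %ax, %ss" above inhibits interrupts until the following
	 * instruction completes, so the classic sequence
	 *
	 *	mov %ax, %ss
	 *	mov %bx, %sp
	 *
	 * cannot be split by an interrupt; the KVM_X86_SHADOW_INT_MOV_SS
	 * interruptibility flag models that one-instruction window.
	 */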
	case 0x90: /* nop / xchg r8,rax */
		if (!(c->rex_prefix & 1)) { /* nop */
			c->dst.type = OP_NONE;
			break;
		}
		/* fall through: with REX.B, 0x90 is xchg r8,rax */
	case 0x91 ... 0x97: /* xchg reg,rax */
		c->src.type = c->dst.type = OP_REG;
		c->src.bytes = c->dst.bytes = c->op_bytes;
		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
		c->src.val = *(c->src.ptr);
		goto xchg;
	case 0x9c: /* pushf */
		c->src.val = (unsigned long) ctxt->eflags;
		emulate_push(ctxt);
		break;
	case 0x9d: /* popf */
		c->dst.type = OP_REG;
		c->dst.ptr = (unsigned long *) &ctxt->eflags;
		c->dst.bytes = c->op_bytes;
		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa0 ... 0xa1:	/* mov (load accumulator) */
		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		c->dst.val = c->src.val;
		break;
	case 0xa2 ... 0xa3:	/* mov (store accumulator) */
		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
		break;
	case 0xa4 ... 0xa5:	/* movs */
		c->dst.type = OP_MEM;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = (unsigned long *)register_address(c,
						   es_base(ctxt),
						   c->regs[VCPU_REGS_RDI]);
		rc = ops->read_emulated(register_address(c,
						seg_override_base(ctxt, c),
						c->regs[VCPU_REGS_RSI]),
					&c->dst.val,
					c->dst.bytes, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
								: c->dst.bytes);
		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
								: c->dst.bytes);
		break;
	case 0xa6 ... 0xa7:	/* cmps */
		c->src.type = OP_NONE; /* Disable writeback. */
		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->src.ptr = (unsigned long *)register_address(c,
						   seg_override_base(ctxt, c),
						   c->regs[VCPU_REGS_RSI]);
		rc = ops->read_emulated((unsigned long)c->src.ptr,
					&c->src.val,
					c->src.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;

		c->dst.type = OP_NONE; /* Disable writeback. */
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = (unsigned long *)register_address(c,
						   es_base(ctxt),
						   c->regs[VCPU_REGS_RDI]);
		rc = ops->read_emulated((unsigned long)c->dst.ptr,
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;

		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);

		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);

		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
				       (ctxt->eflags & EFLG_DF) ? -c->src.bytes
								: c->src.bytes);
		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
								: c->dst.bytes);

		break;
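
	/*
	 * Editor's note (illustrative, not from the original source):
	 * EFLG_DF selects the string direction, e.g.
	 *
	 *	std
	 *	rep movsb	; copies downward, RSI/RDI decrement
	 *
	 * hence the +/- c->dst.bytes stepping in the string cases.
	 */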
	case 0xaa ... 0xab:	/* stos */
		c->dst.type = OP_MEM;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = (unsigned long *)register_address(c,
						   es_base(ctxt),
						   c->regs[VCPU_REGS_RDI]);
		c->dst.val = c->regs[VCPU_REGS_RAX];
		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
								: c->dst.bytes);
		break;
	case 0xac ... 0xad:	/* lods */
		c->dst.type = OP_REG;
		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		rc = ops->read_emulated(register_address(c,
						seg_override_base(ctxt, c),
						c->regs[VCPU_REGS_RSI]),
					&c->dst.val,
					c->dst.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
								: c->dst.bytes);
		break;
	case 0xae ... 0xaf:	/* scas */
		DPRINTF("Urk! I don't handle SCAS.\n");
		goto cannot_emulate;
	case 0xb0 ... 0xbf: /* mov r, imm */
		goto mov;
	case 0xc0 ... 0xc1:	/* Grp2 (shift/rotate, imm8) */
		emulate_grp2(ctxt);
		break;
	case 0xc3: /* ret */
		c->dst.type = OP_REG;
		c->dst.ptr = &c->eip;
		c->dst.bytes = c->op_bytes;
		goto pop_instruction;
	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
	mov:
		c->dst.val = c->src.val;
		break;
	case 0xcb:		/* ret far */
		rc = emulate_ret_far(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xd0 ... 0xd1:	/* Grp2 (shift/rotate by 1) */
		c->src.val = 1;
		emulate_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 (shift/rotate by cl) */
		c->src.val = c->regs[VCPU_REGS_RCX];
		emulate_grp2(ctxt);
		break;
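
	/*
	 * Editor's note (illustrative, not from the original source): near
	 * RET (0xc3, above) is emulated by aiming the destination at
	 * c->eip and jumping to pop_instruction, so "ret" pops the return
	 * address straight into EIP through the same emulate_pop() path
	 * as the 0x58-0x5f pop-register cases.
	 */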
	case 0xe4: /* inb */
	case 0xe5: /* in */
		port = c->src.val;
		io_dir_in = 1;
		goto do_io;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		port = c->src.val;
		io_dir_in = 0;
		goto do_io;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: /* jmp far */
	jump_far:
		if (load_segment_descriptor(ctxt, ops, c->src2.val,
					    VCPU_SREG_CS))
			goto done;

		c->eip = c->src.val;
		break;
	case 0xeb:
	      jmp:		/* jmp rel short */
		jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
		port = c->regs[VCPU_REGS_RDX];
		io_dir_in = 1;
		goto do_io;
	case 0xee: /* out al,dx */
	case 0xef: /* out (e/r)ax,dx */
		port = c->regs[VCPU_REGS_RDX];
		io_dir_in = 0;
	do_io:
		if (!emulator_io_permited(ctxt, ops, port,
					  (c->d & ByteOp) ? 1 : c->op_bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
				    (c->d & ByteOp) ? 1 : c->op_bytes,
				    port) != 0) {
			c->eip = saved_eip;
			goto cannot_emulate;
		}
		break;
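
	/*
	 * Editor's note (illustrative, not from the original source): all
	 * four in/out forms funnel into do_io; e.g.
	 *
	 *	inb $0x60, %al	; immediate-port form (0xe4)
	 *	inb %dx, %al	; DX-relative form (0xec)
	 *
	 * differ only in where the port number comes from.
	 */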
	case 0xf4: /* hlt */
		ctxt->vcpu->arch.halt_request = 1;
		break;
	case 0xf5: /* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xf6 ... 0xf7:	/* Grp3 */
		if (!emulate_grp3(ctxt, ops))
			goto cannot_emulate;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfa: /* cli */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			ctxt->eflags &= ~X86_EFLAGS_IF;
			c->dst.type = OP_NONE; /* Disable writeback. */
		}
		break;
	case 0xfb: /* sti */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
			ctxt->eflags |= X86_EFLAGS_IF;
			c->dst.type = OP_NONE; /* Disable writeback. */
		}
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
	case 0xfe: /* Grp4 */
	grp45:
		rc = emulate_grp45(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xff: /* Grp5 */
		if (c->modrm_reg == 5)
			goto jump_far;
		goto grp45;
	}
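
	/*
	 * Editor's note (illustrative, not from the original source):
	 * cli/sti are gated by IOPL rather than bare CPL: with IOPL=0 a
	 * CPL-3 "cli" raises #GP(0), which emulator_bad_iopl() reproduces
	 * in the two cases above.
	 */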
writeback:
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/* Commit shadow register state. */
	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
	kvm_rip_write(ctxt->vcpu, c->eip);

done:
	if (rc == X86EMUL_UNHANDLEABLE) {
		c->eip = saved_eip;
		return -1;
	}
	return 0;

twobyte_insn:
	switch (c->b) {
	case 0x01: /* lgdt, lidt, lmsw */
		switch (c->modrm_reg) {
			u16 size;
			unsigned long address;

		case 0: /* vmcall */
			if (c->modrm_mod != 3 || c->modrm_rm != 1)
				goto cannot_emulate;

			rc = kvm_fix_hypercall(ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;

			/* Let the processor re-execute the fixed hypercall */
			c->eip = kvm_rip_read(ctxt->vcpu);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 2: /* lgdt */
			rc = read_descriptor(ctxt, ops, c->src.ptr,
					     &size, &address, c->op_bytes);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			realmode_lgdt(ctxt->vcpu, size, address);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 3: /* lidt/vmmcall */
			if (c->modrm_mod == 3) {
				switch (c->modrm_rm) {
				case 1:
					rc = kvm_fix_hypercall(ctxt->vcpu);
					if (rc != X86EMUL_CONTINUE)
						goto done;
					break;
				default:
					goto cannot_emulate;
				}
			} else {
				rc = read_descriptor(ctxt, ops, c->src.ptr,
						     &size, &address,
						     c->op_bytes);
				if (rc != X86EMUL_CONTINUE)
					goto done;
				realmode_lidt(ctxt->vcpu, size, address);
			}
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 4: /* smsw */
			c->dst.bytes = 2;
			c->dst.val = ops->get_cr(0, ctxt->vcpu);
			break;
		case 6: /* lmsw */
			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
				    (c->src.val & 0x0f), ctxt->vcpu);
			c->dst.type = OP_NONE;
			break;
		case 5: /* not defined */
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		case 7: /* invlpg */
			emulate_invlpg(ctxt->vcpu, memop);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		default:
			goto cannot_emulate;
		}
		break;
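
	/*
	 * Editor's note (assumption, not from the original source):
	 * kvm_fix_hypercall() is understood to rewrite the guest's
	 * hypercall to the instruction native to the host CPU (vmcall vs.
	 * vmmcall), after which emulation restarts it, so a guest using
	 * the "wrong" vendor's opcode still enters KVM correctly.
	 */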
	case 0x05: /* syscall */
		rc = emulate_syscall(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x06: /* clts */
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x08:		/* invd */
	case 0x09:		/* wbinvd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
		c->dst.type = OP_NONE;
		break;
	case 0x20: /* mov cr, reg */
		switch (c->modrm_reg) {
		case 1:
		case 5 ... 7:
		case 9 ... 15:
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x21: /* mov from dr to reg */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x22: /* mov reg, cr */
		ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
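
	/*
	 * Editor's note (illustrative, not from the original source):
	 * DR4/DR5 alias DR6/DR7 only while CR4.DE is clear; with debug
	 * extensions enabled,
	 *
	 *	mov %dr4, %eax	; #UD when CR4.DE = 1
	 *
	 * which is the check made above for both mov directions.
	 */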
	case 0x30:
		/* wrmsr */
		msr_data = (u32)c->regs[VCPU_REGS_RAX]
			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
		if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x32:
		/* rdmsr */
		if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		} else {
			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
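
	/*
	 * Editor's note (illustrative, not from the original source): MSRs
	 * travel through the EDX:EAX pair, e.g. reading the TSC:
	 *
	 *	mov $0x10, %ecx	; MSR_IA32_TSC
	 *	rdmsr		; EDX = high 32 bits, EAX = low 32 bits
	 *
	 * matching the (u32)msr_data / msr_data >> 32 split above.
	 */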
	case 0x34: /* sysenter */
		rc = emulate_sysenter(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x35: /* sysexit */
		rc = emulate_sysexit(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f:	/* jnz rel, etc. */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
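
	/*
	 * Editor's note (illustrative, not from the original source): cmov
	 * always performs the load and then cancels writeback if the
	 * condition fails, e.g.
	 *
	 *	cmovz %ebx, %eax	; EAX written only when ZF = 1
	 *
	 * hence the unconditional assignment before the test_cc() check.
	 */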
	case 0xa0: /* push fs */
		emulate_push_sreg(ctxt, VCPU_SREG_FS);
		break;
	case 0xa1: /* pop fs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa3:
	      bt:		/* bt */
		c->dst.type = OP_NONE;
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xa8: /* push gs */
		emulate_push_sreg(ctxt, VCPU_SREG_GS);
		break;
	case 0xa9: /* pop gs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xab:
	      bts:		/* bts */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
		break;
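
	/*
	 * Editor's note (illustrative, not from the original source): for
	 * register destinations the bit offset is reduced modulo the
	 * operand width, e.g.
	 *
	 *	btl $35, %eax	; tests bit 35 & 31 = bit 3
	 *
	 * which is the (c->dst.bytes << 3) - 1 masking in bt/bts/btr/btc;
	 * memory operands were already rebased by the BitOp adjustment
	 * during operand fetch.
	 */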
	case 0xae: /* clflush */
		break;
	case 0xb0 ... 0xb1:	/* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		c->src.orig_val = c->src.val;
		c->src.val = c->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			c->dst.val = c->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			c->dst.type = OP_REG;
			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	      btr:		/* btr */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
		break;
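
	/*
	 * Editor's note (illustrative, not from the original source):
	 * cmpxchg semantics as emulated above:
	 *
	 *	lock cmpxchg %ebx, (%esi)
	 *		; if (%esi) == EAX: store EBX, set ZF
	 *		; else:             load (%esi) into EAX, clear ZF
	 */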
	case 0xb6 ... 0xb7:	/* movzx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
					     : (u16) c->src.val;
		break;
	case 0xba:		/* Grp8 */
		switch (c->modrm_reg & 3) {
		case 0:
			goto bt;
		case 1:
			goto bts;
		case 2:
			goto btr;
		case 3:
			goto btc;
		}
		break;
	case 0xbb:
	      btc:		/* btc */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
		break;
	case 0xbe ... 0xbf:	/* movsx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val
					     : (s16) c->src.val;
		break;
	case 0xc3:		/* movnti */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val
						: (u64) c->src.val;
		break;
	case 0xc7:		/* Grp9 (cmpxchg8b) */
		rc = emulate_grp9(ctxt, ops, memop);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->dst.type = OP_NONE;
		break;
	}
	goto writeback;

cannot_emulate:
	DPRINTF("Cannot emulate %02x\n", c->b);
	c->eip = saved_eip;
	return -1;
}