/******************************************************************************
 * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
 *
 * Copyright (c) 2005 Keir Fraser
 *
 * Linux coding style, mod r/m decoder, segment base fixes, real-mode
 * privileged instructions:
 *
 * Copyright (C) 2006 Qumranet
 *   Avi Kivity <avi@qumranet.com>
 *   Yaniv Kamay <yaniv@qumranet.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4
 */
#ifndef __KERNEL__
#include <public/xen.h>
#define DPRINTF(_f, _a ...) printf(_f , ## _a)
#else
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define DPRINTF(x...) do {} while (0)
#endif
#include <linux/module.h>
#include <asm/kvm_emulate.h>
/*
 * Opcode effective-address decode tables.
 * Note that we only emulate instructions that have at least one memory
 * operand (excluding implicit stack references). We assume that stack
 * references and instruction fetches will never occur in special memory
 * areas that require emulation. So, for example, 'mov <imm>,<reg>' need
 * not be handled.
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0)      /* 8-bit operands. */
/* Destination operand type. */
#define ImplicitOps (1<<1)      /* Implicit in opcode. No generic decode. */
#define DstReg      (2<<1)      /* Register operand. */
#define DstMem      (3<<1)      /* Memory operand. */
#define DstAcc      (4<<1)      /* Destination Accumulator */
#define DstDI       (5<<1)      /* Destination is in ES:(E)DI */
#define DstMem64    (6<<1)      /* 64bit memory operand */
#define DstMask     (7<<1)
/* Source operand type. */
#define SrcNone     (0<<4)      /* No source operand. */
#define SrcImplicit (0<<4)      /* Source operand is implicit in the opcode. */
#define SrcReg      (1<<4)      /* Register operand. */
#define SrcMem      (2<<4)      /* Memory operand. */
#define SrcMem16    (3<<4)      /* Memory operand (16-bit). */
#define SrcMem32    (4<<4)      /* Memory operand (32-bit). */
#define SrcImm      (5<<4)      /* Immediate operand. */
#define SrcImmByte  (6<<4)      /* 8-bit sign-extended immediate operand. */
#define SrcOne      (7<<4)      /* Implied '1' */
#define SrcImmUByte (8<<4)      /* 8-bit unsigned immediate operand. */
#define SrcImmU     (9<<4)      /* Immediate operand, unsigned */
#define SrcSI       (0xa<<4)    /* Source is in the DS:RSI */
#define SrcMask     (0xf<<4)
/* Generic ModRM decode. */
#define ModRM       (1<<8)
/* Destination is only written; never read. */
#define Mov         (1<<9)
#define BitOp       (1<<10)
#define MemAbs      (1<<11)     /* Memory operand is absolute displacement */
#define String      (1<<12)     /* String instruction (rep capable) */
#define Stack       (1<<13)     /* Stack instruction (push/pop) */
#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
#define GroupMask   0xff        /* Group number stored in bits 0:7 */
/* Misc flags */
#define Lock        (1<<26)     /* lock prefix is allowed for the instruction */
#define Priv        (1<<27)     /* instruction generates #GP if current CPL != 0 */
#define No64        (1<<28)
/* Source 2 operand type */
#define Src2None    (0<<29)
#define Src2CL      (1<<29)
#define Src2ImmByte (2<<29)
#define Src2One     (3<<29)
#define Src2Imm16   (4<<29)
#define Src2Mem16   (5<<29)     /* Used for Ep encoding. First argument has to be
                                   in memory and second argument is located
                                   immediately after the first one in memory. */
#define Src2Mask    (7<<29)
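
/*
 * Worked example (added for illustration; the example_* helpers in this
 * file are not part of the original emulator): a decode-flags word such
 * as the opcode 0x00 ("add r/m8, r8") entry below,
 * ByteOp | DstMem | SrcReg | ModRM | Lock, is inspected field by field
 * using the masks defined above.
 */
static inline int example_is_locked_byte_rmw(u32 d)
{
        /* byte operands, memory destination, lock prefix permitted */
        return (d & ByteOp) && (d & DstMask) == DstMem && (d & Lock);
}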
enum {
        Group1_80, Group1_81, Group1_82, Group1_83,
        Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
        Group8, Group9,
};
static u32 opcode_table[256] = {
        /* 0x00 - 0x07 */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x08 - 0x0F */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, 0,
        /* 0x10 - 0x17 */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x18 - 0x1F */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        /* 0x20 - 0x27 */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
        /* 0x28 - 0x2F */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        0, 0, 0, 0,
        /* 0x30 - 0x37 */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        0, 0, 0, 0,
        /* 0x38 - 0x3F */
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
        ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
        0, 0,
        /* 0x40 - 0x47 */
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x48 - 0x4F */
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x50 - 0x57 */
        SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
        SrcReg | Stack, SrcReg | Stack, SrcReg | Stack, SrcReg | Stack,
        /* 0x58 - 0x5F */
        DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
        DstReg | Stack, DstReg | Stack, DstReg | Stack, DstReg | Stack,
        /* 0x60 - 0x67 */
        ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
        0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
        0, 0, 0, 0,
        /* 0x68 - 0x6F */
        SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0,
        DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */
        SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */
        /* 0x70 - 0x77 */
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        /* 0x78 - 0x7F */
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte,
        /* 0x80 - 0x87 */
        Group | Group1_80, Group | Group1_81,
        Group | Group1_82, Group | Group1_83,
        ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        /* 0x88 - 0x8F */
        ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
        ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstMem | SrcReg | ModRM | Mov, ModRM | DstReg,
        DstReg | SrcMem | ModRM | Mov, Group | Group1A,
        /* 0x90 - 0x97 */
        DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
        /* 0x98 - 0x9F */
        0, 0, SrcImm | Src2Imm16 | No64, 0,
        ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
        /* 0xA0 - 0xA7 */
        ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
        ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
        ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String,
        ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String,
        /* 0xA8 - 0xAF */
        0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String,
        ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String,
        ByteOp | DstDI | String, DstDI | String,
        /* 0xB0 - 0xB7 */
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov,
        /* 0xB8 - 0xBF */
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        DstReg | SrcImm | Mov, DstReg | SrcImm | Mov,
        /* 0xC0 - 0xC7 */
        ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
        0, ImplicitOps | Stack, 0, 0,
        ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
        /* 0xC8 - 0xCF */
        0, 0, 0, ImplicitOps | Stack,
        ImplicitOps, SrcImmByte, ImplicitOps | No64, ImplicitOps,
        /* 0xD0 - 0xD7 */
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
        ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM,
        0, 0, 0, 0,
        /* 0xD8 - 0xDF */
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 - 0xE7 */
        0, 0, 0, 0,
        ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
        ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc,
        /* 0xE8 - 0xEF */
        SrcImm | Stack, SrcImm | ImplicitOps,
        SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps,
        SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
        SrcNone | ByteOp | DstAcc, SrcNone | DstAcc,
        /* 0xF0 - 0xF7 */
        0, 0, 0, 0,
        ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
        /* 0xF8 - 0xFF */
        ImplicitOps, 0, ImplicitOps, ImplicitOps,
        ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
};
static u32 twobyte_table[256] = {
        /* 0x00 - 0x0F */
        0, Group | GroupDual | Group7, 0, 0,
        0, ImplicitOps, ImplicitOps | Priv, 0,
        ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
        0, ImplicitOps | ModRM, 0, 0,
        /* 0x10 - 0x1F */
        0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
        /* 0x20 - 0x2F */
        ModRM | ImplicitOps | Priv, ModRM | Priv,
        ModRM | ImplicitOps | Priv, ModRM | Priv,
        0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x30 - 0x3F */
        ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
        ImplicitOps, ImplicitOps | Priv, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x40 - 0x47 */
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        /* 0x48 - 0x4F */
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
        /* 0x50 - 0x5F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x60 - 0x6F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x70 - 0x7F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0x80 - 0x8F */
        SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
        SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm,
        /* 0x90 - 0x9F */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xA0 - 0xA7 */
        ImplicitOps | Stack, ImplicitOps | Stack,
        0, DstMem | SrcReg | ModRM | BitOp,
        DstMem | SrcReg | Src2ImmByte | ModRM,
        DstMem | SrcReg | Src2CL | ModRM, 0, 0,
        /* 0xA8 - 0xAF */
        ImplicitOps | Stack, ImplicitOps | Stack,
        0, DstMem | SrcReg | ModRM | BitOp | Lock,
        DstMem | SrcReg | Src2ImmByte | ModRM,
        DstMem | SrcReg | Src2CL | ModRM,
        ModRM, 0,
        /* 0xB0 - 0xB7 */
        ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
        0, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem16 | ModRM | Mov,
        /* 0xB8 - 0xBF */
        0, 0,
        Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
        0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
        DstReg | SrcMem16 | ModRM | Mov,
        /* 0xC0 - 0xCF */
        0, 0, 0, DstMem | SrcReg | ModRM | Mov,
        0, 0, 0, Group | GroupDual | Group9,
        0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xD0 - 0xDF */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xE0 - 0xEF */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 0xF0 - 0xFF */
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
static u32 group_table[] = {
        [Group1_80*8] =
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM | Lock,
        ByteOp | DstMem | SrcImm | ModRM,
        [Group1_81*8] =
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM | Lock,
        DstMem | SrcImm | ModRM,
        [Group1_82*8] =
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
        ByteOp | DstMem | SrcImm | ModRM | No64,
        [Group1_83*8] =
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM,
        [Group1A*8] =
        DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
        [Group3_Byte*8] =
        ByteOp | SrcImm | DstMem | ModRM, 0,
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        [Group3*8] =
        DstMem | SrcImm | ModRM, 0,
        DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        [Group4*8] =
        ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
        [Group5*8] =
        DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
        SrcMem | ModRM | Stack, 0,
        SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps,
        SrcMem | ModRM | Stack, 0,
        [Group7*8] =
        0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
        [Group8*8] =
        0, 0, 0, 0,
        DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
        DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
        [Group9*8] =
        0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};
static u32 group2_table[] = {
        [Group7*8] =
        SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv,
        SrcNone | ModRM | DstMem | Mov, 0,
        SrcMem16 | ModRM | Mov | Priv, 0,
        [Group9*8] =
        0, 0, 0, 0, 0, 0, 0, 0,
};
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
#define EFLG_PF (1<<2)
#define EFLG_CF (1<<0)
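
/*
 * Illustrative sketch (not in the original source): pulling the two-bit
 * IOPL field out of an EFLAGS image with the masks above.
 */
static inline int example_eflags_iopl(unsigned long eflags)
{
        return (eflags & EFLG_IOPL) >> 12;      /* IOPL lives in bits 13:12 */
}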
/*
 * Instruction emulation:
 * Most instructions are emulated directly via a fragment of inline assembly
 * code. This allows us to save/restore EFLAGS and thus very easily pick up
 * any modified flags.
 */
#if defined(CONFIG_X86_64)
#define _LO32 "k"               /* force 32-bit operand */
#define _STK  "%%rsp"           /* stack pointer */
#elif defined(__i386__)
#define _LO32 ""                /* force 32-bit operand */
#define _STK  "%%esp"           /* stack pointer */
#endif
/*
 * These EFLAGS bits are restored from saved value during emulation, and
 * any changes are written back to the saved value after emulation.
 */
#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
/* Before executing instruction: restore necessary bits in EFLAGS. */
#define _PRE_EFLAGS(_sav, _msk, _tmp) \
        /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
        "movl %"_sav",%"_LO32 _tmp"; " \
        "push %"_tmp"; " \
        "push %"_tmp"; " \
        "movl %"_msk",%"_LO32 _tmp"; " \
        "andl %"_LO32 _tmp",("_STK"); " \
        "pushf; " \
        "notl %"_LO32 _tmp"; " \
        "andl %"_LO32 _tmp",("_STK"); " \
        "andl %"_LO32 _tmp","__stringify(BITS_PER_LONG/4)"("_STK"); " \
        "pop  %"_tmp"; " \
        "orl  %"_LO32 _tmp",("_STK"); " \
        "popf; " \
        "pop  %"_sav"; "

/* After executing instruction: write-back necessary bits in EFLAGS. */
#define _POST_EFLAGS(_sav, _msk, _tmp) \
        /* _sav |= EFLAGS & _msk; */ \
        "pushf; " \
        "pop  %"_LO32 _tmp"; " \
        "andl %"_msk",%"_LO32 _tmp"; " \
        "orl  %"_LO32 _tmp",%"_sav"; "
#ifdef CONFIG_X86_64
#define ON64(x) x
#else
#define ON64(x)
#endif

#define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \
        do { \
                __asm__ __volatile__ ( \
                        _PRE_EFLAGS("0", "4", "2") \
                        _op _suffix " %"_x"3,%1; " \
                        _POST_EFLAGS("0", "4", "2") \
                        : "=m" (_eflags), "=m" ((_dst).val), \
                          "=&r" (_tmp) \
                        : _y ((_src).val), "i" (EFLAGS_MASK)); \
        } while (0)
/* Raw emulation: instruction has two explicit operands. */
#define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \
        do { \
                unsigned long _tmp; \
 \
                switch ((_dst).bytes) { \
                case 2: \
                        ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \
                        break; \
                case 4: \
                        ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \
                        break; \
                case 8: \
                        ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \
                        break; \
                } \
        } while (0)

#define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
        do { \
                unsigned long _tmp; \
                switch ((_dst).bytes) { \
                case 1: \
                        ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \
                        break; \
                default: \
                        __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
                                             _wx, _wy, _lx, _ly, _qx, _qy); \
                        break; \
                } \
        } while (0)
/* Source operand is byte-sized and may be restricted to just %cl. */
#define emulate_2op_SrcB(_op, _src, _dst, _eflags) \
        __emulate_2op(_op, _src, _dst, _eflags, \
                      "b", "c", "b", "c", "b", "c", "b", "c")

/* Source operand is byte, word, long or quad sized. */
#define emulate_2op_SrcV(_op, _src, _dst, _eflags) \
        __emulate_2op(_op, _src, _dst, _eflags, \
                      "b", "q", "w", "r", _LO32, "r", "", "r")

/* Source operand is word, long or quad sized. */
#define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags) \
        __emulate_2op_nobyte(_op, _src, _dst, _eflags, \
                             "w", "r", _LO32, "r", "", "r")
/* Instruction has three operands and one operand is stored in ECX register */
#define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \
        do { \
                unsigned long _tmp; \
                _type _clv  = (_cl).val; \
                _type _srcv = (_src).val; \
                _type _dstv = (_dst).val; \
 \
                __asm__ __volatile__ ( \
                        _PRE_EFLAGS("0", "5", "2") \
                        _op _suffix " %4,%1 \n" \
                        _POST_EFLAGS("0", "5", "2") \
                        : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \
                        : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \
                        ); \
 \
                (_cl).val  = (unsigned long) _clv; \
                (_src).val = (unsigned long) _srcv; \
                (_dst).val = (unsigned long) _dstv; \
        } while (0)
#define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \
        do { \
                switch ((_dst).bytes) { \
                case 2: \
                        __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                                         "w", unsigned short); \
                        break; \
                case 4: \
                        __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                                         "l", unsigned int); \
                        break; \
                case 8: \
                        ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \
                                              "q", unsigned long)); \
                        break; \
                } \
        } while (0)
#define __emulate_1op(_op, _dst, _eflags, _suffix) \
        do { \
                unsigned long _tmp; \
 \
                __asm__ __volatile__ ( \
                        _PRE_EFLAGS("0", "3", "2") \
                        _op _suffix " %1; " \
                        _POST_EFLAGS("0", "3", "2") \
                        : "=m" (_eflags), "+m" ((_dst).val), \
                          "=&r" (_tmp) \
                        : "i" (EFLAGS_MASK)); \
        } while (0)
/* Instruction has only one explicit operand (no source operand). */
#define emulate_1op(_op, _dst, _eflags) \
        do { \
                switch ((_dst).bytes) { \
                case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \
                case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \
                case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \
                case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \
                } \
        } while (0)
/* Fetch next part of the instruction being emulated. */
#define insn_fetch(_type, _size, _eip) \
({      unsigned long _x; \
        rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \
        if (rc != X86EMUL_CONTINUE) \
                goto done; \
        (_eip) += (_size); \
        (_type)(_x); \
})
static inline unsigned long ad_mask(struct decode_cache *c)
{
        return (1UL << (c->ad_bytes << 3)) - 1;
}

/* Access/update address held in a register, based on addressing mode. */
static inline unsigned long
address_mask(struct decode_cache *c, unsigned long reg)
{
        if (c->ad_bytes == sizeof(unsigned long))
                return reg;
        else
                return reg & ad_mask(c);
}

static inline unsigned long
register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
{
        return base + address_mask(c, reg);
}
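
/*
 * Usage sketch (illustrative only, not part of the original emulator):
 * with a 16-bit address size the register arithmetic wraps at 64KiB,
 * so an overflowed index of 0x10005 yields base + 0x0005.
 */
static inline unsigned long example_wrap16(struct decode_cache *c,
                                           unsigned long base)
{
        /* assumes c->ad_bytes == 2; ad_mask(c) is then 0xffff */
        return register_address(c, base, 0x10005);
}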
static inline void
register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
{
        if (c->ad_bytes == sizeof(unsigned long))
                *reg += inc;
        else
                *reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
}

static inline void jmp_rel(struct decode_cache *c, int rel)
{
        register_address_increment(c, &c->eip, rel);
}

static void set_seg_override(struct decode_cache *c, int seg)
{
        c->has_seg_override = true;
        c->seg_override = seg;
}

static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg)
{
        if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS)
                return 0;

        return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg);
}

static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt,
                                       struct decode_cache *c)
{
        if (!c->has_seg_override)
                return 0;

        return seg_base(ctxt, c->seg_override);
}

static unsigned long es_base(struct x86_emulate_ctxt *ctxt)
{
        return seg_base(ctxt, VCPU_SREG_ES);
}

static unsigned long ss_base(struct x86_emulate_ctxt *ctxt)
{
        return seg_base(ctxt, VCPU_SREG_SS);
}
static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
                              struct x86_emulate_ops *ops,
                              unsigned long eip, u8 *dest)
{
        struct fetch_cache *fc = &ctxt->decode.fetch;
        int rc;
        int size, cur_size;

        if (eip == fc->end) {
                cur_size = fc->end - fc->start;
                size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip));
                rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size,
                                size, ctxt->vcpu, NULL);
                if (rc != X86EMUL_CONTINUE)
                        return rc;
                fc->end += size;
        }
        *dest = fc->data[eip - fc->start];
        return X86EMUL_CONTINUE;
}
static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
                         struct x86_emulate_ops *ops,
                         unsigned long eip, void *dest, unsigned size)
{
        int rc = X86EMUL_CONTINUE;

        /* x86 instructions are limited to 15 bytes. */
        if (eip + size - ctxt->eip > 15)
                return X86EMUL_UNHANDLEABLE;
        while (size--) {
                rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++);
                if (rc != X86EMUL_CONTINUE)
                        return rc;
        }
        return X86EMUL_CONTINUE;
}
/*
 * Given the 'reg' portion of a ModRM byte, and a register block, return a
 * pointer into the block that addresses the relevant register.
 * @highbyte_regs specifies whether to decode AH, CH, DH, BH.
 */
static void *decode_register(u8 modrm_reg, unsigned long *regs,
                             int highbyte_regs)
{
        void *p;

        p = &regs[modrm_reg];
        if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
                p = (unsigned char *)&regs[modrm_reg & 3] + 1;
        return p;
}
static int read_descriptor(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops,
                           void *ptr,
                           u16 *size, unsigned long *address, int op_bytes)
{
        int rc;

        if (op_bytes == 2)
                op_bytes = 3;
        *address = 0;
        rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
                           ctxt->vcpu, NULL);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
                           ctxt->vcpu, NULL);
        return rc;
}
static int test_cc(unsigned int condition, unsigned int flags)
{
        int rc = 0;

        switch ((condition & 15) >> 1) {
        case 0: /* o */
                rc |= (flags & EFLG_OF);
                break;
        case 1: /* b/c/nae */
                rc |= (flags & EFLG_CF);
                break;
        case 2: /* z/e */
                rc |= (flags & EFLG_ZF);
                break;
        case 3: /* be/na */
                rc |= (flags & (EFLG_CF|EFLG_ZF));
                break;
        case 4: /* s */
                rc |= (flags & EFLG_SF);
                break;
        case 5: /* p/pe */
                rc |= (flags & EFLG_PF);
                break;
        case 7: /* le/ng */
                rc |= (flags & EFLG_ZF);
                /* fall through */
        case 6: /* l/nge */
                rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF));
                break;
        }

        /* Odd condition identifiers (lsb == 1) have inverted sense. */
        return (!!rc ^ (condition & 1));
}
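
/*
 * Usage sketch (illustrative only): for Jcc/SETcc the low nibble of the
 * opcode selects the condition, e.g. 0x74 (je) uses condition 0x4 and
 * 0x75 (jne), with the low bit set, inverts it.
 */
static inline int example_je_taken(unsigned int eflags)
{
        return test_cc(0x4, eflags);    /* non-zero iff ZF is set */
}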
static void decode_register_operand(struct operand *op,
                                    struct decode_cache *c,
                                    int inhibit_bytereg)
{
        unsigned reg = c->modrm_reg;
        int highbyte_regs = c->rex_prefix == 0;

        if (!(c->d & ModRM))
                reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
        op->type = OP_REG;
        if ((c->d & ByteOp) && !inhibit_bytereg) {
                op->ptr = decode_register(reg, c->regs, highbyte_regs);
                op->val = *(u8 *)op->ptr;
                op->bytes = 1;
        } else {
                op->ptr = decode_register(reg, c->regs, 0);
                op->bytes = c->op_bytes;
                switch (op->bytes) {
                case 2:
                        op->val = *(u16 *)op->ptr;
                        break;
                case 4:
                        op->val = *(u32 *)op->ptr;
                        break;
                case 8:
                        op->val = *(u64 *) op->ptr;
                        break;
                }
        }
        op->orig_val = op->val;
}
static int decode_modrm(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        u8 sib;
        int index_reg = 0, base_reg = 0, scale;
        int rc = X86EMUL_CONTINUE;

        if (c->rex_prefix) {
                c->modrm_reg = (c->rex_prefix & 4) << 1;        /* REX.R */
                index_reg = (c->rex_prefix & 2) << 2;   /* REX.X */
                c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REX.B */
        }

        c->modrm = insn_fetch(u8, 1, c->eip);
        c->modrm_mod |= (c->modrm & 0xc0) >> 6;
        c->modrm_reg |= (c->modrm & 0x38) >> 3;
        c->modrm_rm |= (c->modrm & 0x07);
        c->modrm_ea = 0;
        c->use_modrm_ea = 1;

        if (c->modrm_mod == 3) {
                c->modrm_ptr = decode_register(c->modrm_rm,
                                               c->regs, c->d & ByteOp);
                c->modrm_val = *(unsigned long *)c->modrm_ptr;
                return rc;
        }

        if (c->ad_bytes == 2) {
                unsigned bx = c->regs[VCPU_REGS_RBX];
                unsigned bp = c->regs[VCPU_REGS_RBP];
                unsigned si = c->regs[VCPU_REGS_RSI];
                unsigned di = c->regs[VCPU_REGS_RDI];

                /* 16-bit ModR/M decode. */
                switch (c->modrm_mod) {
                case 0:
                        if (c->modrm_rm == 6)
                                c->modrm_ea += insn_fetch(u16, 2, c->eip);
                        break;
                case 1:
                        c->modrm_ea += insn_fetch(s8, 1, c->eip);
                        break;
                case 2:
                        c->modrm_ea += insn_fetch(u16, 2, c->eip);
                        break;
                }
                switch (c->modrm_rm) {
                case 0:
                        c->modrm_ea += bx + si;
                        break;
                case 1:
                        c->modrm_ea += bx + di;
                        break;
                case 2:
                        c->modrm_ea += bp + si;
                        break;
                case 3:
                        c->modrm_ea += bp + di;
                        break;
                case 4:
                        c->modrm_ea += si;
                        break;
                case 5:
                        c->modrm_ea += di;
                        break;
                case 6:
                        if (c->modrm_mod != 0)
                                c->modrm_ea += bp;
                        break;
                case 7:
                        c->modrm_ea += bx;
                        break;
                }
                if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
                    (c->modrm_rm == 6 && c->modrm_mod != 0))
                        if (!c->has_seg_override)
                                set_seg_override(c, VCPU_SREG_SS);
                c->modrm_ea = (u16)c->modrm_ea;
        } else {
                /* 32/64-bit ModR/M decode. */
                if ((c->modrm_rm & 7) == 4) {
                        sib = insn_fetch(u8, 1, c->eip);
                        index_reg |= (sib >> 3) & 7;
                        base_reg |= sib & 7;
                        scale = sib >> 6;

                        if ((base_reg & 7) == 5 && c->modrm_mod == 0)
                                c->modrm_ea += insn_fetch(s32, 4, c->eip);
                        else
                                c->modrm_ea += c->regs[base_reg];
                        if (index_reg != 4)
                                c->modrm_ea += c->regs[index_reg] << scale;
                } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) {
                        if (ctxt->mode == X86EMUL_MODE_PROT64)
                                c->rip_relative = 1;
                } else
                        c->modrm_ea += c->regs[c->modrm_rm];
                switch (c->modrm_mod) {
                case 0:
                        if (c->modrm_rm == 5)
                                c->modrm_ea += insn_fetch(s32, 4, c->eip);
                        break;
                case 1:
                        c->modrm_ea += insn_fetch(s8, 1, c->eip);
                        break;
                case 2:
                        c->modrm_ea += insn_fetch(s32, 4, c->eip);
                        break;
                }
        }
done:
        return rc;
}
static int decode_abs(struct x86_emulate_ctxt *ctxt,
                      struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;

        switch (c->ad_bytes) {
        case 2:
                c->modrm_ea = insn_fetch(u16, 2, c->eip);
                break;
        case 4:
                c->modrm_ea = insn_fetch(u32, 4, c->eip);
                break;
        case 8:
                c->modrm_ea = insn_fetch(u64, 8, c->eip);
                break;
        }
done:
        return rc;
}
int
x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;
        int mode = ctxt->mode;
        int def_op_bytes, def_ad_bytes, group;

        /* we cannot decode insn before we complete previous rep insn */
        WARN_ON(ctxt->restart);

        /* Shadow copy of register state. Committed on successful emulation. */
        memset(c, 0, sizeof(struct decode_cache));
        c->eip = ctxt->eip;
        c->fetch.start = c->fetch.end = c->eip;
        ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS);
        memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

        switch (mode) {
        case X86EMUL_MODE_REAL:
        case X86EMUL_MODE_VM86:
        case X86EMUL_MODE_PROT16:
                def_op_bytes = def_ad_bytes = 2;
                break;
        case X86EMUL_MODE_PROT32:
                def_op_bytes = def_ad_bytes = 4;
                break;
#ifdef CONFIG_X86_64
        case X86EMUL_MODE_PROT64:
                def_op_bytes = 4;
                def_ad_bytes = 8;
                break;
#endif
        default:
                return -1;
        }

        c->op_bytes = def_op_bytes;
        c->ad_bytes = def_ad_bytes;

        /* Legacy prefixes. */
        for (;;) {
                switch (c->b = insn_fetch(u8, 1, c->eip)) {
                case 0x66:      /* operand-size override */
                        /* switch between 2/4 bytes */
                        c->op_bytes = def_op_bytes ^ 6;
                        break;
                case 0x67:      /* address-size override */
                        if (mode == X86EMUL_MODE_PROT64)
                                /* switch between 4/8 bytes */
                                c->ad_bytes = def_ad_bytes ^ 12;
                        else
                                /* switch between 2/4 bytes */
                                c->ad_bytes = def_ad_bytes ^ 6;
                        break;
                case 0x26:      /* ES override */
                case 0x2e:      /* CS override */
                case 0x36:      /* SS override */
                case 0x3e:      /* DS override */
                        set_seg_override(c, (c->b >> 3) & 3);
                        break;
                case 0x64:      /* FS override */
                case 0x65:      /* GS override */
                        set_seg_override(c, c->b & 7);
                        break;
                case 0x40 ... 0x4f: /* REX */
                        if (mode != X86EMUL_MODE_PROT64)
                                goto done_prefixes;
                        c->rex_prefix = c->b;
                        continue;
                case 0xf0:      /* LOCK */
                        c->lock_prefix = 1;
                        break;
                case 0xf2:      /* REPNE/REPNZ */
                        c->rep_prefix = REPNE_PREFIX;
                        break;
                case 0xf3:      /* REP/REPE/REPZ */
                        c->rep_prefix = REPE_PREFIX;
                        break;
                default:
                        goto done_prefixes;
                }

                /* Any legacy prefix after a REX prefix nullifies its effect. */

                c->rex_prefix = 0;
        }

done_prefixes:

        /* REX prefix. */
        if (c->rex_prefix & 8)
                c->op_bytes = 8;        /* REX.W */

        /* Opcode byte(s). */
        c->d = opcode_table[c->b];
        if (c->d == 0) {
                /* Two-byte opcode? */
                if (c->b == 0x0f) {
                        c->twobyte = 1;
                        c->b = insn_fetch(u8, 1, c->eip);
                        c->d = twobyte_table[c->b];
                }
        }

        if (c->d & Group) {
                group = c->d & GroupMask;
                c->modrm = insn_fetch(u8, 1, c->eip);
                --c->eip;

                group = (group << 3) + ((c->modrm >> 3) & 7);
                if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
                        c->d = group2_table[group];
                else
                        c->d = group_table[group];
        }

        /* Unrecognised? */
        if (c->d == 0) {
                DPRINTF("Cannot emulate %02x\n", c->b);
                return -1;
        }

        if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
                c->op_bytes = 8;

        /* ModRM and SIB bytes. */
        if (c->d & ModRM)
                rc = decode_modrm(ctxt, ops);
        else if (c->d & MemAbs)
                rc = decode_abs(ctxt, ops);
        if (rc != X86EMUL_CONTINUE)
                goto done;

        if (!c->has_seg_override)
                set_seg_override(c, VCPU_SREG_DS);

        if (!(!c->twobyte && c->b == 0x8d))
                c->modrm_ea += seg_override_base(ctxt, c);

        if (c->ad_bytes != 8)
                c->modrm_ea = (u32)c->modrm_ea;

        if (c->rip_relative)
                c->modrm_ea += c->eip;
        /*
         * Decode and fetch the source operand: register, memory
         * or immediate.
         */
        switch (c->d & SrcMask) {
        case SrcNone:
                break;
        case SrcReg:
                decode_register_operand(&c->src, c, 0);
                break;
        case SrcMem16:
                c->src.bytes = 2;
                goto srcmem_common;
        case SrcMem32:
                c->src.bytes = 4;
                goto srcmem_common;
        case SrcMem:
                c->src.bytes = (c->d & ByteOp) ? 1 :
                                                 c->op_bytes;
                /* Don't fetch the address for invlpg: it could be unmapped. */
                if (c->twobyte && c->b == 0x01 && c->modrm_reg == 7)
                        break;
        srcmem_common:
                /*
                 * For instructions with a ModR/M byte, switch to register
                 * access if Mod = 3.
                 */
                if ((c->d & ModRM) && c->modrm_mod == 3) {
                        c->src.type = OP_REG;
                        c->src.val = c->modrm_val;
                        c->src.ptr = c->modrm_ptr;
                        break;
                }
                c->src.type = OP_MEM;
                c->src.ptr = (unsigned long *)c->modrm_ea;
                c->src.val = 0;
                break;
        case SrcImm:
        case SrcImmU:
                c->src.type = OP_IMM;
                c->src.ptr = (unsigned long *)c->eip;
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                if (c->src.bytes == 8)
                        c->src.bytes = 4;
                /* NB. Immediates are sign-extended as necessary. */
                switch (c->src.bytes) {
                case 1:
                        c->src.val = insn_fetch(s8, 1, c->eip);
                        break;
                case 2:
                        c->src.val = insn_fetch(s16, 2, c->eip);
                        break;
                case 4:
                        c->src.val = insn_fetch(s32, 4, c->eip);
                        break;
                }
                if ((c->d & SrcMask) == SrcImmU) {
                        switch (c->src.bytes) {
                        case 1:
                                c->src.val &= 0xff;
                                break;
                        case 2:
                                c->src.val &= 0xffff;
                                break;
                        case 4:
                                c->src.val &= 0xffffffff;
                                break;
                        }
                }
                break;
        case SrcImmByte:
        case SrcImmUByte:
                c->src.type = OP_IMM;
                c->src.ptr = (unsigned long *)c->eip;
                c->src.bytes = 1;
                if ((c->d & SrcMask) == SrcImmByte)
                        c->src.val = insn_fetch(s8, 1, c->eip);
                else
                        c->src.val = insn_fetch(u8, 1, c->eip);
                break;
        case SrcOne:
                c->src.bytes = 1;
                c->src.val = 1;
                break;
        case SrcSI:
                c->src.type = OP_MEM;
                c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->src.ptr = (unsigned long *)
                        register_address(c, seg_override_base(ctxt, c),
                                         c->regs[VCPU_REGS_RSI]);
                c->src.val = 0;
                break;
        }

        /*
         * Decode and fetch the second source operand: register, memory
         * or immediate.
         */
        switch (c->d & Src2Mask) {
        case Src2None:
                break;
        case Src2CL:
                c->src2.bytes = 1;
                c->src2.val = c->regs[VCPU_REGS_RCX] & 0xff;
                break;
        case Src2ImmByte:
                c->src2.type = OP_IMM;
                c->src2.ptr = (unsigned long *)c->eip;
                c->src2.bytes = 1;
                c->src2.val = insn_fetch(u8, 1, c->eip);
                break;
        case Src2Imm16:
                c->src2.type = OP_IMM;
                c->src2.ptr = (unsigned long *)c->eip;
                c->src2.bytes = 2;
                c->src2.val = insn_fetch(u16, 2, c->eip);
                break;
        case Src2One:
                c->src2.bytes = 1;
                c->src2.val = 1;
                break;
        case Src2Mem16:
                c->src2.type = OP_MEM;
                c->src2.bytes = 2;
                c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes);
                c->src2.val = 0;
                break;
        }

        /* Decode and fetch the destination operand: register or memory. */
        switch (c->d & DstMask) {
        case ImplicitOps:
                /* Special instructions do their own operand decoding. */
                return 0;
        case DstReg:
                decode_register_operand(&c->dst, c,
                         c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
                break;
        case DstMem:
        case DstMem64:
                if ((c->d & ModRM) && c->modrm_mod == 3) {
                        c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                        c->dst.type = OP_REG;
                        c->dst.val = c->dst.orig_val = c->modrm_val;
                        c->dst.ptr = c->modrm_ptr;
                        break;
                }
                c->dst.type = OP_MEM;
                c->dst.ptr = (unsigned long *)c->modrm_ea;
                if ((c->d & DstMask) == DstMem64)
                        c->dst.bytes = 8;
                else
                        c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.val = 0;
                if (c->d & BitOp) {
                        unsigned long mask = ~(c->dst.bytes * 8 - 1);

                        c->dst.ptr = (void *)c->dst.ptr +
                                     (c->src.val & mask) / 8;
                }
                break;
        case DstAcc:
                c->dst.type = OP_REG;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = &c->regs[VCPU_REGS_RAX];
                switch (c->dst.bytes) {
                case 1:
                        c->dst.val = *(u8 *)c->dst.ptr;
                        break;
                case 2:
                        c->dst.val = *(u16 *)c->dst.ptr;
                        break;
                case 4:
                        c->dst.val = *(u32 *)c->dst.ptr;
                        break;
                case 8:
                        c->dst.val = *(u64 *)c->dst.ptr;
                        break;
                }
                c->dst.orig_val = c->dst.val;
                break;
        case DstDI:
                c->dst.type = OP_MEM;
                c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
                c->dst.ptr = (unsigned long *)
                        register_address(c, es_base(ctxt),
                                         c->regs[VCPU_REGS_RDI]);
                c->dst.val = 0;
                break;
        }

done:
        return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}
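
/*
 * Worked example (added for illustration): RIP-relative addressing as
 * finished at the end of decode above.  With modrm_rm == 5 and mod == 0
 * in 64-bit mode, the 32-bit displacement is rebased against the RIP of
 * the *next* instruction, which is what c->eip holds once decoding is
 * complete.
 */
static inline unsigned long example_rip_relative(unsigned long next_rip,
                                                 long disp32)
{
        return next_rip + disp32;
}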
static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops,
                           unsigned int size, unsigned short port,
                           void *dest)
{
        struct read_cache *rc = &ctxt->decode.io_read;

        if (rc->pos == rc->end) { /* refill pio read ahead */
                struct decode_cache *c = &ctxt->decode;
                unsigned int in_page, n;
                unsigned int count = c->rep_prefix ?
                        address_mask(c, c->regs[VCPU_REGS_RCX]) : 1;
                in_page = (ctxt->eflags & EFLG_DF) ?
                        offset_in_page(c->regs[VCPU_REGS_RDI]) :
                        PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]);
                n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
                        count);
                if (n == 0)
                        n = 1;
                rc->pos = rc->end = 0;
                if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu))
                        return 0;
                rc->end = n * size;
        }

        memcpy(dest, rc->data + rc->pos, size);
        rc->pos += size;
        return 1;
}
static u32 desc_limit_scaled(struct desc_struct *desc)
{
        u32 limit = get_desc_limit(desc);

        return desc->g ? (limit << 12) | 0xfff : limit;
}
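
/*
 * Worked example (illustrative only): with the granularity bit set, a
 * raw limit of 0x1f scales to (0x1f << 12) | 0xfff == 0x1ffff, i.e. a
 * 128KiB segment; with g clear it stays 0x1f.
 */
static inline u32 example_scaled_limit(void)
{
        return (0x1f << 12) | 0xfff;    /* == 0x1ffff */
}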
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
                                     struct x86_emulate_ops *ops,
                                     u16 selector, struct desc_ptr *dt)
{
        if (selector & 1 << 2) {
                struct desc_struct desc;
                memset(dt, 0, sizeof *dt);
                if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu))
                        return;

                dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */
                dt->address = get_desc_base(&desc);
        } else
                ops->get_gdt(dt, ctxt->vcpu);
}
/* allowed just for 8-byte segments */
static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   struct x86_emulate_ops *ops,
                                   u16 selector, struct desc_struct *desc)
{
        struct desc_ptr dt;
        u16 index = selector >> 3;
        int ret;
        u32 err;
        ulong addr;

        get_descriptor_table_ptr(ctxt, ops, selector, &dt);

        if (dt.size < index * 8 + 7) {
                kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
                return X86EMUL_PROPAGATE_FAULT;
        }
        addr = dt.address + index * 8;
        ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
        if (ret == X86EMUL_PROPAGATE_FAULT)
                kvm_inject_page_fault(ctxt->vcpu, addr, err);

        return ret;
}
/* allowed just for 8-byte segments */
static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                    struct x86_emulate_ops *ops,
                                    u16 selector, struct desc_struct *desc)
{
        struct desc_ptr dt;
        u16 index = selector >> 3;
        int ret;
        u32 err;
        ulong addr;

        get_descriptor_table_ptr(ctxt, ops, selector, &dt);

        if (dt.size < index * 8 + 7) {
                kvm_inject_gp(ctxt->vcpu, selector & 0xfffc);
                return X86EMUL_PROPAGATE_FAULT;
        }

        addr = dt.address + index * 8;
        ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err);
        if (ret == X86EMUL_PROPAGATE_FAULT)
                kvm_inject_page_fault(ctxt->vcpu, addr, err);

        return ret;
}
static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
                                   struct x86_emulate_ops *ops,
                                   u16 selector, int seg)
{
        struct desc_struct seg_desc;
        u8 dpl, rpl, cpl;
        unsigned err_vec = GP_VECTOR;
        u32 err_code = 0;
        bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
        int ret;

        memset(&seg_desc, 0, sizeof seg_desc);

        if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
            || ctxt->mode == X86EMUL_MODE_REAL) {
                /* set real mode segment descriptor */
                set_desc_base(&seg_desc, selector << 4);
                set_desc_limit(&seg_desc, 0xffff);
                seg_desc.type = 3;
                seg_desc.p = 1;
                seg_desc.s = 1;
                goto load;
        }

        /* NULL selector is not valid for TR, CS and SS */
        if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR)
            && null_selector)
                goto exception;

        /* TR should be in GDT only */
        if (seg == VCPU_SREG_TR && (selector & (1 << 2)))
                goto exception;

        if (null_selector) /* for NULL selector skip all following checks */
                goto load;

        ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        err_code = selector & 0xfffc;
        err_vec = GP_VECTOR;

        /* can't load system descriptor into segment selector */
        if (seg <= VCPU_SREG_GS && !seg_desc.s)
                goto exception;

        if (!seg_desc.p) {
                err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR;
                goto exception;
        }

        rpl = selector & 3;
        dpl = seg_desc.dpl;
        cpl = ops->cpl(ctxt->vcpu);

        switch (seg) {
        case VCPU_SREG_SS:
                /*
                 * segment is not a writable data segment or segment
                 * selector's RPL != CPL or segment's DPL != CPL
                 */
                if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl)
                        goto exception;
                break;
        case VCPU_SREG_CS:
                if (!(seg_desc.type & 8))
                        goto exception;

                if (seg_desc.type & 4) {
                        /* conforming */
                        if (dpl > cpl)
                                goto exception;
                } else {
                        /* nonconforming */
                        if (rpl > cpl || dpl != cpl)
                                goto exception;
                }
                /* CS(RPL) <- CPL */
                selector = (selector & 0xfffc) | cpl;
                break;
        case VCPU_SREG_TR:
                if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
                        goto exception;
                break;
        case VCPU_SREG_LDTR:
                if (seg_desc.s || seg_desc.type != 2)
                        goto exception;
                break;
        default: /* DS, ES, FS, or GS */
                /*
                 * segment is not a data or readable code segment or
                 * ((segment is a data or nonconforming code segment)
                 * and (both RPL and CPL > DPL))
                 */
                if ((seg_desc.type & 0xa) == 0x8 ||
                    (((seg_desc.type & 0xc) != 0xc) &&
                     (rpl > dpl && cpl > dpl)))
                        goto exception;
                break;
        }

        if (seg_desc.s) {
                /* mark segment as accessed */
                seg_desc.type |= 1;
                ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc);
                if (ret != X86EMUL_CONTINUE)
                        return ret;
        }
load:
        ops->set_segment_selector(selector, seg, ctxt->vcpu);
        ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu);
        return X86EMUL_CONTINUE;
exception:
        kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code);
        return X86EMUL_PROPAGATE_FAULT;
}
static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;

        c->dst.type  = OP_MEM;
        c->dst.bytes = c->op_bytes;
        c->dst.val = c->src.val;
        register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
        c->dst.ptr = (void *) register_address(c, ss_base(ctxt),
                                               c->regs[VCPU_REGS_RSP]);
}
static int emulate_pop(struct x86_emulate_ctxt *ctxt,
                       struct x86_emulate_ops *ops,
                       void *dest, int len)
{
        struct decode_cache *c = &ctxt->decode;
        int rc;

        rc = ops->read_emulated(register_address(c, ss_base(ctxt),
                                                 c->regs[VCPU_REGS_RSP]),
                                dest, len, ctxt->vcpu);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
        return rc;
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops,
                        void *dest, int len)
{
        int rc;
        unsigned long val, change_mask;
        int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
        int cpl = ops->cpl(ctxt->vcpu);

        rc = emulate_pop(ctxt, ops, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
                | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

        switch(ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
                        change_mask |= EFLG_IOPL;
                if (cpl <= iopl)
                        change_mask |= EFLG_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                }
                change_mask |= EFLG_IF;
                break;
        default: /* real mode */
                change_mask |= (EFLG_IOPL | EFLG_IF);
                break;
        }

        *(unsigned long *)dest =
                (ctxt->eflags & ~change_mask) | (val & change_mask);

        return rc;
}
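
/*
 * Sketch (added for illustration) of the protected-mode rules applied
 * above: only CPL 0 may rewrite IOPL, and only CPL <= IOPL may toggle
 * IF; the rest of change_mask is always writable via popf.
 */
static inline unsigned long example_popf_mask(int cpl, int iopl)
{
        unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF |
                             EFLG_SF | EFLG_OF | EFLG_TF | EFLG_DF |
                             EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;

        if (cpl == 0)
                mask |= EFLG_IOPL;
        if (cpl <= iopl)
                mask |= EFLG_IF;
        return mask;
}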
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment segment;

        kvm_x86_ops->get_segment(ctxt->vcpu, &segment, seg);

        c->src.val = segment.selector;
        emulate_push(ctxt);
}
static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
                            struct x86_emulate_ops *ops, int seg)
{
        struct decode_cache *c = &ctxt->decode;
        unsigned long selector;
        int rc;

        rc = emulate_pop(ctxt, ops, &selector, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;

        rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg);
        return rc;
}
1588 struct decode_cache *c = &ctxt->decode;
1589 unsigned long old_esp = c->regs[VCPU_REGS_RSP];
1590 int reg = VCPU_REGS_RAX;
1592 while (reg <= VCPU_REGS_RDI) {
1593 (reg == VCPU_REGS_RSP) ?
1594 (c->src.val = old_esp) : (c->src.val = c->regs[reg]);
static int emulate_popa(struct x86_emulate_ctxt *ctxt,
                        struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;

        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
                        register_address_increment(c, &c->regs[VCPU_REGS_RSP],
                                                   c->op_bytes);
                        --reg;
                        continue;
                }

                rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
                --reg;
        }
        return rc;
}
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;

        return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes);
}
static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;
        switch (c->modrm_reg) {
        case 0: /* rol */
                emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
                break;
        case 1: /* ror */
                emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
                break;
        case 2: /* rcl */
                emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
                break;
        case 3: /* rcr */
                emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
                break;
        case 4: /* sal/shl */
        case 6: /* sal/shl */
                emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
                break;
        case 5: /* shr */
                emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
                break;
        case 7: /* sar */
                emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
                break;
        }
}
static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
                               struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;

        switch (c->modrm_reg) {
        case 0 ... 1:   /* test */
                emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
                break;
        case 2: /* not */
                c->dst.val = ~c->dst.val;
                break;
        case 3: /* neg */
                emulate_1op("neg", c->dst, ctxt->eflags);
                break;
        default:
                return 0;
        }
        return 1;
}
static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;

        switch (c->modrm_reg) {
        case 0: /* inc */
                emulate_1op("inc", c->dst, ctxt->eflags);
                break;
        case 1: /* dec */
                emulate_1op("dec", c->dst, ctxt->eflags);
                break;
        case 2: /* call near abs */ {
                long int old_eip;
                old_eip = c->eip;
                c->eip = c->src.val;
                c->src.val = old_eip;
                emulate_push(ctxt);
                break;
        }
        case 4: /* jmp abs */
                c->eip = c->src.val;
                break;
        case 6: /* push */
                emulate_push(ctxt);
                break;
        }
        return X86EMUL_CONTINUE;
}
static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
                               struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        u64 old = c->dst.orig_val;

        if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
            ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) {

                c->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
                c->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
                ctxt->eflags &= ~EFLG_ZF;
        } else {
                c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) |
                             (u32) c->regs[VCPU_REGS_RBX];

                ctxt->eflags |= EFLG_ZF;
        }
        return X86EMUL_CONTINUE;
}
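
/*
 * Semantics sketch (added for illustration) of the cmpxchg8b emulation
 * above: when EDX:EAX matches the 64-bit memory operand, ZF is set and
 * ECX:EBX becomes the new value; otherwise ZF is cleared and the old
 * value is loaded into EDX:EAX.
 */
static inline u64 example_cmpxchg8b_replacement(u32 ecx, u32 ebx)
{
        return ((u64)ecx << 32) | ebx;
}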
static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
                           struct x86_emulate_ops *ops)
{
        struct decode_cache *c = &ctxt->decode;
        int rc;
        unsigned long cs;

        rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        if (c->op_bytes == 4)
                c->eip = (u32)c->eip;
        rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
        if (rc != X86EMUL_CONTINUE)
                return rc;
        rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS);
        return rc;
}
static inline int writeback(struct x86_emulate_ctxt *ctxt,
                            struct x86_emulate_ops *ops)
{
        int rc;
        struct decode_cache *c = &ctxt->decode;

        switch (c->dst.type) {
        case OP_REG:
                /* The 4-byte case *is* correct:
                 * in 64-bit mode we zero-extend.
                 */
                switch (c->dst.bytes) {
                case 1:
                        *(u8 *)c->dst.ptr = (u8)c->dst.val;
                        break;
                case 2:
                        *(u16 *)c->dst.ptr = (u16)c->dst.val;
                        break;
                case 4:
                        *c->dst.ptr = (u32)c->dst.val;
                        break;  /* 64b: zero-ext */
                case 8:
                        *c->dst.ptr = c->dst.val;
                        break;
                }
                break;
        case OP_MEM:
                if (c->lock_prefix)
                        rc = ops->cmpxchg_emulated(
                                        (unsigned long)c->dst.ptr,
                                        &c->dst.orig_val,
                                        &c->dst.val,
                                        c->dst.bytes,
                                        ctxt->vcpu);
                else
                        rc = ops->write_emulated(
                                        (unsigned long)c->dst.ptr,
                                        &c->dst.val,
                                        c->dst.bytes,
                                        ctxt->vcpu);
                if (rc != X86EMUL_CONTINUE)
                        return rc;
                break;
        case OP_NONE:
                /* no writeback */
                break;
        default:
                break;
        }
        return X86EMUL_CONTINUE;
}
static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask)
{
        u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask);
        /*
         * an sti; sti; sequence only disables interrupts for the first
         * instruction. So, if the last instruction, be it emulated or
         * not, left the system with the INT_STI flag enabled, it
         * means that the last instruction is an sti. We should not
         * leave the flag on in this case. The same goes for mov ss.
         */
        if (!(int_shadow & mask))
                ctxt->interruptibility = mask;
}
static void
setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
                        struct kvm_segment *cs, struct kvm_segment *ss)
{
        memset(cs, 0, sizeof(struct kvm_segment));
        kvm_x86_ops->get_segment(ctxt->vcpu, cs, VCPU_SREG_CS);
        memset(ss, 0, sizeof(struct kvm_segment));

        cs->l = 0;              /* will be adjusted later */
        cs->base = 0;           /* flat segment */
        cs->g = 1;              /* 4kb granularity */
        cs->limit = 0xffffffff; /* 4GB limit */
        cs->type = 0x0b;        /* Read, Execute, Accessed */
        cs->s = 1;
        cs->dpl = 0;            /* will be adjusted later */
        cs->present = 1;
        cs->db = 1;

        ss->unusable = 0;
        ss->base = 0;           /* flat segment */
        ss->limit = 0xffffffff; /* 4GB limit */
        ss->g = 1;              /* 4kb granularity */
        ss->s = 1;
        ss->type = 0x03;        /* Read/Write, Accessed */
        ss->db = 1;             /* 32bit stack segment */
        ss->dpl = 0;
        ss->present = 1;
}
static int
emulate_syscall(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;
        u64 msr_data;

        /* syscall is not available in real mode */
        if (ctxt->mode == X86EMUL_MODE_REAL ||
            ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
                return X86EMUL_PROPAGATE_FAULT;
        }

        setup_syscalls_segments(ctxt, &cs, &ss);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
        msr_data >>= 32;
        cs.selector = (u16)(msr_data & 0xfffc);
        ss.selector = (u16)(msr_data + 8);

        if (is_long_mode(ctxt->vcpu)) {
                cs.db = 0;
                cs.l = 1;
        }
        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        c->regs[VCPU_REGS_RCX] = c->eip;
        if (is_long_mode(ctxt->vcpu)) {
#ifdef CONFIG_X86_64
                c->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;

                kvm_x86_ops->get_msr(ctxt->vcpu,
                                     ctxt->mode == X86EMUL_MODE_PROT64 ?
                                     MSR_LSTAR : MSR_CSTAR, &msr_data);
                c->eip = msr_data;

                kvm_x86_ops->get_msr(ctxt->vcpu, MSR_SYSCALL_MASK, &msr_data);
                ctxt->eflags &= ~(msr_data | EFLG_RF);
#endif
        } else {
                /* legacy mode */
                kvm_x86_ops->get_msr(ctxt->vcpu, MSR_STAR, &msr_data);
                c->eip = (u32)msr_data;

                ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
        }

        return X86EMUL_CONTINUE;
}
static int
emulate_sysenter(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;
        u64 msr_data;

        /* inject #GP if in real mode */
        if (ctxt->mode == X86EMUL_MODE_REAL) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return X86EMUL_PROPAGATE_FAULT;
        }

        /* XXX sysenter/sysexit have not been tested in 64bit mode.
         * Therefore, we inject an #UD.
         */
        if (ctxt->mode == X86EMUL_MODE_PROT64) {
                kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
                return X86EMUL_PROPAGATE_FAULT;
        }

        setup_syscalls_segments(ctxt, &cs, &ss);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
        switch (ctxt->mode) {
        case X86EMUL_MODE_PROT32:
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                }
                break;
        case X86EMUL_MODE_PROT64:
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                }
                break;
        default:
                break;
        }

        ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
        cs.selector = (u16)msr_data;
        cs.selector &= ~SELECTOR_RPL_MASK;
        ss.selector = cs.selector + 8;
        ss.selector &= ~SELECTOR_RPL_MASK;
        if (ctxt->mode == X86EMUL_MODE_PROT64
            || is_long_mode(ctxt->vcpu)) {
                cs.db = 0;
                cs.l = 1;
        }

        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_EIP, &msr_data);
        c->eip = msr_data;

        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
        c->regs[VCPU_REGS_RSP] = msr_data;

        return X86EMUL_CONTINUE;
}
static int
emulate_sysexit(struct x86_emulate_ctxt *ctxt)
{
        struct decode_cache *c = &ctxt->decode;
        struct kvm_segment cs, ss;
        u64 msr_data;
        int usermode;

        /* inject #GP if in real mode or Virtual 8086 mode */
        if (ctxt->mode == X86EMUL_MODE_REAL ||
            ctxt->mode == X86EMUL_MODE_VM86) {
                kvm_inject_gp(ctxt->vcpu, 0);
                return X86EMUL_PROPAGATE_FAULT;
        }

        setup_syscalls_segments(ctxt, &cs, &ss);

        if ((c->rex_prefix & 0x8) != 0x0)
                usermode = X86EMUL_MODE_PROT64;
        else
                usermode = X86EMUL_MODE_PROT32;

        cs.dpl = 3;
        ss.dpl = 3;
        kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_CS, &msr_data);
        switch (usermode) {
        case X86EMUL_MODE_PROT32:
                cs.selector = (u16)(msr_data + 16);
                if ((msr_data & 0xfffc) == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                }
                ss.selector = (u16)(msr_data + 24);
                break;
        case X86EMUL_MODE_PROT64:
                cs.selector = (u16)(msr_data + 32);
                if (msr_data == 0x0) {
                        kvm_inject_gp(ctxt->vcpu, 0);
                        return X86EMUL_PROPAGATE_FAULT;
                }
                ss.selector = cs.selector + 8;
                cs.db = 0;
                cs.l = 1;
                break;
        }
        cs.selector |= SELECTOR_RPL_MASK;
        ss.selector |= SELECTOR_RPL_MASK;

        kvm_x86_ops->set_segment(ctxt->vcpu, &cs, VCPU_SREG_CS);
        kvm_x86_ops->set_segment(ctxt->vcpu, &ss, VCPU_SREG_SS);

        c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
        c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];

        return X86EMUL_CONTINUE;
}
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt,
                              struct x86_emulate_ops *ops)
{
        int iopl;
        if (ctxt->mode == X86EMUL_MODE_REAL)
                return false;
        if (ctxt->mode == X86EMUL_MODE_VM86)
                return true;
        iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
        return ops->cpl(ctxt->vcpu) > iopl;
}
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
                                            struct x86_emulate_ops *ops,
                                            u16 port, u16 len)
{
        struct kvm_segment tr_seg;
        int r;
        u16 io_bitmap_ptr;
        u8 perm, bit_idx = port & 0x7;
        unsigned mask = (1 << len) - 1;

        kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
        if (tr_seg.unusable)
                return false;
        if (tr_seg.limit < 103)
                return false;
        r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
                          NULL);
        if (r != X86EMUL_CONTINUE)
                return false;
        if (io_bitmap_ptr + port/8 > tr_seg.limit)
                return false;
        r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
                          ctxt->vcpu, NULL);
        if (r != X86EMUL_CONTINUE)
                return false;
        if ((perm >> bit_idx) & mask)
                return false;
        return true;
}
static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 u16 port, u16 len)
{
        if (emulator_bad_iopl(ctxt, ops))
                if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
                        return false;
        return true;
}
static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt,
                                      struct x86_emulate_ops *ops,
                                      int seg)
{
        struct desc_struct desc;
        if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu))
                return get_desc_base(&desc);
        else
                return ~0;
}
static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops,
                                struct tss_segment_16 *tss)
{
        struct decode_cache *c = &ctxt->decode;

        tss->ip = c->eip;
        tss->flag = ctxt->eflags;
        tss->ax = c->regs[VCPU_REGS_RAX];
        tss->cx = c->regs[VCPU_REGS_RCX];
        tss->dx = c->regs[VCPU_REGS_RDX];
        tss->bx = c->regs[VCPU_REGS_RBX];
        tss->sp = c->regs[VCPU_REGS_RSP];
        tss->bp = c->regs[VCPU_REGS_RBP];
        tss->si = c->regs[VCPU_REGS_RSI];
        tss->di = c->regs[VCPU_REGS_RDI];

        tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
        tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
        tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
        tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
        tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}
static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 struct tss_segment_16 *tss)
{
        struct decode_cache *c = &ctxt->decode;
        int ret;

        c->eip = tss->ip;
        ctxt->eflags = tss->flag | 2;
        c->regs[VCPU_REGS_RAX] = tss->ax;
        c->regs[VCPU_REGS_RCX] = tss->cx;
        c->regs[VCPU_REGS_RDX] = tss->dx;
        c->regs[VCPU_REGS_RBX] = tss->bx;
        c->regs[VCPU_REGS_RSP] = tss->sp;
        c->regs[VCPU_REGS_RBP] = tss->bp;
        c->regs[VCPU_REGS_RSI] = tss->si;
        c->regs[VCPU_REGS_RDI] = tss->di;

        /*
         * SDM says that segment selectors are loaded before segment
         * descriptors
         */
        ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu);
        ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
        ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
        ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
        ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);

        /*
         * Now load segment descriptors. If a fault happens at this stage
         * it is handled in the context of the new task.
         */
        ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        return X86EMUL_CONTINUE;
}
static int task_switch_16(struct x86_emulate_ctxt *ctxt,
                          struct x86_emulate_ops *ops,
                          u16 tss_selector, u16 old_tss_sel,
                          ulong old_tss_base, struct desc_struct *new_desc)
{
        struct tss_segment_16 tss_seg;
        int ret;
        u32 err, new_tss_base = get_desc_base(new_desc);

        ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                            &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
                return ret;
        }

        save_state_to_tss16(ctxt, ops, &tss_seg);

        ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                             &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
                return ret;
        }

        ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
                            &err);
        if (ret == X86EMUL_PROPAGATE_FAULT) {
                /* FIXME: need to provide precise fault address */
                kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
                return ret;
        }

        if (old_tss_sel != 0xffff) {
                tss_seg.prev_task_link = old_tss_sel;

                ret = ops->write_std(new_tss_base,
                                     &tss_seg.prev_task_link,
                                     sizeof tss_seg.prev_task_link,
                                     ctxt->vcpu, &err);
                if (ret == X86EMUL_PROPAGATE_FAULT) {
                        /* FIXME: need to provide precise fault address */
                        kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
                        return ret;
                }
        }

        return load_state_from_tss16(ctxt, ops, &tss_seg);
}
static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
                                struct x86_emulate_ops *ops,
                                struct tss_segment_32 *tss)
{
        struct decode_cache *c = &ctxt->decode;

        tss->cr3 = ops->get_cr(3, ctxt->vcpu);
        tss->eip = c->eip;
        tss->eflags = ctxt->eflags;
        tss->eax = c->regs[VCPU_REGS_RAX];
        tss->ecx = c->regs[VCPU_REGS_RCX];
        tss->edx = c->regs[VCPU_REGS_RDX];
        tss->ebx = c->regs[VCPU_REGS_RBX];
        tss->esp = c->regs[VCPU_REGS_RSP];
        tss->ebp = c->regs[VCPU_REGS_RBP];
        tss->esi = c->regs[VCPU_REGS_RSI];
        tss->edi = c->regs[VCPU_REGS_RDI];

        tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu);
        tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu);
        tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu);
        tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu);
        tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu);
        tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu);
        tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu);
}
static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
                                 struct x86_emulate_ops *ops,
                                 struct tss_segment_32 *tss)
{
        struct decode_cache *c = &ctxt->decode;
        int ret;

        ops->set_cr(3, tss->cr3, ctxt->vcpu);
        c->eip = tss->eip;
        ctxt->eflags = tss->eflags | 2;
        c->regs[VCPU_REGS_RAX] = tss->eax;
        c->regs[VCPU_REGS_RCX] = tss->ecx;
        c->regs[VCPU_REGS_RDX] = tss->edx;
        c->regs[VCPU_REGS_RBX] = tss->ebx;
        c->regs[VCPU_REGS_RSP] = tss->esp;
        c->regs[VCPU_REGS_RBP] = tss->ebp;
        c->regs[VCPU_REGS_RSI] = tss->esi;
        c->regs[VCPU_REGS_RDI] = tss->edi;

        /*
         * SDM says that segment selectors are loaded before segment
         * descriptors
         */
        ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu);
        ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu);
        ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu);
        ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu);
        ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu);
        ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu);
        ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu);

        /*
         * Now load segment descriptors. If a fault happens at this stage
         * it is handled in the context of the new task.
         */
        ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS);
        if (ret != X86EMUL_CONTINUE)
                return ret;
        ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS);
        if (ret != X86EMUL_CONTINUE)
                return ret;

        return X86EMUL_CONTINUE;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
			  struct x86_emulate_ops *ops,
			  u16 tss_selector, u16 old_tss_sel,
			  ulong old_tss_base, struct desc_struct *new_desc)
{
	struct tss_segment_32 tss_seg;
	int ret;
	u32 err, new_tss_base = get_desc_base(new_desc);

	ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	save_state_to_tss32(ctxt, ops, &tss_seg);

	ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			     &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err);
		return ret;
	}

	ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu,
			    &err);
	if (ret == X86EMUL_PROPAGATE_FAULT) {
		/* FIXME: need to provide precise fault address */
		kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
		return ret;
	}

	if (old_tss_sel != 0xffff) {
		tss_seg.prev_task_link = old_tss_sel;

		ret = ops->write_std(new_tss_base,
				     &tss_seg.prev_task_link,
				     sizeof tss_seg.prev_task_link,
				     ctxt->vcpu, &err);
		if (ret == X86EMUL_PROPAGATE_FAULT) {
			/* FIXME: need to provide precise fault address */
			kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err);
			return ret;
		}
	}

	return load_state_from_tss32(ctxt, ops, &tss_seg);
}
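
/*
 * Common task-switch logic, following the SDM flow: validate the new TSS
 * descriptor (RPL/CPL against its DPL, presence, minimal limit), manage
 * the busy bit and EFLAGS.NT according to the switch reason, then defer
 * to the 16-bit or 32-bit TSS helpers above.
 */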
static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
				   struct x86_emulate_ops *ops,
				   u16 tss_selector, int reason,
				   bool has_error_code, u32 error_code)
{
	struct desc_struct curr_tss_desc, next_tss_desc;
	int ret;
	u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu);
	ulong old_tss_base =
		get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR);
	u32 desc_limit;

	/* FIXME: old_tss_base == ~0 ? */

	ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;
	ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	/* FIXME: check that next_tss_desc is tss */

	if (reason != TASK_SWITCH_IRET) {
		if ((tss_selector & 3) > next_tss_desc.dpl ||
		    ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) {
			kvm_inject_gp(ctxt->vcpu, 0);
			return X86EMUL_PROPAGATE_FAULT;
		}
	}

	desc_limit = desc_limit_scaled(&next_tss_desc);
	if (!next_tss_desc.p ||
	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
	     desc_limit < 0x2b)) {
		kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR,
				      tss_selector & 0xfffc);
		return X86EMUL_PROPAGATE_FAULT;
	}

	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
		curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */
		write_segment_descriptor(ctxt, ops, old_tss_sel,
					 &curr_tss_desc);
	}

	if (reason == TASK_SWITCH_IRET)
		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;

	/* set back link to prev task only if NT bit is set in eflags;
	   note that old_tss_sel is not used after this point */
	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
		old_tss_sel = 0xffff;

	if (next_tss_desc.type & 8)
		ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	else
		ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel,
				     old_tss_base, &next_tss_desc);
	if (ret != X86EMUL_CONTINUE)
		return ret;

	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE)
		ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT;

	if (reason != TASK_SWITCH_IRET) {
		next_tss_desc.type |= (1 << 1); /* set busy flag */
		write_segment_descriptor(ctxt, ops, tss_selector,
					 &next_tss_desc);
	}

	ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu);
	ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu);
	ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu);

	if (has_error_code) {
		struct decode_cache *c = &ctxt->decode;

		c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2;
		c->lock_prefix = 0;
		c->src.val = (unsigned long) error_code;
		emulate_push(ctxt);
	}

	return ret;
}
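
/*
 * External entry point for emulated task switches.  The decode cache is
 * reused as scratch state so that guest registers are committed only if
 * the whole switch succeeds.  A caller (typically kvm's task-switch exit
 * handler) is expected to invoke it along these lines (a sketch, not
 * verbatim caller code):
 *
 *	if (emulator_task_switch(&ctxt, &emulate_ops, tss_selector,
 *				 reason, has_error_code, error_code))
 *		;	// report emulation failure to userspace
 */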
int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
			 struct x86_emulate_ops *ops,
			 u16 tss_selector, int reason,
			 bool has_error_code, u32 error_code)
{
	struct decode_cache *c = &ctxt->decode;
	int rc;

	memset(c, 0, sizeof(struct decode_cache));
	c->eip = ctxt->eip;
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
	c->dst.type = OP_NONE;

	rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason,
				     has_error_code, error_code);

	if (rc == X86EMUL_CONTINUE) {
		memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
		kvm_rip_write(ctxt->vcpu, c->eip);
		rc = writeback(ctxt, ops);
	}

	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
}
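
/*
 * Step a string-instruction index register (SI/DI) by the operand size,
 * moving backwards when EFLAGS.DF is set, and recompute the operand's
 * effective address from the new register value.
 */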
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base,
			    int reg, struct operand *op)
{
	struct decode_cache *c = &ctxt->decode;
	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;

	register_address_increment(c, &c->regs[reg], df * op->bytes);
	op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]);
}
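
/*
 * Execute one previously decoded instruction.  Works on a shadow copy of
 * the guest registers and commits it (together with RIP and RFLAGS) only
 * when emulation completes without fault.
 */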
int
x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
{
	u64 msr_data;
	struct decode_cache *c = &ctxt->decode;
	int rc = X86EMUL_CONTINUE;
	int saved_dst_type = c->dst.type;

	ctxt->interruptibility = 0;

	/* Shadow copy of register state. Committed on successful emulation.
	 * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't
	 * modify them.
	 */
	memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);

	if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* LOCK prefix is allowed only with some instructions */
	if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		goto done;
	}

	/* Privileged instruction can be executed only in CPL=0 */
	if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) {
		kvm_inject_gp(ctxt->vcpu, 0);
		goto done;
	}

	if (c->rep_prefix && (c->d & String)) {
		ctxt->restart = true;
		/* All REP prefixes have the same first termination condition */
		if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) {
		string_done:
			ctxt->restart = false;
			kvm_rip_write(ctxt->vcpu, c->eip);
			goto done;
		}
		/* The second termination condition only applies for REPE
		 * and REPNE. Test if the repeat string operation prefix is
		 * REPE/REPZ or REPNE/REPNZ and if it's the case it tests the
		 * corresponding termination condition according to:
		 *	- if REPE/REPZ and ZF = 0 then done
		 *	- if REPNE/REPNZ and ZF = 1 then done
		 */
		if ((c->b == 0xa6) || (c->b == 0xa7) ||
		    (c->b == 0xae) || (c->b == 0xaf)) {
			if ((c->rep_prefix == REPE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == 0))
				goto string_done;
			if ((c->rep_prefix == REPNE_PREFIX) &&
			    ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))
				goto string_done;
		}
		c->eip = ctxt->eip;
	}
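
	/*
	 * Fetch memory operands up front: the source, the second source,
	 * and, unless the instruction is a pure store (Mov), the current
	 * destination value needed for read-modify-write operations.
	 */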
	if (c->src.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src.ptr,
					&c->src.val,
					c->src.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		c->src.orig_val = c->src.val;
	}

	if (c->src2.type == OP_MEM) {
		rc = ops->read_emulated((unsigned long)c->src2.ptr,
					&c->src2.val,
					c->src2.bytes,
					ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}

	if ((c->d & DstMask) == ImplicitOps)
		goto special_insn;

	if ((c->dst.type == OP_MEM) && !(c->d & Mov)) {
		/* optimisation - avoid slow emulated read if Mov */
		rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val,
					c->dst.bytes, ctxt->vcpu);
		if (rc != X86EMUL_CONTINUE)
			goto done;
	}
	c->dst.orig_val = c->dst.val;
special_insn:

	if (c->twobyte)
		goto twobyte_insn;

	switch (c->b) {
	case 0x00 ... 0x05:
	      add:		/* add */
		emulate_2op_SrcV("add", c->src, c->dst, ctxt->eflags);
		break;
	case 0x06:		/* push es */
		emulate_push_sreg(ctxt, VCPU_SREG_ES);
		break;
	case 0x07:		/* pop es */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x08 ... 0x0d:
	      or:		/* or */
		emulate_2op_SrcV("or", c->src, c->dst, ctxt->eflags);
		break;
	case 0x0e:		/* push cs */
		emulate_push_sreg(ctxt, VCPU_SREG_CS);
		break;
	case 0x10 ... 0x15:
	      adc:		/* adc */
		emulate_2op_SrcV("adc", c->src, c->dst, ctxt->eflags);
		break;
	case 0x16:		/* push ss */
		emulate_push_sreg(ctxt, VCPU_SREG_SS);
		break;
	case 0x17:		/* pop ss */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x18 ... 0x1d:
	      sbb:		/* sbb */
		emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
		break;
	case 0x1e:		/* push ds */
		emulate_push_sreg(ctxt, VCPU_SREG_DS);
		break;
	case 0x1f:		/* pop ds */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x20 ... 0x25:
	      and:		/* and */
		emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
		break;
	case 0x28 ... 0x2d:
	      sub:		/* sub */
		emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
		break;
	case 0x30 ... 0x35:
	      xor:		/* xor */
		emulate_2op_SrcV("xor", c->src, c->dst, ctxt->eflags);
		break;
	case 0x38 ... 0x3d:
	      cmp:		/* cmp */
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		break;
	case 0x40 ... 0x47: /* inc r16/r32 */
		emulate_1op("inc", c->dst, ctxt->eflags);
		break;
	case 0x48 ... 0x4f: /* dec r16/r32 */
		emulate_1op("dec", c->dst, ctxt->eflags);
		break;
	case 0x50 ... 0x57:  /* push reg */
		emulate_push(ctxt);
		break;
	case 0x58 ... 0x5f: /* pop reg */
	pop_instruction:
		rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x60:	/* pusha */
		emulate_pusha(ctxt);
		break;
	case 0x61:	/* popa */
		rc = emulate_popa(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x63:		/* movsxd */
		if (ctxt->mode != X86EMUL_MODE_PROT64)
			goto cannot_emulate;
		c->dst.val = (s32) c->src.val;
		break;
	case 0x68: /* push imm */
	case 0x6a: /* push imm8 */
		emulate_push(ctxt);
		break;
	case 0x6c:		/* insb */
	case 0x6d:		/* insw/insd */
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes,
				     c->regs[VCPU_REGS_RDX], &c->dst.val))
			goto done; /* IO is needed, skip writeback */
		break;
	case 0x6e:		/* outsb */
	case 0x6f:		/* outsw/outsd */
		c->src.bytes = min(c->src.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
					  c->src.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX],
				      &c->src.val, 1, ctxt->vcpu);

		c->dst.type = OP_NONE; /* nothing to writeback */
		break;
	case 0x70 ... 0x7f: /* jcc (short) */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		break;
	case 0x80 ... 0x83:	/* Grp1 */
		switch (c->modrm_reg) {
		case 0:
			goto add;
		case 1:
			goto or;
		case 2:
			goto adc;
		case 3:
			goto sbb;
		case 4:
			goto and;
		case 5:
			goto sub;
		case 6:
			goto xor;
		case 7:
			goto cmp;
		}
		break;
	case 0x84 ... 0x85:
		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
		break;
	case 0x86 ... 0x87:	/* xchg */
	xchg:
		/* Write back the register source. */
		switch (c->dst.bytes) {
		case 1:
			*(u8 *) c->src.ptr = (u8) c->dst.val;
			break;
		case 2:
			*(u16 *) c->src.ptr = (u16) c->dst.val;
			break;
		case 4:
			*c->src.ptr = (u32) c->dst.val;
			break;	/* 64b reg: zero-extend */
		case 8:
			*c->src.ptr = c->dst.val;
			break;
		}
		/*
		 * Write back the memory destination with implicit LOCK
		 * prefix.
		 */
		c->dst.val = c->src.val;
		c->lock_prefix = 1;
		break;
	case 0x88 ... 0x8b:	/* mov */
		goto mov;
	case 0x8c: { /* mov r/m, sreg */
		struct kvm_segment segreg;

		if (c->modrm_reg <= VCPU_SREG_GS)
			kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg);
		else {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->dst.val = segreg.selector;
		break;
	}
	case 0x8d: /* lea r16/r32, m */
		c->dst.val = c->modrm_ea;
		break;
	case 0x8e: { /* mov seg, r/m16 */
		uint16_t sel;

		sel = c->src.val;

		if (c->modrm_reg == VCPU_SREG_CS ||
		    c->modrm_reg > VCPU_SREG_GS) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}

		if (c->modrm_reg == VCPU_SREG_SS)
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS);

		rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg);

		c->dst.type = OP_NONE;  /* Disable writeback. */
		break;
	}
	case 0x8f:		/* pop (sole member of Grp1a) */
		rc = emulate_grp1a(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0x90: /* nop / xchg r8,rax */
		if (!(c->rex_prefix & 1)) { /* nop */
			c->dst.type = OP_NONE;
			break;
		}
	case 0x91 ... 0x97: /* xchg reg,rax */
		c->src.type = c->dst.type = OP_REG;
		c->src.bytes = c->dst.bytes = c->op_bytes;
		c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX];
		c->src.val = *(c->src.ptr);
		goto xchg;
	case 0x9c: /* pushf */
		c->src.val = (unsigned long) ctxt->eflags;
		emulate_push(ctxt);
		break;
	case 0x9d: /* popf */
		c->dst.type = OP_REG;
		c->dst.ptr = (unsigned long *) &ctxt->eflags;
		c->dst.bytes = c->op_bytes;
		rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa0 ... 0xa1:	/* mov */
		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		c->dst.val = c->src.val;
		break;
	case 0xa2 ... 0xa3:	/* mov */
		c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX];
		break;
	case 0xa4 ... 0xa5:	/* movs */
		goto mov;
	case 0xa6 ... 0xa7:	/* cmps */
		c->dst.type = OP_NONE; /* Disable writeback. */
		DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
		goto cmp;
	case 0xaa ... 0xab:	/* stos */
		c->dst.val = c->regs[VCPU_REGS_RAX];
		break;
	case 0xac ... 0xad:	/* lods */
		goto mov;
	case 0xae ... 0xaf:	/* scas */
		DPRINTF("Urk! I don't handle SCAS.\n");
		goto cannot_emulate;
	case 0xb0 ... 0xbf: /* mov r, imm */
		goto mov;
	case 0xc0 ... 0xc1:
		emulate_grp2(ctxt);
		break;
	case 0xc3: /* ret */
		c->dst.type = OP_REG;
		c->dst.ptr = &c->eip;
		c->dst.bytes = c->op_bytes;
		goto pop_instruction;
	case 0xc6 ... 0xc7:	/* mov (sole member of Grp11) */
	mov:
		c->dst.val = c->src.val;
		break;
	case 0xcb:		/* ret far */
		rc = emulate_ret_far(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xd0 ... 0xd1:	/* Grp2 */
		c->src.val = 1;
		emulate_grp2(ctxt);
		break;
	case 0xd2 ... 0xd3:	/* Grp2 */
		c->src.val = c->regs[VCPU_REGS_RCX];
		emulate_grp2(ctxt);
		break;
	case 0xe4:	/* inb */
	case 0xe5:	/* in */
		goto do_io_in;
	case 0xe6: /* outb */
	case 0xe7: /* out */
		goto do_io_out;
	case 0xe8: /* call (near) */ {
		long int rel = c->src.val;
		c->src.val = (unsigned long) c->eip;
		jmp_rel(c, rel);
		emulate_push(ctxt);
		break;
	}
	case 0xe9: /* jmp rel */
		goto jmp;
	case 0xea: /* jmp far */
	jump_far:
		if (load_segment_descriptor(ctxt, ops, c->src2.val,
					    VCPU_SREG_CS))
			goto done;

		c->eip = c->src.val;
		break;
	case 0xeb:
	      jmp:		/* jmp rel short */
		jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE; /* Disable writeback. */
		break;
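	/*
	 * The port I/O cases below are gated on emulator_io_permited(),
	 * which consults IOPL and the TSS I/O permission bitmap before
	 * the port is touched.
	 */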
	case 0xec: /* in al,dx */
	case 0xed: /* in (e/r)ax,dx */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_in:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val,
				     &c->dst.val))
			goto done; /* IO is needed */
		break;
	case 0xee: /* out al,dx */
	case 0xef: /* out (e/r)ax,dx */
		c->src.val = c->regs[VCPU_REGS_RDX];
	do_io_out:
		c->dst.bytes = min(c->dst.bytes, 4u);
		if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1,
				      ctxt->vcpu);
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf4:              /* hlt */
		ctxt->vcpu->arch.halt_request = 1;
		break;
	case 0xf5:	/* cmc */
		/* complement carry flag from eflags reg */
		ctxt->eflags ^= EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xf6 ... 0xf7:	/* Grp3 */
		if (!emulate_grp3(ctxt, ops))
			goto cannot_emulate;
		break;
	case 0xf8: /* clc */
		ctxt->eflags &= ~EFLG_CF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfa: /* cli */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			ctxt->eflags &= ~X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	case 0xfb: /* sti */
		if (emulator_bad_iopl(ctxt, ops))
			kvm_inject_gp(ctxt->vcpu, 0);
		else {
			toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI);
			ctxt->eflags |= X86_EFLAGS_IF;
			c->dst.type = OP_NONE;	/* Disable writeback. */
		}
		break;
	case 0xfc: /* cld */
		ctxt->eflags &= ~EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfd: /* std */
		ctxt->eflags |= EFLG_DF;
		c->dst.type = OP_NONE;	/* Disable writeback. */
		break;
	case 0xfe: /* Grp4 */
	grp45:
		rc = emulate_grp45(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xff: /* Grp5 */
		if (c->modrm_reg == 5)
			goto jump_far;
		goto grp45;
	}
writeback:
	rc = writeback(ctxt, ops);
	if (rc != X86EMUL_CONTINUE)
		goto done;

	/*
	 * restore dst type in case the decoding will be reused
	 * (happens for string instructions)
	 */
	c->dst.type = saved_dst_type;

	if ((c->d & SrcMask) == SrcSI)
		string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI,
				&c->src);

	if ((c->d & DstMask) == DstDI)
		string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst);

	if (c->rep_prefix && (c->d & String)) {
		struct read_cache *rc = &ctxt->decode.io_read;
		register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1);
		/*
		 * Re-enter guest when pio read ahead buffer is empty or,
		 * if it is not used, after every 1024 iterations.
		 */
		if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) ||
		    (rc->end != 0 && rc->end == rc->pos))
			ctxt->restart = false;
	}

	/* Commit shadow register state. */
	memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs);
	kvm_rip_write(ctxt->vcpu, c->eip);
	ops->set_rflags(ctxt->vcpu, ctxt->eflags);

done:
	return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
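
/*
 * Two-byte opcodes (0x0f prefix) are dispatched below; unknown two-byte
 * opcodes are rejected earlier, at decode time.
 */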
twobyte_insn:
	switch (c->b) {
	case 0x01: /* lgdt, lidt, lmsw */
		switch (c->modrm_reg) {
			u16 size;
			unsigned long address;

		case 0: /* vmcall */
			if (c->modrm_mod != 3 || c->modrm_rm != 1)
				goto cannot_emulate;

			rc = kvm_fix_hypercall(ctxt->vcpu);
			if (rc != X86EMUL_CONTINUE)
				goto done;

			/* Let the processor re-execute the fixed hypercall */
			c->eip = ctxt->eip;
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 2: /* lgdt */
			rc = read_descriptor(ctxt, ops, c->src.ptr,
					     &size, &address, c->op_bytes);
			if (rc != X86EMUL_CONTINUE)
				goto done;
			realmode_lgdt(ctxt->vcpu, size, address);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 3: /* lidt/vmmcall */
			if (c->modrm_mod == 3) {
				switch (c->modrm_rm) {
				case 1:
					rc = kvm_fix_hypercall(ctxt->vcpu);
					if (rc != X86EMUL_CONTINUE)
						goto done;
					break;
				default:
					goto cannot_emulate;
				}
			} else {
				rc = read_descriptor(ctxt, ops, c->src.ptr,
						     &size, &address,
						     c->op_bytes);
				if (rc != X86EMUL_CONTINUE)
					goto done;
				realmode_lidt(ctxt->vcpu, size, address);
			}
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		case 4: /* smsw */
			c->dst.bytes = 2;
			c->dst.val = ops->get_cr(0, ctxt->vcpu);
			break;
		case 6: /* lmsw */
			ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) |
				    (c->src.val & 0x0f), ctxt->vcpu);
			c->dst.type = OP_NONE;
			break;
		case 5: /* not defined */
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		case 7: /* invlpg */
			emulate_invlpg(ctxt->vcpu, c->modrm_ea);
			/* Disable writeback. */
			c->dst.type = OP_NONE;
			break;
		default:
			goto cannot_emulate;
		}
		break;
	case 0x05:	/* syscall */
		rc = emulate_syscall(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x06:
		emulate_clts(ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x08:		/* invd */
	case 0x09:		/* wbinvd */
	case 0x0d:		/* GrpP (prefetch) */
	case 0x18:		/* Grp16 (prefetch/nop) */
		c->dst.type = OP_NONE;
		break;
	case 0x20: /* mov cr, reg */
		switch (c->modrm_reg) {
		case 1:
		case 5 ... 7:
		case 9 ... 15:
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x21: /* mov from dr to reg */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x22: /* mov reg, cr */
		ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu);
		c->dst.type = OP_NONE;
		break;
	case 0x23: /* mov from reg to dr */
		if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) &&
		    (c->modrm_reg == 4 || c->modrm_reg == 5)) {
			kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
			goto done;
		}
		emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]);
		c->dst.type = OP_NONE;	/* no writeback */
		break;
	case 0x30:
		/* wrmsr */
		msr_data = (u32)c->regs[VCPU_REGS_RAX]
			| ((u64)c->regs[VCPU_REGS_RDX] << 32);
		if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x32:
		/* rdmsr */
		if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) {
			kvm_inject_gp(ctxt->vcpu, 0);
			goto done;
		} else {
			c->regs[VCPU_REGS_RAX] = (u32)msr_data;
			c->regs[VCPU_REGS_RDX] = msr_data >> 32;
		}
		rc = X86EMUL_CONTINUE;
		c->dst.type = OP_NONE;
		break;
	case 0x34:		/* sysenter */
		rc = emulate_sysenter(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x35:		/* sysexit */
		rc = emulate_sysexit(ctxt);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		else
			goto writeback;
		break;
	case 0x40 ... 0x4f:	/* cmov */
		c->dst.val = c->dst.orig_val = c->src.val;
		if (!test_cc(c->b, ctxt->eflags))
			c->dst.type = OP_NONE; /* no writeback */
		break;
	case 0x80 ... 0x8f: /* jnz rel, etc */
		if (test_cc(c->b, ctxt->eflags))
			jmp_rel(c, c->src.val);
		c->dst.type = OP_NONE;
		break;
	case 0xa0:	  /* push fs */
		emulate_push_sreg(ctxt, VCPU_SREG_FS);
		break;
	case 0xa1:	 /* pop fs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xa3:
	      bt:		/* bt */
		c->dst.type = OP_NONE;
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
		break;
	case 0xa4: /* shld imm8, r, r/m */
	case 0xa5: /* shld cl, r, r/m */
		emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xa8:	/* push gs */
		emulate_push_sreg(ctxt, VCPU_SREG_GS);
		break;
	case 0xa9:	/* pop gs */
		rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	case 0xab:
	      bts:		/* bts */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags);
		break;
	case 0xac: /* shrd imm8, r, r/m */
	case 0xad: /* shrd cl, r, r/m */
		emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags);
		break;
	case 0xae:		/* clflush */
		break;
	case 0xb0 ... 0xb1:	/* cmpxchg */
		/*
		 * Save real source value, then compare EAX against
		 * destination.
		 */
		c->src.orig_val = c->src.val;
		c->src.val = c->regs[VCPU_REGS_RAX];
		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
		if (ctxt->eflags & EFLG_ZF) {
			/* Success: write back to memory. */
			c->dst.val = c->src.orig_val;
		} else {
			/* Failure: write the value we saw to EAX. */
			c->dst.type = OP_REG;
			c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
		}
		break;
	case 0xb3:
	      btr:		/* btr */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags);
		break;
	case 0xb6 ... 0xb7:	/* movzx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (u8) c->src.val
						: (u16) c->src.val;
		break;
	case 0xba:		/* Grp8 */
		switch (c->modrm_reg & 3) {
		case 0:
			goto bt;
		case 1:
			goto bts;
		case 2:
			goto btr;
		case 3:
			goto btc;
		}
		break;
	case 0xbb:
	      btc:		/* btc */
		/* only subword offset */
		c->src.val &= (c->dst.bytes << 3) - 1;
		emulate_2op_SrcV_nobyte("btc", c->src, c->dst, ctxt->eflags);
		break;
	case 0xbe ... 0xbf:	/* movsx */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->d & ByteOp) ? (s8) c->src.val :
						(s16) c->src.val;
		break;
	case 0xc3:		/* movnti */
		c->dst.bytes = c->op_bytes;
		c->dst.val = (c->op_bytes == 4) ? (u32) c->src.val :
						(u64) c->src.val;
		break;
	case 0xc7:		/* Grp9 (cmpxchg8b) */
		rc = emulate_grp9(ctxt, ops);
		if (rc != X86EMUL_CONTINUE)
			goto done;
		break;
	}
	goto writeback;

cannot_emulate:
	DPRINTF("Cannot emulate %02x\n", c->b);