1 /* $NetBSD: libnvmm_x86.c,v 1.29 2019/04/28 14:22:13 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
47 #define __cacheline_aligned __attribute__((__aligned__(64)))
48
49 #include <x86/specialreg.h>
50
51 /* -------------------------------------------------------------------------- */
52
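/*
 * Copy the x64 state components selected by 'flags' (GPRs, segments, CRs,
 * DRs, MSRs, interrupt state, FPU) from _src to _dst.
 */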
53 static void
54 nvmm_arch_copystate(void *_dst, void *_src, uint64_t flags)
55 {
56 struct nvmm_x64_state *src = _src;
57 struct nvmm_x64_state *dst = _dst;
58
59 if (flags & NVMM_X64_STATE_GPRS) {
60 memcpy(dst->gprs, src->gprs, sizeof(dst->gprs));
61 }
62 if (flags & NVMM_X64_STATE_SEGS) {
63 memcpy(dst->segs, src->segs, sizeof(dst->segs));
64 }
65 if (flags & NVMM_X64_STATE_CRS) {
66 memcpy(dst->crs, src->crs, sizeof(dst->crs));
67 }
68 if (flags & NVMM_X64_STATE_DRS) {
69 memcpy(dst->drs, src->drs, sizeof(dst->drs));
70 }
71 if (flags & NVMM_X64_STATE_MSRS) {
72 memcpy(dst->msrs, src->msrs, sizeof(dst->msrs));
73 }
74 if (flags & NVMM_X64_STATE_INTR) {
75 memcpy(&dst->intr, &src->intr, sizeof(dst->intr));
76 }
77 if (flags & NVMM_X64_STATE_FPU) {
78 memcpy(&dst->fpu, &src->fpu, sizeof(dst->fpu));
79 }
80 }
81
82 /* -------------------------------------------------------------------------- */
83
84 /*
85 * Undocumented debugging function. Helpful.
86 */
87 int
88 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
89 {
90 struct nvmm_x64_state state;
91 uint16_t *attr;
92 size_t i;
93 int ret;
94
95 const char *segnames[] = {
96 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
97 };
98
99 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
100 if (ret == -1)
101 return -1;
102
103 printf("+ VCPU id=%d\n", (int)cpuid);
104 printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]);
105 printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]);
106 printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]);
107 printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]);
108 printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]);
109 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
110 for (i = 0; i < NVMM_X64_NSEG; i++) {
111 attr = (uint16_t *)&state.segs[i].attrib;
112 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
113 segnames[i],
114 state.segs[i].selector,
115 state.segs[i].base,
116 state.segs[i].limit,
117 *attr);
118 }
119 printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]);
120 printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]);
121 printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]);
122 printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]);
123 printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]);
124
125 return 0;
126 }
127
128 /* -------------------------------------------------------------------------- */
129
130 #define PTE32_L1_SHIFT 12
131 #define PTE32_L2_SHIFT 22
132
133 #define PTE32_L2_MASK 0xffc00000
134 #define PTE32_L1_MASK 0x003ff000
135
136 #define PTE32_L2_FRAME (PTE32_L2_MASK)
137 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
138
139 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
140 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
141
142 #define CR3_FRAME_32BIT PG_FRAME
143
144 typedef uint32_t pte_32bit_t;
145
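/*
 * Translate a GVA into a GPA by walking the two-level 32bit (non-PAE) page
 * tables pointed to by CR3. The RWXU protection is narrowed down at each
 * level and returned in 'prot'. 4MB pages are accepted only if PSE is
 * enabled. Returns -1 if the translation fails.
 */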
146 static int
147 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
148 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
149 {
150 gpaddr_t L2gpa, L1gpa;
151 uintptr_t L2hva, L1hva;
152 pte_32bit_t *pdir, pte;
153 nvmm_prot_t pageprot;
154
155 /* We begin with an RWXU access. */
156 *prot = NVMM_PROT_ALL;
157
158 /* Parse L2. */
159 L2gpa = (cr3 & CR3_FRAME_32BIT);
160 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
161 return -1;
162 pdir = (pte_32bit_t *)L2hva;
163 pte = pdir[pte32_l2idx(gva)];
164 if ((pte & PG_V) == 0)
165 return -1;
166 if ((pte & PG_u) == 0)
167 *prot &= ~NVMM_PROT_USER;
168 if ((pte & PG_KW) == 0)
169 *prot &= ~NVMM_PROT_WRITE;
170 if ((pte & PG_PS) && !has_pse)
171 return -1;
172 if (pte & PG_PS) {
173 *gpa = (pte & PTE32_L2_FRAME);
174 *gpa = *gpa + (gva & PTE32_L1_MASK);
175 return 0;
176 }
177
178 /* Parse L1. */
179 L1gpa = (pte & PG_FRAME);
180 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
181 return -1;
182 pdir = (pte_32bit_t *)L1hva;
183 pte = pdir[pte32_l1idx(gva)];
184 if ((pte & PG_V) == 0)
185 return -1;
186 if ((pte & PG_u) == 0)
187 *prot &= ~NVMM_PROT_USER;
188 if ((pte & PG_KW) == 0)
189 *prot &= ~NVMM_PROT_WRITE;
190 if (pte & PG_PS)
191 return -1;
192
193 *gpa = (pte & PG_FRAME);
194 return 0;
195 }
196
197 /* -------------------------------------------------------------------------- */
198
199 #define PTE32_PAE_L1_SHIFT 12
200 #define PTE32_PAE_L2_SHIFT 21
201 #define PTE32_PAE_L3_SHIFT 30
202
203 #define PTE32_PAE_L3_MASK 0xc0000000
204 #define PTE32_PAE_L2_MASK 0x3fe00000
205 #define PTE32_PAE_L1_MASK 0x001ff000
206
207 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
208 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
209 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
210
211 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
212 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
213 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
214
215 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
216
217 typedef uint64_t pte_32bit_pae_t;
218
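/*
 * Same as above, for 32bit PAE paging: a three-level walk (PDPT, PD, PT)
 * with 64bit PTEs, honoring the NX bit and 2MB large pages.
 */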
219 static int
220 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
221 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
222 {
223 gpaddr_t L3gpa, L2gpa, L1gpa;
224 uintptr_t L3hva, L2hva, L1hva;
225 pte_32bit_pae_t *pdir, pte;
226 nvmm_prot_t pageprot;
227
228 /* We begin with an RWXU access. */
229 *prot = NVMM_PROT_ALL;
230
231 /* Parse L3. */
232 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
233 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
234 return -1;
235 pdir = (pte_32bit_pae_t *)L3hva;
236 pte = pdir[pte32_pae_l3idx(gva)];
237 if ((pte & PG_V) == 0)
238 return -1;
239 if (pte & PG_NX)
240 *prot &= ~NVMM_PROT_EXEC;
241 if (pte & PG_PS)
242 return -1;
243
244 /* Parse L2. */
245 L2gpa = (pte & PG_FRAME);
246 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
247 return -1;
248 pdir = (pte_32bit_pae_t *)L2hva;
249 pte = pdir[pte32_pae_l2idx(gva)];
250 if ((pte & PG_V) == 0)
251 return -1;
252 if ((pte & PG_u) == 0)
253 *prot &= ~NVMM_PROT_USER;
254 if ((pte & PG_KW) == 0)
255 *prot &= ~NVMM_PROT_WRITE;
256 if (pte & PG_NX)
257 *prot &= ~NVMM_PROT_EXEC;
258 if (pte & PG_PS) {
259 *gpa = (pte & PTE32_PAE_L2_FRAME);
260 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
261 return 0;
262 }
263
264 /* Parse L1. */
265 L1gpa = (pte & PG_FRAME);
266 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
267 return -1;
268 pdir = (pte_32bit_pae_t *)L1hva;
269 pte = pdir[pte32_pae_l1idx(gva)];
270 if ((pte & PG_V) == 0)
271 return -1;
272 if ((pte & PG_u) == 0)
273 *prot &= ~NVMM_PROT_USER;
274 if ((pte & PG_KW) == 0)
275 *prot &= ~NVMM_PROT_WRITE;
276 if (pte & PG_NX)
277 *prot &= ~NVMM_PROT_EXEC;
278 if (pte & PG_PS)
279 return -1;
280
281 *gpa = (pte & PG_FRAME);
282 return 0;
283 }
284
285 /* -------------------------------------------------------------------------- */
286
287 #define PTE64_L1_SHIFT 12
288 #define PTE64_L2_SHIFT 21
289 #define PTE64_L3_SHIFT 30
290 #define PTE64_L4_SHIFT 39
291
292 #define PTE64_L4_MASK 0x0000ff8000000000
293 #define PTE64_L3_MASK 0x0000007fc0000000
294 #define PTE64_L2_MASK 0x000000003fe00000
295 #define PTE64_L1_MASK 0x00000000001ff000
296
297 #define PTE64_L4_FRAME PTE64_L4_MASK
298 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
299 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
300 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
301
302 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
303 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
304 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
305 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
306
307 #define CR3_FRAME_64BIT PG_FRAME
308
309 typedef uint64_t pte_64bit_t;
310
311 static inline bool
312 x86_gva_64bit_canonical(gvaddr_t gva)
313 {
314 /* Bits 63:47 must have the same value. */
315 #define SIGN_EXTEND 0xffff800000000000ULL
316 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
317 }
318
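/*
 * Same as above, for 64bit (long mode) paging: a four-level walk, which
 * requires the GVA to be canonical and handles 1GB and 2MB large pages.
 */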
319 static int
320 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
321 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
322 {
323 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
324 uintptr_t L4hva, L3hva, L2hva, L1hva;
325 pte_64bit_t *pdir, pte;
326 nvmm_prot_t pageprot;
327
328 /* We begin with an RWXU access. */
329 *prot = NVMM_PROT_ALL;
330
331 if (!x86_gva_64bit_canonical(gva))
332 return -1;
333
334 /* Parse L4. */
335 L4gpa = (cr3 & CR3_FRAME_64BIT);
336 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
337 return -1;
338 pdir = (pte_64bit_t *)L4hva;
339 pte = pdir[pte64_l4idx(gva)];
340 if ((pte & PG_V) == 0)
341 return -1;
342 if ((pte & PG_u) == 0)
343 *prot &= ~NVMM_PROT_USER;
344 if ((pte & PG_KW) == 0)
345 *prot &= ~NVMM_PROT_WRITE;
346 if (pte & PG_NX)
347 *prot &= ~NVMM_PROT_EXEC;
348 if (pte & PG_PS)
349 return -1;
350
351 /* Parse L3. */
352 L3gpa = (pte & PG_FRAME);
353 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
354 return -1;
355 pdir = (pte_64bit_t *)L3hva;
356 pte = pdir[pte64_l3idx(gva)];
357 if ((pte & PG_V) == 0)
358 return -1;
359 if ((pte & PG_u) == 0)
360 *prot &= ~NVMM_PROT_USER;
361 if ((pte & PG_KW) == 0)
362 *prot &= ~NVMM_PROT_WRITE;
363 if (pte & PG_NX)
364 *prot &= ~NVMM_PROT_EXEC;
365 if (pte & PG_PS) {
366 *gpa = (pte & PTE64_L3_FRAME);
367 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
368 return 0;
369 }
370
371 /* Parse L2. */
372 L2gpa = (pte & PG_FRAME);
373 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
374 return -1;
375 pdir = (pte_64bit_t *)L2hva;
376 pte = pdir[pte64_l2idx(gva)];
377 if ((pte & PG_V) == 0)
378 return -1;
379 if ((pte & PG_u) == 0)
380 *prot &= ~NVMM_PROT_USER;
381 if ((pte & PG_KW) == 0)
382 *prot &= ~NVMM_PROT_WRITE;
383 if (pte & PG_NX)
384 *prot &= ~NVMM_PROT_EXEC;
385 if (pte & PG_PS) {
386 *gpa = (pte & PTE64_L2_FRAME);
387 *gpa = *gpa + (gva & PTE64_L1_MASK);
388 return 0;
389 }
390
391 /* Parse L1. */
392 L1gpa = (pte & PG_FRAME);
393 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
394 return -1;
395 pdir = (pte_64bit_t *)L1hva;
396 pte = pdir[pte64_l1idx(gva)];
397 if ((pte & PG_V) == 0)
398 return -1;
399 if ((pte & PG_u) == 0)
400 *prot &= ~NVMM_PROT_USER;
401 if ((pte & PG_KW) == 0)
402 *prot &= ~NVMM_PROT_WRITE;
403 if (pte & PG_NX)
404 *prot &= ~NVMM_PROT_EXEC;
405 if (pte & PG_PS)
406 return -1;
407
408 *gpa = (pte & PG_FRAME);
409 return 0;
410 }
411
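/*
 * Top-level GVA->GPA translation. If paging is disabled the GVA is the GPA;
 * otherwise the proper page walk is selected from the paging mode (CR0.PG,
 * CR4.PAE, EFER.LMA), and the in-page offset is re-added to the result.
 */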
412 static inline int
413 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
414 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
415 {
416 bool is_pae, is_lng, has_pse;
417 uint64_t cr3;
418 size_t off;
419 int ret;
420
421 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
422 /* No paging. */
423 *prot = NVMM_PROT_ALL;
424 *gpa = gva;
425 return 0;
426 }
427
428 off = (gva & PAGE_MASK);
429 gva &= ~PAGE_MASK;
430
431 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
432 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
433 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
434 cr3 = state->crs[NVMM_X64_CR_CR3];
435
436 if (is_pae && is_lng) {
437 /* 64bit */
438 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
439 } else if (is_pae && !is_lng) {
440 /* 32bit PAE */
441 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
442 } else if (!is_pae && !is_lng) {
443 /* 32bit */
444 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
445 } else {
446 ret = -1;
447 }
448
449 if (ret == -1) {
450 errno = EFAULT;
451 }
452
453 *gpa = *gpa + off;
454
455 return ret;
456 }
457
458 int
459 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
460 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
461 {
462 struct nvmm_x64_state state;
463 int ret;
464
465 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
466 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
467 if (ret == -1)
468 return -1;
469
470 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
471 }
472
473 /* -------------------------------------------------------------------------- */
474
475 static inline bool
476 is_long_mode(struct nvmm_x64_state *state)
477 {
478 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
479 }
480
481 static inline bool
482 is_64bit(struct nvmm_x64_state *state)
483 {
484 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
485 }
486
487 static inline bool
488 is_32bit(struct nvmm_x64_state *state)
489 {
490 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
491 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
492 }
493
494 static inline bool
495 is_16bit(struct nvmm_x64_state *state)
496 {
497 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
498 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
499 }
500
501 static int
502 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
503 {
504 uint64_t limit;
505
506 /*
507 * This is incomplete. We should check topdown, etc, really that's
508 * tiring.
509 */
510 if (__predict_false(!seg->attrib.p)) {
511 goto error;
512 }
513
514 limit = (uint64_t)seg->limit + 1;
515 if (__predict_true(seg->attrib.g)) {
516 limit *= PAGE_SIZE;
517 }
518
519 if (__predict_false(gva + size > limit)) {
520 goto error;
521 }
522
523 return 0;
524
525 error:
526 errno = EFAULT;
527 return -1;
528 }
529
530 static inline void
531 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
532 {
533 *gva += seg->base;
534 }
535
536 static inline uint64_t
537 size_to_mask(size_t size)
538 {
539 switch (size) {
540 case 1:
541 return 0x00000000000000FF;
542 case 2:
543 return 0x000000000000FFFF;
544 case 4:
545 return 0x00000000FFFFFFFF;
546 case 8:
547 default:
548 return 0xFFFFFFFFFFFFFFFF;
549 }
550 }
551
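/*
 * The REP iteration count lives in rCX, truncated to the address size of
 * the instruction.
 */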
552 static uint64_t
553 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
554 {
555 uint64_t mask, cnt;
556
557 mask = size_to_mask(adsize);
558 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
559
560 return cnt;
561 }
562
563 static void
564 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
565 {
566 uint64_t mask;
567
568 /* XXX: should we zero-extend? */
569 mask = size_to_mask(adsize);
570 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
571 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
572 }
573
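/*
 * read_guest_memory/write_guest_memory: translate the GVA, then either copy
 * through the HVA mapping, or forward the access to the MMIO callback when
 * the GPA is not backed by a mapping. An access that crosses a page
 * boundary is split, the remainder being handled by a recursive call.
 */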
574 static int
575 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
576 gvaddr_t gva, uint8_t *data, size_t size)
577 {
578 struct nvmm_mem mem;
579 nvmm_prot_t prot;
580 gpaddr_t gpa;
581 uintptr_t hva;
582 bool is_mmio;
583 int ret, remain;
584
585 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
586 if (__predict_false(ret == -1)) {
587 return -1;
588 }
589 if (__predict_false(!(prot & NVMM_PROT_READ))) {
590 errno = EFAULT;
591 return -1;
592 }
593
594 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
595 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
596 } else {
597 remain = 0;
598 }
599 size -= remain;
600
601 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
602 is_mmio = (ret == -1);
603
604 if (is_mmio) {
605 mem.data = data;
606 mem.gpa = gpa;
607 mem.write = false;
608 mem.size = size;
609 (*__callbacks.mem)(&mem);
610 } else {
611 if (__predict_false(!(prot & NVMM_PROT_READ))) {
612 errno = EFAULT;
613 return -1;
614 }
615 memcpy(data, (uint8_t *)hva, size);
616 }
617
618 if (remain > 0) {
619 ret = read_guest_memory(mach, state, gva + size,
620 data + size, remain);
621 } else {
622 ret = 0;
623 }
624
625 return ret;
626 }
627
628 static int
629 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
630 gvaddr_t gva, uint8_t *data, size_t size)
631 {
632 struct nvmm_mem mem;
633 nvmm_prot_t prot;
634 gpaddr_t gpa;
635 uintptr_t hva;
636 bool is_mmio;
637 int ret, remain;
638
639 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
640 if (__predict_false(ret == -1)) {
641 return -1;
642 }
643 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
644 errno = EFAULT;
645 return -1;
646 }
647
648 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
649 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
650 } else {
651 remain = 0;
652 }
653 size -= remain;
654
655 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
656 is_mmio = (ret == -1);
657
658 if (is_mmio) {
659 mem.data = data;
660 mem.gpa = gpa;
661 mem.write = true;
662 mem.size = size;
663 (*__callbacks.mem)(&mem);
664 } else {
665 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
666 errno = EFAULT;
667 return -1;
668 }
669 memcpy((uint8_t *)hva, data, size);
670 }
671
672 if (remain > 0) {
673 ret = write_guest_memory(mach, state, gva + size,
674 data + size, remain);
675 } else {
676 ret = 0;
677 }
678
679 return ret;
680 }
681
682 /* -------------------------------------------------------------------------- */
683
684 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
685
686 #define NVMM_IO_BATCH_SIZE 32
687
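/*
 * Batch a REP INS/OUTS: perform up to NVMM_IO_BATCH_SIZE bytes worth of I/O
 * operations back-to-back, with a single guest memory copy. Returns the
 * number of operations actually performed, or -1 on failure.
 */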
688 static int
689 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
690 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
691 {
692 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
693 size_t i, iosize, iocnt;
694 int ret;
695
696 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
697 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
698 iocnt = iosize / io->size;
699
700 io->data = iobuf;
701
702 if (!io->in) {
703 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
704 if (ret == -1)
705 return -1;
706 }
707
708 for (i = 0; i < iocnt; i++) {
709 (*__callbacks.io)(io);
710 io->data += io->size;
711 }
712
713 if (io->in) {
714 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
715 if (ret == -1)
716 return -1;
717 }
718
719 return iocnt;
720 }
721
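/*
 * Emulate the I/O port access (IN/OUT/INS/OUTS, possibly REP-prefixed) that
 * caused the current VMEXIT: fetch the VCPU state, perform the guest memory
 * accesses, invoke the VMM's I/O callback, and update rSI/rDI/rCX/RIP.
 * A minimal, hypothetical usage sketch from a VMM run loop:
 *
 *	nvmm_vcpu_run(mach, cpuid, &exit);
 *	switch (exit.reason) {
 *	case NVMM_EXIT_IO:
 *		if (nvmm_assist_io(mach, cpuid, &exit) == -1)
 *			err(EXIT_FAILURE, "I/O assist failed");
 *		break;
 *	}
 */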
722 int
723 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
724 struct nvmm_exit *exit)
725 {
726 struct nvmm_x64_state state;
727 struct nvmm_io io;
728 uint64_t cnt = 0; /* GCC */
729 uint8_t iobuf[8];
730 int iocnt = 1;
731 gvaddr_t gva = 0; /* GCC */
732 int reg = 0; /* GCC */
733 int ret, seg;
734 bool psld = false;
735
736 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
737 errno = EINVAL;
738 return -1;
739 }
740
741 io.port = exit->u.io.port;
742 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
743 io.size = exit->u.io.operand_size;
744 io.data = iobuf;
745
746 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
747 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
748 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
749 if (ret == -1)
750 return -1;
751
752 if (exit->u.io.rep) {
753 cnt = rep_get_cnt(&state, exit->u.io.address_size);
754 if (__predict_false(cnt == 0)) {
755 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
756 goto out;
757 }
758 }
759
760 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
761 psld = true;
762 }
763
764 /*
765 * Determine GVA.
766 */
767 if (exit->u.io.str) {
768 if (io.in) {
769 reg = NVMM_X64_GPR_RDI;
770 } else {
771 reg = NVMM_X64_GPR_RSI;
772 }
773
774 gva = state.gprs[reg];
775 gva &= size_to_mask(exit->u.io.address_size);
776
777 if (exit->u.io.seg != -1) {
778 seg = exit->u.io.seg;
779 } else {
780 if (io.in) {
781 seg = NVMM_X64_SEG_ES;
782 } else {
783 seg = fetch_segment(mach, &state);
784 if (seg == -1)
785 return -1;
786 }
787 }
788
789 if (__predict_true(is_long_mode(&state))) {
790 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
791 segment_apply(&state.segs[seg], &gva);
792 }
793 } else {
794 ret = segment_check(&state.segs[seg], gva, io.size);
795 if (ret == -1)
796 return -1;
797 segment_apply(&state.segs[seg], &gva);
798 }
799
800 if (exit->u.io.rep && !psld) {
801 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
802 if (iocnt == -1)
803 return -1;
804 goto done;
805 }
806 }
807
808 if (!io.in) {
809 if (!exit->u.io.str) {
810 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
811 } else {
812 ret = read_guest_memory(mach, &state, gva, io.data,
813 io.size);
814 if (ret == -1)
815 return -1;
816 }
817 }
818
819 (*__callbacks.io)(&io);
820
821 if (io.in) {
822 if (!exit->u.io.str) {
823 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
824 if (io.size == 4) {
825 /* Zero-extend to 64 bits. */
826 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
827 }
828 } else {
829 ret = write_guest_memory(mach, &state, gva, io.data,
830 io.size);
831 if (ret == -1)
832 return -1;
833 }
834 }
835
836 done:
837 if (exit->u.io.str) {
838 if (__predict_false(psld)) {
839 state.gprs[reg] -= iocnt * io.size;
840 } else {
841 state.gprs[reg] += iocnt * io.size;
842 }
843 }
844
845 if (exit->u.io.rep) {
846 cnt -= iocnt;
847 rep_set_cnt(&state, exit->u.io.address_size, cnt);
848 if (cnt == 0) {
849 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
850 }
851 } else {
852 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
853 }
854
855 out:
856 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
857 if (ret == -1)
858 return -1;
859
860 return 0;
861 }
862
863 /* -------------------------------------------------------------------------- */
864
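/*
 * Instruction emulation for MMIO assists. Each supported opcode is bound to
 * an x86_emul callback performing the actual operation. 'read' means the
 * memory operand must be fetched before emulation, 'notouch' that the
 * memory operand is never modified (CMP, TEST).
 */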
865 struct x86_emul {
866 bool read;
867 bool notouch;
868 void (*func)(struct nvmm_mem *, uint64_t *);
869 };
870
871 static void x86_func_or(struct nvmm_mem *, uint64_t *);
872 static void x86_func_and(struct nvmm_mem *, uint64_t *);
873 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
874 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
875 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
876 static void x86_func_test(struct nvmm_mem *, uint64_t *);
877 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
878 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
879 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
880 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
881
882 static const struct x86_emul x86_emul_or = {
883 .read = true,
884 .func = x86_func_or
885 };
886
887 static const struct x86_emul x86_emul_and = {
888 .read = true,
889 .func = x86_func_and
890 };
891
892 static const struct x86_emul x86_emul_sub = {
893 .read = true,
894 .func = x86_func_sub
895 };
896
897 static const struct x86_emul x86_emul_xor = {
898 .read = true,
899 .func = x86_func_xor
900 };
901
902 static const struct x86_emul x86_emul_cmp = {
903 .notouch = true,
904 .func = x86_func_cmp
905 };
906
907 static const struct x86_emul x86_emul_test = {
908 .notouch = true,
909 .func = x86_func_test
910 };
911
912 static const struct x86_emul x86_emul_mov = {
913 .func = x86_func_mov
914 };
915
916 static const struct x86_emul x86_emul_stos = {
917 .func = x86_func_stos
918 };
919
920 static const struct x86_emul x86_emul_lods = {
921 .func = x86_func_lods
922 };
923
924 static const struct x86_emul x86_emul_movs = {
925 .func = x86_func_movs
926 };
927
928 /* Legacy prefixes. */
929 #define LEG_LOCK 0xF0
930 #define LEG_REPN 0xF2
931 #define LEG_REP 0xF3
932 #define LEG_OVR_CS 0x2E
933 #define LEG_OVR_SS 0x36
934 #define LEG_OVR_DS 0x3E
935 #define LEG_OVR_ES 0x26
936 #define LEG_OVR_FS 0x64
937 #define LEG_OVR_GS 0x65
938 #define LEG_OPR_OVR 0x66
939 #define LEG_ADR_OVR 0x67
940
941 struct x86_legpref {
942 bool opr_ovr:1;
943 bool adr_ovr:1;
944 bool rep:1;
945 bool repn:1;
946 int8_t seg;
947 };
948
949 struct x86_rexpref {
950 bool b:1;
951 bool x:1;
952 bool r:1;
953 bool w:1;
954 bool present:1;
955 };
956
957 struct x86_reg {
958 int num; /* NVMM GPR state index */
959 uint64_t mask;
960 };
961
962 enum x86_disp_type {
963 DISP_NONE,
964 DISP_0,
965 DISP_1,
966 DISP_4
967 };
968
969 struct x86_disp {
970 enum x86_disp_type type;
971 uint64_t data; /* 4 bytes, but can be sign-extended */
972 };
973
974 enum REGMODRM__Mod {
975 MOD_DIS0, /* also, register indirect */
976 MOD_DIS1,
977 MOD_DIS4,
978 MOD_REG
979 };
980
981 enum REGMODRM__Reg {
982 REG_000, /* these fields are indexes to the register map */
983 REG_001,
984 REG_010,
985 REG_011,
986 REG_100,
987 REG_101,
988 REG_110,
989 REG_111
990 };
991
992 enum REGMODRM__Rm {
993 RM_000, /* reg */
994 RM_001, /* reg */
995 RM_010, /* reg */
996 RM_011, /* reg */
997 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
998 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
999 RM_110,
1000 RM_111
1001 };
1002
1003 struct x86_regmodrm {
1004 uint8_t mod:2;
1005 uint8_t reg:3;
1006 uint8_t rm:3;
1007 };
1008
1009 struct x86_immediate {
1010 uint64_t data;
1011 };
1012
1013 struct x86_sib {
1014 uint8_t scale;
1015 const struct x86_reg *idx;
1016 const struct x86_reg *bas;
1017 };
1018
1019 enum x86_store_type {
1020 STORE_NONE,
1021 STORE_REG,
1022 STORE_IMM,
1023 STORE_SIB,
1024 STORE_DMO
1025 };
1026
1027 struct x86_store {
1028 enum x86_store_type type;
1029 union {
1030 const struct x86_reg *reg;
1031 struct x86_immediate imm;
1032 struct x86_sib sib;
1033 uint64_t dmo;
1034 } u;
1035 struct x86_disp disp;
1036 int hardseg;
1037 };
1038
1039 struct x86_instr {
1040 uint8_t len;
1041 struct x86_legpref legpref;
1042 struct x86_rexpref rexpref;
1043 struct x86_regmodrm regmodrm;
1044 uint8_t operand_size;
1045 uint8_t address_size;
1046 uint64_t zeroextend_mask;
1047
1048 const struct x86_opcode *opcode;
1049 const struct x86_emul *emul;
1050
1051 struct x86_store src;
1052 struct x86_store dst;
1053 struct x86_store *strm;
1054 };
1055
1056 struct x86_decode_fsm {
1057 /* vcpu */
1058 bool is64bit;
1059 bool is32bit;
1060 bool is16bit;
1061
1062 /* fsm */
1063 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1064 uint8_t *buf;
1065 uint8_t *end;
1066 };
1067
1068 struct x86_opcode {
1069 bool valid:1;
1070 bool regmodrm:1;
1071 bool regtorm:1;
1072 bool dmo:1;
1073 bool todmo:1;
1074 bool movs:1;
1075 bool stos:1;
1076 bool lods:1;
1077 bool szoverride:1;
1078 bool group1:1;
1079 bool group3:1;
1080 bool group11:1;
1081 bool immediate:1;
1082 uint8_t defsize;
1083 uint8_t flags;
1084 const struct x86_emul *emul;
1085 };
1086
1087 struct x86_group_entry {
1088 const struct x86_emul *emul;
1089 };
1090
1091 #define OPSIZE_BYTE 0x01
1092 #define OPSIZE_WORD 0x02 /* 2 bytes */
1093 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1094 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1095
1096 #define FLAG_imm8 0x01
1097 #define FLAG_immz 0x02
1098 #define FLAG_ze 0x04
1099
1100 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1101 [1] = { .emul = &x86_emul_or },
1102 [4] = { .emul = &x86_emul_and },
1103 [6] = { .emul = &x86_emul_xor },
1104 [7] = { .emul = &x86_emul_cmp }
1105 };
1106
1107 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1108 [0] = { .emul = &x86_emul_test },
1109 [1] = { .emul = &x86_emul_test }
1110 };
1111
1112 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1113 [0] = { .emul = &x86_emul_mov }
1114 };
1115
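/*
 * Primary (one-byte) opcode map. Only the instructions the assist knows how
 * to emulate are marked valid; everything else makes the decoder fail.
 */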
1116 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1117 /*
1118 * Group1
1119 */
1120 [0x80] = {
1121 /* Eb, Ib */
1122 .valid = true,
1123 .regmodrm = true,
1124 .regtorm = true,
1125 .szoverride = false,
1126 .defsize = OPSIZE_BYTE,
1127 .group1 = true,
1128 .immediate = true,
1129 .emul = NULL /* group1 */
1130 },
1131 [0x81] = {
1132 /* Ev, Iz */
1133 .valid = true,
1134 .regmodrm = true,
1135 .regtorm = true,
1136 .szoverride = true,
1137 .defsize = -1,
1138 .group1 = true,
1139 .immediate = true,
1140 .flags = FLAG_immz,
1141 .emul = NULL /* group1 */
1142 },
1143 [0x83] = {
1144 /* Ev, Ib */
1145 .valid = true,
1146 .regmodrm = true,
1147 .regtorm = true,
1148 .szoverride = true,
1149 .defsize = -1,
1150 .group1 = true,
1151 .immediate = true,
1152 .flags = FLAG_imm8,
1153 .emul = NULL /* group1 */
1154 },
1155
1156 /*
1157 * Group3
1158 */
1159 [0xF6] = {
1160 /* Eb, Ib */
1161 .valid = true,
1162 .regmodrm = true,
1163 .regtorm = true,
1164 .szoverride = false,
1165 .defsize = OPSIZE_BYTE,
1166 .group3 = true,
1167 .immediate = true,
1168 .emul = NULL /* group3 */
1169 },
1170 [0xF7] = {
1171 /* Ev, Iz */
1172 .valid = true,
1173 .regmodrm = true,
1174 .regtorm = true,
1175 .szoverride = true,
1176 .defsize = -1,
1177 .group3 = true,
1178 .immediate = true,
1179 .flags = FLAG_immz,
1180 .emul = NULL /* group3 */
1181 },
1182
1183 /*
1184 * Group11
1185 */
1186 [0xC6] = {
1187 /* Eb, Ib */
1188 .valid = true,
1189 .regmodrm = true,
1190 .regtorm = true,
1191 .szoverride = false,
1192 .defsize = OPSIZE_BYTE,
1193 .group11 = true,
1194 .immediate = true,
1195 .emul = NULL /* group11 */
1196 },
1197 [0xC7] = {
1198 /* Ev, Iz */
1199 .valid = true,
1200 .regmodrm = true,
1201 .regtorm = true,
1202 .szoverride = true,
1203 .defsize = -1,
1204 .group11 = true,
1205 .immediate = true,
1206 .flags = FLAG_immz,
1207 .emul = NULL /* group11 */
1208 },
1209
1210 /*
1211 * OR
1212 */
1213 [0x08] = {
1214 /* Eb, Gb */
1215 .valid = true,
1216 .regmodrm = true,
1217 .regtorm = true,
1218 .szoverride = false,
1219 .defsize = OPSIZE_BYTE,
1220 .emul = &x86_emul_or
1221 },
1222 [0x09] = {
1223 /* Ev, Gv */
1224 .valid = true,
1225 .regmodrm = true,
1226 .regtorm = true,
1227 .szoverride = true,
1228 .defsize = -1,
1229 .emul = &x86_emul_or
1230 },
1231 [0x0A] = {
1232 /* Gb, Eb */
1233 .valid = true,
1234 .regmodrm = true,
1235 .regtorm = false,
1236 .szoverride = false,
1237 .defsize = OPSIZE_BYTE,
1238 .emul = &x86_emul_or
1239 },
1240 [0x0B] = {
1241 /* Gv, Ev */
1242 .valid = true,
1243 .regmodrm = true,
1244 .regtorm = false,
1245 .szoverride = true,
1246 .defsize = -1,
1247 .emul = &x86_emul_or
1248 },
1249
1250 /*
1251 * AND
1252 */
1253 [0x20] = {
1254 /* Eb, Gb */
1255 .valid = true,
1256 .regmodrm = true,
1257 .regtorm = true,
1258 .szoverride = false,
1259 .defsize = OPSIZE_BYTE,
1260 .emul = &x86_emul_and
1261 },
1262 [0x21] = {
1263 /* Ev, Gv */
1264 .valid = true,
1265 .regmodrm = true,
1266 .regtorm = true,
1267 .szoverride = true,
1268 .defsize = -1,
1269 .emul = &x86_emul_and
1270 },
1271 [0x22] = {
1272 /* Gb, Eb */
1273 .valid = true,
1274 .regmodrm = true,
1275 .regtorm = false,
1276 .szoverride = false,
1277 .defsize = OPSIZE_BYTE,
1278 .emul = &x86_emul_and
1279 },
1280 [0x23] = {
1281 /* Gv, Ev */
1282 .valid = true,
1283 .regmodrm = true,
1284 .regtorm = false,
1285 .szoverride = true,
1286 .defsize = -1,
1287 .emul = &x86_emul_and
1288 },
1289
1290 /*
1291 * SUB
1292 */
1293 [0x28] = {
1294 /* Eb, Gb */
1295 .valid = true,
1296 .regmodrm = true,
1297 .regtorm = true,
1298 .szoverride = false,
1299 .defsize = OPSIZE_BYTE,
1300 .emul = &x86_emul_sub
1301 },
1302 [0x29] = {
1303 /* Ev, Gv */
1304 .valid = true,
1305 .regmodrm = true,
1306 .regtorm = true,
1307 .szoverride = true,
1308 .defsize = -1,
1309 .emul = &x86_emul_sub
1310 },
1311 [0x2A] = {
1312 /* Gb, Eb */
1313 .valid = true,
1314 .regmodrm = true,
1315 .regtorm = false,
1316 .szoverride = false,
1317 .defsize = OPSIZE_BYTE,
1318 .emul = &x86_emul_sub
1319 },
1320 [0x2B] = {
1321 /* Gv, Ev */
1322 .valid = true,
1323 .regmodrm = true,
1324 .regtorm = false,
1325 .szoverride = true,
1326 .defsize = -1,
1327 .emul = &x86_emul_sub
1328 },
1329
1330 /*
1331 * XOR
1332 */
1333 [0x30] = {
1334 /* Eb, Gb */
1335 .valid = true,
1336 .regmodrm = true,
1337 .regtorm = true,
1338 .szoverride = false,
1339 .defsize = OPSIZE_BYTE,
1340 .emul = &x86_emul_xor
1341 },
1342 [0x31] = {
1343 /* Ev, Gv */
1344 .valid = true,
1345 .regmodrm = true,
1346 .regtorm = true,
1347 .szoverride = true,
1348 .defsize = -1,
1349 .emul = &x86_emul_xor
1350 },
1351 [0x32] = {
1352 /* Gb, Eb */
1353 .valid = true,
1354 .regmodrm = true,
1355 .regtorm = false,
1356 .szoverride = false,
1357 .defsize = OPSIZE_BYTE,
1358 .emul = &x86_emul_xor
1359 },
1360 [0x33] = {
1361 /* Gv, Ev */
1362 .valid = true,
1363 .regmodrm = true,
1364 .regtorm = false,
1365 .szoverride = true,
1366 .defsize = -1,
1367 .emul = &x86_emul_xor
1368 },
1369
1370 /*
1371 * MOV
1372 */
1373 [0x88] = {
1374 /* Eb, Gb */
1375 .valid = true,
1376 .regmodrm = true,
1377 .regtorm = true,
1378 .szoverride = false,
1379 .defsize = OPSIZE_BYTE,
1380 .emul = &x86_emul_mov
1381 },
1382 [0x89] = {
1383 /* Ev, Gv */
1384 .valid = true,
1385 .regmodrm = true,
1386 .regtorm = true,
1387 .szoverride = true,
1388 .defsize = -1,
1389 .emul = &x86_emul_mov
1390 },
1391 [0x8A] = {
1392 /* Gb, Eb */
1393 .valid = true,
1394 .regmodrm = true,
1395 .regtorm = false,
1396 .szoverride = false,
1397 .defsize = OPSIZE_BYTE,
1398 .emul = &x86_emul_mov
1399 },
1400 [0x8B] = {
1401 /* Gv, Ev */
1402 .valid = true,
1403 .regmodrm = true,
1404 .regtorm = false,
1405 .szoverride = true,
1406 .defsize = -1,
1407 .emul = &x86_emul_mov
1408 },
1409 [0xA0] = {
1410 /* AL, Ob */
1411 .valid = true,
1412 .dmo = true,
1413 .todmo = false,
1414 .szoverride = false,
1415 .defsize = OPSIZE_BYTE,
1416 .emul = &x86_emul_mov
1417 },
1418 [0xA1] = {
1419 /* rAX, Ov */
1420 .valid = true,
1421 .dmo = true,
1422 .todmo = false,
1423 .szoverride = true,
1424 .defsize = -1,
1425 .emul = &x86_emul_mov
1426 },
1427 [0xA2] = {
1428 /* Ob, AL */
1429 .valid = true,
1430 .dmo = true,
1431 .todmo = true,
1432 .szoverride = false,
1433 .defsize = OPSIZE_BYTE,
1434 .emul = &x86_emul_mov
1435 },
1436 [0xA3] = {
1437 /* Ov, rAX */
1438 .valid = true,
1439 .dmo = true,
1440 .todmo = true,
1441 .szoverride = true,
1442 .defsize = -1,
1443 .emul = &x86_emul_mov
1444 },
1445
1446 /*
1447 * MOVS
1448 */
1449 [0xA4] = {
1450 /* Yb, Xb */
1451 .valid = true,
1452 .movs = true,
1453 .szoverride = false,
1454 .defsize = OPSIZE_BYTE,
1455 .emul = &x86_emul_movs
1456 },
1457 [0xA5] = {
1458 /* Yv, Xv */
1459 .valid = true,
1460 .movs = true,
1461 .szoverride = true,
1462 .defsize = -1,
1463 .emul = &x86_emul_movs
1464 },
1465
1466 /*
1467 * STOS
1468 */
1469 [0xAA] = {
1470 /* Yb, AL */
1471 .valid = true,
1472 .stos = true,
1473 .szoverride = false,
1474 .defsize = OPSIZE_BYTE,
1475 .emul = &x86_emul_stos
1476 },
1477 [0xAB] = {
1478 /* Yv, rAX */
1479 .valid = true,
1480 .stos = true,
1481 .szoverride = true,
1482 .defsize = -1,
1483 .emul = &x86_emul_stos
1484 },
1485
1486 /*
1487 * LODS
1488 */
1489 [0xAC] = {
1490 /* AL, Xb */
1491 .valid = true,
1492 .lods = true,
1493 .szoverride = false,
1494 .defsize = OPSIZE_BYTE,
1495 .emul = &x86_emul_lods
1496 },
1497 [0xAD] = {
1498 /* rAX, Xv */
1499 .valid = true,
1500 .lods = true,
1501 .szoverride = true,
1502 .defsize = -1,
1503 .emul = &x86_emul_lods
1504 },
1505 };
1506
1507 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1508 /*
1509 * MOVZX
1510 */
1511 [0xB6] = {
1512 /* Gv, Eb */
1513 .valid = true,
1514 .regmodrm = true,
1515 .regtorm = false,
1516 .szoverride = true,
1517 .defsize = OPSIZE_BYTE,
1518 .flags = FLAG_ze,
1519 .emul = &x86_emul_mov
1520 },
1521 [0xB7] = {
1522 /* Gv, Ew */
1523 .valid = true,
1524 .regmodrm = true,
1525 .regtorm = false,
1526 .szoverride = true,
1527 .defsize = OPSIZE_WORD,
1528 .flags = FLAG_ze,
1529 .emul = &x86_emul_mov
1530 },
1531 };
1532
1533 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1534
1535 /* [REX-present][enc][opsize] */
1536 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1537 [false] = {
1538 /* No REX prefix. */
1539 [0b00] = {
1540 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1541 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1542 [2] = { -1, 0 },
1543 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1544 [4] = { -1, 0 },
1545 [5] = { -1, 0 },
1546 [6] = { -1, 0 },
1547 [7] = { -1, 0 },
1548 },
1549 [0b01] = {
1550 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1551 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1552 [2] = { -1, 0 },
1553 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1554 [4] = { -1, 0 },
1555 [5] = { -1, 0 },
1556 [6] = { -1, 0 },
1557 [7] = { -1, 0 },
1558 },
1559 [0b10] = {
1560 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1561 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1562 [2] = { -1, 0 },
1563 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1564 [4] = { -1, 0 },
1565 [5] = { -1, 0 },
1566 [6] = { -1, 0 },
1567 [7] = { -1, 0 },
1568 },
1569 [0b11] = {
1570 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1571 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1572 [2] = { -1, 0 },
1573 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1574 [4] = { -1, 0 },
1575 [5] = { -1, 0 },
1576 [6] = { -1, 0 },
1577 [7] = { -1, 0 },
1578 }
1579 },
1580 [true] = {
1581 /* Has REX prefix. */
1582 [0b00] = {
1583 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1584 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1585 [2] = { -1, 0 },
1586 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1587 [4] = { -1, 0 },
1588 [5] = { -1, 0 },
1589 [6] = { -1, 0 },
1590 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1591 },
1592 [0b01] = {
1593 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1594 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1595 [2] = { -1, 0 },
1596 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1597 [4] = { -1, 0 },
1598 [5] = { -1, 0 },
1599 [6] = { -1, 0 },
1600 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1601 },
1602 [0b10] = {
1603 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1604 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1605 [2] = { -1, 0 },
1606 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1607 [4] = { -1, 0 },
1608 [5] = { -1, 0 },
1609 [6] = { -1, 0 },
1610 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1611 },
1612 [0b11] = {
1613 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1614 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1615 [2] = { -1, 0 },
1616 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1617 [4] = { -1, 0 },
1618 [5] = { -1, 0 },
1619 [6] = { -1, 0 },
1620 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1621 }
1622 }
1623 };
1624
1625 /* [depends][enc][size] */
1626 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1627 [false] = {
1628 /* Not extended. */
1629 [0b000] = {
1630 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1631 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1632 [2] = { -1, 0 },
1633 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1634 [4] = { -1, 0 },
1635 [5] = { -1, 0 },
1636 [6] = { -1, 0 },
1637 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1638 },
1639 [0b001] = {
1640 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1641 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1642 [2] = { -1, 0 },
1643 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1644 [4] = { -1, 0 },
1645 [5] = { -1, 0 },
1646 [6] = { -1, 0 },
1647 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1648 },
1649 [0b010] = {
1650 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1651 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1652 [2] = { -1, 0 },
1653 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1654 [4] = { -1, 0 },
1655 [5] = { -1, 0 },
1656 [6] = { -1, 0 },
1657 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1658 },
1659 [0b011] = {
1660 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1661 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1662 [2] = { -1, 0 },
1663 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1664 [4] = { -1, 0 },
1665 [5] = { -1, 0 },
1666 [6] = { -1, 0 },
1667 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1668 },
1669 [0b100] = {
1670 [0] = { -1, 0 }, /* SPECIAL */
1671 [1] = { -1, 0 }, /* SPECIAL */
1672 [2] = { -1, 0 },
1673 [3] = { -1, 0 }, /* SPECIAL */
1674 [4] = { -1, 0 },
1675 [5] = { -1, 0 },
1676 [6] = { -1, 0 },
1677 [7] = { -1, 0 }, /* SPECIAL */
1678 },
1679 [0b101] = {
1680 [0] = { -1, 0 }, /* SPECIAL */
1681 [1] = { -1, 0 }, /* SPECIAL */
1682 [2] = { -1, 0 },
1683 [3] = { -1, 0 }, /* SPECIAL */
1684 [4] = { -1, 0 },
1685 [5] = { -1, 0 },
1686 [6] = { -1, 0 },
1687 [7] = { -1, 0 }, /* SPECIAL */
1688 },
1689 [0b110] = {
1690 [0] = { -1, 0 }, /* SPECIAL */
1691 [1] = { -1, 0 }, /* SPECIAL */
1692 [2] = { -1, 0 },
1693 [3] = { -1, 0 }, /* SPECIAL */
1694 [4] = { -1, 0 },
1695 [5] = { -1, 0 },
1696 [6] = { -1, 0 },
1697 [7] = { -1, 0 }, /* SPECIAL */
1698 },
1699 [0b111] = {
1700 [0] = { -1, 0 }, /* SPECIAL */
1701 [1] = { -1, 0 }, /* SPECIAL */
1702 [2] = { -1, 0 },
1703 [3] = { -1, 0 }, /* SPECIAL */
1704 [4] = { -1, 0 },
1705 [5] = { -1, 0 },
1706 [6] = { -1, 0 },
1707 [7] = { -1, 0 }, /* SPECIAL */
1708 },
1709 },
1710 [true] = {
1711 /* Extended. */
1712 [0b000] = {
1713 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1714 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1715 [2] = { -1, 0 },
1716 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1717 [4] = { -1, 0 },
1718 [5] = { -1, 0 },
1719 [6] = { -1, 0 },
1720 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1721 },
1722 [0b001] = {
1723 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1724 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1725 [2] = { -1, 0 },
1726 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1727 [4] = { -1, 0 },
1728 [5] = { -1, 0 },
1729 [6] = { -1, 0 },
1730 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1731 },
1732 [0b010] = {
1733 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1734 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1735 [2] = { -1, 0 },
1736 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1737 [4] = { -1, 0 },
1738 [5] = { -1, 0 },
1739 [6] = { -1, 0 },
1740 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1741 },
1742 [0b011] = {
1743 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1744 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1745 [2] = { -1, 0 },
1746 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1747 [4] = { -1, 0 },
1748 [5] = { -1, 0 },
1749 [6] = { -1, 0 },
1750 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1751 },
1752 [0b100] = {
1753 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1754 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1755 [2] = { -1, 0 },
1756 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1757 [4] = { -1, 0 },
1758 [5] = { -1, 0 },
1759 [6] = { -1, 0 },
1760 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1761 },
1762 [0b101] = {
1763 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1764 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1765 [2] = { -1, 0 },
1766 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1767 [4] = { -1, 0 },
1768 [5] = { -1, 0 },
1769 [6] = { -1, 0 },
1770 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1771 },
1772 [0b110] = {
1773 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1774 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1775 [2] = { -1, 0 },
1776 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1777 [4] = { -1, 0 },
1778 [5] = { -1, 0 },
1779 [6] = { -1, 0 },
1780 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1781 },
1782 [0b111] = {
1783 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1784 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1785 [2] = { -1, 0 },
1786 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1787 [4] = { -1, 0 },
1788 [5] = { -1, 0 },
1789 [6] = { -1, 0 },
1790 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1791 },
1792 }
1793 };
1794
1795 static int
1796 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1797 {
1798 fsm->fn = NULL;
1799 return -1;
1800 }
1801
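/*
 * Decoder FSM helpers: fsm_read() peeks 'n' bytes from the instruction
 * buffer without consuming them, fsm_advance() consumes 'n' bytes and
 * selects the next decoding node (or flags an overflow).
 */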
1802 static int
1803 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1804 {
1805 if (fsm->buf + n > fsm->end) {
1806 return -1;
1807 }
1808 memcpy(bytes, fsm->buf, n);
1809 return 0;
1810 }
1811
1812 static inline void
1813 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1814 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1815 {
1816 fsm->buf += n;
1817 if (fsm->buf > fsm->end) {
1818 fsm->fn = node_overflow;
1819 } else {
1820 fsm->fn = fn;
1821 }
1822 }
1823
1824 static const struct x86_reg *
1825 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1826 {
1827 enc &= 0b11;
1828 if (regsize == 8) {
1829 /* May be 64bit without REX */
1830 return &gpr_map__special[1][enc][regsize-1];
1831 }
1832 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1833 }
1834
1835 /*
1836 * Special node, for MOVS. Fake two displacements of zero on the source and
1837 * destination registers.
1838 */
1839 static int
1840 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1841 {
1842 size_t adrsize;
1843
1844 adrsize = instr->address_size;
1845
1846 /* DS:RSI */
1847 instr->src.type = STORE_REG;
1848 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1849 instr->src.disp.type = DISP_0;
1850
1851 /* ES:RDI, force ES */
1852 instr->dst.type = STORE_REG;
1853 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1854 instr->dst.disp.type = DISP_0;
1855 instr->dst.hardseg = NVMM_X64_SEG_ES;
1856
1857 fsm_advance(fsm, 0, NULL);
1858
1859 return 0;
1860 }
1861
1862 /*
1863 * Special node, for STOS and LODS. Fake a displacement of zero on the
1864 * destination register.
1865 */
1866 static int
1867 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1868 {
1869 const struct x86_opcode *opcode = instr->opcode;
1870 struct x86_store *stlo, *streg;
1871 size_t adrsize, regsize;
1872
1873 adrsize = instr->address_size;
1874 regsize = instr->operand_size;
1875
1876 if (opcode->stos) {
1877 streg = &instr->src;
1878 stlo = &instr->dst;
1879 } else {
1880 streg = &instr->dst;
1881 stlo = &instr->src;
1882 }
1883
1884 streg->type = STORE_REG;
1885 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1886
1887 stlo->type = STORE_REG;
1888 if (opcode->stos) {
1889 /* ES:RDI, force ES */
1890 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1891 stlo->hardseg = NVMM_X64_SEG_ES;
1892 } else {
1893 /* DS:RSI */
1894 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1895 }
1896 stlo->disp.type = DISP_0;
1897
1898 fsm_advance(fsm, 0, NULL);
1899
1900 return 0;
1901 }
1902
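/*
 * Direct Memory Offset (MOV moffs): one operand is an absolute address
 * embedded in the instruction, the other is AL/AX/EAX/RAX.
 */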
1903 static int
1904 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1905 {
1906 const struct x86_opcode *opcode = instr->opcode;
1907 struct x86_store *stdmo, *streg;
1908 size_t adrsize, regsize;
1909
1910 adrsize = instr->address_size;
1911 regsize = instr->operand_size;
1912
1913 if (opcode->todmo) {
1914 streg = &instr->src;
1915 stdmo = &instr->dst;
1916 } else {
1917 streg = &instr->dst;
1918 stdmo = &instr->src;
1919 }
1920
1921 streg->type = STORE_REG;
1922 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1923
1924 stdmo->type = STORE_DMO;
1925 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1926 return -1;
1927 }
1928 fsm_advance(fsm, adrsize, NULL);
1929
1930 return 0;
1931 }
1932
1933 static inline uint64_t
1934 sign_extend(uint64_t val, int size)
1935 {
1936 if (size == 1) {
1937 if (val & __BIT(7))
1938 val |= 0xFFFFFFFFFFFFFF00;
1939 } else if (size == 2) {
1940 if (val & __BIT(15))
1941 val |= 0xFFFFFFFFFFFF0000;
1942 } else if (size == 4) {
1943 if (val & __BIT(31))
1944 val |= 0xFFFFFFFF00000000;
1945 }
1946 return val;
1947 }
1948
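/*
 * Fetch the immediate operand (always the source). imm8 and, for 64bit
 * operands, immz immediates are read in their shorter encoded form and then
 * sign-extended.
 */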
1949 static int
1950 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1951 {
1952 const struct x86_opcode *opcode = instr->opcode;
1953 struct x86_store *store;
1954 uint8_t immsize;
1955 size_t sesize = 0;
1956
1957 /* The immediate is the source */
1958 store = &instr->src;
1959 immsize = instr->operand_size;
1960
1961 if (opcode->flags & FLAG_imm8) {
1962 sesize = immsize;
1963 immsize = 1;
1964 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1965 sesize = immsize;
1966 immsize = 4;
1967 }
1968
1969 store->type = STORE_IMM;
1970 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1971 return -1;
1972 }
1973 fsm_advance(fsm, immsize, NULL);
1974
1975 if (sesize != 0) {
1976 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1977 }
1978
1979 return 0;
1980 }
1981
1982 static int
1983 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1984 {
1985 const struct x86_opcode *opcode = instr->opcode;
1986 uint64_t data = 0;
1987 size_t n;
1988
1989 if (instr->strm->disp.type == DISP_1) {
1990 n = 1;
1991 } else { /* DISP4 */
1992 n = 4;
1993 }
1994
1995 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1996 return -1;
1997 }
1998
1999 if (__predict_true(fsm->is64bit)) {
2000 data = sign_extend(data, n);
2001 }
2002
2003 instr->strm->disp.data = data;
2004
2005 if (opcode->immediate) {
2006 fsm_advance(fsm, n, node_immediate);
2007 } else {
2008 fsm_advance(fsm, n, NULL);
2009 }
2010
2011 return 0;
2012 }
2013
2014 static const struct x86_reg *
2015 get_register_idx(struct x86_instr *instr, uint8_t index)
2016 {
2017 uint8_t enc = index;
2018 const struct x86_reg *reg;
2019 size_t regsize;
2020
2021 regsize = instr->address_size;
2022 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2023
2024 if (reg->num == -1) {
2025 reg = resolve_special_register(instr, enc, regsize);
2026 }
2027
2028 return reg;
2029 }
2030
2031 static const struct x86_reg *
2032 get_register_bas(struct x86_instr *instr, uint8_t base)
2033 {
2034 uint8_t enc = base;
2035 const struct x86_reg *reg;
2036 size_t regsize;
2037
2038 regsize = instr->address_size;
2039 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2040 if (reg->num == -1) {
2041 reg = resolve_special_register(instr, enc, regsize);
2042 }
2043
2044 return reg;
2045 }
2046
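/*
 * Decode the SIB byte into scale, index and base. index=0b100 without REX.X
 * means "no index"; mod=0b00 with base=0b101 means "no base", a disp32
 * being used instead.
 */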
2047 static int
2048 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2049 {
2050 const struct x86_opcode *opcode;
2051 uint8_t scale, index, base;
2052 bool noindex, nobase;
2053 uint8_t byte;
2054
2055 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2056 return -1;
2057 }
2058
2059 scale = ((byte & 0b11000000) >> 6);
2060 index = ((byte & 0b00111000) >> 3);
2061 base = ((byte & 0b00000111) >> 0);
2062
2063 opcode = instr->opcode;
2064
2065 noindex = false;
2066 nobase = false;
2067
2068 if (index == 0b100 && !instr->rexpref.x) {
2069 /* Special case: the index is null */
2070 noindex = true;
2071 }
2072
2073 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2074 /* Special case: the base is null + disp32 */
2075 instr->strm->disp.type = DISP_4;
2076 nobase = true;
2077 }
2078
2079 instr->strm->type = STORE_SIB;
2080 instr->strm->u.sib.scale = (1 << scale);
2081 if (!noindex)
2082 instr->strm->u.sib.idx = get_register_idx(instr, index);
2083 if (!nobase)
2084 instr->strm->u.sib.bas = get_register_bas(instr, base);
2085
2086 /* May have a displacement, or an immediate */
2087 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2088 fsm_advance(fsm, 1, node_disp);
2089 } else if (opcode->immediate) {
2090 fsm_advance(fsm, 1, node_immediate);
2091 } else {
2092 fsm_advance(fsm, 1, NULL);
2093 }
2094
2095 return 0;
2096 }
2097
2098 static const struct x86_reg *
2099 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2100 {
2101 uint8_t enc = instr->regmodrm.reg;
2102 const struct x86_reg *reg;
2103 size_t regsize;
2104
2105 regsize = instr->operand_size;
2106
2107 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2108 if (reg->num == -1) {
2109 reg = resolve_special_register(instr, enc, regsize);
2110 }
2111
2112 return reg;
2113 }
2114
2115 static const struct x86_reg *
2116 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2117 {
2118 uint8_t enc = instr->regmodrm.rm;
2119 const struct x86_reg *reg;
2120 size_t regsize;
2121
2122 if (instr->strm->disp.type == DISP_NONE) {
2123 regsize = instr->operand_size;
2124 } else {
2125 /* Indirect access, the size is that of the address. */
2126 regsize = instr->address_size;
2127 }
2128
2129 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2130 if (reg->num == -1) {
2131 reg = resolve_special_register(instr, enc, regsize);
2132 }
2133
2134 return reg;
2135 }
2136
2137 static inline bool
2138 has_sib(struct x86_instr *instr)
2139 {
2140 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2141 }
2142
2143 static inline bool
2144 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2145 {
2146 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2147 instr->regmodrm.rm == RM_RBP_DISP32);
2148 }
2149
2150 static inline bool
2151 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2152 {
2153 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2154 instr->regmodrm.rm == RM_RBP_DISP32);
2155 }
2156
2157 static enum x86_disp_type
2158 get_disp_type(struct x86_instr *instr)
2159 {
2160 switch (instr->regmodrm.mod) {
2161 case MOD_DIS0: /* indirect */
2162 return DISP_0;
2163 case MOD_DIS1: /* indirect+1 */
2164 return DISP_1;
2165 case MOD_DIS4: /* indirect+4 */
2166 return DISP_4;
2167 case MOD_REG: /* direct */
2168 default: /* gcc */
2169 return DISP_NONE;
2170 }
2171 }
2172
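/*
 * Decode the ModRM byte: split it into mod/reg/rm, resolve the register and
 * memory operands, pick the emul function for group opcodes, and branch to
 * the SIB, displacement or immediate nodes as required. RIP-relative and
 * disp32-only forms are special-cased.
 */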
2173 static int
2174 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2175 {
2176 struct x86_store *strg, *strm;
2177 const struct x86_opcode *opcode;
2178 const struct x86_reg *reg;
2179 uint8_t byte;
2180
2181 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2182 return -1;
2183 }
2184
2185 opcode = instr->opcode;
2186
2187 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2188 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2189 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2190
2191 if (opcode->regtorm) {
2192 strg = &instr->src;
2193 strm = &instr->dst;
2194 } else { /* RM to REG */
2195 strm = &instr->src;
2196 strg = &instr->dst;
2197 }
2198
2199 /* Save for later use. */
2200 instr->strm = strm;
2201
2202 /*
2203 * Special cases: Groups. The REG field of REGMODRM is the index in
2204 * the group. op1 gets overwritten in the Immediate node, if any.
2205 */
2206 if (opcode->group1) {
2207 if (group1[instr->regmodrm.reg].emul == NULL) {
2208 return -1;
2209 }
2210 instr->emul = group1[instr->regmodrm.reg].emul;
2211 } else if (opcode->group3) {
2212 if (group3[instr->regmodrm.reg].emul == NULL) {
2213 return -1;
2214 }
2215 instr->emul = group3[instr->regmodrm.reg].emul;
2216 } else if (opcode->group11) {
2217 if (group11[instr->regmodrm.reg].emul == NULL) {
2218 return -1;
2219 }
2220 instr->emul = group11[instr->regmodrm.reg].emul;
2221 }
2222
2223 if (!opcode->immediate) {
2224 reg = get_register_reg(instr, opcode);
2225 if (reg == NULL) {
2226 return -1;
2227 }
2228 strg->type = STORE_REG;
2229 strg->u.reg = reg;
2230 }
2231
2232 /* The displacement applies to RM. */
2233 strm->disp.type = get_disp_type(instr);
2234
2235 if (has_sib(instr)) {
2236 /* Overwrites RM */
2237 fsm_advance(fsm, 1, node_sib);
2238 return 0;
2239 }
2240
2241 if (is_rip_relative(fsm, instr)) {
2242 /* Overwrites RM */
2243 strm->type = STORE_REG;
2244 strm->u.reg = &gpr_map__rip;
2245 strm->disp.type = DISP_4;
2246 fsm_advance(fsm, 1, node_disp);
2247 return 0;
2248 }
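	/*
	 * (Rewriting the RM operand as "RIP used as a base register plus a
	 * 4-byte displacement" lets the generic displacement and address
	 * computation code handle RIP-relative operands with no further
	 * special casing.)
	 */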
2249
2250 if (is_disp32_only(fsm, instr)) {
2251 /* Overwrites RM */
2252 strm->type = STORE_REG;
2253 strm->u.reg = NULL;
2254 strm->disp.type = DISP_4;
2255 fsm_advance(fsm, 1, node_disp);
2256 return 0;
2257 }
2258
2259 reg = get_register_rm(instr, opcode);
2260 if (reg == NULL) {
2261 return -1;
2262 }
2263 strm->type = STORE_REG;
2264 strm->u.reg = reg;
2265
2266 if (strm->disp.type == DISP_NONE) {
2267 /* Direct register addressing mode */
2268 if (opcode->immediate) {
2269 fsm_advance(fsm, 1, node_immediate);
2270 } else {
2271 fsm_advance(fsm, 1, NULL);
2272 }
2273 } else if (strm->disp.type == DISP_0) {
2274 /* Indirect register addressing mode */
2275 if (opcode->immediate) {
2276 fsm_advance(fsm, 1, node_immediate);
2277 } else {
2278 fsm_advance(fsm, 1, NULL);
2279 }
2280 } else {
2281 fsm_advance(fsm, 1, node_disp);
2282 }
2283
2284 return 0;
2285 }
2286
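/*
 * Effective operand size. Opcodes without a size override always use their
 * default size; for the others, REX.W forces 8 bytes, and the 0x66 legacy
 * prefix otherwise flips between the mode's default and its alternative
 * (4 <-> 2 outside 16-bit code, 2 <-> 4 in 16-bit code). For instance,
 * "66 89 08" in 64-bit code is a 2-byte mov %cx,(%rax).
 */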
2287 static size_t
2288 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2289 {
2290 const struct x86_opcode *opcode = instr->opcode;
2291 int opsize;
2292
2293 /* Get the opsize */
2294 if (!opcode->szoverride) {
2295 opsize = opcode->defsize;
2296 } else if (instr->rexpref.present && instr->rexpref.w) {
2297 opsize = 8;
2298 } else {
2299 if (!fsm->is16bit) {
2300 if (instr->legpref.opr_ovr) {
2301 opsize = 2;
2302 } else {
2303 opsize = 4;
2304 }
2305 } else { /* 16bit */
2306 if (instr->legpref.opr_ovr) {
2307 opsize = 4;
2308 } else {
2309 opsize = 2;
2310 }
2311 }
2312 }
2313
2314 return opsize;
2315 }
2316
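/*
 * Effective address size: 8 bytes in 64-bit mode, 4 in 32-bit mode, 2 in
 * 16-bit mode; the 0x67 legacy prefix selects the alternative size (4, 2
 * and 4 respectively).
 */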
2317 static size_t
2318 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2319 {
2320 if (fsm->is64bit) {
2321 if (__predict_false(instr->legpref.adr_ovr)) {
2322 return 4;
2323 }
2324 return 8;
2325 }
2326
2327 if (fsm->is32bit) {
2328 if (__predict_false(instr->legpref.adr_ovr)) {
2329 return 2;
2330 }
2331 return 4;
2332 }
2333
2334 /* 16bit. */
2335 if (__predict_false(instr->legpref.adr_ovr)) {
2336 return 4;
2337 }
2338 return 2;
2339 }
2340
2341 static int
2342 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2343 {
2344 const struct x86_opcode *opcode;
2345 uint8_t byte;
2346
2347 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2348 return -1;
2349 }
2350
2351 opcode = &primary_opcode_table[byte];
2352 if (__predict_false(!opcode->valid)) {
2353 return -1;
2354 }
2355
2356 instr->opcode = opcode;
2357 instr->emul = opcode->emul;
2358 instr->operand_size = get_operand_size(fsm, instr);
2359 instr->address_size = get_address_size(fsm, instr);
2360
2361 if (fsm->is64bit && (instr->operand_size == 4)) {
2362 /* Zero-extend to 64 bits. */
2363 instr->zeroextend_mask = ~size_to_mask(4);
2364 }
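	/*
	 * On x86-64, a 32-bit write to a GPR architecturally zeroes bits
	 * 63:32; the mask is applied to the guest register after emulation
	 * to reproduce that behavior.
	 */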
2365
2366 if (opcode->regmodrm) {
2367 fsm_advance(fsm, 1, node_regmodrm);
2368 } else if (opcode->dmo) {
2369 /* Direct-Memory Offsets */
2370 fsm_advance(fsm, 1, node_dmo);
2371 } else if (opcode->stos || opcode->lods) {
2372 fsm_advance(fsm, 1, node_stlo);
2373 } else if (opcode->movs) {
2374 fsm_advance(fsm, 1, node_movs);
2375 } else {
2376 return -1;
2377 }
2378
2379 return 0;
2380 }
2381
2382 static int
2383 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2384 {
2385 const struct x86_opcode *opcode;
2386 uint8_t byte;
2387
2388 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2389 return -1;
2390 }
2391
2392 opcode = &secondary_opcode_table[byte];
2393 if (__predict_false(!opcode->valid)) {
2394 return -1;
2395 }
2396
2397 instr->opcode = opcode;
2398 instr->emul = opcode->emul;
2399 instr->operand_size = get_operand_size(fsm, instr);
2400 instr->address_size = get_address_size(fsm, instr);
2401
2402 if (fsm->is64bit && (instr->operand_size == 4)) {
2403 /* Zero-extend to 64 bits. */
2404 instr->zeroextend_mask = ~size_to_mask(4);
2405 }
2406
2407 if (opcode->flags & FLAG_ze) {
2408 /*
2409 * Compute the mask for zero-extend. Update the operand size,
2410 	 * since we move fewer bytes.
2411 */
2412 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2413 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2414 instr->operand_size = opcode->defsize;
2415 }
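	/*
	 * For example, assuming MOVZX (0F B6, defsize 1) is a FLAG_ze opcode
	 * here: with a 4-byte operand size in 64-bit code the mask ends up
	 * covering bits 63:8, so a single byte is transferred and the rest
	 * of the destination register is cleared, as MOVZX requires.
	 */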
2416
2417 if (opcode->regmodrm) {
2418 fsm_advance(fsm, 1, node_regmodrm);
2419 } else {
2420 return -1;
2421 }
2422
2423 return 0;
2424 }
2425
2426 static int
2427 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2428 {
2429 uint8_t byte;
2430
2431 #define ESCAPE 0x0F
2432 #define VEX_1 0xC5
2433 #define VEX_2 0xC4
2434 #define XOP 0x8F
2435
2436 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2437 return -1;
2438 }
2439
2440 /*
2441 * We don't take XOP. It is AMD-specific, and it was removed shortly
2442 * after being introduced.
2443 */
2444 if (byte == ESCAPE) {
2445 fsm_advance(fsm, 1, node_secondary_opcode);
2446 } else if (!instr->rexpref.present) {
2447 if (byte == VEX_1) {
2448 return -1;
2449 } else if (byte == VEX_2) {
2450 return -1;
2451 } else {
2452 fsm->fn = node_primary_opcode;
2453 }
2454 } else {
2455 fsm->fn = node_primary_opcode;
2456 }
2457
2458 return 0;
2459 }
2460
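/*
 * A REX prefix (0x40-0x4F) is only meaningful in 64-bit code; outside of it
 * these byte values encode the one-byte INC/DEC instructions, which never
 * touch memory, so failing the decode in that case is harmless for MMIO
 * emulation.
 */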
2461 static int
2462 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2463 {
2464 struct x86_rexpref *rexpref = &instr->rexpref;
2465 uint8_t byte;
2466 size_t n = 0;
2467
2468 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2469 return -1;
2470 }
2471
2472 if (byte >= 0x40 && byte <= 0x4F) {
2473 if (__predict_false(!fsm->is64bit)) {
2474 return -1;
2475 }
2476 rexpref->b = ((byte & 0x1) != 0);
2477 rexpref->x = ((byte & 0x2) != 0);
2478 rexpref->r = ((byte & 0x4) != 0);
2479 rexpref->w = ((byte & 0x8) != 0);
2480 rexpref->present = true;
2481 n = 1;
2482 }
2483
2484 fsm_advance(fsm, n, node_main);
2485 return 0;
2486 }
2487
2488 static int
2489 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2490 {
2491 uint8_t byte;
2492
2493 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2494 return -1;
2495 }
2496
2497 if (byte == LEG_OPR_OVR) {
2498 instr->legpref.opr_ovr = 1;
2499 } else if (byte == LEG_OVR_DS) {
2500 instr->legpref.seg = NVMM_X64_SEG_DS;
2501 } else if (byte == LEG_OVR_ES) {
2502 instr->legpref.seg = NVMM_X64_SEG_ES;
2503 } else if (byte == LEG_REP) {
2504 instr->legpref.rep = 1;
2505 } else if (byte == LEG_OVR_GS) {
2506 instr->legpref.seg = NVMM_X64_SEG_GS;
2507 } else if (byte == LEG_OVR_FS) {
2508 instr->legpref.seg = NVMM_X64_SEG_FS;
2509 } else if (byte == LEG_ADR_OVR) {
2510 instr->legpref.adr_ovr = 1;
2511 } else if (byte == LEG_OVR_CS) {
2512 instr->legpref.seg = NVMM_X64_SEG_CS;
2513 } else if (byte == LEG_OVR_SS) {
2514 instr->legpref.seg = NVMM_X64_SEG_SS;
2515 } else if (byte == LEG_REPN) {
2516 instr->legpref.repn = 1;
2517 } else if (byte == LEG_LOCK) {
2518 /* ignore */
2519 } else {
2520 /* not a legacy prefix */
2521 fsm_advance(fsm, 0, node_rex_prefix);
2522 return 0;
2523 }
2524
2525 fsm_advance(fsm, 1, node_legacy_prefix);
2526 return 0;
2527 }
2528
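/*
 * Decode the given instruction bytes by walking the FSM: legacy prefixes
 * first, then an optional REX prefix, then the primary opcode (or the
 * secondary table after a 0x0F escape), followed by the ModRM, SIB,
 * displacement and immediate nodes as required by the opcode.
 */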
2529 static int
2530 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2531 struct nvmm_x64_state *state)
2532 {
2533 struct x86_decode_fsm fsm;
2534 int ret;
2535
2536 memset(instr, 0, sizeof(*instr));
2537 instr->legpref.seg = -1;
2538 instr->src.hardseg = -1;
2539 instr->dst.hardseg = -1;
2540
2541 fsm.is64bit = is_64bit(state);
2542 fsm.is32bit = is_32bit(state);
2543 fsm.is16bit = is_16bit(state);
2544
2545 fsm.fn = node_legacy_prefix;
2546 fsm.buf = inst_bytes;
2547 fsm.end = inst_bytes + inst_len;
2548
2549 while (fsm.fn != NULL) {
2550 ret = (*fsm.fn)(&fsm, instr);
2551 if (ret == -1)
2552 return -1;
2553 }
2554
2555 instr->len = fsm.buf - inst_bytes;
2556
2557 return 0;
2558 }
2559
2560 /* -------------------------------------------------------------------------- */
2561
2562 #define EXEC_INSTR(sz, instr) \
2563 static uint##sz##_t \
2564 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2565 { \
2566 uint##sz##_t res; \
2567 __asm __volatile ( \
2568 #instr " %2, %3;" \
2569 "mov %3, %1;" \
2570 "pushfq;" \
2571 "popq %0" \
2572 : "=r" (*rflags), "=r" (res) \
2573 : "r" (op1), "r" (op2)); \
2574 return res; \
2575 }
2576
2577 #define EXEC_DISPATCHER(instr) \
2578 static uint64_t \
2579 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2580 { \
2581 switch (opsize) { \
2582 case 1: \
2583 return exec_##instr##8(op1, op2, rflags); \
2584 case 2: \
2585 return exec_##instr##16(op1, op2, rflags); \
2586 case 4: \
2587 return exec_##instr##32(op1, op2, rflags); \
2588 default: \
2589 return exec_##instr##64(op1, op2, rflags); \
2590 } \
2591 }
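/*
 * The exec_* helpers run the emulated ALU instruction natively on the host
 * at the requested width and capture the resulting RFLAGS with pushfq/popq,
 * so the arithmetic flags do not have to be recomputed in C. For instance,
 * EXEC_INSTR(8, sub) expands to exec_sub8(), and exec_sub() dispatches to
 * the right width. The PSL_*_MASK constants below select which of those
 * flags each instruction is architecturally allowed to modify in the guest.
 */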
2592
2593 /* SUB: ret = op1 - op2 */
2594 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2595 EXEC_INSTR(8, sub)
2596 EXEC_INSTR(16, sub)
2597 EXEC_INSTR(32, sub)
2598 EXEC_INSTR(64, sub)
2599 EXEC_DISPATCHER(sub)
2600
2601 /* OR: ret = op1 | op2 */
2602 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2603 EXEC_INSTR(8, or)
2604 EXEC_INSTR(16, or)
2605 EXEC_INSTR(32, or)
2606 EXEC_INSTR(64, or)
2607 EXEC_DISPATCHER(or)
2608
2609 /* AND: ret = op1 & op2 */
2610 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2611 EXEC_INSTR(8, and)
2612 EXEC_INSTR(16, and)
2613 EXEC_INSTR(32, and)
2614 EXEC_INSTR(64, and)
2615 EXEC_DISPATCHER(and)
2616
2617 /* XOR: ret = op1 ^ op2 */
2618 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2619 EXEC_INSTR(8, xor)
2620 EXEC_INSTR(16, xor)
2621 EXEC_INSTR(32, xor)
2622 EXEC_INSTR(64, xor)
2623 EXEC_DISPATCHER(xor)
2624
2625 /* -------------------------------------------------------------------------- */
2626
2627 /*
2628 * Emulation functions. We don't care about the order of the operands, except
2629  * for SUB, CMP and TEST. For these we look at mem->write to determine which
2630  * operand is op1 and which is op2.
2631 */
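/*
 * Common flow for the read-modify-write helpers below: assist_mem_single()
 * pre-loads mem->data with the guest-side operand (a register or an
 * immediate for writes, the destination register for reads), the helper
 * fetches the other operand through the registered memory callback, runs
 * the host instruction, then either writes the result back through the
 * callback or leaves it in mem->data for the caller to fold into the
 * destination register. The guest RFLAGS is updated under the matching
 * PSL_*_MASK.
 */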
2632
2633 static void
2634 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2635 {
2636 uint64_t *retval = (uint64_t *)mem->data;
2637 const bool write = mem->write;
2638 uint64_t *op1, op2, fl, ret;
2639
2640 op1 = (uint64_t *)mem->data;
2641 op2 = 0;
2642
2643 /* Fetch the value to be OR'ed (op2). */
2644 mem->data = (uint8_t *)&op2;
2645 mem->write = false;
2646 (*__callbacks.mem)(mem);
2647
2648 /* Perform the OR. */
2649 ret = exec_or(*op1, op2, &fl, mem->size);
2650
2651 if (write) {
2652 /* Write back the result. */
2653 mem->data = (uint8_t *)&ret;
2654 mem->write = true;
2655 (*__callbacks.mem)(mem);
2656 } else {
2657 /* Return data to the caller. */
2658 *retval = ret;
2659 }
2660
2661 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2662 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2663 }
2664
2665 static void
2666 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2667 {
2668 uint64_t *retval = (uint64_t *)mem->data;
2669 const bool write = mem->write;
2670 uint64_t *op1, op2, fl, ret;
2671
2672 op1 = (uint64_t *)mem->data;
2673 op2 = 0;
2674
2675 /* Fetch the value to be AND'ed (op2). */
2676 mem->data = (uint8_t *)&op2;
2677 mem->write = false;
2678 (*__callbacks.mem)(mem);
2679
2680 /* Perform the AND. */
2681 ret = exec_and(*op1, op2, &fl, mem->size);
2682
2683 if (write) {
2684 /* Write back the result. */
2685 mem->data = (uint8_t *)&ret;
2686 mem->write = true;
2687 (*__callbacks.mem)(mem);
2688 } else {
2689 /* Return data to the caller. */
2690 *retval = ret;
2691 }
2692
2693 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2694 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2695 }
2696
2697 static void
2698 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2699 {
2700 uint64_t *retval = (uint64_t *)mem->data;
2701 const bool write = mem->write;
2702 uint64_t *op1, *op2, fl, ret;
2703 uint64_t tmp;
2704 bool memop1;
2705
2706 memop1 = !mem->write;
2707 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2708 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2709
2710 /* Fetch the value to be SUB'ed (op1 or op2). */
2711 mem->data = (uint8_t *)&tmp;
2712 mem->write = false;
2713 (*__callbacks.mem)(mem);
2714
2715 /* Perform the SUB. */
2716 ret = exec_sub(*op1, *op2, &fl, mem->size);
2717
2718 if (write) {
2719 /* Write back the result. */
2720 mem->data = (uint8_t *)&ret;
2721 mem->write = true;
2722 (*__callbacks.mem)(mem);
2723 } else {
2724 /* Return data to the caller. */
2725 *retval = ret;
2726 }
2727
2728 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2729 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2730 }
2731
2732 static void
2733 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2734 {
2735 uint64_t *retval = (uint64_t *)mem->data;
2736 const bool write = mem->write;
2737 uint64_t *op1, op2, fl, ret;
2738
2739 op1 = (uint64_t *)mem->data;
2740 op2 = 0;
2741
2742 /* Fetch the value to be XOR'ed (op2). */
2743 mem->data = (uint8_t *)&op2;
2744 mem->write = false;
2745 (*__callbacks.mem)(mem);
2746
2747 /* Perform the XOR. */
2748 ret = exec_xor(*op1, op2, &fl, mem->size);
2749
2750 if (write) {
2751 /* Write back the result. */
2752 mem->data = (uint8_t *)&ret;
2753 mem->write = true;
2754 (*__callbacks.mem)(mem);
2755 } else {
2756 /* Return data to the caller. */
2757 *retval = ret;
2758 }
2759
2760 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2761 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2762 }
2763
2764 static void
2765 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2766 {
2767 uint64_t *op1, *op2, fl;
2768 uint64_t tmp;
2769 bool memop1;
2770
2771 memop1 = !mem->write;
2772 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2773 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2774
2775 /* Fetch the value to be CMP'ed (op1 or op2). */
2776 mem->data = (uint8_t *)&tmp;
2777 mem->write = false;
2778 (*__callbacks.mem)(mem);
2779
2780 /* Perform the CMP. */
2781 exec_sub(*op1, *op2, &fl, mem->size);
2782
2783 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2784 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2785 }
2786
2787 static void
2788 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2789 {
2790 uint64_t *op1, *op2, fl;
2791 uint64_t tmp;
2792 bool memop1;
2793
2794 memop1 = !mem->write;
2795 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2796 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2797
2798 /* Fetch the value to be TEST'ed (op1 or op2). */
2799 mem->data = (uint8_t *)&tmp;
2800 mem->write = false;
2801 (*__callbacks.mem)(mem);
2802
2803 /* Perform the TEST. */
2804 exec_and(*op1, *op2, &fl, mem->size);
2805
2806 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2807 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2808 }
2809
2810 static void
2811 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2812 {
2813 /*
2814 	 * Nothing special: a plain move, just forward the access to the callback.
2815 */
2816 (*__callbacks.mem)(mem);
2817 }
2818
2819 static void
2820 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2821 {
2822 /*
2823 * Just move, and update RDI.
2824 */
2825 (*__callbacks.mem)(mem);
2826
2827 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2828 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2829 } else {
2830 gprs[NVMM_X64_GPR_RDI] += mem->size;
2831 }
2832 }
2833
2834 static void
2835 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2836 {
2837 /*
2838 * Just move, and update RSI.
2839 */
2840 (*__callbacks.mem)(mem);
2841
2842 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2843 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2844 } else {
2845 gprs[NVMM_X64_GPR_RSI] += mem->size;
2846 }
2847 }
2848
2849 static void
2850 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2851 {
2852 /*
2853 * Special instruction: double memory operand. Don't call the cb,
2854 	 * because the copy has already been performed by assist_mem_double().
2855 */
2856
2857 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2858 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2859 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2860 } else {
2861 gprs[NVMM_X64_GPR_RSI] += mem->size;
2862 gprs[NVMM_X64_GPR_RDI] += mem->size;
2863 }
2864 }
2865
2866 /* -------------------------------------------------------------------------- */
2867
2868 static inline uint64_t
2869 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2870 {
2871 uint64_t val;
2872
2873 val = state->gprs[gpr];
2874 val &= size_to_mask(instr->address_size);
2875
2876 return val;
2877 }
2878
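/*
 * Compute the guest virtual address of a decoded operand: base register, or
 * SIB base + scale*index, or a direct memory offset, plus the displacement.
 * The register values are truncated to the address size. The effective
 * segment (hardseg, prefix override, or DS) is then applied: only the FS/GS
 * base in long mode, a full limit check plus base otherwise.
 */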
2879 static int
2880 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2881 struct x86_store *store, gvaddr_t *gvap, size_t size)
2882 {
2883 struct x86_sib *sib;
2884 gvaddr_t gva = 0;
2885 uint64_t reg;
2886 int ret, seg;
2887
2888 if (store->type == STORE_SIB) {
2889 sib = &store->u.sib;
2890 if (sib->bas != NULL)
2891 gva += gpr_read_address(instr, state, sib->bas->num);
2892 if (sib->idx != NULL) {
2893 reg = gpr_read_address(instr, state, sib->idx->num);
2894 gva += sib->scale * reg;
2895 }
2896 } else if (store->type == STORE_REG) {
2897 if (store->u.reg == NULL) {
2898 /* The base is null. Happens with disp32-only. */
2899 } else {
2900 gva = gpr_read_address(instr, state, store->u.reg->num);
2901 }
2902 } else {
2903 gva = store->u.dmo;
2904 }
2905
2906 if (store->disp.type != DISP_NONE) {
2907 gva += store->disp.data;
2908 }
2909
2910 if (store->hardseg != -1) {
2911 seg = store->hardseg;
2912 } else {
2913 if (__predict_false(instr->legpref.seg != -1)) {
2914 seg = instr->legpref.seg;
2915 } else {
2916 seg = NVMM_X64_SEG_DS;
2917 }
2918 }
2919
2920 if (__predict_true(is_long_mode(state))) {
2921 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2922 segment_apply(&state->segs[seg], &gva);
2923 }
2924 } else {
2925 ret = segment_check(&state->segs[seg], gva, size);
2926 if (ret == -1)
2927 return -1;
2928 segment_apply(&state->segs[seg], &gva);
2929 }
2930
2931 *gvap = gva;
2932 return 0;
2933 }
2934
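/*
 * Fetch a few bytes at RIP (enough to cover the legacy prefixes) and scan
 * them for a segment override; DS is returned when none is present.
 */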
2935 static int
2936 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2937 {
2938 uint8_t inst_bytes[5], byte;
2939 size_t i, fetchsize;
2940 gvaddr_t gva;
2941 int ret, seg;
2942
2943 fetchsize = sizeof(inst_bytes);
2944
2945 gva = state->gprs[NVMM_X64_GPR_RIP];
2946 if (__predict_false(!is_long_mode(state))) {
2947 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2948 fetchsize);
2949 if (ret == -1)
2950 return -1;
2951 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2952 }
2953
2954 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2955 if (ret == -1)
2956 return -1;
2957
2958 seg = NVMM_X64_SEG_DS;
2959 for (i = 0; i < fetchsize; i++) {
2960 byte = inst_bytes[i];
2961
2962 if (byte == LEG_OVR_DS) {
2963 seg = NVMM_X64_SEG_DS;
2964 } else if (byte == LEG_OVR_ES) {
2965 seg = NVMM_X64_SEG_ES;
2966 } else if (byte == LEG_OVR_GS) {
2967 seg = NVMM_X64_SEG_GS;
2968 } else if (byte == LEG_OVR_FS) {
2969 seg = NVMM_X64_SEG_FS;
2970 } else if (byte == LEG_OVR_CS) {
2971 seg = NVMM_X64_SEG_CS;
2972 } else if (byte == LEG_OVR_SS) {
2973 seg = NVMM_X64_SEG_SS;
2974 } else if (byte == LEG_OPR_OVR) {
2975 /* nothing */
2976 } else if (byte == LEG_ADR_OVR) {
2977 /* nothing */
2978 } else if (byte == LEG_REP) {
2979 /* nothing */
2980 } else if (byte == LEG_REPN) {
2981 /* nothing */
2982 } else if (byte == LEG_LOCK) {
2983 /* nothing */
2984 } else {
2985 return seg;
2986 }
2987 }
2988
2989 return seg;
2990 }
2991
2992 static int
2993 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2994 struct nvmm_exit *exit)
2995 {
2996 size_t fetchsize;
2997 gvaddr_t gva;
2998 int ret;
2999
3000 fetchsize = sizeof(exit->u.mem.inst_bytes);
3001
3002 gva = state->gprs[NVMM_X64_GPR_RIP];
3003 if (__predict_false(!is_long_mode(state))) {
3004 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3005 fetchsize);
3006 if (ret == -1)
3007 return -1;
3008 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3009 }
3010
3011 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3012 fetchsize);
3013 if (ret == -1)
3014 return -1;
3015
3016 exit->u.mem.inst_len = fetchsize;
3017
3018 return 0;
3019 }
3020
3021 static int
3022 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3023 struct x86_instr *instr)
3024 {
3025 struct nvmm_mem mem;
3026 uint8_t data[8];
3027 gvaddr_t gva;
3028 size_t size;
3029 int ret;
3030
3031 size = instr->operand_size;
3032
3033 /* Source. */
3034 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3035 if (ret == -1)
3036 return -1;
3037 ret = read_guest_memory(mach, state, gva, data, size);
3038 if (ret == -1)
3039 return -1;
3040
3041 /* Destination. */
3042 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3043 if (ret == -1)
3044 return -1;
3045 ret = write_guest_memory(mach, state, gva, data, size);
3046 if (ret == -1)
3047 return -1;
3048
3049 mem.size = size;
3050 (*instr->emul->func)(&mem, state->gprs);
3051
3052 return 0;
3053 }
3054
3055 #define DISASSEMBLER_BUG() \
3056 do { \
3057 errno = EINVAL; \
3058 return -1; \
3059 	} while (0)
3060
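/*
 * Emulate an instruction that has a single memory operand. The direction is
 * derived from the decoded source: a direct register or an immediate source
 * means the memory operand is the destination (a guest write); anything else
 * means the memory operand is the source (a guest read). The guest-side
 * operand is copied into mem.data, the emulation function runs, and for
 * reads the result is folded back into the destination register with the
 * zero-extend mask applied, unless the emulation does not touch its
 * destination (the CMP/TEST style helpers).
 */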
3061 static int
3062 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3063 struct x86_instr *instr, struct nvmm_exit *exit)
3064 {
3065 struct nvmm_mem mem;
3066 uint8_t membuf[8];
3067 uint64_t val;
3068
3069 memset(membuf, 0, sizeof(membuf));
3070
3071 mem.gpa = exit->u.mem.gpa;
3072 mem.size = instr->operand_size;
3073 mem.data = membuf;
3074
3075 /* Determine the direction. */
3076 switch (instr->src.type) {
3077 case STORE_REG:
3078 if (instr->src.disp.type != DISP_NONE) {
3079 /* Indirect access. */
3080 mem.write = false;
3081 } else {
3082 /* Direct access. */
3083 mem.write = true;
3084 }
3085 break;
3086 case STORE_IMM:
3087 mem.write = true;
3088 break;
3089 case STORE_SIB:
3090 mem.write = false;
3091 break;
3092 case STORE_DMO:
3093 mem.write = false;
3094 break;
3095 default:
3096 DISASSEMBLER_BUG();
3097 }
3098
3099 if (mem.write) {
3100 switch (instr->src.type) {
3101 case STORE_REG:
3102 if (instr->src.disp.type != DISP_NONE) {
3103 DISASSEMBLER_BUG();
3104 }
3105 val = state->gprs[instr->src.u.reg->num];
3106 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3107 memcpy(mem.data, &val, mem.size);
3108 break;
3109 case STORE_IMM:
3110 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3111 break;
3112 default:
3113 DISASSEMBLER_BUG();
3114 }
3115 } else if (instr->emul->read) {
3116 if (instr->dst.type != STORE_REG) {
3117 DISASSEMBLER_BUG();
3118 }
3119 if (instr->dst.disp.type != DISP_NONE) {
3120 DISASSEMBLER_BUG();
3121 }
3122 val = state->gprs[instr->dst.u.reg->num];
3123 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3124 memcpy(mem.data, &val, mem.size);
3125 }
3126
3127 (*instr->emul->func)(&mem, state->gprs);
3128
3129 if (!instr->emul->notouch && !mem.write) {
3130 if (instr->dst.type != STORE_REG) {
3131 DISASSEMBLER_BUG();
3132 }
3133 memcpy(&val, membuf, sizeof(uint64_t));
3134 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3135 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3136 state->gprs[instr->dst.u.reg->num] |= val;
3137 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3138 }
3139
3140 return 0;
3141 }
3142
3143 int
3144 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3145 struct nvmm_exit *exit)
3146 {
3147 struct nvmm_x64_state state;
3148 struct x86_instr instr;
3149 	uint64_t cnt = 0; /* silence GCC */
3150 int ret;
3151
3152 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3153 errno = EINVAL;
3154 return -1;
3155 }
3156
3157 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3158 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3159 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3160 if (ret == -1)
3161 return -1;
3162
3163 if (exit->u.mem.inst_len == 0) {
3164 /*
3165 * The instruction was not fetched from the kernel. Fetch
3166 * it ourselves.
3167 */
3168 ret = fetch_instruction(mach, &state, exit);
3169 if (ret == -1)
3170 return -1;
3171 }
3172
3173 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3174 &instr, &state);
3175 if (ret == -1) {
3176 errno = ENODEV;
3177 return -1;
3178 }
3179
3180 if (instr.legpref.rep || instr.legpref.repn) {
3181 cnt = rep_get_cnt(&state, instr.address_size);
3182 if (__predict_false(cnt == 0)) {
3183 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3184 goto out;
3185 }
3186 }
3187
3188 if (instr.opcode->movs) {
3189 ret = assist_mem_double(mach, &state, &instr);
3190 } else {
3191 ret = assist_mem_single(mach, &state, &instr, exit);
3192 }
3193 if (ret == -1) {
3194 errno = ENODEV;
3195 return -1;
3196 }
3197
3198 if (instr.legpref.rep || instr.legpref.repn) {
3199 cnt -= 1;
3200 rep_set_cnt(&state, instr.address_size, cnt);
3201 if (cnt == 0) {
3202 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3203 } else if (__predict_false(instr.legpref.repn)) {
3204 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3205 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3206 }
3207 }
3208 } else {
3209 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3210 }
3211
3212 out:
3213 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3214 if (ret == -1)
3215 return -1;
3216
3217 return 0;
3218 }
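
/*
 * Typical usage, as a rough sketch (assuming the usual libnvmm run loop of
 * this API generation; error handling mostly elided):
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(&mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(&mach, cpuid, &exit) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_mem");
 *			break;
 *		...
 *		}
 *	}
 */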
3219