/*	$NetBSD: libnvmm_x86.c,v 1.7 2018/12/29 17:54:54 maxv Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmparam.h>
#include <machine/pte.h>
#include <machine/psl.h>

#include "nvmm.h"

#include <x86/specialreg.h>

extern struct nvmm_callbacks __callbacks;
/* -------------------------------------------------------------------------- */

/*
 * Undocumented debugging function. Helpful.
 */
int
nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_x64_state state;
	size_t i;
	int ret;

	const char *segnames[] = {
		"CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
	};

	ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
	if (ret == -1)
		return -1;

	printf("+ VCPU id=%d\n", (int)cpuid);
	printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
	printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
	printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
	printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
	printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
	for (i = 0; i < NVMM_X64_NSEG; i++) {
		printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d\n",
		    segnames[i],
		    state.segs[i].selector,
		    (void *)state.segs[i].base,
		    (void *)state.segs[i].limit,
		    state.segs[i].attrib.p);
	}

	return 0;
}
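
/*
 * Example caller (hypothetical, not part of this file): a VMM run loop
 * can dump the VCPU before bailing out on an exit reason it does not
 * handle, e.g.:
 *
 *	default:
 *		nvmm_vcpu_dump(&mach, cpuid);
 *		errx(EXIT_FAILURE, "unhandled exit reason");
 */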

/* -------------------------------------------------------------------------- */

#define PTE32_L1_SHIFT	12
#define PTE32_L2_SHIFT	22

#define PTE32_L2_MASK	0xffc00000
#define PTE32_L1_MASK	0x003ff000

#define PTE32_L2_FRAME	(PTE32_L2_MASK)
#define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)

#define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)

typedef uint32_t pte_32bit_t;
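
/*
 * Illustrative decomposition: a gva such as 0x0040f123 yields
 * pte32_l2idx() = 0x001 (bits 31:22), pte32_l1idx() = 0x00f (bits 21:12),
 * and a page offset of 0x123 (bits 11:0).
 */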

static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L2gpa, L1gpa;
	uintptr_t L2hva, L1hva;
	pte_32bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L2. */
	L2gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L2hva;
	pte = pdir[pte32_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_L2_FRAME);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L1hva;
	pte = pdir[pte32_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE32_PAE_L1_SHIFT	12
#define PTE32_PAE_L2_SHIFT	21
#define PTE32_PAE_L3_SHIFT	30

#define PTE32_PAE_L3_MASK	0xc0000000
#define PTE32_PAE_L2_MASK	0x3fe00000
#define PTE32_PAE_L1_MASK	0x001ff000

#define PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)

#define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)

typedef uint64_t pte_32bit_pae_t;
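
/*
 * Illustrative 2/9/9/12 split: for gva 0xc040f123, pte32_pae_l3idx() = 3,
 * pte32_pae_l2idx() = 0x002, pte32_pae_l1idx() = 0x00f, and the page
 * offset is 0x123.
 */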

static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L3gpa, L2gpa, L1gpa;
	uintptr_t L3hva, L2hva, L1hva;
	pte_32bit_pae_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L3. */
	L3gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L3hva;
	pte = pdir[pte32_pae_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L2hva;
	pte = pdir[pte32_pae_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_PAE_L2_FRAME);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L1hva;
	pte = pdir[pte32_pae_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE64_L1_SHIFT	12
#define PTE64_L2_SHIFT	21
#define PTE64_L3_SHIFT	30
#define PTE64_L4_SHIFT	39

#define PTE64_L4_MASK	0x0000ff8000000000
#define PTE64_L3_MASK	0x0000007fc0000000
#define PTE64_L2_MASK	0x000000003fe00000
#define PTE64_L1_MASK	0x00000000001ff000

#define PTE64_L4_FRAME	PTE64_L4_MASK
#define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)

#define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)

typedef uint64_t pte_64bit_t;

static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
	/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND	0xffff800000000000ULL
	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
}
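
/*
 * For instance, 0x00007fffffffffff and 0xffff800000000000 are canonical,
 * while 0x0000800000000000 is not: its bits 63:47 are neither all-zero
 * nor all-one.
 */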

static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
	uintptr_t L4hva, L3hva, L2hva, L1hva;
	pte_64bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	if (!x86_gva_64bit_canonical(gva))
		return -1;

	/* Parse L4. */
	L4gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L4hva;
	pte = pdir[pte64_l4idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L3. */
	L3gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L3hva;
	pte = pdir[pte64_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L3_FRAME);
		return 0;
	}

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L2hva;
	pte = pdir[pte64_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L2_FRAME);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L1hva;
	pte = pdir[pte64_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	bool is_pae, is_lng, has_pse;
	uint64_t cr3;
	size_t off;
	int ret;

	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
		/* No paging. */
		*prot = NVMM_PROT_ALL;
		*gpa = gva;
		return 0;
	}

	off = (gva & PAGE_MASK);
	gva &= ~PAGE_MASK;

	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
	cr3 = state->crs[NVMM_X64_CR_CR3];

	if (is_pae && is_lng) {
		/* 64bit */
		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, has_pse, prot);
	} else if (is_pae && !is_lng) {
		/* 32bit PAE */
		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
		    prot);
	} else if (!is_pae && !is_lng) {
		/* 32bit */
		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
	} else {
		ret = -1;
	}

	if (ret == -1) {
		/* Don't touch *gpa on failure. */
		errno = EFAULT;
		return -1;
	}

	*gpa = *gpa + off;

	return 0;
}

int
nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	struct nvmm_x64_state state;
	int ret;

	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
}

/* -------------------------------------------------------------------------- */

static inline bool
is_64bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
}

static inline bool
is_32bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
}

static inline bool
is_16bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
}

static inline bool
is_long_mode(struct nvmm_x64_state *state)
{
	return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
}

static int
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva, size_t size)
{
	uint64_t limit;

	/*
	 * This is incomplete. We should check topdown, etc, really that's
	 * tiring.
	 */
	if (__predict_false(!seg->attrib.p)) {
		goto error;
	}

	limit = (seg->limit + 1);
	if (__predict_true(seg->attrib.gran)) {
		limit *= PAGE_SIZE;
	}

	if (__predict_false(*gva + size > limit)) {
		goto error;
	}

	*gva += seg->base;
	return 0;

error:
	errno = EFAULT;
	return -1;
}
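
/*
 * Example (illustrative): a flat 32-bit segment has limit = 0xfffff with
 * the granularity bit set, so the effective limit computed above is
 * (0xfffff + 1) * PAGE_SIZE = 4GB.
 */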

static uint64_t
mask_from_adsize(size_t adsize)
{
	switch (adsize) {
	case 8:
		return 0xFFFFFFFFFFFFFFFF;
	case 4:
		return 0x00000000FFFFFFFF;
	case 2:
	default: /* impossible */
		return 0x000000000000FFFF;
	}
}

static uint64_t
rep_dec_apply(struct nvmm_x64_state *state, size_t adsize)
{
	uint64_t mask, cnt;

	mask = mask_from_adsize(adsize);

	cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
	cnt -= 1;
	cnt &= mask;

	state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
	state->gprs[NVMM_X64_GPR_RCX] |= cnt;

	return cnt;
}
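
/*
 * Example (illustrative): with a 16-bit address size and RCX = 0x12340001,
 * rep_dec_apply() returns 0 and leaves RCX = 0x12340000; only the CX part
 * is decremented, the upper bits are preserved.
 */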

static int
read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_READ))) {
		errno = EFAULT;
		return -1;
	}

	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.gva = gva;
		mem.gpa = gpa;
		mem.write = false;
		mem.size = size;
		(*__callbacks.mem)(&mem);
		memcpy(data, mem.data, size);
	} else {
		memcpy(data, (uint8_t *)hva, size);
	}

	if (remain > 0) {
		ret = read_guest_memory(mach, state, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}
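
/*
 * Example of the split above (illustrative, 4KB pages): an 8-byte read at
 * gva 0x1ffc keeps 4 bytes for the current page and recurses for the
 * remaining 4 bytes at gva 0x2000, which may translate to an unrelated
 * gpa, or hit MMIO.
 */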

static int
write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
		errno = EFAULT;
		return -1;
	}

	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.gva = gva;
		mem.gpa = gpa;
		mem.write = true;
		memcpy(mem.data, data, size);
		mem.size = size;
		(*__callbacks.mem)(&mem);
	} else {
		memcpy((uint8_t *)hva, data, size);
	}

	if (remain > 0) {
		ret = write_guest_memory(mach, state, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}

/* -------------------------------------------------------------------------- */

int
nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_exit *exit)
{
	struct nvmm_x64_state state;
	struct nvmm_io io;
	uint64_t cnt;
	gvaddr_t gva;
	int reg = 0; /* GCC */
	int ret;

	if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
		errno = EINVAL;
		return -1;
	}

	io.port = exit->u.io.port;
	io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
	io.size = exit->u.io.operand_size;

	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	/*
	 * Determine GVA.
	 */
	if (exit->u.io.str) {
		if (io.in) {
			reg = NVMM_X64_GPR_RDI;
		} else {
			reg = NVMM_X64_GPR_RSI;
		}

		gva = state.gprs[reg];
		gva &= mask_from_adsize(exit->u.io.address_size);

		if (!is_long_mode(&state)) {
			ret = segment_apply(&state.segs[exit->u.io.seg], &gva,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	if (!io.in) {
		if (!exit->u.io.str) {
			memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
		} else {
			ret = read_guest_memory(mach, &state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	(*__callbacks.io)(&io);

	if (io.in) {
		if (!exit->u.io.str) {
			memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
		} else {
			ret = write_guest_memory(mach, &state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	if (exit->u.io.str) {
		if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
			state.gprs[reg] -= io.size;
		} else {
			state.gprs[reg] += io.size;
		}
	}

	if (exit->u.io.rep) {
		cnt = rep_dec_apply(&state, exit->u.io.address_size);
		if (cnt == 0) {
			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
		}
	} else {
		state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
	}

	ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
	if (ret == -1)
		return -1;

	return 0;
}
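
/*
 * Typical caller (hypothetical sketch): in the VMM run loop, after
 * nvmm_vcpu_run(),
 *
 *	case NVMM_EXIT_IO:
 *		if (nvmm_assist_io(&mach, cpuid, &exit) == -1)
 *			err(EXIT_FAILURE, "I/O assist failed");
 *		break;
 */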

/* -------------------------------------------------------------------------- */

static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);

enum x86_legpref {
	/* Group 1 */
	LEG_LOCK = 0,
	LEG_REPN,	/* REPNE/REPNZ */
	LEG_REP,	/* REP/REPE/REPZ */
	/* Group 2 */
	LEG_OVR_CS,
	LEG_OVR_SS,
	LEG_OVR_DS,
	LEG_OVR_ES,
	LEG_OVR_FS,
	LEG_OVR_GS,
	LEG_BRN_TAKEN,
	LEG_BRN_NTAKEN,
	/* Group 3 */
	LEG_OPR_OVR,
	/* Group 4 */
	LEG_ADR_OVR,

	NLEG
};

struct x86_rexpref {
	bool present;
	bool w;
	bool r;
	bool x;
	bool b;
};

struct x86_reg {
	int num;	/* NVMM GPR state index */
	uint64_t mask;
};

enum x86_disp_type {
	DISP_NONE,
	DISP_0,
	DISP_1,
	DISP_4
};

struct x86_disp {
	enum x86_disp_type type;
	uint8_t data[4];
};

enum REGMODRM__Mod {
	MOD_DIS0,	/* also, register indirect */
	MOD_DIS1,
	MOD_DIS4,
	MOD_REG
};

enum REGMODRM__Reg {
	REG_000,	/* these fields are indexes to the register map */
	REG_001,
	REG_010,
	REG_011,
	REG_100,
	REG_101,
	REG_110,
	REG_111
};

enum REGMODRM__Rm {
	RM_000,		/* reg */
	RM_001,		/* reg */
	RM_010,		/* reg */
	RM_011,		/* reg */
	RM_RSP_SIB,	/* reg or SIB, depending on the MOD */
	RM_RBP_DISP32,	/* reg or displacement-only (= RIP-relative on amd64) */
	RM_110,
	RM_111
};

struct x86_regmodrm {
	bool present;
	enum REGMODRM__Mod mod;
	enum REGMODRM__Reg reg;
	enum REGMODRM__Rm rm;
};

struct x86_immediate {
	size_t size;	/* 1/2/4/8 */
	uint8_t data[8];
};

struct x86_sib {
	uint8_t scale;
	const struct x86_reg *idx;
	const struct x86_reg *bas;
};

enum x86_store_type {
	STORE_NONE,
	STORE_REG,
	STORE_IMM,
	STORE_SIB,
	STORE_DMO
};

struct x86_store {
	enum x86_store_type type;
	union {
		const struct x86_reg *reg;
		struct x86_immediate imm;
		struct x86_sib sib;
		uint64_t dmo;
	} u;
	struct x86_disp disp;
	int hardseg;
};

struct x86_instr {
	size_t len;
	bool legpref[NLEG];
	struct x86_rexpref rexpref;
	size_t operand_size;
	size_t address_size;

	struct x86_regmodrm regmodrm;

	const struct x86_opcode *opcode;

	struct x86_store src;
	struct x86_store dst;

	struct x86_store *strm;

	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

struct x86_decode_fsm {
	/* vcpu */
	bool is64bit;
	bool is32bit;
	bool is16bit;

	/* fsm */
	int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
	uint8_t *buf;
	uint8_t *end;
};

struct x86_opcode {
	uint8_t byte;
	bool regmodrm;
	bool regtorm;
	bool dmo;
	bool todmo;
	bool movs;
	bool stos;
	bool lods;
	bool szoverride;
	int defsize;
	int allsize;
	bool group11;
	bool immediate;
	int immsize;
	int flags;
	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

struct x86_group_entry {
	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

#define OPSIZE_BYTE 0x01
#define OPSIZE_WORD 0x02 /* 2 bytes */
#define OPSIZE_DOUB 0x04 /* 4 bytes */
#define OPSIZE_QUAD 0x08 /* 8 bytes */

#define FLAG_z	0x02

static const struct x86_group_entry group11[8] = {
	[0] = { .emul = x86_emul_mov }
};

static const struct x86_opcode primary_opcode_table[] = {
	/*
	 * Group11
	 */
	{
		.byte = 0xC6,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.group11 = true,
		.immediate = true,
		.immsize = OPSIZE_BYTE,
		.emul = NULL /* group11 */
	},
	{
		.byte = 0xC7,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.group11 = true,
		.immediate = true,
		.immsize = -1, /* special, Z */
		.flags = FLAG_z,
		.emul = NULL /* group11 */
	},

	/*
	 * OR
	 */
	{
		/* Eb, Gb */
		.byte = 0x08,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_or
	},
	{
		/* Ev, Gv */
		.byte = 0x09,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_or
	},
	{
		/* Gb, Eb */
		.byte = 0x0A,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_or
	},
	{
		/* Gv, Ev */
		.byte = 0x0B,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_or
	},

	/*
	 * AND
	 */
	{
		/* Eb, Gb */
		.byte = 0x20,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_and
	},
	{
		/* Ev, Gv */
		.byte = 0x21,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_and
	},
	{
		/* Gb, Eb */
		.byte = 0x22,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_and
	},
	{
		/* Gv, Ev */
		.byte = 0x23,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_and
	},

	/*
	 * XOR
	 */
	{
		/* Eb, Gb */
		.byte = 0x30,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_xor
	},
	{
		/* Ev, Gv */
		.byte = 0x31,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_xor
	},
	{
		/* Gb, Eb */
		.byte = 0x32,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_xor
	},
	{
		/* Gv, Ev */
		.byte = 0x33,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_xor
	},

	/*
	 * MOV
	 */
	{
		/* Eb, Gb */
		.byte = 0x88,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Ev, Gv */
		.byte = 0x89,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* Gb, Eb */
		.byte = 0x8A,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Gv, Ev */
		.byte = 0x8B,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* AL, Ob */
		.byte = 0xA0,
		.dmo = true,
		.todmo = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* rAX, Ov */
		.byte = 0xA1,
		.dmo = true,
		.todmo = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* Ob, AL */
		.byte = 0xA2,
		.dmo = true,
		.todmo = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Ov, rAX */
		.byte = 0xA3,
		.dmo = true,
		.todmo = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},

	/*
	 * MOVS
	 */
	{
		/* Yb, Xb */
		.byte = 0xA4,
		.movs = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_movs
	},
	{
		/* Yv, Xv */
		.byte = 0xA5,
		.movs = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_movs
	},

	/*
	 * STOS
	 */
	{
		/* Yb, AL */
		.byte = 0xAA,
		.stos = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_stos
	},
	{
		/* Yv, rAX */
		.byte = 0xAB,
		.stos = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_stos
	},

	/*
	 * LODS
	 */
	{
		/* AL, Xb */
		.byte = 0xAC,
		.lods = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_lods
	},
	{
		/* rAX, Xv */
		.byte = 0xAD,
		.lods = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_lods
	},
};
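
/*
 * Illustrative encoding handled by this table (assuming 64-bit mode):
 * "48 C7 00 78 56 34 12" is movq $0x12345678,(%rax). REX.W gives an
 * 8-byte operand size on opcode 0xC7, and FLAG_z caps the immediate at
 * 4 bytes (the hardware sign-extends it to 64 bits).
 */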

static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };

/* [REX-present][enc][opsize] */
static const struct x86_reg gpr_map__special[2][4][8] = {
	[false] = {
		/* No REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		}
	},
	[true] = {
		/* Has REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
		}
	}
};

/* [depends][enc][size] */
static const struct x86_reg gpr_map[2][8][8] = {
	[false] = {
		/* Not extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
			[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
			[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
			[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
			[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
		},
		[0b100] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b101] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b110] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b111] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
	},
	[true] = {
		/* Extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
			[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
			[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
			[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
			[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
		},
		[0b100] = {
			[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
			[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
		},
		[0b101] = {
			[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
			[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
		},
		[0b110] = {
			[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
			[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
		},
		[0b111] = {
			[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
			[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
		},
	}
};

static int
node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	fsm->fn = NULL;
	return -1;
}

static int
fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
{
	if (fsm->buf + n > fsm->end) {
		return -1;
	}
	memcpy(bytes, fsm->buf, n);
	return 0;
}

static void
fsm_advance(struct x86_decode_fsm *fsm, size_t n,
    int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
{
	fsm->buf += n;
	if (fsm->buf > fsm->end) {
		fsm->fn = node_overflow;
	} else {
		fsm->fn = fn;
	}
}

static const struct x86_reg *
resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
{
	enc &= 0b11;
	if (regsize == 8) {
		/* May be 64bit without REX */
		return &gpr_map__special[1][enc][regsize-1];
	}
	return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
}
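
/*
 * Example (illustrative): with a 1-byte operand, encoding 0b100 names AH
 * when no REX prefix is present, but SPL when one is; this lookup is what
 * makes that distinction.
 */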

/*
 * Special node, for MOVS. Fake two displacements of zero on the source and
 * destination registers.
 */
static int
node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	size_t adrsize;

	adrsize = instr->address_size;

	/* DS:RSI */
	instr->src.type = STORE_REG;
	instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
	instr->src.disp.type = DISP_0;

	/* ES:RDI, force ES */
	instr->dst.type = STORE_REG;
	instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
	instr->dst.disp.type = DISP_0;
	instr->dst.hardseg = NVMM_X64_SEG_ES;

	fsm_advance(fsm, 0, NULL);

	return 0;
}

/*
 * Special node, for STOS and LODS. Fake a displacement of zero on the
 * destination register.
 */
static int
node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *stlo, *streg;
	size_t adrsize, regsize;

	adrsize = instr->address_size;
	regsize = instr->operand_size;

	if (opcode->stos) {
		streg = &instr->src;
		stlo = &instr->dst;
	} else {
		streg = &instr->dst;
		stlo = &instr->src;
	}

	streg->type = STORE_REG;
	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */

	stlo->type = STORE_REG;
	if (opcode->stos) {
		/* ES:RDI, force ES */
		stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
		stlo->hardseg = NVMM_X64_SEG_ES;
	} else {
		/* DS:RSI */
		stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
	}
	stlo->disp.type = DISP_0;

	fsm_advance(fsm, 0, NULL);

	return 0;
}

static int
node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *stdmo, *streg;
	size_t adrsize, regsize;

	adrsize = instr->address_size;
	regsize = instr->operand_size;

	if (opcode->todmo) {
		streg = &instr->src;
		stdmo = &instr->dst;
	} else {
		streg = &instr->dst;
		stdmo = &instr->src;
	}

	streg->type = STORE_REG;
	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */

	stdmo->type = STORE_DMO;
	if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
		return -1;
	}
	fsm_advance(fsm, adrsize, NULL);

	return 0;
}

static int
node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *store;
	uint8_t flags;
	uint8_t immsize;

	/* The immediate is the source */
	store = &instr->src;
	immsize = instr->operand_size;

	/* Get the correct flags */
	flags = opcode->flags;
	if ((flags & FLAG_z) && (immsize == 8)) {
		/* 'z' operates here */
		immsize = 4;
	}

	store->type = STORE_IMM;
	store->u.imm.size = immsize;

	if (fsm_read(fsm, store->u.imm.data, store->u.imm.size) == -1) {
		return -1;
	}

	fsm_advance(fsm, store->u.imm.size, NULL);

	return 0;
}

static int
node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	size_t n;

	if (instr->strm->disp.type == DISP_1) {
		n = 1;
	} else { /* DISP_4 */
		n = 4;
	}

	if (fsm_read(fsm, instr->strm->disp.data, n) == -1) {
		return -1;
	}

	if (opcode->immediate) {
		fsm_advance(fsm, n, node_immediate);
	} else {
		fsm_advance(fsm, n, NULL);
	}

	return 0;
}

static const struct x86_reg *
get_register_idx(struct x86_instr *instr, uint8_t index)
{
	uint8_t enc = index;
	const struct x86_reg *reg;
	size_t regsize;

	regsize = instr->address_size;
	reg = &gpr_map[instr->rexpref.x][enc][regsize-1];

	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static const struct x86_reg *
get_register_bas(struct x86_instr *instr, uint8_t base)
{
	uint8_t enc = base;
	const struct x86_reg *reg;
	size_t regsize;

	regsize = instr->address_size;
	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static int
node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode;
	uint8_t scale, index, base;
	bool noindex, nobase;
	uint8_t byte;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	scale = ((byte & 0b11000000) >> 6);
	index = ((byte & 0b00111000) >> 3);
	base = ((byte & 0b00000111) >> 0);

	opcode = instr->opcode;

	noindex = false;
	nobase = false;

	if (index == 0b100 && !instr->rexpref.x) {
		/* Special case: the index is null */
		noindex = true;
	}

	if (instr->regmodrm.mod == 0b00 && base == 0b101) {
		/* Special case: the base is null + disp32 */
		instr->strm->disp.type = DISP_4;
		nobase = true;
	}

	instr->strm->type = STORE_SIB;
	instr->strm->u.sib.scale = (1 << scale);
	if (!noindex)
		instr->strm->u.sib.idx = get_register_idx(instr, index);
	if (!nobase)
		instr->strm->u.sib.bas = get_register_bas(instr, base);

	/* May have a displacement, or an immediate */
	if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
		fsm_advance(fsm, 1, node_disp);
	} else if (opcode->immediate) {
		fsm_advance(fsm, 1, node_immediate);
	} else {
		fsm_advance(fsm, 1, NULL);
	}

	return 0;
}

static const struct x86_reg *
get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
{
	uint8_t enc = instr->regmodrm.reg;
	const struct x86_reg *reg;
	size_t regsize;

	if ((opcode->flags & FLAG_z) && (instr->operand_size == 8)) {
		/* 'z' operates here */
		regsize = 4;
	} else {
		regsize = instr->operand_size;
	}

	reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static const struct x86_reg *
get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
{
	uint8_t enc = instr->regmodrm.rm;
	const struct x86_reg *reg;
	size_t regsize;

	if (instr->strm->disp.type == DISP_NONE) {
		if ((opcode->flags & FLAG_z) && (instr->operand_size == 8)) {
			/* 'z' operates here */
			regsize = 4;
		} else {
			regsize = instr->operand_size;
		}
	} else {
		/* Indirect access, the size is that of the address. */
		regsize = instr->address_size;
	}

	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static inline bool
has_sib(struct x86_instr *instr)
{
	return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
}

static inline bool
is_rip_relative(struct x86_instr *instr)
{
	return (instr->strm->disp.type == DISP_0 &&
	    instr->regmodrm.rm == RM_RBP_DISP32);
}

static enum x86_disp_type
get_disp_type(struct x86_instr *instr)
{
	switch (instr->regmodrm.mod) {
	case MOD_DIS0:	/* also, register indirect */
		return DISP_0;
	case MOD_DIS1:
		return DISP_1;
	case MOD_DIS4:
		return DISP_4;
	case MOD_REG:	/* direct */
	default:	/* gcc */
		return DISP_NONE;
	}
}

static int
node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	struct x86_store *strg, *strm;
	const struct x86_opcode *opcode;
	const struct x86_reg *reg;
	uint8_t byte;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	opcode = instr->opcode;

	instr->regmodrm.present = true;
	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
	instr->regmodrm.rm = ((byte & 0b00000111) >> 0);

	if (opcode->regtorm) {
		strg = &instr->src;
		strm = &instr->dst;
	} else { /* RM to REG */
		strm = &instr->src;
		strg = &instr->dst;
	}

	/* Save for later use. */
	instr->strm = strm;

	/*
	 * Special cases: Groups. The REG field of REGMODRM is the index in
	 * the group. op1 gets overwritten in the Immediate node, if any.
	 */
	if (opcode->group11) {
		if (group11[instr->regmodrm.reg].emul == NULL) {
			return -1;
		}
		instr->emul = group11[instr->regmodrm.reg].emul;
	}

	reg = get_register_reg(instr, opcode);
	if (reg == NULL) {
		return -1;
	}
	strg->type = STORE_REG;
	strg->u.reg = reg;

	if (has_sib(instr)) {
		/* Overwrites RM */
		fsm_advance(fsm, 1, node_sib);
		return 0;
	}

	/* The displacement applies to RM. */
	strm->disp.type = get_disp_type(instr);

	if (is_rip_relative(instr)) {
		/* Overwrites RM */
		strm->type = STORE_REG;
		strm->u.reg = &gpr_map__rip;
		strm->disp.type = DISP_4;
		fsm_advance(fsm, 1, node_disp);
		return 0;
	}

	reg = get_register_rm(instr, opcode);
	if (reg == NULL) {
		return -1;
	}
	strm->type = STORE_REG;
	strm->u.reg = reg;

	if (strm->disp.type == DISP_NONE) {
		/* Direct register addressing mode */
		if (opcode->immediate) {
			fsm_advance(fsm, 1, node_immediate);
		} else {
			fsm_advance(fsm, 1, NULL);
		}
	} else if (strm->disp.type == DISP_0) {
		/* Indirect register addressing mode */
		if (opcode->immediate) {
			fsm_advance(fsm, 1, node_immediate);
		} else {
			fsm_advance(fsm, 1, NULL);
		}
	} else {
		fsm_advance(fsm, 1, node_disp);
	}

	return 0;
}

static size_t
get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	int opsize;

	/* Get the opsize */
	if (!opcode->szoverride) {
		opsize = opcode->defsize;
	} else if (instr->rexpref.present && instr->rexpref.w) {
		opsize = 8;
	} else {
		if (!fsm->is16bit) {
			if (instr->legpref[LEG_OPR_OVR]) {
				opsize = 2;
			} else {
				opsize = 4;
			}
		} else { /* 16bit */
			if (instr->legpref[LEG_OPR_OVR]) {
				opsize = 4;
			} else {
				opsize = 2;
			}
		}
	}

	/* See if available */
	if ((opcode->allsize & opsize) == 0) {
		// XXX do we care?
	}

	return opsize;
}

static size_t
get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	if (fsm->is64bit) {
		if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
			return 4;
		}
		return 8;
	}

	if (fsm->is32bit) {
		if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
			return 2;
		}
		return 4;
	}

	/* 16bit. */
	if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
		return 4;
	}
	return 2;
}

static int
node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode;
	uint8_t byte;
	size_t i, n;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
	for (i = 0; i < n; i++) {
		if (primary_opcode_table[i].byte == byte)
			break;
	}
	if (i == n) {
		return -1;
	}
	opcode = &primary_opcode_table[i];

	instr->opcode = opcode;
	instr->emul = opcode->emul;
	instr->operand_size = get_operand_size(fsm, instr);
	instr->address_size = get_address_size(fsm, instr);

	if (opcode->regmodrm) {
		fsm_advance(fsm, 1, node_regmodrm);
	} else if (opcode->dmo) {
		/* Direct-Memory Offsets */
		fsm_advance(fsm, 1, node_dmo);
	} else if (opcode->stos || opcode->lods) {
		fsm_advance(fsm, 1, node_stlo);
	} else if (opcode->movs) {
		fsm_advance(fsm, 1, node_movs);
	} else {
		return -1;
	}

	return 0;
}

static int
node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	uint8_t byte;

#define ESCAPE	0x0F
#define VEX_1	0xC5
#define VEX_2	0xC4
#define XOP	0x8F

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	/*
	 * We don't take XOP. It is AMD-specific, and it was removed shortly
	 * after being introduced.
	 */
	if (byte == ESCAPE) {
		return -1;
	} else if (!instr->rexpref.present) {
		if (byte == VEX_1) {
			return -1;
		} else if (byte == VEX_2) {
			return -1;
		} else {
			fsm->fn = node_primary_opcode;
		}
	} else {
		fsm->fn = node_primary_opcode;
	}

	return 0;
}

static int
node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	struct x86_rexpref *rexpref = &instr->rexpref;
	uint8_t byte;
	size_t n = 0;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	if (byte >= 0x40 && byte <= 0x4F) {
		if (__predict_false(!fsm->is64bit)) {
			return -1;
		}
		rexpref->present = true;
		rexpref->w = ((byte & 0x8) != 0);
		rexpref->r = ((byte & 0x4) != 0);
		rexpref->x = ((byte & 0x2) != 0);
		rexpref->b = ((byte & 0x1) != 0);
		n = 1;
	}

	fsm_advance(fsm, n, node_main);
	return 0;
}

static const uint8_t legpref_table[NLEG] = {
	/* Group 1 */
	[LEG_LOCK] = 0xF0,
	[LEG_REPN] = 0xF2,
	[LEG_REP] = 0xF3,
	/* Group 2 */
	[LEG_OVR_CS] = 0x2E,
	[LEG_OVR_SS] = 0x36,
	[LEG_OVR_DS] = 0x3E,
	[LEG_OVR_ES] = 0x26,
	[LEG_OVR_FS] = 0x64,
	[LEG_OVR_GS] = 0x65,
	[LEG_BRN_TAKEN] = 0x2E,
	[LEG_BRN_NTAKEN] = 0x3E,
	/* Group 3 */
	[LEG_OPR_OVR] = 0x66,
	/* Group 4 */
	[LEG_ADR_OVR] = 0x67
};

static int
node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	uint8_t byte;
	size_t i;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	for (i = 0; i < NLEG; i++) {
		if (byte == legpref_table[i])
			break;
	}

	if (i == NLEG) {
		fsm->fn = node_rex_prefix;
	} else {
		instr->legpref[i] = true;
		fsm_advance(fsm, 1, node_legacy_prefix);
	}

	return 0;
}

static int
x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
    struct nvmm_x64_state *state)
{
	struct x86_decode_fsm fsm;
	int ret;

	memset(instr, 0, sizeof(*instr));

	fsm.is64bit = is_64bit(state);
	fsm.is32bit = is_32bit(state);
	fsm.is16bit = is_16bit(state);

	fsm.fn = node_legacy_prefix;
	fsm.buf = inst_bytes;
	fsm.end = inst_bytes + inst_len;

	while (fsm.fn != NULL) {
		ret = (*fsm.fn)(&fsm, instr);
		if (ret == -1)
			return -1;
	}

	instr->len = fsm.buf - inst_bytes;

	return 0;
}
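
/*
 * Illustrative walkthrough (assuming 64-bit mode): decoding "88 18",
 * i.e. mov %bl,(%rax), chains node_legacy_prefix -> node_rex_prefix ->
 * node_main -> node_primary_opcode -> node_regmodrm, and stops with
 * fsm.fn == NULL and instr->len == 2.
 */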

/* -------------------------------------------------------------------------- */

static inline uint8_t
compute_parity(uint8_t *data)
{
	/*
	 * x86 defines PF on the low byte of the result only; folding the
	 * whole 64-bit buffer would also absorb stale bytes beyond the
	 * operand size.
	 */
	uint8_t val = data[0];

	val ^= val >> 4;
	val ^= val >> 2;
	val ^= val >> 1;
	return (~val) & 1;
}
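
/*
 * Example (illustrative): a result whose low byte is 0x03 has two bits
 * set, so compute_parity() returns 1 and PSL_PF gets set, matching the
 * x86 rule that PF reflects even parity of the low byte.
 */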
2183
2184 static void
2185 x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2186 uint64_t *gprs)
2187 {
2188 const bool write = mem->write;
2189 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2190 uint8_t data[8];
2191 size_t i;
2192
2193 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2194
2195 memcpy(data, mem->data, sizeof(data));
2196
2197 /* Fetch the value to be OR'ed. */
2198 mem->write = false;
2199 (*cb)(mem);
2200
2201 /* Perform the OR. */
2202 for (i = 0; i < mem->size; i++) {
2203 mem->data[i] |= data[i];
2204 if (mem->data[i] != 0)
2205 fl |= PSL_Z;
2206 }
2207 if (mem->data[mem->size-1] & __BIT(7))
2208 fl |= PSL_N;
2209 if (compute_parity(mem->data))
2210 fl |= PSL_PF;
2211
2212 if (write) {
2213 /* Write back the result. */
2214 mem->write = true;
2215 (*cb)(mem);
2216 }
2217
2218 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2219 }
2220
2221 static void
2222 x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2223 uint64_t *gprs)
2224 {
2225 const bool write = mem->write;
2226 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2227 uint8_t data[8];
2228 size_t i;
2229
2230 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2231
2232 memcpy(data, mem->data, sizeof(data));
2233
2234 /* Fetch the value to be AND'ed. */
2235 mem->write = false;
2236 (*cb)(mem);
2237
2238 /* Perform the AND. */
2239 for (i = 0; i < mem->size; i++) {
2240 mem->data[i] &= data[i];
2241 if (mem->data[i] != 0)
2242 fl |= PSL_Z;
2243 }
2244 if (mem->data[mem->size-1] & __BIT(7))
2245 fl |= PSL_N;
2246 if (compute_parity(mem->data))
2247 fl |= PSL_PF;
2248
2249 if (write) {
2250 /* Write back the result. */
2251 mem->write = true;
2252 (*cb)(mem);
2253 }
2254
2255 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2256 }
2257
2258 static void
2259 x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2260 uint64_t *gprs)
2261 {
2262 const bool write = mem->write;
2263 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2264 uint8_t data[8];
2265 size_t i;
2266
2267 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2268
2269 memcpy(data, mem->data, sizeof(data));
2270
2271 /* Fetch the value to be XOR'ed. */
2272 mem->write = false;
2273 (*cb)(mem);
2274
	/* Perform the XOR. */
	zero = true;
	for (i = 0; i < mem->size; i++) {
		mem->data[i] ^= data[i];
		if (mem->data[i] != 0)
			zero = false;
	}

	/* ZF is set only when the whole result is zero. */
	if (zero)
		fl |= PSL_Z;
2281 if (mem->data[mem->size-1] & __BIT(7))
2282 fl |= PSL_N;
2283 if (compute_parity(mem->data))
2284 fl |= PSL_PF;
2285
2286 if (write) {
2287 /* Write back the result. */
2288 mem->write = true;
2289 (*cb)(mem);
2290 }
2291
2292 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2293 }
2294
2295 static void
2296 x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2297 uint64_t *gprs)
2298 {
	/*
	 * Nothing special to compute; just forward the access to the
	 * callback.
	 */
2302 (*cb)(mem);
2303 }
2304
2305 static void
2306 x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2307 uint64_t *gprs)
2308 {
2309 /*
2310 * Just move, and update RDI.
2311 */
2312 (*cb)(mem);
2313
2314 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2315 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2316 } else {
2317 gprs[NVMM_X64_GPR_RDI] += mem->size;
2318 }
2319 }
2320
2321 static void
2322 x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2323 uint64_t *gprs)
2324 {
2325 /*
2326 * Just move, and update RSI.
2327 */
2328 (*cb)(mem);
2329
2330 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2331 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2332 } else {
2333 gprs[NVMM_X64_GPR_RSI] += mem->size;
2334 }
2335 }
2336
2337 static void
2338 x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2339 uint64_t *gprs)
2340 {
	/*
	 * Special instruction: double memory operand. Don't call the cb,
	 * because the copy between the two memory operands has already
	 * been performed by assist_mem_double(). Only RSI/RDI need to be
	 * updated here.
	 */
2345
2346 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2347 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2348 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2349 } else {
2350 gprs[NVMM_X64_GPR_RSI] += mem->size;
2351 gprs[NVMM_X64_GPR_RDI] += mem->size;
2352 }
2353 }
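
/*
 * Worked example (illustrative): after a 4-byte MOVS with PSL_D clear,
 * RSI and RDI both advance by 4; with PSL_D set, they both move back
 * by 4, matching the hardware direction flag.
 */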
2354
2355 /* -------------------------------------------------------------------------- */
2356
2357 static inline uint64_t
2358 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2359 {
2360 uint64_t val;
2361
2362 val = state->gprs[gpr];
2363 if (__predict_false(instr->address_size == 4)) {
2364 val &= 0x00000000FFFFFFFF;
2365 } else if (__predict_false(instr->address_size == 2)) {
2366 val &= 0x000000000000FFFF;
2367 }
2368
2369 return val;
2370 }
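
/*
 * Worked example (illustrative): with a 16-bit address size and
 * RSI = 0x0000000012345678, only the low word is significant, so the
 * value returned for address computation is 0x5678.
 */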
2371
2372 static int
2373 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2374 struct x86_store *store, gvaddr_t *gvap, size_t size)
2375 {
2376 struct x86_sib *sib;
2377 gvaddr_t gva = 0;
2378 uint64_t reg;
2379 int ret, seg;
2380 uint32_t *p;
2381
2382 if (store->type == STORE_SIB) {
2383 sib = &store->u.sib;
2384 if (sib->bas != NULL)
2385 gva += gpr_read_address(instr, state, sib->bas->num);
2386 if (sib->idx != NULL) {
2387 reg = gpr_read_address(instr, state, sib->idx->num);
2388 gva += sib->scale * reg;
2389 }
2390 } else if (store->type == STORE_REG) {
2391 gva = gpr_read_address(instr, state, store->u.reg->num);
2392 } else {
2393 gva = store->u.dmo;
2394 }
2395
	if (store->disp.type != DISP_NONE) {
		/*
		 * disp.data is at most 4 bytes and was zeroed along with
		 * the rest of the instruction in x86_decode(), so a
		 * 32-bit read is always safe here.
		 */
		p = (uint32_t *)&store->disp.data[0];
		gva += *p;
	}
2400
2401 if (!is_long_mode(state)) {
2402 if (store->hardseg != 0) {
2403 seg = store->hardseg;
2404 } else {
2405 if (instr->legpref[LEG_OVR_CS]) {
2406 seg = NVMM_X64_SEG_CS;
2407 } else if (instr->legpref[LEG_OVR_SS]) {
2408 seg = NVMM_X64_SEG_SS;
2409 } else if (instr->legpref[LEG_OVR_ES]) {
2410 seg = NVMM_X64_SEG_ES;
2411 } else if (instr->legpref[LEG_OVR_FS]) {
2412 seg = NVMM_X64_SEG_FS;
2413 } else if (instr->legpref[LEG_OVR_GS]) {
2414 seg = NVMM_X64_SEG_GS;
2415 } else {
2416 seg = NVMM_X64_SEG_DS;
2417 }
2418 }
2419
2420 ret = segment_apply(&state->segs[seg], &gva, size);
2421 if (ret == -1)
2422 return -1;
2423 }
2424
2425 *gvap = gva;
2426 return 0;
2427 }
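
/*
 * Worked example (illustrative): for a SIB operand such as
 * "(%rbx,%rcx,4)" with RBX=0x1000, RCX=0x10 and a displacement of
 * 0x20, the code above computes gva = 0x1000 + 4 * 0x10 + 0x20 =
 * 0x1060. Outside of long mode, segment_apply() then adds the %ds
 * base, absent a segment-override prefix.
 */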
2428
2429 static int
2430 store_to_mem(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2431 struct x86_instr *instr, struct x86_store *store, struct nvmm_mem *mem)
2432 {
2433 nvmm_prot_t prot;
2434 int ret;
2435
2436 ret = store_to_gva(state, instr, store, &mem->gva, mem->size);
2437 if (ret == -1)
2438 return -1;
2439
2440 if ((mem->gva & PAGE_MASK) + mem->size > PAGE_SIZE) {
2441 /* Don't allow a cross-page MMIO. */
2442 errno = EINVAL;
2443 return -1;
2444 }
2445
2446 ret = x86_gva_to_gpa(mach, state, mem->gva, &mem->gpa, &prot);
2447 if (ret == -1)
2448 return -1;
2449
2450 return 0;
2451 }
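
/*
 * Worked example (illustrative): with 4KB pages, a 4-byte access at a
 * gva whose page offset is 0xffd gives 0xffd + 4 = 0x1001 > PAGE_SIZE,
 * so the access would cross into the next page and is rejected with
 * EINVAL.
 */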
2452
2453 static int
2454 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2455 struct nvmm_exit *exit)
2456 {
2457 size_t fetchsize;
2458 gvaddr_t gva;
2459 int ret;
2460
2461 fetchsize = sizeof(exit->u.mem.inst_bytes);
2462
2463 gva = state->gprs[NVMM_X64_GPR_RIP];
2464 if (!is_long_mode(state)) {
2465 ret = segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva,
2466 fetchsize);
2467 if (ret == -1)
2468 return -1;
2469 }
2470
2471 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2472 fetchsize);
2473 if (ret == -1)
2474 return -1;
2475
2476 exit->u.mem.inst_len = fetchsize;
2477
2478 return 0;
2479 }
2480
2481 static int
2482 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2483 struct x86_instr *instr)
2484 {
2485 struct nvmm_mem mem;
2486 uint8_t data[8];
2487 gvaddr_t gva;
2488 size_t size;
2489 int ret;
2490
2491 size = instr->operand_size;
2492
2493 /* Source. */
2494 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2495 if (ret == -1)
2496 return -1;
2497 ret = read_guest_memory(mach, state, gva, data, size);
2498 if (ret == -1)
2499 return -1;
2500
2501 /* Destination. */
2502 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
2503 if (ret == -1)
2504 return -1;
2505 ret = write_guest_memory(mach, state, gva, data, size);
2506 if (ret == -1)
2507 return -1;
2508
	/* instr->emul is x86_emul_movs here; it only consumes mem.size. */
	mem.size = size;
	(*instr->emul)(&mem, NULL, state->gprs);
2511
2512 return 0;
2513 }
2514
#define DISASSEMBLER_BUG()	\
	do {			\
		errno = EINVAL;	\
		return -1;	\
	} while (0)
2520
2521 static int
2522 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2523 struct x86_instr *instr)
2524 {
2525 struct nvmm_mem mem;
2526 uint64_t val;
2527 int ret;
2528
2529 memset(&mem, 0, sizeof(mem));
2530
2531 switch (instr->src.type) {
2532 case STORE_REG:
2533 if (instr->src.disp.type != DISP_NONE) {
2534 /* Indirect access. */
2535 mem.write = false;
2536 mem.size = instr->operand_size;
2537 ret = store_to_mem(mach, state, instr, &instr->src,
2538 &mem);
2539 if (ret == -1)
2540 return -1;
2541 } else {
2542 /* Direct access. */
2543 mem.write = true;
2544 mem.size = instr->operand_size;
2545 val = state->gprs[instr->src.u.reg->num];
2546 val = __SHIFTOUT(val, instr->src.u.reg->mask);
2547 memcpy(mem.data, &val, mem.size);
2548 }
2549 break;
2550
2551 case STORE_IMM:
2552 mem.write = true;
2553 mem.size = instr->src.u.imm.size;
2554 memcpy(mem.data, instr->src.u.imm.data, mem.size);
2555 break;
2556
2557 case STORE_SIB:
2558 mem.write = false;
2559 mem.size = instr->operand_size;
2560 ret = store_to_mem(mach, state, instr, &instr->src, &mem);
2561 if (ret == -1)
2562 return -1;
2563 break;
2564
2565 case STORE_DMO:
2566 mem.write = false;
2567 mem.size = instr->operand_size;
2568 ret = store_to_mem(mach, state, instr, &instr->src, &mem);
2569 if (ret == -1)
2570 return -1;
2571 break;
2572
2573 default:
2574 return -1;
2575 }
2576
2577 switch (instr->dst.type) {
2578 case STORE_REG:
2579 if (instr->dst.disp.type != DISP_NONE) {
2580 if (__predict_false(!mem.write)) {
2581 DISASSEMBLER_BUG();
2582 }
2583 mem.size = instr->operand_size;
2584 ret = store_to_mem(mach, state, instr, &instr->dst,
2585 &mem);
2586 if (ret == -1)
2587 return -1;
		} else {
			/*
			 * Direct register access: the result is written
			 * back to the GPR after the emul call below.
			 */
		}
2591 break;
2592
2593 case STORE_IMM:
2594 /* The dst can't be an immediate. */
2595 DISASSEMBLER_BUG();
2596
2597 case STORE_SIB:
2598 if (__predict_false(!mem.write)) {
2599 DISASSEMBLER_BUG();
2600 }
2601 mem.size = instr->operand_size;
2602 ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
2603 if (ret == -1)
2604 return -1;
2605 break;
2606
2607 case STORE_DMO:
2608 if (__predict_false(!mem.write)) {
2609 DISASSEMBLER_BUG();
2610 }
2611 mem.size = instr->operand_size;
2612 ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
2613 if (ret == -1)
2614 return -1;
2615 break;
2616
2617 default:
2618 return -1;
2619 }
2620
2621 (*instr->emul)(&mem, __callbacks.mem, state->gprs);
2622
2623 if (!mem.write) {
2624 /* instr->dst.type == STORE_REG */
2625 memcpy(&val, mem.data, sizeof(uint64_t));
2626 val = __SHIFTIN(val, instr->dst.u.reg->mask);
2627 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
2628 state->gprs[instr->dst.u.reg->num] |= val;
2629 }
2630
2631 return 0;
2632 }
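
/*
 * Worked example (illustrative): for a 1-byte MMIO read into %cl,
 * mem.data[0] holds the byte returned by the callback, the mask of %cl
 * selects bits 0-7 of RCX, and the write-back above replaces only
 * those bits, leaving the upper 56 bits of RCX untouched.
 */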
2633
2634 int
2635 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
2636 struct nvmm_exit *exit)
2637 {
2638 struct nvmm_x64_state state;
2639 struct x86_instr instr;
2640 uint64_t cnt;
2641 int ret;
2642
2643 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
2644 errno = EINVAL;
2645 return -1;
2646 }
2647
2648 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
2649 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | NVMM_X64_STATE_CRS |
2650 NVMM_X64_STATE_MSRS);
2651 if (ret == -1)
2652 return -1;
2653
2654 if (exit->u.mem.inst_len == 0) {
2655 /*
2656 * The instruction was not fetched from the kernel. Fetch
2657 * it ourselves.
2658 */
2659 ret = fetch_instruction(mach, &state, exit);
2660 if (ret == -1)
2661 return -1;
2662 }
2663
2664 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
2665 &instr, &state);
2666 if (ret == -1) {
2667 errno = ENODEV;
2668 return -1;
2669 }
2670
2671 if (__predict_false(instr.legpref[LEG_REPN])) {
2672 errno = ENODEV;
2673 return -1;
2674 }
2675
2676 if (instr.opcode->movs) {
2677 ret = assist_mem_double(mach, &state, &instr);
2678 } else {
2679 ret = assist_mem_single(mach, &state, &instr);
2680 }
2681 if (ret == -1) {
2682 errno = ENODEV;
2683 return -1;
2684 }
2685
2686 if (instr.legpref[LEG_REP]) {
2687 cnt = rep_dec_apply(&state, instr.address_size);
2688 if (cnt == 0) {
2689 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2690 }
2691 } else {
2692 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2693 }
2694
2695 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
2696 if (ret == -1)
2697 return -1;
2698
2699 return 0;
2700 }
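
/*
 * Illustrative usage sketch (not part of the original file): a VMM run
 * loop is expected to hand NVMM_EXIT_MEMORY exits to nvmm_assist_mem().
 * The loop below is hypothetical; a real VMM would handle the other
 * exit reasons too.
 */
static int __unused
example_run_loop(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_exit exit;

	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
		switch (exit.reason) {
		case NVMM_EXIT_MEMORY:
			/* MMIO access: decode and emulate it. */
			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
				return -1;
			break;
		case NVMM_EXIT_NONE:
			break;
		default:
			/* Unhandled exit reason. */
			return -1;
		}
	}

	return -1;
}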
2701