1 /*	$NetBSD: libnvmm_x86.c,v 1.26 2019/02/26 12:23:12 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
56 /*
57 * Undocumented debugging function. Helpful.
58 */
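/*
 * Illustrative use (hypothetical caller): a VMM can call
 * nvmm_vcpu_dump(&mach, cpuid) after an unexpected VMEXIT to print the
 * full register state of the VCPU before giving up.
 */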
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 uint16_t *attr;
64 size_t i;
65 int ret;
66
67 const char *segnames[] = {
68 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
69 };
70
71 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
72 if (ret == -1)
73 return -1;
74
75 printf("+ VCPU id=%d\n", (int)cpuid);
76 printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]);
77 printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]);
78 printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]);
79 printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]);
80 printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]);
81 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
82 for (i = 0; i < NVMM_X64_NSEG; i++) {
83 attr = (uint16_t *)&state.segs[i].attrib;
84 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
85 segnames[i],
86 state.segs[i].selector,
87 state.segs[i].base,
88 state.segs[i].limit,
89 *attr);
90 }
91 printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]);
92 printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]);
93 printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]);
94 printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]);
95 printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]);
96
97 return 0;
98 }
99
100 /* -------------------------------------------------------------------------- */
101
102 #define PTE32_L1_SHIFT 12
103 #define PTE32_L2_SHIFT 22
104
105 #define PTE32_L2_MASK 0xffc00000
106 #define PTE32_L1_MASK 0x003ff000
107
108 #define PTE32_L2_FRAME (PTE32_L2_MASK)
109 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
110
111 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
112 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
113
114 #define CR3_FRAME_32BIT PG_FRAME
115
116 typedef uint32_t pte_32bit_t;
117
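/*
 * 32bit non-PAE paging: two levels of 4-byte PTEs, a 10-bit index per
 * level plus a 12-bit page offset. For example, gva=0xC0201234 yields
 * pte32_l2idx=0x300, pte32_l1idx=0x201 and a page offset of 0x234.
 */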
118 static int
119 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
120 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
121 {
122 gpaddr_t L2gpa, L1gpa;
123 uintptr_t L2hva, L1hva;
124 pte_32bit_t *pdir, pte;
125
126 /* We begin with an RWXU access. */
127 *prot = NVMM_PROT_ALL;
128
129 /* Parse L2. */
130 L2gpa = (cr3 & CR3_FRAME_32BIT);
131 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
132 return -1;
133 pdir = (pte_32bit_t *)L2hva;
134 pte = pdir[pte32_l2idx(gva)];
135 if ((pte & PG_V) == 0)
136 return -1;
137 if ((pte & PG_u) == 0)
138 *prot &= ~NVMM_PROT_USER;
139 if ((pte & PG_KW) == 0)
140 *prot &= ~NVMM_PROT_WRITE;
141 if ((pte & PG_PS) && !has_pse)
142 return -1;
143 if (pte & PG_PS) {
144 *gpa = (pte & PTE32_L2_FRAME);
145 *gpa = *gpa + (gva & PTE32_L1_MASK);
146 return 0;
147 }
148
149 /* Parse L1. */
150 L1gpa = (pte & PG_FRAME);
151 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
152 return -1;
153 pdir = (pte_32bit_t *)L1hva;
154 pte = pdir[pte32_l1idx(gva)];
155 if ((pte & PG_V) == 0)
156 return -1;
157 if ((pte & PG_u) == 0)
158 *prot &= ~NVMM_PROT_USER;
159 if ((pte & PG_KW) == 0)
160 *prot &= ~NVMM_PROT_WRITE;
161 if (pte & PG_PS)
162 return -1;
163
164 *gpa = (pte & PG_FRAME);
165 return 0;
166 }
167
168 /* -------------------------------------------------------------------------- */
169
170 #define PTE32_PAE_L1_SHIFT 12
171 #define PTE32_PAE_L2_SHIFT 21
172 #define PTE32_PAE_L3_SHIFT 30
173
174 #define PTE32_PAE_L3_MASK 0xc0000000
175 #define PTE32_PAE_L2_MASK 0x3fe00000
176 #define PTE32_PAE_L1_MASK 0x001ff000
177
178 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
179 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
180 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
181
182 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
183 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
184 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
185
186 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
187
188 typedef uint64_t pte_32bit_pae_t;
189
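/*
 * 32bit PAE paging: three levels of 8-byte PTEs, a 2-bit L3 index, two
 * 9-bit indexes and a 12-bit page offset. Unlike non-PAE, the entries
 * carry an NX bit, so the exec permission gets filtered here too.
 */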
190 static int
191 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
192 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
193 {
194 gpaddr_t L3gpa, L2gpa, L1gpa;
195 uintptr_t L3hva, L2hva, L1hva;
196 pte_32bit_pae_t *pdir, pte;
197
198 /* We begin with an RWXU access. */
199 *prot = NVMM_PROT_ALL;
200
201 /* Parse L3. */
202 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
203 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
204 return -1;
205 pdir = (pte_32bit_pae_t *)L3hva;
206 pte = pdir[pte32_pae_l3idx(gva)];
207 if ((pte & PG_V) == 0)
208 return -1;
209 if (pte & PG_NX)
210 *prot &= ~NVMM_PROT_EXEC;
211 if (pte & PG_PS)
212 return -1;
213
214 /* Parse L2. */
215 L2gpa = (pte & PG_FRAME);
216 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
217 return -1;
218 pdir = (pte_32bit_pae_t *)L2hva;
219 pte = pdir[pte32_pae_l2idx(gva)];
220 if ((pte & PG_V) == 0)
221 return -1;
222 if ((pte & PG_u) == 0)
223 *prot &= ~NVMM_PROT_USER;
224 if ((pte & PG_KW) == 0)
225 *prot &= ~NVMM_PROT_WRITE;
226 if (pte & PG_NX)
227 *prot &= ~NVMM_PROT_EXEC;
228 if (pte & PG_PS) {
229 *gpa = (pte & PTE32_PAE_L2_FRAME);
230 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
231 return 0;
232 }
233
234 /* Parse L1. */
235 L1gpa = (pte & PG_FRAME);
236 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
237 return -1;
238 pdir = (pte_32bit_pae_t *)L1hva;
239 pte = pdir[pte32_pae_l1idx(gva)];
240 if ((pte & PG_V) == 0)
241 return -1;
242 if ((pte & PG_u) == 0)
243 *prot &= ~NVMM_PROT_USER;
244 if ((pte & PG_KW) == 0)
245 *prot &= ~NVMM_PROT_WRITE;
246 if (pte & PG_NX)
247 *prot &= ~NVMM_PROT_EXEC;
248 if (pte & PG_PS)
249 return -1;
250
251 *gpa = (pte & PG_FRAME);
252 return 0;
253 }
254
255 /* -------------------------------------------------------------------------- */
256
257 #define PTE64_L1_SHIFT 12
258 #define PTE64_L2_SHIFT 21
259 #define PTE64_L3_SHIFT 30
260 #define PTE64_L4_SHIFT 39
261
262 #define PTE64_L4_MASK 0x0000ff8000000000
263 #define PTE64_L3_MASK 0x0000007fc0000000
264 #define PTE64_L2_MASK 0x000000003fe00000
265 #define PTE64_L1_MASK 0x00000000001ff000
266
267 #define PTE64_L4_FRAME PTE64_L4_MASK
268 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
269 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
270 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
271
272 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
273 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
274 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
275 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
276
277 #define CR3_FRAME_64BIT PG_FRAME
278
279 typedef uint64_t pte_64bit_t;
280
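/*
 * 64bit (long mode) paging: four levels of 8-byte PTEs, a 9-bit index
 * per level plus a 12-bit page offset, covering 48 bits of virtual
 * address. The bits above must be a sign extension of bit 47, which is
 * what the canonical check below enforces.
 */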
281 static inline bool
282 x86_gva_64bit_canonical(gvaddr_t gva)
283 {
284 /* Bits 63:47 must have the same value. */
285 #define SIGN_EXTEND 0xffff800000000000ULL
286 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
287 }
288
289 static int
290 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
291 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
292 {
293 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
294 uintptr_t L4hva, L3hva, L2hva, L1hva;
295 pte_64bit_t *pdir, pte;
296
297 /* We begin with an RWXU access. */
298 *prot = NVMM_PROT_ALL;
299
300 if (!x86_gva_64bit_canonical(gva))
301 return -1;
302
303 /* Parse L4. */
304 L4gpa = (cr3 & CR3_FRAME_64BIT);
305 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
306 return -1;
307 pdir = (pte_64bit_t *)L4hva;
308 pte = pdir[pte64_l4idx(gva)];
309 if ((pte & PG_V) == 0)
310 return -1;
311 if ((pte & PG_u) == 0)
312 *prot &= ~NVMM_PROT_USER;
313 if ((pte & PG_KW) == 0)
314 *prot &= ~NVMM_PROT_WRITE;
315 if (pte & PG_NX)
316 *prot &= ~NVMM_PROT_EXEC;
317 if (pte & PG_PS)
318 return -1;
319
320 /* Parse L3. */
321 L3gpa = (pte & PG_FRAME);
322 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
323 return -1;
324 pdir = (pte_64bit_t *)L3hva;
325 pte = pdir[pte64_l3idx(gva)];
326 if ((pte & PG_V) == 0)
327 return -1;
328 if ((pte & PG_u) == 0)
329 *prot &= ~NVMM_PROT_USER;
330 if ((pte & PG_KW) == 0)
331 *prot &= ~NVMM_PROT_WRITE;
332 if (pte & PG_NX)
333 *prot &= ~NVMM_PROT_EXEC;
334 if (pte & PG_PS) {
335 *gpa = (pte & PTE64_L3_FRAME);
336 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
337 return 0;
338 }
339
340 /* Parse L2. */
341 L2gpa = (pte & PG_FRAME);
342 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
343 return -1;
344 pdir = (pte_64bit_t *)L2hva;
345 pte = pdir[pte64_l2idx(gva)];
346 if ((pte & PG_V) == 0)
347 return -1;
348 if ((pte & PG_u) == 0)
349 *prot &= ~NVMM_PROT_USER;
350 if ((pte & PG_KW) == 0)
351 *prot &= ~NVMM_PROT_WRITE;
352 if (pte & PG_NX)
353 *prot &= ~NVMM_PROT_EXEC;
354 if (pte & PG_PS) {
355 *gpa = (pte & PTE64_L2_FRAME);
356 *gpa = *gpa + (gva & PTE64_L1_MASK);
357 return 0;
358 }
359
360 /* Parse L1. */
361 L1gpa = (pte & PG_FRAME);
362 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
363 return -1;
364 pdir = (pte_64bit_t *)L1hva;
365 pte = pdir[pte64_l1idx(gva)];
366 if ((pte & PG_V) == 0)
367 return -1;
368 if ((pte & PG_u) == 0)
369 *prot &= ~NVMM_PROT_USER;
370 if ((pte & PG_KW) == 0)
371 *prot &= ~NVMM_PROT_WRITE;
372 if (pte & PG_NX)
373 *prot &= ~NVMM_PROT_EXEC;
374 if (pte & PG_PS)
375 return -1;
376
377 *gpa = (pte & PG_FRAME);
378 return 0;
379 }
380
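/*
 * Dispatch on the guest paging mode: CR0.PG clear means no paging (the
 * GVA is the GPA); CR4.PAE together with EFER.LMA selects the 4-level
 * 64bit walk; CR4.PAE alone selects the 3-level PAE walk; otherwise the
 * 2-level 32bit walk is used. LMA without PAE is invalid and fails.
 */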
381 static inline int
382 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
383 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
384 {
385 bool is_pae, is_lng, has_pse;
386 uint64_t cr3;
387 size_t off;
388 int ret;
389
390 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
391 /* No paging. */
392 *prot = NVMM_PROT_ALL;
393 *gpa = gva;
394 return 0;
395 }
396
397 off = (gva & PAGE_MASK);
398 gva &= ~PAGE_MASK;
399
400 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
401 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
402 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
403 cr3 = state->crs[NVMM_X64_CR_CR3];
404
405 if (is_pae && is_lng) {
406 /* 64bit */
407 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
408 } else if (is_pae && !is_lng) {
409 /* 32bit PAE */
410 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
411 } else if (!is_pae && !is_lng) {
412 /* 32bit */
413 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
414 } else {
415 ret = -1;
416 }
417
418 if (ret == -1) {
419 errno = EFAULT;
420 }
421
422 *gpa = *gpa + off;
423
424 return ret;
425 }
426
427 int
428 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
429 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
430 {
431 struct nvmm_x64_state state;
432 int ret;
433
434 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
435 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
436 if (ret == -1)
437 return -1;
438
439 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
440 }
441
442 /* -------------------------------------------------------------------------- */
443
444 static inline bool
445 is_long_mode(struct nvmm_x64_state *state)
446 {
447 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
448 }
449
450 static inline bool
451 is_64bit(struct nvmm_x64_state *state)
452 {
453 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
454 }
455
456 static inline bool
457 is_32bit(struct nvmm_x64_state *state)
458 {
459 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
460 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
461 }
462
463 static inline bool
464 is_16bit(struct nvmm_x64_state *state)
465 {
466 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
467 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
468 }
469
470 static int
471 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
472 {
473 uint64_t limit;
474
475 /*
476 * This is incomplete. We should check topdown, etc, really that's
477 * tiring.
478 */
479 if (__predict_false(!seg->attrib.p)) {
480 goto error;
481 }
482
483 limit = (uint64_t)seg->limit + 1;
484 if (__predict_true(seg->attrib.g)) {
485 limit *= PAGE_SIZE;
486 }
487
488 if (__predict_false(gva + size > limit)) {
489 goto error;
490 }
491
492 return 0;
493
494 error:
495 errno = EFAULT;
496 return -1;
497 }
498
499 static inline void
500 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
501 {
502 *gva += seg->base;
503 }
504
505 static inline uint64_t
506 size_to_mask(size_t size)
507 {
508 switch (size) {
509 case 1:
510 return 0x00000000000000FF;
511 case 2:
512 return 0x000000000000FFFF;
513 case 4:
514 return 0x00000000FFFFFFFF;
515 case 8:
516 default:
517 return 0xFFFFFFFFFFFFFFFF;
518 }
519 }
520
521 static uint64_t
522 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
523 {
524 uint64_t mask, cnt;
525
526 mask = size_to_mask(adsize);
527 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
528
529 return cnt;
530 }
531
532 static void
533 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
534 {
535 uint64_t mask;
536
537 /* XXX: should we zero-extend? */
538 mask = size_to_mask(adsize);
539 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
540 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
541 }
542
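/*
 * Guest memory accessors. An access that crosses a page boundary is
 * split at the boundary and the remainder handled recursively, since
 * each page may translate to a different GPA. A GPA with no HVA mapping
 * is treated as MMIO and forwarded to the registered mem callback.
 */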
543 static int
544 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
545 gvaddr_t gva, uint8_t *data, size_t size)
546 {
547 struct nvmm_mem mem;
548 nvmm_prot_t prot;
549 gpaddr_t gpa;
550 uintptr_t hva;
551 bool is_mmio;
552 int ret, remain;
553
554 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
555 if (__predict_false(ret == -1)) {
556 return -1;
557 }
558 if (__predict_false(!(prot & NVMM_PROT_READ))) {
559 errno = EFAULT;
560 return -1;
561 }
562
563 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
564 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
565 } else {
566 remain = 0;
567 }
568 size -= remain;
569
570 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
571 is_mmio = (ret == -1);
572
573 if (is_mmio) {
574 mem.data = data;
575 mem.gpa = gpa;
576 mem.write = false;
577 mem.size = size;
578 (*__callbacks.mem)(&mem);
579 } else {
580 memcpy(data, (uint8_t *)hva, size);
581 }
582
583 if (remain > 0) {
584 ret = read_guest_memory(mach, state, gva + size,
585 data + size, remain);
586 } else {
587 ret = 0;
588 }
589
590 return ret;
591 }
592
593 static int
594 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
595 gvaddr_t gva, uint8_t *data, size_t size)
596 {
597 struct nvmm_mem mem;
598 nvmm_prot_t prot;
599 gpaddr_t gpa;
600 uintptr_t hva;
601 bool is_mmio;
602 int ret, remain;
603
604 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
605 if (__predict_false(ret == -1)) {
606 return -1;
607 }
608 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
609 errno = EFAULT;
610 return -1;
611 }
612
613 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
614 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
615 } else {
616 remain = 0;
617 }
618 size -= remain;
619
620 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
621 is_mmio = (ret == -1);
622
623 if (is_mmio) {
624 mem.data = data;
625 mem.gpa = gpa;
626 mem.write = true;
627 mem.size = size;
628 (*__callbacks.mem)(&mem);
629 } else {
630 memcpy((uint8_t *)hva, data, size);
631 }
632
633 if (remain > 0) {
634 ret = write_guest_memory(mach, state, gva + size,
635 data + size, remain);
636 } else {
637 ret = 0;
638 }
639
640 return ret;
641 }
642
643 /* -------------------------------------------------------------------------- */
644
645 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
646
647 #define NVMM_IO_BATCH_SIZE 32
648
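/*
 * Batch a REP INS/OUTS: up to NVMM_IO_BATCH_SIZE bytes of the guest
 * buffer are read or written in one go, and the I/O callback is invoked
 * once per element. Returns the number of elements processed, or -1.
 */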
649 static int
650 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
651 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
652 {
653 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
654 size_t i, iosize, iocnt;
655 int ret;
656
657 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
658 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
659 iocnt = iosize / io->size;
660
661 io->data = iobuf;
662
663 if (!io->in) {
664 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
665 if (ret == -1)
666 return -1;
667 }
668
669 for (i = 0; i < iocnt; i++) {
670 (*__callbacks.io)(io);
671 io->data += io->size;
672 }
673
674 if (io->in) {
675 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
676 if (ret == -1)
677 return -1;
678 }
679
680 return iocnt;
681 }
682
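/*
 * Illustrative flow (hypothetical caller): the VMM run loop invokes
 * nvmm_assist_io(mach, cpuid, exit) whenever a VCPU run returns with
 * exit->reason == NVMM_EXIT_IO; the access is emulated through the
 * registered io callback and RIP is advanced once the emulation is done.
 */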
683 int
684 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
685 struct nvmm_exit *exit)
686 {
687 struct nvmm_x64_state state;
688 struct nvmm_io io;
689 uint64_t cnt = 0; /* GCC */
690 uint8_t iobuf[8];
691 int iocnt = 1;
692 gvaddr_t gva = 0; /* GCC */
693 int reg = 0; /* GCC */
694 int ret, seg;
695 bool psld = false;
696
697 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
698 errno = EINVAL;
699 return -1;
700 }
701
702 io.port = exit->u.io.port;
703 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
704 io.size = exit->u.io.operand_size;
705 io.data = iobuf;
706
707 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
708 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
709 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
710 if (ret == -1)
711 return -1;
712
713 if (exit->u.io.rep) {
714 cnt = rep_get_cnt(&state, exit->u.io.address_size);
715 if (__predict_false(cnt == 0)) {
716 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
717 goto out;
718 }
719 }
720
721 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
722 psld = true;
723 }
724
725 /*
726 * Determine GVA.
727 */
728 if (exit->u.io.str) {
729 if (io.in) {
730 reg = NVMM_X64_GPR_RDI;
731 } else {
732 reg = NVMM_X64_GPR_RSI;
733 }
734
735 gva = state.gprs[reg];
736 gva &= size_to_mask(exit->u.io.address_size);
737
738 if (exit->u.io.seg != -1) {
739 seg = exit->u.io.seg;
740 } else {
741 if (io.in) {
742 seg = NVMM_X64_SEG_ES;
743 } else {
744 seg = fetch_segment(mach, &state);
745 if (seg == -1)
746 return -1;
747 }
748 }
749
750 if (__predict_true(is_long_mode(&state))) {
751 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
752 segment_apply(&state.segs[seg], &gva);
753 }
754 } else {
755 ret = segment_check(&state.segs[seg], gva, io.size);
756 if (ret == -1)
757 return -1;
758 segment_apply(&state.segs[seg], &gva);
759 }
760
761 if (exit->u.io.rep && !psld) {
762 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
763 if (iocnt == -1)
764 return -1;
765 goto done;
766 }
767 }
768
769 if (!io.in) {
770 if (!exit->u.io.str) {
771 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
772 } else {
773 ret = read_guest_memory(mach, &state, gva, io.data,
774 io.size);
775 if (ret == -1)
776 return -1;
777 }
778 }
779
780 (*__callbacks.io)(&io);
781
782 if (io.in) {
783 if (!exit->u.io.str) {
784 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
785 if (io.size == 4) {
786 /* Zero-extend to 64 bits. */
787 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
788 }
789 } else {
790 ret = write_guest_memory(mach, &state, gva, io.data,
791 io.size);
792 if (ret == -1)
793 return -1;
794 }
795 }
796
797 done:
798 if (exit->u.io.str) {
799 if (__predict_false(psld)) {
800 state.gprs[reg] -= iocnt * io.size;
801 } else {
802 state.gprs[reg] += iocnt * io.size;
803 }
804 }
805
806 if (exit->u.io.rep) {
807 cnt -= iocnt;
808 rep_set_cnt(&state, exit->u.io.address_size, cnt);
809 if (cnt == 0) {
810 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
811 }
812 } else {
813 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
814 }
815
816 out:
817 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
818 if (ret == -1)
819 return -1;
820
821 return 0;
822 }
823
824 /* -------------------------------------------------------------------------- */
825
826 struct x86_emul {
827 bool read;
828 bool notouch;
829 void (*func)(struct nvmm_mem *, uint64_t *);
830 };
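/*
 * Emulation descriptor. "read" means the current value of the memory
 * operand must be fetched before the operation (read-modify-write);
 * "notouch" means no result is written back to the destination, which
 * matches CMP and TEST whose only architectural effect is on RFLAGS.
 */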
831
832 static void x86_func_or(struct nvmm_mem *, uint64_t *);
833 static void x86_func_and(struct nvmm_mem *, uint64_t *);
834 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
835 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
836 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
837 static void x86_func_test(struct nvmm_mem *, uint64_t *);
838 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
839 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
840 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
841 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
842
843 static const struct x86_emul x86_emul_or = {
844 .read = true,
845 .func = x86_func_or
846 };
847
848 static const struct x86_emul x86_emul_and = {
849 .read = true,
850 .func = x86_func_and
851 };
852
853 static const struct x86_emul x86_emul_sub = {
854 .read = true,
855 .func = x86_func_sub
856 };
857
858 static const struct x86_emul x86_emul_xor = {
859 .read = true,
860 .func = x86_func_xor
861 };
862
863 static const struct x86_emul x86_emul_cmp = {
864 .notouch = true,
865 .func = x86_func_cmp
866 };
867
868 static const struct x86_emul x86_emul_test = {
869 .notouch = true,
870 .func = x86_func_test
871 };
872
873 static const struct x86_emul x86_emul_mov = {
874 .func = x86_func_mov
875 };
876
877 static const struct x86_emul x86_emul_stos = {
878 .func = x86_func_stos
879 };
880
881 static const struct x86_emul x86_emul_lods = {
882 .func = x86_func_lods
883 };
884
885 static const struct x86_emul x86_emul_movs = {
886 .func = x86_func_movs
887 };
888
889 /* Legacy prefixes. */
890 #define LEG_LOCK 0xF0
891 #define LEG_REPN 0xF2
892 #define LEG_REP 0xF3
893 #define LEG_OVR_CS 0x2E
894 #define LEG_OVR_SS 0x36
895 #define LEG_OVR_DS 0x3E
896 #define LEG_OVR_ES 0x26
897 #define LEG_OVR_FS 0x64
898 #define LEG_OVR_GS 0x65
899 #define LEG_OPR_OVR 0x66
900 #define LEG_ADR_OVR 0x67
901
902 struct x86_legpref {
903 bool opr_ovr:1;
904 bool adr_ovr:1;
905 bool rep:1;
906 bool repn:1;
907 int seg;
908 };
909
910 struct x86_rexpref {
911 bool present;
912 bool w;
913 bool r;
914 bool x;
915 bool b;
916 };
917
918 struct x86_reg {
919 int num; /* NVMM GPR state index */
920 uint64_t mask;
921 };
922
923 enum x86_disp_type {
924 DISP_NONE,
925 DISP_0,
926 DISP_1,
927 DISP_4
928 };
929
930 struct x86_disp {
931 enum x86_disp_type type;
932 uint64_t data; /* 4 bytes, but can be sign-extended */
933 };
934
935 enum REGMODRM__Mod {
936 MOD_DIS0, /* also, register indirect */
937 MOD_DIS1,
938 MOD_DIS4,
939 MOD_REG
940 };
941
942 enum REGMODRM__Reg {
943 REG_000, /* these fields are indexes to the register map */
944 REG_001,
945 REG_010,
946 REG_011,
947 REG_100,
948 REG_101,
949 REG_110,
950 REG_111
951 };
952
953 enum REGMODRM__Rm {
954 RM_000, /* reg */
955 RM_001, /* reg */
956 RM_010, /* reg */
957 RM_011, /* reg */
958 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
959 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
960 RM_110,
961 RM_111
962 };
963
964 struct x86_regmodrm {
965 bool present;
966 enum REGMODRM__Mod mod;
967 enum REGMODRM__Reg reg;
968 enum REGMODRM__Rm rm;
969 };
970
971 struct x86_immediate {
972 uint64_t data;
973 };
974
975 struct x86_sib {
976 uint8_t scale;
977 const struct x86_reg *idx;
978 const struct x86_reg *bas;
979 };
980
981 enum x86_store_type {
982 STORE_NONE,
983 STORE_REG,
984 STORE_IMM,
985 STORE_SIB,
986 STORE_DMO
987 };
988
989 struct x86_store {
990 enum x86_store_type type;
991 union {
992 const struct x86_reg *reg;
993 struct x86_immediate imm;
994 struct x86_sib sib;
995 uint64_t dmo;
996 } u;
997 struct x86_disp disp;
998 int hardseg;
999 };
1000
1001 struct x86_instr {
1002 size_t len;
1003 struct x86_legpref legpref;
1004 struct x86_rexpref rexpref;
1005 size_t operand_size;
1006 size_t address_size;
1007 uint64_t zeroextend_mask;
1008
1009 struct x86_regmodrm regmodrm;
1010
1011 const struct x86_opcode *opcode;
1012
1013 struct x86_store src;
1014 struct x86_store dst;
1015 struct x86_store *strm;
1016
1017 const struct x86_emul *emul;
1018 };
1019
1020 struct x86_decode_fsm {
1021 /* vcpu */
1022 bool is64bit;
1023 bool is32bit;
1024 bool is16bit;
1025
1026 /* fsm */
1027 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1028 uint8_t *buf;
1029 uint8_t *end;
1030 };
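/*
 * The decoder is a small state machine: fn points to the next decode
 * node and buf/end delimit the remaining instruction bytes. Each node
 * consumes bytes via fsm_read()/fsm_advance() and either chains to the
 * next node or terminates by passing a NULL fn.
 */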
1031
1032 struct x86_opcode {
1033 uint8_t byte;
1034 bool regmodrm;
1035 bool regtorm;
1036 bool dmo;
1037 bool todmo;
1038 bool movs;
1039 bool stos;
1040 bool lods;
1041 bool szoverride;
1042 int defsize;
1043 int allsize;
1044 bool group1;
1045 bool group3;
1046 bool group11;
1047 bool immediate;
1048 int flags;
1049 const struct x86_emul *emul;
1050 };
1051
1052 struct x86_group_entry {
1053 const struct x86_emul *emul;
1054 };
1055
1056 #define OPSIZE_BYTE 0x01
1057 #define OPSIZE_WORD 0x02 /* 2 bytes */
1058 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1059 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1060
1061 #define FLAG_imm8 0x01
1062 #define FLAG_immz 0x02
1063 #define FLAG_ze 0x04
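/*
 * FLAG_imm8: the immediate is one byte, sign-extended to the operand
 * size. FLAG_immz: the immediate is at most four bytes, sign-extended
 * when the operand size is eight. FLAG_ze: the result is zero-extended
 * into the destination register (used by the MOVZX entries).
 */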
1064
1065 static const struct x86_group_entry group1[8] = {
1066 [1] = { .emul = &x86_emul_or },
1067 [4] = { .emul = &x86_emul_and },
1068 [6] = { .emul = &x86_emul_xor },
1069 [7] = { .emul = &x86_emul_cmp }
1070 };
1071
1072 static const struct x86_group_entry group3[8] = {
1073 [0] = { .emul = &x86_emul_test },
1074 [1] = { .emul = &x86_emul_test }
1075 };
1076
1077 static const struct x86_group_entry group11[8] = {
1078 [0] = { .emul = &x86_emul_mov }
1079 };
1080
1081 static const struct x86_opcode primary_opcode_table[] = {
1082 /*
1083 * Group1
1084 */
1085 {
1086 /* Eb, Ib */
1087 .byte = 0x80,
1088 .regmodrm = true,
1089 .regtorm = true,
1090 .szoverride = false,
1091 .defsize = OPSIZE_BYTE,
1092 .allsize = -1,
1093 .group1 = true,
1094 .immediate = true,
1095 .emul = NULL /* group1 */
1096 },
1097 {
1098 /* Ev, Iz */
1099 .byte = 0x81,
1100 .regmodrm = true,
1101 .regtorm = true,
1102 .szoverride = true,
1103 .defsize = -1,
1104 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1105 .group1 = true,
1106 .immediate = true,
1107 .flags = FLAG_immz,
1108 .emul = NULL /* group1 */
1109 },
1110 {
1111 /* Ev, Ib */
1112 .byte = 0x83,
1113 .regmodrm = true,
1114 .regtorm = true,
1115 .szoverride = true,
1116 .defsize = -1,
1117 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1118 .group1 = true,
1119 .immediate = true,
1120 .flags = FLAG_imm8,
1121 .emul = NULL /* group1 */
1122 },
1123
1124 /*
1125 * Group3
1126 */
1127 {
1128 /* Eb, Ib */
1129 .byte = 0xF6,
1130 .regmodrm = true,
1131 .regtorm = true,
1132 .szoverride = false,
1133 .defsize = OPSIZE_BYTE,
1134 .allsize = -1,
1135 .group3 = true,
1136 .immediate = true,
1137 .emul = NULL /* group3 */
1138 },
1139 {
1140 /* Ev, Iz */
1141 .byte = 0xF7,
1142 .regmodrm = true,
1143 .regtorm = true,
1144 .szoverride = true,
1145 .defsize = -1,
1146 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1147 .group3 = true,
1148 .immediate = true,
1149 .flags = FLAG_immz,
1150 .emul = NULL /* group3 */
1151 },
1152
1153 /*
1154 * Group11
1155 */
1156 {
1157 /* Eb, Ib */
1158 .byte = 0xC6,
1159 .regmodrm = true,
1160 .regtorm = true,
1161 .szoverride = false,
1162 .defsize = OPSIZE_BYTE,
1163 .allsize = -1,
1164 .group11 = true,
1165 .immediate = true,
1166 .emul = NULL /* group11 */
1167 },
1168 {
1169 /* Ev, Iz */
1170 .byte = 0xC7,
1171 .regmodrm = true,
1172 .regtorm = true,
1173 .szoverride = true,
1174 .defsize = -1,
1175 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1176 .group11 = true,
1177 .immediate = true,
1178 .flags = FLAG_immz,
1179 .emul = NULL /* group11 */
1180 },
1181
1182 /*
1183 * OR
1184 */
1185 {
1186 /* Eb, Gb */
1187 .byte = 0x08,
1188 .regmodrm = true,
1189 .regtorm = true,
1190 .szoverride = false,
1191 .defsize = OPSIZE_BYTE,
1192 .allsize = -1,
1193 .emul = &x86_emul_or
1194 },
1195 {
1196 /* Ev, Gv */
1197 .byte = 0x09,
1198 .regmodrm = true,
1199 .regtorm = true,
1200 .szoverride = true,
1201 .defsize = -1,
1202 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1203 .emul = &x86_emul_or
1204 },
1205 {
1206 /* Gb, Eb */
1207 .byte = 0x0A,
1208 .regmodrm = true,
1209 .regtorm = false,
1210 .szoverride = false,
1211 .defsize = OPSIZE_BYTE,
1212 .allsize = -1,
1213 .emul = &x86_emul_or
1214 },
1215 {
1216 /* Gv, Ev */
1217 .byte = 0x0B,
1218 .regmodrm = true,
1219 .regtorm = false,
1220 .szoverride = true,
1221 .defsize = -1,
1222 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1223 .emul = &x86_emul_or
1224 },
1225
1226 /*
1227 * AND
1228 */
1229 {
1230 /* Eb, Gb */
1231 .byte = 0x20,
1232 .regmodrm = true,
1233 .regtorm = true,
1234 .szoverride = false,
1235 .defsize = OPSIZE_BYTE,
1236 .allsize = -1,
1237 .emul = &x86_emul_and
1238 },
1239 {
1240 /* Ev, Gv */
1241 .byte = 0x21,
1242 .regmodrm = true,
1243 .regtorm = true,
1244 .szoverride = true,
1245 .defsize = -1,
1246 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1247 .emul = &x86_emul_and
1248 },
1249 {
1250 /* Gb, Eb */
1251 .byte = 0x22,
1252 .regmodrm = true,
1253 .regtorm = false,
1254 .szoverride = false,
1255 .defsize = OPSIZE_BYTE,
1256 .allsize = -1,
1257 .emul = &x86_emul_and
1258 },
1259 {
1260 /* Gv, Ev */
1261 .byte = 0x23,
1262 .regmodrm = true,
1263 .regtorm = false,
1264 .szoverride = true,
1265 .defsize = -1,
1266 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1267 .emul = &x86_emul_and
1268 },
1269
1270 /*
1271 * SUB
1272 */
1273 {
1274 /* Eb, Gb */
1275 .byte = 0x28,
1276 .regmodrm = true,
1277 .regtorm = true,
1278 .szoverride = false,
1279 .defsize = OPSIZE_BYTE,
1280 .allsize = -1,
1281 .emul = &x86_emul_sub
1282 },
1283 {
1284 /* Ev, Gv */
1285 .byte = 0x29,
1286 .regmodrm = true,
1287 .regtorm = true,
1288 .szoverride = true,
1289 .defsize = -1,
1290 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1291 .emul = &x86_emul_sub
1292 },
1293 {
1294 /* Gb, Eb */
1295 .byte = 0x2A,
1296 .regmodrm = true,
1297 .regtorm = false,
1298 .szoverride = false,
1299 .defsize = OPSIZE_BYTE,
1300 .allsize = -1,
1301 .emul = &x86_emul_sub
1302 },
1303 {
1304 /* Gv, Ev */
1305 .byte = 0x2B,
1306 .regmodrm = true,
1307 .regtorm = false,
1308 .szoverride = true,
1309 .defsize = -1,
1310 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1311 .emul = &x86_emul_sub
1312 },
1313
1314 /*
1315 * XOR
1316 */
1317 {
1318 /* Eb, Gb */
1319 .byte = 0x30,
1320 .regmodrm = true,
1321 .regtorm = true,
1322 .szoverride = false,
1323 .defsize = OPSIZE_BYTE,
1324 .allsize = -1,
1325 .emul = &x86_emul_xor
1326 },
1327 {
1328 /* Ev, Gv */
1329 .byte = 0x31,
1330 .regmodrm = true,
1331 .regtorm = true,
1332 .szoverride = true,
1333 .defsize = -1,
1334 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1335 .emul = &x86_emul_xor
1336 },
1337 {
1338 /* Gb, Eb */
1339 .byte = 0x32,
1340 .regmodrm = true,
1341 .regtorm = false,
1342 .szoverride = false,
1343 .defsize = OPSIZE_BYTE,
1344 .allsize = -1,
1345 .emul = &x86_emul_xor
1346 },
1347 {
1348 /* Gv, Ev */
1349 .byte = 0x33,
1350 .regmodrm = true,
1351 .regtorm = false,
1352 .szoverride = true,
1353 .defsize = -1,
1354 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1355 .emul = &x86_emul_xor
1356 },
1357
1358 /*
1359 * MOV
1360 */
1361 {
1362 /* Eb, Gb */
1363 .byte = 0x88,
1364 .regmodrm = true,
1365 .regtorm = true,
1366 .szoverride = false,
1367 .defsize = OPSIZE_BYTE,
1368 .allsize = -1,
1369 .emul = &x86_emul_mov
1370 },
1371 {
1372 /* Ev, Gv */
1373 .byte = 0x89,
1374 .regmodrm = true,
1375 .regtorm = true,
1376 .szoverride = true,
1377 .defsize = -1,
1378 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1379 .emul = &x86_emul_mov
1380 },
1381 {
1382 /* Gb, Eb */
1383 .byte = 0x8A,
1384 .regmodrm = true,
1385 .regtorm = false,
1386 .szoverride = false,
1387 .defsize = OPSIZE_BYTE,
1388 .allsize = -1,
1389 .emul = &x86_emul_mov
1390 },
1391 {
1392 /* Gv, Ev */
1393 .byte = 0x8B,
1394 .regmodrm = true,
1395 .regtorm = false,
1396 .szoverride = true,
1397 .defsize = -1,
1398 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1399 .emul = &x86_emul_mov
1400 },
1401 {
1402 /* AL, Ob */
1403 .byte = 0xA0,
1404 .dmo = true,
1405 .todmo = false,
1406 .szoverride = false,
1407 .defsize = OPSIZE_BYTE,
1408 .allsize = -1,
1409 .emul = &x86_emul_mov
1410 },
1411 {
1412 /* rAX, Ov */
1413 .byte = 0xA1,
1414 .dmo = true,
1415 .todmo = false,
1416 .szoverride = true,
1417 .defsize = -1,
1418 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1419 .emul = &x86_emul_mov
1420 },
1421 {
1422 /* Ob, AL */
1423 .byte = 0xA2,
1424 .dmo = true,
1425 .todmo = true,
1426 .szoverride = false,
1427 .defsize = OPSIZE_BYTE,
1428 .allsize = -1,
1429 .emul = &x86_emul_mov
1430 },
1431 {
1432 /* Ov, rAX */
1433 .byte = 0xA3,
1434 .dmo = true,
1435 .todmo = true,
1436 .szoverride = true,
1437 .defsize = -1,
1438 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1439 .emul = &x86_emul_mov
1440 },
1441
1442 /*
1443 * MOVS
1444 */
1445 {
1446 /* Yb, Xb */
1447 .byte = 0xA4,
1448 .movs = true,
1449 .szoverride = false,
1450 .defsize = OPSIZE_BYTE,
1451 .allsize = -1,
1452 .emul = &x86_emul_movs
1453 },
1454 {
1455 /* Yv, Xv */
1456 .byte = 0xA5,
1457 .movs = true,
1458 .szoverride = true,
1459 .defsize = -1,
1460 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1461 .emul = &x86_emul_movs
1462 },
1463
1464 /*
1465 * STOS
1466 */
1467 {
1468 /* Yb, AL */
1469 .byte = 0xAA,
1470 .stos = true,
1471 .szoverride = false,
1472 .defsize = OPSIZE_BYTE,
1473 .allsize = -1,
1474 .emul = &x86_emul_stos
1475 },
1476 {
1477 /* Yv, rAX */
1478 .byte = 0xAB,
1479 .stos = true,
1480 .szoverride = true,
1481 .defsize = -1,
1482 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1483 .emul = &x86_emul_stos
1484 },
1485
1486 /*
1487 * LODS
1488 */
1489 {
1490 /* AL, Xb */
1491 .byte = 0xAC,
1492 .lods = true,
1493 .szoverride = false,
1494 .defsize = OPSIZE_BYTE,
1495 .allsize = -1,
1496 .emul = &x86_emul_lods
1497 },
1498 {
1499 /* rAX, Xv */
1500 .byte = 0xAD,
1501 .lods = true,
1502 .szoverride = true,
1503 .defsize = -1,
1504 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1505 .emul = &x86_emul_lods
1506 },
1507 };
1508
1509 static const struct x86_opcode secondary_opcode_table[] = {
1510 /*
1511 * MOVZX
1512 */
1513 {
1514 /* Gv, Eb */
1515 .byte = 0xB6,
1516 .regmodrm = true,
1517 .regtorm = false,
1518 .szoverride = true,
1519 .defsize = OPSIZE_BYTE,
1520 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1521 .flags = FLAG_ze,
1522 .emul = &x86_emul_mov
1523 },
1524 {
1525 /* Gv, Ew */
1526 .byte = 0xB7,
1527 .regmodrm = true,
1528 .regtorm = false,
1529 .szoverride = true,
1530 .defsize = OPSIZE_WORD,
1531 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1532 .flags = FLAG_ze,
1533 .emul = &x86_emul_mov
1534 },
1535 };
1536
1537 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1538
1539 /* [REX-present][enc][opsize] */
1540 static const struct x86_reg gpr_map__special[2][4][8] = {
1541 [false] = {
1542 /* No REX prefix. */
1543 [0b00] = {
1544 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1545 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1546 [2] = { -1, 0 },
1547 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1548 [4] = { -1, 0 },
1549 [5] = { -1, 0 },
1550 [6] = { -1, 0 },
1551 [7] = { -1, 0 },
1552 },
1553 [0b01] = {
1554 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1555 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1556 [2] = { -1, 0 },
1557 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1558 [4] = { -1, 0 },
1559 [5] = { -1, 0 },
1560 [6] = { -1, 0 },
1561 [7] = { -1, 0 },
1562 },
1563 [0b10] = {
1564 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1565 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1566 [2] = { -1, 0 },
1567 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1568 [4] = { -1, 0 },
1569 [5] = { -1, 0 },
1570 [6] = { -1, 0 },
1571 [7] = { -1, 0 },
1572 },
1573 [0b11] = {
1574 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1575 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1576 [2] = { -1, 0 },
1577 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1578 [4] = { -1, 0 },
1579 [5] = { -1, 0 },
1580 [6] = { -1, 0 },
1581 [7] = { -1, 0 },
1582 }
1583 },
1584 [true] = {
1585 /* Has REX prefix. */
1586 [0b00] = {
1587 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1588 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1589 [2] = { -1, 0 },
1590 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1591 [4] = { -1, 0 },
1592 [5] = { -1, 0 },
1593 [6] = { -1, 0 },
1594 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1595 },
1596 [0b01] = {
1597 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1598 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1599 [2] = { -1, 0 },
1600 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1601 [4] = { -1, 0 },
1602 [5] = { -1, 0 },
1603 [6] = { -1, 0 },
1604 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1605 },
1606 [0b10] = {
1607 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1608 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1609 [2] = { -1, 0 },
1610 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1611 [4] = { -1, 0 },
1612 [5] = { -1, 0 },
1613 [6] = { -1, 0 },
1614 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1615 },
1616 [0b11] = {
1617 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1618 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1619 [2] = { -1, 0 },
1620 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1621 [4] = { -1, 0 },
1622 [5] = { -1, 0 },
1623 [6] = { -1, 0 },
1624 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1625 }
1626 }
1627 };
1628
1629 /* [depends][enc][size] */
1630 static const struct x86_reg gpr_map[2][8][8] = {
1631 [false] = {
1632 /* Not extended. */
1633 [0b000] = {
1634 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1635 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1636 [2] = { -1, 0 },
1637 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1638 [4] = { -1, 0 },
1639 [5] = { -1, 0 },
1640 [6] = { -1, 0 },
1641 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1642 },
1643 [0b001] = {
1644 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1645 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1646 [2] = { -1, 0 },
1647 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1648 [4] = { -1, 0 },
1649 [5] = { -1, 0 },
1650 [6] = { -1, 0 },
1651 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1652 },
1653 [0b010] = {
1654 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1655 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1656 [2] = { -1, 0 },
1657 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1658 [4] = { -1, 0 },
1659 [5] = { -1, 0 },
1660 [6] = { -1, 0 },
1661 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1662 },
1663 [0b011] = {
1664 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1665 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1666 [2] = { -1, 0 },
1667 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1668 [4] = { -1, 0 },
1669 [5] = { -1, 0 },
1670 [6] = { -1, 0 },
1671 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1672 },
1673 [0b100] = {
1674 [0] = { -1, 0 }, /* SPECIAL */
1675 [1] = { -1, 0 }, /* SPECIAL */
1676 [2] = { -1, 0 },
1677 [3] = { -1, 0 }, /* SPECIAL */
1678 [4] = { -1, 0 },
1679 [5] = { -1, 0 },
1680 [6] = { -1, 0 },
1681 [7] = { -1, 0 }, /* SPECIAL */
1682 },
1683 [0b101] = {
1684 [0] = { -1, 0 }, /* SPECIAL */
1685 [1] = { -1, 0 }, /* SPECIAL */
1686 [2] = { -1, 0 },
1687 [3] = { -1, 0 }, /* SPECIAL */
1688 [4] = { -1, 0 },
1689 [5] = { -1, 0 },
1690 [6] = { -1, 0 },
1691 [7] = { -1, 0 }, /* SPECIAL */
1692 },
1693 [0b110] = {
1694 [0] = { -1, 0 }, /* SPECIAL */
1695 [1] = { -1, 0 }, /* SPECIAL */
1696 [2] = { -1, 0 },
1697 [3] = { -1, 0 }, /* SPECIAL */
1698 [4] = { -1, 0 },
1699 [5] = { -1, 0 },
1700 [6] = { -1, 0 },
1701 [7] = { -1, 0 }, /* SPECIAL */
1702 },
1703 [0b111] = {
1704 [0] = { -1, 0 }, /* SPECIAL */
1705 [1] = { -1, 0 }, /* SPECIAL */
1706 [2] = { -1, 0 },
1707 [3] = { -1, 0 }, /* SPECIAL */
1708 [4] = { -1, 0 },
1709 [5] = { -1, 0 },
1710 [6] = { -1, 0 },
1711 [7] = { -1, 0 }, /* SPECIAL */
1712 },
1713 },
1714 [true] = {
1715 /* Extended. */
1716 [0b000] = {
1717 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1718 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1719 [2] = { -1, 0 },
1720 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1721 [4] = { -1, 0 },
1722 [5] = { -1, 0 },
1723 [6] = { -1, 0 },
1724 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1725 },
1726 [0b001] = {
1727 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1728 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1729 [2] = { -1, 0 },
1730 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1731 [4] = { -1, 0 },
1732 [5] = { -1, 0 },
1733 [6] = { -1, 0 },
1734 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1735 },
1736 [0b010] = {
1737 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1738 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1739 [2] = { -1, 0 },
1740 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1741 [4] = { -1, 0 },
1742 [5] = { -1, 0 },
1743 [6] = { -1, 0 },
1744 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1745 },
1746 [0b011] = {
1747 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1748 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1749 [2] = { -1, 0 },
1750 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1751 [4] = { -1, 0 },
1752 [5] = { -1, 0 },
1753 [6] = { -1, 0 },
1754 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1755 },
1756 [0b100] = {
1757 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1758 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1759 [2] = { -1, 0 },
1760 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1761 [4] = { -1, 0 },
1762 [5] = { -1, 0 },
1763 [6] = { -1, 0 },
1764 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1765 },
1766 [0b101] = {
1767 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1768 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1769 [2] = { -1, 0 },
1770 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1771 [4] = { -1, 0 },
1772 [5] = { -1, 0 },
1773 [6] = { -1, 0 },
1774 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1775 },
1776 [0b110] = {
1777 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1778 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1779 [2] = { -1, 0 },
1780 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1781 [4] = { -1, 0 },
1782 [5] = { -1, 0 },
1783 [6] = { -1, 0 },
1784 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1785 },
1786 [0b111] = {
1787 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1788 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1789 [2] = { -1, 0 },
1790 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1791 [4] = { -1, 0 },
1792 [5] = { -1, 0 },
1793 [6] = { -1, 0 },
1794 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1795 },
1796 }
1797 };
1798
1799 static int
1800 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1801 {
1802 fsm->fn = NULL;
1803 return -1;
1804 }
1805
1806 static int
1807 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1808 {
1809 if (fsm->buf + n > fsm->end) {
1810 return -1;
1811 }
1812 memcpy(bytes, fsm->buf, n);
1813 return 0;
1814 }
1815
1816 static void
1817 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1818 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1819 {
1820 fsm->buf += n;
1821 if (fsm->buf > fsm->end) {
1822 fsm->fn = node_overflow;
1823 } else {
1824 fsm->fn = fn;
1825 }
1826 }
1827
1828 static const struct x86_reg *
1829 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1830 {
1831 enc &= 0b11;
1832 if (regsize == 8) {
 1833 		/* A 64bit operand can occur without a REX prefix; use the REX-style map. */
1834 return &gpr_map__special[1][enc][regsize-1];
1835 }
1836 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1837 }
1838
1839 /*
1840 * Special node, for MOVS. Fake two displacements of zero on the source and
1841 * destination registers.
1842 */
1843 static int
1844 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1845 {
1846 size_t adrsize;
1847
1848 adrsize = instr->address_size;
1849
1850 /* DS:RSI */
1851 instr->src.type = STORE_REG;
1852 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1853 instr->src.disp.type = DISP_0;
1854
1855 /* ES:RDI, force ES */
1856 instr->dst.type = STORE_REG;
1857 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1858 instr->dst.disp.type = DISP_0;
1859 instr->dst.hardseg = NVMM_X64_SEG_ES;
1860
1861 fsm_advance(fsm, 0, NULL);
1862
1863 return 0;
1864 }
1865
1866 /*
1867 * Special node, for STOS and LODS. Fake a displacement of zero on the
1868 * destination register.
1869 */
1870 static int
1871 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1872 {
1873 const struct x86_opcode *opcode = instr->opcode;
1874 struct x86_store *stlo, *streg;
1875 size_t adrsize, regsize;
1876
1877 adrsize = instr->address_size;
1878 regsize = instr->operand_size;
1879
1880 if (opcode->stos) {
1881 streg = &instr->src;
1882 stlo = &instr->dst;
1883 } else {
1884 streg = &instr->dst;
1885 stlo = &instr->src;
1886 }
1887
1888 streg->type = STORE_REG;
1889 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1890
1891 stlo->type = STORE_REG;
1892 if (opcode->stos) {
1893 /* ES:RDI, force ES */
1894 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1895 stlo->hardseg = NVMM_X64_SEG_ES;
1896 } else {
1897 /* DS:RSI */
1898 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1899 }
1900 stlo->disp.type = DISP_0;
1901
1902 fsm_advance(fsm, 0, NULL);
1903
1904 return 0;
1905 }
1906
1907 static int
1908 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1909 {
1910 const struct x86_opcode *opcode = instr->opcode;
1911 struct x86_store *stdmo, *streg;
1912 size_t adrsize, regsize;
1913
1914 adrsize = instr->address_size;
1915 regsize = instr->operand_size;
1916
1917 if (opcode->todmo) {
1918 streg = &instr->src;
1919 stdmo = &instr->dst;
1920 } else {
1921 streg = &instr->dst;
1922 stdmo = &instr->src;
1923 }
1924
1925 streg->type = STORE_REG;
1926 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1927
1928 stdmo->type = STORE_DMO;
1929 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1930 return -1;
1931 }
1932 fsm_advance(fsm, adrsize, NULL);
1933
1934 return 0;
1935 }
1936
1937 static inline uint64_t
1938 sign_extend(uint64_t val, int size)
1939 {
1940 if (size == 1) {
1941 if (val & __BIT(7))
1942 val |= 0xFFFFFFFFFFFFFF00;
1943 } else if (size == 2) {
1944 if (val & __BIT(15))
1945 val |= 0xFFFFFFFFFFFF0000;
1946 } else if (size == 4) {
1947 if (val & __BIT(31))
1948 val |= 0xFFFFFFFF00000000;
1949 }
1950 return val;
1951 }
1952
1953 static int
1954 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1955 {
1956 const struct x86_opcode *opcode = instr->opcode;
1957 struct x86_store *store;
1958 uint8_t immsize;
1959 size_t sesize = 0;
1960
1961 /* The immediate is the source */
1962 store = &instr->src;
1963 immsize = instr->operand_size;
1964
1965 if (opcode->flags & FLAG_imm8) {
1966 sesize = immsize;
1967 immsize = 1;
1968 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1969 sesize = immsize;
1970 immsize = 4;
1971 }
1972
1973 store->type = STORE_IMM;
1974 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1975 return -1;
1976 }
1977 fsm_advance(fsm, immsize, NULL);
1978
1979 if (sesize != 0) {
 1980 		store->u.imm.data = sign_extend(store->u.imm.data, immsize); /* extend from the bytes read */
1981 }
1982
1983 return 0;
1984 }
1985
1986 static int
1987 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1988 {
1989 const struct x86_opcode *opcode = instr->opcode;
1990 uint64_t data = 0;
1991 size_t n;
1992
1993 if (instr->strm->disp.type == DISP_1) {
1994 n = 1;
1995 } else { /* DISP4 */
1996 n = 4;
1997 }
1998
1999 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
2000 return -1;
2001 }
2002
2003 if (__predict_true(fsm->is64bit)) {
2004 data = sign_extend(data, n);
2005 }
2006
2007 instr->strm->disp.data = data;
2008
2009 if (opcode->immediate) {
2010 fsm_advance(fsm, n, node_immediate);
2011 } else {
2012 fsm_advance(fsm, n, NULL);
2013 }
2014
2015 return 0;
2016 }
2017
2018 static const struct x86_reg *
2019 get_register_idx(struct x86_instr *instr, uint8_t index)
2020 {
2021 uint8_t enc = index;
2022 const struct x86_reg *reg;
2023 size_t regsize;
2024
2025 regsize = instr->address_size;
2026 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2027
2028 if (reg->num == -1) {
2029 reg = resolve_special_register(instr, enc, regsize);
2030 }
2031
2032 return reg;
2033 }
2034
2035 static const struct x86_reg *
2036 get_register_bas(struct x86_instr *instr, uint8_t base)
2037 {
2038 uint8_t enc = base;
2039 const struct x86_reg *reg;
2040 size_t regsize;
2041
2042 regsize = instr->address_size;
2043 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2044 if (reg->num == -1) {
2045 reg = resolve_special_register(instr, enc, regsize);
2046 }
2047
2048 return reg;
2049 }
2050
2051 static int
2052 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2053 {
2054 const struct x86_opcode *opcode;
2055 uint8_t scale, index, base;
2056 bool noindex, nobase;
2057 uint8_t byte;
2058
2059 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2060 return -1;
2061 }
2062
2063 scale = ((byte & 0b11000000) >> 6);
2064 index = ((byte & 0b00111000) >> 3);
2065 base = ((byte & 0b00000111) >> 0);
2066
2067 opcode = instr->opcode;
2068
2069 noindex = false;
2070 nobase = false;
2071
2072 if (index == 0b100 && !instr->rexpref.x) {
2073 /* Special case: the index is null */
2074 noindex = true;
2075 }
2076
2077 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2078 /* Special case: the base is null + disp32 */
2079 instr->strm->disp.type = DISP_4;
2080 nobase = true;
2081 }
2082
2083 instr->strm->type = STORE_SIB;
2084 instr->strm->u.sib.scale = (1 << scale);
2085 if (!noindex)
2086 instr->strm->u.sib.idx = get_register_idx(instr, index);
2087 if (!nobase)
2088 instr->strm->u.sib.bas = get_register_bas(instr, base);
2089
2090 /* May have a displacement, or an immediate */
2091 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2092 fsm_advance(fsm, 1, node_disp);
2093 } else if (opcode->immediate) {
2094 fsm_advance(fsm, 1, node_immediate);
2095 } else {
2096 fsm_advance(fsm, 1, NULL);
2097 }
2098
2099 return 0;
2100 }
2101
2102 static const struct x86_reg *
2103 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2104 {
2105 uint8_t enc = instr->regmodrm.reg;
2106 const struct x86_reg *reg;
2107 size_t regsize;
2108
2109 regsize = instr->operand_size;
2110
2111 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2112 if (reg->num == -1) {
2113 reg = resolve_special_register(instr, enc, regsize);
2114 }
2115
2116 return reg;
2117 }
2118
2119 static const struct x86_reg *
2120 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2121 {
2122 uint8_t enc = instr->regmodrm.rm;
2123 const struct x86_reg *reg;
2124 size_t regsize;
2125
2126 if (instr->strm->disp.type == DISP_NONE) {
2127 regsize = instr->operand_size;
2128 } else {
2129 /* Indirect access, the size is that of the address. */
2130 regsize = instr->address_size;
2131 }
2132
2133 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2134 if (reg->num == -1) {
2135 reg = resolve_special_register(instr, enc, regsize);
2136 }
2137
2138 return reg;
2139 }
2140
2141 static inline bool
2142 has_sib(struct x86_instr *instr)
2143 {
2144 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2145 }
2146
2147 static inline bool
2148 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2149 {
2150 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2151 instr->regmodrm.rm == RM_RBP_DISP32);
2152 }
2153
2154 static inline bool
2155 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2156 {
2157 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2158 instr->regmodrm.rm == RM_RBP_DISP32);
2159 }
2160
2161 static enum x86_disp_type
2162 get_disp_type(struct x86_instr *instr)
2163 {
2164 switch (instr->regmodrm.mod) {
2165 case MOD_DIS0: /* indirect */
2166 return DISP_0;
2167 case MOD_DIS1: /* indirect+1 */
2168 return DISP_1;
2169 case MOD_DIS4: /* indirect+4 */
2170 return DISP_4;
2171 case MOD_REG: /* direct */
2172 default: /* gcc */
2173 return DISP_NONE;
2174 }
2175 }
2176
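/*
 * ModRM layout: mod (bits 7-6), reg (bits 5-3), rm (bits 2-0). For
 * example, a ModRM byte of 0x08 gives mod=00, reg=001, rm=000: with no
 * REX prefix, a memory operand addressed by RAX with no displacement,
 * and RCX as the register operand.
 */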
2177 static int
2178 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2179 {
2180 struct x86_store *strg, *strm;
2181 const struct x86_opcode *opcode;
2182 const struct x86_reg *reg;
2183 uint8_t byte;
2184
2185 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2186 return -1;
2187 }
2188
2189 opcode = instr->opcode;
2190
2191 instr->regmodrm.present = true;
2192 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2193 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2194 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2195
2196 if (opcode->regtorm) {
2197 strg = &instr->src;
2198 strm = &instr->dst;
2199 } else { /* RM to REG */
2200 strm = &instr->src;
2201 strg = &instr->dst;
2202 }
2203
2204 /* Save for later use. */
2205 instr->strm = strm;
2206
2207 /*
2208 * Special cases: Groups. The REG field of REGMODRM is the index in
2209 * the group. op1 gets overwritten in the Immediate node, if any.
2210 */
2211 if (opcode->group1) {
2212 if (group1[instr->regmodrm.reg].emul == NULL) {
2213 return -1;
2214 }
2215 instr->emul = group1[instr->regmodrm.reg].emul;
2216 } else if (opcode->group3) {
2217 if (group3[instr->regmodrm.reg].emul == NULL) {
2218 return -1;
2219 }
2220 instr->emul = group3[instr->regmodrm.reg].emul;
2221 } else if (opcode->group11) {
2222 if (group11[instr->regmodrm.reg].emul == NULL) {
2223 return -1;
2224 }
2225 instr->emul = group11[instr->regmodrm.reg].emul;
2226 }
2227
2228 if (!opcode->immediate) {
2229 reg = get_register_reg(instr, opcode);
2230 if (reg == NULL) {
2231 return -1;
2232 }
2233 strg->type = STORE_REG;
2234 strg->u.reg = reg;
2235 }
2236
2237 /* The displacement applies to RM. */
2238 strm->disp.type = get_disp_type(instr);
2239
2240 if (has_sib(instr)) {
2241 /* Overwrites RM */
2242 fsm_advance(fsm, 1, node_sib);
2243 return 0;
2244 }
2245
2246 if (is_rip_relative(fsm, instr)) {
2247 /* Overwrites RM */
2248 strm->type = STORE_REG;
2249 strm->u.reg = &gpr_map__rip;
2250 strm->disp.type = DISP_4;
2251 fsm_advance(fsm, 1, node_disp);
2252 return 0;
2253 }
2254
2255 if (is_disp32_only(fsm, instr)) {
2256 /* Overwrites RM */
2257 strm->type = STORE_REG;
2258 strm->u.reg = NULL;
2259 strm->disp.type = DISP_4;
2260 fsm_advance(fsm, 1, node_disp);
2261 return 0;
2262 }
2263
2264 reg = get_register_rm(instr, opcode);
2265 if (reg == NULL) {
2266 return -1;
2267 }
2268 strm->type = STORE_REG;
2269 strm->u.reg = reg;
2270
2271 if (strm->disp.type == DISP_NONE) {
2272 /* Direct register addressing mode */
2273 if (opcode->immediate) {
2274 fsm_advance(fsm, 1, node_immediate);
2275 } else {
2276 fsm_advance(fsm, 1, NULL);
2277 }
2278 } else if (strm->disp.type == DISP_0) {
2279 /* Indirect register addressing mode */
2280 if (opcode->immediate) {
2281 fsm_advance(fsm, 1, node_immediate);
2282 } else {
2283 fsm_advance(fsm, 1, NULL);
2284 }
2285 } else {
2286 fsm_advance(fsm, 1, node_disp);
2287 }
2288
2289 return 0;
2290 }
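
/*
 * Worked example (editorial sketch, not part of the decoder itself): for a
 * guest instruction "mov %eax, (%rbx)", encoded 0x89 0x03 and assumed to be
 * matched as MOV Ev,Gv in the primary table, the ModRM byte 0x03 yields:
 *
 *	mod = (0x03 & 0b11000000) >> 6 = 0  -> MOD_DIS0, i.e. DISP_0
 *	reg = (0x03 & 0b00111000) >> 3 = 0  -> %eax, the register (source) operand
 *	rm  = (0x03 & 0b00000111)      = 3  -> %rbx, base of the memory operand
 *
 * There is no SIB byte (rm != 4), it is not RIP-relative or disp32-only
 * (rm != 5), and DISP_0 carries no displacement bytes, so the FSM advances
 * one byte and terminates.
 */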
2291
2292 static size_t
2293 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2294 {
2295 const struct x86_opcode *opcode = instr->opcode;
2296 int opsize;
2297
2298 /* Get the opsize */
2299 if (!opcode->szoverride) {
2300 opsize = opcode->defsize;
2301 } else if (instr->rexpref.present && instr->rexpref.w) {
2302 opsize = 8;
2303 } else {
2304 if (!fsm->is16bit) {
2305 if (instr->legpref.opr_ovr) {
2306 opsize = 2;
2307 } else {
2308 opsize = 4;
2309 }
2310 } else { /* 16bit */
2311 if (instr->legpref.opr_ovr) {
2312 opsize = 4;
2313 } else {
2314 opsize = 2;
2315 }
2316 }
2317 }
2318
2319 /* Check whether this operand size is valid for the opcode. */
2320 if ((opcode->allsize & opsize) == 0) {
2321 /* XXX do we care? */
2322 }
2323
2324 return opsize;
2325 }
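
/*
 * Example (sketch, assuming MOV 0x89 is marked szoverride in the tables):
 * in 64-bit mode, "mov %rax, (%rbx)" carries REX.W and resolves to an 8-byte
 * operand, "mov %ax, (%rbx)" carries the 0x66 override and resolves to 2
 * bytes, and plain "mov %eax, (%rbx)" falls through to the 4-byte default.
 */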
2326
2327 static size_t
2328 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2329 {
2330 if (fsm->is64bit) {
2331 if (__predict_false(instr->legpref.adr_ovr)) {
2332 return 4;
2333 }
2334 return 8;
2335 }
2336
2337 if (fsm->is32bit) {
2338 if (__predict_false(instr->legpref.adr_ovr)) {
2339 return 2;
2340 }
2341 return 4;
2342 }
2343
2344 /* 16bit. */
2345 if (__predict_false(instr->legpref.adr_ovr)) {
2346 return 4;
2347 }
2348 return 2;
2349 }
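
/*
 * In other words: 8 bytes in 64-bit mode (4 with the 0x67 override), 4 bytes
 * in 32-bit mode (2 with the override), and 2 bytes in 16-bit mode (4 with
 * the override).
 */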
2350
2351 static int
2352 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2353 {
2354 const struct x86_opcode *opcode;
2355 uint8_t byte;
2356 size_t i, n;
2357
2358 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2359 return -1;
2360 }
2361
2362 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2363 for (i = 0; i < n; i++) {
2364 if (primary_opcode_table[i].byte == byte)
2365 break;
2366 }
2367 if (i == n) {
2368 return -1;
2369 }
2370 opcode = &primary_opcode_table[i];
2371
2372 instr->opcode = opcode;
2373 instr->emul = opcode->emul;
2374 instr->operand_size = get_operand_size(fsm, instr);
2375 instr->address_size = get_address_size(fsm, instr);
2376
2377 if (fsm->is64bit && (instr->operand_size == 4)) {
2378 /* Zero-extend to 64 bits. */
2379 instr->zeroextend_mask = ~size_to_mask(4);
2380 }
2381
2382 if (opcode->regmodrm) {
2383 fsm_advance(fsm, 1, node_regmodrm);
2384 } else if (opcode->dmo) {
2385 /* Direct-Memory Offsets */
2386 fsm_advance(fsm, 1, node_dmo);
2387 } else if (opcode->stos || opcode->lods) {
2388 fsm_advance(fsm, 1, node_stlo);
2389 } else if (opcode->movs) {
2390 fsm_advance(fsm, 1, node_movs);
2391 } else {
2392 return -1;
2393 }
2394
2395 return 0;
2396 }
2397
2398 static int
2399 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2400 {
2401 const struct x86_opcode *opcode;
2402 uint8_t byte;
2403 size_t i, n;
2404
2405 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2406 return -1;
2407 }
2408
2409 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2410 for (i = 0; i < n; i++) {
2411 if (secondary_opcode_table[i].byte == byte)
2412 break;
2413 }
2414 if (i == n) {
2415 return -1;
2416 }
2417 opcode = &secondary_opcode_table[i];
2418
2419 instr->opcode = opcode;
2420 instr->emul = opcode->emul;
2421 instr->operand_size = get_operand_size(fsm, instr);
2422 instr->address_size = get_address_size(fsm, instr);
2423
2424 if (fsm->is64bit && (instr->operand_size == 4)) {
2425 /* Zero-extend to 64 bits. */
2426 instr->zeroextend_mask = ~size_to_mask(4);
2427 }
2428
2429 if (opcode->flags & FLAG_ze) {
2430 /*
2431 * Compute the mask for zero-extension. Update the operand size,
2432 * since we move fewer bytes.
2433 */
2434 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2435 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2436 instr->operand_size = opcode->defsize;
2437 }
2438
2439 if (opcode->regmodrm) {
2440 fsm_advance(fsm, 1, node_regmodrm);
2441 } else {
2442 return -1;
2443 }
2444
2445 return 0;
2446 }
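
/*
 * Worked example (sketch, assuming the secondary table contains MOVZX
 * 0x0F 0xB6 with defsize = 1 and FLAG_ze, and that size_to_mask(n) covers
 * the low n bytes): for "movzbl (%rbx), %eax" in 64-bit mode the operand
 * size starts at 4, so zeroextend_mask = ~size_to_mask(4). The FLAG_ze
 * block then ORs in size_to_mask(4) and clears size_to_mask(1), leaving
 * 0xffffffffffffff00, and shrinks the operand size to 1. Only one byte is
 * transferred, and the mask later zeroes bits 8-63 of the destination
 * register, matching the architectural MOVZX behavior.
 */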
2447
2448 static int
2449 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2450 {
2451 uint8_t byte;
2452
2453 #define ESCAPE 0x0F
2454 #define VEX_1 0xC5
2455 #define VEX_2 0xC4
2456 #define XOP 0x8F
2457
2458 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2459 return -1;
2460 }
2461
2462 /*
2463 * We don't take XOP. It is AMD-specific, and it was removed shortly
2464 * after being introduced.
2465 */
2466 if (byte == ESCAPE) {
2467 fsm_advance(fsm, 1, node_secondary_opcode);
2468 } else if (!instr->rexpref.present) {
2469 if (byte == VEX_1) {
2470 return -1;
2471 } else if (byte == VEX_2) {
2472 return -1;
2473 } else {
2474 fsm->fn = node_primary_opcode;
2475 }
2476 } else {
2477 fsm->fn = node_primary_opcode;
2478 }
2479
2480 return 0;
2481 }
2482
2483 static int
2484 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2485 {
2486 struct x86_rexpref *rexpref = &instr->rexpref;
2487 uint8_t byte;
2488 size_t n = 0;
2489
2490 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2491 return -1;
2492 }
2493
2494 if (byte >= 0x40 && byte <= 0x4F) {
2495 if (__predict_false(!fsm->is64bit)) {
2496 return -1;
2497 }
2498 rexpref->present = true;
2499 rexpref->w = ((byte & 0x8) != 0);
2500 rexpref->r = ((byte & 0x4) != 0);
2501 rexpref->x = ((byte & 0x2) != 0);
2502 rexpref->b = ((byte & 0x1) != 0);
2503 n = 1;
2504 }
2505
2506 fsm_advance(fsm, n, node_main);
2507 return 0;
2508 }
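
/*
 * Example (sketch): the REX byte 0x48 sets only W, so "mov %rax, (%rbx)" =
 * 0x48 0x89 0x03 decodes with an 8-byte operand; 0x4C also sets R, so
 * "mov %r9, (%rbx)" = 0x4C 0x89 0x0B selects %r9 as the register operand.
 */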
2509
2510 static int
2511 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2512 {
2513 uint8_t byte;
2514
2515 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2516 return -1;
2517 }
2518
2519 if (byte == LEG_OPR_OVR) {
2520 instr->legpref.opr_ovr = 1;
2521 } else if (byte == LEG_OVR_DS) {
2522 instr->legpref.seg = NVMM_X64_SEG_DS;
2523 } else if (byte == LEG_OVR_ES) {
2524 instr->legpref.seg = NVMM_X64_SEG_ES;
2525 } else if (byte == LEG_REP) {
2526 instr->legpref.rep = 1;
2527 } else if (byte == LEG_OVR_GS) {
2528 instr->legpref.seg = NVMM_X64_SEG_GS;
2529 } else if (byte == LEG_OVR_FS) {
2530 instr->legpref.seg = NVMM_X64_SEG_FS;
2531 } else if (byte == LEG_ADR_OVR) {
2532 instr->legpref.adr_ovr = 1;
2533 } else if (byte == LEG_OVR_CS) {
2534 instr->legpref.seg = NVMM_X64_SEG_CS;
2535 } else if (byte == LEG_OVR_SS) {
2536 instr->legpref.seg = NVMM_X64_SEG_SS;
2537 } else if (byte == LEG_REPN) {
2538 instr->legpref.repn = 1;
2539 } else if (byte == LEG_LOCK) {
2540 /* ignore */
2541 } else {
2542 /* not a legacy prefix */
2543 fsm_advance(fsm, 0, node_rex_prefix);
2544 return 0;
2545 }
2546
2547 fsm_advance(fsm, 1, node_legacy_prefix);
2548 return 0;
2549 }
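
/*
 * Example (sketch): for "mov %ax, (%rbx)" = 0x66 0x89 0x03, the first pass
 * records 0x66 as the operand-size override and advances; the second pass
 * sees 0x89, which is not a legacy prefix, so it hands the same byte over
 * to node_rex_prefix (which, finding no REX byte, falls through to the
 * opcode nodes).
 */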
2550
2551 static int
2552 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2553 struct nvmm_x64_state *state)
2554 {
2555 struct x86_decode_fsm fsm;
2556 int ret;
2557
2558 memset(instr, 0, sizeof(*instr));
2559 instr->legpref.seg = -1;
2560 instr->src.hardseg = -1;
2561 instr->dst.hardseg = -1;
2562
2563 fsm.is64bit = is_64bit(state);
2564 fsm.is32bit = is_32bit(state);
2565 fsm.is16bit = is_16bit(state);
2566
2567 fsm.fn = node_legacy_prefix;
2568 fsm.buf = inst_bytes;
2569 fsm.end = inst_bytes + inst_len;
2570
2571 while (fsm.fn != NULL) {
2572 ret = (*fsm.fn)(&fsm, instr);
2573 if (ret == -1)
2574 return -1;
2575 }
2576
2577 instr->len = fsm.buf - inst_bytes;
2578
2579 return 0;
2580 }
2581
2582 /* -------------------------------------------------------------------------- */
2583
2584 #define EXEC_INSTR(sz, instr) \
2585 static uint##sz##_t \
2586 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2587 { \
2588 uint##sz##_t res; \
2589 __asm __volatile ( \
2590 #instr " %2, %3;" \
2591 "mov %3, %1;" \
2592 "pushfq;" \
2593 "popq %0" \
2594 : "=r" (*rflags), "=r" (res) \
2595 : "r" (op1), "r" (op2)); \
2596 return res; \
2597 }
2598
2599 #define EXEC_DISPATCHER(instr) \
2600 static uint64_t \
2601 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2602 { \
2603 switch (opsize) { \
2604 case 1: \
2605 return exec_##instr##8(op1, op2, rflags); \
2606 case 2: \
2607 return exec_##instr##16(op1, op2, rflags); \
2608 case 4: \
2609 return exec_##instr##32(op1, op2, rflags); \
2610 default: \
2611 return exec_##instr##64(op1, op2, rflags); \
2612 } \
2613 }
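
/*
 * For reference, EXEC_INSTR(8, sub) expands to the equivalent of:
 *
 *	static uint8_t
 *	exec_sub8(uint8_t op1, uint8_t op2, uint64_t *rflags)
 *	{
 *		uint8_t res;
 *		__asm __volatile (
 *			"sub %2, %3;"
 *			"mov %3, %1;"
 *			"pushfq;"
 *			"popq %0"
 *			: "=r" (*rflags), "=r" (res)
 *			: "r" (op1), "r" (op2));
 *		return res;
 *	}
 *
 * That is, the host CPU executes the instruction on the two operands and
 * the resulting RFLAGS are captured with pushfq/popq, to be merged into
 * the guest's RFLAGS by the x86_func_* wrappers below.
 */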
2614
2615 /* SUB: ret = op1 - op2 */
2616 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2617 EXEC_INSTR(8, sub)
2618 EXEC_INSTR(16, sub)
2619 EXEC_INSTR(32, sub)
2620 EXEC_INSTR(64, sub)
2621 EXEC_DISPATCHER(sub)
2622
2623 /* OR: ret = op1 | op2 */
2624 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2625 EXEC_INSTR(8, or)
2626 EXEC_INSTR(16, or)
2627 EXEC_INSTR(32, or)
2628 EXEC_INSTR(64, or)
2629 EXEC_DISPATCHER(or)
2630
2631 /* AND: ret = op1 & op2 */
2632 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2633 EXEC_INSTR(8, and)
2634 EXEC_INSTR(16, and)
2635 EXEC_INSTR(32, and)
2636 EXEC_INSTR(64, and)
2637 EXEC_DISPATCHER(and)
2638
2639 /* XOR: ret = op1 ^ op2 */
2640 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2641 EXEC_INSTR(8, xor)
2642 EXEC_INSTR(16, xor)
2643 EXEC_INSTR(32, xor)
2644 EXEC_INSTR(64, xor)
2645 EXEC_DISPATCHER(xor)
2646
2647 /* -------------------------------------------------------------------------- */
2648
2649 /*
2650 * Emulation functions. We don't care about the order of the operands, except
2651 * for SUB, CMP and TEST. For these we look at mem->write to determine which
2652 * operand is op1 and which is op2.
2653 */
2654
2655 static void
2656 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2657 {
2658 uint64_t *retval = (uint64_t *)mem->data;
2659 const bool write = mem->write;
2660 uint64_t *op1, op2, fl, ret;
2661
2662 op1 = (uint64_t *)mem->data;
2663 op2 = 0;
2664
2665 /* Fetch the value to be OR'ed (op2). */
2666 mem->data = (uint8_t *)&op2;
2667 mem->write = false;
2668 (*__callbacks.mem)(mem);
2669
2670 /* Perform the OR. */
2671 ret = exec_or(*op1, op2, &fl, mem->size);
2672
2673 if (write) {
2674 /* Write back the result. */
2675 mem->data = (uint8_t *)&ret;
2676 mem->write = true;
2677 (*__callbacks.mem)(mem);
2678 } else {
2679 /* Return data to the caller. */
2680 *retval = ret;
2681 }
2682
2683 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2684 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2685 }
2686
2687 static void
2688 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2689 {
2690 uint64_t *retval = (uint64_t *)mem->data;
2691 const bool write = mem->write;
2692 uint64_t *op1, op2, fl, ret;
2693
2694 op1 = (uint64_t *)mem->data;
2695 op2 = 0;
2696
2697 /* Fetch the value to be AND'ed (op2). */
2698 mem->data = (uint8_t *)&op2;
2699 mem->write = false;
2700 (*__callbacks.mem)(mem);
2701
2702 /* Perform the AND. */
2703 ret = exec_and(*op1, op2, &fl, mem->size);
2704
2705 if (write) {
2706 /* Write back the result. */
2707 mem->data = (uint8_t *)&ret;
2708 mem->write = true;
2709 (*__callbacks.mem)(mem);
2710 } else {
2711 /* Return data to the caller. */
2712 *retval = ret;
2713 }
2714
2715 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2716 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2717 }
2718
2719 static void
2720 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2721 {
2722 uint64_t *retval = (uint64_t *)mem->data;
2723 const bool write = mem->write;
2724 uint64_t *op1, *op2, fl, ret;
2725 uint64_t tmp;
2726 bool memop1;
2727
2728 memop1 = !mem->write;
2729 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2730 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2731
2732 /* Fetch the value to be SUB'ed (op1 or op2). */
2733 mem->data = (uint8_t *)&tmp;
2734 mem->write = false;
2735 (*__callbacks.mem)(mem);
2736
2737 /* Perform the SUB. */
2738 ret = exec_sub(*op1, *op2, &fl, mem->size);
2739
2740 if (write) {
2741 /* Write back the result. */
2742 mem->data = (uint8_t *)&ret;
2743 mem->write = true;
2744 (*__callbacks.mem)(mem);
2745 } else {
2746 /* Return data to the caller. */
2747 *retval = ret;
2748 }
2749
2750 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2751 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2752 }
2753
2754 static void
2755 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2756 {
2757 uint64_t *retval = (uint64_t *)mem->data;
2758 const bool write = mem->write;
2759 uint64_t *op1, op2, fl, ret;
2760
2761 op1 = (uint64_t *)mem->data;
2762 op2 = 0;
2763
2764 /* Fetch the value to be XOR'ed (op2). */
2765 mem->data = (uint8_t *)&op2;
2766 mem->write = false;
2767 (*__callbacks.mem)(mem);
2768
2769 /* Perform the XOR. */
2770 ret = exec_xor(*op1, op2, &fl, mem->size);
2771
2772 if (write) {
2773 /* Write back the result. */
2774 mem->data = (uint8_t *)&ret;
2775 mem->write = true;
2776 (*__callbacks.mem)(mem);
2777 } else {
2778 /* Return data to the caller. */
2779 *retval = ret;
2780 }
2781
2782 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2783 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2784 }
2785
2786 static void
2787 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2788 {
2789 uint64_t *op1, *op2, fl;
2790 uint64_t tmp;
2791 bool memop1;
2792
2793 memop1 = !mem->write;
2794 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2795 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2796
2797 /* Fetch the value to be CMP'ed (op1 or op2). */
2798 mem->data = (uint8_t *)&tmp;
2799 mem->write = false;
2800 (*__callbacks.mem)(mem);
2801
2802 /* Perform the CMP. */
2803 exec_sub(*op1, *op2, &fl, mem->size);
2804
2805 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2806 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2807 }
2808
2809 static void
2810 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2811 {
2812 uint64_t *op1, *op2, fl;
2813 uint64_t tmp;
2814 bool memop1;
2815
2816 memop1 = !mem->write;
2817 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2818 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2819
2820 /* Fetch the value to be TEST'ed (op1 or op2). */
2821 mem->data = (uint8_t *)&tmp;
2822 mem->write = false;
2823 (*__callbacks.mem)(mem);
2824
2825 /* Perform the TEST. */
2826 exec_and(*op1, *op2, &fl, mem->size);
2827
2828 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2829 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2830 }
2831
2832 static void
2833 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2834 {
2835 /*
2836 * Nothing special, just move without emulation.
2837 */
2838 (*__callbacks.mem)(mem);
2839 }
2840
2841 static void
2842 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2843 {
2844 /*
2845 * Just move, and update RDI.
2846 */
2847 (*__callbacks.mem)(mem);
2848
2849 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2850 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2851 } else {
2852 gprs[NVMM_X64_GPR_RDI] += mem->size;
2853 }
2854 }
2855
2856 static void
2857 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2858 {
2859 /*
2860 * Just move, and update RSI.
2861 */
2862 (*__callbacks.mem)(mem);
2863
2864 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2865 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2866 } else {
2867 gprs[NVMM_X64_GPR_RSI] += mem->size;
2868 }
2869 }
2870
2871 static void
2872 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2873 {
2874 /*
2875 * Special instruction: double memory operand. Don't call the callback,
2876 * because the copy has already been performed by assist_mem_double().
2877 */
2878
2879 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2880 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2881 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2882 } else {
2883 gprs[NVMM_X64_GPR_RSI] += mem->size;
2884 gprs[NVMM_X64_GPR_RDI] += mem->size;
2885 }
2886 }
2887
2888 /* -------------------------------------------------------------------------- */
2889
2890 static inline uint64_t
2891 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2892 {
2893 uint64_t val;
2894
2895 val = state->gprs[gpr];
2896 val &= size_to_mask(instr->address_size);
2897
2898 return val;
2899 }
2900
2901 static int
2902 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2903 struct x86_store *store, gvaddr_t *gvap, size_t size)
2904 {
2905 struct x86_sib *sib;
2906 gvaddr_t gva = 0;
2907 uint64_t reg;
2908 int ret, seg;
2909
2910 if (store->type == STORE_SIB) {
2911 sib = &store->u.sib;
2912 if (sib->bas != NULL)
2913 gva += gpr_read_address(instr, state, sib->bas->num);
2914 if (sib->idx != NULL) {
2915 reg = gpr_read_address(instr, state, sib->idx->num);
2916 gva += sib->scale * reg;
2917 }
2918 } else if (store->type == STORE_REG) {
2919 if (store->u.reg == NULL) {
2920 /* The base is null. Happens with disp32-only. */
2921 } else {
2922 gva = gpr_read_address(instr, state, store->u.reg->num);
2923 }
2924 } else {
2925 gva = store->u.dmo;
2926 }
2927
2928 if (store->disp.type != DISP_NONE) {
2929 gva += store->disp.data;
2930 }
2931
2932 if (store->hardseg != -1) {
2933 seg = store->hardseg;
2934 } else {
2935 if (__predict_false(instr->legpref.seg != -1)) {
2936 seg = instr->legpref.seg;
2937 } else {
2938 seg = NVMM_X64_SEG_DS;
2939 }
2940 }
2941
2942 if (__predict_true(is_long_mode(state))) {
2943 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2944 segment_apply(&state->segs[seg], &gva);
2945 }
2946 } else {
2947 ret = segment_check(&state->segs[seg], gva, size);
2948 if (ret == -1)
2949 return -1;
2950 segment_apply(&state->segs[seg], &gva);
2951 }
2952
2953 *gvap = gva;
2954 return 0;
2955 }
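
/*
 * Worked example (sketch, assuming node_sib stores the literal multiplier
 * in sib->scale): for an access through "0x10(%rbx,%rcx,4)" the store is a
 * STORE_SIB with bas = %rbx, idx = %rcx, scale = 4 and a displacement of
 * 0x10, so the code above computes
 *
 *	gva = rbx + 4 * rcx + 0x10
 *
 * with each register read masked to the address size, then applies the
 * segment base: only FS/GS in long mode, any segment (with a limit check)
 * otherwise.
 */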
2956
2957 static int
2958 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2959 {
2960 uint8_t inst_bytes[5], byte;
2961 size_t i, fetchsize;
2962 gvaddr_t gva;
2963 int ret, seg;
2964
2965 fetchsize = sizeof(inst_bytes);
2966
2967 gva = state->gprs[NVMM_X64_GPR_RIP];
2968 if (__predict_false(!is_long_mode(state))) {
2969 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2970 fetchsize);
2971 if (ret == -1)
2972 return -1;
2973 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2974 }
2975
2976 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2977 if (ret == -1)
2978 return -1;
2979
2980 seg = NVMM_X64_SEG_DS;
2981 for (i = 0; i < fetchsize; i++) {
2982 byte = inst_bytes[i];
2983
2984 if (byte == LEG_OVR_DS) {
2985 seg = NVMM_X64_SEG_DS;
2986 } else if (byte == LEG_OVR_ES) {
2987 seg = NVMM_X64_SEG_ES;
2988 } else if (byte == LEG_OVR_GS) {
2989 seg = NVMM_X64_SEG_GS;
2990 } else if (byte == LEG_OVR_FS) {
2991 seg = NVMM_X64_SEG_FS;
2992 } else if (byte == LEG_OVR_CS) {
2993 seg = NVMM_X64_SEG_CS;
2994 } else if (byte == LEG_OVR_SS) {
2995 seg = NVMM_X64_SEG_SS;
2996 } else if (byte == LEG_OPR_OVR) {
2997 /* nothing */
2998 } else if (byte == LEG_ADR_OVR) {
2999 /* nothing */
3000 } else if (byte == LEG_REP) {
3001 /* nothing */
3002 } else if (byte == LEG_REPN) {
3003 /* nothing */
3004 } else if (byte == LEG_LOCK) {
3005 /* nothing */
3006 } else {
3007 return seg;
3008 }
3009 }
3010
3011 return seg;
3012 }
3013
3014 static int
3015 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3016 struct nvmm_exit *exit)
3017 {
3018 size_t fetchsize;
3019 gvaddr_t gva;
3020 int ret;
3021
3022 fetchsize = sizeof(exit->u.mem.inst_bytes);
3023
3024 gva = state->gprs[NVMM_X64_GPR_RIP];
3025 if (__predict_false(!is_long_mode(state))) {
3026 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3027 fetchsize);
3028 if (ret == -1)
3029 return -1;
3030 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3031 }
3032
3033 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3034 fetchsize);
3035 if (ret == -1)
3036 return -1;
3037
3038 exit->u.mem.inst_len = fetchsize;
3039
3040 return 0;
3041 }
3042
3043 static int
3044 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3045 struct x86_instr *instr)
3046 {
3047 struct nvmm_mem mem;
3048 uint8_t data[8];
3049 gvaddr_t gva;
3050 size_t size;
3051 int ret;
3052
3053 size = instr->operand_size;
3054
3055 /* Source. */
3056 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3057 if (ret == -1)
3058 return -1;
3059 ret = read_guest_memory(mach, state, gva, data, size);
3060 if (ret == -1)
3061 return -1;
3062
3063 /* Destination. */
3064 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3065 if (ret == -1)
3066 return -1;
3067 ret = write_guest_memory(mach, state, gva, data, size);
3068 if (ret == -1)
3069 return -1;
3070
3071 mem.size = size;
3072 (*instr->emul->func)(&mem, state->gprs);
3073
3074 return 0;
3075 }
3076
3077 #define DISASSEMBLER_BUG() \
3078 do { \
3079 errno = EINVAL; \
3080 return -1; \
3081 } while (0)
3082
3083 static int
3084 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3085 struct x86_instr *instr, struct nvmm_exit *exit)
3086 {
3087 struct nvmm_mem mem;
3088 uint8_t membuf[8];
3089 uint64_t val;
3090
3091 memset(membuf, 0, sizeof(membuf));
3092
3093 mem.gpa = exit->u.mem.gpa;
3094 mem.size = instr->operand_size;
3095 mem.data = membuf;
3096
3097 /* Determine the direction. */
3098 switch (instr->src.type) {
3099 case STORE_REG:
3100 if (instr->src.disp.type != DISP_NONE) {
3101 /* Indirect access. */
3102 mem.write = false;
3103 } else {
3104 /* Direct access. */
3105 mem.write = true;
3106 }
3107 break;
3108 case STORE_IMM:
3109 mem.write = true;
3110 break;
3111 case STORE_SIB:
3112 mem.write = false;
3113 break;
3114 case STORE_DMO:
3115 mem.write = false;
3116 break;
3117 default:
3118 DISASSEMBLER_BUG();
3119 }
3120
3121 if (mem.write) {
3122 switch (instr->src.type) {
3123 case STORE_REG:
3124 if (instr->src.disp.type != DISP_NONE) {
3125 DISASSEMBLER_BUG();
3126 }
3127 val = state->gprs[instr->src.u.reg->num];
3128 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3129 memcpy(mem.data, &val, mem.size);
3130 break;
3131 case STORE_IMM:
3132 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3133 break;
3134 default:
3135 DISASSEMBLER_BUG();
3136 }
3137 } else if (instr->emul->read) {
3138 if (instr->dst.type != STORE_REG) {
3139 DISASSEMBLER_BUG();
3140 }
3141 if (instr->dst.disp.type != DISP_NONE) {
3142 DISASSEMBLER_BUG();
3143 }
3144 val = state->gprs[instr->dst.u.reg->num];
3145 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3146 memcpy(mem.data, &val, mem.size);
3147 }
3148
3149 (*instr->emul->func)(&mem, state->gprs);
3150
3151 if (!instr->emul->notouch && !mem.write) {
3152 if (instr->dst.type != STORE_REG) {
3153 DISASSEMBLER_BUG();
3154 }
3155 memcpy(&val, membuf, sizeof(uint64_t));
3156 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3157 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3158 state->gprs[instr->dst.u.reg->num] |= val;
3159 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3160 }
3161
3162 return 0;
3163 }
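
/*
 * Worked example (sketch): for an MMIO load "mov (%rbx), %eax", the source
 * is an indirect STORE_REG, so mem.write is false. The registered callback,
 * invoked from x86_func_mov, fills membuf with the device data; since MOV
 * is not a no-touch instruction, the low 4 bytes of RAX are then replaced
 * with that data and zeroextend_mask clears the upper half, matching the
 * architectural behavior of a 32-bit destination.
 */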
3164
3165 int
3166 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3167 struct nvmm_exit *exit)
3168 {
3169 struct nvmm_x64_state state;
3170 struct x86_instr instr;
3171 uint64_t cnt = 0; /* GCC */
3172 int ret;
3173
3174 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3175 errno = EINVAL;
3176 return -1;
3177 }
3178
3179 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3180 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3181 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3182 if (ret == -1)
3183 return -1;
3184
3185 if (exit->u.mem.inst_len == 0) {
3186 /*
3187 * The instruction was not fetched from the kernel. Fetch
3188 * it ourselves.
3189 */
3190 ret = fetch_instruction(mach, &state, exit);
3191 if (ret == -1)
3192 return -1;
3193 }
3194
3195 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3196 &instr, &state);
3197 if (ret == -1) {
3198 errno = ENODEV;
3199 return -1;
3200 }
3201
3202 if (instr.legpref.rep || instr.legpref.repn) {
3203 cnt = rep_get_cnt(&state, instr.address_size);
3204 if (__predict_false(cnt == 0)) {
3205 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3206 goto out;
3207 }
3208 }
3209
3210 if (instr.opcode->movs) {
3211 ret = assist_mem_double(mach, &state, &instr);
3212 } else {
3213 ret = assist_mem_single(mach, &state, &instr, exit);
3214 }
3215 if (ret == -1) {
3216 errno = ENODEV;
3217 return -1;
3218 }
3219
3220 if (instr.legpref.rep || instr.legpref.repn) {
3221 cnt -= 1;
3222 rep_set_cnt(&state, instr.address_size, cnt);
3223 if (cnt == 0) {
3224 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3225 } else if (__predict_false(instr.legpref.repn)) {
3226 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3227 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3228 }
3229 }
3230 } else {
3231 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3232 }
3233
3234 out:
3235 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3236 if (ret == -1)
3237 return -1;
3238
3239 return 0;
3240 }
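
/*
 * Typical usage (editorial sketch, not part of the library): a VMM run loop
 * hands memory exits to nvmm_assist_mem() and relies on the process-wide
 * __callbacks.mem hook to perform the actual device access:
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_mem");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */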
3241