1 /* $NetBSD: libnvmm_x86.c,v 1.27 2019/03/07 15:47:34 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49 #define __cacheline_aligned __attribute__((__aligned__(64)))
50
51 #include <x86/specialreg.h>
52
53 extern struct nvmm_callbacks __callbacks;
54
55 /* -------------------------------------------------------------------------- */
56
57 /*
58 * Undocumented debugging function. Helpful.
59 */
60 int
61 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
62 {
63 struct nvmm_x64_state state;
64 uint16_t *attr;
65 size_t i;
66 int ret;
67
68 const char *segnames[] = {
69 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
70 };
71
72 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
73 if (ret == -1)
74 return -1;
75
76 printf("+ VCPU id=%d\n", (int)cpuid);
77 printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]);
78 printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]);
79 printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]);
80 printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]);
81 printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]);
82 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
83 for (i = 0; i < NVMM_X64_NSEG; i++) {
84 attr = (uint16_t *)&state.segs[i].attrib;
85 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
86 segnames[i],
87 state.segs[i].selector,
88 state.segs[i].base,
89 state.segs[i].limit,
90 *attr);
91 }
92 printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]);
93 printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]);
94 printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]);
95 printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]);
96 printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]);
97
98 return 0;
99 }
100
101 /* -------------------------------------------------------------------------- */
102
103 #define PTE32_L1_SHIFT 12
104 #define PTE32_L2_SHIFT 22
105
106 #define PTE32_L2_MASK 0xffc00000
107 #define PTE32_L1_MASK 0x003ff000
108
109 #define PTE32_L2_FRAME (PTE32_L2_MASK)
110 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
111
112 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
113 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
114
115 #define CR3_FRAME_32BIT PG_FRAME
116
117 typedef uint32_t pte_32bit_t;
118
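/*
 * A sketch of the walk performed below, assuming the hypothetical
 * GVA 0x00403000 (not taken from a guest): CR0.PG=1 with CR4.PAE=0 and
 * EFER.LMA=0 selects a two-level structure, a 1024-entry page directory
 * (L2) whose entries either map a 4MB page (PG_PS, only legal with
 * CR4.PSE) or point to a 1024-entry page table (L1) mapping 4KB pages.
 * For that GVA, pte32_l2idx() = 1 and pte32_l1idx() = 3, so the walk
 * reads directory entry 1 and then entry 3 of the referenced table.
 */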
119 static int
120 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
121 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
122 {
123 gpaddr_t L2gpa, L1gpa;
124 uintptr_t L2hva, L1hva;
125 pte_32bit_t *pdir, pte;
126
127 /* We begin with an RWXU access. */
128 *prot = NVMM_PROT_ALL;
129
130 /* Parse L2. */
131 L2gpa = (cr3 & CR3_FRAME_32BIT);
132 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
133 return -1;
134 pdir = (pte_32bit_t *)L2hva;
135 pte = pdir[pte32_l2idx(gva)];
136 if ((pte & PG_V) == 0)
137 return -1;
138 if ((pte & PG_u) == 0)
139 *prot &= ~NVMM_PROT_USER;
140 if ((pte & PG_KW) == 0)
141 *prot &= ~NVMM_PROT_WRITE;
142 if ((pte & PG_PS) && !has_pse)
143 return -1;
144 if (pte & PG_PS) {
145 *gpa = (pte & PTE32_L2_FRAME);
146 *gpa = *gpa + (gva & PTE32_L1_MASK);
147 return 0;
148 }
149
150 /* Parse L1. */
151 L1gpa = (pte & PG_FRAME);
152 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
153 return -1;
154 pdir = (pte_32bit_t *)L1hva;
155 pte = pdir[pte32_l1idx(gva)];
156 if ((pte & PG_V) == 0)
157 return -1;
158 if ((pte & PG_u) == 0)
159 *prot &= ~NVMM_PROT_USER;
160 if ((pte & PG_KW) == 0)
161 *prot &= ~NVMM_PROT_WRITE;
162 if (pte & PG_PS)
163 return -1;
164
165 *gpa = (pte & PG_FRAME);
166 return 0;
167 }
168
169 /* -------------------------------------------------------------------------- */
170
171 #define PTE32_PAE_L1_SHIFT 12
172 #define PTE32_PAE_L2_SHIFT 21
173 #define PTE32_PAE_L3_SHIFT 30
174
175 #define PTE32_PAE_L3_MASK 0xc0000000
176 #define PTE32_PAE_L2_MASK 0x3fe00000
177 #define PTE32_PAE_L1_MASK 0x001ff000
178
179 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
180 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
181 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
182
183 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
184 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
185 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
186
187 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
188
189 typedef uint64_t pte_32bit_pae_t;
190
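/*
 * 32bit PAE: CR3 points to a 4-entry PDPT (L3), whose entries point to
 * page directories (L2) with 8-byte PTEs that can map 2MB pages
 * (PG_PS), which in turn point to page tables (L1) mapping 4KB pages.
 * Unlike the non-PAE walk above, the entries carry an NX bit, so
 * execute permission can be stripped at each level as well.
 */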
191 static int
192 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
193 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
194 {
195 gpaddr_t L3gpa, L2gpa, L1gpa;
196 uintptr_t L3hva, L2hva, L1hva;
197 pte_32bit_pae_t *pdir, pte;
198
199 /* We begin with an RWXU access. */
200 *prot = NVMM_PROT_ALL;
201
202 /* Parse L3. */
203 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
204 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
205 return -1;
206 pdir = (pte_32bit_pae_t *)L3hva;
207 pte = pdir[pte32_pae_l3idx(gva)];
208 if ((pte & PG_V) == 0)
209 return -1;
210 if (pte & PG_NX)
211 *prot &= ~NVMM_PROT_EXEC;
212 if (pte & PG_PS)
213 return -1;
214
215 /* Parse L2. */
216 L2gpa = (pte & PG_FRAME);
217 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
218 return -1;
219 pdir = (pte_32bit_pae_t *)L2hva;
220 pte = pdir[pte32_pae_l2idx(gva)];
221 if ((pte & PG_V) == 0)
222 return -1;
223 if ((pte & PG_u) == 0)
224 *prot &= ~NVMM_PROT_USER;
225 if ((pte & PG_KW) == 0)
226 *prot &= ~NVMM_PROT_WRITE;
227 if (pte & PG_NX)
228 *prot &= ~NVMM_PROT_EXEC;
229 if (pte & PG_PS) {
230 *gpa = (pte & PTE32_PAE_L2_FRAME);
231 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
232 return 0;
233 }
234
235 /* Parse L1. */
236 L1gpa = (pte & PG_FRAME);
237 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
238 return -1;
239 pdir = (pte_32bit_pae_t *)L1hva;
240 pte = pdir[pte32_pae_l1idx(gva)];
241 if ((pte & PG_V) == 0)
242 return -1;
243 if ((pte & PG_u) == 0)
244 *prot &= ~NVMM_PROT_USER;
245 if ((pte & PG_KW) == 0)
246 *prot &= ~NVMM_PROT_WRITE;
247 if (pte & PG_NX)
248 *prot &= ~NVMM_PROT_EXEC;
249 if (pte & PG_PS)
250 return -1;
251
252 *gpa = (pte & PG_FRAME);
253 return 0;
254 }
255
256 /* -------------------------------------------------------------------------- */
257
258 #define PTE64_L1_SHIFT 12
259 #define PTE64_L2_SHIFT 21
260 #define PTE64_L3_SHIFT 30
261 #define PTE64_L4_SHIFT 39
262
263 #define PTE64_L4_MASK 0x0000ff8000000000
264 #define PTE64_L3_MASK 0x0000007fc0000000
265 #define PTE64_L2_MASK 0x000000003fe00000
266 #define PTE64_L1_MASK 0x00000000001ff000
267
268 #define PTE64_L4_FRAME PTE64_L4_MASK
269 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
270 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
271 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
272
273 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
274 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
275 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
276 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
277
278 #define CR3_FRAME_64BIT PG_FRAME
279
280 typedef uint64_t pte_64bit_t;
281
282 static inline bool
283 x86_gva_64bit_canonical(gvaddr_t gva)
284 {
285 /* Bits 63:47 must have the same value. */
286 #define SIGN_EXTEND 0xffff800000000000ULL
287 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
288 }
289
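/*
 * 64bit long mode: four levels, L4 (PML4) -> L3 (PDPT) -> L2 (PD) ->
 * L1 (PT). PG_PS at L3 terminates the walk with a 1GB page and PG_PS
 * at L2 with a 2MB page, the remaining GVA bits becoming the offset.
 * The canonical check above rejects, for example, 0x0000800000000000,
 * whose bits 63:47 are neither all-zero nor all-one, while
 * 0x00007fffffffe000 and 0xffff800000000000 both pass.
 */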
290 static int
291 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
292 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
293 {
294 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
295 uintptr_t L4hva, L3hva, L2hva, L1hva;
296 pte_64bit_t *pdir, pte;
297
298 /* We begin with an RWXU access. */
299 *prot = NVMM_PROT_ALL;
300
301 if (!x86_gva_64bit_canonical(gva))
302 return -1;
303
304 /* Parse L4. */
305 L4gpa = (cr3 & CR3_FRAME_64BIT);
306 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
307 return -1;
308 pdir = (pte_64bit_t *)L4hva;
309 pte = pdir[pte64_l4idx(gva)];
310 if ((pte & PG_V) == 0)
311 return -1;
312 if ((pte & PG_u) == 0)
313 *prot &= ~NVMM_PROT_USER;
314 if ((pte & PG_KW) == 0)
315 *prot &= ~NVMM_PROT_WRITE;
316 if (pte & PG_NX)
317 *prot &= ~NVMM_PROT_EXEC;
318 if (pte & PG_PS)
319 return -1;
320
321 /* Parse L3. */
322 L3gpa = (pte & PG_FRAME);
323 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
324 return -1;
325 pdir = (pte_64bit_t *)L3hva;
326 pte = pdir[pte64_l3idx(gva)];
327 if ((pte & PG_V) == 0)
328 return -1;
329 if ((pte & PG_u) == 0)
330 *prot &= ~NVMM_PROT_USER;
331 if ((pte & PG_KW) == 0)
332 *prot &= ~NVMM_PROT_WRITE;
333 if (pte & PG_NX)
334 *prot &= ~NVMM_PROT_EXEC;
335 if (pte & PG_PS) {
336 *gpa = (pte & PTE64_L3_FRAME);
337 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
338 return 0;
339 }
340
341 /* Parse L2. */
342 L2gpa = (pte & PG_FRAME);
343 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
344 return -1;
345 pdir = (pte_64bit_t *)L2hva;
346 pte = pdir[pte64_l2idx(gva)];
347 if ((pte & PG_V) == 0)
348 return -1;
349 if ((pte & PG_u) == 0)
350 *prot &= ~NVMM_PROT_USER;
351 if ((pte & PG_KW) == 0)
352 *prot &= ~NVMM_PROT_WRITE;
353 if (pte & PG_NX)
354 *prot &= ~NVMM_PROT_EXEC;
355 if (pte & PG_PS) {
356 *gpa = (pte & PTE64_L2_FRAME);
357 *gpa = *gpa + (gva & PTE64_L1_MASK);
358 return 0;
359 }
360
361 /* Parse L1. */
362 L1gpa = (pte & PG_FRAME);
363 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
364 return -1;
365 pdir = (pte_64bit_t *)L1hva;
366 pte = pdir[pte64_l1idx(gva)];
367 if ((pte & PG_V) == 0)
368 return -1;
369 if ((pte & PG_u) == 0)
370 *prot &= ~NVMM_PROT_USER;
371 if ((pte & PG_KW) == 0)
372 *prot &= ~NVMM_PROT_WRITE;
373 if (pte & PG_NX)
374 *prot &= ~NVMM_PROT_EXEC;
375 if (pte & PG_PS)
376 return -1;
377
378 *gpa = (pte & PG_FRAME);
379 return 0;
380 }
381
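/*
 * Paging mode selection, from the guest control state:
 *
 *	CR0.PG	CR4.PAE	EFER.LMA	translation
 *	  0	   -	    -		none (GPA = GVA)
 *	  1	   0	    0		2-level 32bit
 *	  1	   1	    0		3-level 32bit PAE
 *	  1	   1	    1		4-level 64bit
 *
 * PAE=0 with LMA=1 is not architecturally reachable and is rejected
 * with EFAULT, as is any failed walk.
 */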
382 static inline int
383 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
384 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
385 {
386 bool is_pae, is_lng, has_pse;
387 uint64_t cr3;
388 size_t off;
389 int ret;
390
391 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
392 /* No paging. */
393 *prot = NVMM_PROT_ALL;
394 *gpa = gva;
395 return 0;
396 }
397
398 off = (gva & PAGE_MASK);
399 gva &= ~PAGE_MASK;
400
401 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
402 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
403 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
404 cr3 = state->crs[NVMM_X64_CR_CR3];
405
406 if (is_pae && is_lng) {
407 /* 64bit */
408 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
409 } else if (is_pae && !is_lng) {
410 /* 32bit PAE */
411 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
412 } else if (!is_pae && !is_lng) {
413 /* 32bit */
414 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
415 } else {
416 ret = -1;
417 }
418
419 if (ret == -1) {
420 errno = EFAULT;
421 }
422
423 *gpa = *gpa + off;
424
425 return ret;
426 }
427
428 int
429 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
430 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
431 {
432 struct nvmm_x64_state state;
433 int ret;
434
435 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
436 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
437 if (ret == -1)
438 return -1;
439
440 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
441 }
442
443 /* -------------------------------------------------------------------------- */
444
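/*
 * The mode of the code being emulated is derived from CS: CS.L set
 * means 64bit code (meaningful only under EFER.LMA), otherwise CS.D
 * selects between 32bit (def=1) and 16bit (def=0) defaults.
 */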
445 static inline bool
446 is_long_mode(struct nvmm_x64_state *state)
447 {
448 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
449 }
450
451 static inline bool
452 is_64bit(struct nvmm_x64_state *state)
453 {
454 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
455 }
456
457 static inline bool
458 is_32bit(struct nvmm_x64_state *state)
459 {
460 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
461 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
462 }
463
464 static inline bool
465 is_16bit(struct nvmm_x64_state *state)
466 {
467 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
468 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
469 }
470
471 static int
472 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
473 {
474 uint64_t limit;
475
476 /*
477 * This is incomplete. We should check topdown, etc, really that's
478 * tiring.
479 */
480 if (__predict_false(!seg->attrib.p)) {
481 goto error;
482 }
483
484 limit = (uint64_t)seg->limit + 1;
485 if (__predict_true(seg->attrib.g)) {
486 limit *= PAGE_SIZE;
487 }
488
489 if (__predict_false(gva + size > limit)) {
490 goto error;
491 }
492
493 return 0;
494
495 error:
496 errno = EFAULT;
497 return -1;
498 }
499
500 static inline void
501 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
502 {
503 *gva += seg->base;
504 }
505
506 static inline uint64_t
507 size_to_mask(size_t size)
508 {
509 switch (size) {
510 case 1:
511 return 0x00000000000000FF;
512 case 2:
513 return 0x000000000000FFFF;
514 case 4:
515 return 0x00000000FFFFFFFF;
516 case 8:
517 default:
518 return 0xFFFFFFFFFFFFFFFF;
519 }
520 }
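/*
 * For example size_to_mask(2) == 0xFFFF. The mask is used both to
 * truncate register reads (rep_get_cnt below) and, inverted, to build
 * the zero-extend mask applied to 32bit destinations.
 */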
521
522 static uint64_t
523 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
524 {
525 uint64_t mask, cnt;
526
527 mask = size_to_mask(adsize);
528 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
529
530 return cnt;
531 }
532
533 static void
534 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
535 {
536 uint64_t mask;
537
538 /* XXX: should we zero-extend? */
539 mask = size_to_mask(adsize);
540 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
541 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
542 }
543
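/*
 * Copy from guest memory into 'data'. Accesses that cross a page
 * boundary are split, the tail being handled by a recursive call,
 * since each page translates independently and may turn out to be
 * MMIO, in which case the registered memory callback is invoked
 * instead of a memcpy. write_guest_memory below is the mirror path.
 */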
544 static int
545 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
546 gvaddr_t gva, uint8_t *data, size_t size)
547 {
548 struct nvmm_mem mem;
549 nvmm_prot_t prot;
550 gpaddr_t gpa;
551 uintptr_t hva;
552 bool is_mmio;
553 int ret, remain;
554
555 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
556 if (__predict_false(ret == -1)) {
557 return -1;
558 }
559 if (__predict_false(!(prot & NVMM_PROT_READ))) {
560 errno = EFAULT;
561 return -1;
562 }
563
564 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
565 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
566 } else {
567 remain = 0;
568 }
569 size -= remain;
570
571 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
572 is_mmio = (ret == -1);
573
574 if (is_mmio) {
575 mem.data = data;
576 mem.gpa = gpa;
577 mem.write = false;
578 mem.size = size;
579 (*__callbacks.mem)(&mem);
580 } else {
581 memcpy(data, (uint8_t *)hva, size);
582 }
583
584 if (remain > 0) {
585 ret = read_guest_memory(mach, state, gva + size,
586 data + size, remain);
587 } else {
588 ret = 0;
589 }
590
591 return ret;
592 }
593
594 static int
595 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
596 gvaddr_t gva, uint8_t *data, size_t size)
597 {
598 struct nvmm_mem mem;
599 nvmm_prot_t prot;
600 gpaddr_t gpa;
601 uintptr_t hva;
602 bool is_mmio;
603 int ret, remain;
604
605 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
606 if (__predict_false(ret == -1)) {
607 return -1;
608 }
609 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
610 errno = EFAULT;
611 return -1;
612 }
613
614 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
615 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
616 } else {
617 remain = 0;
618 }
619 size -= remain;
620
621 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
622 is_mmio = (ret == -1);
623
624 if (is_mmio) {
625 mem.data = data;
626 mem.gpa = gpa;
627 mem.write = true;
628 mem.size = size;
629 (*__callbacks.mem)(&mem);
630 } else {
631 memcpy((uint8_t *)hva, data, size);
632 }
633
634 if (remain > 0) {
635 ret = write_guest_memory(mach, state, gva + size,
636 data + size, remain);
637 } else {
638 ret = 0;
639 }
640
641 return ret;
642 }
643
644 /* -------------------------------------------------------------------------- */
645
646 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
647
648 #define NVMM_IO_BATCH_SIZE 32
649
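/*
 * Fast path for REP INS/OUTS with the direction flag clear: perform up
 * to NVMM_IO_BATCH_SIZE bytes worth of port accesses against a local
 * buffer, so guest memory is touched once per batch rather than once
 * per iteration. Returns the number of I/O operations performed, which
 * the caller subtracts from the REP counter.
 */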
650 static int
651 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
652 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
653 {
654 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
655 size_t i, iosize, iocnt;
656 int ret;
657
658 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
659 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
660 iocnt = iosize / io->size;
661
662 io->data = iobuf;
663
664 if (!io->in) {
665 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
666 if (ret == -1)
667 return -1;
668 }
669
670 for (i = 0; i < iocnt; i++) {
671 (*__callbacks.io)(io);
672 io->data += io->size;
673 }
674
675 if (io->in) {
676 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
677 if (ret == -1)
678 return -1;
679 }
680
681 return iocnt;
682 }
683
684 int
685 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
686 struct nvmm_exit *exit)
687 {
688 struct nvmm_x64_state state;
689 struct nvmm_io io;
690 uint64_t cnt = 0; /* GCC */
691 uint8_t iobuf[8];
692 int iocnt = 1;
693 gvaddr_t gva = 0; /* GCC */
694 int reg = 0; /* GCC */
695 int ret, seg;
696 bool psld = false;
697
698 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
699 errno = EINVAL;
700 return -1;
701 }
702
703 io.port = exit->u.io.port;
704 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
705 io.size = exit->u.io.operand_size;
706 io.data = iobuf;
707
708 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
709 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
710 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
711 if (ret == -1)
712 return -1;
713
714 if (exit->u.io.rep) {
715 cnt = rep_get_cnt(&state, exit->u.io.address_size);
716 if (__predict_false(cnt == 0)) {
717 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
718 goto out;
719 }
720 }
721
722 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
723 psld = true;
724 }
725
726 /*
727 * Determine GVA.
728 */
729 if (exit->u.io.str) {
730 if (io.in) {
731 reg = NVMM_X64_GPR_RDI;
732 } else {
733 reg = NVMM_X64_GPR_RSI;
734 }
735
736 gva = state.gprs[reg];
737 gva &= size_to_mask(exit->u.io.address_size);
738
739 if (exit->u.io.seg != -1) {
740 seg = exit->u.io.seg;
741 } else {
742 if (io.in) {
743 seg = NVMM_X64_SEG_ES;
744 } else {
745 seg = fetch_segment(mach, &state);
746 if (seg == -1)
747 return -1;
748 }
749 }
750
751 if (__predict_true(is_long_mode(&state))) {
752 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
753 segment_apply(&state.segs[seg], &gva);
754 }
755 } else {
756 ret = segment_check(&state.segs[seg], gva, io.size);
757 if (ret == -1)
758 return -1;
759 segment_apply(&state.segs[seg], &gva);
760 }
761
762 if (exit->u.io.rep && !psld) {
763 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
764 if (iocnt == -1)
765 return -1;
766 goto done;
767 }
768 }
769
770 if (!io.in) {
771 if (!exit->u.io.str) {
772 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
773 } else {
774 ret = read_guest_memory(mach, &state, gva, io.data,
775 io.size);
776 if (ret == -1)
777 return -1;
778 }
779 }
780
781 (*__callbacks.io)(&io);
782
783 if (io.in) {
784 if (!exit->u.io.str) {
785 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
786 if (io.size == 4) {
787 /* Zero-extend to 64 bits. */
788 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
789 }
790 } else {
791 ret = write_guest_memory(mach, &state, gva, io.data,
792 io.size);
793 if (ret == -1)
794 return -1;
795 }
796 }
797
798 done:
799 if (exit->u.io.str) {
800 if (__predict_false(psld)) {
801 state.gprs[reg] -= iocnt * io.size;
802 } else {
803 state.gprs[reg] += iocnt * io.size;
804 }
805 }
806
807 if (exit->u.io.rep) {
808 cnt -= iocnt;
809 rep_set_cnt(&state, exit->u.io.address_size, cnt);
810 if (cnt == 0) {
811 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
812 }
813 } else {
814 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
815 }
816
817 out:
818 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
819 if (ret == -1)
820 return -1;
821
822 return 0;
823 }
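/*
 * Typical use, as a rough sketch; the surrounding run loop and its
 * error handling are hypothetical, only the nvmm_* calls and exit
 * reasons come from this library:
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(mach, cpuid, &exit) == -1)
 *				errx(EXIT_FAILURE, "io assist failed");
 *			break;
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
 *				errx(EXIT_FAILURE, "mem assist failed");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 *
 * The assist functions fetch the VCPU state themselves and write the
 * updated GPRs back; the caller only needs to have registered the I/O
 * and memory callbacks beforehand.
 */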
824
825 /* -------------------------------------------------------------------------- */
826
827 struct x86_emul {
828 bool read;
829 bool notouch;
830 void (*func)(struct nvmm_mem *, uint64_t *);
831 };
832
833 static void x86_func_or(struct nvmm_mem *, uint64_t *);
834 static void x86_func_and(struct nvmm_mem *, uint64_t *);
835 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
836 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
837 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
838 static void x86_func_test(struct nvmm_mem *, uint64_t *);
839 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
840 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
841 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
842 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
843
844 static const struct x86_emul x86_emul_or = {
845 .read = true,
846 .func = x86_func_or
847 };
848
849 static const struct x86_emul x86_emul_and = {
850 .read = true,
851 .func = x86_func_and
852 };
853
854 static const struct x86_emul x86_emul_sub = {
855 .read = true,
856 .func = x86_func_sub
857 };
858
859 static const struct x86_emul x86_emul_xor = {
860 .read = true,
861 .func = x86_func_xor
862 };
863
864 static const struct x86_emul x86_emul_cmp = {
865 .notouch = true,
866 .func = x86_func_cmp
867 };
868
869 static const struct x86_emul x86_emul_test = {
870 .notouch = true,
871 .func = x86_func_test
872 };
873
874 static const struct x86_emul x86_emul_mov = {
875 .func = x86_func_mov
876 };
877
878 static const struct x86_emul x86_emul_stos = {
879 .func = x86_func_stos
880 };
881
882 static const struct x86_emul x86_emul_lods = {
883 .func = x86_func_lods
884 };
885
886 static const struct x86_emul x86_emul_movs = {
887 .func = x86_func_movs
888 };
889
890 /* Legacy prefixes. */
891 #define LEG_LOCK 0xF0
892 #define LEG_REPN 0xF2
893 #define LEG_REP 0xF3
894 #define LEG_OVR_CS 0x2E
895 #define LEG_OVR_SS 0x36
896 #define LEG_OVR_DS 0x3E
897 #define LEG_OVR_ES 0x26
898 #define LEG_OVR_FS 0x64
899 #define LEG_OVR_GS 0x65
900 #define LEG_OPR_OVR 0x66
901 #define LEG_ADR_OVR 0x67
902
903 struct x86_legpref {
904 bool opr_ovr:1;
905 bool adr_ovr:1;
906 bool rep:1;
907 bool repn:1;
908 int8_t seg;
909 };
910
911 struct x86_rexpref {
912 bool b:1;
913 bool x:1;
914 bool r:1;
915 bool w:1;
916 bool present:1;
917 };
918
919 struct x86_reg {
920 int num; /* NVMM GPR state index */
921 uint64_t mask;
922 };
923
924 enum x86_disp_type {
925 DISP_NONE,
926 DISP_0,
927 DISP_1,
928 DISP_4
929 };
930
931 struct x86_disp {
932 enum x86_disp_type type;
933 uint64_t data; /* 4 bytes, but can be sign-extended */
934 };
935
936 enum REGMODRM__Mod {
937 MOD_DIS0, /* also, register indirect */
938 MOD_DIS1,
939 MOD_DIS4,
940 MOD_REG
941 };
942
943 enum REGMODRM__Reg {
944 REG_000, /* these fields are indexes to the register map */
945 REG_001,
946 REG_010,
947 REG_011,
948 REG_100,
949 REG_101,
950 REG_110,
951 REG_111
952 };
953
954 enum REGMODRM__Rm {
955 RM_000, /* reg */
956 RM_001, /* reg */
957 RM_010, /* reg */
958 RM_011, /* reg */
959 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
960 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
961 RM_110,
962 RM_111
963 };
964
965 struct x86_regmodrm {
966 uint8_t mod:2;
967 uint8_t reg:3;
968 uint8_t rm:3;
969 };
970
971 struct x86_immediate {
972 uint64_t data;
973 };
974
975 struct x86_sib {
976 uint8_t scale;
977 const struct x86_reg *idx;
978 const struct x86_reg *bas;
979 };
980
981 enum x86_store_type {
982 STORE_NONE,
983 STORE_REG,
984 STORE_IMM,
985 STORE_SIB,
986 STORE_DMO
987 };
988
989 struct x86_store {
990 enum x86_store_type type;
991 union {
992 const struct x86_reg *reg;
993 struct x86_immediate imm;
994 struct x86_sib sib;
995 uint64_t dmo;
996 } u;
997 struct x86_disp disp;
998 int hardseg;
999 };
1000
1001 struct x86_instr {
1002 uint8_t len;
1003 struct x86_legpref legpref;
1004 struct x86_rexpref rexpref;
1005 struct x86_regmodrm regmodrm;
1006 uint8_t operand_size;
1007 uint8_t address_size;
1008 uint64_t zeroextend_mask;
1009
1010 const struct x86_opcode *opcode;
1011 const struct x86_emul *emul;
1012
1013 struct x86_store src;
1014 struct x86_store dst;
1015 struct x86_store *strm;
1016 };
1017
1018 struct x86_decode_fsm {
1019 /* vcpu */
1020 bool is64bit;
1021 bool is32bit;
1022 bool is16bit;
1023
1024 /* fsm */
1025 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1026 uint8_t *buf;
1027 uint8_t *end;
1028 };
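/*
 * The decoder is a small state machine: each node_* function reads
 * bytes through fsm_read(), fills in part of the x86_instr, and uses
 * fsm_advance() to install the next node in fsm->fn. A NULL next node
 * means the instruction is fully decoded; the decode driver keeps
 * calling fsm->fn until then, or until a node reports failure.
 */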
1029
1030 struct x86_opcode {
1031 bool valid:1;
1032 bool regmodrm:1;
1033 bool regtorm:1;
1034 bool dmo:1;
1035 bool todmo:1;
1036 bool movs:1;
1037 bool stos:1;
1038 bool lods:1;
1039 bool szoverride:1;
1040 bool group1:1;
1041 bool group3:1;
1042 bool group11:1;
1043 bool immediate:1;
1044 uint8_t defsize;
1045 uint8_t flags;
1046 const struct x86_emul *emul;
1047 };
1048
1049 struct x86_group_entry {
1050 const struct x86_emul *emul;
1051 };
1052
1053 #define OPSIZE_BYTE 0x01
1054 #define OPSIZE_WORD 0x02 /* 2 bytes */
1055 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1056 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1057
1058 #define FLAG_imm8 0x01
1059 #define FLAG_immz 0x02
1060 #define FLAG_ze 0x04
1061
1062 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1063 [1] = { .emul = &x86_emul_or },
1064 [4] = { .emul = &x86_emul_and },
1065 [6] = { .emul = &x86_emul_xor },
1066 [7] = { .emul = &x86_emul_cmp }
1067 };
1068
1069 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1070 [0] = { .emul = &x86_emul_test },
1071 [1] = { .emul = &x86_emul_test }
1072 };
1073
1074 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1075 [0] = { .emul = &x86_emul_mov }
1076 };
1077
1078 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1079 /*
1080 * Group1
1081 */
1082 [0x80] = {
1083 /* Eb, Ib */
1084 .valid = true,
1085 .regmodrm = true,
1086 .regtorm = true,
1087 .szoverride = false,
1088 .defsize = OPSIZE_BYTE,
1089 .group1 = true,
1090 .immediate = true,
1091 .emul = NULL /* group1 */
1092 },
1093 [0x81] = {
1094 /* Ev, Iz */
1095 .valid = true,
1096 .regmodrm = true,
1097 .regtorm = true,
1098 .szoverride = true,
1099 .defsize = -1,
1100 .group1 = true,
1101 .immediate = true,
1102 .flags = FLAG_immz,
1103 .emul = NULL /* group1 */
1104 },
1105 [0x83] = {
1106 /* Ev, Ib */
1107 .valid = true,
1108 .regmodrm = true,
1109 .regtorm = true,
1110 .szoverride = true,
1111 .defsize = -1,
1112 .group1 = true,
1113 .immediate = true,
1114 .flags = FLAG_imm8,
1115 .emul = NULL /* group1 */
1116 },
1117
1118 /*
1119 * Group3
1120 */
1121 [0xF6] = {
1122 /* Eb, Ib */
1123 .valid = true,
1124 .regmodrm = true,
1125 .regtorm = true,
1126 .szoverride = false,
1127 .defsize = OPSIZE_BYTE,
1128 .group3 = true,
1129 .immediate = true,
1130 .emul = NULL /* group3 */
1131 },
1132 [0xF7] = {
1133 /* Ev, Iz */
1134 .valid = true,
1135 .regmodrm = true,
1136 .regtorm = true,
1137 .szoverride = true,
1138 .defsize = -1,
1139 .group3 = true,
1140 .immediate = true,
1141 .flags = FLAG_immz,
1142 .emul = NULL /* group3 */
1143 },
1144
1145 /*
1146 * Group11
1147 */
1148 [0xC6] = {
1149 /* Eb, Ib */
1150 .valid = true,
1151 .regmodrm = true,
1152 .regtorm = true,
1153 .szoverride = false,
1154 .defsize = OPSIZE_BYTE,
1155 .group11 = true,
1156 .immediate = true,
1157 .emul = NULL /* group11 */
1158 },
1159 [0xC7] = {
1160 /* Ev, Iz */
1161 .valid = true,
1162 .regmodrm = true,
1163 .regtorm = true,
1164 .szoverride = true,
1165 .defsize = -1,
1166 .group11 = true,
1167 .immediate = true,
1168 .flags = FLAG_immz,
1169 .emul = NULL /* group11 */
1170 },
1171
1172 /*
1173 * OR
1174 */
1175 [0x08] = {
1176 /* Eb, Gb */
1177 .valid = true,
1178 .regmodrm = true,
1179 .regtorm = true,
1180 .szoverride = false,
1181 .defsize = OPSIZE_BYTE,
1182 .emul = &x86_emul_or
1183 },
1184 [0x09] = {
1185 /* Ev, Gv */
1186 .valid = true,
1187 .regmodrm = true,
1188 .regtorm = true,
1189 .szoverride = true,
1190 .defsize = -1,
1191 .emul = &x86_emul_or
1192 },
1193 [0x0A] = {
1194 /* Gb, Eb */
1195 .valid = true,
1196 .regmodrm = true,
1197 .regtorm = false,
1198 .szoverride = false,
1199 .defsize = OPSIZE_BYTE,
1200 .emul = &x86_emul_or
1201 },
1202 [0x0B] = {
1203 /* Gv, Ev */
1204 .valid = true,
1205 .regmodrm = true,
1206 .regtorm = false,
1207 .szoverride = true,
1208 .defsize = -1,
1209 .emul = &x86_emul_or
1210 },
1211
1212 /*
1213 * AND
1214 */
1215 [0x20] = {
1216 /* Eb, Gb */
1217 .valid = true,
1218 .regmodrm = true,
1219 .regtorm = true,
1220 .szoverride = false,
1221 .defsize = OPSIZE_BYTE,
1222 .emul = &x86_emul_and
1223 },
1224 [0x21] = {
1225 /* Ev, Gv */
1226 .valid = true,
1227 .regmodrm = true,
1228 .regtorm = true,
1229 .szoverride = true,
1230 .defsize = -1,
1231 .emul = &x86_emul_and
1232 },
1233 [0x22] = {
1234 /* Gb, Eb */
1235 .valid = true,
1236 .regmodrm = true,
1237 .regtorm = false,
1238 .szoverride = false,
1239 .defsize = OPSIZE_BYTE,
1240 .emul = &x86_emul_and
1241 },
1242 [0x23] = {
1243 /* Gv, Ev */
1244 .valid = true,
1245 .regmodrm = true,
1246 .regtorm = false,
1247 .szoverride = true,
1248 .defsize = -1,
1249 .emul = &x86_emul_and
1250 },
1251
1252 /*
1253 * SUB
1254 */
1255 [0x28] = {
1256 /* Eb, Gb */
1257 .valid = true,
1258 .regmodrm = true,
1259 .regtorm = true,
1260 .szoverride = false,
1261 .defsize = OPSIZE_BYTE,
1262 .emul = &x86_emul_sub
1263 },
1264 [0x29] = {
1265 /* Ev, Gv */
1266 .valid = true,
1267 .regmodrm = true,
1268 .regtorm = true,
1269 .szoverride = true,
1270 .defsize = -1,
1271 .emul = &x86_emul_sub
1272 },
1273 [0x2A] = {
1274 /* Gb, Eb */
1275 .valid = true,
1276 .regmodrm = true,
1277 .regtorm = false,
1278 .szoverride = false,
1279 .defsize = OPSIZE_BYTE,
1280 .emul = &x86_emul_sub
1281 },
1282 [0x2B] = {
1283 /* Gv, Ev */
1284 .valid = true,
1285 .regmodrm = true,
1286 .regtorm = false,
1287 .szoverride = true,
1288 .defsize = -1,
1289 .emul = &x86_emul_sub
1290 },
1291
1292 /*
1293 * XOR
1294 */
1295 [0x30] = {
1296 /* Eb, Gb */
1297 .valid = true,
1298 .regmodrm = true,
1299 .regtorm = true,
1300 .szoverride = false,
1301 .defsize = OPSIZE_BYTE,
1302 .emul = &x86_emul_xor
1303 },
1304 [0x31] = {
1305 /* Ev, Gv */
1306 .valid = true,
1307 .regmodrm = true,
1308 .regtorm = true,
1309 .szoverride = true,
1310 .defsize = -1,
1311 .emul = &x86_emul_xor
1312 },
1313 [0x32] = {
1314 /* Gb, Eb */
1315 .valid = true,
1316 .regmodrm = true,
1317 .regtorm = false,
1318 .szoverride = false,
1319 .defsize = OPSIZE_BYTE,
1320 .emul = &x86_emul_xor
1321 },
1322 [0x33] = {
1323 /* Gv, Ev */
1324 .valid = true,
1325 .regmodrm = true,
1326 .regtorm = false,
1327 .szoverride = true,
1328 .defsize = -1,
1329 .emul = &x86_emul_xor
1330 },
1331
1332 /*
1333 * MOV
1334 */
1335 [0x88] = {
1336 /* Eb, Gb */
1337 .valid = true,
1338 .regmodrm = true,
1339 .regtorm = true,
1340 .szoverride = false,
1341 .defsize = OPSIZE_BYTE,
1342 .emul = &x86_emul_mov
1343 },
1344 [0x89] = {
1345 /* Ev, Gv */
1346 .valid = true,
1347 .regmodrm = true,
1348 .regtorm = true,
1349 .szoverride = true,
1350 .defsize = -1,
1351 .emul = &x86_emul_mov
1352 },
1353 [0x8A] = {
1354 /* Gb, Eb */
1355 .valid = true,
1356 .regmodrm = true,
1357 .regtorm = false,
1358 .szoverride = false,
1359 .defsize = OPSIZE_BYTE,
1360 .emul = &x86_emul_mov
1361 },
1362 [0x8B] = {
1363 /* Gv, Ev */
1364 .valid = true,
1365 .regmodrm = true,
1366 .regtorm = false,
1367 .szoverride = true,
1368 .defsize = -1,
1369 .emul = &x86_emul_mov
1370 },
1371 [0xA0] = {
1372 /* AL, Ob */
1373 .valid = true,
1374 .dmo = true,
1375 .todmo = false,
1376 .szoverride = false,
1377 .defsize = OPSIZE_BYTE,
1378 .emul = &x86_emul_mov
1379 },
1380 [0xA1] = {
1381 /* rAX, Ov */
1382 .valid = true,
1383 .dmo = true,
1384 .todmo = false,
1385 .szoverride = true,
1386 .defsize = -1,
1387 .emul = &x86_emul_mov
1388 },
1389 [0xA2] = {
1390 /* Ob, AL */
1391 .valid = true,
1392 .dmo = true,
1393 .todmo = true,
1394 .szoverride = false,
1395 .defsize = OPSIZE_BYTE,
1396 .emul = &x86_emul_mov
1397 },
1398 [0xA3] = {
1399 /* Ov, rAX */
1400 .valid = true,
1401 .dmo = true,
1402 .todmo = true,
1403 .szoverride = true,
1404 .defsize = -1,
1405 .emul = &x86_emul_mov
1406 },
1407
1408 /*
1409 * MOVS
1410 */
1411 [0xA4] = {
1412 /* Yb, Xb */
1413 .valid = true,
1414 .movs = true,
1415 .szoverride = false,
1416 .defsize = OPSIZE_BYTE,
1417 .emul = &x86_emul_movs
1418 },
1419 [0xA5] = {
1420 /* Yv, Xv */
1421 .valid = true,
1422 .movs = true,
1423 .szoverride = true,
1424 .defsize = -1,
1425 .emul = &x86_emul_movs
1426 },
1427
1428 /*
1429 * STOS
1430 */
1431 [0xAA] = {
1432 /* Yb, AL */
1433 .valid = true,
1434 .stos = true,
1435 .szoverride = false,
1436 .defsize = OPSIZE_BYTE,
1437 .emul = &x86_emul_stos
1438 },
1439 [0xAB] = {
1440 /* Yv, rAX */
1441 .valid = true,
1442 .stos = true,
1443 .szoverride = true,
1444 .defsize = -1,
1445 .emul = &x86_emul_stos
1446 },
1447
1448 /*
1449 * LODS
1450 */
1451 [0xAC] = {
1452 /* AL, Xb */
1453 .valid = true,
1454 .lods = true,
1455 .szoverride = false,
1456 .defsize = OPSIZE_BYTE,
1457 .emul = &x86_emul_lods
1458 },
1459 [0xAD] = {
1460 /* rAX, Xv */
1461 .valid = true,
1462 .lods = true,
1463 .szoverride = true,
1464 .defsize = -1,
1465 .emul = &x86_emul_lods
1466 },
1467 };
1468
1469 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1470 /*
1471 * MOVZX
1472 */
1473 [0xB6] = {
1474 /* Gv, Eb */
1475 .valid = true,
1476 .regmodrm = true,
1477 .regtorm = false,
1478 .szoverride = true,
1479 .defsize = OPSIZE_BYTE,
1480 .flags = FLAG_ze,
1481 .emul = &x86_emul_mov
1482 },
1483 [0xB7] = {
1484 /* Gv, Ew */
1485 .valid = true,
1486 .regmodrm = true,
1487 .regtorm = false,
1488 .szoverride = true,
1489 .defsize = OPSIZE_WORD,
1490 .flags = FLAG_ze,
1491 .emul = &x86_emul_mov
1492 },
1493 };
1494
1495 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1496
1497 /* [REX-present][enc][opsize] */
1498 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1499 [false] = {
1500 /* No REX prefix. */
1501 [0b00] = {
1502 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1503 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1504 [2] = { -1, 0 },
1505 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1506 [4] = { -1, 0 },
1507 [5] = { -1, 0 },
1508 [6] = { -1, 0 },
1509 [7] = { -1, 0 },
1510 },
1511 [0b01] = {
1512 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1513 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1514 [2] = { -1, 0 },
1515 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1516 [4] = { -1, 0 },
1517 [5] = { -1, 0 },
1518 [6] = { -1, 0 },
1519 [7] = { -1, 0 },
1520 },
1521 [0b10] = {
1522 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1523 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1524 [2] = { -1, 0 },
1525 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1526 [4] = { -1, 0 },
1527 [5] = { -1, 0 },
1528 [6] = { -1, 0 },
1529 [7] = { -1, 0 },
1530 },
1531 [0b11] = {
1532 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1533 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1534 [2] = { -1, 0 },
1535 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1536 [4] = { -1, 0 },
1537 [5] = { -1, 0 },
1538 [6] = { -1, 0 },
1539 [7] = { -1, 0 },
1540 }
1541 },
1542 [true] = {
1543 /* Has REX prefix. */
1544 [0b00] = {
1545 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1546 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1547 [2] = { -1, 0 },
1548 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1549 [4] = { -1, 0 },
1550 [5] = { -1, 0 },
1551 [6] = { -1, 0 },
1552 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1553 },
1554 [0b01] = {
1555 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1556 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1557 [2] = { -1, 0 },
1558 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1559 [4] = { -1, 0 },
1560 [5] = { -1, 0 },
1561 [6] = { -1, 0 },
1562 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1563 },
1564 [0b10] = {
1565 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1566 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1567 [2] = { -1, 0 },
1568 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1569 [4] = { -1, 0 },
1570 [5] = { -1, 0 },
1571 [6] = { -1, 0 },
1572 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1573 },
1574 [0b11] = {
1575 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1576 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1577 [2] = { -1, 0 },
1578 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1579 [4] = { -1, 0 },
1580 [5] = { -1, 0 },
1581 [6] = { -1, 0 },
1582 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1583 }
1584 }
1585 };
1586
1587 /* [depends][enc][size] */
1588 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1589 [false] = {
1590 /* Not extended. */
1591 [0b000] = {
1592 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1593 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1594 [2] = { -1, 0 },
1595 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1596 [4] = { -1, 0 },
1597 [5] = { -1, 0 },
1598 [6] = { -1, 0 },
1599 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1600 },
1601 [0b001] = {
1602 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1603 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1604 [2] = { -1, 0 },
1605 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1606 [4] = { -1, 0 },
1607 [5] = { -1, 0 },
1608 [6] = { -1, 0 },
1609 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1610 },
1611 [0b010] = {
1612 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1613 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1614 [2] = { -1, 0 },
1615 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1616 [4] = { -1, 0 },
1617 [5] = { -1, 0 },
1618 [6] = { -1, 0 },
1619 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1620 },
1621 [0b011] = {
1622 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1623 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1624 [2] = { -1, 0 },
1625 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1626 [4] = { -1, 0 },
1627 [5] = { -1, 0 },
1628 [6] = { -1, 0 },
1629 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1630 },
1631 [0b100] = {
1632 [0] = { -1, 0 }, /* SPECIAL */
1633 [1] = { -1, 0 }, /* SPECIAL */
1634 [2] = { -1, 0 },
1635 [3] = { -1, 0 }, /* SPECIAL */
1636 [4] = { -1, 0 },
1637 [5] = { -1, 0 },
1638 [6] = { -1, 0 },
1639 [7] = { -1, 0 }, /* SPECIAL */
1640 },
1641 [0b101] = {
1642 [0] = { -1, 0 }, /* SPECIAL */
1643 [1] = { -1, 0 }, /* SPECIAL */
1644 [2] = { -1, 0 },
1645 [3] = { -1, 0 }, /* SPECIAL */
1646 [4] = { -1, 0 },
1647 [5] = { -1, 0 },
1648 [6] = { -1, 0 },
1649 [7] = { -1, 0 }, /* SPECIAL */
1650 },
1651 [0b110] = {
1652 [0] = { -1, 0 }, /* SPECIAL */
1653 [1] = { -1, 0 }, /* SPECIAL */
1654 [2] = { -1, 0 },
1655 [3] = { -1, 0 }, /* SPECIAL */
1656 [4] = { -1, 0 },
1657 [5] = { -1, 0 },
1658 [6] = { -1, 0 },
1659 [7] = { -1, 0 }, /* SPECIAL */
1660 },
1661 [0b111] = {
1662 [0] = { -1, 0 }, /* SPECIAL */
1663 [1] = { -1, 0 }, /* SPECIAL */
1664 [2] = { -1, 0 },
1665 [3] = { -1, 0 }, /* SPECIAL */
1666 [4] = { -1, 0 },
1667 [5] = { -1, 0 },
1668 [6] = { -1, 0 },
1669 [7] = { -1, 0 }, /* SPECIAL */
1670 },
1671 },
1672 [true] = {
1673 /* Extended. */
1674 [0b000] = {
1675 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1676 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1677 [2] = { -1, 0 },
1678 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1679 [4] = { -1, 0 },
1680 [5] = { -1, 0 },
1681 [6] = { -1, 0 },
1682 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1683 },
1684 [0b001] = {
1685 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1686 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1687 [2] = { -1, 0 },
1688 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1689 [4] = { -1, 0 },
1690 [5] = { -1, 0 },
1691 [6] = { -1, 0 },
1692 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1693 },
1694 [0b010] = {
1695 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1696 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1697 [2] = { -1, 0 },
1698 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1699 [4] = { -1, 0 },
1700 [5] = { -1, 0 },
1701 [6] = { -1, 0 },
1702 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1703 },
1704 [0b011] = {
1705 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1706 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1707 [2] = { -1, 0 },
1708 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1709 [4] = { -1, 0 },
1710 [5] = { -1, 0 },
1711 [6] = { -1, 0 },
1712 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1713 },
1714 [0b100] = {
1715 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1716 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1717 [2] = { -1, 0 },
1718 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1719 [4] = { -1, 0 },
1720 [5] = { -1, 0 },
1721 [6] = { -1, 0 },
1722 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1723 },
1724 [0b101] = {
1725 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1726 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1727 [2] = { -1, 0 },
1728 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1729 [4] = { -1, 0 },
1730 [5] = { -1, 0 },
1731 [6] = { -1, 0 },
1732 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1733 },
1734 [0b110] = {
1735 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1736 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1737 [2] = { -1, 0 },
1738 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1739 [4] = { -1, 0 },
1740 [5] = { -1, 0 },
1741 [6] = { -1, 0 },
1742 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1743 },
1744 [0b111] = {
1745 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1746 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1747 [2] = { -1, 0 },
1748 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1749 [4] = { -1, 0 },
1750 [5] = { -1, 0 },
1751 [6] = { -1, 0 },
1752 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1753 },
1754 }
1755 };
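/*
 * Lookups take the form gpr_map[rex-bit][enc][size-1]. In the
 * non-extended half, encodings 0b100-0b111 are left as { -1, 0 }
 * because their meaning depends on the REX prefix: as byte registers
 * they are AH/CH/DH/BH without REX but SPL/BPL/SIL/DIL with one, and
 * for the wider sizes they are SP/BP/SI/DI and so on. Those cases are
 * resolved by resolve_special_register() below, through
 * gpr_map__special above.
 */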
1756
1757 static int
1758 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1759 {
1760 fsm->fn = NULL;
1761 return -1;
1762 }
1763
1764 static int
1765 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1766 {
1767 if (fsm->buf + n > fsm->end) {
1768 return -1;
1769 }
1770 memcpy(bytes, fsm->buf, n);
1771 return 0;
1772 }
1773
1774 static inline void
1775 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1776 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1777 {
1778 fsm->buf += n;
1779 if (fsm->buf > fsm->end) {
1780 fsm->fn = node_overflow;
1781 } else {
1782 fsm->fn = fn;
1783 }
1784 }
1785
1786 static const struct x86_reg *
1787 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1788 {
1789 enc &= 0b11;
1790 if (regsize == 8) {
1791 /* May be 64bit even without a REX prefix (e.g. 64bit address size); use the REX map. */
1792 return &gpr_map__special[1][enc][regsize-1];
1793 }
1794 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1795 }
1796
1797 /*
1798 * Special node, for MOVS. Fake two displacements of zero on the source and
1799 * destination registers.
1800 */
1801 static int
1802 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1803 {
1804 size_t adrsize;
1805
1806 adrsize = instr->address_size;
1807
1808 /* DS:RSI */
1809 instr->src.type = STORE_REG;
1810 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1811 instr->src.disp.type = DISP_0;
1812
1813 /* ES:RDI, force ES */
1814 instr->dst.type = STORE_REG;
1815 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1816 instr->dst.disp.type = DISP_0;
1817 instr->dst.hardseg = NVMM_X64_SEG_ES;
1818
1819 fsm_advance(fsm, 0, NULL);
1820
1821 return 0;
1822 }
1823
1824 /*
1825 * Special node, for STOS and LODS. Fake a displacement of zero on the
1826 * destination register.
1827 */
1828 static int
1829 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1830 {
1831 const struct x86_opcode *opcode = instr->opcode;
1832 struct x86_store *stlo, *streg;
1833 size_t adrsize, regsize;
1834
1835 adrsize = instr->address_size;
1836 regsize = instr->operand_size;
1837
1838 if (opcode->stos) {
1839 streg = &instr->src;
1840 stlo = &instr->dst;
1841 } else {
1842 streg = &instr->dst;
1843 stlo = &instr->src;
1844 }
1845
1846 streg->type = STORE_REG;
1847 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1848
1849 stlo->type = STORE_REG;
1850 if (opcode->stos) {
1851 /* ES:RDI, force ES */
1852 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1853 stlo->hardseg = NVMM_X64_SEG_ES;
1854 } else {
1855 /* DS:RSI */
1856 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1857 }
1858 stlo->disp.type = DISP_0;
1859
1860 fsm_advance(fsm, 0, NULL);
1861
1862 return 0;
1863 }
1864
1865 static int
1866 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1867 {
1868 const struct x86_opcode *opcode = instr->opcode;
1869 struct x86_store *stdmo, *streg;
1870 size_t adrsize, regsize;
1871
1872 adrsize = instr->address_size;
1873 regsize = instr->operand_size;
1874
1875 if (opcode->todmo) {
1876 streg = &instr->src;
1877 stdmo = &instr->dst;
1878 } else {
1879 streg = &instr->dst;
1880 stdmo = &instr->src;
1881 }
1882
1883 streg->type = STORE_REG;
1884 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1885
1886 stdmo->type = STORE_DMO;
1887 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1888 return -1;
1889 }
1890 fsm_advance(fsm, adrsize, NULL);
1891
1892 return 0;
1893 }
1894
1895 static inline uint64_t
1896 sign_extend(uint64_t val, int size)
1897 {
1898 if (size == 1) {
1899 if (val & __BIT(7))
1900 val |= 0xFFFFFFFFFFFFFF00;
1901 } else if (size == 2) {
1902 if (val & __BIT(15))
1903 val |= 0xFFFFFFFFFFFF0000;
1904 } else if (size == 4) {
1905 if (val & __BIT(31))
1906 val |= 0xFFFFFFFF00000000;
1907 }
1908 return val;
1909 }
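/*
 * E.g. sign_extend(0xFE, 1) == 0xFFFFFFFFFFFFFFFE, matching how the
 * CPU widens an imm8 or a 1/4-byte displacement before combining it
 * with a 64bit base.
 */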
1910
1911 static int
1912 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1913 {
1914 const struct x86_opcode *opcode = instr->opcode;
1915 struct x86_store *store;
1916 uint8_t immsize;
1917 size_t sesize = 0;
1918
1919 /* The immediate is the source */
1920 store = &instr->src;
1921 immsize = instr->operand_size;
1922
1923 if (opcode->flags & FLAG_imm8) {
1924 sesize = immsize;
1925 immsize = 1;
1926 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1927 sesize = immsize;
1928 immsize = 4;
1929 }
1930
1931 store->type = STORE_IMM;
1932 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1933 return -1;
1934 }
1935 fsm_advance(fsm, immsize, NULL);
1936
1937 if (sesize != 0) {
1938 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1939 }
1940
1941 return 0;
1942 }
1943
1944 static int
1945 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1946 {
1947 const struct x86_opcode *opcode = instr->opcode;
1948 uint64_t data = 0;
1949 size_t n;
1950
1951 if (instr->strm->disp.type == DISP_1) {
1952 n = 1;
1953 } else { /* DISP4 */
1954 n = 4;
1955 }
1956
1957 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1958 return -1;
1959 }
1960
1961 if (__predict_true(fsm->is64bit)) {
1962 data = sign_extend(data, n);
1963 }
1964
1965 instr->strm->disp.data = data;
1966
1967 if (opcode->immediate) {
1968 fsm_advance(fsm, n, node_immediate);
1969 } else {
1970 fsm_advance(fsm, n, NULL);
1971 }
1972
1973 return 0;
1974 }
1975
1976 static const struct x86_reg *
1977 get_register_idx(struct x86_instr *instr, uint8_t index)
1978 {
1979 uint8_t enc = index;
1980 const struct x86_reg *reg;
1981 size_t regsize;
1982
1983 regsize = instr->address_size;
1984 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1985
1986 if (reg->num == -1) {
1987 reg = resolve_special_register(instr, enc, regsize);
1988 }
1989
1990 return reg;
1991 }
1992
1993 static const struct x86_reg *
1994 get_register_bas(struct x86_instr *instr, uint8_t base)
1995 {
1996 uint8_t enc = base;
1997 const struct x86_reg *reg;
1998 size_t regsize;
1999
2000 regsize = instr->address_size;
2001 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2002 if (reg->num == -1) {
2003 reg = resolve_special_register(instr, enc, regsize);
2004 }
2005
2006 return reg;
2007 }
2008
2009 static int
2010 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2011 {
2012 const struct x86_opcode *opcode;
2013 uint8_t scale, index, base;
2014 bool noindex, nobase;
2015 uint8_t byte;
2016
2017 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2018 return -1;
2019 }
2020
2021 scale = ((byte & 0b11000000) >> 6);
2022 index = ((byte & 0b00111000) >> 3);
2023 base = ((byte & 0b00000111) >> 0);
2024
2025 opcode = instr->opcode;
2026
2027 noindex = false;
2028 nobase = false;
2029
2030 if (index == 0b100 && !instr->rexpref.x) {
2031 /* Special case: the index is null */
2032 noindex = true;
2033 }
2034
2035 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2036 /* Special case: the base is null + disp32 */
2037 instr->strm->disp.type = DISP_4;
2038 nobase = true;
2039 }
2040
2041 instr->strm->type = STORE_SIB;
2042 instr->strm->u.sib.scale = (1 << scale);
2043 if (!noindex)
2044 instr->strm->u.sib.idx = get_register_idx(instr, index);
2045 if (!nobase)
2046 instr->strm->u.sib.bas = get_register_bas(instr, base);
2047
2048 /* May have a displacement, or an immediate */
2049 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2050 fsm_advance(fsm, 1, node_disp);
2051 } else if (opcode->immediate) {
2052 fsm_advance(fsm, 1, node_immediate);
2053 } else {
2054 fsm_advance(fsm, 1, NULL);
2055 }
2056
2057 return 0;
2058 }
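/*
 * A SIB operand ultimately resolves to base + index * scale plus the
 * displacement parsed afterwards. The two special encodings handled
 * above, index=0b100 without REX.X ("no index") and mod=00 with
 * base=0b101 ("no base, disp32 follows"), simply leave the
 * corresponding field unset.
 */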
2059
2060 static const struct x86_reg *
2061 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2062 {
2063 uint8_t enc = instr->regmodrm.reg;
2064 const struct x86_reg *reg;
2065 size_t regsize;
2066
2067 regsize = instr->operand_size;
2068
2069 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2070 if (reg->num == -1) {
2071 reg = resolve_special_register(instr, enc, regsize);
2072 }
2073
2074 return reg;
2075 }
2076
2077 static const struct x86_reg *
2078 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2079 {
2080 uint8_t enc = instr->regmodrm.rm;
2081 const struct x86_reg *reg;
2082 size_t regsize;
2083
2084 if (instr->strm->disp.type == DISP_NONE) {
2085 regsize = instr->operand_size;
2086 } else {
2087 /* Indirect access, the size is that of the address. */
2088 regsize = instr->address_size;
2089 }
2090
2091 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2092 if (reg->num == -1) {
2093 reg = resolve_special_register(instr, enc, regsize);
2094 }
2095
2096 return reg;
2097 }
2098
2099 static inline bool
2100 has_sib(struct x86_instr *instr)
2101 {
2102 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2103 }
2104
2105 static inline bool
2106 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2107 {
2108 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2109 instr->regmodrm.rm == RM_RBP_DISP32);
2110 }
2111
2112 static inline bool
2113 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2114 {
2115 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2116 instr->regmodrm.rm == RM_RBP_DISP32);
2117 }
2118
2119 static enum x86_disp_type
2120 get_disp_type(struct x86_instr *instr)
2121 {
2122 switch (instr->regmodrm.mod) {
2123 case MOD_DIS0: /* indirect */
2124 return DISP_0;
2125 case MOD_DIS1: /* indirect+1 */
2126 return DISP_1;
2127 case MOD_DIS4: /* indirect+4 */
2128 return DISP_4;
2129 case MOD_REG: /* direct */
2130 default: /* gcc */
2131 return DISP_NONE;
2132 }
2133 }
2134
2135 static int
2136 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2137 {
2138 struct x86_store *strg, *strm;
2139 const struct x86_opcode *opcode;
2140 const struct x86_reg *reg;
2141 uint8_t byte;
2142
2143 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2144 return -1;
2145 }
2146
2147 opcode = instr->opcode;
2148
2149 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2150 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2151 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2152
2153 if (opcode->regtorm) {
2154 strg = &instr->src;
2155 strm = &instr->dst;
2156 } else { /* RM to REG */
2157 strm = &instr->src;
2158 strg = &instr->dst;
2159 }
2160
2161 /* Save for later use. */
2162 instr->strm = strm;
2163
2164 /*
2165 * Special cases: Groups. The REG field of REGMODRM is the index in
2166 * the group. op1 gets overwritten in the Immediate node, if any.
2167 */
2168 if (opcode->group1) {
2169 if (group1[instr->regmodrm.reg].emul == NULL) {
2170 return -1;
2171 }
2172 instr->emul = group1[instr->regmodrm.reg].emul;
2173 } else if (opcode->group3) {
2174 if (group3[instr->regmodrm.reg].emul == NULL) {
2175 return -1;
2176 }
2177 instr->emul = group3[instr->regmodrm.reg].emul;
2178 } else if (opcode->group11) {
2179 if (group11[instr->regmodrm.reg].emul == NULL) {
2180 return -1;
2181 }
2182 instr->emul = group11[instr->regmodrm.reg].emul;
2183 }
2184
2185 if (!opcode->immediate) {
2186 reg = get_register_reg(instr, opcode);
2187 if (reg == NULL) {
2188 return -1;
2189 }
2190 strg->type = STORE_REG;
2191 strg->u.reg = reg;
2192 }
2193
2194 /* The displacement applies to RM. */
2195 strm->disp.type = get_disp_type(instr);
2196
2197 if (has_sib(instr)) {
2198 /* Overwrites RM */
2199 fsm_advance(fsm, 1, node_sib);
2200 return 0;
2201 }
2202
2203 if (is_rip_relative(fsm, instr)) {
2204 /* Overwrites RM */
2205 strm->type = STORE_REG;
2206 strm->u.reg = &gpr_map__rip;
2207 strm->disp.type = DISP_4;
2208 fsm_advance(fsm, 1, node_disp);
2209 return 0;
2210 }
2211
2212 if (is_disp32_only(fsm, instr)) {
2213 /* Overwrites RM */
2214 strm->type = STORE_REG;
2215 strm->u.reg = NULL;
2216 strm->disp.type = DISP_4;
2217 fsm_advance(fsm, 1, node_disp);
2218 return 0;
2219 }
2220
2221 reg = get_register_rm(instr, opcode);
2222 if (reg == NULL) {
2223 return -1;
2224 }
2225 strm->type = STORE_REG;
2226 strm->u.reg = reg;
2227
2228 if (strm->disp.type == DISP_NONE) {
2229 /* Direct register addressing mode */
2230 if (opcode->immediate) {
2231 fsm_advance(fsm, 1, node_immediate);
2232 } else {
2233 fsm_advance(fsm, 1, NULL);
2234 }
2235 } else if (strm->disp.type == DISP_0) {
2236 /* Indirect register addressing mode */
2237 if (opcode->immediate) {
2238 fsm_advance(fsm, 1, node_immediate);
2239 } else {
2240 fsm_advance(fsm, 1, NULL);
2241 }
2242 } else {
2243 fsm_advance(fsm, 1, node_disp);
2244 }
2245
2246 return 0;
2247 }
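/*
 * Worked example with hypothetical bytes, not taken from a guest: for
 * "89 58 04" (mov %ebx,0x4(%rax) in 64bit mode), opcode 0x89 is
 * reg-to-rm, so strg describes the source (EBX, reg=011) and strm the
 * destination (RAX indirect, rm=000); mod=01 selects a 1-byte
 * displacement, which node_disp then reads as 0x04.
 */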
2248
2249 static size_t
2250 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2251 {
2252 const struct x86_opcode *opcode = instr->opcode;
2253 int opsize;
2254
2255 /* Get the opsize */
2256 if (!opcode->szoverride) {
2257 opsize = opcode->defsize;
2258 } else if (instr->rexpref.present && instr->rexpref.w) {
2259 opsize = 8;
2260 } else {
2261 if (!fsm->is16bit) {
2262 if (instr->legpref.opr_ovr) {
2263 opsize = 2;
2264 } else {
2265 opsize = 4;
2266 }
2267 } else { /* 16bit */
2268 if (instr->legpref.opr_ovr) {
2269 opsize = 4;
2270 } else {
2271 opsize = 2;
2272 }
2273 }
2274 }
2275
2276 return opsize;
2277 }
2278
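/*
 * Address size, in bytes: 8 in 64-bit mode, 4 in 32-bit mode, 2 in 16-bit
 * mode. The address-size override prefix (0x67) selects 4, 2 and 4 bytes
 * respectively.
 */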
2279 static size_t
2280 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2281 {
2282 if (fsm->is64bit) {
2283 if (__predict_false(instr->legpref.adr_ovr)) {
2284 return 4;
2285 }
2286 return 8;
2287 }
2288
2289 if (fsm->is32bit) {
2290 if (__predict_false(instr->legpref.adr_ovr)) {
2291 return 2;
2292 }
2293 return 4;
2294 }
2295
2296 /* 16bit. */
2297 if (__predict_false(instr->legpref.adr_ovr)) {
2298 return 4;
2299 }
2300 return 2;
2301 }
2302
2303 static int
2304 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2305 {
2306 const struct x86_opcode *opcode;
2307 uint8_t byte;
2308
2309 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2310 return -1;
2311 }
2312
2313 opcode = &primary_opcode_table[byte];
2314 if (__predict_false(!opcode->valid)) {
2315 return -1;
2316 }
2317
2318 instr->opcode = opcode;
2319 instr->emul = opcode->emul;
2320 instr->operand_size = get_operand_size(fsm, instr);
2321 instr->address_size = get_address_size(fsm, instr);
2322
2323 if (fsm->is64bit && (instr->operand_size == 4)) {
2324 /* Zero-extend to 64 bits. */
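		/* For example, a 4-byte load into RAX also clears bits 63:32. */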
2325 instr->zeroextend_mask = ~size_to_mask(4);
2326 }
2327
2328 if (opcode->regmodrm) {
2329 fsm_advance(fsm, 1, node_regmodrm);
2330 } else if (opcode->dmo) {
2331 /* Direct-Memory Offsets */
2332 fsm_advance(fsm, 1, node_dmo);
2333 } else if (opcode->stos || opcode->lods) {
2334 fsm_advance(fsm, 1, node_stlo);
2335 } else if (opcode->movs) {
2336 fsm_advance(fsm, 1, node_movs);
2337 } else {
2338 return -1;
2339 }
2340
2341 return 0;
2342 }
2343
2344 static int
2345 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2346 {
2347 const struct x86_opcode *opcode;
2348 uint8_t byte;
2349
2350 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2351 return -1;
2352 }
2353
2354 opcode = &secondary_opcode_table[byte];
2355 if (__predict_false(!opcode->valid)) {
2356 return -1;
2357 }
2358
2359 instr->opcode = opcode;
2360 instr->emul = opcode->emul;
2361 instr->operand_size = get_operand_size(fsm, instr);
2362 instr->address_size = get_address_size(fsm, instr);
2363
2364 if (fsm->is64bit && (instr->operand_size == 4)) {
2365 /* Zero-extend to 64 bits. */
2366 instr->zeroextend_mask = ~size_to_mask(4);
2367 }
2368
2369 if (opcode->flags & FLAG_ze) {
2370 /*
2371 * Compute the mask for zero-extend. Update the operand size,
2372 	 * since we move fewer bytes.
2373 */
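		/*
		 * For instance, MOVZX with a byte source transfers a single
		 * byte and zeroes the remaining bytes of the destination.
		 */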
2374 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2375 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2376 instr->operand_size = opcode->defsize;
2377 }
2378
2379 if (opcode->regmodrm) {
2380 fsm_advance(fsm, 1, node_regmodrm);
2381 } else {
2382 return -1;
2383 }
2384
2385 return 0;
2386 }
2387
2388 static int
2389 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2390 {
2391 uint8_t byte;
2392
2393 #define ESCAPE 0x0F
2394 #define VEX_1 0xC5
2395 #define VEX_2 0xC4
2396 #define XOP 0x8F
2397
2398 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2399 return -1;
2400 }
2401
2402 /*
2403 * We don't take XOP. It is AMD-specific, and it was removed shortly
2404 * after being introduced.
2405 */
2406 if (byte == ESCAPE) {
2407 fsm_advance(fsm, 1, node_secondary_opcode);
2408 } else if (!instr->rexpref.present) {
2409 if (byte == VEX_1) {
2410 return -1;
2411 } else if (byte == VEX_2) {
2412 return -1;
2413 } else {
2414 fsm->fn = node_primary_opcode;
2415 }
2416 } else {
2417 fsm->fn = node_primary_opcode;
2418 }
2419
2420 return 0;
2421 }
2422
2423 static int
2424 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2425 {
2426 struct x86_rexpref *rexpref = &instr->rexpref;
2427 uint8_t byte;
2428 size_t n = 0;
2429
2430 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2431 return -1;
2432 }
2433
2434 if (byte >= 0x40 && byte <= 0x4F) {
2435 if (__predict_false(!fsm->is64bit)) {
2436 return -1;
2437 }
2438 rexpref->b = ((byte & 0x1) != 0);
2439 rexpref->x = ((byte & 0x2) != 0);
2440 rexpref->r = ((byte & 0x4) != 0);
2441 rexpref->w = ((byte & 0x8) != 0);
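		/* For example, 0x48 is REX.W alone: w=1, r=x=b=0. */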
2442 rexpref->present = true;
2443 n = 1;
2444 }
2445
2446 fsm_advance(fsm, n, node_main);
2447 return 0;
2448 }
2449
2450 static int
2451 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2452 {
2453 uint8_t byte;
2454
2455 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2456 return -1;
2457 }
2458
2459 if (byte == LEG_OPR_OVR) {
2460 instr->legpref.opr_ovr = 1;
2461 } else if (byte == LEG_OVR_DS) {
2462 instr->legpref.seg = NVMM_X64_SEG_DS;
2463 } else if (byte == LEG_OVR_ES) {
2464 instr->legpref.seg = NVMM_X64_SEG_ES;
2465 } else if (byte == LEG_REP) {
2466 instr->legpref.rep = 1;
2467 } else if (byte == LEG_OVR_GS) {
2468 instr->legpref.seg = NVMM_X64_SEG_GS;
2469 } else if (byte == LEG_OVR_FS) {
2470 instr->legpref.seg = NVMM_X64_SEG_FS;
2471 } else if (byte == LEG_ADR_OVR) {
2472 instr->legpref.adr_ovr = 1;
2473 } else if (byte == LEG_OVR_CS) {
2474 instr->legpref.seg = NVMM_X64_SEG_CS;
2475 } else if (byte == LEG_OVR_SS) {
2476 instr->legpref.seg = NVMM_X64_SEG_SS;
2477 } else if (byte == LEG_REPN) {
2478 instr->legpref.repn = 1;
2479 } else if (byte == LEG_LOCK) {
2480 /* ignore */
2481 } else {
2482 /* not a legacy prefix */
2483 fsm_advance(fsm, 0, node_rex_prefix);
2484 return 0;
2485 }
2486
2487 fsm_advance(fsm, 1, node_legacy_prefix);
2488 return 0;
2489 }
2490
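/*
 * Decoding pipeline: legacy prefixes -> REX prefix -> main node (escape and
 * VEX detection) -> primary or secondary opcode -> REGMODRM -> SIB,
 * displacement and/or immediate. Each node consumes its bytes through
 * fsm_advance() until the next function pointer is NULL.
 */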
2491 static int
2492 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2493 struct nvmm_x64_state *state)
2494 {
2495 struct x86_decode_fsm fsm;
2496 int ret;
2497
2498 memset(instr, 0, sizeof(*instr));
2499 instr->legpref.seg = -1;
2500 instr->src.hardseg = -1;
2501 instr->dst.hardseg = -1;
2502
2503 fsm.is64bit = is_64bit(state);
2504 fsm.is32bit = is_32bit(state);
2505 fsm.is16bit = is_16bit(state);
2506
2507 fsm.fn = node_legacy_prefix;
2508 fsm.buf = inst_bytes;
2509 fsm.end = inst_bytes + inst_len;
2510
2511 while (fsm.fn != NULL) {
2512 ret = (*fsm.fn)(&fsm, instr);
2513 if (ret == -1)
2514 return -1;
2515 }
2516
2517 instr->len = fsm.buf - inst_bytes;
2518
2519 return 0;
2520 }
2521
2522 /* -------------------------------------------------------------------------- */
2523
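/*
 * EXEC_INSTR(sz, instr) generates exec_<instr><sz>(), which executes the
 * given instruction natively on the host operands and captures the resulting
 * RFLAGS with PUSHFQ/POPQ. EXEC_DISPATCHER(instr) generates exec_<instr>(),
 * which selects the right width from the operand size.
 */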
2524 #define EXEC_INSTR(sz, instr) \
2525 static uint##sz##_t \
2526 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2527 { \
2528 uint##sz##_t res; \
2529 __asm __volatile ( \
2530 #instr " %2, %3;" \
2531 "mov %3, %1;" \
2532 "pushfq;" \
2533 "popq %0" \
2534 : "=r" (*rflags), "=r" (res) \
2535 : "r" (op1), "r" (op2)); \
2536 return res; \
2537 }
2538
2539 #define EXEC_DISPATCHER(instr) \
2540 static uint64_t \
2541 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2542 { \
2543 switch (opsize) { \
2544 case 1: \
2545 return exec_##instr##8(op1, op2, rflags); \
2546 case 2: \
2547 return exec_##instr##16(op1, op2, rflags); \
2548 case 4: \
2549 return exec_##instr##32(op1, op2, rflags); \
2550 default: \
2551 return exec_##instr##64(op1, op2, rflags); \
2552 } \
2553 }
2554
2555 /* SUB: ret = op1 - op2 */
2556 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2557 EXEC_INSTR(8, sub)
2558 EXEC_INSTR(16, sub)
2559 EXEC_INSTR(32, sub)
2560 EXEC_INSTR(64, sub)
2561 EXEC_DISPATCHER(sub)
2562
2563 /* OR: ret = op1 | op2 */
2564 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2565 EXEC_INSTR(8, or)
2566 EXEC_INSTR(16, or)
2567 EXEC_INSTR(32, or)
2568 EXEC_INSTR(64, or)
2569 EXEC_DISPATCHER(or)
2570
2571 /* AND: ret = op1 & op2 */
2572 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2573 EXEC_INSTR(8, and)
2574 EXEC_INSTR(16, and)
2575 EXEC_INSTR(32, and)
2576 EXEC_INSTR(64, and)
2577 EXEC_DISPATCHER(and)
2578
2579 /* XOR: ret = op1 ^ op2 */
2580 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2581 EXEC_INSTR(8, xor)
2582 EXEC_INSTR(16, xor)
2583 EXEC_INSTR(32, xor)
2584 EXEC_INSTR(64, xor)
2585 EXEC_DISPATCHER(xor)
2586
2587 /* -------------------------------------------------------------------------- */
2588
2589 /*
2590 * Emulation functions. We don't care about the order of the operands, except
2591  * for SUB, CMP and TEST. For those, we look at mem->write to determine
2592  * which operand is op1 and which is op2.
2593 */
2594
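/*
 * OR, AND and XOR share the same read-modify-write pattern: the register or
 * immediate operand arrives in mem->data, the memory operand is fetched
 * through the callback, the result is written back to memory (for a write)
 * or handed back to the caller (for a read), and RFLAGS is updated.
 */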
2595 static void
2596 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2597 {
2598 uint64_t *retval = (uint64_t *)mem->data;
2599 const bool write = mem->write;
2600 uint64_t *op1, op2, fl, ret;
2601
2602 op1 = (uint64_t *)mem->data;
2603 op2 = 0;
2604
2605 /* Fetch the value to be OR'ed (op2). */
2606 mem->data = (uint8_t *)&op2;
2607 mem->write = false;
2608 (*__callbacks.mem)(mem);
2609
2610 /* Perform the OR. */
2611 ret = exec_or(*op1, op2, &fl, mem->size);
2612
2613 if (write) {
2614 /* Write back the result. */
2615 mem->data = (uint8_t *)&ret;
2616 mem->write = true;
2617 (*__callbacks.mem)(mem);
2618 } else {
2619 /* Return data to the caller. */
2620 *retval = ret;
2621 }
2622
2623 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2624 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2625 }
2626
2627 static void
2628 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2629 {
2630 uint64_t *retval = (uint64_t *)mem->data;
2631 const bool write = mem->write;
2632 uint64_t *op1, op2, fl, ret;
2633
2634 op1 = (uint64_t *)mem->data;
2635 op2 = 0;
2636
2637 /* Fetch the value to be AND'ed (op2). */
2638 mem->data = (uint8_t *)&op2;
2639 mem->write = false;
2640 (*__callbacks.mem)(mem);
2641
2642 /* Perform the AND. */
2643 ret = exec_and(*op1, op2, &fl, mem->size);
2644
2645 if (write) {
2646 /* Write back the result. */
2647 mem->data = (uint8_t *)&ret;
2648 mem->write = true;
2649 (*__callbacks.mem)(mem);
2650 } else {
2651 /* Return data to the caller. */
2652 *retval = ret;
2653 }
2654
2655 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2656 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2657 }
2658
2659 static void
2660 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2661 {
2662 uint64_t *retval = (uint64_t *)mem->data;
2663 const bool write = mem->write;
2664 uint64_t *op1, *op2, fl, ret;
2665 uint64_t tmp;
2666 bool memop1;
2667
2668 memop1 = !mem->write;
2669 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2670 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2671
2672 /* Fetch the value to be SUB'ed (op1 or op2). */
2673 mem->data = (uint8_t *)&tmp;
2674 mem->write = false;
2675 (*__callbacks.mem)(mem);
2676
2677 /* Perform the SUB. */
2678 ret = exec_sub(*op1, *op2, &fl, mem->size);
2679
2680 if (write) {
2681 /* Write back the result. */
2682 mem->data = (uint8_t *)&ret;
2683 mem->write = true;
2684 (*__callbacks.mem)(mem);
2685 } else {
2686 /* Return data to the caller. */
2687 *retval = ret;
2688 }
2689
2690 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2691 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2692 }
2693
2694 static void
2695 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2696 {
2697 uint64_t *retval = (uint64_t *)mem->data;
2698 const bool write = mem->write;
2699 uint64_t *op1, op2, fl, ret;
2700
2701 op1 = (uint64_t *)mem->data;
2702 op2 = 0;
2703
2704 /* Fetch the value to be XOR'ed (op2). */
2705 mem->data = (uint8_t *)&op2;
2706 mem->write = false;
2707 (*__callbacks.mem)(mem);
2708
2709 /* Perform the XOR. */
2710 ret = exec_xor(*op1, op2, &fl, mem->size);
2711
2712 if (write) {
2713 /* Write back the result. */
2714 mem->data = (uint8_t *)&ret;
2715 mem->write = true;
2716 (*__callbacks.mem)(mem);
2717 } else {
2718 /* Return data to the caller. */
2719 *retval = ret;
2720 }
2721
2722 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2723 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2724 }
2725
2726 static void
2727 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2728 {
2729 uint64_t *op1, *op2, fl;
2730 uint64_t tmp;
2731 bool memop1;
2732
2733 memop1 = !mem->write;
2734 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2735 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2736
2737 /* Fetch the value to be CMP'ed (op1 or op2). */
2738 mem->data = (uint8_t *)&tmp;
2739 mem->write = false;
2740 (*__callbacks.mem)(mem);
2741
2742 /* Perform the CMP. */
2743 exec_sub(*op1, *op2, &fl, mem->size);
2744
2745 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2746 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2747 }
2748
2749 static void
2750 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2751 {
2752 uint64_t *op1, *op2, fl;
2753 uint64_t tmp;
2754 bool memop1;
2755
2756 memop1 = !mem->write;
2757 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2758 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2759
2760 /* Fetch the value to be TEST'ed (op1 or op2). */
2761 mem->data = (uint8_t *)&tmp;
2762 mem->write = false;
2763 (*__callbacks.mem)(mem);
2764
2765 /* Perform the TEST. */
2766 exec_and(*op1, *op2, &fl, mem->size);
2767
2768 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2769 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2770 }
2771
2772 static void
2773 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2774 {
2775 /*
2776 * Nothing special, just move without emulation.
2777 */
2778 (*__callbacks.mem)(mem);
2779 }
2780
2781 static void
2782 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2783 {
2784 /*
2785 * Just move, and update RDI.
2786 */
2787 (*__callbacks.mem)(mem);
2788
2789 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2790 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2791 } else {
2792 gprs[NVMM_X64_GPR_RDI] += mem->size;
2793 }
2794 }
2795
2796 static void
2797 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2798 {
2799 /*
2800 * Just move, and update RSI.
2801 */
2802 (*__callbacks.mem)(mem);
2803
2804 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2805 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2806 } else {
2807 gprs[NVMM_X64_GPR_RSI] += mem->size;
2808 }
2809 }
2810
2811 static void
2812 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2813 {
2814 /*
2815  * Special instruction: double memory operand. Don't call the callback:
2816  * the memory accesses were already performed by assist_mem_double().
2817 */
2818
2819 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2820 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2821 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2822 } else {
2823 gprs[NVMM_X64_GPR_RSI] += mem->size;
2824 gprs[NVMM_X64_GPR_RDI] += mem->size;
2825 }
2826 }
2827
2828 /* -------------------------------------------------------------------------- */
2829
2830 static inline uint64_t
2831 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2832 {
2833 uint64_t val;
2834
2835 val = state->gprs[gpr];
2836 val &= size_to_mask(instr->address_size);
2837
2838 return val;
2839 }
2840
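/*
 * Compute the guest virtual address of a store: base register (or SIB
 * base + scale * index, or a direct memory offset), plus displacement,
 * plus the base of the selected segment where applicable.
 */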
2841 static int
2842 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2843 struct x86_store *store, gvaddr_t *gvap, size_t size)
2844 {
2845 struct x86_sib *sib;
2846 gvaddr_t gva = 0;
2847 uint64_t reg;
2848 int ret, seg;
2849
2850 if (store->type == STORE_SIB) {
2851 sib = &store->u.sib;
2852 if (sib->bas != NULL)
2853 gva += gpr_read_address(instr, state, sib->bas->num);
2854 if (sib->idx != NULL) {
2855 reg = gpr_read_address(instr, state, sib->idx->num);
2856 gva += sib->scale * reg;
2857 }
2858 } else if (store->type == STORE_REG) {
2859 if (store->u.reg == NULL) {
2860 /* The base is null. Happens with disp32-only. */
2861 } else {
2862 gva = gpr_read_address(instr, state, store->u.reg->num);
2863 }
2864 } else {
2865 gva = store->u.dmo;
2866 }
2867
2868 if (store->disp.type != DISP_NONE) {
2869 gva += store->disp.data;
2870 }
2871
2872 if (store->hardseg != -1) {
2873 seg = store->hardseg;
2874 } else {
2875 if (__predict_false(instr->legpref.seg != -1)) {
2876 seg = instr->legpref.seg;
2877 } else {
2878 seg = NVMM_X64_SEG_DS;
2879 }
2880 }
2881
2882 if (__predict_true(is_long_mode(state))) {
2883 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2884 segment_apply(&state->segs[seg], &gva);
2885 }
2886 } else {
2887 ret = segment_check(&state->segs[seg], gva, size);
2888 if (ret == -1)
2889 return -1;
2890 segment_apply(&state->segs[seg], &gva);
2891 }
2892
2893 *gvap = gva;
2894 return 0;
2895 }
2896
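/*
 * Scan the prefix bytes of the instruction at RIP to determine which segment
 * the access goes through; defaults to DS when no override is present.
 */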
2897 static int
2898 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2899 {
2900 uint8_t inst_bytes[5], byte;
2901 size_t i, fetchsize;
2902 gvaddr_t gva;
2903 int ret, seg;
2904
2905 fetchsize = sizeof(inst_bytes);
2906
2907 gva = state->gprs[NVMM_X64_GPR_RIP];
2908 if (__predict_false(!is_long_mode(state))) {
2909 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2910 fetchsize);
2911 if (ret == -1)
2912 return -1;
2913 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2914 }
2915
2916 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2917 if (ret == -1)
2918 return -1;
2919
2920 seg = NVMM_X64_SEG_DS;
2921 for (i = 0; i < fetchsize; i++) {
2922 byte = inst_bytes[i];
2923
2924 if (byte == LEG_OVR_DS) {
2925 seg = NVMM_X64_SEG_DS;
2926 } else if (byte == LEG_OVR_ES) {
2927 seg = NVMM_X64_SEG_ES;
2928 } else if (byte == LEG_OVR_GS) {
2929 seg = NVMM_X64_SEG_GS;
2930 } else if (byte == LEG_OVR_FS) {
2931 seg = NVMM_X64_SEG_FS;
2932 } else if (byte == LEG_OVR_CS) {
2933 seg = NVMM_X64_SEG_CS;
2934 } else if (byte == LEG_OVR_SS) {
2935 seg = NVMM_X64_SEG_SS;
2936 } else if (byte == LEG_OPR_OVR) {
2937 /* nothing */
2938 } else if (byte == LEG_ADR_OVR) {
2939 /* nothing */
2940 } else if (byte == LEG_REP) {
2941 /* nothing */
2942 } else if (byte == LEG_REPN) {
2943 /* nothing */
2944 } else if (byte == LEG_LOCK) {
2945 /* nothing */
2946 } else {
2947 return seg;
2948 }
2949 }
2950
2951 return seg;
2952 }
2953
2954 static int
2955 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2956 struct nvmm_exit *exit)
2957 {
2958 size_t fetchsize;
2959 gvaddr_t gva;
2960 int ret;
2961
2962 fetchsize = sizeof(exit->u.mem.inst_bytes);
2963
2964 gva = state->gprs[NVMM_X64_GPR_RIP];
2965 if (__predict_false(!is_long_mode(state))) {
2966 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2967 fetchsize);
2968 if (ret == -1)
2969 return -1;
2970 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2971 }
2972
2973 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2974 fetchsize);
2975 if (ret == -1)
2976 return -1;
2977
2978 exit->u.mem.inst_len = fetchsize;
2979
2980 return 0;
2981 }
2982
2983 static int
2984 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2985 struct x86_instr *instr)
2986 {
2987 struct nvmm_mem mem;
2988 uint8_t data[8];
2989 gvaddr_t gva;
2990 size_t size;
2991 int ret;
2992
2993 size = instr->operand_size;
2994
2995 /* Source. */
2996 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2997 if (ret == -1)
2998 return -1;
2999 ret = read_guest_memory(mach, state, gva, data, size);
3000 if (ret == -1)
3001 return -1;
3002
3003 /* Destination. */
3004 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3005 if (ret == -1)
3006 return -1;
3007 ret = write_guest_memory(mach, state, gva, data, size);
3008 if (ret == -1)
3009 return -1;
3010
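	/* Only the size matters here: x86_func_movs touches no other field. */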
3011 mem.size = size;
3012 (*instr->emul->func)(&mem, state->gprs);
3013
3014 return 0;
3015 }
3016
3017 #define DISASSEMBLER_BUG() \
3018 do { \
3019 errno = EINVAL; \
3020 return -1; \
3021 	} while (0)
3022
3023 static int
3024 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3025 struct x86_instr *instr, struct nvmm_exit *exit)
3026 {
3027 struct nvmm_mem mem;
3028 uint8_t membuf[8];
3029 uint64_t val;
3030
3031 memset(membuf, 0, sizeof(membuf));
3032
3033 mem.gpa = exit->u.mem.gpa;
3034 mem.size = instr->operand_size;
3035 mem.data = membuf;
3036
3037 /* Determine the direction. */
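	/*
	 * A register source with no displacement, or an immediate source,
	 * means the guest was writing to memory; any other source means it
	 * was reading from memory into a register.
	 */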
3038 switch (instr->src.type) {
3039 case STORE_REG:
3040 if (instr->src.disp.type != DISP_NONE) {
3041 /* Indirect access. */
3042 mem.write = false;
3043 } else {
3044 /* Direct access. */
3045 mem.write = true;
3046 }
3047 break;
3048 case STORE_IMM:
3049 mem.write = true;
3050 break;
3051 case STORE_SIB:
3052 mem.write = false;
3053 break;
3054 case STORE_DMO:
3055 mem.write = false;
3056 break;
3057 default:
3058 DISASSEMBLER_BUG();
3059 }
3060
3061 if (mem.write) {
3062 switch (instr->src.type) {
3063 case STORE_REG:
3064 if (instr->src.disp.type != DISP_NONE) {
3065 DISASSEMBLER_BUG();
3066 }
3067 val = state->gprs[instr->src.u.reg->num];
3068 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3069 memcpy(mem.data, &val, mem.size);
3070 break;
3071 case STORE_IMM:
3072 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3073 break;
3074 default:
3075 DISASSEMBLER_BUG();
3076 }
3077 } else if (instr->emul->read) {
3078 if (instr->dst.type != STORE_REG) {
3079 DISASSEMBLER_BUG();
3080 }
3081 if (instr->dst.disp.type != DISP_NONE) {
3082 DISASSEMBLER_BUG();
3083 }
3084 val = state->gprs[instr->dst.u.reg->num];
3085 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3086 memcpy(mem.data, &val, mem.size);
3087 }
3088
3089 (*instr->emul->func)(&mem, state->gprs);
3090
3091 if (!instr->emul->notouch && !mem.write) {
3092 if (instr->dst.type != STORE_REG) {
3093 DISASSEMBLER_BUG();
3094 }
3095 memcpy(&val, membuf, sizeof(uint64_t));
3096 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3097 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3098 state->gprs[instr->dst.u.reg->num] |= val;
3099 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3100 }
3101
3102 return 0;
3103 }
3104
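/*
 * Assist for memory exits (typically MMIO): fetch the guest state and, if
 * needed, the faulting instruction; decode it; emulate the access (single
 * operand, or double operand for MOVS); handle the REP/REPNE count; and
 * advance RIP.
 */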
3105 int
3106 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3107 struct nvmm_exit *exit)
3108 {
3109 struct nvmm_x64_state state;
3110 struct x86_instr instr;
3111 uint64_t cnt = 0; /* GCC */
3112 int ret;
3113
3114 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3115 errno = EINVAL;
3116 return -1;
3117 }
3118
3119 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3120 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3121 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3122 if (ret == -1)
3123 return -1;
3124
3125 if (exit->u.mem.inst_len == 0) {
3126 /*
3127 * The instruction was not fetched from the kernel. Fetch
3128 * it ourselves.
3129 */
3130 ret = fetch_instruction(mach, &state, exit);
3131 if (ret == -1)
3132 return -1;
3133 }
3134
3135 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3136 &instr, &state);
3137 if (ret == -1) {
3138 errno = ENODEV;
3139 return -1;
3140 }
3141
3142 if (instr.legpref.rep || instr.legpref.repn) {
3143 cnt = rep_get_cnt(&state, instr.address_size);
3144 if (__predict_false(cnt == 0)) {
3145 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3146 goto out;
3147 }
3148 }
3149
3150 if (instr.opcode->movs) {
3151 ret = assist_mem_double(mach, &state, &instr);
3152 } else {
3153 ret = assist_mem_single(mach, &state, &instr, exit);
3154 }
3155 if (ret == -1) {
3156 errno = ENODEV;
3157 return -1;
3158 }
3159
3160 if (instr.legpref.rep || instr.legpref.repn) {
3161 cnt -= 1;
3162 rep_set_cnt(&state, instr.address_size, cnt);
3163 if (cnt == 0) {
3164 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3165 } else if (__predict_false(instr.legpref.repn)) {
3166 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3167 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3168 }
3169 }
3170 } else {
3171 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3172 }
3173
3174 out:
3175 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3176 if (ret == -1)
3177 return -1;
3178
3179 return 0;
3180 }
3181