/*	$NetBSD: libnvmm_x86.c,v 1.31 2019/06/08 07:27:44 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
47 #define __cacheline_aligned __attribute__((__aligned__(64)))
48
49 #include <x86/specialreg.h>
50
51 /* -------------------------------------------------------------------------- */
52
53 /*
54 * Undocumented debugging function. Helpful.
55 */
56 int
57 nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
58 {
59 struct nvmm_x64_state *state = vcpu->state;
60 uint16_t *attr;
61 size_t i;
62 int ret;
63
64 const char *segnames[] = {
65 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
66 };
67
68 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
69 if (ret == -1)
70 return -1;
71
72 printf("+ VCPU id=%d\n", (int)vcpu->cpuid);
73 printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
74 printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
75 printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
76 printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
77 printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
78 printf("| -> RFLAGS=%p\n", (void *)state->gprs[NVMM_X64_GPR_RFLAGS]);
79 for (i = 0; i < NVMM_X64_NSEG; i++) {
80 attr = (uint16_t *)&state->segs[i].attrib;
81 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
82 segnames[i],
83 state->segs[i].selector,
84 state->segs[i].base,
85 state->segs[i].limit,
86 *attr);
87 }
88 printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
89 printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
90 printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
91 printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
92 printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);
93
94 return 0;
95 }
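
/*
 * Illustrative usage sketch, not part of the original file: dumping a VCPU
 * from a VMM exit handler. "mach" and "vcpu" are assumed to have been set up
 * with the usual libnvmm creation calls; the function fetches the state
 * itself, so no prior nvmm_vcpu_getstate() is needed.
 *
 *	if (nvmm_vcpu_dump(mach, vcpu) == -1)
 *		warn("nvmm_vcpu_dump");
 */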
96
97 /* -------------------------------------------------------------------------- */
98
99 #define PTE32_L1_SHIFT 12
100 #define PTE32_L2_SHIFT 22
101
102 #define PTE32_L2_MASK 0xffc00000
103 #define PTE32_L1_MASK 0x003ff000
104
105 #define PTE32_L2_FRAME (PTE32_L2_MASK)
106 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
107
108 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
109 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
110
111 #define CR3_FRAME_32BIT PG_FRAME
112
113 typedef uint32_t pte_32bit_t;
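
/*
 * Worked example of the 32bit (non-PAE) split above, for reference: with
 * GVA = 0xC0401234, pte32_l2idx() = (0xC0400000 >> 22) = 0x301 selects the
 * PDE, pte32_l1idx() = (0x00001000 >> 12) = 0x1 selects the PTE, and the
 * low 12 bits (0x234) are the offset within the 4KB page. A 4MB PG_PS
 * mapping skips the L1 level and keeps bits 21:0 as the offset.
 */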
114
115 static int
116 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
117 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
118 {
119 gpaddr_t L2gpa, L1gpa;
120 uintptr_t L2hva, L1hva;
121 pte_32bit_t *pdir, pte;
122 nvmm_prot_t pageprot;
123
124 /* We begin with an RWXU access. */
125 *prot = NVMM_PROT_ALL;
126
127 /* Parse L2. */
128 L2gpa = (cr3 & CR3_FRAME_32BIT);
129 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
130 return -1;
131 pdir = (pte_32bit_t *)L2hva;
132 pte = pdir[pte32_l2idx(gva)];
133 if ((pte & PG_V) == 0)
134 return -1;
135 if ((pte & PG_u) == 0)
136 *prot &= ~NVMM_PROT_USER;
137 if ((pte & PG_KW) == 0)
138 *prot &= ~NVMM_PROT_WRITE;
139 if ((pte & PG_PS) && !has_pse)
140 return -1;
141 if (pte & PG_PS) {
142 *gpa = (pte & PTE32_L2_FRAME);
143 *gpa = *gpa + (gva & PTE32_L1_MASK);
144 return 0;
145 }
146
147 /* Parse L1. */
148 L1gpa = (pte & PG_FRAME);
149 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
150 return -1;
151 pdir = (pte_32bit_t *)L1hva;
152 pte = pdir[pte32_l1idx(gva)];
153 if ((pte & PG_V) == 0)
154 return -1;
155 if ((pte & PG_u) == 0)
156 *prot &= ~NVMM_PROT_USER;
157 if ((pte & PG_KW) == 0)
158 *prot &= ~NVMM_PROT_WRITE;
159 if (pte & PG_PS)
160 return -1;
161
162 *gpa = (pte & PG_FRAME);
163 return 0;
164 }
165
166 /* -------------------------------------------------------------------------- */
167
168 #define PTE32_PAE_L1_SHIFT 12
169 #define PTE32_PAE_L2_SHIFT 21
170 #define PTE32_PAE_L3_SHIFT 30
171
172 #define PTE32_PAE_L3_MASK 0xc0000000
173 #define PTE32_PAE_L2_MASK 0x3fe00000
174 #define PTE32_PAE_L1_MASK 0x001ff000
175
176 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
177 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
178 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
179
180 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
181 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
182 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
183
184 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
185
186 typedef uint64_t pte_32bit_pae_t;
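
/*
 * Worked example of the PAE split above, for reference: with
 * GVA = 0xB0201234, pte32_pae_l3idx() = 0x2, pte32_pae_l2idx() = 0x181,
 * pte32_pae_l1idx() = 0x1, and the offset is 0x234. PTEs are 64bit wide
 * here, so each directory holds 512 entries instead of 1024, and the L3
 * table (the PDPT, selected by bits 31:30) holds only 4.
 */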
187
188 static int
189 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
190 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
191 {
192 gpaddr_t L3gpa, L2gpa, L1gpa;
193 uintptr_t L3hva, L2hva, L1hva;
194 pte_32bit_pae_t *pdir, pte;
195 nvmm_prot_t pageprot;
196
197 /* We begin with an RWXU access. */
198 *prot = NVMM_PROT_ALL;
199
200 /* Parse L3. */
201 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
202 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
203 return -1;
204 pdir = (pte_32bit_pae_t *)L3hva;
205 pte = pdir[pte32_pae_l3idx(gva)];
206 if ((pte & PG_V) == 0)
207 return -1;
208 if (pte & PG_NX)
209 *prot &= ~NVMM_PROT_EXEC;
210 if (pte & PG_PS)
211 return -1;
212
213 /* Parse L2. */
214 L2gpa = (pte & PG_FRAME);
215 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
216 return -1;
217 pdir = (pte_32bit_pae_t *)L2hva;
218 pte = pdir[pte32_pae_l2idx(gva)];
219 if ((pte & PG_V) == 0)
220 return -1;
221 if ((pte & PG_u) == 0)
222 *prot &= ~NVMM_PROT_USER;
223 if ((pte & PG_KW) == 0)
224 *prot &= ~NVMM_PROT_WRITE;
225 if (pte & PG_NX)
226 *prot &= ~NVMM_PROT_EXEC;
227 if (pte & PG_PS) {
228 *gpa = (pte & PTE32_PAE_L2_FRAME);
229 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
230 return 0;
231 }
232
233 /* Parse L1. */
234 L1gpa = (pte & PG_FRAME);
235 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
236 return -1;
237 pdir = (pte_32bit_pae_t *)L1hva;
238 pte = pdir[pte32_pae_l1idx(gva)];
239 if ((pte & PG_V) == 0)
240 return -1;
241 if ((pte & PG_u) == 0)
242 *prot &= ~NVMM_PROT_USER;
243 if ((pte & PG_KW) == 0)
244 *prot &= ~NVMM_PROT_WRITE;
245 if (pte & PG_NX)
246 *prot &= ~NVMM_PROT_EXEC;
247 if (pte & PG_PS)
248 return -1;
249
250 *gpa = (pte & PG_FRAME);
251 return 0;
252 }
253
254 /* -------------------------------------------------------------------------- */
255
256 #define PTE64_L1_SHIFT 12
257 #define PTE64_L2_SHIFT 21
258 #define PTE64_L3_SHIFT 30
259 #define PTE64_L4_SHIFT 39
260
261 #define PTE64_L4_MASK 0x0000ff8000000000
262 #define PTE64_L3_MASK 0x0000007fc0000000
263 #define PTE64_L2_MASK 0x000000003fe00000
264 #define PTE64_L1_MASK 0x00000000001ff000
265
266 #define PTE64_L4_FRAME PTE64_L4_MASK
267 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
268 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
269 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
270
271 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
272 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
273 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
274 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
275
276 #define CR3_FRAME_64BIT PG_FRAME
277
278 typedef uint64_t pte_64bit_t;
279
280 static inline bool
281 x86_gva_64bit_canonical(gvaddr_t gva)
282 {
283 /* Bits 63:47 must have the same value. */
284 #define SIGN_EXTEND 0xffff800000000000ULL
285 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
286 }
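
/*
 * For reference: 0x00007FFFFFFFE000 and 0xFFFF800000000000 are canonical
 * (bits 63:47 all clear, resp. all set), while 0x0000800000000000 is not.
 * A canonical kernel address such as 0xFFFFFFFF80201000 splits into
 * L4=511, L3=510, L2=1, L1=1 with the pte64_l*idx() macros above, the low
 * 12 bits being the page offset.
 */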
287
288 static int
289 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
290 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
291 {
292 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
293 uintptr_t L4hva, L3hva, L2hva, L1hva;
294 pte_64bit_t *pdir, pte;
295 nvmm_prot_t pageprot;
296
297 /* We begin with an RWXU access. */
298 *prot = NVMM_PROT_ALL;
299
300 if (!x86_gva_64bit_canonical(gva))
301 return -1;
302
303 /* Parse L4. */
304 L4gpa = (cr3 & CR3_FRAME_64BIT);
305 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
306 return -1;
307 pdir = (pte_64bit_t *)L4hva;
308 pte = pdir[pte64_l4idx(gva)];
309 if ((pte & PG_V) == 0)
310 return -1;
311 if ((pte & PG_u) == 0)
312 *prot &= ~NVMM_PROT_USER;
313 if ((pte & PG_KW) == 0)
314 *prot &= ~NVMM_PROT_WRITE;
315 if (pte & PG_NX)
316 *prot &= ~NVMM_PROT_EXEC;
317 if (pte & PG_PS)
318 return -1;
319
320 /* Parse L3. */
321 L3gpa = (pte & PG_FRAME);
322 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
323 return -1;
324 pdir = (pte_64bit_t *)L3hva;
325 pte = pdir[pte64_l3idx(gva)];
326 if ((pte & PG_V) == 0)
327 return -1;
328 if ((pte & PG_u) == 0)
329 *prot &= ~NVMM_PROT_USER;
330 if ((pte & PG_KW) == 0)
331 *prot &= ~NVMM_PROT_WRITE;
332 if (pte & PG_NX)
333 *prot &= ~NVMM_PROT_EXEC;
334 if (pte & PG_PS) {
335 *gpa = (pte & PTE64_L3_FRAME);
336 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
337 return 0;
338 }
339
340 /* Parse L2. */
341 L2gpa = (pte & PG_FRAME);
342 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
343 return -1;
344 pdir = (pte_64bit_t *)L2hva;
345 pte = pdir[pte64_l2idx(gva)];
346 if ((pte & PG_V) == 0)
347 return -1;
348 if ((pte & PG_u) == 0)
349 *prot &= ~NVMM_PROT_USER;
350 if ((pte & PG_KW) == 0)
351 *prot &= ~NVMM_PROT_WRITE;
352 if (pte & PG_NX)
353 *prot &= ~NVMM_PROT_EXEC;
354 if (pte & PG_PS) {
355 *gpa = (pte & PTE64_L2_FRAME);
356 *gpa = *gpa + (gva & PTE64_L1_MASK);
357 return 0;
358 }
359
360 /* Parse L1. */
361 L1gpa = (pte & PG_FRAME);
362 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
363 return -1;
364 pdir = (pte_64bit_t *)L1hva;
365 pte = pdir[pte64_l1idx(gva)];
366 if ((pte & PG_V) == 0)
367 return -1;
368 if ((pte & PG_u) == 0)
369 *prot &= ~NVMM_PROT_USER;
370 if ((pte & PG_KW) == 0)
371 *prot &= ~NVMM_PROT_WRITE;
372 if (pte & PG_NX)
373 *prot &= ~NVMM_PROT_EXEC;
374 if (pte & PG_PS)
375 return -1;
376
377 *gpa = (pte & PG_FRAME);
378 return 0;
379 }
380
381 static inline int
382 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
383 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
384 {
385 bool is_pae, is_lng, has_pse;
386 uint64_t cr3;
387 size_t off;
388 int ret;
389
390 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
391 /* No paging. */
392 *prot = NVMM_PROT_ALL;
393 *gpa = gva;
394 return 0;
395 }
396
397 off = (gva & PAGE_MASK);
398 gva &= ~PAGE_MASK;
399
400 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
401 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
402 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
403 cr3 = state->crs[NVMM_X64_CR_CR3];
404
405 if (is_pae && is_lng) {
406 /* 64bit */
407 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
408 } else if (is_pae && !is_lng) {
409 /* 32bit PAE */
410 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
411 } else if (!is_pae && !is_lng) {
412 /* 32bit */
413 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
414 } else {
415 ret = -1;
416 }
417
418 if (ret == -1) {
419 errno = EFAULT;
420 }
421
422 *gpa = *gpa + off;
423
424 return ret;
425 }
426
427 int
428 nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
429 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
430 {
431 struct nvmm_x64_state *state = vcpu->state;
432 int ret;
433
434 ret = nvmm_vcpu_getstate(mach, vcpu,
435 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
436 if (ret == -1)
437 return -1;
438
439 return x86_gva_to_gpa(mach, state, gva, gpa, prot);
440 }
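
/*
 * Illustrative usage sketch, not part of the original file: translating a
 * guest virtual address from VMM code, e.g. before inspecting a guest
 * buffer. On failure errno is EFAULT (unmapped or non-canonical GVA); a
 * host mapping can then be obtained with nvmm_gpa_to_hva(), as done above.
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(mach, vcpu, gva, &gpa, &prot) == -1)
 *		return -1;
 *	if (!(prot & NVMM_PROT_READ))
 *		return -1;
 */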
441
442 /* -------------------------------------------------------------------------- */
443
444 static inline bool
445 is_long_mode(struct nvmm_x64_state *state)
446 {
447 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
448 }
449
450 static inline bool
451 is_64bit(struct nvmm_x64_state *state)
452 {
453 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
454 }
455
456 static inline bool
457 is_32bit(struct nvmm_x64_state *state)
458 {
459 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
460 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
461 }
462
463 static inline bool
464 is_16bit(struct nvmm_x64_state *state)
465 {
466 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
467 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
468 }
469
470 static int
471 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
472 {
473 uint64_t limit;
474
	/*
	 * This is incomplete. We should also check expand-down ("top-down")
	 * segments and the other descriptor attributes, but that is tedious.
	 */
479 if (__predict_false(!seg->attrib.p)) {
480 goto error;
481 }
482
483 limit = (uint64_t)seg->limit + 1;
484 if (__predict_true(seg->attrib.g)) {
485 limit *= PAGE_SIZE;
486 }
487
488 if (__predict_false(gva + size > limit)) {
489 goto error;
490 }
491
492 return 0;
493
494 error:
495 errno = EFAULT;
496 return -1;
497 }
498
499 static inline void
500 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
501 {
502 *gva += seg->base;
503 }
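
/*
 * For reference: a typical flat 32bit data segment has base=0,
 * limit=0xFFFFF and attrib.g=1, so segment_check() computes
 * (0xFFFFF+1)*PAGE_SIZE = 4GB and accepts any gva+size below that;
 * segment_apply() then adds the base (0 here) to form the linear address.
 * In 64bit mode only the FS/GS bases are applied, as seen in
 * nvmm_assist_io() below.
 */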
504
505 static inline uint64_t
506 size_to_mask(size_t size)
507 {
508 switch (size) {
509 case 1:
510 return 0x00000000000000FF;
511 case 2:
512 return 0x000000000000FFFF;
513 case 4:
514 return 0x00000000FFFFFFFF;
515 case 8:
516 default:
517 return 0xFFFFFFFFFFFFFFFF;
518 }
519 }
520
521 static uint64_t
522 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
523 {
524 uint64_t mask, cnt;
525
526 mask = size_to_mask(adsize);
527 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
528
529 return cnt;
530 }
531
532 static void
533 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
534 {
535 uint64_t mask;
536
537 /* XXX: should we zero-extend? */
538 mask = size_to_mask(adsize);
539 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
540 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
541 }
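
/*
 * For reference: with a 16bit address size, only CX acts as the REP
 * counter; size_to_mask(2) = 0xFFFF, so rep_get_cnt() reads CX and
 * rep_set_cnt() writes it back without touching the upper bits of RCX.
 */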
542
543 static int
544 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
545 gvaddr_t gva, uint8_t *data, size_t size)
546 {
547 struct nvmm_mem mem;
548 nvmm_prot_t prot;
549 gpaddr_t gpa;
550 uintptr_t hva;
551 bool is_mmio;
552 int ret, remain;
553
554 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
555 if (__predict_false(ret == -1)) {
556 return -1;
557 }
558 if (__predict_false(!(prot & NVMM_PROT_READ))) {
559 errno = EFAULT;
560 return -1;
561 }
562
563 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
564 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
565 } else {
566 remain = 0;
567 }
568 size -= remain;
569
570 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
571 is_mmio = (ret == -1);
572
573 if (is_mmio) {
574 mem.data = data;
575 mem.gpa = gpa;
576 mem.write = false;
577 mem.size = size;
578 (*mach->cbs.mem)(&mem);
579 } else {
580 if (__predict_false(!(prot & NVMM_PROT_READ))) {
581 errno = EFAULT;
582 return -1;
583 }
584 memcpy(data, (uint8_t *)hva, size);
585 }
586
587 if (remain > 0) {
588 ret = read_guest_memory(mach, state, gva + size,
589 data + size, remain);
590 } else {
591 ret = 0;
592 }
593
594 return ret;
595 }
596
597 static int
598 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
599 gvaddr_t gva, uint8_t *data, size_t size)
600 {
601 struct nvmm_mem mem;
602 nvmm_prot_t prot;
603 gpaddr_t gpa;
604 uintptr_t hva;
605 bool is_mmio;
606 int ret, remain;
607
608 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
609 if (__predict_false(ret == -1)) {
610 return -1;
611 }
612 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
613 errno = EFAULT;
614 return -1;
615 }
616
617 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
618 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
619 } else {
620 remain = 0;
621 }
622 size -= remain;
623
624 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
625 is_mmio = (ret == -1);
626
627 if (is_mmio) {
628 mem.data = data;
629 mem.gpa = gpa;
630 mem.write = true;
631 mem.size = size;
632 (*mach->cbs.mem)(&mem);
633 } else {
634 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
635 errno = EFAULT;
636 return -1;
637 }
638 memcpy((uint8_t *)hva, data, size);
639 }
640
641 if (remain > 0) {
642 ret = write_guest_memory(mach, state, gva + size,
643 data + size, remain);
644 } else {
645 ret = 0;
646 }
647
648 return ret;
649 }
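
/*
 * For reference, the page-crossing split used above: a 16-byte access at
 * gva = 0x...FF8 has (gva & PAGE_MASK) + size = 0xFF8 + 16 = 0x1008, which
 * exceeds PAGE_SIZE, so remain = 8. The first 8 bytes are handled on this
 * page (RAM copy or MMIO callback), and the function recurses once for the
 * remaining 8 bytes on the next page, which may be of the other kind.
 */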
650
651 /* -------------------------------------------------------------------------- */
652
653 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
654
655 #define NVMM_IO_BATCH_SIZE 32
656
657 static int
658 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
659 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
660 {
661 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
662 size_t i, iosize, iocnt;
663 int ret;
664
665 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
666 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
667 iocnt = iosize / io->size;
668
669 io->data = iobuf;
670
671 if (!io->in) {
672 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
673 if (ret == -1)
674 return -1;
675 }
676
677 for (i = 0; i < iocnt; i++) {
678 (*mach->cbs.io)(io);
679 io->data += io->size;
680 }
681
682 if (io->in) {
683 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
684 if (ret == -1)
685 return -1;
686 }
687
688 return iocnt;
689 }
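
/*
 * For reference: with io->size = 2 and cnt = 100, assist_io_batch() clamps
 * to iosize = MIN(2*32, 32) = 32 bytes, that is iocnt = 16 transfers per
 * call; nvmm_assist_io() then leaves RIP unchanged, so the guest
 * re-executes the REP instruction until the count reaches zero.
 */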
690
691 int
692 nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
693 {
694 struct nvmm_x64_state *state = vcpu->state;
695 struct nvmm_exit *exit = vcpu->exit;
696 struct nvmm_io io;
697 uint64_t cnt = 0; /* GCC */
698 uint8_t iobuf[8];
699 int iocnt = 1;
700 gvaddr_t gva = 0; /* GCC */
701 int reg = 0; /* GCC */
702 int ret, seg;
703 bool psld = false;
704
705 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
706 errno = EINVAL;
707 return -1;
708 }
709
710 io.port = exit->u.io.port;
711 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
712 io.size = exit->u.io.operand_size;
713 io.data = iobuf;
714
715 ret = nvmm_vcpu_getstate(mach, vcpu,
716 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
717 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
718 if (ret == -1)
719 return -1;
720
721 if (exit->u.io.rep) {
722 cnt = rep_get_cnt(state, exit->u.io.address_size);
723 if (__predict_false(cnt == 0)) {
724 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
725 goto out;
726 }
727 }
728
729 if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
730 psld = true;
731 }
732
733 /*
734 * Determine GVA.
735 */
736 if (exit->u.io.str) {
737 if (io.in) {
738 reg = NVMM_X64_GPR_RDI;
739 } else {
740 reg = NVMM_X64_GPR_RSI;
741 }
742
743 gva = state->gprs[reg];
744 gva &= size_to_mask(exit->u.io.address_size);
745
746 if (exit->u.io.seg != -1) {
747 seg = exit->u.io.seg;
748 } else {
749 if (io.in) {
750 seg = NVMM_X64_SEG_ES;
751 } else {
752 seg = fetch_segment(mach, state);
753 if (seg == -1)
754 return -1;
755 }
756 }
757
758 if (__predict_true(is_long_mode(state))) {
759 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
760 segment_apply(&state->segs[seg], &gva);
761 }
762 } else {
763 ret = segment_check(&state->segs[seg], gva, io.size);
764 if (ret == -1)
765 return -1;
766 segment_apply(&state->segs[seg], &gva);
767 }
768
769 if (exit->u.io.rep && !psld) {
770 iocnt = assist_io_batch(mach, state, &io, gva, cnt);
771 if (iocnt == -1)
772 return -1;
773 goto done;
774 }
775 }
776
777 if (!io.in) {
778 if (!exit->u.io.str) {
779 memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
780 } else {
781 ret = read_guest_memory(mach, state, gva, io.data,
782 io.size);
783 if (ret == -1)
784 return -1;
785 }
786 }
787
788 (*mach->cbs.io)(&io);
789
790 if (io.in) {
791 if (!exit->u.io.str) {
792 memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
793 if (io.size == 4) {
794 /* Zero-extend to 64 bits. */
795 state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
796 }
797 } else {
798 ret = write_guest_memory(mach, state, gva, io.data,
799 io.size);
800 if (ret == -1)
801 return -1;
802 }
803 }
804
805 done:
806 if (exit->u.io.str) {
807 if (__predict_false(psld)) {
808 state->gprs[reg] -= iocnt * io.size;
809 } else {
810 state->gprs[reg] += iocnt * io.size;
811 }
812 }
813
814 if (exit->u.io.rep) {
815 cnt -= iocnt;
816 rep_set_cnt(state, exit->u.io.address_size, cnt);
817 if (cnt == 0) {
818 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
819 }
820 } else {
821 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
822 }
823
824 out:
825 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
826 if (ret == -1)
827 return -1;
828
829 return 0;
830 }
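
/*
 * Illustrative sketch of the intended caller, not part of this file: a VMM
 * run loop dispatching exits to the assists. nvmm_vcpu_run() and the
 * NVMM_EXIT_MEMORY/nvmm_assist_mem pairing are assumed from the rest of
 * libnvmm; they are not defined in this section.
 *
 *	while (1) {
 *		if (nvmm_vcpu_run(mach, vcpu) == -1)
 *			err(EXIT_FAILURE, "nvmm_vcpu_run");
 *		switch (vcpu->exit->reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(mach, vcpu) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_io");
 *			break;
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(mach, vcpu) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_mem");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */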
831
832 /* -------------------------------------------------------------------------- */
833
834 struct x86_emul {
835 bool read;
836 bool notouch;
837 void (*func)(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
838 };
839
840 static void x86_func_or(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
841 static void x86_func_and(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
842 static void x86_func_sub(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
843 static void x86_func_xor(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
844 static void x86_func_cmp(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
845 static void x86_func_test(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
846 static void x86_func_mov(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
847 static void x86_func_stos(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
848 static void x86_func_lods(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
849 static void x86_func_movs(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
850
851 static const struct x86_emul x86_emul_or = {
852 .read = true,
853 .func = x86_func_or
854 };
855
856 static const struct x86_emul x86_emul_and = {
857 .read = true,
858 .func = x86_func_and
859 };
860
861 static const struct x86_emul x86_emul_sub = {
862 .read = true,
863 .func = x86_func_sub
864 };
865
866 static const struct x86_emul x86_emul_xor = {
867 .read = true,
868 .func = x86_func_xor
869 };
870
871 static const struct x86_emul x86_emul_cmp = {
872 .notouch = true,
873 .func = x86_func_cmp
874 };
875
876 static const struct x86_emul x86_emul_test = {
877 .notouch = true,
878 .func = x86_func_test
879 };
880
881 static const struct x86_emul x86_emul_mov = {
882 .func = x86_func_mov
883 };
884
885 static const struct x86_emul x86_emul_stos = {
886 .func = x86_func_stos
887 };
888
889 static const struct x86_emul x86_emul_lods = {
890 .func = x86_func_lods
891 };
892
893 static const struct x86_emul x86_emul_movs = {
894 .func = x86_func_movs
895 };
896
897 /* Legacy prefixes. */
898 #define LEG_LOCK 0xF0
899 #define LEG_REPN 0xF2
900 #define LEG_REP 0xF3
901 #define LEG_OVR_CS 0x2E
902 #define LEG_OVR_SS 0x36
903 #define LEG_OVR_DS 0x3E
904 #define LEG_OVR_ES 0x26
905 #define LEG_OVR_FS 0x64
906 #define LEG_OVR_GS 0x65
907 #define LEG_OPR_OVR 0x66
908 #define LEG_ADR_OVR 0x67
909
910 struct x86_legpref {
911 bool opr_ovr:1;
912 bool adr_ovr:1;
913 bool rep:1;
914 bool repn:1;
915 int8_t seg;
916 };
917
918 struct x86_rexpref {
919 bool b:1;
920 bool x:1;
921 bool r:1;
922 bool w:1;
923 bool present:1;
924 };
925
926 struct x86_reg {
927 int num; /* NVMM GPR state index */
928 uint64_t mask;
929 };
930
931 enum x86_disp_type {
932 DISP_NONE,
933 DISP_0,
934 DISP_1,
935 DISP_4
936 };
937
938 struct x86_disp {
939 enum x86_disp_type type;
940 uint64_t data; /* 4 bytes, but can be sign-extended */
941 };
942
943 enum REGMODRM__Mod {
944 MOD_DIS0, /* also, register indirect */
945 MOD_DIS1,
946 MOD_DIS4,
947 MOD_REG
948 };
949
950 enum REGMODRM__Reg {
951 REG_000, /* these fields are indexes to the register map */
952 REG_001,
953 REG_010,
954 REG_011,
955 REG_100,
956 REG_101,
957 REG_110,
958 REG_111
959 };
960
961 enum REGMODRM__Rm {
962 RM_000, /* reg */
963 RM_001, /* reg */
964 RM_010, /* reg */
965 RM_011, /* reg */
966 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
967 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
968 RM_110,
969 RM_111
970 };
971
972 struct x86_regmodrm {
973 uint8_t mod:2;
974 uint8_t reg:3;
975 uint8_t rm:3;
976 };
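
/*
 * For reference: in "mov %edx,0x8(%rcx)" (bytes 89 51 08), the ModRM byte
 * 0x51 decomposes as mod=0b01 (MOD_DIS1), reg=0b010 (the RDX family),
 * rm=0b001 (the RCX family), followed by the 1-byte displacement 0x08.
 */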
977
978 struct x86_immediate {
979 uint64_t data;
980 };
981
982 struct x86_sib {
983 uint8_t scale;
984 const struct x86_reg *idx;
985 const struct x86_reg *bas;
986 };
987
988 enum x86_store_type {
989 STORE_NONE,
990 STORE_REG,
991 STORE_IMM,
992 STORE_SIB,
993 STORE_DMO
994 };
995
996 struct x86_store {
997 enum x86_store_type type;
998 union {
999 const struct x86_reg *reg;
1000 struct x86_immediate imm;
1001 struct x86_sib sib;
1002 uint64_t dmo;
1003 } u;
1004 struct x86_disp disp;
1005 int hardseg;
1006 };
1007
1008 struct x86_instr {
1009 uint8_t len;
1010 struct x86_legpref legpref;
1011 struct x86_rexpref rexpref;
1012 struct x86_regmodrm regmodrm;
1013 uint8_t operand_size;
1014 uint8_t address_size;
1015 uint64_t zeroextend_mask;
1016
1017 const struct x86_opcode *opcode;
1018 const struct x86_emul *emul;
1019
1020 struct x86_store src;
1021 struct x86_store dst;
1022 struct x86_store *strm;
1023 };
1024
1025 struct x86_decode_fsm {
1026 /* vcpu */
1027 bool is64bit;
1028 bool is32bit;
1029 bool is16bit;
1030
1031 /* fsm */
1032 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1033 uint8_t *buf;
1034 uint8_t *end;
1035 };
1036
1037 struct x86_opcode {
1038 bool valid:1;
1039 bool regmodrm:1;
1040 bool regtorm:1;
1041 bool dmo:1;
1042 bool todmo:1;
1043 bool movs:1;
1044 bool stos:1;
1045 bool lods:1;
1046 bool szoverride:1;
1047 bool group1:1;
1048 bool group3:1;
1049 bool group11:1;
1050 bool immediate:1;
1051 uint8_t defsize;
1052 uint8_t flags;
1053 const struct x86_emul *emul;
1054 };
1055
1056 struct x86_group_entry {
1057 const struct x86_emul *emul;
1058 };
1059
1060 #define OPSIZE_BYTE 0x01
1061 #define OPSIZE_WORD 0x02 /* 2 bytes */
1062 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1063 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1064
1065 #define FLAG_imm8 0x01
1066 #define FLAG_immz 0x02
1067 #define FLAG_ze 0x04
1068
1069 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1070 [1] = { .emul = &x86_emul_or },
1071 [4] = { .emul = &x86_emul_and },
1072 [6] = { .emul = &x86_emul_xor },
1073 [7] = { .emul = &x86_emul_cmp }
1074 };
1075
1076 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1077 [0] = { .emul = &x86_emul_test },
1078 [1] = { .emul = &x86_emul_test }
1079 };
1080
1081 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1082 [0] = { .emul = &x86_emul_mov }
1083 };
1084
1085 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1086 /*
1087 * Group1
1088 */
1089 [0x80] = {
1090 /* Eb, Ib */
1091 .valid = true,
1092 .regmodrm = true,
1093 .regtorm = true,
1094 .szoverride = false,
1095 .defsize = OPSIZE_BYTE,
1096 .group1 = true,
1097 .immediate = true,
1098 .emul = NULL /* group1 */
1099 },
1100 [0x81] = {
1101 /* Ev, Iz */
1102 .valid = true,
1103 .regmodrm = true,
1104 .regtorm = true,
1105 .szoverride = true,
1106 .defsize = -1,
1107 .group1 = true,
1108 .immediate = true,
1109 .flags = FLAG_immz,
1110 .emul = NULL /* group1 */
1111 },
1112 [0x83] = {
1113 /* Ev, Ib */
1114 .valid = true,
1115 .regmodrm = true,
1116 .regtorm = true,
1117 .szoverride = true,
1118 .defsize = -1,
1119 .group1 = true,
1120 .immediate = true,
1121 .flags = FLAG_imm8,
1122 .emul = NULL /* group1 */
1123 },
1124
1125 /*
1126 * Group3
1127 */
1128 [0xF6] = {
1129 /* Eb, Ib */
1130 .valid = true,
1131 .regmodrm = true,
1132 .regtorm = true,
1133 .szoverride = false,
1134 .defsize = OPSIZE_BYTE,
1135 .group3 = true,
1136 .immediate = true,
1137 .emul = NULL /* group3 */
1138 },
1139 [0xF7] = {
1140 /* Ev, Iz */
1141 .valid = true,
1142 .regmodrm = true,
1143 .regtorm = true,
1144 .szoverride = true,
1145 .defsize = -1,
1146 .group3 = true,
1147 .immediate = true,
1148 .flags = FLAG_immz,
1149 .emul = NULL /* group3 */
1150 },
1151
1152 /*
1153 * Group11
1154 */
1155 [0xC6] = {
1156 /* Eb, Ib */
1157 .valid = true,
1158 .regmodrm = true,
1159 .regtorm = true,
1160 .szoverride = false,
1161 .defsize = OPSIZE_BYTE,
1162 .group11 = true,
1163 .immediate = true,
1164 .emul = NULL /* group11 */
1165 },
1166 [0xC7] = {
1167 /* Ev, Iz */
1168 .valid = true,
1169 .regmodrm = true,
1170 .regtorm = true,
1171 .szoverride = true,
1172 .defsize = -1,
1173 .group11 = true,
1174 .immediate = true,
1175 .flags = FLAG_immz,
1176 .emul = NULL /* group11 */
1177 },
1178
1179 /*
1180 * OR
1181 */
1182 [0x08] = {
1183 /* Eb, Gb */
1184 .valid = true,
1185 .regmodrm = true,
1186 .regtorm = true,
1187 .szoverride = false,
1188 .defsize = OPSIZE_BYTE,
1189 .emul = &x86_emul_or
1190 },
1191 [0x09] = {
1192 /* Ev, Gv */
1193 .valid = true,
1194 .regmodrm = true,
1195 .regtorm = true,
1196 .szoverride = true,
1197 .defsize = -1,
1198 .emul = &x86_emul_or
1199 },
1200 [0x0A] = {
1201 /* Gb, Eb */
1202 .valid = true,
1203 .regmodrm = true,
1204 .regtorm = false,
1205 .szoverride = false,
1206 .defsize = OPSIZE_BYTE,
1207 .emul = &x86_emul_or
1208 },
1209 [0x0B] = {
1210 /* Gv, Ev */
1211 .valid = true,
1212 .regmodrm = true,
1213 .regtorm = false,
1214 .szoverride = true,
1215 .defsize = -1,
1216 .emul = &x86_emul_or
1217 },
1218
1219 /*
1220 * AND
1221 */
1222 [0x20] = {
1223 /* Eb, Gb */
1224 .valid = true,
1225 .regmodrm = true,
1226 .regtorm = true,
1227 .szoverride = false,
1228 .defsize = OPSIZE_BYTE,
1229 .emul = &x86_emul_and
1230 },
1231 [0x21] = {
1232 /* Ev, Gv */
1233 .valid = true,
1234 .regmodrm = true,
1235 .regtorm = true,
1236 .szoverride = true,
1237 .defsize = -1,
1238 .emul = &x86_emul_and
1239 },
1240 [0x22] = {
1241 /* Gb, Eb */
1242 .valid = true,
1243 .regmodrm = true,
1244 .regtorm = false,
1245 .szoverride = false,
1246 .defsize = OPSIZE_BYTE,
1247 .emul = &x86_emul_and
1248 },
1249 [0x23] = {
1250 /* Gv, Ev */
1251 .valid = true,
1252 .regmodrm = true,
1253 .regtorm = false,
1254 .szoverride = true,
1255 .defsize = -1,
1256 .emul = &x86_emul_and
1257 },
1258
1259 /*
1260 * SUB
1261 */
1262 [0x28] = {
1263 /* Eb, Gb */
1264 .valid = true,
1265 .regmodrm = true,
1266 .regtorm = true,
1267 .szoverride = false,
1268 .defsize = OPSIZE_BYTE,
1269 .emul = &x86_emul_sub
1270 },
1271 [0x29] = {
1272 /* Ev, Gv */
1273 .valid = true,
1274 .regmodrm = true,
1275 .regtorm = true,
1276 .szoverride = true,
1277 .defsize = -1,
1278 .emul = &x86_emul_sub
1279 },
1280 [0x2A] = {
1281 /* Gb, Eb */
1282 .valid = true,
1283 .regmodrm = true,
1284 .regtorm = false,
1285 .szoverride = false,
1286 .defsize = OPSIZE_BYTE,
1287 .emul = &x86_emul_sub
1288 },
1289 [0x2B] = {
1290 /* Gv, Ev */
1291 .valid = true,
1292 .regmodrm = true,
1293 .regtorm = false,
1294 .szoverride = true,
1295 .defsize = -1,
1296 .emul = &x86_emul_sub
1297 },
1298
1299 /*
1300 * XOR
1301 */
1302 [0x30] = {
1303 /* Eb, Gb */
1304 .valid = true,
1305 .regmodrm = true,
1306 .regtorm = true,
1307 .szoverride = false,
1308 .defsize = OPSIZE_BYTE,
1309 .emul = &x86_emul_xor
1310 },
1311 [0x31] = {
1312 /* Ev, Gv */
1313 .valid = true,
1314 .regmodrm = true,
1315 .regtorm = true,
1316 .szoverride = true,
1317 .defsize = -1,
1318 .emul = &x86_emul_xor
1319 },
1320 [0x32] = {
1321 /* Gb, Eb */
1322 .valid = true,
1323 .regmodrm = true,
1324 .regtorm = false,
1325 .szoverride = false,
1326 .defsize = OPSIZE_BYTE,
1327 .emul = &x86_emul_xor
1328 },
1329 [0x33] = {
1330 /* Gv, Ev */
1331 .valid = true,
1332 .regmodrm = true,
1333 .regtorm = false,
1334 .szoverride = true,
1335 .defsize = -1,
1336 .emul = &x86_emul_xor
1337 },
1338
1339 /*
1340 * MOV
1341 */
1342 [0x88] = {
1343 /* Eb, Gb */
1344 .valid = true,
1345 .regmodrm = true,
1346 .regtorm = true,
1347 .szoverride = false,
1348 .defsize = OPSIZE_BYTE,
1349 .emul = &x86_emul_mov
1350 },
1351 [0x89] = {
1352 /* Ev, Gv */
1353 .valid = true,
1354 .regmodrm = true,
1355 .regtorm = true,
1356 .szoverride = true,
1357 .defsize = -1,
1358 .emul = &x86_emul_mov
1359 },
1360 [0x8A] = {
1361 /* Gb, Eb */
1362 .valid = true,
1363 .regmodrm = true,
1364 .regtorm = false,
1365 .szoverride = false,
1366 .defsize = OPSIZE_BYTE,
1367 .emul = &x86_emul_mov
1368 },
1369 [0x8B] = {
1370 /* Gv, Ev */
1371 .valid = true,
1372 .regmodrm = true,
1373 .regtorm = false,
1374 .szoverride = true,
1375 .defsize = -1,
1376 .emul = &x86_emul_mov
1377 },
1378 [0xA0] = {
1379 /* AL, Ob */
1380 .valid = true,
1381 .dmo = true,
1382 .todmo = false,
1383 .szoverride = false,
1384 .defsize = OPSIZE_BYTE,
1385 .emul = &x86_emul_mov
1386 },
1387 [0xA1] = {
1388 /* rAX, Ov */
1389 .valid = true,
1390 .dmo = true,
1391 .todmo = false,
1392 .szoverride = true,
1393 .defsize = -1,
1394 .emul = &x86_emul_mov
1395 },
1396 [0xA2] = {
1397 /* Ob, AL */
1398 .valid = true,
1399 .dmo = true,
1400 .todmo = true,
1401 .szoverride = false,
1402 .defsize = OPSIZE_BYTE,
1403 .emul = &x86_emul_mov
1404 },
1405 [0xA3] = {
1406 /* Ov, rAX */
1407 .valid = true,
1408 .dmo = true,
1409 .todmo = true,
1410 .szoverride = true,
1411 .defsize = -1,
1412 .emul = &x86_emul_mov
1413 },
1414
1415 /*
1416 * MOVS
1417 */
1418 [0xA4] = {
1419 /* Yb, Xb */
1420 .valid = true,
1421 .movs = true,
1422 .szoverride = false,
1423 .defsize = OPSIZE_BYTE,
1424 .emul = &x86_emul_movs
1425 },
1426 [0xA5] = {
1427 /* Yv, Xv */
1428 .valid = true,
1429 .movs = true,
1430 .szoverride = true,
1431 .defsize = -1,
1432 .emul = &x86_emul_movs
1433 },
1434
1435 /*
1436 * STOS
1437 */
1438 [0xAA] = {
1439 /* Yb, AL */
1440 .valid = true,
1441 .stos = true,
1442 .szoverride = false,
1443 .defsize = OPSIZE_BYTE,
1444 .emul = &x86_emul_stos
1445 },
1446 [0xAB] = {
1447 /* Yv, rAX */
1448 .valid = true,
1449 .stos = true,
1450 .szoverride = true,
1451 .defsize = -1,
1452 .emul = &x86_emul_stos
1453 },
1454
1455 /*
1456 * LODS
1457 */
1458 [0xAC] = {
1459 /* AL, Xb */
1460 .valid = true,
1461 .lods = true,
1462 .szoverride = false,
1463 .defsize = OPSIZE_BYTE,
1464 .emul = &x86_emul_lods
1465 },
1466 [0xAD] = {
1467 /* rAX, Xv */
1468 .valid = true,
1469 .lods = true,
1470 .szoverride = true,
1471 .defsize = -1,
1472 .emul = &x86_emul_lods
1473 },
1474 };
1475
1476 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1477 /*
1478 * MOVZX
1479 */
1480 [0xB6] = {
1481 /* Gv, Eb */
1482 .valid = true,
1483 .regmodrm = true,
1484 .regtorm = false,
1485 .szoverride = true,
1486 .defsize = OPSIZE_BYTE,
1487 .flags = FLAG_ze,
1488 .emul = &x86_emul_mov
1489 },
1490 [0xB7] = {
1491 /* Gv, Ew */
1492 .valid = true,
1493 .regmodrm = true,
1494 .regtorm = false,
1495 .szoverride = true,
1496 .defsize = OPSIZE_WORD,
1497 .flags = FLAG_ze,
1498 .emul = &x86_emul_mov
1499 },
1500 };
1501
1502 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1503
1504 /* [REX-present][enc][opsize] */
1505 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1506 [false] = {
1507 /* No REX prefix. */
1508 [0b00] = {
1509 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1510 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1511 [2] = { -1, 0 },
1512 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1513 [4] = { -1, 0 },
1514 [5] = { -1, 0 },
1515 [6] = { -1, 0 },
1516 [7] = { -1, 0 },
1517 },
1518 [0b01] = {
1519 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1520 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1521 [2] = { -1, 0 },
1522 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1523 [4] = { -1, 0 },
1524 [5] = { -1, 0 },
1525 [6] = { -1, 0 },
1526 [7] = { -1, 0 },
1527 },
1528 [0b10] = {
1529 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1530 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1531 [2] = { -1, 0 },
1532 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1533 [4] = { -1, 0 },
1534 [5] = { -1, 0 },
1535 [6] = { -1, 0 },
1536 [7] = { -1, 0 },
1537 },
1538 [0b11] = {
1539 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1540 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1541 [2] = { -1, 0 },
1542 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1543 [4] = { -1, 0 },
1544 [5] = { -1, 0 },
1545 [6] = { -1, 0 },
1546 [7] = { -1, 0 },
1547 }
1548 },
1549 [true] = {
1550 /* Has REX prefix. */
1551 [0b00] = {
1552 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1553 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1554 [2] = { -1, 0 },
1555 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1556 [4] = { -1, 0 },
1557 [5] = { -1, 0 },
1558 [6] = { -1, 0 },
1559 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1560 },
1561 [0b01] = {
1562 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1563 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1564 [2] = { -1, 0 },
1565 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1566 [4] = { -1, 0 },
1567 [5] = { -1, 0 },
1568 [6] = { -1, 0 },
1569 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1570 },
1571 [0b10] = {
1572 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1573 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1574 [2] = { -1, 0 },
1575 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1576 [4] = { -1, 0 },
1577 [5] = { -1, 0 },
1578 [6] = { -1, 0 },
1579 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1580 },
1581 [0b11] = {
1582 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1583 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1584 [2] = { -1, 0 },
1585 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1586 [4] = { -1, 0 },
1587 [5] = { -1, 0 },
1588 [6] = { -1, 0 },
1589 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1590 }
1591 }
1592 };
1593
/* [extended (REX.B/X/R)][enc][size] */
1595 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1596 [false] = {
1597 /* Not extended. */
1598 [0b000] = {
1599 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1600 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1601 [2] = { -1, 0 },
1602 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1603 [4] = { -1, 0 },
1604 [5] = { -1, 0 },
1605 [6] = { -1, 0 },
1606 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1607 },
1608 [0b001] = {
1609 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1610 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1611 [2] = { -1, 0 },
1612 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1613 [4] = { -1, 0 },
1614 [5] = { -1, 0 },
1615 [6] = { -1, 0 },
1616 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1617 },
1618 [0b010] = {
1619 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1620 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1621 [2] = { -1, 0 },
1622 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1623 [4] = { -1, 0 },
1624 [5] = { -1, 0 },
1625 [6] = { -1, 0 },
1626 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1627 },
1628 [0b011] = {
1629 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1630 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1631 [2] = { -1, 0 },
1632 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1633 [4] = { -1, 0 },
1634 [5] = { -1, 0 },
1635 [6] = { -1, 0 },
1636 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1637 },
1638 [0b100] = {
1639 [0] = { -1, 0 }, /* SPECIAL */
1640 [1] = { -1, 0 }, /* SPECIAL */
1641 [2] = { -1, 0 },
1642 [3] = { -1, 0 }, /* SPECIAL */
1643 [4] = { -1, 0 },
1644 [5] = { -1, 0 },
1645 [6] = { -1, 0 },
1646 [7] = { -1, 0 }, /* SPECIAL */
1647 },
1648 [0b101] = {
1649 [0] = { -1, 0 }, /* SPECIAL */
1650 [1] = { -1, 0 }, /* SPECIAL */
1651 [2] = { -1, 0 },
1652 [3] = { -1, 0 }, /* SPECIAL */
1653 [4] = { -1, 0 },
1654 [5] = { -1, 0 },
1655 [6] = { -1, 0 },
1656 [7] = { -1, 0 }, /* SPECIAL */
1657 },
1658 [0b110] = {
1659 [0] = { -1, 0 }, /* SPECIAL */
1660 [1] = { -1, 0 }, /* SPECIAL */
1661 [2] = { -1, 0 },
1662 [3] = { -1, 0 }, /* SPECIAL */
1663 [4] = { -1, 0 },
1664 [5] = { -1, 0 },
1665 [6] = { -1, 0 },
1666 [7] = { -1, 0 }, /* SPECIAL */
1667 },
1668 [0b111] = {
1669 [0] = { -1, 0 }, /* SPECIAL */
1670 [1] = { -1, 0 }, /* SPECIAL */
1671 [2] = { -1, 0 },
1672 [3] = { -1, 0 }, /* SPECIAL */
1673 [4] = { -1, 0 },
1674 [5] = { -1, 0 },
1675 [6] = { -1, 0 },
1676 [7] = { -1, 0 }, /* SPECIAL */
1677 },
1678 },
1679 [true] = {
1680 /* Extended. */
1681 [0b000] = {
1682 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1683 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1684 [2] = { -1, 0 },
1685 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1686 [4] = { -1, 0 },
1687 [5] = { -1, 0 },
1688 [6] = { -1, 0 },
1689 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1690 },
1691 [0b001] = {
1692 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1693 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1694 [2] = { -1, 0 },
1695 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1696 [4] = { -1, 0 },
1697 [5] = { -1, 0 },
1698 [6] = { -1, 0 },
1699 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1700 },
1701 [0b010] = {
1702 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1703 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1704 [2] = { -1, 0 },
1705 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1706 [4] = { -1, 0 },
1707 [5] = { -1, 0 },
1708 [6] = { -1, 0 },
1709 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1710 },
1711 [0b011] = {
1712 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1713 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1714 [2] = { -1, 0 },
1715 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1716 [4] = { -1, 0 },
1717 [5] = { -1, 0 },
1718 [6] = { -1, 0 },
1719 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1720 },
1721 [0b100] = {
1722 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1723 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1724 [2] = { -1, 0 },
1725 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1726 [4] = { -1, 0 },
1727 [5] = { -1, 0 },
1728 [6] = { -1, 0 },
1729 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1730 },
1731 [0b101] = {
1732 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1733 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1734 [2] = { -1, 0 },
1735 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1736 [4] = { -1, 0 },
1737 [5] = { -1, 0 },
1738 [6] = { -1, 0 },
1739 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1740 },
1741 [0b110] = {
1742 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1743 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1744 [2] = { -1, 0 },
1745 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1746 [4] = { -1, 0 },
1747 [5] = { -1, 0 },
1748 [6] = { -1, 0 },
1749 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1750 },
1751 [0b111] = {
1752 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1753 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1754 [2] = { -1, 0 },
1755 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1756 [4] = { -1, 0 },
1757 [5] = { -1, 0 },
1758 [6] = { -1, 0 },
1759 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1760 },
1761 }
1762 };
1763
1764 static int
1765 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1766 {
1767 fsm->fn = NULL;
1768 return -1;
1769 }
1770
1771 static int
1772 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1773 {
1774 if (fsm->buf + n > fsm->end) {
1775 return -1;
1776 }
1777 memcpy(bytes, fsm->buf, n);
1778 return 0;
1779 }
1780
1781 static inline void
1782 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1783 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1784 {
1785 fsm->buf += n;
1786 if (fsm->buf > fsm->end) {
1787 fsm->fn = node_overflow;
1788 } else {
1789 fsm->fn = fn;
1790 }
1791 }
1792
1793 static const struct x86_reg *
1794 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1795 {
1796 enc &= 0b11;
1797 if (regsize == 8) {
1798 /* May be 64bit without REX */
1799 return &gpr_map__special[1][enc][regsize-1];
1800 }
1801 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1802 }
1803
1804 /*
1805 * Special node, for MOVS. Fake two displacements of zero on the source and
1806 * destination registers.
1807 */
1808 static int
1809 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1810 {
1811 size_t adrsize;
1812
1813 adrsize = instr->address_size;
1814
1815 /* DS:RSI */
1816 instr->src.type = STORE_REG;
1817 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1818 instr->src.disp.type = DISP_0;
1819
1820 /* ES:RDI, force ES */
1821 instr->dst.type = STORE_REG;
1822 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1823 instr->dst.disp.type = DISP_0;
1824 instr->dst.hardseg = NVMM_X64_SEG_ES;
1825
1826 fsm_advance(fsm, 0, NULL);
1827
1828 return 0;
1829 }
1830
1831 /*
1832 * Special node, for STOS and LODS. Fake a displacement of zero on the
1833 * destination register.
1834 */
1835 static int
1836 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1837 {
1838 const struct x86_opcode *opcode = instr->opcode;
1839 struct x86_store *stlo, *streg;
1840 size_t adrsize, regsize;
1841
1842 adrsize = instr->address_size;
1843 regsize = instr->operand_size;
1844
1845 if (opcode->stos) {
1846 streg = &instr->src;
1847 stlo = &instr->dst;
1848 } else {
1849 streg = &instr->dst;
1850 stlo = &instr->src;
1851 }
1852
1853 streg->type = STORE_REG;
1854 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1855
1856 stlo->type = STORE_REG;
1857 if (opcode->stos) {
1858 /* ES:RDI, force ES */
1859 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1860 stlo->hardseg = NVMM_X64_SEG_ES;
1861 } else {
1862 /* DS:RSI */
1863 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1864 }
1865 stlo->disp.type = DISP_0;
1866
1867 fsm_advance(fsm, 0, NULL);
1868
1869 return 0;
1870 }
1871
1872 static int
1873 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1874 {
1875 const struct x86_opcode *opcode = instr->opcode;
1876 struct x86_store *stdmo, *streg;
1877 size_t adrsize, regsize;
1878
1879 adrsize = instr->address_size;
1880 regsize = instr->operand_size;
1881
1882 if (opcode->todmo) {
1883 streg = &instr->src;
1884 stdmo = &instr->dst;
1885 } else {
1886 streg = &instr->dst;
1887 stdmo = &instr->src;
1888 }
1889
1890 streg->type = STORE_REG;
1891 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1892
1893 stdmo->type = STORE_DMO;
1894 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1895 return -1;
1896 }
1897 fsm_advance(fsm, adrsize, NULL);
1898
1899 return 0;
1900 }
1901
1902 static inline uint64_t
1903 sign_extend(uint64_t val, int size)
1904 {
1905 if (size == 1) {
1906 if (val & __BIT(7))
1907 val |= 0xFFFFFFFFFFFFFF00;
1908 } else if (size == 2) {
1909 if (val & __BIT(15))
1910 val |= 0xFFFFFFFFFFFF0000;
1911 } else if (size == 4) {
1912 if (val & __BIT(31))
1913 val |= 0xFFFFFFFF00000000;
1914 }
1915 return val;
1916 }
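
/*
 * For reference: sign_extend(0x80, 1) = 0xFFFFFFFFFFFFFF80 and
 * sign_extend(0x7F, 1) = 0x7F. This is used for the 8/16/32bit
 * displacements and immediates that get sign-extended to 64 bits.
 */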
1917
1918 static int
1919 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1920 {
1921 const struct x86_opcode *opcode = instr->opcode;
1922 struct x86_store *store;
1923 uint8_t immsize;
1924 size_t sesize = 0;
1925
1926 /* The immediate is the source */
1927 store = &instr->src;
1928 immsize = instr->operand_size;
1929
1930 if (opcode->flags & FLAG_imm8) {
1931 sesize = immsize;
1932 immsize = 1;
1933 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1934 sesize = immsize;
1935 immsize = 4;
1936 }
1937
1938 store->type = STORE_IMM;
1939 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1940 return -1;
1941 }
1942 fsm_advance(fsm, immsize, NULL);
1943
1944 if (sesize != 0) {
1945 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1946 }
1947
1948 return 0;
1949 }
1950
1951 static int
1952 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1953 {
1954 const struct x86_opcode *opcode = instr->opcode;
1955 uint64_t data = 0;
1956 size_t n;
1957
1958 if (instr->strm->disp.type == DISP_1) {
1959 n = 1;
1960 } else { /* DISP4 */
1961 n = 4;
1962 }
1963
1964 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1965 return -1;
1966 }
1967
1968 if (__predict_true(fsm->is64bit)) {
1969 data = sign_extend(data, n);
1970 }
1971
1972 instr->strm->disp.data = data;
1973
1974 if (opcode->immediate) {
1975 fsm_advance(fsm, n, node_immediate);
1976 } else {
1977 fsm_advance(fsm, n, NULL);
1978 }
1979
1980 return 0;
1981 }
1982
1983 static const struct x86_reg *
1984 get_register_idx(struct x86_instr *instr, uint8_t index)
1985 {
1986 uint8_t enc = index;
1987 const struct x86_reg *reg;
1988 size_t regsize;
1989
1990 regsize = instr->address_size;
1991 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1992
1993 if (reg->num == -1) {
1994 reg = resolve_special_register(instr, enc, regsize);
1995 }
1996
1997 return reg;
1998 }
1999
2000 static const struct x86_reg *
2001 get_register_bas(struct x86_instr *instr, uint8_t base)
2002 {
2003 uint8_t enc = base;
2004 const struct x86_reg *reg;
2005 size_t regsize;
2006
2007 regsize = instr->address_size;
2008 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2009 if (reg->num == -1) {
2010 reg = resolve_special_register(instr, enc, regsize);
2011 }
2012
2013 return reg;
2014 }
2015
2016 static int
2017 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2018 {
2019 const struct x86_opcode *opcode;
2020 uint8_t scale, index, base;
2021 bool noindex, nobase;
2022 uint8_t byte;
2023
2024 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2025 return -1;
2026 }
2027
2028 scale = ((byte & 0b11000000) >> 6);
2029 index = ((byte & 0b00111000) >> 3);
2030 base = ((byte & 0b00000111) >> 0);
2031
2032 opcode = instr->opcode;
2033
2034 noindex = false;
2035 nobase = false;
2036
2037 if (index == 0b100 && !instr->rexpref.x) {
2038 /* Special case: the index is null */
2039 noindex = true;
2040 }
2041
2042 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2043 /* Special case: the base is null + disp32 */
2044 instr->strm->disp.type = DISP_4;
2045 nobase = true;
2046 }
2047
2048 instr->strm->type = STORE_SIB;
2049 instr->strm->u.sib.scale = (1 << scale);
2050 if (!noindex)
2051 instr->strm->u.sib.idx = get_register_idx(instr, index);
2052 if (!nobase)
2053 instr->strm->u.sib.bas = get_register_bas(instr, base);
2054
2055 /* May have a displacement, or an immediate */
2056 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2057 fsm_advance(fsm, 1, node_disp);
2058 } else if (opcode->immediate) {
2059 fsm_advance(fsm, 1, node_immediate);
2060 } else {
2061 fsm_advance(fsm, 1, NULL);
2062 }
2063
2064 return 0;
2065 }
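
/*
 * For reference: in "mov %ecx,(%rax,%rbx,4)" (bytes 89 0C 98), the SIB
 * byte 0x98 decomposes as scale=0b10 (x4), index=0b011 (RBX) and
 * base=0b000 (RAX); the effective address is RAX + RBX*4 (+ disp, if any).
 */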
2066
2067 static const struct x86_reg *
2068 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2069 {
2070 uint8_t enc = instr->regmodrm.reg;
2071 const struct x86_reg *reg;
2072 size_t regsize;
2073
2074 regsize = instr->operand_size;
2075
2076 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2077 if (reg->num == -1) {
2078 reg = resolve_special_register(instr, enc, regsize);
2079 }
2080
2081 return reg;
2082 }
2083
2084 static const struct x86_reg *
2085 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2086 {
2087 uint8_t enc = instr->regmodrm.rm;
2088 const struct x86_reg *reg;
2089 size_t regsize;
2090
2091 if (instr->strm->disp.type == DISP_NONE) {
2092 regsize = instr->operand_size;
2093 } else {
2094 /* Indirect access, the size is that of the address. */
2095 regsize = instr->address_size;
2096 }
2097
2098 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2099 if (reg->num == -1) {
2100 reg = resolve_special_register(instr, enc, regsize);
2101 }
2102
2103 return reg;
2104 }
2105
2106 static inline bool
2107 has_sib(struct x86_instr *instr)
2108 {
2109 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2110 }
2111
2112 static inline bool
2113 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2114 {
2115 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2116 instr->regmodrm.rm == RM_RBP_DISP32);
2117 }
2118
2119 static inline bool
2120 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2121 {
2122 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2123 instr->regmodrm.rm == RM_RBP_DISP32);
2124 }
2125
2126 static enum x86_disp_type
2127 get_disp_type(struct x86_instr *instr)
2128 {
2129 switch (instr->regmodrm.mod) {
2130 case MOD_DIS0: /* indirect */
2131 return DISP_0;
2132 case MOD_DIS1: /* indirect+1 */
2133 return DISP_1;
2134 case MOD_DIS4: /* indirect+4 */
2135 return DISP_4;
2136 case MOD_REG: /* direct */
2137 default: /* gcc */
2138 return DISP_NONE;
2139 }
2140 }
2141
2142 static int
2143 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2144 {
2145 struct x86_store *strg, *strm;
2146 const struct x86_opcode *opcode;
2147 const struct x86_reg *reg;
2148 uint8_t byte;
2149
2150 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2151 return -1;
2152 }
2153
2154 opcode = instr->opcode;
2155
2156 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2157 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2158 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2159
2160 if (opcode->regtorm) {
2161 strg = &instr->src;
2162 strm = &instr->dst;
2163 } else { /* RM to REG */
2164 strm = &instr->src;
2165 strg = &instr->dst;
2166 }
2167
2168 /* Save for later use. */
2169 instr->strm = strm;
2170
	/*
	 * Special cases: Groups. The REG field of REGMODRM is the index in
	 * the group. The source operand is filled in by the Immediate node
	 * later, if any.
	 */
2175 if (opcode->group1) {
2176 if (group1[instr->regmodrm.reg].emul == NULL) {
2177 return -1;
2178 }
2179 instr->emul = group1[instr->regmodrm.reg].emul;
2180 } else if (opcode->group3) {
2181 if (group3[instr->regmodrm.reg].emul == NULL) {
2182 return -1;
2183 }
2184 instr->emul = group3[instr->regmodrm.reg].emul;
2185 } else if (opcode->group11) {
2186 if (group11[instr->regmodrm.reg].emul == NULL) {
2187 return -1;
2188 }
2189 instr->emul = group11[instr->regmodrm.reg].emul;
2190 }
2191
2192 if (!opcode->immediate) {
2193 reg = get_register_reg(instr, opcode);
2194 if (reg == NULL) {
2195 return -1;
2196 }
2197 strg->type = STORE_REG;
2198 strg->u.reg = reg;
2199 }
2200
2201 /* The displacement applies to RM. */
2202 strm->disp.type = get_disp_type(instr);
2203
2204 if (has_sib(instr)) {
2205 /* Overwrites RM */
2206 fsm_advance(fsm, 1, node_sib);
2207 return 0;
2208 }
2209
2210 if (is_rip_relative(fsm, instr)) {
2211 /* Overwrites RM */
2212 strm->type = STORE_REG;
2213 strm->u.reg = &gpr_map__rip;
2214 strm->disp.type = DISP_4;
2215 fsm_advance(fsm, 1, node_disp);
2216 return 0;
2217 }
2218
2219 if (is_disp32_only(fsm, instr)) {
2220 /* Overwrites RM */
2221 strm->type = STORE_REG;
2222 strm->u.reg = NULL;
2223 strm->disp.type = DISP_4;
2224 fsm_advance(fsm, 1, node_disp);
2225 return 0;
2226 }
2227
2228 reg = get_register_rm(instr, opcode);
2229 if (reg == NULL) {
2230 return -1;
2231 }
2232 strm->type = STORE_REG;
2233 strm->u.reg = reg;
2234
2235 if (strm->disp.type == DISP_NONE) {
2236 /* Direct register addressing mode */
2237 if (opcode->immediate) {
2238 fsm_advance(fsm, 1, node_immediate);
2239 } else {
2240 fsm_advance(fsm, 1, NULL);
2241 }
2242 } else if (strm->disp.type == DISP_0) {
2243 /* Indirect register addressing mode */
2244 if (opcode->immediate) {
2245 fsm_advance(fsm, 1, node_immediate);
2246 } else {
2247 fsm_advance(fsm, 1, NULL);
2248 }
2249 } else {
2250 fsm_advance(fsm, 1, node_disp);
2251 }
2252
2253 return 0;
2254 }
2255
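/*
 * Operand size: fixed-size opcodes use their default size; otherwise
 * REX.W forces 8 bytes, and the 0x66 operand-size override toggles
 * between the mode's default (4 in 32/64-bit code, 2 in 16-bit code)
 * and the alternate size.
 */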
2256 static size_t
2257 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2258 {
2259 const struct x86_opcode *opcode = instr->opcode;
2260 int opsize;
2261
2262 /* Get the opsize */
2263 if (!opcode->szoverride) {
2264 opsize = opcode->defsize;
2265 } else if (instr->rexpref.present && instr->rexpref.w) {
2266 opsize = 8;
2267 } else {
2268 if (!fsm->is16bit) {
2269 if (instr->legpref.opr_ovr) {
2270 opsize = 2;
2271 } else {
2272 opsize = 4;
2273 }
2274 } else { /* 16bit */
2275 if (instr->legpref.opr_ovr) {
2276 opsize = 4;
2277 } else {
2278 opsize = 2;
2279 }
2280 }
2281 }
2282
2283 return opsize;
2284 }
2285
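/*
 * Address size: 8 bytes in 64-bit mode, 4 in 32-bit mode, 2 in 16-bit
 * mode, with the 0x67 address-size override selecting the alternate
 * width (4, 2 and 4 respectively).
 */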
2286 static size_t
2287 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2288 {
2289 if (fsm->is64bit) {
2290 if (__predict_false(instr->legpref.adr_ovr)) {
2291 return 4;
2292 }
2293 return 8;
2294 }
2295
2296 if (fsm->is32bit) {
2297 if (__predict_false(instr->legpref.adr_ovr)) {
2298 return 2;
2299 }
2300 return 4;
2301 }
2302
2303 /* 16bit. */
2304 if (__predict_false(instr->legpref.adr_ovr)) {
2305 return 4;
2306 }
2307 return 2;
2308 }
2309
2310 static int
2311 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2312 {
2313 const struct x86_opcode *opcode;
2314 uint8_t byte;
2315
2316 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2317 return -1;
2318 }
2319
2320 opcode = &primary_opcode_table[byte];
2321 if (__predict_false(!opcode->valid)) {
2322 return -1;
2323 }
2324
2325 instr->opcode = opcode;
2326 instr->emul = opcode->emul;
2327 instr->operand_size = get_operand_size(fsm, instr);
2328 instr->address_size = get_address_size(fsm, instr);
2329
2330 if (fsm->is64bit && (instr->operand_size == 4)) {
2331 /* Zero-extend to 64 bits. */
2332 instr->zeroextend_mask = ~size_to_mask(4);
2333 }
2334
2335 if (opcode->regmodrm) {
2336 fsm_advance(fsm, 1, node_regmodrm);
2337 } else if (opcode->dmo) {
2338 /* Direct-Memory Offsets */
2339 fsm_advance(fsm, 1, node_dmo);
2340 } else if (opcode->stos || opcode->lods) {
2341 fsm_advance(fsm, 1, node_stlo);
2342 } else if (opcode->movs) {
2343 fsm_advance(fsm, 1, node_movs);
2344 } else {
2345 return -1;
2346 }
2347
2348 return 0;
2349 }
2350
2351 static int
2352 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2353 {
2354 const struct x86_opcode *opcode;
2355 uint8_t byte;
2356
2357 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2358 return -1;
2359 }
2360
2361 opcode = &secondary_opcode_table[byte];
2362 if (__predict_false(!opcode->valid)) {
2363 return -1;
2364 }
2365
2366 instr->opcode = opcode;
2367 instr->emul = opcode->emul;
2368 instr->operand_size = get_operand_size(fsm, instr);
2369 instr->address_size = get_address_size(fsm, instr);
2370
2371 if (fsm->is64bit && (instr->operand_size == 4)) {
2372 /* Zero-extend to 64 bits. */
2373 instr->zeroextend_mask = ~size_to_mask(4);
2374 }
2375
2376 if (opcode->flags & FLAG_ze) {
2377 /*
2378 		 * Compute the mask for zero-extension. Update the operand size;
2379 		 * we move fewer bytes.
2380 */
2381 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2382 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2383 instr->operand_size = opcode->defsize;
2384 }
2385
2386 if (opcode->regmodrm) {
2387 fsm_advance(fsm, 1, node_regmodrm);
2388 } else {
2389 return -1;
2390 }
2391
2392 return 0;
2393 }
2394
2395 static int
2396 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2397 {
2398 uint8_t byte;
2399
2400 #define ESCAPE 0x0F
2401 #define VEX_1 0xC5
2402 #define VEX_2 0xC4
2403 #define XOP 0x8F
2404
2405 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2406 return -1;
2407 }
2408
2409 /*
2410 	 * We don't handle XOP. It is AMD-specific, and it was removed shortly
2411 	 * after being introduced.
2412 */
2413 if (byte == ESCAPE) {
2414 fsm_advance(fsm, 1, node_secondary_opcode);
2415 } else if (!instr->rexpref.present) {
2416 if (byte == VEX_1) {
2417 return -1;
2418 } else if (byte == VEX_2) {
2419 return -1;
2420 } else {
2421 fsm->fn = node_primary_opcode;
2422 }
2423 } else {
2424 fsm->fn = node_primary_opcode;
2425 }
2426
2427 return 0;
2428 }
2429
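/*
 * REX prefix: 0100WRXB. W selects a 64-bit operand size, R extends
 * ModRM.reg, X extends SIB.index and B extends ModRM.rm/SIB.base.
 * Outside 64-bit mode the 0x40-0x4F bytes encode INC/DEC, which we
 * don't emulate, so the decode fails there.
 */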
2430 static int
2431 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2432 {
2433 struct x86_rexpref *rexpref = &instr->rexpref;
2434 uint8_t byte;
2435 size_t n = 0;
2436
2437 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2438 return -1;
2439 }
2440
2441 if (byte >= 0x40 && byte <= 0x4F) {
2442 if (__predict_false(!fsm->is64bit)) {
2443 return -1;
2444 }
2445 rexpref->b = ((byte & 0x1) != 0);
2446 rexpref->x = ((byte & 0x2) != 0);
2447 rexpref->r = ((byte & 0x4) != 0);
2448 rexpref->w = ((byte & 0x8) != 0);
2449 rexpref->present = true;
2450 n = 1;
2451 }
2452
2453 fsm_advance(fsm, n, node_main);
2454 return 0;
2455 }
2456
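/*
 * Legacy prefixes may appear in any order and may repeat. This node
 * consumes them one byte at a time by re-entering itself, and hands the
 * first non-prefix byte (advancing zero bytes) to the REX node. LOCK is
 * accepted and ignored.
 */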
2457 static int
2458 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2459 {
2460 uint8_t byte;
2461
2462 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2463 return -1;
2464 }
2465
2466 if (byte == LEG_OPR_OVR) {
2467 instr->legpref.opr_ovr = 1;
2468 } else if (byte == LEG_OVR_DS) {
2469 instr->legpref.seg = NVMM_X64_SEG_DS;
2470 } else if (byte == LEG_OVR_ES) {
2471 instr->legpref.seg = NVMM_X64_SEG_ES;
2472 } else if (byte == LEG_REP) {
2473 instr->legpref.rep = 1;
2474 } else if (byte == LEG_OVR_GS) {
2475 instr->legpref.seg = NVMM_X64_SEG_GS;
2476 } else if (byte == LEG_OVR_FS) {
2477 instr->legpref.seg = NVMM_X64_SEG_FS;
2478 } else if (byte == LEG_ADR_OVR) {
2479 instr->legpref.adr_ovr = 1;
2480 } else if (byte == LEG_OVR_CS) {
2481 instr->legpref.seg = NVMM_X64_SEG_CS;
2482 } else if (byte == LEG_OVR_SS) {
2483 instr->legpref.seg = NVMM_X64_SEG_SS;
2484 } else if (byte == LEG_REPN) {
2485 instr->legpref.repn = 1;
2486 } else if (byte == LEG_LOCK) {
2487 /* ignore */
2488 } else {
2489 /* not a legacy prefix */
2490 fsm_advance(fsm, 0, node_rex_prefix);
2491 return 0;
2492 }
2493
2494 fsm_advance(fsm, 1, node_legacy_prefix);
2495 return 0;
2496 }
2497
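/*
 * Decode the given instruction bytes by walking the FSM: legacy
 * prefixes, then REX, then the primary or secondary (0x0F) opcode map,
 * then ModRM/SIB/displacement/immediate as required. The walk ends when
 * a node sets fsm.fn to NULL, and instr->len records the bytes consumed.
 *
 * Example (a sketch, assuming the usual table entry for 0x89, MOV Ev,Gv):
 * the bytes 48 89 08 decode as REX.W, opcode 0x89, ModRM 0x08
 * (mod 00, reg 001, rm 000), i.e. a 64-bit move of RCX into [RAX],
 * with instr->len = 3.
 */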
2498 static int
2499 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2500 struct nvmm_x64_state *state)
2501 {
2502 struct x86_decode_fsm fsm;
2503 int ret;
2504
2505 memset(instr, 0, sizeof(*instr));
2506 instr->legpref.seg = -1;
2507 instr->src.hardseg = -1;
2508 instr->dst.hardseg = -1;
2509
2510 fsm.is64bit = is_64bit(state);
2511 fsm.is32bit = is_32bit(state);
2512 fsm.is16bit = is_16bit(state);
2513
2514 fsm.fn = node_legacy_prefix;
2515 fsm.buf = inst_bytes;
2516 fsm.end = inst_bytes + inst_len;
2517
2518 while (fsm.fn != NULL) {
2519 ret = (*fsm.fn)(&fsm, instr);
2520 if (ret == -1)
2521 return -1;
2522 }
2523
2524 instr->len = fsm.buf - inst_bytes;
2525
2526 return 0;
2527 }
2528
2529 /* -------------------------------------------------------------------------- */
2530
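/*
 * EXEC_INSTR(sz, instr) generates exec_<instr><sz>(), which executes the
 * real instruction on the host with the two operands and captures the
 * host RFLAGS via PUSHFQ/POPQ. EXEC_DISPATCHER generates exec_<instr>(),
 * which picks the right width from the operand size. The PSL_*_MASK
 * defines below select which arithmetic flags each instruction is
 * allowed to fold back into the guest RFLAGS.
 */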
2531 #define EXEC_INSTR(sz, instr) \
2532 static uint##sz##_t \
2533 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2534 { \
2535 uint##sz##_t res; \
2536 __asm __volatile ( \
2537 #instr " %2, %3;" \
2538 "mov %3, %1;" \
2539 "pushfq;" \
2540 "popq %0" \
2541 : "=r" (*rflags), "=r" (res) \
2542 : "r" (op1), "r" (op2)); \
2543 return res; \
2544 }
2545
2546 #define EXEC_DISPATCHER(instr) \
2547 static uint64_t \
2548 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2549 { \
2550 switch (opsize) { \
2551 case 1: \
2552 return exec_##instr##8(op1, op2, rflags); \
2553 case 2: \
2554 return exec_##instr##16(op1, op2, rflags); \
2555 case 4: \
2556 return exec_##instr##32(op1, op2, rflags); \
2557 default: \
2558 return exec_##instr##64(op1, op2, rflags); \
2559 } \
2560 }
2561
2562 /* SUB: ret = op1 - op2 */
2563 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2564 EXEC_INSTR(8, sub)
2565 EXEC_INSTR(16, sub)
2566 EXEC_INSTR(32, sub)
2567 EXEC_INSTR(64, sub)
2568 EXEC_DISPATCHER(sub)
2569
2570 /* OR: ret = op1 | op2 */
2571 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2572 EXEC_INSTR(8, or)
2573 EXEC_INSTR(16, or)
2574 EXEC_INSTR(32, or)
2575 EXEC_INSTR(64, or)
2576 EXEC_DISPATCHER(or)
2577
2578 /* AND: ret = op1 & op2 */
2579 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2580 EXEC_INSTR(8, and)
2581 EXEC_INSTR(16, and)
2582 EXEC_INSTR(32, and)
2583 EXEC_INSTR(64, and)
2584 EXEC_DISPATCHER(and)
2585
2586 /* XOR: ret = op1 ^ op2 */
2587 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2588 EXEC_INSTR(8, xor)
2589 EXEC_INSTR(16, xor)
2590 EXEC_INSTR(32, xor)
2591 EXEC_INSTR(64, xor)
2592 EXEC_DISPATCHER(xor)
2593
2594 /* -------------------------------------------------------------------------- */
2595
2596 /*
2597 * Emulation functions. We don't care about the order of the operands, except
2598  * for SUB, CMP and TEST. For these we look at mem->write to determine which
2599  * operand is op1 and which is op2.
2600 */
2601
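/*
 * Common pattern for the ALU emulations below: one operand is already in
 * mem->data (a register or immediate value prepared by assist_mem_single),
 * the other is fetched from guest memory through the mem callback. The
 * operation runs on the host; on a memory write the result is written
 * back through the callback, on a read it is returned in mem->data for
 * register write-back. Guest RFLAGS are then updated under the
 * per-instruction mask.
 */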
2602 static void
2603 x86_func_or(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2604 {
2605 uint64_t *retval = (uint64_t *)mem->data;
2606 const bool write = mem->write;
2607 uint64_t *op1, op2, fl, ret;
2608
2609 op1 = (uint64_t *)mem->data;
2610 op2 = 0;
2611
2612 /* Fetch the value to be OR'ed (op2). */
2613 mem->data = (uint8_t *)&op2;
2614 mem->write = false;
2615 (*mach->cbs.mem)(mem);
2616
2617 /* Perform the OR. */
2618 ret = exec_or(*op1, op2, &fl, mem->size);
2619
2620 if (write) {
2621 /* Write back the result. */
2622 mem->data = (uint8_t *)&ret;
2623 mem->write = true;
2624 (*mach->cbs.mem)(mem);
2625 } else {
2626 /* Return data to the caller. */
2627 *retval = ret;
2628 }
2629
2630 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2631 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2632 }
2633
2634 static void
2635 x86_func_and(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2636 {
2637 uint64_t *retval = (uint64_t *)mem->data;
2638 const bool write = mem->write;
2639 uint64_t *op1, op2, fl, ret;
2640
2641 op1 = (uint64_t *)mem->data;
2642 op2 = 0;
2643
2644 /* Fetch the value to be AND'ed (op2). */
2645 mem->data = (uint8_t *)&op2;
2646 mem->write = false;
2647 (*mach->cbs.mem)(mem);
2648
2649 /* Perform the AND. */
2650 ret = exec_and(*op1, op2, &fl, mem->size);
2651
2652 if (write) {
2653 /* Write back the result. */
2654 mem->data = (uint8_t *)&ret;
2655 mem->write = true;
2656 (*mach->cbs.mem)(mem);
2657 } else {
2658 /* Return data to the caller. */
2659 *retval = ret;
2660 }
2661
2662 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2663 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2664 }
2665
2666 static void
2667 x86_func_sub(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2668 {
2669 uint64_t *retval = (uint64_t *)mem->data;
2670 const bool write = mem->write;
2671 uint64_t *op1, *op2, fl, ret;
2672 uint64_t tmp;
2673 bool memop1;
2674
2675 memop1 = !mem->write;
2676 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2677 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2678
2679 /* Fetch the value to be SUB'ed (op1 or op2). */
2680 mem->data = (uint8_t *)&tmp;
2681 mem->write = false;
2682 (*mach->cbs.mem)(mem);
2683
2684 /* Perform the SUB. */
2685 ret = exec_sub(*op1, *op2, &fl, mem->size);
2686
2687 if (write) {
2688 /* Write back the result. */
2689 mem->data = (uint8_t *)&ret;
2690 mem->write = true;
2691 (*mach->cbs.mem)(mem);
2692 } else {
2693 /* Return data to the caller. */
2694 *retval = ret;
2695 }
2696
2697 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2698 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2699 }
2700
2701 static void
2702 x86_func_xor(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2703 {
2704 uint64_t *retval = (uint64_t *)mem->data;
2705 const bool write = mem->write;
2706 uint64_t *op1, op2, fl, ret;
2707
2708 op1 = (uint64_t *)mem->data;
2709 op2 = 0;
2710
2711 /* Fetch the value to be XOR'ed (op2). */
2712 mem->data = (uint8_t *)&op2;
2713 mem->write = false;
2714 (*mach->cbs.mem)(mem);
2715
2716 /* Perform the XOR. */
2717 ret = exec_xor(*op1, op2, &fl, mem->size);
2718
2719 if (write) {
2720 /* Write back the result. */
2721 mem->data = (uint8_t *)&ret;
2722 mem->write = true;
2723 (*mach->cbs.mem)(mem);
2724 } else {
2725 /* Return data to the caller. */
2726 *retval = ret;
2727 }
2728
2729 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2730 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2731 }
2732
2733 static void
2734 x86_func_cmp(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2735 {
2736 uint64_t *op1, *op2, fl;
2737 uint64_t tmp;
2738 bool memop1;
2739
2740 memop1 = !mem->write;
2741 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2742 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2743
2744 /* Fetch the value to be CMP'ed (op1 or op2). */
2745 mem->data = (uint8_t *)&tmp;
2746 mem->write = false;
2747 (*mach->cbs.mem)(mem);
2748
2749 /* Perform the CMP. */
2750 exec_sub(*op1, *op2, &fl, mem->size);
2751
2752 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2753 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2754 }
2755
2756 static void
2757 x86_func_test(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2758 {
2759 uint64_t *op1, *op2, fl;
2760 uint64_t tmp;
2761 bool memop1;
2762
2763 memop1 = !mem->write;
2764 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2765 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2766
2767 /* Fetch the value to be TEST'ed (op1 or op2). */
2768 mem->data = (uint8_t *)&tmp;
2769 mem->write = false;
2770 (*mach->cbs.mem)(mem);
2771
2772 /* Perform the TEST. */
2773 exec_and(*op1, *op2, &fl, mem->size);
2774
2775 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2776 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2777 }
2778
2779 static void
2780 x86_func_mov(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2781 {
2782 /*
2783 * Nothing special, just move without emulation.
2784 */
2785 (*mach->cbs.mem)(mem);
2786 }
2787
2788 static void
2789 x86_func_stos(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2790 {
2791 /*
2792 * Just move, and update RDI.
2793 */
2794 (*mach->cbs.mem)(mem);
2795
2796 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2797 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2798 } else {
2799 gprs[NVMM_X64_GPR_RDI] += mem->size;
2800 }
2801 }
2802
2803 static void
2804 x86_func_lods(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2805 {
2806 /*
2807 * Just move, and update RSI.
2808 */
2809 (*mach->cbs.mem)(mem);
2810
2811 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2812 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2813 } else {
2814 gprs[NVMM_X64_GPR_RSI] += mem->size;
2815 }
2816 }
2817
2818 static void
2819 x86_func_movs(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2820 {
2821 	/*
2822 	 * Special instruction: double memory operand. Don't call the cb;
2823 	 * the copy has already been performed by assist_mem_double.
2824 	 */
2825
2826 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2827 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2828 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2829 } else {
2830 gprs[NVMM_X64_GPR_RSI] += mem->size;
2831 gprs[NVMM_X64_GPR_RDI] += mem->size;
2832 }
2833 }
2834
2835 /* -------------------------------------------------------------------------- */
2836
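/*
 * Compute the guest virtual address of a store: base register
 * (plus scale*index for SIB forms, or the absolute offset for DMO),
 * plus displacement, with register values truncated to the address
 * size. The segment is the hard-coded one, else a legacy override,
 * else DS. In long mode only FS/GS bases are applied; otherwise the
 * segment is limit-checked and its base added.
 */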
2837 static inline uint64_t
2838 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2839 {
2840 uint64_t val;
2841
2842 val = state->gprs[gpr];
2843 val &= size_to_mask(instr->address_size);
2844
2845 return val;
2846 }
2847
2848 static int
2849 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2850 struct x86_store *store, gvaddr_t *gvap, size_t size)
2851 {
2852 struct x86_sib *sib;
2853 gvaddr_t gva = 0;
2854 uint64_t reg;
2855 int ret, seg;
2856
2857 if (store->type == STORE_SIB) {
2858 sib = &store->u.sib;
2859 if (sib->bas != NULL)
2860 gva += gpr_read_address(instr, state, sib->bas->num);
2861 if (sib->idx != NULL) {
2862 reg = gpr_read_address(instr, state, sib->idx->num);
2863 gva += sib->scale * reg;
2864 }
2865 } else if (store->type == STORE_REG) {
2866 if (store->u.reg == NULL) {
2867 /* The base is null. Happens with disp32-only. */
2868 } else {
2869 gva = gpr_read_address(instr, state, store->u.reg->num);
2870 }
2871 } else {
2872 gva = store->u.dmo;
2873 }
2874
2875 if (store->disp.type != DISP_NONE) {
2876 gva += store->disp.data;
2877 }
2878
2879 if (store->hardseg != -1) {
2880 seg = store->hardseg;
2881 } else {
2882 if (__predict_false(instr->legpref.seg != -1)) {
2883 seg = instr->legpref.seg;
2884 } else {
2885 seg = NVMM_X64_SEG_DS;
2886 }
2887 }
2888
2889 if (__predict_true(is_long_mode(state))) {
2890 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2891 segment_apply(&state->segs[seg], &gva);
2892 }
2893 } else {
2894 ret = segment_check(&state->segs[seg], gva, size);
2895 if (ret == -1)
2896 return -1;
2897 segment_apply(&state->segs[seg], &gva);
2898 }
2899
2900 *gvap = gva;
2901 return 0;
2902 }
2903
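/*
 * Scan the (at most 5) bytes at RIP for legacy prefixes and return the
 * segment chosen by an override prefix, defaulting to DS.
 */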
2904 static int
2905 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2906 {
2907 uint8_t inst_bytes[5], byte;
2908 size_t i, fetchsize;
2909 gvaddr_t gva;
2910 int ret, seg;
2911
2912 fetchsize = sizeof(inst_bytes);
2913
2914 gva = state->gprs[NVMM_X64_GPR_RIP];
2915 if (__predict_false(!is_long_mode(state))) {
2916 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2917 fetchsize);
2918 if (ret == -1)
2919 return -1;
2920 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2921 }
2922
2923 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2924 if (ret == -1)
2925 return -1;
2926
2927 seg = NVMM_X64_SEG_DS;
2928 for (i = 0; i < fetchsize; i++) {
2929 byte = inst_bytes[i];
2930
2931 if (byte == LEG_OVR_DS) {
2932 seg = NVMM_X64_SEG_DS;
2933 } else if (byte == LEG_OVR_ES) {
2934 seg = NVMM_X64_SEG_ES;
2935 } else if (byte == LEG_OVR_GS) {
2936 seg = NVMM_X64_SEG_GS;
2937 } else if (byte == LEG_OVR_FS) {
2938 seg = NVMM_X64_SEG_FS;
2939 } else if (byte == LEG_OVR_CS) {
2940 seg = NVMM_X64_SEG_CS;
2941 } else if (byte == LEG_OVR_SS) {
2942 seg = NVMM_X64_SEG_SS;
2943 } else if (byte == LEG_OPR_OVR) {
2944 /* nothing */
2945 } else if (byte == LEG_ADR_OVR) {
2946 /* nothing */
2947 } else if (byte == LEG_REP) {
2948 /* nothing */
2949 } else if (byte == LEG_REPN) {
2950 /* nothing */
2951 } else if (byte == LEG_LOCK) {
2952 /* nothing */
2953 } else {
2954 return seg;
2955 }
2956 }
2957
2958 return seg;
2959 }
2960
2961 static int
2962 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2963 struct nvmm_exit *exit)
2964 {
2965 size_t fetchsize;
2966 gvaddr_t gva;
2967 int ret;
2968
2969 fetchsize = sizeof(exit->u.mem.inst_bytes);
2970
2971 gva = state->gprs[NVMM_X64_GPR_RIP];
2972 if (__predict_false(!is_long_mode(state))) {
2973 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2974 fetchsize);
2975 if (ret == -1)
2976 return -1;
2977 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2978 }
2979
2980 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2981 fetchsize);
2982 if (ret == -1)
2983 return -1;
2984
2985 exit->u.mem.inst_len = fetchsize;
2986
2987 return 0;
2988 }
2989
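/*
 * MOVS: two memory operands. Read the source bytes from the guest,
 * write them to the destination, then call the emul function, which
 * only updates RSI/RDI (mem is passed with just the size filled in).
 */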
2990 static int
2991 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2992 struct x86_instr *instr)
2993 {
2994 struct nvmm_mem mem;
2995 uint8_t data[8];
2996 gvaddr_t gva;
2997 size_t size;
2998 int ret;
2999
3000 size = instr->operand_size;
3001
3002 /* Source. */
3003 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3004 if (ret == -1)
3005 return -1;
3006 ret = read_guest_memory(mach, state, gva, data, size);
3007 if (ret == -1)
3008 return -1;
3009
3010 /* Destination. */
3011 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3012 if (ret == -1)
3013 return -1;
3014 ret = write_guest_memory(mach, state, gva, data, size);
3015 if (ret == -1)
3016 return -1;
3017
3018 mem.size = size;
3019 (*instr->emul->func)(mach, &mem, state->gprs);
3020
3021 return 0;
3022 }
3023
3024 #define DISASSEMBLER_BUG() \
3025 do { \
3026 errno = EINVAL; \
3027 return -1; \
3028 	} while (0)
3029
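/*
 * Single memory operand. The direction is inferred from the source
 * store: a direct register or an immediate means we write to memory,
 * anything else means we read from it. On a write, the source value is
 * copied into mem->data; on a read, the destination register may be
 * preloaded as op1 (for the ALU emulations), and, unless the emul only
 * touches RFLAGS, the result is merged back into the destination
 * register under its mask and the zero-extend mask.
 */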
3030 static int
3031 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3032 struct x86_instr *instr, struct nvmm_exit *exit)
3033 {
3034 struct nvmm_mem mem;
3035 uint8_t membuf[8];
3036 uint64_t val;
3037
3038 memset(membuf, 0, sizeof(membuf));
3039
3040 mem.gpa = exit->u.mem.gpa;
3041 mem.size = instr->operand_size;
3042 mem.data = membuf;
3043
3044 /* Determine the direction. */
3045 switch (instr->src.type) {
3046 case STORE_REG:
3047 if (instr->src.disp.type != DISP_NONE) {
3048 /* Indirect access. */
3049 mem.write = false;
3050 } else {
3051 /* Direct access. */
3052 mem.write = true;
3053 }
3054 break;
3055 case STORE_IMM:
3056 mem.write = true;
3057 break;
3058 case STORE_SIB:
3059 mem.write = false;
3060 break;
3061 case STORE_DMO:
3062 mem.write = false;
3063 break;
3064 default:
3065 DISASSEMBLER_BUG();
3066 }
3067
3068 if (mem.write) {
3069 switch (instr->src.type) {
3070 case STORE_REG:
3071 if (instr->src.disp.type != DISP_NONE) {
3072 DISASSEMBLER_BUG();
3073 }
3074 val = state->gprs[instr->src.u.reg->num];
3075 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3076 memcpy(mem.data, &val, mem.size);
3077 break;
3078 case STORE_IMM:
3079 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3080 break;
3081 default:
3082 DISASSEMBLER_BUG();
3083 }
3084 } else if (instr->emul->read) {
3085 if (instr->dst.type != STORE_REG) {
3086 DISASSEMBLER_BUG();
3087 }
3088 if (instr->dst.disp.type != DISP_NONE) {
3089 DISASSEMBLER_BUG();
3090 }
3091 val = state->gprs[instr->dst.u.reg->num];
3092 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3093 memcpy(mem.data, &val, mem.size);
3094 }
3095
3096 (*instr->emul->func)(mach, &mem, state->gprs);
3097
3098 if (!instr->emul->notouch && !mem.write) {
3099 if (instr->dst.type != STORE_REG) {
3100 DISASSEMBLER_BUG();
3101 }
3102 memcpy(&val, membuf, sizeof(uint64_t));
3103 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3104 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3105 state->gprs[instr->dst.u.reg->num] |= val;
3106 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3107 }
3108
3109 return 0;
3110 }
3111
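/*
 * Entry point for MMIO assistance. Fetch the needed VCPU state, fetch
 * the instruction bytes if the kernel did not provide them, decode, and
 * dispatch to the single- or double-operand handler, then write the GPRs
 * back. For REP/REPN only one iteration is emulated per call: RIP is
 * advanced only when the count reaches zero (or when REPN terminates on
 * ZF), so the guest re-faults for the remaining iterations.
 */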
3112 int
3113 nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3114 {
3115 struct nvmm_x64_state *state = vcpu->state;
3116 struct nvmm_exit *exit = vcpu->exit;
3117 struct x86_instr instr;
3118 uint64_t cnt = 0; /* GCC */
3119 int ret;
3120
3121 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3122 errno = EINVAL;
3123 return -1;
3124 }
3125
3126 ret = nvmm_vcpu_getstate(mach, vcpu,
3127 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3128 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3129 if (ret == -1)
3130 return -1;
3131
3132 if (exit->u.mem.inst_len == 0) {
3133 /*
3134 * The instruction was not fetched from the kernel. Fetch
3135 * it ourselves.
3136 */
3137 ret = fetch_instruction(mach, state, exit);
3138 if (ret == -1)
3139 return -1;
3140 }
3141
3142 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3143 &instr, state);
3144 if (ret == -1) {
3145 errno = ENODEV;
3146 return -1;
3147 }
3148
3149 if (instr.legpref.rep || instr.legpref.repn) {
3150 cnt = rep_get_cnt(state, instr.address_size);
3151 if (__predict_false(cnt == 0)) {
3152 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3153 goto out;
3154 }
3155 }
3156
3157 if (instr.opcode->movs) {
3158 ret = assist_mem_double(mach, state, &instr);
3159 } else {
3160 ret = assist_mem_single(mach, state, &instr, exit);
3161 }
3162 if (ret == -1) {
3163 errno = ENODEV;
3164 return -1;
3165 }
3166
3167 if (instr.legpref.rep || instr.legpref.repn) {
3168 cnt -= 1;
3169 rep_set_cnt(state, instr.address_size, cnt);
3170 if (cnt == 0) {
3171 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3172 } else if (__predict_false(instr.legpref.repn)) {
3173 if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3174 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3175 }
3176 }
3177 } else {
3178 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3179 }
3180
3181 out:
3182 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
3183 if (ret == -1)
3184 return -1;
3185
3186 return 0;
3187 }
3188