1 /* $NetBSD: libnvmm_x86.c,v 1.28 2019/04/04 17:33:47 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49 #define __cacheline_aligned __attribute__((__aligned__(64)))
50
51 #include <x86/specialreg.h>
52
53 extern struct nvmm_callbacks __callbacks;
54
55 /* -------------------------------------------------------------------------- */
56
57 /*
58 * Undocumented debugging function. Helpful.
59 */
60 int
61 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
62 {
63 struct nvmm_x64_state state;
64 uint16_t *attr;
65 size_t i;
66 int ret;
67
68 const char *segnames[] = {
69 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
70 };
71
72 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
73 if (ret == -1)
74 return -1;
75
76 printf("+ VCPU id=%d\n", (int)cpuid);
77 printf("| -> RIP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RIP]);
78 printf("| -> RSP=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RSP]);
79 printf("| -> RAX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RAX]);
80 printf("| -> RBX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RBX]);
81 printf("| -> RCX=%"PRIx64"\n", state.gprs[NVMM_X64_GPR_RCX]);
82 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
83 for (i = 0; i < NVMM_X64_NSEG; i++) {
84 attr = (uint16_t *)&state.segs[i].attrib;
85 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
86 segnames[i],
87 state.segs[i].selector,
88 state.segs[i].base,
89 state.segs[i].limit,
90 *attr);
91 }
92 printf("| -> MSR_EFER=%"PRIx64"\n", state.msrs[NVMM_X64_MSR_EFER]);
93 printf("| -> CR0=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR0]);
94 printf("| -> CR3=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR3]);
95 printf("| -> CR4=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR4]);
96 printf("| -> CR8=%"PRIx64"\n", state.crs[NVMM_X64_CR_CR8]);
97
98 return 0;
99 }
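/*
 * A minimal usage sketch, assuming a machine and VCPU already created with
 * nvmm_machine_create() and nvmm_vcpu_create(); typically called from an
 * exit handler while debugging:
 *
 *	if (nvmm_vcpu_dump(&mach, cpuid) == -1)
 *		err(EXIT_FAILURE, "nvmm_vcpu_dump");
 */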
100
101 /* -------------------------------------------------------------------------- */
102
103 #define PTE32_L1_SHIFT 12
104 #define PTE32_L2_SHIFT 22
105
106 #define PTE32_L2_MASK 0xffc00000
107 #define PTE32_L1_MASK 0x003ff000
108
109 #define PTE32_L2_FRAME (PTE32_L2_MASK)
110 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
111
112 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
113 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
114
115 #define CR3_FRAME_32BIT PG_FRAME
116
117 typedef uint32_t pte_32bit_t;
118
119 static int
120 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
121 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
122 {
123 gpaddr_t L2gpa, L1gpa;
124 uintptr_t L2hva, L1hva;
125 pte_32bit_t *pdir, pte;
126 nvmm_prot_t pageprot;
127
128 /* We begin with an RWXU access. */
129 *prot = NVMM_PROT_ALL;
130
131 /* Parse L2. */
132 L2gpa = (cr3 & CR3_FRAME_32BIT);
133 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
134 return -1;
135 pdir = (pte_32bit_t *)L2hva;
136 pte = pdir[pte32_l2idx(gva)];
137 if ((pte & PG_V) == 0)
138 return -1;
139 if ((pte & PG_u) == 0)
140 *prot &= ~NVMM_PROT_USER;
141 if ((pte & PG_KW) == 0)
142 *prot &= ~NVMM_PROT_WRITE;
143 if ((pte & PG_PS) && !has_pse)
144 return -1;
145 if (pte & PG_PS) {
146 *gpa = (pte & PTE32_L2_FRAME);
147 *gpa = *gpa + (gva & PTE32_L1_MASK);
148 return 0;
149 }
150
151 /* Parse L1. */
152 L1gpa = (pte & PG_FRAME);
153 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
154 return -1;
155 pdir = (pte_32bit_t *)L1hva;
156 pte = pdir[pte32_l1idx(gva)];
157 if ((pte & PG_V) == 0)
158 return -1;
159 if ((pte & PG_u) == 0)
160 *prot &= ~NVMM_PROT_USER;
161 if ((pte & PG_KW) == 0)
162 *prot &= ~NVMM_PROT_WRITE;
163 if (pte & PG_PS)
164 return -1;
165
166 *gpa = (pte & PG_FRAME);
167 return 0;
168 }
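/*
 * Worked example of the non-PAE walk above: for gva = 0x00c01234,
 * pte32_l2idx() = (0x00c01234 & 0xffc00000) >> 22 = 3 and
 * pte32_l1idx() = (0x00c01234 & 0x003ff000) >> 12 = 1, so the walk reads
 * entry 3 of the page directory pointed to by CR3, then entry 1 of the
 * page table it references. The low 12 bits (0x234) are masked off and
 * re-added by x86_gva_to_gpa().
 */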
169
170 /* -------------------------------------------------------------------------- */
171
172 #define PTE32_PAE_L1_SHIFT 12
173 #define PTE32_PAE_L2_SHIFT 21
174 #define PTE32_PAE_L3_SHIFT 30
175
176 #define PTE32_PAE_L3_MASK 0xc0000000
177 #define PTE32_PAE_L2_MASK 0x3fe00000
178 #define PTE32_PAE_L1_MASK 0x001ff000
179
180 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
181 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
182 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
183
184 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
185 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
186 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
187
188 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
189
190 typedef uint64_t pte_32bit_pae_t;
191
192 static int
193 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
194 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
195 {
196 gpaddr_t L3gpa, L2gpa, L1gpa;
197 uintptr_t L3hva, L2hva, L1hva;
198 pte_32bit_pae_t *pdir, pte;
199 nvmm_prot_t pageprot;
200
201 /* We begin with an RWXU access. */
202 *prot = NVMM_PROT_ALL;
203
204 /* Parse L3. */
205 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
206 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
207 return -1;
208 pdir = (pte_32bit_pae_t *)L3hva;
209 pte = pdir[pte32_pae_l3idx(gva)];
210 if ((pte & PG_V) == 0)
211 return -1;
212 if (pte & PG_NX)
213 *prot &= ~NVMM_PROT_EXEC;
214 if (pte & PG_PS)
215 return -1;
216
217 /* Parse L2. */
218 L2gpa = (pte & PG_FRAME);
219 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
220 return -1;
221 pdir = (pte_32bit_pae_t *)L2hva;
222 pte = pdir[pte32_pae_l2idx(gva)];
223 if ((pte & PG_V) == 0)
224 return -1;
225 if ((pte & PG_u) == 0)
226 *prot &= ~NVMM_PROT_USER;
227 if ((pte & PG_KW) == 0)
228 *prot &= ~NVMM_PROT_WRITE;
229 if (pte & PG_NX)
230 *prot &= ~NVMM_PROT_EXEC;
231 if (pte & PG_PS) {
232 *gpa = (pte & PTE32_PAE_L2_FRAME);
233 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
234 return 0;
235 }
236
237 /* Parse L1. */
238 L1gpa = (pte & PG_FRAME);
239 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
240 return -1;
241 pdir = (pte_32bit_pae_t *)L1hva;
242 pte = pdir[pte32_pae_l1idx(gva)];
243 if ((pte & PG_V) == 0)
244 return -1;
245 if ((pte & PG_u) == 0)
246 *prot &= ~NVMM_PROT_USER;
247 if ((pte & PG_KW) == 0)
248 *prot &= ~NVMM_PROT_WRITE;
249 if (pte & PG_NX)
250 *prot &= ~NVMM_PROT_EXEC;
251 if (pte & PG_PS)
252 return -1;
253
254 *gpa = (pte & PG_FRAME);
255 return 0;
256 }
257
258 /* -------------------------------------------------------------------------- */
259
260 #define PTE64_L1_SHIFT 12
261 #define PTE64_L2_SHIFT 21
262 #define PTE64_L3_SHIFT 30
263 #define PTE64_L4_SHIFT 39
264
265 #define PTE64_L4_MASK 0x0000ff8000000000
266 #define PTE64_L3_MASK 0x0000007fc0000000
267 #define PTE64_L2_MASK 0x000000003fe00000
268 #define PTE64_L1_MASK 0x00000000001ff000
269
270 #define PTE64_L4_FRAME PTE64_L4_MASK
271 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
272 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
273 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
274
275 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
276 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
277 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
278 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
279
280 #define CR3_FRAME_64BIT PG_FRAME
281
282 typedef uint64_t pte_64bit_t;
283
284 static inline bool
285 x86_gva_64bit_canonical(gvaddr_t gva)
286 {
287 /* Bits 63:47 must have the same value. */
288 #define SIGN_EXTEND 0xffff800000000000ULL
289 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
290 }
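/*
 * Example: 0x00007fffffffe000 and 0xffff800000001000 are canonical (bits
 * 63:47 all clear, respectively all set), while 0x0000800000000000 is not,
 * and makes the 64bit walk below fail.
 */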
291
292 static int
293 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
294 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
295 {
296 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
297 uintptr_t L4hva, L3hva, L2hva, L1hva;
298 pte_64bit_t *pdir, pte;
299 nvmm_prot_t pageprot;
300
301 /* We begin with an RWXU access. */
302 *prot = NVMM_PROT_ALL;
303
304 if (!x86_gva_64bit_canonical(gva))
305 return -1;
306
307 /* Parse L4. */
308 L4gpa = (cr3 & CR3_FRAME_64BIT);
309 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
310 return -1;
311 pdir = (pte_64bit_t *)L4hva;
312 pte = pdir[pte64_l4idx(gva)];
313 if ((pte & PG_V) == 0)
314 return -1;
315 if ((pte & PG_u) == 0)
316 *prot &= ~NVMM_PROT_USER;
317 if ((pte & PG_KW) == 0)
318 *prot &= ~NVMM_PROT_WRITE;
319 if (pte & PG_NX)
320 *prot &= ~NVMM_PROT_EXEC;
321 if (pte & PG_PS)
322 return -1;
323
324 /* Parse L3. */
325 L3gpa = (pte & PG_FRAME);
326 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
327 return -1;
328 pdir = (pte_64bit_t *)L3hva;
329 pte = pdir[pte64_l3idx(gva)];
330 if ((pte & PG_V) == 0)
331 return -1;
332 if ((pte & PG_u) == 0)
333 *prot &= ~NVMM_PROT_USER;
334 if ((pte & PG_KW) == 0)
335 *prot &= ~NVMM_PROT_WRITE;
336 if (pte & PG_NX)
337 *prot &= ~NVMM_PROT_EXEC;
338 if (pte & PG_PS) {
339 *gpa = (pte & PTE64_L3_FRAME);
340 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
341 return 0;
342 }
343
344 /* Parse L2. */
345 L2gpa = (pte & PG_FRAME);
346 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
347 return -1;
348 pdir = (pte_64bit_t *)L2hva;
349 pte = pdir[pte64_l2idx(gva)];
350 if ((pte & PG_V) == 0)
351 return -1;
352 if ((pte & PG_u) == 0)
353 *prot &= ~NVMM_PROT_USER;
354 if ((pte & PG_KW) == 0)
355 *prot &= ~NVMM_PROT_WRITE;
356 if (pte & PG_NX)
357 *prot &= ~NVMM_PROT_EXEC;
358 if (pte & PG_PS) {
359 *gpa = (pte & PTE64_L2_FRAME);
360 *gpa = *gpa + (gva & PTE64_L1_MASK);
361 return 0;
362 }
363
364 /* Parse L1. */
365 L1gpa = (pte & PG_FRAME);
366 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
367 return -1;
368 pdir = (pte_64bit_t *)L1hva;
369 pte = pdir[pte64_l1idx(gva)];
370 if ((pte & PG_V) == 0)
371 return -1;
372 if ((pte & PG_u) == 0)
373 *prot &= ~NVMM_PROT_USER;
374 if ((pte & PG_KW) == 0)
375 *prot &= ~NVMM_PROT_WRITE;
376 if (pte & PG_NX)
377 *prot &= ~NVMM_PROT_EXEC;
378 if (pte & PG_PS)
379 return -1;
380
381 *gpa = (pte & PG_FRAME);
382 return 0;
383 }
384
385 static inline int
386 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
387 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
388 {
389 bool is_pae, is_lng, has_pse;
390 uint64_t cr3;
391 size_t off;
392 int ret;
393
394 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
395 /* No paging. */
396 *prot = NVMM_PROT_ALL;
397 *gpa = gva;
398 return 0;
399 }
400
401 off = (gva & PAGE_MASK);
402 gva &= ~PAGE_MASK;
403
404 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
405 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
406 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
407 cr3 = state->crs[NVMM_X64_CR_CR3];
408
409 if (is_pae && is_lng) {
410 /* 64bit */
411 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
412 } else if (is_pae && !is_lng) {
413 /* 32bit PAE */
414 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
415 } else if (!is_pae && !is_lng) {
416 /* 32bit */
417 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
418 } else {
419 ret = -1;
420 }
421
422 if (ret == -1) {
423 errno = EFAULT;
424 }
425
426 *gpa = *gpa + off;
427
428 return ret;
429 }
430
431 int
432 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
433 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
434 {
435 struct nvmm_x64_state state;
436 int ret;
437
438 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
439 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
440 if (ret == -1)
441 return -1;
442
443 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
444 }
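/*
 * Usage sketch (hypothetical emulator code, error handling elided):
 * translate a guest-virtual pointer and refuse to emulate a store if the
 * guest mapping is not writable. On a translation fault, -1 is returned
 * with errno set to EFAULT:
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(&mach, cpuid, gva, &gpa, &prot) == -1)
 *		return -1;
 *	if (!(prot & NVMM_PROT_WRITE))
 *		return -1;
 */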
445
446 /* -------------------------------------------------------------------------- */
447
448 static inline bool
449 is_long_mode(struct nvmm_x64_state *state)
450 {
451 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
452 }
453
454 static inline bool
455 is_64bit(struct nvmm_x64_state *state)
456 {
457 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
458 }
459
460 static inline bool
461 is_32bit(struct nvmm_x64_state *state)
462 {
463 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
464 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
465 }
466
467 static inline bool
468 is_16bit(struct nvmm_x64_state *state)
469 {
470 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
471 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
472 }
473
474 static int
475 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
476 {
477 uint64_t limit;
478
479 /*
480 * This is incomplete. We should also check expand-down (top-down)
481 * segments and other descriptor attributes; that is tedious.
482 */
483 if (__predict_false(!seg->attrib.p)) {
484 goto error;
485 }
486
487 limit = (uint64_t)seg->limit + 1;
488 if (__predict_true(seg->attrib.g)) {
489 limit *= PAGE_SIZE;
490 }
491
492 if (__predict_false(gva + size > limit)) {
493 goto error;
494 }
495
496 return 0;
497
498 error:
499 errno = EFAULT;
500 return -1;
501 }
502
503 static inline void
504 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
505 {
506 *gva += seg->base;
507 }
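/*
 * Example of the limit check in segment_check(): a segment with
 * limit = 0xfffff and the granularity bit set covers (0xfffff + 1) * 4096
 * bytes = 4GB, so any in-range gva + size passes; with g = 0 the same
 * limit only covers the first 1MB. segment_apply() then simply adds the
 * segment base to the offset.
 */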
508
509 static inline uint64_t
510 size_to_mask(size_t size)
511 {
512 switch (size) {
513 case 1:
514 return 0x00000000000000FF;
515 case 2:
516 return 0x000000000000FFFF;
517 case 4:
518 return 0x00000000FFFFFFFF;
519 case 8:
520 default:
521 return 0xFFFFFFFFFFFFFFFF;
522 }
523 }
524
525 static uint64_t
526 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
527 {
528 uint64_t mask, cnt;
529
530 mask = size_to_mask(adsize);
531 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
532
533 return cnt;
534 }
535
536 static void
537 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
538 {
539 uint64_t mask;
540
541 /* XXX: should we zero-extend? */
542 mask = size_to_mask(adsize);
543 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
544 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
545 }
546
547 static int
548 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
549 gvaddr_t gva, uint8_t *data, size_t size)
550 {
551 struct nvmm_mem mem;
552 nvmm_prot_t prot;
553 gpaddr_t gpa;
554 uintptr_t hva;
555 bool is_mmio;
556 int ret, remain;
557
558 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
559 if (__predict_false(ret == -1)) {
560 return -1;
561 }
562 if (__predict_false(!(prot & NVMM_PROT_READ))) {
563 errno = EFAULT;
564 return -1;
565 }
566
567 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
568 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
569 } else {
570 remain = 0;
571 }
572 size -= remain;
573
574 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
575 is_mmio = (ret == -1);
576
577 if (is_mmio) {
578 mem.data = data;
579 mem.gpa = gpa;
580 mem.write = false;
581 mem.size = size;
582 (*__callbacks.mem)(&mem);
583 } else {
584 if (__predict_false(!(prot & NVMM_PROT_READ))) {
585 errno = EFAULT;
586 return -1;
587 }
588 memcpy(data, (uint8_t *)hva, size);
589 }
590
591 if (remain > 0) {
592 ret = read_guest_memory(mach, state, gva + size,
593 data + size, remain);
594 } else {
595 ret = 0;
596 }
597
598 return ret;
599 }
600
601 static int
602 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
603 gvaddr_t gva, uint8_t *data, size_t size)
604 {
605 struct nvmm_mem mem;
606 nvmm_prot_t prot;
607 gpaddr_t gpa;
608 uintptr_t hva;
609 bool is_mmio;
610 int ret, remain;
611
612 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
613 if (__predict_false(ret == -1)) {
614 return -1;
615 }
616 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
617 errno = EFAULT;
618 return -1;
619 }
620
621 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
622 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
623 } else {
624 remain = 0;
625 }
626 size -= remain;
627
628 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
629 is_mmio = (ret == -1);
630
631 if (is_mmio) {
632 mem.data = data;
633 mem.gpa = gpa;
634 mem.write = true;
635 mem.size = size;
636 (*__callbacks.mem)(&mem);
637 } else {
638 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
639 errno = EFAULT;
640 return -1;
641 }
642 memcpy((uint8_t *)hva, data, size);
643 }
644
645 if (remain > 0) {
646 ret = write_guest_memory(mach, state, gva + size,
647 data + size, remain);
648 } else {
649 ret = 0;
650 }
651
652 return ret;
653 }
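/*
 * Example of the page-split handling in the two functions above: a 6-byte
 * access whose gva has page offset 0xffd crosses a page boundary, so
 * remain = (0xffd + 6) - 0x1000 = 3. The first pass copies (or sends to
 * the MMIO callback) 3 bytes in the current page, then recurses once for
 * the remaining 3 bytes, whose page may independently turn out to be MMIO.
 */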
654
655 /* -------------------------------------------------------------------------- */
656
657 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
658
659 #define NVMM_IO_BATCH_SIZE 32
660
661 static int
662 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
663 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
664 {
665 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
666 size_t i, iosize, iocnt;
667 int ret;
668
669 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
670 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
671 iocnt = iosize / io->size;
672
673 io->data = iobuf;
674
675 if (!io->in) {
676 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
677 if (ret == -1)
678 return -1;
679 }
680
681 for (i = 0; i < iocnt; i++) {
682 (*__callbacks.io)(io);
683 io->data += io->size;
684 }
685
686 if (io->in) {
687 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
688 if (ret == -1)
689 return -1;
690 }
691
692 return iocnt;
693 }
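/*
 * Example of the batching math above: for a "rep outsw" with RCX = 100 and
 * io->size = 2, cnt is clamped to 32, iosize = MIN(2 * 32, 32) = 32 and
 * iocnt = 16. One call thus emulates 16 iterations; RCX is decremented
 * accordingly by the caller and, since it is still non-zero, RIP is left
 * untouched so the guest re-executes the instruction and traps again for
 * the rest.
 */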
694
695 int
696 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
697 struct nvmm_exit *exit)
698 {
699 struct nvmm_x64_state state;
700 struct nvmm_io io;
701 uint64_t cnt = 0; /* GCC */
702 uint8_t iobuf[8];
703 int iocnt = 1;
704 gvaddr_t gva = 0; /* GCC */
705 int reg = 0; /* GCC */
706 int ret, seg;
707 bool psld = false;
708
709 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
710 errno = EINVAL;
711 return -1;
712 }
713
714 io.port = exit->u.io.port;
715 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
716 io.size = exit->u.io.operand_size;
717 io.data = iobuf;
718
719 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
720 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
721 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
722 if (ret == -1)
723 return -1;
724
725 if (exit->u.io.rep) {
726 cnt = rep_get_cnt(&state, exit->u.io.address_size);
727 if (__predict_false(cnt == 0)) {
728 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
729 goto out;
730 }
731 }
732
733 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
734 psld = true;
735 }
736
737 /*
738 * Determine GVA.
739 */
740 if (exit->u.io.str) {
741 if (io.in) {
742 reg = NVMM_X64_GPR_RDI;
743 } else {
744 reg = NVMM_X64_GPR_RSI;
745 }
746
747 gva = state.gprs[reg];
748 gva &= size_to_mask(exit->u.io.address_size);
749
750 if (exit->u.io.seg != -1) {
751 seg = exit->u.io.seg;
752 } else {
753 if (io.in) {
754 seg = NVMM_X64_SEG_ES;
755 } else {
756 seg = fetch_segment(mach, &state);
757 if (seg == -1)
758 return -1;
759 }
760 }
761
762 if (__predict_true(is_long_mode(&state))) {
763 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
764 segment_apply(&state.segs[seg], &gva);
765 }
766 } else {
767 ret = segment_check(&state.segs[seg], gva, io.size);
768 if (ret == -1)
769 return -1;
770 segment_apply(&state.segs[seg], &gva);
771 }
772
773 if (exit->u.io.rep && !psld) {
774 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
775 if (iocnt == -1)
776 return -1;
777 goto done;
778 }
779 }
780
781 if (!io.in) {
782 if (!exit->u.io.str) {
783 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
784 } else {
785 ret = read_guest_memory(mach, &state, gva, io.data,
786 io.size);
787 if (ret == -1)
788 return -1;
789 }
790 }
791
792 (*__callbacks.io)(&io);
793
794 if (io.in) {
795 if (!exit->u.io.str) {
796 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
797 if (io.size == 4) {
798 /* Zero-extend to 64 bits. */
799 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
800 }
801 } else {
802 ret = write_guest_memory(mach, &state, gva, io.data,
803 io.size);
804 if (ret == -1)
805 return -1;
806 }
807 }
808
809 done:
810 if (exit->u.io.str) {
811 if (__predict_false(psld)) {
812 state.gprs[reg] -= iocnt * io.size;
813 } else {
814 state.gprs[reg] += iocnt * io.size;
815 }
816 }
817
818 if (exit->u.io.rep) {
819 cnt -= iocnt;
820 rep_set_cnt(&state, exit->u.io.address_size, cnt);
821 if (cnt == 0) {
822 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
823 }
824 } else {
825 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
826 }
827
828 out:
829 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
830 if (ret == -1)
831 return -1;
832
833 return 0;
834 }
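/*
 * Caller-side sketch, assuming the usual nvmm_vcpu_run() loop (other exit
 * reasons and error handling elided):
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(&mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(&mach, cpuid, &exit) == -1)
 *				errx(EXIT_FAILURE, "I/O assist failed");
 *			break;
 *		}
 *	}
 */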
835
836 /* -------------------------------------------------------------------------- */
837
838 struct x86_emul {
839 bool read;
840 bool notouch;
841 void (*func)(struct nvmm_mem *, uint64_t *);
842 };
843
844 static void x86_func_or(struct nvmm_mem *, uint64_t *);
845 static void x86_func_and(struct nvmm_mem *, uint64_t *);
846 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
847 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
848 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
849 static void x86_func_test(struct nvmm_mem *, uint64_t *);
850 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
851 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
852 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
853 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
854
855 static const struct x86_emul x86_emul_or = {
856 .read = true,
857 .func = x86_func_or
858 };
859
860 static const struct x86_emul x86_emul_and = {
861 .read = true,
862 .func = x86_func_and
863 };
864
865 static const struct x86_emul x86_emul_sub = {
866 .read = true,
867 .func = x86_func_sub
868 };
869
870 static const struct x86_emul x86_emul_xor = {
871 .read = true,
872 .func = x86_func_xor
873 };
874
875 static const struct x86_emul x86_emul_cmp = {
876 .notouch = true,
877 .func = x86_func_cmp
878 };
879
880 static const struct x86_emul x86_emul_test = {
881 .notouch = true,
882 .func = x86_func_test
883 };
884
885 static const struct x86_emul x86_emul_mov = {
886 .func = x86_func_mov
887 };
888
889 static const struct x86_emul x86_emul_stos = {
890 .func = x86_func_stos
891 };
892
893 static const struct x86_emul x86_emul_lods = {
894 .func = x86_func_lods
895 };
896
897 static const struct x86_emul x86_emul_movs = {
898 .func = x86_func_movs
899 };
900
901 /* Legacy prefixes. */
902 #define LEG_LOCK 0xF0
903 #define LEG_REPN 0xF2
904 #define LEG_REP 0xF3
905 #define LEG_OVR_CS 0x2E
906 #define LEG_OVR_SS 0x36
907 #define LEG_OVR_DS 0x3E
908 #define LEG_OVR_ES 0x26
909 #define LEG_OVR_FS 0x64
910 #define LEG_OVR_GS 0x65
911 #define LEG_OPR_OVR 0x66
912 #define LEG_ADR_OVR 0x67
913
914 struct x86_legpref {
915 bool opr_ovr:1;
916 bool adr_ovr:1;
917 bool rep:1;
918 bool repn:1;
919 int8_t seg;
920 };
921
922 struct x86_rexpref {
923 bool b:1;
924 bool x:1;
925 bool r:1;
926 bool w:1;
927 bool present:1;
928 };
929
930 struct x86_reg {
931 int num; /* NVMM GPR state index */
932 uint64_t mask;
933 };
934
935 enum x86_disp_type {
936 DISP_NONE,
937 DISP_0,
938 DISP_1,
939 DISP_4
940 };
941
942 struct x86_disp {
943 enum x86_disp_type type;
944 uint64_t data; /* 4 bytes, but can be sign-extended */
945 };
946
947 enum REGMODRM__Mod {
948 MOD_DIS0, /* also, register indirect */
949 MOD_DIS1,
950 MOD_DIS4,
951 MOD_REG
952 };
953
954 enum REGMODRM__Reg {
955 REG_000, /* these fields are indexes to the register map */
956 REG_001,
957 REG_010,
958 REG_011,
959 REG_100,
960 REG_101,
961 REG_110,
962 REG_111
963 };
964
965 enum REGMODRM__Rm {
966 RM_000, /* reg */
967 RM_001, /* reg */
968 RM_010, /* reg */
969 RM_011, /* reg */
970 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
971 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
972 RM_110,
973 RM_111
974 };
975
976 struct x86_regmodrm {
977 uint8_t mod:2;
978 uint8_t reg:3;
979 uint8_t rm:3;
980 };
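/*
 * Example: the ModRM byte 0x48 decomposes as mod = 0b01 (MOD_DIS1),
 * reg = 0b001, rm = 0b000: an indirect access through the rm register
 * with an 8-bit displacement, the reg field naming the other operand (or
 * the group index, for group opcodes).
 */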
981
982 struct x86_immediate {
983 uint64_t data;
984 };
985
986 struct x86_sib {
987 uint8_t scale;
988 const struct x86_reg *idx;
989 const struct x86_reg *bas;
990 };
991
992 enum x86_store_type {
993 STORE_NONE,
994 STORE_REG,
995 STORE_IMM,
996 STORE_SIB,
997 STORE_DMO
998 };
999
1000 struct x86_store {
1001 enum x86_store_type type;
1002 union {
1003 const struct x86_reg *reg;
1004 struct x86_immediate imm;
1005 struct x86_sib sib;
1006 uint64_t dmo;
1007 } u;
1008 struct x86_disp disp;
1009 int hardseg;
1010 };
1011
1012 struct x86_instr {
1013 uint8_t len;
1014 struct x86_legpref legpref;
1015 struct x86_rexpref rexpref;
1016 struct x86_regmodrm regmodrm;
1017 uint8_t operand_size;
1018 uint8_t address_size;
1019 uint64_t zeroextend_mask;
1020
1021 const struct x86_opcode *opcode;
1022 const struct x86_emul *emul;
1023
1024 struct x86_store src;
1025 struct x86_store dst;
1026 struct x86_store *strm;
1027 };
1028
1029 struct x86_decode_fsm {
1030 /* vcpu */
1031 bool is64bit;
1032 bool is32bit;
1033 bool is16bit;
1034
1035 /* fsm */
1036 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1037 uint8_t *buf;
1038 uint8_t *end;
1039 };
1040
1041 struct x86_opcode {
1042 bool valid:1;
1043 bool regmodrm:1;
1044 bool regtorm:1;
1045 bool dmo:1;
1046 bool todmo:1;
1047 bool movs:1;
1048 bool stos:1;
1049 bool lods:1;
1050 bool szoverride:1;
1051 bool group1:1;
1052 bool group3:1;
1053 bool group11:1;
1054 bool immediate:1;
1055 uint8_t defsize;
1056 uint8_t flags;
1057 const struct x86_emul *emul;
1058 };
1059
1060 struct x86_group_entry {
1061 const struct x86_emul *emul;
1062 };
1063
1064 #define OPSIZE_BYTE 0x01
1065 #define OPSIZE_WORD 0x02 /* 2 bytes */
1066 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1067 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1068
1069 #define FLAG_imm8 0x01
1070 #define FLAG_immz 0x02
1071 #define FLAG_ze 0x04
1072
1073 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1074 [1] = { .emul = &x86_emul_or },
1075 [4] = { .emul = &x86_emul_and },
1076 [6] = { .emul = &x86_emul_xor },
1077 [7] = { .emul = &x86_emul_cmp }
1078 };
1079
1080 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1081 [0] = { .emul = &x86_emul_test },
1082 [1] = { .emul = &x86_emul_test }
1083 };
1084
1085 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1086 [0] = { .emul = &x86_emul_mov }
1087 };
1088
1089 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1090 /*
1091 * Group1
1092 */
1093 [0x80] = {
1094 /* Eb, Ib */
1095 .valid = true,
1096 .regmodrm = true,
1097 .regtorm = true,
1098 .szoverride = false,
1099 .defsize = OPSIZE_BYTE,
1100 .group1 = true,
1101 .immediate = true,
1102 .emul = NULL /* group1 */
1103 },
1104 [0x81] = {
1105 /* Ev, Iz */
1106 .valid = true,
1107 .regmodrm = true,
1108 .regtorm = true,
1109 .szoverride = true,
1110 .defsize = -1,
1111 .group1 = true,
1112 .immediate = true,
1113 .flags = FLAG_immz,
1114 .emul = NULL /* group1 */
1115 },
1116 [0x83] = {
1117 /* Ev, Ib */
1118 .valid = true,
1119 .regmodrm = true,
1120 .regtorm = true,
1121 .szoverride = true,
1122 .defsize = -1,
1123 .group1 = true,
1124 .immediate = true,
1125 .flags = FLAG_imm8,
1126 .emul = NULL /* group1 */
1127 },
1128
1129 /*
1130 * Group3
1131 */
1132 [0xF6] = {
1133 /* Eb, Ib */
1134 .valid = true,
1135 .regmodrm = true,
1136 .regtorm = true,
1137 .szoverride = false,
1138 .defsize = OPSIZE_BYTE,
1139 .group3 = true,
1140 .immediate = true,
1141 .emul = NULL /* group3 */
1142 },
1143 [0xF7] = {
1144 /* Ev, Iz */
1145 .valid = true,
1146 .regmodrm = true,
1147 .regtorm = true,
1148 .szoverride = true,
1149 .defsize = -1,
1150 .group3 = true,
1151 .immediate = true,
1152 .flags = FLAG_immz,
1153 .emul = NULL /* group3 */
1154 },
1155
1156 /*
1157 * Group11
1158 */
1159 [0xC6] = {
1160 /* Eb, Ib */
1161 .valid = true,
1162 .regmodrm = true,
1163 .regtorm = true,
1164 .szoverride = false,
1165 .defsize = OPSIZE_BYTE,
1166 .group11 = true,
1167 .immediate = true,
1168 .emul = NULL /* group11 */
1169 },
1170 [0xC7] = {
1171 /* Ev, Iz */
1172 .valid = true,
1173 .regmodrm = true,
1174 .regtorm = true,
1175 .szoverride = true,
1176 .defsize = -1,
1177 .group11 = true,
1178 .immediate = true,
1179 .flags = FLAG_immz,
1180 .emul = NULL /* group11 */
1181 },
1182
1183 /*
1184 * OR
1185 */
1186 [0x08] = {
1187 /* Eb, Gb */
1188 .valid = true,
1189 .regmodrm = true,
1190 .regtorm = true,
1191 .szoverride = false,
1192 .defsize = OPSIZE_BYTE,
1193 .emul = &x86_emul_or
1194 },
1195 [0x09] = {
1196 /* Ev, Gv */
1197 .valid = true,
1198 .regmodrm = true,
1199 .regtorm = true,
1200 .szoverride = true,
1201 .defsize = -1,
1202 .emul = &x86_emul_or
1203 },
1204 [0x0A] = {
1205 /* Gb, Eb */
1206 .valid = true,
1207 .regmodrm = true,
1208 .regtorm = false,
1209 .szoverride = false,
1210 .defsize = OPSIZE_BYTE,
1211 .emul = &x86_emul_or
1212 },
1213 [0x0B] = {
1214 /* Gv, Ev */
1215 .valid = true,
1216 .regmodrm = true,
1217 .regtorm = false,
1218 .szoverride = true,
1219 .defsize = -1,
1220 .emul = &x86_emul_or
1221 },
1222
1223 /*
1224 * AND
1225 */
1226 [0x20] = {
1227 /* Eb, Gb */
1228 .valid = true,
1229 .regmodrm = true,
1230 .regtorm = true,
1231 .szoverride = false,
1232 .defsize = OPSIZE_BYTE,
1233 .emul = &x86_emul_and
1234 },
1235 [0x21] = {
1236 /* Ev, Gv */
1237 .valid = true,
1238 .regmodrm = true,
1239 .regtorm = true,
1240 .szoverride = true,
1241 .defsize = -1,
1242 .emul = &x86_emul_and
1243 },
1244 [0x22] = {
1245 /* Gb, Eb */
1246 .valid = true,
1247 .regmodrm = true,
1248 .regtorm = false,
1249 .szoverride = false,
1250 .defsize = OPSIZE_BYTE,
1251 .emul = &x86_emul_and
1252 },
1253 [0x23] = {
1254 /* Gv, Ev */
1255 .valid = true,
1256 .regmodrm = true,
1257 .regtorm = false,
1258 .szoverride = true,
1259 .defsize = -1,
1260 .emul = &x86_emul_and
1261 },
1262
1263 /*
1264 * SUB
1265 */
1266 [0x28] = {
1267 /* Eb, Gb */
1268 .valid = true,
1269 .regmodrm = true,
1270 .regtorm = true,
1271 .szoverride = false,
1272 .defsize = OPSIZE_BYTE,
1273 .emul = &x86_emul_sub
1274 },
1275 [0x29] = {
1276 /* Ev, Gv */
1277 .valid = true,
1278 .regmodrm = true,
1279 .regtorm = true,
1280 .szoverride = true,
1281 .defsize = -1,
1282 .emul = &x86_emul_sub
1283 },
1284 [0x2A] = {
1285 /* Gb, Eb */
1286 .valid = true,
1287 .regmodrm = true,
1288 .regtorm = false,
1289 .szoverride = false,
1290 .defsize = OPSIZE_BYTE,
1291 .emul = &x86_emul_sub
1292 },
1293 [0x2B] = {
1294 /* Gv, Ev */
1295 .valid = true,
1296 .regmodrm = true,
1297 .regtorm = false,
1298 .szoverride = true,
1299 .defsize = -1,
1300 .emul = &x86_emul_sub
1301 },
1302
1303 /*
1304 * XOR
1305 */
1306 [0x30] = {
1307 /* Eb, Gb */
1308 .valid = true,
1309 .regmodrm = true,
1310 .regtorm = true,
1311 .szoverride = false,
1312 .defsize = OPSIZE_BYTE,
1313 .emul = &x86_emul_xor
1314 },
1315 [0x31] = {
1316 /* Ev, Gv */
1317 .valid = true,
1318 .regmodrm = true,
1319 .regtorm = true,
1320 .szoverride = true,
1321 .defsize = -1,
1322 .emul = &x86_emul_xor
1323 },
1324 [0x32] = {
1325 /* Gb, Eb */
1326 .valid = true,
1327 .regmodrm = true,
1328 .regtorm = false,
1329 .szoverride = false,
1330 .defsize = OPSIZE_BYTE,
1331 .emul = &x86_emul_xor
1332 },
1333 [0x33] = {
1334 /* Gv, Ev */
1335 .valid = true,
1336 .regmodrm = true,
1337 .regtorm = false,
1338 .szoverride = true,
1339 .defsize = -1,
1340 .emul = &x86_emul_xor
1341 },
1342
1343 /*
1344 * MOV
1345 */
1346 [0x88] = {
1347 /* Eb, Gb */
1348 .valid = true,
1349 .regmodrm = true,
1350 .regtorm = true,
1351 .szoverride = false,
1352 .defsize = OPSIZE_BYTE,
1353 .emul = &x86_emul_mov
1354 },
1355 [0x89] = {
1356 /* Ev, Gv */
1357 .valid = true,
1358 .regmodrm = true,
1359 .regtorm = true,
1360 .szoverride = true,
1361 .defsize = -1,
1362 .emul = &x86_emul_mov
1363 },
1364 [0x8A] = {
1365 /* Gb, Eb */
1366 .valid = true,
1367 .regmodrm = true,
1368 .regtorm = false,
1369 .szoverride = false,
1370 .defsize = OPSIZE_BYTE,
1371 .emul = &x86_emul_mov
1372 },
1373 [0x8B] = {
1374 /* Gv, Ev */
1375 .valid = true,
1376 .regmodrm = true,
1377 .regtorm = false,
1378 .szoverride = true,
1379 .defsize = -1,
1380 .emul = &x86_emul_mov
1381 },
1382 [0xA0] = {
1383 /* AL, Ob */
1384 .valid = true,
1385 .dmo = true,
1386 .todmo = false,
1387 .szoverride = false,
1388 .defsize = OPSIZE_BYTE,
1389 .emul = &x86_emul_mov
1390 },
1391 [0xA1] = {
1392 /* rAX, Ov */
1393 .valid = true,
1394 .dmo = true,
1395 .todmo = false,
1396 .szoverride = true,
1397 .defsize = -1,
1398 .emul = &x86_emul_mov
1399 },
1400 [0xA2] = {
1401 /* Ob, AL */
1402 .valid = true,
1403 .dmo = true,
1404 .todmo = true,
1405 .szoverride = false,
1406 .defsize = OPSIZE_BYTE,
1407 .emul = &x86_emul_mov
1408 },
1409 [0xA3] = {
1410 /* Ov, rAX */
1411 .valid = true,
1412 .dmo = true,
1413 .todmo = true,
1414 .szoverride = true,
1415 .defsize = -1,
1416 .emul = &x86_emul_mov
1417 },
1418
1419 /*
1420 * MOVS
1421 */
1422 [0xA4] = {
1423 /* Yb, Xb */
1424 .valid = true,
1425 .movs = true,
1426 .szoverride = false,
1427 .defsize = OPSIZE_BYTE,
1428 .emul = &x86_emul_movs
1429 },
1430 [0xA5] = {
1431 /* Yv, Xv */
1432 .valid = true,
1433 .movs = true,
1434 .szoverride = true,
1435 .defsize = -1,
1436 .emul = &x86_emul_movs
1437 },
1438
1439 /*
1440 * STOS
1441 */
1442 [0xAA] = {
1443 /* Yb, AL */
1444 .valid = true,
1445 .stos = true,
1446 .szoverride = false,
1447 .defsize = OPSIZE_BYTE,
1448 .emul = &x86_emul_stos
1449 },
1450 [0xAB] = {
1451 /* Yv, rAX */
1452 .valid = true,
1453 .stos = true,
1454 .szoverride = true,
1455 .defsize = -1,
1456 .emul = &x86_emul_stos
1457 },
1458
1459 /*
1460 * LODS
1461 */
1462 [0xAC] = {
1463 /* AL, Xb */
1464 .valid = true,
1465 .lods = true,
1466 .szoverride = false,
1467 .defsize = OPSIZE_BYTE,
1468 .emul = &x86_emul_lods
1469 },
1470 [0xAD] = {
1471 /* rAX, Xv */
1472 .valid = true,
1473 .lods = true,
1474 .szoverride = true,
1475 .defsize = -1,
1476 .emul = &x86_emul_lods
1477 },
1478 };
1479
1480 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1481 /*
1482 * MOVZX
1483 */
1484 [0xB6] = {
1485 /* Gv, Eb */
1486 .valid = true,
1487 .regmodrm = true,
1488 .regtorm = false,
1489 .szoverride = true,
1490 .defsize = OPSIZE_BYTE,
1491 .flags = FLAG_ze,
1492 .emul = &x86_emul_mov
1493 },
1494 [0xB7] = {
1495 /* Gv, Ew */
1496 .valid = true,
1497 .regmodrm = true,
1498 .regtorm = false,
1499 .szoverride = true,
1500 .defsize = OPSIZE_WORD,
1501 .flags = FLAG_ze,
1502 .emul = &x86_emul_mov
1503 },
1504 };
1505
1506 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1507
1508 /* [REX-present][enc][opsize] */
1509 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1510 [false] = {
1511 /* No REX prefix. */
1512 [0b00] = {
1513 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1514 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1515 [2] = { -1, 0 },
1516 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1517 [4] = { -1, 0 },
1518 [5] = { -1, 0 },
1519 [6] = { -1, 0 },
1520 [7] = { -1, 0 },
1521 },
1522 [0b01] = {
1523 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1524 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1525 [2] = { -1, 0 },
1526 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1527 [4] = { -1, 0 },
1528 [5] = { -1, 0 },
1529 [6] = { -1, 0 },
1530 [7] = { -1, 0 },
1531 },
1532 [0b10] = {
1533 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1534 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1535 [2] = { -1, 0 },
1536 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1537 [4] = { -1, 0 },
1538 [5] = { -1, 0 },
1539 [6] = { -1, 0 },
1540 [7] = { -1, 0 },
1541 },
1542 [0b11] = {
1543 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1544 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1545 [2] = { -1, 0 },
1546 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1547 [4] = { -1, 0 },
1548 [5] = { -1, 0 },
1549 [6] = { -1, 0 },
1550 [7] = { -1, 0 },
1551 }
1552 },
1553 [true] = {
1554 /* Has REX prefix. */
1555 [0b00] = {
1556 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1557 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1558 [2] = { -1, 0 },
1559 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1560 [4] = { -1, 0 },
1561 [5] = { -1, 0 },
1562 [6] = { -1, 0 },
1563 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1564 },
1565 [0b01] = {
1566 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1567 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1568 [2] = { -1, 0 },
1569 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1570 [4] = { -1, 0 },
1571 [5] = { -1, 0 },
1572 [6] = { -1, 0 },
1573 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1574 },
1575 [0b10] = {
1576 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1577 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1578 [2] = { -1, 0 },
1579 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1580 [4] = { -1, 0 },
1581 [5] = { -1, 0 },
1582 [6] = { -1, 0 },
1583 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1584 },
1585 [0b11] = {
1586 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1587 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1588 [2] = { -1, 0 },
1589 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1590 [4] = { -1, 0 },
1591 [5] = { -1, 0 },
1592 [6] = { -1, 0 },
1593 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1594 }
1595 }
1596 };
1597
1598 /* [depends][enc][size] */
1599 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1600 [false] = {
1601 /* Not extended. */
1602 [0b000] = {
1603 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1604 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1605 [2] = { -1, 0 },
1606 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1607 [4] = { -1, 0 },
1608 [5] = { -1, 0 },
1609 [6] = { -1, 0 },
1610 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1611 },
1612 [0b001] = {
1613 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1614 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1615 [2] = { -1, 0 },
1616 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1617 [4] = { -1, 0 },
1618 [5] = { -1, 0 },
1619 [6] = { -1, 0 },
1620 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1621 },
1622 [0b010] = {
1623 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1624 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1625 [2] = { -1, 0 },
1626 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1627 [4] = { -1, 0 },
1628 [5] = { -1, 0 },
1629 [6] = { -1, 0 },
1630 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1631 },
1632 [0b011] = {
1633 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1634 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1635 [2] = { -1, 0 },
1636 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1637 [4] = { -1, 0 },
1638 [5] = { -1, 0 },
1639 [6] = { -1, 0 },
1640 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1641 },
1642 [0b100] = {
1643 [0] = { -1, 0 }, /* SPECIAL */
1644 [1] = { -1, 0 }, /* SPECIAL */
1645 [2] = { -1, 0 },
1646 [3] = { -1, 0 }, /* SPECIAL */
1647 [4] = { -1, 0 },
1648 [5] = { -1, 0 },
1649 [6] = { -1, 0 },
1650 [7] = { -1, 0 }, /* SPECIAL */
1651 },
1652 [0b101] = {
1653 [0] = { -1, 0 }, /* SPECIAL */
1654 [1] = { -1, 0 }, /* SPECIAL */
1655 [2] = { -1, 0 },
1656 [3] = { -1, 0 }, /* SPECIAL */
1657 [4] = { -1, 0 },
1658 [5] = { -1, 0 },
1659 [6] = { -1, 0 },
1660 [7] = { -1, 0 }, /* SPECIAL */
1661 },
1662 [0b110] = {
1663 [0] = { -1, 0 }, /* SPECIAL */
1664 [1] = { -1, 0 }, /* SPECIAL */
1665 [2] = { -1, 0 },
1666 [3] = { -1, 0 }, /* SPECIAL */
1667 [4] = { -1, 0 },
1668 [5] = { -1, 0 },
1669 [6] = { -1, 0 },
1670 [7] = { -1, 0 }, /* SPECIAL */
1671 },
1672 [0b111] = {
1673 [0] = { -1, 0 }, /* SPECIAL */
1674 [1] = { -1, 0 }, /* SPECIAL */
1675 [2] = { -1, 0 },
1676 [3] = { -1, 0 }, /* SPECIAL */
1677 [4] = { -1, 0 },
1678 [5] = { -1, 0 },
1679 [6] = { -1, 0 },
1680 [7] = { -1, 0 }, /* SPECIAL */
1681 },
1682 },
1683 [true] = {
1684 /* Extended. */
1685 [0b000] = {
1686 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1687 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1688 [2] = { -1, 0 },
1689 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1690 [4] = { -1, 0 },
1691 [5] = { -1, 0 },
1692 [6] = { -1, 0 },
1693 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1694 },
1695 [0b001] = {
1696 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1697 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1698 [2] = { -1, 0 },
1699 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1700 [4] = { -1, 0 },
1701 [5] = { -1, 0 },
1702 [6] = { -1, 0 },
1703 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1704 },
1705 [0b010] = {
1706 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1707 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1708 [2] = { -1, 0 },
1709 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1710 [4] = { -1, 0 },
1711 [5] = { -1, 0 },
1712 [6] = { -1, 0 },
1713 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1714 },
1715 [0b011] = {
1716 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1717 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1718 [2] = { -1, 0 },
1719 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1720 [4] = { -1, 0 },
1721 [5] = { -1, 0 },
1722 [6] = { -1, 0 },
1723 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1724 },
1725 [0b100] = {
1726 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1727 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1728 [2] = { -1, 0 },
1729 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1730 [4] = { -1, 0 },
1731 [5] = { -1, 0 },
1732 [6] = { -1, 0 },
1733 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1734 },
1735 [0b101] = {
1736 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1737 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1738 [2] = { -1, 0 },
1739 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1740 [4] = { -1, 0 },
1741 [5] = { -1, 0 },
1742 [6] = { -1, 0 },
1743 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1744 },
1745 [0b110] = {
1746 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1747 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1748 [2] = { -1, 0 },
1749 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1750 [4] = { -1, 0 },
1751 [5] = { -1, 0 },
1752 [6] = { -1, 0 },
1753 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1754 },
1755 [0b111] = {
1756 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1757 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1758 [2] = { -1, 0 },
1759 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1760 [4] = { -1, 0 },
1761 [5] = { -1, 0 },
1762 [6] = { -1, 0 },
1763 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1764 },
1765 }
1766 };
1767
1768 static int
1769 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1770 {
1771 fsm->fn = NULL;
1772 return -1;
1773 }
1774
1775 static int
1776 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1777 {
1778 if (fsm->buf + n > fsm->end) {
1779 return -1;
1780 }
1781 memcpy(bytes, fsm->buf, n);
1782 return 0;
1783 }
1784
1785 static inline void
1786 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1787 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1788 {
1789 fsm->buf += n;
1790 if (fsm->buf > fsm->end) {
1791 fsm->fn = node_overflow;
1792 } else {
1793 fsm->fn = fn;
1794 }
1795 }
1796
1797 static const struct x86_reg *
1798 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1799 {
1800 enc &= 0b11;
1801 if (regsize == 8) {
1802 /* May be 64bit without REX */
1803 return &gpr_map__special[1][enc][regsize-1];
1804 }
1805 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1806 }
1807
1808 /*
1809 * Special node, for MOVS. Fake two displacements of zero on the source and
1810 * destination registers.
1811 */
1812 static int
1813 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1814 {
1815 size_t adrsize;
1816
1817 adrsize = instr->address_size;
1818
1819 /* DS:RSI */
1820 instr->src.type = STORE_REG;
1821 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1822 instr->src.disp.type = DISP_0;
1823
1824 /* ES:RDI, force ES */
1825 instr->dst.type = STORE_REG;
1826 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1827 instr->dst.disp.type = DISP_0;
1828 instr->dst.hardseg = NVMM_X64_SEG_ES;
1829
1830 fsm_advance(fsm, 0, NULL);
1831
1832 return 0;
1833 }
1834
1835 /*
1836 * Special node, for STOS and LODS. Fake a displacement of zero on the
1837 * string register (the destination for STOS, the source for LODS).
1838 */
1839 static int
1840 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1841 {
1842 const struct x86_opcode *opcode = instr->opcode;
1843 struct x86_store *stlo, *streg;
1844 size_t adrsize, regsize;
1845
1846 adrsize = instr->address_size;
1847 regsize = instr->operand_size;
1848
1849 if (opcode->stos) {
1850 streg = &instr->src;
1851 stlo = &instr->dst;
1852 } else {
1853 streg = &instr->dst;
1854 stlo = &instr->src;
1855 }
1856
1857 streg->type = STORE_REG;
1858 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1859
1860 stlo->type = STORE_REG;
1861 if (opcode->stos) {
1862 /* ES:RDI, force ES */
1863 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1864 stlo->hardseg = NVMM_X64_SEG_ES;
1865 } else {
1866 /* DS:RSI */
1867 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1868 }
1869 stlo->disp.type = DISP_0;
1870
1871 fsm_advance(fsm, 0, NULL);
1872
1873 return 0;
1874 }
1875
1876 static int
1877 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1878 {
1879 const struct x86_opcode *opcode = instr->opcode;
1880 struct x86_store *stdmo, *streg;
1881 size_t adrsize, regsize;
1882
1883 adrsize = instr->address_size;
1884 regsize = instr->operand_size;
1885
1886 if (opcode->todmo) {
1887 streg = &instr->src;
1888 stdmo = &instr->dst;
1889 } else {
1890 streg = &instr->dst;
1891 stdmo = &instr->src;
1892 }
1893
1894 streg->type = STORE_REG;
1895 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1896
1897 stdmo->type = STORE_DMO;
1898 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1899 return -1;
1900 }
1901 fsm_advance(fsm, adrsize, NULL);
1902
1903 return 0;
1904 }
1905
1906 static inline uint64_t
1907 sign_extend(uint64_t val, int size)
1908 {
1909 if (size == 1) {
1910 if (val & __BIT(7))
1911 val |= 0xFFFFFFFFFFFFFF00;
1912 } else if (size == 2) {
1913 if (val & __BIT(15))
1914 val |= 0xFFFFFFFFFFFF0000;
1915 } else if (size == 4) {
1916 if (val & __BIT(31))
1917 val |= 0xFFFFFFFF00000000;
1918 }
1919 return val;
1920 }
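/*
 * Example: sign_extend(0x80, 1) == 0xFFFFFFFFFFFFFF80 while
 * sign_extend(0x7F, 1) == 0x7F. This is what widens the 8-bit immediate
 * of the 0x83 group1 form (and the 32-bit immz immediate of a 64-bit
 * operation) to the full operand width.
 */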
1921
1922 static int
1923 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1924 {
1925 const struct x86_opcode *opcode = instr->opcode;
1926 struct x86_store *store;
1927 uint8_t immsize;
1928 size_t sesize = 0;
1929
1930 /* The immediate is the source */
1931 store = &instr->src;
1932 immsize = instr->operand_size;
1933
1934 if (opcode->flags & FLAG_imm8) {
1935 sesize = immsize;
1936 immsize = 1;
1937 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1938 sesize = immsize;
1939 immsize = 4;
1940 }
1941
1942 store->type = STORE_IMM;
1943 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1944 return -1;
1945 }
1946 fsm_advance(fsm, immsize, NULL);
1947
1948 if (sesize != 0) {
1949 store->u.imm.data = sign_extend(store->u.imm.data, immsize);
1950 }
1951
1952 return 0;
1953 }
1954
1955 static int
1956 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1957 {
1958 const struct x86_opcode *opcode = instr->opcode;
1959 uint64_t data = 0;
1960 size_t n;
1961
1962 if (instr->strm->disp.type == DISP_1) {
1963 n = 1;
1964 } else { /* DISP4 */
1965 n = 4;
1966 }
1967
1968 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1969 return -1;
1970 }
1971
1972 if (__predict_true(fsm->is64bit)) {
1973 data = sign_extend(data, n);
1974 }
1975
1976 instr->strm->disp.data = data;
1977
1978 if (opcode->immediate) {
1979 fsm_advance(fsm, n, node_immediate);
1980 } else {
1981 fsm_advance(fsm, n, NULL);
1982 }
1983
1984 return 0;
1985 }
1986
1987 static const struct x86_reg *
1988 get_register_idx(struct x86_instr *instr, uint8_t index)
1989 {
1990 uint8_t enc = index;
1991 const struct x86_reg *reg;
1992 size_t regsize;
1993
1994 regsize = instr->address_size;
1995 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1996
1997 if (reg->num == -1) {
1998 reg = resolve_special_register(instr, enc, regsize);
1999 }
2000
2001 return reg;
2002 }
2003
2004 static const struct x86_reg *
2005 get_register_bas(struct x86_instr *instr, uint8_t base)
2006 {
2007 uint8_t enc = base;
2008 const struct x86_reg *reg;
2009 size_t regsize;
2010
2011 regsize = instr->address_size;
2012 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2013 if (reg->num == -1) {
2014 reg = resolve_special_register(instr, enc, regsize);
2015 }
2016
2017 return reg;
2018 }
2019
2020 static int
2021 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2022 {
2023 const struct x86_opcode *opcode;
2024 uint8_t scale, index, base;
2025 bool noindex, nobase;
2026 uint8_t byte;
2027
2028 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2029 return -1;
2030 }
2031
2032 scale = ((byte & 0b11000000) >> 6);
2033 index = ((byte & 0b00111000) >> 3);
2034 base = ((byte & 0b00000111) >> 0);
2035
2036 opcode = instr->opcode;
2037
2038 noindex = false;
2039 nobase = false;
2040
2041 if (index == 0b100 && !instr->rexpref.x) {
2042 /* Special case: the index is null */
2043 noindex = true;
2044 }
2045
2046 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2047 /* Special case: the base is null + disp32 */
2048 instr->strm->disp.type = DISP_4;
2049 nobase = true;
2050 }
2051
2052 instr->strm->type = STORE_SIB;
2053 instr->strm->u.sib.scale = (1 << scale);
2054 if (!noindex)
2055 instr->strm->u.sib.idx = get_register_idx(instr, index);
2056 if (!nobase)
2057 instr->strm->u.sib.bas = get_register_bas(instr, base);
2058
2059 /* May have a displacement, or an immediate */
2060 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2061 fsm_advance(fsm, 1, node_disp);
2062 } else if (opcode->immediate) {
2063 fsm_advance(fsm, 1, node_immediate);
2064 } else {
2065 fsm_advance(fsm, 1, NULL);
2066 }
2067
2068 return 0;
2069 }
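/*
 * Example: the SIB byte 0x88 decomposes as scale = 0b10, index = 0b001,
 * base = 0b000, i.e. an effective address of base + index * 4 (RAX +
 * RCX * 4 in 64bit mode with no REX), possibly followed by a displacement.
 */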
2070
2071 static const struct x86_reg *
2072 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2073 {
2074 uint8_t enc = instr->regmodrm.reg;
2075 const struct x86_reg *reg;
2076 size_t regsize;
2077
2078 regsize = instr->operand_size;
2079
2080 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2081 if (reg->num == -1) {
2082 reg = resolve_special_register(instr, enc, regsize);
2083 }
2084
2085 return reg;
2086 }
2087
2088 static const struct x86_reg *
2089 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2090 {
2091 uint8_t enc = instr->regmodrm.rm;
2092 const struct x86_reg *reg;
2093 size_t regsize;
2094
2095 if (instr->strm->disp.type == DISP_NONE) {
2096 regsize = instr->operand_size;
2097 } else {
2098 /* Indirect access, the size is that of the address. */
2099 regsize = instr->address_size;
2100 }
2101
2102 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2103 if (reg->num == -1) {
2104 reg = resolve_special_register(instr, enc, regsize);
2105 }
2106
2107 return reg;
2108 }
2109
2110 static inline bool
2111 has_sib(struct x86_instr *instr)
2112 {
2113 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2114 }
2115
2116 static inline bool
2117 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2118 {
2119 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2120 instr->regmodrm.rm == RM_RBP_DISP32);
2121 }
2122
2123 static inline bool
2124 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2125 {
2126 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2127 instr->regmodrm.rm == RM_RBP_DISP32);
2128 }
2129
2130 static enum x86_disp_type
2131 get_disp_type(struct x86_instr *instr)
2132 {
2133 switch (instr->regmodrm.mod) {
2134 case MOD_DIS0: /* indirect */
2135 return DISP_0;
2136 case MOD_DIS1: /* indirect+1 */
2137 return DISP_1;
2138 case MOD_DIS4: /* indirect+4 */
2139 return DISP_4;
2140 case MOD_REG: /* direct */
2141 default: /* gcc */
2142 return DISP_NONE;
2143 }
2144 }
2145
2146 static int
2147 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2148 {
2149 struct x86_store *strg, *strm;
2150 const struct x86_opcode *opcode;
2151 const struct x86_reg *reg;
2152 uint8_t byte;
2153
2154 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2155 return -1;
2156 }
2157
2158 opcode = instr->opcode;
2159
2160 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2161 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2162 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2163
2164 if (opcode->regtorm) {
2165 strg = &instr->src;
2166 strm = &instr->dst;
2167 } else { /* RM to REG */
2168 strm = &instr->src;
2169 strg = &instr->dst;
2170 }
2171
2172 /* Save for later use. */
2173 instr->strm = strm;
2174
2175 /*
2176 * Special cases: Groups. The REG field of REGMODRM is the group
2177 * index; the source operand is filled in by the Immediate node, if any.
2178 */
2179 if (opcode->group1) {
2180 if (group1[instr->regmodrm.reg].emul == NULL) {
2181 return -1;
2182 }
2183 instr->emul = group1[instr->regmodrm.reg].emul;
2184 } else if (opcode->group3) {
2185 if (group3[instr->regmodrm.reg].emul == NULL) {
2186 return -1;
2187 }
2188 instr->emul = group3[instr->regmodrm.reg].emul;
2189 } else if (opcode->group11) {
2190 if (group11[instr->regmodrm.reg].emul == NULL) {
2191 return -1;
2192 }
2193 instr->emul = group11[instr->regmodrm.reg].emul;
2194 }
2195
2196 if (!opcode->immediate) {
2197 reg = get_register_reg(instr, opcode);
2198 if (reg == NULL) {
2199 return -1;
2200 }
2201 strg->type = STORE_REG;
2202 strg->u.reg = reg;
2203 }
2204
2205 /* The displacement applies to RM. */
2206 strm->disp.type = get_disp_type(instr);
2207
2208 if (has_sib(instr)) {
2209 /* Overwrites RM */
2210 fsm_advance(fsm, 1, node_sib);
2211 return 0;
2212 }
2213
2214 if (is_rip_relative(fsm, instr)) {
2215 /* Overwrites RM */
2216 strm->type = STORE_REG;
2217 strm->u.reg = &gpr_map__rip;
2218 strm->disp.type = DISP_4;
2219 fsm_advance(fsm, 1, node_disp);
2220 return 0;
2221 }
2222
2223 if (is_disp32_only(fsm, instr)) {
2224 /* Overwrites RM */
2225 strm->type = STORE_REG;
2226 strm->u.reg = NULL;
2227 strm->disp.type = DISP_4;
2228 fsm_advance(fsm, 1, node_disp);
2229 return 0;
2230 }
2231
2232 reg = get_register_rm(instr, opcode);
2233 if (reg == NULL) {
2234 return -1;
2235 }
2236 strm->type = STORE_REG;
2237 strm->u.reg = reg;
2238
2239 if (strm->disp.type == DISP_NONE) {
2240 /* Direct register addressing mode */
2241 if (opcode->immediate) {
2242 fsm_advance(fsm, 1, node_immediate);
2243 } else {
2244 fsm_advance(fsm, 1, NULL);
2245 }
2246 } else if (strm->disp.type == DISP_0) {
2247 /* Indirect register addressing mode */
2248 if (opcode->immediate) {
2249 fsm_advance(fsm, 1, node_immediate);
2250 } else {
2251 fsm_advance(fsm, 1, NULL);
2252 }
2253 } else {
2254 fsm_advance(fsm, 1, node_disp);
2255 }
2256
2257 return 0;
2258 }
2259
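/*
 * Compute the operand size. Opcodes without a size override use their
 * default size; otherwise REX.W selects 8 bytes, and the operand-size
 * prefix toggles between 2 and 4 bytes depending on the default mode.
 */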
2260 static size_t
2261 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2262 {
2263 const struct x86_opcode *opcode = instr->opcode;
2264 int opsize;
2265
2266 /* Get the opsize */
2267 if (!opcode->szoverride) {
2268 opsize = opcode->defsize;
2269 } else if (instr->rexpref.present && instr->rexpref.w) {
2270 opsize = 8;
2271 } else {
2272 if (!fsm->is16bit) {
2273 if (instr->legpref.opr_ovr) {
2274 opsize = 2;
2275 } else {
2276 opsize = 4;
2277 }
2278 } else { /* 16bit */
2279 if (instr->legpref.opr_ovr) {
2280 opsize = 4;
2281 } else {
2282 opsize = 2;
2283 }
2284 }
2285 }
2286
2287 return opsize;
2288 }
2289
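/*
 * Compute the address size: 8 bytes in 64-bit mode, 4 in 32-bit mode,
 * 2 in 16-bit mode, with the address-size prefix selecting the
 * alternate size for each mode.
 */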
2290 static size_t
2291 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2292 {
2293 if (fsm->is64bit) {
2294 if (__predict_false(instr->legpref.adr_ovr)) {
2295 return 4;
2296 }
2297 return 8;
2298 }
2299
2300 if (fsm->is32bit) {
2301 if (__predict_false(instr->legpref.adr_ovr)) {
2302 return 2;
2303 }
2304 return 4;
2305 }
2306
2307 /* 16bit. */
2308 if (__predict_false(instr->legpref.adr_ovr)) {
2309 return 4;
2310 }
2311 return 2;
2312 }
2313
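/*
 * Look up a one-byte opcode in the primary table, compute the operand
 * and address sizes, and dispatch to the ModRM, DMO, STOS/LODS or MOVS
 * nodes depending on the opcode format.
 */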
2314 static int
2315 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2316 {
2317 const struct x86_opcode *opcode;
2318 uint8_t byte;
2319
2320 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2321 return -1;
2322 }
2323
2324 opcode = &primary_opcode_table[byte];
2325 if (__predict_false(!opcode->valid)) {
2326 return -1;
2327 }
2328
2329 instr->opcode = opcode;
2330 instr->emul = opcode->emul;
2331 instr->operand_size = get_operand_size(fsm, instr);
2332 instr->address_size = get_address_size(fsm, instr);
2333
2334 if (fsm->is64bit && (instr->operand_size == 4)) {
2335 /* Zero-extend to 64 bits. */
2336 instr->zeroextend_mask = ~size_to_mask(4);
2337 }
2338
2339 if (opcode->regmodrm) {
2340 fsm_advance(fsm, 1, node_regmodrm);
2341 } else if (opcode->dmo) {
2342 /* Direct-Memory Offsets */
2343 fsm_advance(fsm, 1, node_dmo);
2344 } else if (opcode->stos || opcode->lods) {
2345 fsm_advance(fsm, 1, node_stlo);
2346 } else if (opcode->movs) {
2347 fsm_advance(fsm, 1, node_movs);
2348 } else {
2349 return -1;
2350 }
2351
2352 return 0;
2353 }
2354
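/*
 * Look up a two-byte (0x0F-escaped) opcode in the secondary table.
 * FLAG_ze opcodes move only defsize bytes and zero-extend the result
 * into the destination register.
 */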
2355 static int
2356 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2357 {
2358 const struct x86_opcode *opcode;
2359 uint8_t byte;
2360
2361 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2362 return -1;
2363 }
2364
2365 opcode = &secondary_opcode_table[byte];
2366 if (__predict_false(!opcode->valid)) {
2367 return -1;
2368 }
2369
2370 instr->opcode = opcode;
2371 instr->emul = opcode->emul;
2372 instr->operand_size = get_operand_size(fsm, instr);
2373 instr->address_size = get_address_size(fsm, instr);
2374
2375 if (fsm->is64bit && (instr->operand_size == 4)) {
2376 /* Zero-extend to 64 bits. */
2377 instr->zeroextend_mask = ~size_to_mask(4);
2378 }
2379
2380 if (opcode->flags & FLAG_ze) {
2381 /*
2382 * Compute the mask for zero-extend. Update the operand size;
2383 * we move fewer bytes.
2384 */
2385 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2386 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2387 instr->operand_size = opcode->defsize;
2388 }
2389
2390 if (opcode->regmodrm) {
2391 fsm_advance(fsm, 1, node_regmodrm);
2392 } else {
2393 return -1;
2394 }
2395
2396 return 0;
2397 }
2398
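/*
 * Entry point after the prefixes: the 0x0F escape byte leads to the
 * secondary opcode table, the VEX prefixes are rejected, and anything
 * else is looked up in the primary opcode table.
 */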
2399 static int
2400 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2401 {
2402 uint8_t byte;
2403
2404 #define ESCAPE 0x0F
2405 #define VEX_1 0xC5
2406 #define VEX_2 0xC4
2407 #define XOP 0x8F
2408
2409 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2410 return -1;
2411 }
2412
2413 /*
2414 * We don't take XOP. It is AMD-specific, and it was removed shortly
2415 * after being introduced.
2416 */
2417 if (byte == ESCAPE) {
2418 fsm_advance(fsm, 1, node_secondary_opcode);
2419 } else if (!instr->rexpref.present) {
2420 if (byte == VEX_1) {
2421 return -1;
2422 } else if (byte == VEX_2) {
2423 return -1;
2424 } else {
2425 fsm->fn = node_primary_opcode;
2426 }
2427 } else {
2428 fsm->fn = node_primary_opcode;
2429 }
2430
2431 return 0;
2432 }
2433
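/*
 * Consume an optional REX prefix (0x40-0x4F). REX is only valid in
 * 64-bit mode; if the byte is not a REX prefix nothing is consumed.
 */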
2434 static int
2435 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2436 {
2437 struct x86_rexpref *rexpref = &instr->rexpref;
2438 uint8_t byte;
2439 size_t n = 0;
2440
2441 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2442 return -1;
2443 }
2444
2445 if (byte >= 0x40 && byte <= 0x4F) {
2446 if (__predict_false(!fsm->is64bit)) {
2447 return -1;
2448 }
2449 rexpref->b = ((byte & 0x1) != 0);
2450 rexpref->x = ((byte & 0x2) != 0);
2451 rexpref->r = ((byte & 0x4) != 0);
2452 rexpref->w = ((byte & 0x8) != 0);
2453 rexpref->present = true;
2454 n = 1;
2455 }
2456
2457 fsm_advance(fsm, n, node_main);
2458 return 0;
2459 }
2460
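/*
 * Consume the legacy prefixes (operand/address size overrides, segment
 * overrides, REP/REPN, LOCK), looping until a non-prefix byte is seen,
 * then hand over to the REX node without consuming that byte.
 */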
2461 static int
2462 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2463 {
2464 uint8_t byte;
2465
2466 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2467 return -1;
2468 }
2469
2470 if (byte == LEG_OPR_OVR) {
2471 instr->legpref.opr_ovr = 1;
2472 } else if (byte == LEG_OVR_DS) {
2473 instr->legpref.seg = NVMM_X64_SEG_DS;
2474 } else if (byte == LEG_OVR_ES) {
2475 instr->legpref.seg = NVMM_X64_SEG_ES;
2476 } else if (byte == LEG_REP) {
2477 instr->legpref.rep = 1;
2478 } else if (byte == LEG_OVR_GS) {
2479 instr->legpref.seg = NVMM_X64_SEG_GS;
2480 } else if (byte == LEG_OVR_FS) {
2481 instr->legpref.seg = NVMM_X64_SEG_FS;
2482 } else if (byte == LEG_ADR_OVR) {
2483 instr->legpref.adr_ovr = 1;
2484 } else if (byte == LEG_OVR_CS) {
2485 instr->legpref.seg = NVMM_X64_SEG_CS;
2486 } else if (byte == LEG_OVR_SS) {
2487 instr->legpref.seg = NVMM_X64_SEG_SS;
2488 } else if (byte == LEG_REPN) {
2489 instr->legpref.repn = 1;
2490 } else if (byte == LEG_LOCK) {
2491 /* ignore */
2492 } else {
2493 /* not a legacy prefix */
2494 fsm_advance(fsm, 0, node_rex_prefix);
2495 return 0;
2496 }
2497
2498 fsm_advance(fsm, 1, node_legacy_prefix);
2499 return 0;
2500 }
2501
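/*
 * Decode the instruction bytes by running the FSM: each node consumes
 * part of the instruction and selects the next node, until a node sets
 * fsm.fn to NULL (decoding complete) or fails with -1.
 */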
2502 static int
2503 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2504 struct nvmm_x64_state *state)
2505 {
2506 struct x86_decode_fsm fsm;
2507 int ret;
2508
2509 memset(instr, 0, sizeof(*instr));
2510 instr->legpref.seg = -1;
2511 instr->src.hardseg = -1;
2512 instr->dst.hardseg = -1;
2513
2514 fsm.is64bit = is_64bit(state);
2515 fsm.is32bit = is_32bit(state);
2516 fsm.is16bit = is_16bit(state);
2517
2518 fsm.fn = node_legacy_prefix;
2519 fsm.buf = inst_bytes;
2520 fsm.end = inst_bytes + inst_len;
2521
2522 while (fsm.fn != NULL) {
2523 ret = (*fsm.fn)(&fsm, instr);
2524 if (ret == -1)
2525 return -1;
2526 }
2527
2528 instr->len = fsm.buf - inst_bytes;
2529
2530 return 0;
2531 }
2532
2533 /* -------------------------------------------------------------------------- */
2534
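/*
 * EXEC_INSTR generates exec_<instr><size>() helpers that execute the
 * host instruction on the two operands and capture the resulting
 * RFLAGS with pushfq/popq. EXEC_DISPATCHER selects the helper matching
 * the operand size.
 */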
2535 #define EXEC_INSTR(sz, instr) \
2536 static uint##sz##_t \
2537 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2538 { \
2539 uint##sz##_t res; \
2540 __asm __volatile ( \
2541 #instr " %2, %3;" \
2542 "mov %3, %1;" \
2543 "pushfq;" \
2544 "popq %0" \
2545 : "=r" (*rflags), "=r" (res) \
2546 : "r" (op1), "r" (op2)); \
2547 return res; \
2548 }
2549
2550 #define EXEC_DISPATCHER(instr) \
2551 static uint64_t \
2552 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2553 { \
2554 switch (opsize) { \
2555 case 1: \
2556 return exec_##instr##8(op1, op2, rflags); \
2557 case 2: \
2558 return exec_##instr##16(op1, op2, rflags); \
2559 case 4: \
2560 return exec_##instr##32(op1, op2, rflags); \
2561 default: \
2562 return exec_##instr##64(op1, op2, rflags); \
2563 } \
2564 }
2565
2566 /* SUB: ret = op1 - op2 */
2567 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2568 EXEC_INSTR(8, sub)
2569 EXEC_INSTR(16, sub)
2570 EXEC_INSTR(32, sub)
2571 EXEC_INSTR(64, sub)
2572 EXEC_DISPATCHER(sub)
2573
2574 /* OR: ret = op1 | op2 */
2575 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2576 EXEC_INSTR(8, or)
2577 EXEC_INSTR(16, or)
2578 EXEC_INSTR(32, or)
2579 EXEC_INSTR(64, or)
2580 EXEC_DISPATCHER(or)
2581
2582 /* AND: ret = op1 & op2 */
2583 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2584 EXEC_INSTR(8, and)
2585 EXEC_INSTR(16, and)
2586 EXEC_INSTR(32, and)
2587 EXEC_INSTR(64, and)
2588 EXEC_DISPATCHER(and)
2589
2590 /* XOR: ret = op1 ^ op2 */
2591 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2592 EXEC_INSTR(8, xor)
2593 EXEC_INSTR(16, xor)
2594 EXEC_INSTR(32, xor)
2595 EXEC_INSTR(64, xor)
2596 EXEC_DISPATCHER(xor)
2597
2598 /* -------------------------------------------------------------------------- */
2599
2600 /*
2601 * Emulation functions. We don't care about the order of the operands, except
2602 * for SUB, CMP and TEST. For these we look at mem->write to determine which
2603 * is op1 and which is op2.
2604 */
2605
2606 static void
2607 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2608 {
2609 uint64_t *retval = (uint64_t *)mem->data;
2610 const bool write = mem->write;
2611 uint64_t *op1, op2, fl, ret;
2612
2613 op1 = (uint64_t *)mem->data;
2614 op2 = 0;
2615
2616 /* Fetch the value to be OR'ed (op2). */
2617 mem->data = (uint8_t *)&op2;
2618 mem->write = false;
2619 (*__callbacks.mem)(mem);
2620
2621 /* Perform the OR. */
2622 ret = exec_or(*op1, op2, &fl, mem->size);
2623
2624 if (write) {
2625 /* Write back the result. */
2626 mem->data = (uint8_t *)&ret;
2627 mem->write = true;
2628 (*__callbacks.mem)(mem);
2629 } else {
2630 /* Return data to the caller. */
2631 *retval = ret;
2632 }
2633
2634 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2635 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2636 }
2637
2638 static void
2639 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2640 {
2641 uint64_t *retval = (uint64_t *)mem->data;
2642 const bool write = mem->write;
2643 uint64_t *op1, op2, fl, ret;
2644
2645 op1 = (uint64_t *)mem->data;
2646 op2 = 0;
2647
2648 /* Fetch the value to be AND'ed (op2). */
2649 mem->data = (uint8_t *)&op2;
2650 mem->write = false;
2651 (*__callbacks.mem)(mem);
2652
2653 /* Perform the AND. */
2654 ret = exec_and(*op1, op2, &fl, mem->size);
2655
2656 if (write) {
2657 /* Write back the result. */
2658 mem->data = (uint8_t *)&ret;
2659 mem->write = true;
2660 (*__callbacks.mem)(mem);
2661 } else {
2662 /* Return data to the caller. */
2663 *retval = ret;
2664 }
2665
2666 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2667 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2668 }
2669
2670 static void
2671 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2672 {
2673 uint64_t *retval = (uint64_t *)mem->data;
2674 const bool write = mem->write;
2675 uint64_t *op1, *op2, fl, ret;
2676 uint64_t tmp;
2677 bool memop1;
2678
2679 memop1 = !mem->write;
2680 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2681 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2682
2683 /* Fetch the value to be SUB'ed (op1 or op2). */
2684 mem->data = (uint8_t *)&tmp;
2685 mem->write = false;
2686 (*__callbacks.mem)(mem);
2687
2688 /* Perform the SUB. */
2689 ret = exec_sub(*op1, *op2, &fl, mem->size);
2690
2691 if (write) {
2692 /* Write back the result. */
2693 mem->data = (uint8_t *)&ret;
2694 mem->write = true;
2695 (*__callbacks.mem)(mem);
2696 } else {
2697 /* Return data to the caller. */
2698 *retval = ret;
2699 }
2700
2701 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2702 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2703 }
2704
2705 static void
2706 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2707 {
2708 uint64_t *retval = (uint64_t *)mem->data;
2709 const bool write = mem->write;
2710 uint64_t *op1, op2, fl, ret;
2711
2712 op1 = (uint64_t *)mem->data;
2713 op2 = 0;
2714
2715 /* Fetch the value to be XOR'ed (op2). */
2716 mem->data = (uint8_t *)&op2;
2717 mem->write = false;
2718 (*__callbacks.mem)(mem);
2719
2720 /* Perform the XOR. */
2721 ret = exec_xor(*op1, op2, &fl, mem->size);
2722
2723 if (write) {
2724 /* Write back the result. */
2725 mem->data = (uint8_t *)&ret;
2726 mem->write = true;
2727 (*__callbacks.mem)(mem);
2728 } else {
2729 /* Return data to the caller. */
2730 *retval = ret;
2731 }
2732
2733 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2734 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2735 }
2736
2737 static void
2738 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2739 {
2740 uint64_t *op1, *op2, fl;
2741 uint64_t tmp;
2742 bool memop1;
2743
2744 memop1 = !mem->write;
2745 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2746 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2747
2748 /* Fetch the value to be CMP'ed (op1 or op2). */
2749 mem->data = (uint8_t *)&tmp;
2750 mem->write = false;
2751 (*__callbacks.mem)(mem);
2752
2753 /* Perform the CMP. */
2754 exec_sub(*op1, *op2, &fl, mem->size);
2755
2756 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2757 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2758 }
2759
2760 static void
2761 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2762 {
2763 uint64_t *op1, *op2, fl;
2764 uint64_t tmp;
2765 bool memop1;
2766
2767 memop1 = !mem->write;
2768 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2769 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2770
2771 /* Fetch the value to be TEST'ed (op1 or op2). */
2772 mem->data = (uint8_t *)&tmp;
2773 mem->write = false;
2774 (*__callbacks.mem)(mem);
2775
2776 /* Perform the TEST. */
2777 exec_and(*op1, *op2, &fl, mem->size);
2778
2779 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2780 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2781 }
2782
2783 static void
2784 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2785 {
2786 /*
2787 * Nothing special, just move without emulation.
2788 */
2789 (*__callbacks.mem)(mem);
2790 }
2791
2792 static void
2793 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2794 {
2795 /*
2796 * Just move, and update RDI.
2797 */
2798 (*__callbacks.mem)(mem);
2799
2800 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2801 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2802 } else {
2803 gprs[NVMM_X64_GPR_RDI] += mem->size;
2804 }
2805 }
2806
2807 static void
2808 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2809 {
2810 /*
2811 * Just move, and update RSI.
2812 */
2813 (*__callbacks.mem)(mem);
2814
2815 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2816 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2817 } else {
2818 gprs[NVMM_X64_GPR_RSI] += mem->size;
2819 }
2820 }
2821
2822 static void
2823 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2824 {
2825 /*
2826 * Special instruction: double memory operand. Don't call the cb;
2827 * the memory accesses have already been performed earlier.
2828 */
2829
2830 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2831 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2832 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2833 } else {
2834 gprs[NVMM_X64_GPR_RSI] += mem->size;
2835 gprs[NVMM_X64_GPR_RDI] += mem->size;
2836 }
2837 }
2838
2839 /* -------------------------------------------------------------------------- */
2840
2841 static inline uint64_t
2842 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2843 {
2844 uint64_t val;
2845
2846 val = state->gprs[gpr];
2847 val &= size_to_mask(instr->address_size);
2848
2849 return val;
2850 }
2851
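/*
 * Compute the guest virtual address referenced by a store: sum the SIB
 * base and scaled index, or the base register, or take the DMO offset,
 * add the displacement, then apply the base of the selected segment
 * (hard segment, override prefix, or DS). Outside long mode the
 * segment limit is checked as well.
 */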
2852 static int
2853 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2854 struct x86_store *store, gvaddr_t *gvap, size_t size)
2855 {
2856 struct x86_sib *sib;
2857 gvaddr_t gva = 0;
2858 uint64_t reg;
2859 int ret, seg;
2860
2861 if (store->type == STORE_SIB) {
2862 sib = &store->u.sib;
2863 if (sib->bas != NULL)
2864 gva += gpr_read_address(instr, state, sib->bas->num);
2865 if (sib->idx != NULL) {
2866 reg = gpr_read_address(instr, state, sib->idx->num);
2867 gva += sib->scale * reg;
2868 }
2869 } else if (store->type == STORE_REG) {
2870 if (store->u.reg == NULL) {
2871 /* The base is null. Happens with disp32-only. */
2872 } else {
2873 gva = gpr_read_address(instr, state, store->u.reg->num);
2874 }
2875 } else {
2876 gva = store->u.dmo;
2877 }
2878
2879 if (store->disp.type != DISP_NONE) {
2880 gva += store->disp.data;
2881 }
2882
2883 if (store->hardseg != -1) {
2884 seg = store->hardseg;
2885 } else {
2886 if (__predict_false(instr->legpref.seg != -1)) {
2887 seg = instr->legpref.seg;
2888 } else {
2889 seg = NVMM_X64_SEG_DS;
2890 }
2891 }
2892
2893 if (__predict_true(is_long_mode(state))) {
2894 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2895 segment_apply(&state->segs[seg], &gva);
2896 }
2897 } else {
2898 ret = segment_check(&state->segs[seg], gva, size);
2899 if (ret == -1)
2900 return -1;
2901 segment_apply(&state->segs[seg], &gva);
2902 }
2903
2904 *gvap = gva;
2905 return 0;
2906 }
2907
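/*
 * Prefetch a few instruction bytes at CS:RIP and scan the legacy
 * prefixes to determine which segment the instruction operates on,
 * defaulting to DS when there is no segment override.
 */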
2908 static int
2909 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2910 {
2911 uint8_t inst_bytes[5], byte;
2912 size_t i, fetchsize;
2913 gvaddr_t gva;
2914 int ret, seg;
2915
2916 fetchsize = sizeof(inst_bytes);
2917
2918 gva = state->gprs[NVMM_X64_GPR_RIP];
2919 if (__predict_false(!is_long_mode(state))) {
2920 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2921 fetchsize);
2922 if (ret == -1)
2923 return -1;
2924 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2925 }
2926
2927 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2928 if (ret == -1)
2929 return -1;
2930
2931 seg = NVMM_X64_SEG_DS;
2932 for (i = 0; i < fetchsize; i++) {
2933 byte = inst_bytes[i];
2934
2935 if (byte == LEG_OVR_DS) {
2936 seg = NVMM_X64_SEG_DS;
2937 } else if (byte == LEG_OVR_ES) {
2938 seg = NVMM_X64_SEG_ES;
2939 } else if (byte == LEG_OVR_GS) {
2940 seg = NVMM_X64_SEG_GS;
2941 } else if (byte == LEG_OVR_FS) {
2942 seg = NVMM_X64_SEG_FS;
2943 } else if (byte == LEG_OVR_CS) {
2944 seg = NVMM_X64_SEG_CS;
2945 } else if (byte == LEG_OVR_SS) {
2946 seg = NVMM_X64_SEG_SS;
2947 } else if (byte == LEG_OPR_OVR) {
2948 /* nothing */
2949 } else if (byte == LEG_ADR_OVR) {
2950 /* nothing */
2951 } else if (byte == LEG_REP) {
2952 /* nothing */
2953 } else if (byte == LEG_REPN) {
2954 /* nothing */
2955 } else if (byte == LEG_LOCK) {
2956 /* nothing */
2957 } else {
2958 return seg;
2959 }
2960 }
2961
2962 return seg;
2963 }
2964
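/*
 * Fetch the instruction bytes at CS:RIP into the exit structure, for
 * the case where the kernel did not provide them.
 */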
2965 static int
2966 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2967 struct nvmm_exit *exit)
2968 {
2969 size_t fetchsize;
2970 gvaddr_t gva;
2971 int ret;
2972
2973 fetchsize = sizeof(exit->u.mem.inst_bytes);
2974
2975 gva = state->gprs[NVMM_X64_GPR_RIP];
2976 if (__predict_false(!is_long_mode(state))) {
2977 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2978 fetchsize);
2979 if (ret == -1)
2980 return -1;
2981 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2982 }
2983
2984 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2985 fetchsize);
2986 if (ret == -1)
2987 return -1;
2988
2989 exit->u.mem.inst_len = fetchsize;
2990
2991 return 0;
2992 }
2993
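/*
 * Emulate an instruction with two memory operands (MOVS): read from
 * the source GVA, write to the destination GVA, then let the emul
 * function update RSI and RDI.
 */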
2994 static int
2995 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2996 struct x86_instr *instr)
2997 {
2998 struct nvmm_mem mem;
2999 uint8_t data[8];
3000 gvaddr_t gva;
3001 size_t size;
3002 int ret;
3003
3004 size = instr->operand_size;
3005
3006 /* Source. */
3007 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3008 if (ret == -1)
3009 return -1;
3010 ret = read_guest_memory(mach, state, gva, data, size);
3011 if (ret == -1)
3012 return -1;
3013
3014 /* Destination. */
3015 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3016 if (ret == -1)
3017 return -1;
3018 ret = write_guest_memory(mach, state, gva, data, size);
3019 if (ret == -1)
3020 return -1;
3021
3022 mem.size = size;
3023 (*instr->emul->func)(&mem, state->gprs);
3024
3025 return 0;
3026 }
3027
3028 #define DISASSEMBLER_BUG() \
3029 do { \
3030 errno = EINVAL; \
3031 return -1; \
3032 } while (0)
3033
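/*
 * Emulate an instruction with one memory operand: determine whether
 * the access is a read or a write, marshal the register or immediate
 * source into mem.data, invoke the emul function (which calls the
 * registered mem callback), and, for reads, merge the result back into
 * the destination register, applying the zero-extend mask.
 */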
3034 static int
3035 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3036 struct x86_instr *instr, struct nvmm_exit *exit)
3037 {
3038 struct nvmm_mem mem;
3039 uint8_t membuf[8];
3040 uint64_t val;
3041
3042 memset(membuf, 0, sizeof(membuf));
3043
3044 mem.gpa = exit->u.mem.gpa;
3045 mem.size = instr->operand_size;
3046 mem.data = membuf;
3047
3048 /* Determine the direction. */
3049 switch (instr->src.type) {
3050 case STORE_REG:
3051 if (instr->src.disp.type != DISP_NONE) {
3052 /* Indirect access. */
3053 mem.write = false;
3054 } else {
3055 /* Direct access. */
3056 mem.write = true;
3057 }
3058 break;
3059 case STORE_IMM:
3060 mem.write = true;
3061 break;
3062 case STORE_SIB:
3063 mem.write = false;
3064 break;
3065 case STORE_DMO:
3066 mem.write = false;
3067 break;
3068 default:
3069 DISASSEMBLER_BUG();
3070 }
3071
3072 if (mem.write) {
3073 switch (instr->src.type) {
3074 case STORE_REG:
3075 if (instr->src.disp.type != DISP_NONE) {
3076 DISASSEMBLER_BUG();
3077 }
3078 val = state->gprs[instr->src.u.reg->num];
3079 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3080 memcpy(mem.data, &val, mem.size);
3081 break;
3082 case STORE_IMM:
3083 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3084 break;
3085 default:
3086 DISASSEMBLER_BUG();
3087 }
3088 } else if (instr->emul->read) {
3089 if (instr->dst.type != STORE_REG) {
3090 DISASSEMBLER_BUG();
3091 }
3092 if (instr->dst.disp.type != DISP_NONE) {
3093 DISASSEMBLER_BUG();
3094 }
3095 val = state->gprs[instr->dst.u.reg->num];
3096 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3097 memcpy(mem.data, &val, mem.size);
3098 }
3099
3100 (*instr->emul->func)(&mem, state->gprs);
3101
3102 if (!instr->emul->notouch && !mem.write) {
3103 if (instr->dst.type != STORE_REG) {
3104 DISASSEMBLER_BUG();
3105 }
3106 memcpy(&val, membuf, sizeof(uint64_t));
3107 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3108 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3109 state->gprs[instr->dst.u.reg->num] |= val;
3110 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3111 }
3112
3113 return 0;
3114 }
3115
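/*
 * Memory assist entry point: fetch the VCPU state, fetch and decode
 * the faulting instruction, emulate it, handle the REP/REPN prefixes
 * (RIP only advances once the count reaches zero, or when REPN
 * terminates on ZF), and commit the GPRs back to the VCPU.
 */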
3116 int
3117 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3118 struct nvmm_exit *exit)
3119 {
3120 struct nvmm_x64_state state;
3121 struct x86_instr instr;
3122 uint64_t cnt = 0; /* GCC */
3123 int ret;
3124
3125 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3126 errno = EINVAL;
3127 return -1;
3128 }
3129
3130 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3131 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3132 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3133 if (ret == -1)
3134 return -1;
3135
3136 if (exit->u.mem.inst_len == 0) {
3137 /*
3138 * The instruction was not fetched from the kernel. Fetch
3139 * it ourselves.
3140 */
3141 ret = fetch_instruction(mach, &state, exit);
3142 if (ret == -1)
3143 return -1;
3144 }
3145
3146 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3147 &instr, &state);
3148 if (ret == -1) {
3149 errno = ENODEV;
3150 return -1;
3151 }
3152
3153 if (instr.legpref.rep || instr.legpref.repn) {
3154 cnt = rep_get_cnt(&state, instr.address_size);
3155 if (__predict_false(cnt == 0)) {
3156 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3157 goto out;
3158 }
3159 }
3160
3161 if (instr.opcode->movs) {
3162 ret = assist_mem_double(mach, &state, &instr);
3163 } else {
3164 ret = assist_mem_single(mach, &state, &instr, exit);
3165 }
3166 if (ret == -1) {
3167 errno = ENODEV;
3168 return -1;
3169 }
3170
3171 if (instr.legpref.rep || instr.legpref.repn) {
3172 cnt -= 1;
3173 rep_set_cnt(&state, instr.address_size, cnt);
3174 if (cnt == 0) {
3175 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3176 } else if (__predict_false(instr.legpref.repn)) {
3177 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3178 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3179 }
3180 }
3181 } else {
3182 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3183 }
3184
3185 out:
3186 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3187 if (ret == -1)
3188 return -1;
3189
3190 return 0;
3191 }
3192