1 /* $NetBSD: libnvmm_x86.c,v 1.18 2019/02/01 06:49:58 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
56 /*
57 * Undocumented debugging function. Helpful.
58 */
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 size_t i;
64 int ret;
65
66 const char *segnames[] = {
67 "CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
68 };
69
70 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
71 if (ret == -1)
72 return -1;
73
74 printf("+ VCPU id=%d\n", (int)cpuid);
75 printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
76 printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
77 printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
78 printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
79 printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
80 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
81 for (i = 0; i < NVMM_X64_NSEG; i++) {
82 printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d L=%d\n",
83 segnames[i],
84 state.segs[i].selector,
85 (void *)state.segs[i].base,
86 (void *)state.segs[i].limit,
87 state.segs[i].attrib.p, state.segs[i].attrib.def32,
88 state.segs[i].attrib.lng);
89 }
90 printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
91 printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
92 printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
93 printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
94 printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
95 printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);
96
97 return 0;
98 }
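/*
 * Illustrative usage (editor's sketch, not part of the original source):
 * a VMM can call nvmm_vcpu_dump() from its run loop when it hits an exit
 * it does not know how to handle. "debug_on_exit" is a hypothetical flag.
 *
 *	if (debug_on_exit) {
 *		nvmm_vcpu_dump(mach, cpuid);
 *	}
 */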
99
100 /* -------------------------------------------------------------------------- */
101
102 #define PTE32_L1_SHIFT 12
103 #define PTE32_L2_SHIFT 22
104
105 #define PTE32_L2_MASK 0xffc00000
106 #define PTE32_L1_MASK 0x003ff000
107
108 #define PTE32_L2_FRAME (PTE32_L2_MASK)
109 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
110
111 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
112 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
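/*
 * Worked example (editor's note): with 4KB pages and PSE disabled, the
 * 32bit GVA 0xdeadbeef splits into pte32_l2idx() = 0x37A (bits 31:22),
 * pte32_l1idx() = 0x2DB (bits 21:12), and a page offset of 0xEEF
 * (bits 11:0).
 */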
113
114 typedef uint32_t pte_32bit_t;
115
116 static int
117 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
118 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
119 {
120 gpaddr_t L2gpa, L1gpa;
121 uintptr_t L2hva, L1hva;
122 pte_32bit_t *pdir, pte;
123
124 /* We begin with an RWXU access. */
125 *prot = NVMM_PROT_ALL;
126
127 /* Parse L2. */
128 L2gpa = (cr3 & PG_FRAME);
129 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
130 return -1;
131 pdir = (pte_32bit_t *)L2hva;
132 pte = pdir[pte32_l2idx(gva)];
133 if ((pte & PG_V) == 0)
134 return -1;
135 if ((pte & PG_u) == 0)
136 *prot &= ~NVMM_PROT_USER;
137 if ((pte & PG_KW) == 0)
138 *prot &= ~NVMM_PROT_WRITE;
139 if ((pte & PG_PS) && !has_pse)
140 return -1;
141 if (pte & PG_PS) {
142 *gpa = (pte & PTE32_L2_FRAME);
143 *gpa = *gpa + (gva & PTE32_L1_MASK);
144 return 0;
145 }
146
147 /* Parse L1. */
148 L1gpa = (pte & PG_FRAME);
149 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
150 return -1;
151 pdir = (pte_32bit_t *)L1hva;
152 pte = pdir[pte32_l1idx(gva)];
153 if ((pte & PG_V) == 0)
154 return -1;
155 if ((pte & PG_u) == 0)
156 *prot &= ~NVMM_PROT_USER;
157 if ((pte & PG_KW) == 0)
158 *prot &= ~NVMM_PROT_WRITE;
159 if (pte & PG_PS)
160 return -1;
161
162 *gpa = (pte & PG_FRAME);
163 return 0;
164 }
165
166 /* -------------------------------------------------------------------------- */
167
168 #define PTE32_PAE_L1_SHIFT 12
169 #define PTE32_PAE_L2_SHIFT 21
170 #define PTE32_PAE_L3_SHIFT 30
171
172 #define PTE32_PAE_L3_MASK 0xc0000000
173 #define PTE32_PAE_L2_MASK 0x3fe00000
174 #define PTE32_PAE_L1_MASK 0x001ff000
175
176 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
177 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
178 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
179
180 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
181 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
182 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
183
184 typedef uint64_t pte_32bit_pae_t;
185
186 static int
187 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
188 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
189 {
190 gpaddr_t L3gpa, L2gpa, L1gpa;
191 uintptr_t L3hva, L2hva, L1hva;
192 pte_32bit_pae_t *pdir, pte;
193
194 /* We begin with an RWXU access. */
195 *prot = NVMM_PROT_ALL;
196
197 /* Parse L3. */
198 L3gpa = (cr3 & PG_FRAME);
199 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
200 return -1;
201 pdir = (pte_32bit_pae_t *)L3hva;
202 pte = pdir[pte32_pae_l3idx(gva)];
203 if ((pte & PG_V) == 0)
204 return -1;
205 if (pte & PG_NX)
206 *prot &= ~NVMM_PROT_EXEC;
207 if (pte & PG_PS)
208 return -1;
209
210 /* Parse L2. */
211 L2gpa = (pte & PG_FRAME);
212 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
213 return -1;
214 pdir = (pte_32bit_pae_t *)L2hva;
215 pte = pdir[pte32_pae_l2idx(gva)];
216 if ((pte & PG_V) == 0)
217 return -1;
218 if ((pte & PG_u) == 0)
219 *prot &= ~NVMM_PROT_USER;
220 if ((pte & PG_KW) == 0)
221 *prot &= ~NVMM_PROT_WRITE;
222 if (pte & PG_NX)
223 *prot &= ~NVMM_PROT_EXEC;
224 if ((pte & PG_PS) && !has_pse)
225 return -1;
226 if (pte & PG_PS) {
227 *gpa = (pte & PTE32_PAE_L2_FRAME);
228 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
229 return 0;
230 }
231
232 /* Parse L1. */
233 L1gpa = (pte & PG_FRAME);
234 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
235 return -1;
236 pdir = (pte_32bit_pae_t *)L1hva;
237 pte = pdir[pte32_pae_l1idx(gva)];
238 if ((pte & PG_V) == 0)
239 return -1;
240 if ((pte & PG_u) == 0)
241 *prot &= ~NVMM_PROT_USER;
242 if ((pte & PG_KW) == 0)
243 *prot &= ~NVMM_PROT_WRITE;
244 if (pte & PG_NX)
245 *prot &= ~NVMM_PROT_EXEC;
246 if (pte & PG_PS)
247 return -1;
248
249 *gpa = (pte & PG_FRAME);
250 return 0;
251 }
252
253 /* -------------------------------------------------------------------------- */
254
255 #define PTE64_L1_SHIFT 12
256 #define PTE64_L2_SHIFT 21
257 #define PTE64_L3_SHIFT 30
258 #define PTE64_L4_SHIFT 39
259
260 #define PTE64_L4_MASK 0x0000ff8000000000
261 #define PTE64_L3_MASK 0x0000007fc0000000
262 #define PTE64_L2_MASK 0x000000003fe00000
263 #define PTE64_L1_MASK 0x00000000001ff000
264
265 #define PTE64_L4_FRAME PTE64_L4_MASK
266 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
267 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
268 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
269
270 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
271 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
272 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
273 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
274
275 typedef uint64_t pte_64bit_t;
276
277 static inline bool
278 x86_gva_64bit_canonical(gvaddr_t gva)
279 {
280 /* Bits 63:47 must have the same value. */
281 #define SIGN_EXTEND 0xffff800000000000ULL
282 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
283 }
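/*
 * Editor's note, examples of the check above: 0x00007fffffffffff and
 * 0xffff800000000000 are canonical, while 0x0000800000000000 is not,
 * since its bits 63:47 are not all equal.
 */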
284
285 static int
286 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
287 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
288 {
289 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
290 uintptr_t L4hva, L3hva, L2hva, L1hva;
291 pte_64bit_t *pdir, pte;
292
293 /* We begin with an RWXU access. */
294 *prot = NVMM_PROT_ALL;
295
296 if (!x86_gva_64bit_canonical(gva))
297 return -1;
298
299 /* Parse L4. */
300 L4gpa = (cr3 & PG_FRAME);
301 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
302 return -1;
303 pdir = (pte_64bit_t *)L4hva;
304 pte = pdir[pte64_l4idx(gva)];
305 if ((pte & PG_V) == 0)
306 return -1;
307 if ((pte & PG_u) == 0)
308 *prot &= ~NVMM_PROT_USER;
309 if ((pte & PG_KW) == 0)
310 *prot &= ~NVMM_PROT_WRITE;
311 if (pte & PG_NX)
312 *prot &= ~NVMM_PROT_EXEC;
313 if (pte & PG_PS)
314 return -1;
315
316 /* Parse L3. */
317 L3gpa = (pte & PG_FRAME);
318 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
319 return -1;
320 pdir = (pte_64bit_t *)L3hva;
321 pte = pdir[pte64_l3idx(gva)];
322 if ((pte & PG_V) == 0)
323 return -1;
324 if ((pte & PG_u) == 0)
325 *prot &= ~NVMM_PROT_USER;
326 if ((pte & PG_KW) == 0)
327 *prot &= ~NVMM_PROT_WRITE;
328 if (pte & PG_NX)
329 *prot &= ~NVMM_PROT_EXEC;
330 if (pte & PG_PS) {
331 *gpa = (pte & PTE64_L3_FRAME);
332 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
333 return 0;
334 }
335
336 /* Parse L2. */
337 L2gpa = (pte & PG_FRAME);
338 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
339 return -1;
340 pdir = (pte_64bit_t *)L2hva;
341 pte = pdir[pte64_l2idx(gva)];
342 if ((pte & PG_V) == 0)
343 return -1;
344 if ((pte & PG_u) == 0)
345 *prot &= ~NVMM_PROT_USER;
346 if ((pte & PG_KW) == 0)
347 *prot &= ~NVMM_PROT_WRITE;
348 if (pte & PG_NX)
349 *prot &= ~NVMM_PROT_EXEC;
350 if (pte & PG_PS) {
351 *gpa = (pte & PTE64_L2_FRAME);
352 *gpa = *gpa + (gva & PTE64_L1_MASK);
353 return 0;
354 }
355
356 /* Parse L1. */
357 L1gpa = (pte & PG_FRAME);
358 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
359 return -1;
360 pdir = (pte_64bit_t *)L1hva;
361 pte = pdir[pte64_l1idx(gva)];
362 if ((pte & PG_V) == 0)
363 return -1;
364 if ((pte & PG_u) == 0)
365 *prot &= ~NVMM_PROT_USER;
366 if ((pte & PG_KW) == 0)
367 *prot &= ~NVMM_PROT_WRITE;
368 if (pte & PG_NX)
369 *prot &= ~NVMM_PROT_EXEC;
370 if (pte & PG_PS)
371 return -1;
372
373 *gpa = (pte & PG_FRAME);
374 return 0;
375 }
376
377 static inline int
378 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
379 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
380 {
381 bool is_pae, is_lng, has_pse;
382 uint64_t cr3;
383 size_t off;
384 int ret;
385
386 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
387 /* No paging. */
388 *prot = NVMM_PROT_ALL;
389 *gpa = gva;
390 return 0;
391 }
392
393 off = (gva & PAGE_MASK);
394 gva &= ~PAGE_MASK;
395
396 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
397 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
398 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
399 cr3 = state->crs[NVMM_X64_CR_CR3];
400
401 if (is_pae && is_lng) {
402 /* 64bit */
403 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
404 } else if (is_pae && !is_lng) {
405 /* 32bit PAE */
406 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
407 prot);
408 } else if (!is_pae && !is_lng) {
409 /* 32bit */
410 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
411 } else {
412 ret = -1;
413 }
414
415 if (ret == -1) {
416 errno = EFAULT;
417 }
418
419 *gpa = *gpa + off;
420
421 return ret;
422 }
423
424 int
425 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
426 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
427 {
428 struct nvmm_x64_state state;
429 int ret;
430
431 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
432 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
433 if (ret == -1)
434 return -1;
435
436 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
437 }
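/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * combining nvmm_gva_to_gpa() with nvmm_gpa_to_hva() to read guest memory
 * from the VMM, assuming the access does not cross a page boundary.
 *
 *	gpaddr_t gpa;
 *	uintptr_t hva;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(mach, cpuid, gva, &gpa, &prot) == 0 &&
 *	    (prot & NVMM_PROT_READ) != 0 &&
 *	    nvmm_gpa_to_hva(mach, gpa, &hva) == 0)
 *		memcpy(buf, (void *)hva, len);
 */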
438
439 /* -------------------------------------------------------------------------- */
440
441 static inline bool
442 is_long_mode(struct nvmm_x64_state *state)
443 {
444 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
445 }
446
447 static inline bool
448 is_64bit(struct nvmm_x64_state *state)
449 {
450 return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
451 }
452
453 static inline bool
454 is_32bit(struct nvmm_x64_state *state)
455 {
456 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
457 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
458 }
459
460 static inline bool
461 is_16bit(struct nvmm_x64_state *state)
462 {
463 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
464 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
465 }
466
467 static int
468 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
469 {
470 uint64_t limit;
471
472 /*
473 * This is incomplete. We should also handle expand-down ("topdown")
474 * segments and the rest of the attributes, but really, that's tiring.
475 */
476 if (__predict_false(!seg->attrib.p)) {
477 goto error;
478 }
479
480 limit = (seg->limit + 1);
481 if (__predict_true(seg->attrib.gran)) {
482 limit *= PAGE_SIZE;
483 }
484
485 if (__predict_false(gva + size > limit)) {
486 goto error;
487 }
488
489 return 0;
490
491 error:
492 errno = EFAULT;
493 return -1;
494 }
495
496 static inline void
497 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
498 {
499 *gva += seg->base;
500 }
501
502 static inline uint64_t
503 size_to_mask(size_t size)
504 {
505 switch (size) {
506 case 1:
507 return 0x00000000000000FF;
508 case 2:
509 return 0x000000000000FFFF;
510 case 4:
511 return 0x00000000FFFFFFFF;
512 case 8:
513 default:
514 return 0xFFFFFFFFFFFFFFFF;
515 }
516 }
517
518 static uint64_t
519 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
520 {
521 uint64_t mask, cnt;
522
523 mask = size_to_mask(adsize);
524 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
525
526 return cnt;
527 }
528
529 static void
530 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
531 {
532 uint64_t mask;
533
534 /* XXX: should we zero-extend? */
535 mask = size_to_mask(adsize);
536 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
537 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
538 }
539
540 static int
541 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
542 gvaddr_t gva, uint8_t *data, size_t size)
543 {
544 struct nvmm_mem mem;
545 nvmm_prot_t prot;
546 gpaddr_t gpa;
547 uintptr_t hva;
548 bool is_mmio;
549 int ret, remain;
550
551 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
552 if (__predict_false(ret == -1)) {
553 return -1;
554 }
555 if (__predict_false(!(prot & NVMM_PROT_READ))) {
556 errno = EFAULT;
557 return -1;
558 }
559
560 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
561 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
562 } else {
563 remain = 0;
564 }
565 size -= remain;
566
567 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
568 is_mmio = (ret == -1);
569
570 if (is_mmio) {
571 mem.data = data;
572 mem.gpa = gpa;
573 mem.write = false;
574 mem.size = size;
575 (*__callbacks.mem)(&mem);
576 } else {
577 memcpy(data, (uint8_t *)hva, size);
578 }
579
580 if (remain > 0) {
581 ret = read_guest_memory(mach, state, gva + size,
582 data + size, remain);
583 } else {
584 ret = 0;
585 }
586
587 return ret;
588 }
589
590 static int
591 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
592 gvaddr_t gva, uint8_t *data, size_t size)
593 {
594 struct nvmm_mem mem;
595 nvmm_prot_t prot;
596 gpaddr_t gpa;
597 uintptr_t hva;
598 bool is_mmio;
599 int ret, remain;
600
601 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
602 if (__predict_false(ret == -1)) {
603 return -1;
604 }
605 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
606 errno = EFAULT;
607 return -1;
608 }
609
610 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
611 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
612 } else {
613 remain = 0;
614 }
615 size -= remain;
616
617 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
618 is_mmio = (ret == -1);
619
620 if (is_mmio) {
621 mem.data = data;
622 mem.gpa = gpa;
623 mem.write = true;
624 mem.size = size;
625 (*__callbacks.mem)(&mem);
626 } else {
627 memcpy((uint8_t *)hva, data, size);
628 }
629
630 if (remain > 0) {
631 ret = write_guest_memory(mach, state, gva + size,
632 data + size, remain);
633 } else {
634 ret = 0;
635 }
636
637 return ret;
638 }
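/*
 * Editor's note: read_guest_memory() and write_guest_memory() split an
 * access at page boundaries and recurse. For example, an 8-byte write at
 * a GVA whose page offset is 0xFFC copies 4 bytes on the current page,
 * then recurses for the remaining 4 bytes on the next page, which gets
 * its own GVA->GPA translation (the two pages may map to unrelated GPAs).
 */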
639
640 /* -------------------------------------------------------------------------- */
641
642 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
643
644 #define NVMM_IO_BATCH_SIZE 32
645
646 static int
647 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
648 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
649 {
650 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
651 size_t i, iosize, iocnt;
652 int ret;
653
654 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
655 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
656 iocnt = iosize / io->size;
657
658 io->data = iobuf;
659
660 if (!io->in) {
661 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
662 if (ret == -1)
663 return -1;
664 }
665
666 for (i = 0; i < iocnt; i++) {
667 (*__callbacks.io)(io);
668 io->data += io->size;
669 }
670
671 if (io->in) {
672 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
673 if (ret == -1)
674 return -1;
675 }
676
677 return iocnt;
678 }
679
680 int
681 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
682 struct nvmm_exit *exit)
683 {
684 struct nvmm_x64_state state;
685 struct nvmm_io io;
686 uint64_t cnt = 0; /* GCC */
687 uint8_t iobuf[8];
688 int iocnt = 1;
689 gvaddr_t gva = 0; /* GCC */
690 int reg = 0; /* GCC */
691 int ret, seg;
692 bool psld = false;
693
694 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
695 errno = EINVAL;
696 return -1;
697 }
698
699 io.port = exit->u.io.port;
700 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
701 io.size = exit->u.io.operand_size;
702 io.data = iobuf;
703
704 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
705 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
706 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
707 if (ret == -1)
708 return -1;
709
710 if (exit->u.io.rep) {
711 cnt = rep_get_cnt(&state, exit->u.io.address_size);
712 if (__predict_false(cnt == 0)) {
713 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
714 goto out;
715 }
716 }
717
718 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
719 psld = true;
720 }
721
722 /*
723 * Determine GVA.
724 */
725 if (exit->u.io.str) {
726 if (io.in) {
727 reg = NVMM_X64_GPR_RDI;
728 } else {
729 reg = NVMM_X64_GPR_RSI;
730 }
731
732 gva = state.gprs[reg];
733 gva &= size_to_mask(exit->u.io.address_size);
734
735 if (exit->u.io.seg != -1) {
736 seg = exit->u.io.seg;
737 } else {
738 if (io.in) {
739 seg = NVMM_X64_SEG_ES;
740 } else {
741 seg = fetch_segment(mach, &state);
742 if (seg == -1)
743 return -1;
744 }
745 }
746
747 if (__predict_true(is_long_mode(&state))) {
748 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
749 segment_apply(&state.segs[seg], &gva);
750 }
751 } else {
752 ret = segment_check(&state.segs[seg], gva, io.size);
753 if (ret == -1)
754 return -1;
755 segment_apply(&state.segs[seg], &gva);
756 }
757
758 if (exit->u.io.rep && !psld) {
759 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
760 if (iocnt == -1)
761 return -1;
762 goto done;
763 }
764 }
765
766 if (!io.in) {
767 if (!exit->u.io.str) {
768 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
769 } else {
770 ret = read_guest_memory(mach, &state, gva, io.data,
771 io.size);
772 if (ret == -1)
773 return -1;
774 }
775 }
776
777 (*__callbacks.io)(&io);
778
779 if (io.in) {
780 if (!exit->u.io.str) {
781 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
782 if (io.size == 4) {
783 /* Zero-extend to 64 bits. */
784 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
785 }
786 } else {
787 ret = write_guest_memory(mach, &state, gva, io.data,
788 io.size);
789 if (ret == -1)
790 return -1;
791 }
792 }
793
794 done:
795 if (exit->u.io.str) {
796 if (__predict_false(psld)) {
797 state.gprs[reg] -= iocnt * io.size;
798 } else {
799 state.gprs[reg] += iocnt * io.size;
800 }
801 }
802
803 if (exit->u.io.rep) {
804 cnt -= iocnt;
805 rep_set_cnt(&state, exit->u.io.address_size, cnt);
806 if (cnt == 0) {
807 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
808 }
809 } else {
810 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
811 }
812
813 out:
814 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
815 if (ret == -1)
816 return -1;
817
818 return 0;
819 }
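/*
 * Illustrative run loop (editor's sketch, error handling elided): the
 * emulator calls nvmm_assist_io() whenever the kernel reports an I/O
 * exit, with the port callback having been registered beforehand in
 * __callbacks.
 *
 *	struct nvmm_exit exit;
 *
 *	for (;;) {
 *		if (nvmm_vcpu_run(mach, cpuid, &exit) == -1)
 *			break;
 *		if (exit.reason == NVMM_EXIT_IO &&
 *		    nvmm_assist_io(mach, cpuid, &exit) == -1)
 *			break;
 *	}
 */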
820
821 /* -------------------------------------------------------------------------- */
822
823 static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
824 static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
825 static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
826 static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
827 static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
828 static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
829 static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
830
831 /* Legacy prefixes. */
832 #define LEG_LOCK 0xF0
833 #define LEG_REPN 0xF2
834 #define LEG_REP 0xF3
835 #define LEG_OVR_CS 0x2E
836 #define LEG_OVR_SS 0x36
837 #define LEG_OVR_DS 0x3E
838 #define LEG_OVR_ES 0x26
839 #define LEG_OVR_FS 0x64
840 #define LEG_OVR_GS 0x65
841 #define LEG_OPR_OVR 0x66
842 #define LEG_ADR_OVR 0x67
843
844 struct x86_legpref {
845 bool opr_ovr:1;
846 bool adr_ovr:1;
847 bool rep:1;
848 bool repn:1;
849 int seg;
850 };
851
852 struct x86_rexpref {
853 bool present;
854 bool w;
855 bool r;
856 bool x;
857 bool b;
858 };
859
860 struct x86_reg {
861 int num; /* NVMM GPR state index */
862 uint64_t mask;
863 };
864
865 enum x86_disp_type {
866 DISP_NONE,
867 DISP_0,
868 DISP_1,
869 DISP_4
870 };
871
872 struct x86_disp {
873 enum x86_disp_type type;
874 uint64_t data; /* 4 bytes, but can be sign-extended */
875 };
876
877 enum REGMODRM__Mod {
878 MOD_DIS0, /* also, register indirect */
879 MOD_DIS1,
880 MOD_DIS4,
881 MOD_REG
882 };
883
884 enum REGMODRM__Reg {
885 REG_000, /* these fields are indexes to the register map */
886 REG_001,
887 REG_010,
888 REG_011,
889 REG_100,
890 REG_101,
891 REG_110,
892 REG_111
893 };
894
895 enum REGMODRM__Rm {
896 RM_000, /* reg */
897 RM_001, /* reg */
898 RM_010, /* reg */
899 RM_011, /* reg */
900 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
901 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
902 RM_110,
903 RM_111
904 };
905
906 struct x86_regmodrm {
907 bool present;
908 enum REGMODRM__Mod mod;
909 enum REGMODRM__Reg reg;
910 enum REGMODRM__Rm rm;
911 };
912
913 struct x86_immediate {
914 uint64_t data;
915 };
916
917 struct x86_sib {
918 uint8_t scale;
919 const struct x86_reg *idx;
920 const struct x86_reg *bas;
921 };
922
923 enum x86_store_type {
924 STORE_NONE,
925 STORE_REG,
926 STORE_IMM,
927 STORE_SIB,
928 STORE_DMO
929 };
930
931 struct x86_store {
932 enum x86_store_type type;
933 union {
934 const struct x86_reg *reg;
935 struct x86_immediate imm;
936 struct x86_sib sib;
937 uint64_t dmo;
938 } u;
939 struct x86_disp disp;
940 int hardseg;
941 };
942
943 struct x86_instr {
944 size_t len;
945 struct x86_legpref legpref;
946 struct x86_rexpref rexpref;
947 size_t operand_size;
948 size_t address_size;
949 uint64_t zeroextend_mask;
950
951 struct x86_regmodrm regmodrm;
952
953 const struct x86_opcode *opcode;
954
955 struct x86_store src;
956 struct x86_store dst;
957
958 struct x86_store *strm;
959
960 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
961 };
962
963 struct x86_decode_fsm {
964 /* vcpu */
965 bool is64bit;
966 bool is32bit;
967 bool is16bit;
968
969 /* fsm */
970 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
971 uint8_t *buf;
972 uint8_t *end;
973 };
974
975 struct x86_opcode {
976 uint8_t byte;
977 bool regmodrm;
978 bool regtorm;
979 bool dmo;
980 bool todmo;
981 bool movs;
982 bool stos;
983 bool lods;
984 bool szoverride;
985 int defsize;
986 int allsize;
987 bool group1;
988 bool group11;
989 bool immediate;
990 int flags;
991 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
992 };
993
994 struct x86_group_entry {
995 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
996 };
997
998 #define OPSIZE_BYTE 0x01
999 #define OPSIZE_WORD 0x02 /* 2 bytes */
1000 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1001 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1002
1003 #define FLAG_imm8 0x01
1004 #define FLAG_immz 0x02
1005 #define FLAG_ze 0x04
1006
1007 static const struct x86_group_entry group1[8] = {
1008 [1] = { .emul = x86_emul_or },
1009 [4] = { .emul = x86_emul_and },
1010 [6] = { .emul = x86_emul_xor }
1011 };
1012
1013 static const struct x86_group_entry group11[8] = {
1014 [0] = { .emul = x86_emul_mov }
1015 };
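/*
 * Editor's note: for the Group1 opcodes (0x81, 0x83) the ModRM.reg field
 * selects the operation, e.g. /1 is OR, /4 is AND, /6 is XOR. Encodings
 * left NULL above (ADD, ADC, SBB, SUB, CMP) make the decoder bail out in
 * node_regmodrm().
 */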
1016
1017 static const struct x86_opcode primary_opcode_table[] = {
1018 /*
1019 * Group1
1020 */
1021 {
1022 /* Ev, Iz */
1023 .byte = 0x81,
1024 .regmodrm = true,
1025 .regtorm = true,
1026 .szoverride = true,
1027 .defsize = -1,
1028 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1029 .group1 = true,
1030 .immediate = true,
1031 .flags = FLAG_immz,
1032 .emul = NULL /* group1 */
1033 },
1034 {
1035 /* Ev, Ib */
1036 .byte = 0x83,
1037 .regmodrm = true,
1038 .regtorm = true,
1039 .szoverride = true,
1040 .defsize = -1,
1041 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1042 .group1 = true,
1043 .immediate = true,
1044 .flags = FLAG_imm8,
1045 .emul = NULL /* group1 */
1046 },
1047
1048 /*
1049 * Group11
1050 */
1051 {
1052 /* Eb, Ib */
1053 .byte = 0xC6,
1054 .regmodrm = true,
1055 .regtorm = true,
1056 .szoverride = false,
1057 .defsize = OPSIZE_BYTE,
1058 .allsize = -1,
1059 .group11 = true,
1060 .immediate = true,
1061 .emul = NULL /* group11 */
1062 },
1063 {
1064 /* Ev, Iz */
1065 .byte = 0xC7,
1066 .regmodrm = true,
1067 .regtorm = true,
1068 .szoverride = true,
1069 .defsize = -1,
1070 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1071 .group11 = true,
1072 .immediate = true,
1073 .flags = FLAG_immz,
1074 .emul = NULL /* group11 */
1075 },
1076
1077 /*
1078 * OR
1079 */
1080 {
1081 /* Eb, Gb */
1082 .byte = 0x08,
1083 .regmodrm = true,
1084 .regtorm = true,
1085 .szoverride = false,
1086 .defsize = OPSIZE_BYTE,
1087 .allsize = -1,
1088 .emul = x86_emul_or
1089 },
1090 {
1091 /* Ev, Gv */
1092 .byte = 0x09,
1093 .regmodrm = true,
1094 .regtorm = true,
1095 .szoverride = true,
1096 .defsize = -1,
1097 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1098 .emul = x86_emul_or
1099 },
1100 {
1101 /* Gb, Eb */
1102 .byte = 0x0A,
1103 .regmodrm = true,
1104 .regtorm = false,
1105 .szoverride = false,
1106 .defsize = OPSIZE_BYTE,
1107 .allsize = -1,
1108 .emul = x86_emul_or
1109 },
1110 {
1111 /* Gv, Ev */
1112 .byte = 0x0B,
1113 .regmodrm = true,
1114 .regtorm = false,
1115 .szoverride = true,
1116 .defsize = -1,
1117 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1118 .emul = x86_emul_or
1119 },
1120
1121 /*
1122 * AND
1123 */
1124 {
1125 /* Eb, Gb */
1126 .byte = 0x20,
1127 .regmodrm = true,
1128 .regtorm = true,
1129 .szoverride = false,
1130 .defsize = OPSIZE_BYTE,
1131 .allsize = -1,
1132 .emul = x86_emul_and
1133 },
1134 {
1135 /* Ev, Gv */
1136 .byte = 0x21,
1137 .regmodrm = true,
1138 .regtorm = true,
1139 .szoverride = true,
1140 .defsize = -1,
1141 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1142 .emul = x86_emul_and
1143 },
1144 {
1145 /* Gb, Eb */
1146 .byte = 0x22,
1147 .regmodrm = true,
1148 .regtorm = false,
1149 .szoverride = false,
1150 .defsize = OPSIZE_BYTE,
1151 .allsize = -1,
1152 .emul = x86_emul_and
1153 },
1154 {
1155 /* Gv, Ev */
1156 .byte = 0x23,
1157 .regmodrm = true,
1158 .regtorm = false,
1159 .szoverride = true,
1160 .defsize = -1,
1161 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1162 .emul = x86_emul_and
1163 },
1164
1165 /*
1166 * XOR
1167 */
1168 {
1169 /* Eb, Gb */
1170 .byte = 0x30,
1171 .regmodrm = true,
1172 .regtorm = true,
1173 .szoverride = false,
1174 .defsize = OPSIZE_BYTE,
1175 .allsize = -1,
1176 .emul = x86_emul_xor
1177 },
1178 {
1179 /* Ev, Gv */
1180 .byte = 0x31,
1181 .regmodrm = true,
1182 .regtorm = true,
1183 .szoverride = true,
1184 .defsize = -1,
1185 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1186 .emul = x86_emul_xor
1187 },
1188 {
1189 /* Gb, Eb */
1190 .byte = 0x32,
1191 .regmodrm = true,
1192 .regtorm = false,
1193 .szoverride = false,
1194 .defsize = OPSIZE_BYTE,
1195 .allsize = -1,
1196 .emul = x86_emul_xor
1197 },
1198 {
1199 /* Gv, Ev */
1200 .byte = 0x33,
1201 .regmodrm = true,
1202 .regtorm = false,
1203 .szoverride = true,
1204 .defsize = -1,
1205 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1206 .emul = x86_emul_xor
1207 },
1208
1209 /*
1210 * MOV
1211 */
1212 {
1213 /* Eb, Gb */
1214 .byte = 0x88,
1215 .regmodrm = true,
1216 .regtorm = true,
1217 .szoverride = false,
1218 .defsize = OPSIZE_BYTE,
1219 .allsize = -1,
1220 .emul = x86_emul_mov
1221 },
1222 {
1223 /* Ev, Gv */
1224 .byte = 0x89,
1225 .regmodrm = true,
1226 .regtorm = true,
1227 .szoverride = true,
1228 .defsize = -1,
1229 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1230 .emul = x86_emul_mov
1231 },
1232 {
1233 /* Gb, Eb */
1234 .byte = 0x8A,
1235 .regmodrm = true,
1236 .regtorm = false,
1237 .szoverride = false,
1238 .defsize = OPSIZE_BYTE,
1239 .allsize = -1,
1240 .emul = x86_emul_mov
1241 },
1242 {
1243 /* Gv, Ev */
1244 .byte = 0x8B,
1245 .regmodrm = true,
1246 .regtorm = false,
1247 .szoverride = true,
1248 .defsize = -1,
1249 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1250 .emul = x86_emul_mov
1251 },
1252 {
1253 /* AL, Ob */
1254 .byte = 0xA0,
1255 .dmo = true,
1256 .todmo = false,
1257 .szoverride = false,
1258 .defsize = OPSIZE_BYTE,
1259 .allsize = -1,
1260 .emul = x86_emul_mov
1261 },
1262 {
1263 /* rAX, Ov */
1264 .byte = 0xA1,
1265 .dmo = true,
1266 .todmo = false,
1267 .szoverride = true,
1268 .defsize = -1,
1269 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1270 .emul = x86_emul_mov
1271 },
1272 {
1273 /* Ob, AL */
1274 .byte = 0xA2,
1275 .dmo = true,
1276 .todmo = true,
1277 .szoverride = false,
1278 .defsize = OPSIZE_BYTE,
1279 .allsize = -1,
1280 .emul = x86_emul_mov
1281 },
1282 {
1283 /* Ov, rAX */
1284 .byte = 0xA3,
1285 .dmo = true,
1286 .todmo = true,
1287 .szoverride = true,
1288 .defsize = -1,
1289 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1290 .emul = x86_emul_mov
1291 },
1292
1293 /*
1294 * MOVS
1295 */
1296 {
1297 /* Yb, Xb */
1298 .byte = 0xA4,
1299 .movs = true,
1300 .szoverride = false,
1301 .defsize = OPSIZE_BYTE,
1302 .allsize = -1,
1303 .emul = x86_emul_movs
1304 },
1305 {
1306 /* Yv, Xv */
1307 .byte = 0xA5,
1308 .movs = true,
1309 .szoverride = true,
1310 .defsize = -1,
1311 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1312 .emul = x86_emul_movs
1313 },
1314
1315 /*
1316 * STOS
1317 */
1318 {
1319 /* Yb, AL */
1320 .byte = 0xAA,
1321 .stos = true,
1322 .szoverride = false,
1323 .defsize = OPSIZE_BYTE,
1324 .allsize = -1,
1325 .emul = x86_emul_stos
1326 },
1327 {
1328 /* Yv, rAX */
1329 .byte = 0xAB,
1330 .stos = true,
1331 .szoverride = true,
1332 .defsize = -1,
1333 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1334 .emul = x86_emul_stos
1335 },
1336
1337 /*
1338 * LODS
1339 */
1340 {
1341 /* AL, Xb */
1342 .byte = 0xAC,
1343 .lods = true,
1344 .szoverride = false,
1345 .defsize = OPSIZE_BYTE,
1346 .allsize = -1,
1347 .emul = x86_emul_lods
1348 },
1349 {
1350 /* rAX, Xv */
1351 .byte = 0xAD,
1352 .lods = true,
1353 .szoverride = true,
1354 .defsize = -1,
1355 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1356 .emul = x86_emul_lods
1357 },
1358 };
1359
1360 static const struct x86_opcode secondary_opcode_table[] = {
1361 /*
1362 * MOVZX
1363 */
1364 {
1365 /* Gv, Eb */
1366 .byte = 0xB6,
1367 .regmodrm = true,
1368 .regtorm = false,
1369 .szoverride = true,
1370 .defsize = OPSIZE_BYTE,
1371 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1372 .flags = FLAG_ze,
1373 .emul = x86_emul_mov
1374 },
1375 {
1376 /* Gv, Ew */
1377 .byte = 0xB7,
1378 .regmodrm = true,
1379 .regtorm = false,
1380 .szoverride = true,
1381 .defsize = OPSIZE_WORD,
1382 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1383 .flags = FLAG_ze,
1384 .emul = x86_emul_mov
1385 },
1386 };
1387
1388 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1389
1390 /* [REX-present][enc][opsize] */
1391 static const struct x86_reg gpr_map__special[2][4][8] = {
1392 [false] = {
1393 /* No REX prefix. */
1394 [0b00] = {
1395 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1396 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1397 [2] = { -1, 0 },
1398 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1399 [4] = { -1, 0 },
1400 [5] = { -1, 0 },
1401 [6] = { -1, 0 },
1402 [7] = { -1, 0 },
1403 },
1404 [0b01] = {
1405 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1406 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1407 [2] = { -1, 0 },
1408 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1409 [4] = { -1, 0 },
1410 [5] = { -1, 0 },
1411 [6] = { -1, 0 },
1412 [7] = { -1, 0 },
1413 },
1414 [0b10] = {
1415 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1416 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1417 [2] = { -1, 0 },
1418 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1419 [4] = { -1, 0 },
1420 [5] = { -1, 0 },
1421 [6] = { -1, 0 },
1422 [7] = { -1, 0 },
1423 },
1424 [0b11] = {
1425 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1426 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1427 [2] = { -1, 0 },
1428 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1429 [4] = { -1, 0 },
1430 [5] = { -1, 0 },
1431 [6] = { -1, 0 },
1432 [7] = { -1, 0 },
1433 }
1434 },
1435 [true] = {
1436 /* Has REX prefix. */
1437 [0b00] = {
1438 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1439 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1440 [2] = { -1, 0 },
1441 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1442 [4] = { -1, 0 },
1443 [5] = { -1, 0 },
1444 [6] = { -1, 0 },
1445 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1446 },
1447 [0b01] = {
1448 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1449 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1450 [2] = { -1, 0 },
1451 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1452 [4] = { -1, 0 },
1453 [5] = { -1, 0 },
1454 [6] = { -1, 0 },
1455 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1456 },
1457 [0b10] = {
1458 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1459 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1460 [2] = { -1, 0 },
1461 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1462 [4] = { -1, 0 },
1463 [5] = { -1, 0 },
1464 [6] = { -1, 0 },
1465 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1466 },
1467 [0b11] = {
1468 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1469 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1470 [2] = { -1, 0 },
1471 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1472 [4] = { -1, 0 },
1473 [5] = { -1, 0 },
1474 [6] = { -1, 0 },
1475 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1476 }
1477 }
1478 };
1479
1480 /* [depends][enc][size] */
1481 static const struct x86_reg gpr_map[2][8][8] = {
1482 [false] = {
1483 /* Not extended. */
1484 [0b000] = {
1485 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1486 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1487 [2] = { -1, 0 },
1488 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1489 [4] = { -1, 0 },
1490 [5] = { -1, 0 },
1491 [6] = { -1, 0 },
1492 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1493 },
1494 [0b001] = {
1495 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1496 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1497 [2] = { -1, 0 },
1498 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1499 [4] = { -1, 0 },
1500 [5] = { -1, 0 },
1501 [6] = { -1, 0 },
1502 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1503 },
1504 [0b010] = {
1505 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1506 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1507 [2] = { -1, 0 },
1508 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1509 [4] = { -1, 0 },
1510 [5] = { -1, 0 },
1511 [6] = { -1, 0 },
1512 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1513 },
1514 [0b011] = {
1515 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1516 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1517 [2] = { -1, 0 },
1518 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1519 [4] = { -1, 0 },
1520 [5] = { -1, 0 },
1521 [6] = { -1, 0 },
1522 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1523 },
1524 [0b100] = {
1525 [0] = { -1, 0 }, /* SPECIAL */
1526 [1] = { -1, 0 }, /* SPECIAL */
1527 [2] = { -1, 0 },
1528 [3] = { -1, 0 }, /* SPECIAL */
1529 [4] = { -1, 0 },
1530 [5] = { -1, 0 },
1531 [6] = { -1, 0 },
1532 [7] = { -1, 0 }, /* SPECIAL */
1533 },
1534 [0b101] = {
1535 [0] = { -1, 0 }, /* SPECIAL */
1536 [1] = { -1, 0 }, /* SPECIAL */
1537 [2] = { -1, 0 },
1538 [3] = { -1, 0 }, /* SPECIAL */
1539 [4] = { -1, 0 },
1540 [5] = { -1, 0 },
1541 [6] = { -1, 0 },
1542 [7] = { -1, 0 }, /* SPECIAL */
1543 },
1544 [0b110] = {
1545 [0] = { -1, 0 }, /* SPECIAL */
1546 [1] = { -1, 0 }, /* SPECIAL */
1547 [2] = { -1, 0 },
1548 [3] = { -1, 0 }, /* SPECIAL */
1549 [4] = { -1, 0 },
1550 [5] = { -1, 0 },
1551 [6] = { -1, 0 },
1552 [7] = { -1, 0 }, /* SPECIAL */
1553 },
1554 [0b111] = {
1555 [0] = { -1, 0 }, /* SPECIAL */
1556 [1] = { -1, 0 }, /* SPECIAL */
1557 [2] = { -1, 0 },
1558 [3] = { -1, 0 }, /* SPECIAL */
1559 [4] = { -1, 0 },
1560 [5] = { -1, 0 },
1561 [6] = { -1, 0 },
1562 [7] = { -1, 0 }, /* SPECIAL */
1563 },
1564 },
1565 [true] = {
1566 /* Extended. */
1567 [0b000] = {
1568 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1569 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1570 [2] = { -1, 0 },
1571 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1572 [4] = { -1, 0 },
1573 [5] = { -1, 0 },
1574 [6] = { -1, 0 },
1575 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1576 },
1577 [0b001] = {
1578 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1579 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1580 [2] = { -1, 0 },
1581 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1582 [4] = { -1, 0 },
1583 [5] = { -1, 0 },
1584 [6] = { -1, 0 },
1585 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1586 },
1587 [0b010] = {
1588 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1589 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1590 [2] = { -1, 0 },
1591 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1592 [4] = { -1, 0 },
1593 [5] = { -1, 0 },
1594 [6] = { -1, 0 },
1595 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1596 },
1597 [0b011] = {
1598 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1599 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1600 [2] = { -1, 0 },
1601 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1602 [4] = { -1, 0 },
1603 [5] = { -1, 0 },
1604 [6] = { -1, 0 },
1605 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1606 },
1607 [0b100] = {
1608 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1609 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1610 [2] = { -1, 0 },
1611 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1612 [4] = { -1, 0 },
1613 [5] = { -1, 0 },
1614 [6] = { -1, 0 },
1615 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1616 },
1617 [0b101] = {
1618 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1619 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1620 [2] = { -1, 0 },
1621 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1622 [4] = { -1, 0 },
1623 [5] = { -1, 0 },
1624 [6] = { -1, 0 },
1625 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1626 },
1627 [0b110] = {
1628 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1629 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1630 [2] = { -1, 0 },
1631 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1632 [4] = { -1, 0 },
1633 [5] = { -1, 0 },
1634 [6] = { -1, 0 },
1635 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1636 },
1637 [0b111] = {
1638 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1639 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1640 [2] = { -1, 0 },
1641 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1642 [4] = { -1, 0 },
1643 [5] = { -1, 0 },
1644 [6] = { -1, 0 },
1645 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1646 },
1647 }
1648 };
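/*
 * Editor's note, indexing example: with REX.B set, an encoding of 0b001
 * and an 8-byte operand selects gpr_map[true][0b001][7], i.e. R9 with a
 * full 64bit mask; without the extension bit the same encoding selects
 * RCX.
 */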
1649
1650 static int
1651 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1652 {
1653 fsm->fn = NULL;
1654 return -1;
1655 }
1656
1657 static int
1658 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1659 {
1660 if (fsm->buf + n > fsm->end) {
1661 return -1;
1662 }
1663 memcpy(bytes, fsm->buf, n);
1664 return 0;
1665 }
1666
1667 static void
1668 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1669 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1670 {
1671 fsm->buf += n;
1672 if (fsm->buf > fsm->end) {
1673 fsm->fn = node_overflow;
1674 } else {
1675 fsm->fn = fn;
1676 }
1677 }
1678
1679 static const struct x86_reg *
1680 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1681 {
1682 enc &= 0b11;
1683 if (regsize == 8) {
1684 /* May be 64bit without REX */
1685 return &gpr_map__special[1][enc][regsize-1];
1686 }
1687 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1688 }
1689
1690 /*
1691 * Special node, for MOVS. Fake two displacements of zero on the source and
1692 * destination registers.
1693 */
1694 static int
1695 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1696 {
1697 size_t adrsize;
1698
1699 adrsize = instr->address_size;
1700
1701 /* DS:RSI */
1702 instr->src.type = STORE_REG;
1703 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1704 instr->src.disp.type = DISP_0;
1705
1706 /* ES:RDI, force ES */
1707 instr->dst.type = STORE_REG;
1708 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1709 instr->dst.disp.type = DISP_0;
1710 instr->dst.hardseg = NVMM_X64_SEG_ES;
1711
1712 fsm_advance(fsm, 0, NULL);
1713
1714 return 0;
1715 }
1716
1717 /*
1718 * Special node, for STOS and LODS. Fake a displacement of zero on the
1719 * destination register.
1720 */
1721 static int
1722 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1723 {
1724 const struct x86_opcode *opcode = instr->opcode;
1725 struct x86_store *stlo, *streg;
1726 size_t adrsize, regsize;
1727
1728 adrsize = instr->address_size;
1729 regsize = instr->operand_size;
1730
1731 if (opcode->stos) {
1732 streg = &instr->src;
1733 stlo = &instr->dst;
1734 } else {
1735 streg = &instr->dst;
1736 stlo = &instr->src;
1737 }
1738
1739 streg->type = STORE_REG;
1740 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1741
1742 stlo->type = STORE_REG;
1743 if (opcode->stos) {
1744 /* ES:RDI, force ES */
1745 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1746 stlo->hardseg = NVMM_X64_SEG_ES;
1747 } else {
1748 /* DS:RSI */
1749 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1750 }
1751 stlo->disp.type = DISP_0;
1752
1753 fsm_advance(fsm, 0, NULL);
1754
1755 return 0;
1756 }
1757
1758 static int
1759 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1760 {
1761 const struct x86_opcode *opcode = instr->opcode;
1762 struct x86_store *stdmo, *streg;
1763 size_t adrsize, regsize;
1764
1765 adrsize = instr->address_size;
1766 regsize = instr->operand_size;
1767
1768 if (opcode->todmo) {
1769 streg = &instr->src;
1770 stdmo = &instr->dst;
1771 } else {
1772 streg = &instr->dst;
1773 stdmo = &instr->src;
1774 }
1775
1776 streg->type = STORE_REG;
1777 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1778
1779 stdmo->type = STORE_DMO;
1780 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1781 return -1;
1782 }
1783 fsm_advance(fsm, adrsize, NULL);
1784
1785 return 0;
1786 }
1787
1788 static inline uint64_t
1789 sign_extend(uint64_t val, int size)
1790 {
1791 if (size == 1) {
1792 if (val & __BIT(7))
1793 val |= 0xFFFFFFFFFFFFFF00;
1794 } else if (size == 2) {
1795 if (val & __BIT(15))
1796 val |= 0xFFFFFFFFFFFF0000;
1797 } else if (size == 4) {
1798 if (val & __BIT(31))
1799 val |= 0xFFFFFFFF00000000;
1800 }
1801 return val;
1802 }
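/*
 * Editor's note: e.g. sign_extend(0x80, 1) == 0xFFFFFFFFFFFFFF80 while
 * sign_extend(0x7F, 1) == 0x7F, matching how the CPU widens imm8/disp8
 * values.
 */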
1803
1804 static int
1805 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1806 {
1807 const struct x86_opcode *opcode = instr->opcode;
1808 struct x86_store *store;
1809 uint8_t immsize;
1810 size_t sesize = 0;
1811
1812 /* The immediate is the source */
1813 store = &instr->src;
1814 immsize = instr->operand_size;
1815
1816 if (opcode->flags & FLAG_imm8) {
1817 sesize = immsize;
1818 immsize = 1;
1819 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1820 sesize = immsize;
1821 immsize = 4;
1822 }
1823
1824 store->type = STORE_IMM;
1825 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1826 return -1;
1827 }
1828 fsm_advance(fsm, immsize, NULL);
1829
1830 if (sesize != 0) {
1831 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1832 }
1833
1834 return 0;
1835 }
1836
1837 static int
1838 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1839 {
1840 const struct x86_opcode *opcode = instr->opcode;
1841 uint64_t data = 0;
1842 size_t n;
1843
1844 if (instr->strm->disp.type == DISP_1) {
1845 n = 1;
1846 } else { /* DISP4 */
1847 n = 4;
1848 }
1849
1850 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1851 return -1;
1852 }
1853
1854 if (__predict_true(fsm->is64bit)) {
1855 data = sign_extend(data, n);
1856 }
1857
1858 instr->strm->disp.data = data;
1859
1860 if (opcode->immediate) {
1861 fsm_advance(fsm, n, node_immediate);
1862 } else {
1863 fsm_advance(fsm, n, NULL);
1864 }
1865
1866 return 0;
1867 }
1868
1869 static const struct x86_reg *
1870 get_register_idx(struct x86_instr *instr, uint8_t index)
1871 {
1872 uint8_t enc = index;
1873 const struct x86_reg *reg;
1874 size_t regsize;
1875
1876 regsize = instr->address_size;
1877 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1878
1879 if (reg->num == -1) {
1880 reg = resolve_special_register(instr, enc, regsize);
1881 }
1882
1883 return reg;
1884 }
1885
1886 static const struct x86_reg *
1887 get_register_bas(struct x86_instr *instr, uint8_t base)
1888 {
1889 uint8_t enc = base;
1890 const struct x86_reg *reg;
1891 size_t regsize;
1892
1893 regsize = instr->address_size;
1894 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1895 if (reg->num == -1) {
1896 reg = resolve_special_register(instr, enc, regsize);
1897 }
1898
1899 return reg;
1900 }
1901
1902 static int
1903 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1904 {
1905 const struct x86_opcode *opcode;
1906 uint8_t scale, index, base;
1907 bool noindex, nobase;
1908 uint8_t byte;
1909
1910 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
1911 return -1;
1912 }
1913
1914 scale = ((byte & 0b11000000) >> 6);
1915 index = ((byte & 0b00111000) >> 3);
1916 base = ((byte & 0b00000111) >> 0);
1917
1918 opcode = instr->opcode;
1919
1920 noindex = false;
1921 nobase = false;
1922
1923 if (index == 0b100 && !instr->rexpref.x) {
1924 /* Special case: the index is null */
1925 noindex = true;
1926 }
1927
1928 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
1929 /* Special case: the base is null + disp32 */
1930 instr->strm->disp.type = DISP_4;
1931 nobase = true;
1932 }
1933
1934 instr->strm->type = STORE_SIB;
1935 instr->strm->u.sib.scale = (1 << scale);
1936 if (!noindex)
1937 instr->strm->u.sib.idx = get_register_idx(instr, index);
1938 if (!nobase)
1939 instr->strm->u.sib.bas = get_register_bas(instr, base);
1940
1941 /* May have a displacement, or an immediate */
1942 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
1943 fsm_advance(fsm, 1, node_disp);
1944 } else if (opcode->immediate) {
1945 fsm_advance(fsm, 1, node_immediate);
1946 } else {
1947 fsm_advance(fsm, 1, NULL);
1948 }
1949
1950 return 0;
1951 }
1952
1953 static const struct x86_reg *
1954 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
1955 {
1956 uint8_t enc = instr->regmodrm.reg;
1957 const struct x86_reg *reg;
1958 size_t regsize;
1959
1960 regsize = instr->operand_size;
1961
1962 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
1963 if (reg->num == -1) {
1964 reg = resolve_special_register(instr, enc, regsize);
1965 }
1966
1967 return reg;
1968 }
1969
1970 static const struct x86_reg *
1971 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
1972 {
1973 uint8_t enc = instr->regmodrm.rm;
1974 const struct x86_reg *reg;
1975 size_t regsize;
1976
1977 if (instr->strm->disp.type == DISP_NONE) {
1978 regsize = instr->operand_size;
1979 } else {
1980 /* Indirect access, the size is that of the address. */
1981 regsize = instr->address_size;
1982 }
1983
1984 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1985 if (reg->num == -1) {
1986 reg = resolve_special_register(instr, enc, regsize);
1987 }
1988
1989 return reg;
1990 }
1991
1992 static inline bool
1993 has_sib(struct x86_instr *instr)
1994 {
1995 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
1996 }
1997
1998 static inline bool
1999 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2000 {
2001 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2002 instr->regmodrm.rm == RM_RBP_DISP32);
2003 }
2004
2005 static inline bool
2006 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2007 {
2008 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2009 instr->regmodrm.rm == RM_RBP_DISP32);
2010 }
2011
2012 static enum x86_disp_type
2013 get_disp_type(struct x86_instr *instr)
2014 {
2015 switch (instr->regmodrm.mod) {
2016 case MOD_DIS0: /* indirect */
2017 return DISP_0;
2018 case MOD_DIS1: /* indirect+1 */
2019 return DISP_1;
2020 case MOD_DIS4: /* indirect+4 */
2021 return DISP_4;
2022 case MOD_REG: /* direct */
2023 default: /* gcc */
2024 return DISP_NONE;
2025 }
2026 }
2027
2028 static int
2029 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2030 {
2031 struct x86_store *strg, *strm;
2032 const struct x86_opcode *opcode;
2033 const struct x86_reg *reg;
2034 uint8_t byte;
2035
2036 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2037 return -1;
2038 }
2039
2040 opcode = instr->opcode;
2041
2042 instr->regmodrm.present = true;
2043 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2044 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2045 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2046
2047 if (opcode->regtorm) {
2048 strg = &instr->src;
2049 strm = &instr->dst;
2050 } else { /* RM to REG */
2051 strm = &instr->src;
2052 strg = &instr->dst;
2053 }
2054
2055 /* Save for later use. */
2056 instr->strm = strm;
2057
2058 /*
2059 * Special cases: Groups. The REG field of REGMODRM is the index in the
2060 * group. The source (immediate) operand is set later by the Immediate node, if any.
2061 */
2062 if (opcode->group1) {
2063 if (group1[instr->regmodrm.reg].emul == NULL) {
2064 return -1;
2065 }
2066 instr->emul = group1[instr->regmodrm.reg].emul;
2067 } else if (opcode->group11) {
2068 if (group11[instr->regmodrm.reg].emul == NULL) {
2069 return -1;
2070 }
2071 instr->emul = group11[instr->regmodrm.reg].emul;
2072 }
2073
2074 if (!opcode->immediate) {
2075 reg = get_register_reg(instr, opcode);
2076 if (reg == NULL) {
2077 return -1;
2078 }
2079 strg->type = STORE_REG;
2080 strg->u.reg = reg;
2081 }
2082
2083 if (has_sib(instr)) {
2084 /* Overwrites RM */
2085 fsm_advance(fsm, 1, node_sib);
2086 return 0;
2087 }
2088
2089 /* The displacement applies to RM. */
2090 strm->disp.type = get_disp_type(instr);
2091
2092 if (is_rip_relative(fsm, instr)) {
2093 /* Overwrites RM */
2094 strm->type = STORE_REG;
2095 strm->u.reg = &gpr_map__rip;
2096 strm->disp.type = DISP_4;
2097 fsm_advance(fsm, 1, node_disp);
2098 return 0;
2099 }
2100
2101 if (is_disp32_only(fsm, instr)) {
2102 /* Overwrites RM */
2103 strm->type = STORE_REG;
2104 strm->u.reg = NULL;
2105 strm->disp.type = DISP_4;
2106 fsm_advance(fsm, 1, node_disp);
2107 return 0;
2108 }
2109
2110 reg = get_register_rm(instr, opcode);
2111 if (reg == NULL) {
2112 return -1;
2113 }
2114 strm->type = STORE_REG;
2115 strm->u.reg = reg;
2116
2117 if (strm->disp.type == DISP_NONE) {
2118 /* Direct register addressing mode */
2119 if (opcode->immediate) {
2120 fsm_advance(fsm, 1, node_immediate);
2121 } else {
2122 fsm_advance(fsm, 1, NULL);
2123 }
2124 } else if (strm->disp.type == DISP_0) {
2125 /* Indirect register addressing mode */
2126 if (opcode->immediate) {
2127 fsm_advance(fsm, 1, node_immediate);
2128 } else {
2129 fsm_advance(fsm, 1, NULL);
2130 }
2131 } else {
2132 fsm_advance(fsm, 1, node_disp);
2133 }
2134
2135 return 0;
2136 }
2137
2138 static size_t
2139 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2140 {
2141 const struct x86_opcode *opcode = instr->opcode;
2142 int opsize;
2143
2144 /* Get the opsize */
2145 if (!opcode->szoverride) {
2146 opsize = opcode->defsize;
2147 } else if (instr->rexpref.present && instr->rexpref.w) {
2148 opsize = 8;
2149 } else {
2150 if (!fsm->is16bit) {
2151 if (instr->legpref.opr_ovr) {
2152 opsize = 2;
2153 } else {
2154 opsize = 4;
2155 }
2156 } else { /* 16bit */
2157 if (instr->legpref.opr_ovr) {
2158 opsize = 4;
2159 } else {
2160 opsize = 2;
2161 }
2162 }
2163 }
2164
2165 /* See if available */
2166 if ((opcode->allsize & opsize) == 0) {
2167 // XXX do we care?
2168 }
2169
2170 return opsize;
2171 }
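/*
 * Editor's note: for 0x89 (MOV Ev,Gv) decoded in 64bit code this returns
 * 4 by default, 8 when REX.W is set, and 2 when a 0x66 prefix is present.
 */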
2172
2173 static size_t
2174 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2175 {
2176 if (fsm->is64bit) {
2177 if (__predict_false(instr->legpref.adr_ovr)) {
2178 return 4;
2179 }
2180 return 8;
2181 }
2182
2183 if (fsm->is32bit) {
2184 if (__predict_false(instr->legpref.adr_ovr)) {
2185 return 2;
2186 }
2187 return 4;
2188 }
2189
2190 /* 16bit. */
2191 if (__predict_false(instr->legpref.adr_ovr)) {
2192 return 4;
2193 }
2194 return 2;
2195 }
2196
2197 static int
2198 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2199 {
2200 const struct x86_opcode *opcode;
2201 uint8_t byte;
2202 size_t i, n;
2203
2204 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2205 return -1;
2206 }
2207
2208 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2209 for (i = 0; i < n; i++) {
2210 if (primary_opcode_table[i].byte == byte)
2211 break;
2212 }
2213 if (i == n) {
2214 return -1;
2215 }
2216 opcode = &primary_opcode_table[i];
2217
2218 instr->opcode = opcode;
2219 instr->emul = opcode->emul;
2220 instr->operand_size = get_operand_size(fsm, instr);
2221 instr->address_size = get_address_size(fsm, instr);
2222
2223 if (fsm->is64bit && (instr->operand_size == 4)) {
2224 /* Zero-extend to 64 bits. */
2225 instr->zeroextend_mask = ~size_to_mask(4);
2226 }
2227
2228 if (opcode->regmodrm) {
2229 fsm_advance(fsm, 1, node_regmodrm);
2230 } else if (opcode->dmo) {
2231 /* Direct-Memory Offsets */
2232 fsm_advance(fsm, 1, node_dmo);
2233 } else if (opcode->stos || opcode->lods) {
2234 fsm_advance(fsm, 1, node_stlo);
2235 } else if (opcode->movs) {
2236 fsm_advance(fsm, 1, node_movs);
2237 } else {
2238 return -1;
2239 }
2240
2241 return 0;
2242 }
2243
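/*
 * Look the byte following the 0x0F escape up in the two-byte (secondary)
 * opcode table. Opcodes flagged FLAG_ze (such as MOVZX) zero-extend a
 * smaller source operand, so the operand size is shrunk to the default
 * size and the zero-extend mask adjusted accordingly.
 */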
2244 static int
2245 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2246 {
2247 const struct x86_opcode *opcode;
2248 uint8_t byte;
2249 size_t i, n;
2250
2251 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2252 return -1;
2253 }
2254
2255 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2256 for (i = 0; i < n; i++) {
2257 if (secondary_opcode_table[i].byte == byte)
2258 break;
2259 }
2260 if (i == n) {
2261 return -1;
2262 }
2263 opcode = &secondary_opcode_table[i];
2264
2265 instr->opcode = opcode;
2266 instr->emul = opcode->emul;
2267 instr->operand_size = get_operand_size(fsm, instr);
2268 instr->address_size = get_address_size(fsm, instr);
2269
2270 if (fsm->is64bit && (instr->operand_size == 4)) {
2271 /* Zero-extend to 64 bits. */
2272 instr->zeroextend_mask = ~size_to_mask(4);
2273 }
2274
2275 if (opcode->flags & FLAG_ze) {
2276 /*
2277 * Compute the mask for zero-extension, and shrink the operand
2278 * size accordingly: we move fewer bytes.
2279 */
2280 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2281 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2282 instr->operand_size = opcode->defsize;
2283 }
2284
2285 if (opcode->regmodrm) {
2286 fsm_advance(fsm, 1, node_regmodrm);
2287 } else {
2288 return -1;
2289 }
2290
2291 return 0;
2292 }
2293
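/*
 * Dispatch on the first opcode byte: 0x0F escapes to the secondary opcode
 * table, VEX-encoded instructions are rejected, and everything else is
 * treated as a primary opcode.
 */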
2294 static int
2295 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2296 {
2297 uint8_t byte;
2298
2299 #define ESCAPE 0x0F
2300 #define VEX_1 0xC5
2301 #define VEX_2 0xC4
2302 #define XOP 0x8F
2303
2304 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2305 return -1;
2306 }
2307
2308 /*
2309 * We don't take XOP. It is AMD-specific, and it was removed shortly
2310 * after being introduced.
2311 */
2312 if (byte == ESCAPE) {
2313 fsm_advance(fsm, 1, node_secondary_opcode);
2314 } else if (!instr->rexpref.present) {
2315 if (byte == VEX_1) {
2316 return -1;
2317 } else if (byte == VEX_2) {
2318 return -1;
2319 } else {
2320 fsm->fn = node_primary_opcode;
2321 }
2322 } else {
2323 fsm->fn = node_primary_opcode;
2324 }
2325
2326 return 0;
2327 }
2328
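/*
 * Parse an optional REX prefix (0x40-0x4F). REX is only valid in 64-bit
 * mode; when no REX prefix is present, no byte is consumed.
 */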
2329 static int
2330 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2331 {
2332 struct x86_rexpref *rexpref = &instr->rexpref;
2333 uint8_t byte;
2334 size_t n = 0;
2335
2336 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2337 return -1;
2338 }
2339
2340 if (byte >= 0x40 && byte <= 0x4F) {
2341 if (__predict_false(!fsm->is64bit)) {
2342 return -1;
2343 }
2344 rexpref->present = true;
2345 rexpref->w = ((byte & 0x8) != 0);
2346 rexpref->r = ((byte & 0x4) != 0);
2347 rexpref->x = ((byte & 0x2) != 0);
2348 rexpref->b = ((byte & 0x1) != 0);
2349 n = 1;
2350 }
2351
2352 fsm_advance(fsm, n, node_main);
2353 return 0;
2354 }
2355
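/*
 * Consume legacy prefixes (operand/address size overrides, segment
 * overrides, REP/REPN, LOCK), one byte per iteration, until a byte that
 * is not a legacy prefix is reached.
 */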
2356 static int
2357 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2358 {
2359 uint8_t byte;
2360
2361 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2362 return -1;
2363 }
2364
2365 if (byte == LEG_OPR_OVR) {
2366 instr->legpref.opr_ovr = 1;
2367 } else if (byte == LEG_OVR_DS) {
2368 instr->legpref.seg = NVMM_X64_SEG_DS;
2369 } else if (byte == LEG_OVR_ES) {
2370 instr->legpref.seg = NVMM_X64_SEG_ES;
2371 } else if (byte == LEG_REP) {
2372 instr->legpref.rep = 1;
2373 } else if (byte == LEG_OVR_GS) {
2374 instr->legpref.seg = NVMM_X64_SEG_GS;
2375 } else if (byte == LEG_OVR_FS) {
2376 instr->legpref.seg = NVMM_X64_SEG_FS;
2377 } else if (byte == LEG_ADR_OVR) {
2378 instr->legpref.adr_ovr = 1;
2379 } else if (byte == LEG_OVR_CS) {
2380 instr->legpref.seg = NVMM_X64_SEG_CS;
2381 } else if (byte == LEG_OVR_SS) {
2382 instr->legpref.seg = NVMM_X64_SEG_SS;
2383 } else if (byte == LEG_REPN) {
2384 instr->legpref.repn = 1;
2385 } else if (byte == LEG_LOCK) {
2386 /* ignore */
2387 } else {
2388 /* not a legacy prefix */
2389 fsm_advance(fsm, 0, node_rex_prefix);
2390 return 0;
2391 }
2392
2393 fsm_advance(fsm, 1, node_legacy_prefix);
2394 return 0;
2395 }
2396
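/*
 * Decode the given instruction bytes by running the FSM to completion.
 * Each node consumes part of the instruction and selects the next node;
 * decoding ends when a node sets fsm.fn to NULL, or fails with -1.
 *
 * For instance, the bytes "88 18" (MOV [RAX],BL) are expected to flow
 * through node_legacy_prefix -> node_rex_prefix -> node_main ->
 * node_primary_opcode -> node_regmodrm.
 */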
2397 static int
2398 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2399 struct nvmm_x64_state *state)
2400 {
2401 struct x86_decode_fsm fsm;
2402 int ret;
2403
2404 memset(instr, 0, sizeof(*instr));
2405 instr->legpref.seg = -1;
2406
2407 fsm.is64bit = is_64bit(state);
2408 fsm.is32bit = is_32bit(state);
2409 fsm.is16bit = is_16bit(state);
2410
2411 fsm.fn = node_legacy_prefix;
2412 fsm.buf = inst_bytes;
2413 fsm.end = inst_bytes + inst_len;
2414
2415 while (fsm.fn != NULL) {
2416 ret = (*fsm.fn)(&fsm, instr);
2417 if (ret == -1)
2418 return -1;
2419 }
2420
2421 instr->len = fsm.buf - inst_bytes;
2422
2423 return 0;
2424 }
2425
2426 /* -------------------------------------------------------------------------- */
2427
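/*
 * Compute the x86 parity flag: PF is set when the least-significant byte
 * of the result contains an even number of 1 bits.
 */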
2428 static inline uint8_t
2429 compute_parity(uint8_t *data)
2430 {
2431 uint8_t val = data[0];
2432
2437 val ^= val >> 4;
2438 val ^= val >> 2;
2439 val ^= val >> 1;
2440 return (~val) & 1;
2441 }
2442
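/*
 * The OR/AND/XOR emulations below share the same pattern: on entry,
 * mem->data holds the register or immediate operand; the memory operand
 * is fetched through the callback, combined with it, and written back if
 * the original access was a write. RFLAGS is then updated: OF and CF are
 * cleared, ZF/SF/PF are computed from the result.
 */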
2443 static void
2444 x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2445 uint64_t *gprs)
2446 {
2447 const bool write = mem->write;
2448 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2449 uint8_t data[8];
2450 size_t i;
2451
2452 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2453
2454 memcpy(data, mem->data, sizeof(data));
2455
2456 /* Fetch the value to be OR'ed. */
2457 mem->write = false;
2458 (*cb)(mem);
2459
2460 /* Perform the OR. ZF must be set iff the whole result is zero. */
fl |= PSL_Z;
2461 for (i = 0; i < mem->size; i++) {
2462 mem->data[i] |= data[i];
2463 if (mem->data[i] != 0)
2464 fl &= ~PSL_Z;
2465 }
2466 if (mem->data[mem->size-1] & __BIT(7))
2467 fl |= PSL_N;
2468 if (compute_parity(mem->data))
2469 fl |= PSL_PF;
2470
2471 if (write) {
2472 /* Write back the result. */
2473 mem->write = true;
2474 (*cb)(mem);
2475 }
2476
2477 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2478 }
2479
2480 static void
2481 x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2482 uint64_t *gprs)
2483 {
2484 const bool write = mem->write;
2485 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2486 uint8_t data[8];
2487 size_t i;
2488
2489 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2490
2491 memcpy(data, mem->data, sizeof(data));
2492
2493 /* Fetch the value to be AND'ed. */
2494 mem->write = false;
2495 (*cb)(mem);
2496
2497 /* Perform the AND. ZF must be set iff the whole result is zero. */
fl |= PSL_Z;
2498 for (i = 0; i < mem->size; i++) {
2499 mem->data[i] &= data[i];
2500 if (mem->data[i] != 0)
2501 fl &= ~PSL_Z;
2502 }
2503 if (mem->data[mem->size-1] & __BIT(7))
2504 fl |= PSL_N;
2505 if (compute_parity(mem->data))
2506 fl |= PSL_PF;
2507
2508 if (write) {
2509 /* Write back the result. */
2510 mem->write = true;
2511 (*cb)(mem);
2512 }
2513
2514 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2515 }
2516
2517 static void
2518 x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2519 uint64_t *gprs)
2520 {
2521 const bool write = mem->write;
2522 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2523 uint8_t data[8];
2524 size_t i;
2525
2526 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2527
2528 memcpy(data, mem->data, sizeof(data));
2529
2530 /* Fetch the value to be XOR'ed. */
2531 mem->write = false;
2532 (*cb)(mem);
2533
2534 /* Perform the XOR. ZF must be set iff the whole result is zero. */
fl |= PSL_Z;
2535 for (i = 0; i < mem->size; i++) {
2536 mem->data[i] ^= data[i];
2537 if (mem->data[i] != 0)
2538 fl &= ~PSL_Z;
2539 }
2540 if (mem->data[mem->size-1] & __BIT(7))
2541 fl |= PSL_N;
2542 if (compute_parity(mem->data))
2543 fl |= PSL_PF;
2544
2545 if (write) {
2546 /* Write back the result. */
2547 mem->write = true;
2548 (*cb)(mem);
2549 }
2550
2551 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2552 }
2553
2554 static void
2555 x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2556 uint64_t *gprs)
2557 {
2558 /*
2559 * Plain move: nothing to compute, just forward the access to the callback.
2560 */
2561 (*cb)(mem);
2562 }
2563
2564 static void
2565 x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2566 uint64_t *gprs)
2567 {
2568 /*
2569 * Just move, and update RDI.
2570 */
2571 (*cb)(mem);
2572
2573 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2574 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2575 } else {
2576 gprs[NVMM_X64_GPR_RDI] += mem->size;
2577 }
2578 }
2579
2580 static void
2581 x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2582 uint64_t *gprs)
2583 {
2584 /*
2585 * Just move, and update RSI.
2586 */
2587 (*cb)(mem);
2588
2589 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2590 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2591 } else {
2592 gprs[NVMM_X64_GPR_RSI] += mem->size;
2593 }
2594 }
2595
2596 static void
2597 x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2598 uint64_t *gprs)
2599 {
2600 /*
2601 * Special instruction: double memory operand. Don't call the cb: the
2602 * copy has already been performed by the caller; just update RSI/RDI.
2603 */
2604
2605 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2606 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2607 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2608 } else {
2609 gprs[NVMM_X64_GPR_RSI] += mem->size;
2610 gprs[NVMM_X64_GPR_RDI] += mem->size;
2611 }
2612 }
2613
2614 /* -------------------------------------------------------------------------- */
2615
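/* Read a GPR, truncated to the instruction's address size. */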
2616 static inline uint64_t
2617 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2618 {
2619 uint64_t val;
2620
2621 val = state->gprs[gpr];
2622 val &= size_to_mask(instr->address_size);
2623
2624 return val;
2625 }
2626
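/*
 * Compute the guest virtual address referenced by a store: the base
 * register (or SIB base plus scale*index, or a direct memory offset),
 * plus the displacement, with the segment base applied. In long mode
 * only FS/GS segmentation applies; otherwise the access is also checked
 * against the segment limit.
 */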
2627 static int
2628 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2629 struct x86_store *store, gvaddr_t *gvap, size_t size)
2630 {
2631 struct x86_sib *sib;
2632 gvaddr_t gva = 0;
2633 uint64_t reg;
2634 int ret, seg;
2635
2636 if (store->type == STORE_SIB) {
2637 sib = &store->u.sib;
2638 if (sib->bas != NULL)
2639 gva += gpr_read_address(instr, state, sib->bas->num);
2640 if (sib->idx != NULL) {
2641 reg = gpr_read_address(instr, state, sib->idx->num);
2642 gva += sib->scale * reg;
2643 }
2644 } else if (store->type == STORE_REG) {
2645 if (store->u.reg == NULL) {
2646 /* The base is null. Happens with disp32-only. */
2647 } else {
2648 gva = gpr_read_address(instr, state, store->u.reg->num);
2649 }
2650 } else {
2651 gva = store->u.dmo;
2652 }
2653
2654 if (store->disp.type != DISP_NONE) {
2655 gva += store->disp.data;
2656 }
2657
2658 if (store->hardseg != 0) {
2659 seg = store->hardseg;
2660 } else {
2661 if (__predict_false(instr->legpref.seg != -1)) {
2662 seg = instr->legpref.seg;
2663 } else {
2664 seg = NVMM_X64_SEG_DS;
2665 }
2666 }
2667
2668 if (__predict_true(is_long_mode(state))) {
2669 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2670 segment_apply(&state->segs[seg], &gva);
2671 }
2672 } else {
2673 ret = segment_check(&state->segs[seg], gva, size);
2674 if (ret == -1)
2675 return -1;
2676 segment_apply(&state->segs[seg], &gva);
2677 }
2678
2679 *gvap = gva;
2680 return 0;
2681 }
2682
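/*
 * Fetch the instruction at RIP and scan its legacy prefixes, to determine
 * the effective segment of its memory operand (DS unless a segment
 * override prefix is present).
 */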
2683 static int
2684 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2685 {
2686 uint8_t inst_bytes[15], byte;
2687 size_t i, fetchsize;
2688 gvaddr_t gva;
2689 int ret, seg;
2690
2691 fetchsize = sizeof(inst_bytes);
2692
2693 gva = state->gprs[NVMM_X64_GPR_RIP];
2694 if (__predict_false(!is_long_mode(state))) {
2695 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2696 fetchsize);
2697 if (ret == -1)
2698 return -1;
2699 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2700 }
2701
2702 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2703 if (ret == -1)
2704 return -1;
2705
2706 seg = NVMM_X64_SEG_DS;
2707 for (i = 0; i < fetchsize; i++) {
2708 byte = inst_bytes[i];
2709
2710 if (byte == LEG_OVR_DS) {
2711 seg = NVMM_X64_SEG_DS;
2712 } else if (byte == LEG_OVR_ES) {
2713 seg = NVMM_X64_SEG_ES;
2714 } else if (byte == LEG_OVR_GS) {
2715 seg = NVMM_X64_SEG_GS;
2716 } else if (byte == LEG_OVR_FS) {
2717 seg = NVMM_X64_SEG_FS;
2718 } else if (byte == LEG_OVR_CS) {
2719 seg = NVMM_X64_SEG_CS;
2720 } else if (byte == LEG_OVR_SS) {
2721 seg = NVMM_X64_SEG_SS;
2722 } else if (byte == LEG_OPR_OVR) {
2723 /* nothing */
2724 } else if (byte == LEG_ADR_OVR) {
2725 /* nothing */
2726 } else if (byte == LEG_REP) {
2727 /* nothing */
2728 } else if (byte == LEG_REPN) {
2729 /* nothing */
2730 } else if (byte == LEG_LOCK) {
2731 /* nothing */
2732 } else {
2733 return seg;
2734 }
2735 }
2736
2737 return seg;
2738 }
2739
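/*
 * Fetch the instruction bytes at RIP into the exit structure, for the case
 * where the kernel did not provide them.
 */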
2740 static int
2741 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2742 struct nvmm_exit *exit)
2743 {
2744 size_t fetchsize;
2745 gvaddr_t gva;
2746 int ret;
2747
2748 fetchsize = sizeof(exit->u.mem.inst_bytes);
2749
2750 gva = state->gprs[NVMM_X64_GPR_RIP];
2751 if (__predict_false(!is_long_mode(state))) {
2752 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2753 fetchsize);
2754 if (ret == -1)
2755 return -1;
2756 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2757 }
2758
2759 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2760 fetchsize);
2761 if (ret == -1)
2762 return -1;
2763
2764 exit->u.mem.inst_len = fetchsize;
2765
2766 return 0;
2767 }
2768
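/*
 * Emulate an instruction with two memory operands (MOVS): read the source,
 * write it to the destination, then let the emul function update RSI/RDI.
 * Only mem.size is meaningful here, since the copy is already done.
 */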
2769 static int
2770 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2771 struct x86_instr *instr)
2772 {
2773 struct nvmm_mem mem;
2774 uint8_t data[8];
2775 gvaddr_t gva;
2776 size_t size;
2777 int ret;
2778
2779 size = instr->operand_size;
2780
2781 /* Source. */
2782 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2783 if (ret == -1)
2784 return -1;
2785 ret = read_guest_memory(mach, state, gva, data, size);
2786 if (ret == -1)
2787 return -1;
2788
2789 /* Destination. */
2790 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
2791 if (ret == -1)
2792 return -1;
2793 ret = write_guest_memory(mach, state, gva, data, size);
2794 if (ret == -1)
2795 return -1;
2796
2797 mem.size = size;
2798 (*instr->emul)(&mem, NULL, state->gprs);
2799
2800 return 0;
2801 }
2802
2803 #define DISASSEMBLER_BUG() \
2804 do { \
2805 errno = EINVAL; \
2806 return -1; \
2807 } while (0)
2808
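/*
 * Emulate an instruction with a single memory operand. The direction is
 * inferred from the source operand: a direct register or an immediate
 * means the guest writes to memory; otherwise the guest reads from memory
 * into the destination register, honoring the zero-extend mask.
 */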
2809 static int
2810 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2811 struct x86_instr *instr, struct nvmm_exit *exit)
2812 {
2813 struct nvmm_mem mem;
2814 uint8_t membuf[8];
2815 uint64_t val;
2816
2817 memset(membuf, 0, sizeof(membuf));
2818
2819 mem.gpa = exit->u.mem.gpa;
2820 mem.size = instr->operand_size;
2821 mem.data = membuf;
2822
2823 /* Determine the direction. */
2824 switch (instr->src.type) {
2825 case STORE_REG:
2826 if (instr->src.disp.type != DISP_NONE) {
2827 /* Indirect access. */
2828 mem.write = false;
2829 } else {
2830 /* Direct access. */
2831 mem.write = true;
2832 }
2833 break;
2834 case STORE_IMM:
2835 mem.write = true;
2836 break;
2837 case STORE_SIB:
2838 mem.write = false;
2839 break;
2840 case STORE_DMO:
2841 mem.write = false;
2842 break;
2843 default:
2844 DISASSEMBLER_BUG();
2845 }
2846
2847 if (mem.write) {
2848 switch (instr->src.type) {
2849 case STORE_REG:
2850 if (instr->src.disp.type != DISP_NONE) {
2851 DISASSEMBLER_BUG();
2852 }
2853 val = state->gprs[instr->src.u.reg->num];
2854 val = __SHIFTOUT(val, instr->src.u.reg->mask);
2855 memcpy(mem.data, &val, mem.size);
2856 break;
2857 case STORE_IMM:
2858 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
2859 break;
2860 default:
2861 DISASSEMBLER_BUG();
2862 }
2863 }
2864
2865 (*instr->emul)(&mem, __callbacks.mem, state->gprs);
2866
2867 if (!mem.write) {
2868 if (instr->dst.type != STORE_REG) {
2869 DISASSEMBLER_BUG();
2870 }
2871 memcpy(&val, mem.data, sizeof(uint64_t));
2872 val = __SHIFTIN(val, instr->dst.u.reg->mask);
2873 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
2874 state->gprs[instr->dst.u.reg->num] |= val;
2875 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
2876 }
2877
2878 return 0;
2879 }
2880
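/*
 * Assist the kernel with a memory (MMIO) exit: fetch and decode the
 * faulting instruction, emulate it through the registered memory callback,
 * handle REP/REPN counters, and advance RIP.
 *
 * A minimal caller sketch (illustrative only; VM setup and error handling
 * omitted):
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
 *		if (exit.reason == NVMM_EXIT_MEMORY) {
 *			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
 *				break;
 *		}
 *	}
 */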
2881 int
2882 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
2883 struct nvmm_exit *exit)
2884 {
2885 struct nvmm_x64_state state;
2886 struct x86_instr instr;
2887 uint64_t cnt = 0; /* initialized only to appease GCC */
2888 int ret;
2889
2890 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
2891 errno = EINVAL;
2892 return -1;
2893 }
2894
2895 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
2896 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
2897 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
2898 if (ret == -1)
2899 return -1;
2900
2901 if (exit->u.mem.inst_len == 0) {
2902 /*
2903 * The instruction was not fetched from the kernel. Fetch
2904 * it ourselves.
2905 */
2906 ret = fetch_instruction(mach, &state, exit);
2907 if (ret == -1)
2908 return -1;
2909 }
2910
2911 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
2912 &instr, &state);
2913 if (ret == -1) {
2914 errno = ENODEV;
2915 return -1;
2916 }
2917
2918 if (instr.legpref.rep || instr.legpref.repn) {
2919 cnt = rep_get_cnt(&state, instr.address_size);
2920 if (__predict_false(cnt == 0)) {
2921 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2922 goto out;
2923 }
2924 }
2925
2926 if (instr.opcode->movs) {
2927 ret = assist_mem_double(mach, &state, &instr);
2928 } else {
2929 ret = assist_mem_single(mach, &state, &instr, exit);
2930 }
2931 if (ret == -1) {
2932 errno = ENODEV;
2933 return -1;
2934 }
2935
2936 if (instr.legpref.rep || instr.legpref.repn) {
2937 cnt -= 1;
2938 rep_set_cnt(&state, instr.address_size, cnt);
2939 if (cnt == 0) {
2940 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2941 } else if (__predict_false(instr.legpref.repn)) {
2942 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
2943 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2944 }
2945 }
2946 } else {
2947 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2948 }
2949
2950 out:
2951 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
2952 if (ret == -1)
2953 return -1;
2954
2955 return 0;
2956 }
2957