/*	$NetBSD: libnvmm_x86.c,v 1.14 2019/01/08 07:34:22 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
/*
 * Undocumented debugging function. Helpful during development: prints a
 * summary of the VCPU state (GPRs, segments, EFER, CRs, CPL) to stdout.
 */
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 size_t i;
64 int ret;
65
66 const char *segnames[] = {
67 "CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
68 };
69
70 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
71 if (ret == -1)
72 return -1;
73
74 printf("+ VCPU id=%d\n", (int)cpuid);
75 printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
76 printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
77 printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
78 printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
79 printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
80 for (i = 0; i < NVMM_X64_NSEG; i++) {
81 printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d\n",
82 segnames[i],
83 state.segs[i].selector,
84 (void *)state.segs[i].base,
85 (void *)state.segs[i].limit,
86 state.segs[i].attrib.p, state.segs[i].attrib.def32);
87 }
88 printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
89 printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
90 printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
91 printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
92 printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
93 printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);
94
95 return 0;
96 }
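
/*
 * Example use (illustrative only; it assumes 'mach' and VCPU 0 have already
 * been created with the usual nvmm_machine_create()/nvmm_vcpu_create()
 * calls):
 *
 *	if (nvmm_vcpu_dump(&mach, 0) == -1)
 *		err(EXIT_FAILURE, "nvmm_vcpu_dump");
 */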
97
98 /* -------------------------------------------------------------------------- */
99
100 #define PTE32_L1_SHIFT 12
101 #define PTE32_L2_SHIFT 22
102
103 #define PTE32_L2_MASK 0xffc00000
104 #define PTE32_L1_MASK 0x003ff000
105
106 #define PTE32_L2_FRAME (PTE32_L2_MASK)
107 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
108
109 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
110 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
111
112 typedef uint32_t pte_32bit_t;
113
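/*
 * Two-level (non-PAE) 32bit page table walk: translate the guest virtual
 * address 'gva' into a guest physical address, using the page directory
 * pointed to by 'cr3'. The page protection accumulated along the walk is
 * returned in 'prot'. 4MB superpages at L2 are accepted only if the guest
 * has CR4.PSE set.
 */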
114 static int
115 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
116 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
117 {
118 gpaddr_t L2gpa, L1gpa;
119 uintptr_t L2hva, L1hva;
120 pte_32bit_t *pdir, pte;
121
122 /* We begin with an RWXU access. */
123 *prot = NVMM_PROT_ALL;
124
125 /* Parse L2. */
126 L2gpa = (cr3 & PG_FRAME);
127 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
128 return -1;
129 pdir = (pte_32bit_t *)L2hva;
130 pte = pdir[pte32_l2idx(gva)];
131 if ((pte & PG_V) == 0)
132 return -1;
133 if ((pte & PG_u) == 0)
134 *prot &= ~NVMM_PROT_USER;
135 if ((pte & PG_KW) == 0)
136 *prot &= ~NVMM_PROT_WRITE;
137 if ((pte & PG_PS) && !has_pse)
138 return -1;
139 if (pte & PG_PS) {
140 *gpa = (pte & PTE32_L2_FRAME);
141 *gpa = *gpa + (gva & PTE32_L1_MASK);
142 return 0;
143 }
144
145 /* Parse L1. */
146 L1gpa = (pte & PG_FRAME);
147 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
148 return -1;
149 pdir = (pte_32bit_t *)L1hva;
150 pte = pdir[pte32_l1idx(gva)];
151 if ((pte & PG_V) == 0)
152 return -1;
153 if ((pte & PG_u) == 0)
154 *prot &= ~NVMM_PROT_USER;
155 if ((pte & PG_KW) == 0)
156 *prot &= ~NVMM_PROT_WRITE;
157 if (pte & PG_PS)
158 return -1;
159
160 *gpa = (pte & PG_FRAME);
161 return 0;
162 }
163
164 /* -------------------------------------------------------------------------- */
165
166 #define PTE32_PAE_L1_SHIFT 12
167 #define PTE32_PAE_L2_SHIFT 21
168 #define PTE32_PAE_L3_SHIFT 30
169
170 #define PTE32_PAE_L3_MASK 0xc0000000
171 #define PTE32_PAE_L2_MASK 0x3fe00000
172 #define PTE32_PAE_L1_MASK 0x001ff000
173
174 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
175 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
176 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
177
178 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
179 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
180 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
181
182 typedef uint64_t pte_32bit_pae_t;
183
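/*
 * Same walk for the three-level 32bit PAE format (PDPT, PD, PT). The NX bit
 * is honored in addition to the U and W bits, and 2MB superpages are handled
 * at L2.
 */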
184 static int
185 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
186 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
187 {
188 gpaddr_t L3gpa, L2gpa, L1gpa;
189 uintptr_t L3hva, L2hva, L1hva;
190 pte_32bit_pae_t *pdir, pte;
191
192 /* We begin with an RWXU access. */
193 *prot = NVMM_PROT_ALL;
194
195 /* Parse L3. */
196 L3gpa = (cr3 & PG_FRAME);
197 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
198 return -1;
199 pdir = (pte_32bit_pae_t *)L3hva;
200 pte = pdir[pte32_pae_l3idx(gva)];
201 if ((pte & PG_V) == 0)
202 return -1;
203 if (pte & PG_NX)
204 *prot &= ~NVMM_PROT_EXEC;
205 if (pte & PG_PS)
206 return -1;
207
208 /* Parse L2. */
209 L2gpa = (pte & PG_FRAME);
210 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
211 return -1;
212 pdir = (pte_32bit_pae_t *)L2hva;
213 pte = pdir[pte32_pae_l2idx(gva)];
214 if ((pte & PG_V) == 0)
215 return -1;
216 if ((pte & PG_u) == 0)
217 *prot &= ~NVMM_PROT_USER;
218 if ((pte & PG_KW) == 0)
219 *prot &= ~NVMM_PROT_WRITE;
220 if (pte & PG_NX)
221 *prot &= ~NVMM_PROT_EXEC;
222 if ((pte & PG_PS) && !has_pse)
223 return -1;
224 if (pte & PG_PS) {
225 *gpa = (pte & PTE32_PAE_L2_FRAME);
226 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
227 return 0;
228 }
229
230 /* Parse L1. */
231 L1gpa = (pte & PG_FRAME);
232 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
233 return -1;
234 pdir = (pte_32bit_pae_t *)L1hva;
235 pte = pdir[pte32_pae_l1idx(gva)];
236 if ((pte & PG_V) == 0)
237 return -1;
238 if ((pte & PG_u) == 0)
239 *prot &= ~NVMM_PROT_USER;
240 if ((pte & PG_KW) == 0)
241 *prot &= ~NVMM_PROT_WRITE;
242 if (pte & PG_NX)
243 *prot &= ~NVMM_PROT_EXEC;
244 if (pte & PG_PS)
245 return -1;
246
247 *gpa = (pte & PG_FRAME);
248 return 0;
249 }
250
251 /* -------------------------------------------------------------------------- */
252
253 #define PTE64_L1_SHIFT 12
254 #define PTE64_L2_SHIFT 21
255 #define PTE64_L3_SHIFT 30
256 #define PTE64_L4_SHIFT 39
257
258 #define PTE64_L4_MASK 0x0000ff8000000000
259 #define PTE64_L3_MASK 0x0000007fc0000000
260 #define PTE64_L2_MASK 0x000000003fe00000
261 #define PTE64_L1_MASK 0x00000000001ff000
262
263 #define PTE64_L4_FRAME PTE64_L4_MASK
264 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
265 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
266 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
267
268 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
269 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
270 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
271 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
272
273 typedef uint64_t pte_64bit_t;
274
275 static inline bool
276 x86_gva_64bit_canonical(gvaddr_t gva)
277 {
278 /* Bits 63:47 must have the same value. */
279 #define SIGN_EXTEND 0xffff800000000000ULL
280 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
281 }
282
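/*
 * Four-level 64bit (long mode) walk: PML4, PDPT, PD, PT. The GVA must be
 * canonical. 1GB superpages are handled at L3 and 2MB superpages at L2.
 */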
283 static int
284 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
285 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
286 {
287 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
288 uintptr_t L4hva, L3hva, L2hva, L1hva;
289 pte_64bit_t *pdir, pte;
290
291 /* We begin with an RWXU access. */
292 *prot = NVMM_PROT_ALL;
293
294 if (!x86_gva_64bit_canonical(gva))
295 return -1;
296
297 /* Parse L4. */
298 L4gpa = (cr3 & PG_FRAME);
299 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
300 return -1;
301 pdir = (pte_64bit_t *)L4hva;
302 pte = pdir[pte64_l4idx(gva)];
303 if ((pte & PG_V) == 0)
304 return -1;
305 if ((pte & PG_u) == 0)
306 *prot &= ~NVMM_PROT_USER;
307 if ((pte & PG_KW) == 0)
308 *prot &= ~NVMM_PROT_WRITE;
309 if (pte & PG_NX)
310 *prot &= ~NVMM_PROT_EXEC;
311 if (pte & PG_PS)
312 return -1;
313
314 /* Parse L3. */
315 L3gpa = (pte & PG_FRAME);
316 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
317 return -1;
318 pdir = (pte_64bit_t *)L3hva;
319 pte = pdir[pte64_l3idx(gva)];
320 if ((pte & PG_V) == 0)
321 return -1;
322 if ((pte & PG_u) == 0)
323 *prot &= ~NVMM_PROT_USER;
324 if ((pte & PG_KW) == 0)
325 *prot &= ~NVMM_PROT_WRITE;
326 if (pte & PG_NX)
327 *prot &= ~NVMM_PROT_EXEC;
328 if (pte & PG_PS) {
329 *gpa = (pte & PTE64_L3_FRAME);
330 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
331 return 0;
332 }
333
334 /* Parse L2. */
335 L2gpa = (pte & PG_FRAME);
336 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
337 return -1;
338 pdir = (pte_64bit_t *)L2hva;
339 pte = pdir[pte64_l2idx(gva)];
340 if ((pte & PG_V) == 0)
341 return -1;
342 if ((pte & PG_u) == 0)
343 *prot &= ~NVMM_PROT_USER;
344 if ((pte & PG_KW) == 0)
345 *prot &= ~NVMM_PROT_WRITE;
346 if (pte & PG_NX)
347 *prot &= ~NVMM_PROT_EXEC;
348 if (pte & PG_PS) {
349 *gpa = (pte & PTE64_L2_FRAME);
350 *gpa = *gpa + (gva & PTE64_L1_MASK);
351 return 0;
352 }
353
354 /* Parse L1. */
355 L1gpa = (pte & PG_FRAME);
356 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
357 return -1;
358 pdir = (pte_64bit_t *)L1hva;
359 pte = pdir[pte64_l1idx(gva)];
360 if ((pte & PG_V) == 0)
361 return -1;
362 if ((pte & PG_u) == 0)
363 *prot &= ~NVMM_PROT_USER;
364 if ((pte & PG_KW) == 0)
365 *prot &= ~NVMM_PROT_WRITE;
366 if (pte & PG_NX)
367 *prot &= ~NVMM_PROT_EXEC;
368 if (pte & PG_PS)
369 return -1;
370
371 *gpa = (pte & PG_FRAME);
372 return 0;
373 }
374
375 static inline int
376 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
377 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
378 {
379 bool is_pae, is_lng, has_pse;
380 uint64_t cr3;
381 size_t off;
382 int ret;
383
384 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
385 /* No paging. */
386 *prot = NVMM_PROT_ALL;
387 *gpa = gva;
388 return 0;
389 }
390
391 off = (gva & PAGE_MASK);
392 gva &= ~PAGE_MASK;
393
394 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
395 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
396 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
397 cr3 = state->crs[NVMM_X64_CR_CR3];
398
399 if (is_pae && is_lng) {
400 /* 64bit */
401 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
402 } else if (is_pae && !is_lng) {
403 /* 32bit PAE */
404 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
405 prot);
406 } else if (!is_pae && !is_lng) {
407 /* 32bit */
408 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
409 } else {
410 ret = -1;
411 }
412
413 if (ret == -1) {
414 errno = EFAULT;
415 }
416
417 *gpa = *gpa + off;
418
419 return ret;
420 }
421
422 int
423 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
424 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
425 {
426 struct nvmm_x64_state state;
427 int ret;
428
429 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
430 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
431 if (ret == -1)
432 return -1;
433
434 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
435 }
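
/*
 * Example use (illustrative only): translate a guest virtual address on
 * VCPU 0 and verify that the guest is allowed to write to it.
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(&mach, 0, gva, &gpa, &prot) == -1)
 *		err(EXIT_FAILURE, "nvmm_gva_to_gpa");
 *	if (!(prot & NVMM_PROT_WRITE))
 *		errx(EXIT_FAILURE, "GVA is not writable");
 */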
436
437 /* -------------------------------------------------------------------------- */
438
439 static inline bool
440 is_64bit(struct nvmm_x64_state *state)
441 {
442 return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
443 }
444
445 static inline bool
446 is_32bit(struct nvmm_x64_state *state)
447 {
448 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
449 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
450 }
451
452 static inline bool
453 is_16bit(struct nvmm_x64_state *state)
454 {
455 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
456 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
457 }
458
459 static inline bool
460 is_long_mode(struct nvmm_x64_state *state)
461 {
462 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
463 }
464
465 static int
466 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva, size_t size)
467 {
468 uint64_t limit;
469
	/*
	 * This is incomplete. We should also check expand-down segments and
	 * the other attributes; for now only the present bit and the limit
	 * are verified.
	 */
474 if (__predict_false(!seg->attrib.p)) {
475 goto error;
476 }
477
478 limit = (seg->limit + 1);
479 if (__predict_true(seg->attrib.gran)) {
480 limit *= PAGE_SIZE;
481 }
482
483 if (__predict_false(*gva + size > limit)) {
484 goto error;
485 }
486
487 *gva += seg->base;
488 return 0;
489
490 error:
491 errno = EFAULT;
492 return -1;
493 }
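
/*
 * Worked example of the limit check above: a flat data segment with
 * limit=0xFFFFF and gran=1 yields (0xFFFFF + 1) * PAGE_SIZE = 4GB, so any
 * access below 4GB passes.
 */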
494
495 static uint64_t
496 mask_from_adsize(size_t adsize)
497 {
498 switch (adsize) {
499 case 8:
500 return 0xFFFFFFFFFFFFFFFF;
501 case 4:
502 return 0x00000000FFFFFFFF;
503 case 2:
504 default: /* impossible */
505 return 0x000000000000FFFF;
506 }
507 }
508
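/*
 * REP-prefixed string instructions use rCX as the iteration count, truncated
 * to the effective address size. With a 16bit address size for instance, only
 * CX is read and written back; the upper bits of RCX are preserved.
 */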
509 static uint64_t
510 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
511 {
512 uint64_t mask, cnt;
513
514 mask = mask_from_adsize(adsize);
515 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
516
517 return cnt;
518 }
519
520 static void
521 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
522 {
523 uint64_t mask;
524
525 mask = mask_from_adsize(adsize);
526 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
527 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
528 }
529
530 static uint64_t
531 rep_dec_apply(struct nvmm_x64_state *state, size_t adsize)
532 {
533 uint64_t mask, cnt;
534
535 mask = mask_from_adsize(adsize);
536
537 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
538 cnt -= 1;
539 cnt &= mask;
540
541 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
542 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
543
544 return cnt;
545 }
546
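/*
 * Copy guest memory at 'gva' into 'data' (and the other way around in
 * write_guest_memory below). The GVA is translated page by page; an access
 * that crosses a page boundary recurses on the remainder. A GPA that is not
 * backed by host memory is treated as MMIO and forwarded to the registered
 * memory callback.
 */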
547 static int
548 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
549 gvaddr_t gva, uint8_t *data, size_t size)
550 {
551 struct nvmm_mem mem;
552 nvmm_prot_t prot;
553 gpaddr_t gpa;
554 uintptr_t hva;
555 bool is_mmio;
556 int ret, remain;
557
558 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
559 if (__predict_false(ret == -1)) {
560 return -1;
561 }
562 if (__predict_false(!(prot & NVMM_PROT_READ))) {
563 errno = EFAULT;
564 return -1;
565 }
566
567 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
568 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
569 } else {
570 remain = 0;
571 }
572 size -= remain;
573
574 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
575 is_mmio = (ret == -1);
576
577 if (is_mmio) {
578 mem.data = data;
579 mem.gpa = gpa;
580 mem.write = false;
581 mem.size = size;
582 (*__callbacks.mem)(&mem);
583 } else {
584 memcpy(data, (uint8_t *)hva, size);
585 }
586
587 if (remain > 0) {
588 ret = read_guest_memory(mach, state, gva + size,
589 data + size, remain);
590 } else {
591 ret = 0;
592 }
593
594 return ret;
595 }
596
597 static int
598 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
599 gvaddr_t gva, uint8_t *data, size_t size)
600 {
601 struct nvmm_mem mem;
602 nvmm_prot_t prot;
603 gpaddr_t gpa;
604 uintptr_t hva;
605 bool is_mmio;
606 int ret, remain;
607
608 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
609 if (__predict_false(ret == -1)) {
610 return -1;
611 }
612 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
613 errno = EFAULT;
614 return -1;
615 }
616
617 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
618 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
619 } else {
620 remain = 0;
621 }
622 size -= remain;
623
624 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
625 is_mmio = (ret == -1);
626
627 if (is_mmio) {
628 mem.data = data;
629 mem.gpa = gpa;
630 mem.write = true;
631 mem.size = size;
632 (*__callbacks.mem)(&mem);
633 } else {
634 memcpy((uint8_t *)hva, data, size);
635 }
636
637 if (remain > 0) {
638 ret = write_guest_memory(mach, state, gva + size,
639 data + size, remain);
640 } else {
641 ret = 0;
642 }
643
644 return ret;
645 }
646
647 /* -------------------------------------------------------------------------- */
648
649 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
650
651 #define NVMM_IO_BATCH_SIZE 32
652
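/*
 * Process up to NVMM_IO_BATCH_SIZE bytes worth of REP INS/OUTS iterations in
 * one go: for OUTS the guest buffer is read once, the I/O callback is then
 * invoked once per element, and for INS the results are written back in a
 * single shot. Returns the number of iterations performed, or -1 on error.
 */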
653 static int
654 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
655 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
656 {
657 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
658 size_t i, iosize, iocnt;
659 int ret;
660
661 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
662 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
663 iocnt = iosize / io->size;
664
665 io->data = iobuf;
666
667 if (!io->in) {
668 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
669 if (ret == -1)
670 return -1;
671 }
672
673 for (i = 0; i < iocnt; i++) {
674 (*__callbacks.io)(io);
675 io->data += io->size;
676 }
677
678 if (io->in) {
679 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
680 if (ret == -1)
681 return -1;
682 }
683
684 return iocnt;
685 }
686
687 int
688 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
689 struct nvmm_exit *exit)
690 {
691 struct nvmm_x64_state state;
692 struct nvmm_io io;
693 uint64_t cnt = 0; /* GCC */
694 uint8_t iobuf[8];
695 int iocnt = 1;
696 gvaddr_t gva;
697 int reg = 0; /* GCC */
698 int ret, seg;
699 bool psld = false;
700
701 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
702 errno = EINVAL;
703 return -1;
704 }
705
706 io.port = exit->u.io.port;
707 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
708 io.size = exit->u.io.operand_size;
709 io.data = iobuf;
710
711 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
712 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
713 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
714 if (ret == -1)
715 return -1;
716
717 if (exit->u.io.rep) {
718 cnt = rep_get_cnt(&state, exit->u.io.address_size);
719 if (__predict_false(cnt == 0)) {
720 return 0;
721 }
722 }
723
724 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
725 psld = true;
726 }
727
728 /*
729 * Determine GVA.
730 */
731 if (exit->u.io.str) {
732 if (io.in) {
733 reg = NVMM_X64_GPR_RDI;
734 } else {
735 reg = NVMM_X64_GPR_RSI;
736 }
737
738 gva = state.gprs[reg];
739 gva &= mask_from_adsize(exit->u.io.address_size);
740
741 if (!is_long_mode(&state)) {
742 if (exit->u.io.seg != -1) {
743 seg = exit->u.io.seg;
744 } else {
745 if (io.in) {
746 seg = NVMM_X64_SEG_ES;
747 } else {
748 seg = fetch_segment(mach, &state);
749 if (seg == -1)
750 return -1;
751 }
752 }
753
754 ret = segment_apply(&state.segs[seg], &gva, io.size);
755 if (ret == -1)
756 return -1;
757 }
758
759 if (exit->u.io.rep && !psld) {
760 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
761 if (iocnt == -1)
762 return -1;
763 goto done;
764 }
765 }
766
767 if (!io.in) {
768 if (!exit->u.io.str) {
769 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
770 } else {
771 ret = read_guest_memory(mach, &state, gva, io.data,
772 io.size);
773 if (ret == -1)
774 return -1;
775 }
776 }
777
778 (*__callbacks.io)(&io);
779
780 if (io.in) {
781 if (!exit->u.io.str) {
782 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
783 } else {
784 ret = write_guest_memory(mach, &state, gva, io.data,
785 io.size);
786 if (ret == -1)
787 return -1;
788 }
789 }
790
791 done:
792 if (exit->u.io.str) {
793 if (__predict_false(psld)) {
794 state.gprs[reg] -= iocnt * io.size;
795 } else {
796 state.gprs[reg] += iocnt * io.size;
797 }
798 }
799
800 if (exit->u.io.rep) {
801 cnt -= iocnt;
802 rep_set_cnt(&state, exit->u.io.address_size, cnt);
803 if (cnt == 0) {
804 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
805 }
806 } else {
807 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
808 }
809
810 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
811 if (ret == -1)
812 return -1;
813
814 return 0;
815 }
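
/*
 * Typical use (illustrative only), from a VCPU run loop after nvmm_vcpu_run():
 *
 *	switch (exit.reason) {
 *	case NVMM_EXIT_IO:
 *		if (nvmm_assist_io(&mach, cpuid, &exit) == -1)
 *			err(EXIT_FAILURE, "nvmm_assist_io");
 *		break;
 *	...
 *	}
 */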
816
817 /* -------------------------------------------------------------------------- */
818
819 static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
820 static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
821 static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
822 static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
823 static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
824 static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
825 static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
826
827 /* Legacy prefixes. */
828 #define LEG_LOCK 0xF0
829 #define LEG_REPN 0xF2
830 #define LEG_REP 0xF3
831 #define LEG_OVR_CS 0x2E
832 #define LEG_OVR_SS 0x36
833 #define LEG_OVR_DS 0x3E
834 #define LEG_OVR_ES 0x26
835 #define LEG_OVR_FS 0x64
836 #define LEG_OVR_GS 0x65
837 #define LEG_OPR_OVR 0x66
838 #define LEG_ADR_OVR 0x67
839
840 struct x86_legpref {
841 bool opr_ovr:1;
842 bool adr_ovr:1;
843 bool rep:1;
844 bool repn:1;
845 int seg;
846 };
847
848 struct x86_rexpref {
849 bool present;
850 bool w;
851 bool r;
852 bool x;
853 bool b;
854 };
855
856 struct x86_reg {
857 int num; /* NVMM GPR state index */
858 uint64_t mask;
859 };
860
861 enum x86_disp_type {
862 DISP_NONE,
863 DISP_0,
864 DISP_1,
865 DISP_4
866 };
867
868 struct x86_disp {
869 enum x86_disp_type type;
870 uint64_t data; /* 4 bytes, but can be sign-extended */
871 };
872
873 enum REGMODRM__Mod {
874 MOD_DIS0, /* also, register indirect */
875 MOD_DIS1,
876 MOD_DIS4,
877 MOD_REG
878 };
879
880 enum REGMODRM__Reg {
881 REG_000, /* these fields are indexes to the register map */
882 REG_001,
883 REG_010,
884 REG_011,
885 REG_100,
886 REG_101,
887 REG_110,
888 REG_111
889 };
890
891 enum REGMODRM__Rm {
892 RM_000, /* reg */
893 RM_001, /* reg */
894 RM_010, /* reg */
895 RM_011, /* reg */
896 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
897 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
898 RM_110,
899 RM_111
900 };
901
902 struct x86_regmodrm {
903 bool present;
904 enum REGMODRM__Mod mod;
905 enum REGMODRM__Reg reg;
906 enum REGMODRM__Rm rm;
907 };
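
/*
 * ModRM byte layout, as decoded in node_regmodrm: mod (bits 7-6), reg
 * (bits 5-3), rm (bits 2-0). For example the byte 0x45 gives mod=01 (disp8),
 * reg=000, rm=101.
 */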
908
909 struct x86_immediate {
910 size_t size; /* 1/2/4/8 */
911 uint64_t data;
912 };
913
914 struct x86_sib {
915 uint8_t scale;
916 const struct x86_reg *idx;
917 const struct x86_reg *bas;
918 };
919
920 enum x86_store_type {
921 STORE_NONE,
922 STORE_REG,
923 STORE_IMM,
924 STORE_SIB,
925 STORE_DMO
926 };
927
928 struct x86_store {
929 enum x86_store_type type;
930 union {
931 const struct x86_reg *reg;
932 struct x86_immediate imm;
933 struct x86_sib sib;
934 uint64_t dmo;
935 } u;
936 struct x86_disp disp;
937 int hardseg;
938 };
939
940 struct x86_instr {
941 size_t len;
942 struct x86_legpref legpref;
943 struct x86_rexpref rexpref;
944 size_t operand_size;
945 size_t address_size;
946 uint64_t zeroextend_mask;
947
948 struct x86_regmodrm regmodrm;
949
950 const struct x86_opcode *opcode;
951
952 struct x86_store src;
953 struct x86_store dst;
954
955 struct x86_store *strm;
956
957 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
958 };
959
960 struct x86_decode_fsm {
961 /* vcpu */
962 bool is64bit;
963 bool is32bit;
964 bool is16bit;
965
966 /* fsm */
967 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
968 uint8_t *buf;
969 uint8_t *end;
970 };
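
/*
 * The decoder is a small finite-state machine: each node_* function consumes
 * bytes from fsm->buf, fills in the relevant part of the x86_instr, and
 * selects the next node via fsm_advance(). Decoding terminates when a node
 * advances to a NULL next node, and fails when a node returns -1.
 */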
971
972 struct x86_opcode {
973 uint8_t byte;
974 bool regmodrm;
975 bool regtorm;
976 bool dmo;
977 bool todmo;
978 bool movs;
979 bool stos;
980 bool lods;
981 bool szoverride;
982 int defsize;
983 int allsize;
984 bool group1;
985 bool group11;
986 bool immediate;
987 int flags;
988 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
989 };
990
991 struct x86_group_entry {
992 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
993 };
994
995 #define OPSIZE_BYTE 0x01
996 #define OPSIZE_WORD 0x02 /* 2 bytes */
997 #define OPSIZE_DOUB 0x04 /* 4 bytes */
998 #define OPSIZE_QUAD 0x08 /* 8 bytes */
999
1000 #define FLAG_imm8 0x01
1001 #define FLAG_immz 0x02
1002 #define FLAG_ze 0x04
1003
1004 static const struct x86_group_entry group1[8] = {
1005 [1] = { .emul = x86_emul_or },
1006 [4] = { .emul = x86_emul_and },
1007 [6] = { .emul = x86_emul_xor }
1008 };
1009
1010 static const struct x86_group_entry group11[8] = {
1011 [0] = { .emul = x86_emul_mov }
1012 };
1013
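/*
 * The operand notation in the comments follows the Intel SDM: 'E' is a ModRM
 * rm operand (register or memory), 'G' a ModRM reg operand, 'I' an immediate,
 * 'O' a direct memory offset, 'X'/'Y' the DS:rSI/ES:rDI string operands; 'b'
 * means byte, 'w' word, 'z' word or doubleword, 'v' word, doubleword or
 * quadword depending on the operand size.
 */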
1014 static const struct x86_opcode primary_opcode_table[] = {
1015 /*
1016 * Group1
1017 */
1018 {
1019 /* Ev, Ib */
1020 .byte = 0x83,
1021 .regmodrm = true,
1022 .regtorm = true,
1023 .szoverride = true,
1024 .defsize = -1,
1025 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1026 .group1 = true,
1027 .immediate = true,
1028 .flags = FLAG_imm8,
1029 .emul = NULL /* group1 */
1030 },
1031
1032 /*
1033 * Group11
1034 */
1035 {
1036 /* Eb, Ib */
1037 .byte = 0xC6,
1038 .regmodrm = true,
1039 .regtorm = true,
1040 .szoverride = false,
1041 .defsize = OPSIZE_BYTE,
1042 .allsize = -1,
1043 .group11 = true,
1044 .immediate = true,
1045 .emul = NULL /* group11 */
1046 },
1047 {
1048 /* Ev, Iz */
1049 .byte = 0xC7,
1050 .regmodrm = true,
1051 .regtorm = true,
1052 .szoverride = true,
1053 .defsize = -1,
1054 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1055 .group11 = true,
1056 .immediate = true,
1057 .flags = FLAG_immz,
1058 .emul = NULL /* group11 */
1059 },
1060
1061 /*
1062 * OR
1063 */
1064 {
1065 /* Eb, Gb */
1066 .byte = 0x08,
1067 .regmodrm = true,
1068 .regtorm = true,
1069 .szoverride = false,
1070 .defsize = OPSIZE_BYTE,
1071 .allsize = -1,
1072 .emul = x86_emul_or
1073 },
1074 {
1075 /* Ev, Gv */
1076 .byte = 0x09,
1077 .regmodrm = true,
1078 .regtorm = true,
1079 .szoverride = true,
1080 .defsize = -1,
1081 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1082 .emul = x86_emul_or
1083 },
1084 {
1085 /* Gb, Eb */
1086 .byte = 0x0A,
1087 .regmodrm = true,
1088 .regtorm = false,
1089 .szoverride = false,
1090 .defsize = OPSIZE_BYTE,
1091 .allsize = -1,
1092 .emul = x86_emul_or
1093 },
1094 {
1095 /* Gv, Ev */
1096 .byte = 0x0B,
1097 .regmodrm = true,
1098 .regtorm = false,
1099 .szoverride = true,
1100 .defsize = -1,
1101 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1102 .emul = x86_emul_or
1103 },
1104
1105 /*
1106 * AND
1107 */
1108 {
1109 /* Eb, Gb */
1110 .byte = 0x20,
1111 .regmodrm = true,
1112 .regtorm = true,
1113 .szoverride = false,
1114 .defsize = OPSIZE_BYTE,
1115 .allsize = -1,
1116 .emul = x86_emul_and
1117 },
1118 {
1119 /* Ev, Gv */
1120 .byte = 0x21,
1121 .regmodrm = true,
1122 .regtorm = true,
1123 .szoverride = true,
1124 .defsize = -1,
1125 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1126 .emul = x86_emul_and
1127 },
1128 {
1129 /* Gb, Eb */
1130 .byte = 0x22,
1131 .regmodrm = true,
1132 .regtorm = false,
1133 .szoverride = false,
1134 .defsize = OPSIZE_BYTE,
1135 .allsize = -1,
1136 .emul = x86_emul_and
1137 },
1138 {
1139 /* Gv, Ev */
1140 .byte = 0x23,
1141 .regmodrm = true,
1142 .regtorm = false,
1143 .szoverride = true,
1144 .defsize = -1,
1145 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1146 .emul = x86_emul_and
1147 },
1148
1149 /*
1150 * XOR
1151 */
1152 {
1153 /* Eb, Gb */
1154 .byte = 0x30,
1155 .regmodrm = true,
1156 .regtorm = true,
1157 .szoverride = false,
1158 .defsize = OPSIZE_BYTE,
1159 .allsize = -1,
1160 .emul = x86_emul_xor
1161 },
1162 {
1163 /* Ev, Gv */
1164 .byte = 0x31,
1165 .regmodrm = true,
1166 .regtorm = true,
1167 .szoverride = true,
1168 .defsize = -1,
1169 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1170 .emul = x86_emul_xor
1171 },
1172 {
1173 /* Gb, Eb */
1174 .byte = 0x32,
1175 .regmodrm = true,
1176 .regtorm = false,
1177 .szoverride = false,
1178 .defsize = OPSIZE_BYTE,
1179 .allsize = -1,
1180 .emul = x86_emul_xor
1181 },
1182 {
1183 /* Gv, Ev */
1184 .byte = 0x33,
1185 .regmodrm = true,
1186 .regtorm = false,
1187 .szoverride = true,
1188 .defsize = -1,
1189 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1190 .emul = x86_emul_xor
1191 },
1192
1193 /*
1194 * MOV
1195 */
1196 {
1197 /* Eb, Gb */
1198 .byte = 0x88,
1199 .regmodrm = true,
1200 .regtorm = true,
1201 .szoverride = false,
1202 .defsize = OPSIZE_BYTE,
1203 .allsize = -1,
1204 .emul = x86_emul_mov
1205 },
1206 {
1207 /* Ev, Gv */
1208 .byte = 0x89,
1209 .regmodrm = true,
1210 .regtorm = true,
1211 .szoverride = true,
1212 .defsize = -1,
1213 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1214 .emul = x86_emul_mov
1215 },
1216 {
1217 /* Gb, Eb */
1218 .byte = 0x8A,
1219 .regmodrm = true,
1220 .regtorm = false,
1221 .szoverride = false,
1222 .defsize = OPSIZE_BYTE,
1223 .allsize = -1,
1224 .emul = x86_emul_mov
1225 },
1226 {
1227 /* Gv, Ev */
1228 .byte = 0x8B,
1229 .regmodrm = true,
1230 .regtorm = false,
1231 .szoverride = true,
1232 .defsize = -1,
1233 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1234 .emul = x86_emul_mov
1235 },
1236 {
1237 /* AL, Ob */
1238 .byte = 0xA0,
1239 .dmo = true,
1240 .todmo = false,
1241 .szoverride = false,
1242 .defsize = OPSIZE_BYTE,
1243 .allsize = -1,
1244 .emul = x86_emul_mov
1245 },
1246 {
1247 /* rAX, Ov */
1248 .byte = 0xA1,
1249 .dmo = true,
1250 .todmo = false,
1251 .szoverride = true,
1252 .defsize = -1,
1253 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1254 .emul = x86_emul_mov
1255 },
1256 {
1257 /* Ob, AL */
1258 .byte = 0xA2,
1259 .dmo = true,
1260 .todmo = true,
1261 .szoverride = false,
1262 .defsize = OPSIZE_BYTE,
1263 .allsize = -1,
1264 .emul = x86_emul_mov
1265 },
1266 {
1267 /* Ov, rAX */
1268 .byte = 0xA3,
1269 .dmo = true,
1270 .todmo = true,
1271 .szoverride = true,
1272 .defsize = -1,
1273 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1274 .emul = x86_emul_mov
1275 },
1276
1277 /*
1278 * MOVS
1279 */
1280 {
1281 /* Yb, Xb */
1282 .byte = 0xA4,
1283 .movs = true,
1284 .szoverride = false,
1285 .defsize = OPSIZE_BYTE,
1286 .allsize = -1,
1287 .emul = x86_emul_movs
1288 },
1289 {
1290 /* Yv, Xv */
1291 .byte = 0xA5,
1292 .movs = true,
1293 .szoverride = true,
1294 .defsize = -1,
1295 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1296 .emul = x86_emul_movs
1297 },
1298
1299 /*
1300 * STOS
1301 */
1302 {
1303 /* Yb, AL */
1304 .byte = 0xAA,
1305 .stos = true,
1306 .szoverride = false,
1307 .defsize = OPSIZE_BYTE,
1308 .allsize = -1,
1309 .emul = x86_emul_stos
1310 },
1311 {
1312 /* Yv, rAX */
1313 .byte = 0xAB,
1314 .stos = true,
1315 .szoverride = true,
1316 .defsize = -1,
1317 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1318 .emul = x86_emul_stos
1319 },
1320
1321 /*
1322 * LODS
1323 */
1324 {
1325 /* AL, Xb */
1326 .byte = 0xAC,
1327 .lods = true,
1328 .szoverride = false,
1329 .defsize = OPSIZE_BYTE,
1330 .allsize = -1,
1331 .emul = x86_emul_lods
1332 },
1333 {
1334 /* rAX, Xv */
1335 .byte = 0xAD,
1336 .lods = true,
1337 .szoverride = true,
1338 .defsize = -1,
1339 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1340 .emul = x86_emul_lods
1341 },
1342 };
1343
1344 static const struct x86_opcode secondary_opcode_table[] = {
1345 /*
1346 * MOVZX
1347 */
1348 {
1349 /* Gv, Eb */
1350 .byte = 0xB6,
1351 .regmodrm = true,
1352 .regtorm = false,
1353 .szoverride = true,
1354 .defsize = OPSIZE_BYTE,
1355 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1356 .flags = FLAG_ze,
1357 .emul = x86_emul_mov
1358 },
1359 {
1360 /* Gv, Ew */
1361 .byte = 0xB7,
1362 .regmodrm = true,
1363 .regtorm = false,
1364 .szoverride = true,
1365 .defsize = OPSIZE_WORD,
1366 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1367 .flags = FLAG_ze,
1368 .emul = x86_emul_mov
1369 },
1370 };
1371
1372 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1373
1374 /* [REX-present][enc][opsize] */
1375 static const struct x86_reg gpr_map__special[2][4][8] = {
1376 [false] = {
1377 /* No REX prefix. */
1378 [0b00] = {
1379 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1380 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1381 [2] = { -1, 0 },
1382 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1383 [4] = { -1, 0 },
1384 [5] = { -1, 0 },
1385 [6] = { -1, 0 },
1386 [7] = { -1, 0 },
1387 },
1388 [0b01] = {
1389 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1390 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1391 [2] = { -1, 0 },
1392 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1393 [4] = { -1, 0 },
1394 [5] = { -1, 0 },
1395 [6] = { -1, 0 },
1396 [7] = { -1, 0 },
1397 },
1398 [0b10] = {
1399 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1400 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1401 [2] = { -1, 0 },
1402 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1403 [4] = { -1, 0 },
1404 [5] = { -1, 0 },
1405 [6] = { -1, 0 },
1406 [7] = { -1, 0 },
1407 },
1408 [0b11] = {
1409 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1410 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1411 [2] = { -1, 0 },
1412 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1413 [4] = { -1, 0 },
1414 [5] = { -1, 0 },
1415 [6] = { -1, 0 },
1416 [7] = { -1, 0 },
1417 }
1418 },
1419 [true] = {
1420 /* Has REX prefix. */
1421 [0b00] = {
1422 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1423 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1424 [2] = { -1, 0 },
1425 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1426 [4] = { -1, 0 },
1427 [5] = { -1, 0 },
1428 [6] = { -1, 0 },
1429 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1430 },
1431 [0b01] = {
1432 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1433 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1434 [2] = { -1, 0 },
1435 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1436 [4] = { -1, 0 },
1437 [5] = { -1, 0 },
1438 [6] = { -1, 0 },
1439 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1440 },
1441 [0b10] = {
1442 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1443 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1444 [2] = { -1, 0 },
1445 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1446 [4] = { -1, 0 },
1447 [5] = { -1, 0 },
1448 [6] = { -1, 0 },
1449 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1450 },
1451 [0b11] = {
1452 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1453 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1454 [2] = { -1, 0 },
1455 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1456 [4] = { -1, 0 },
1457 [5] = { -1, 0 },
1458 [6] = { -1, 0 },
1459 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1460 }
1461 }
1462 };
1463
1464 /* [depends][enc][size] */
1465 static const struct x86_reg gpr_map[2][8][8] = {
1466 [false] = {
1467 /* Not extended. */
1468 [0b000] = {
1469 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1470 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1471 [2] = { -1, 0 },
1472 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1473 [4] = { -1, 0 },
1474 [5] = { -1, 0 },
1475 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1477 },
1478 [0b001] = {
1479 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1480 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1481 [2] = { -1, 0 },
1482 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1483 [4] = { -1, 0 },
1484 [5] = { -1, 0 },
1485 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1487 },
1488 [0b010] = {
1489 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1490 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1491 [2] = { -1, 0 },
1492 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1493 [4] = { -1, 0 },
1494 [5] = { -1, 0 },
1495 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1497 },
1498 [0b011] = {
1499 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1500 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1501 [2] = { -1, 0 },
1502 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1503 [4] = { -1, 0 },
1504 [5] = { -1, 0 },
1505 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1507 },
1508 [0b100] = {
1509 [0] = { -1, 0 }, /* SPECIAL */
1510 [1] = { -1, 0 }, /* SPECIAL */
1511 [2] = { -1, 0 },
1512 [3] = { -1, 0 }, /* SPECIAL */
1513 [4] = { -1, 0 },
1514 [5] = { -1, 0 },
1515 [6] = { -1, 0 },
1516 [7] = { -1, 0 }, /* SPECIAL */
1517 },
1518 [0b101] = {
1519 [0] = { -1, 0 }, /* SPECIAL */
1520 [1] = { -1, 0 }, /* SPECIAL */
1521 [2] = { -1, 0 },
1522 [3] = { -1, 0 }, /* SPECIAL */
1523 [4] = { -1, 0 },
1524 [5] = { -1, 0 },
1525 [6] = { -1, 0 },
1526 [7] = { -1, 0 }, /* SPECIAL */
1527 },
1528 [0b110] = {
1529 [0] = { -1, 0 }, /* SPECIAL */
1530 [1] = { -1, 0 }, /* SPECIAL */
1531 [2] = { -1, 0 },
1532 [3] = { -1, 0 }, /* SPECIAL */
1533 [4] = { -1, 0 },
1534 [5] = { -1, 0 },
1535 [6] = { -1, 0 },
1536 [7] = { -1, 0 }, /* SPECIAL */
1537 },
1538 [0b111] = {
1539 [0] = { -1, 0 }, /* SPECIAL */
1540 [1] = { -1, 0 }, /* SPECIAL */
1541 [2] = { -1, 0 },
1542 [3] = { -1, 0 }, /* SPECIAL */
1543 [4] = { -1, 0 },
1544 [5] = { -1, 0 },
1545 [6] = { -1, 0 },
1546 [7] = { -1, 0 }, /* SPECIAL */
1547 },
1548 },
1549 [true] = {
1550 /* Extended. */
1551 [0b000] = {
1552 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1553 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1554 [2] = { -1, 0 },
1555 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1556 [4] = { -1, 0 },
1557 [5] = { -1, 0 },
1558 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1560 },
1561 [0b001] = {
1562 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1563 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1564 [2] = { -1, 0 },
1565 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1566 [4] = { -1, 0 },
1567 [5] = { -1, 0 },
1568 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1570 },
1571 [0b010] = {
1572 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1573 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1574 [2] = { -1, 0 },
1575 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1576 [4] = { -1, 0 },
1577 [5] = { -1, 0 },
1578 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1580 },
1581 [0b011] = {
1582 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1583 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1584 [2] = { -1, 0 },
1585 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1586 [4] = { -1, 0 },
1587 [5] = { -1, 0 },
1588 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1590 },
1591 [0b100] = {
1592 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1593 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1594 [2] = { -1, 0 },
1595 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1596 [4] = { -1, 0 },
1597 [5] = { -1, 0 },
1598 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1600 },
1601 [0b101] = {
1602 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1603 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1604 [2] = { -1, 0 },
1605 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1606 [4] = { -1, 0 },
1607 [5] = { -1, 0 },
1608 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1610 },
1611 [0b110] = {
1612 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1613 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1614 [2] = { -1, 0 },
1615 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1616 [4] = { -1, 0 },
1617 [5] = { -1, 0 },
1618 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1620 },
1621 [0b111] = {
1622 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1623 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1624 [2] = { -1, 0 },
1625 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1626 [4] = { -1, 0 },
1627 [5] = { -1, 0 },
1628 [6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1630 },
1631 }
1632 };
1633
1634 static int
1635 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1636 {
1637 fsm->fn = NULL;
1638 return -1;
1639 }
1640
1641 static int
1642 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1643 {
1644 if (fsm->buf + n > fsm->end) {
1645 return -1;
1646 }
1647 memcpy(bytes, fsm->buf, n);
1648 return 0;
1649 }
1650
1651 static void
1652 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1653 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1654 {
1655 fsm->buf += n;
1656 if (fsm->buf > fsm->end) {
1657 fsm->fn = node_overflow;
1658 } else {
1659 fsm->fn = fn;
1660 }
1661 }
1662
1663 static const struct x86_reg *
1664 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1665 {
1666 enc &= 0b11;
1667 if (regsize == 8) {
1668 /* May be 64bit without REX */
1669 return &gpr_map__special[1][enc][regsize-1];
1670 }
1671 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1672 }
1673
1674 /*
1675 * Special node, for MOVS. Fake two displacements of zero on the source and
1676 * destination registers.
1677 */
1678 static int
1679 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1680 {
1681 size_t adrsize;
1682
1683 adrsize = instr->address_size;
1684
1685 /* DS:RSI */
1686 instr->src.type = STORE_REG;
1687 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1688 instr->src.disp.type = DISP_0;
1689
1690 /* ES:RDI, force ES */
1691 instr->dst.type = STORE_REG;
1692 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1693 instr->dst.disp.type = DISP_0;
1694 instr->dst.hardseg = NVMM_X64_SEG_ES;
1695
1696 fsm_advance(fsm, 0, NULL);
1697
1698 return 0;
1699 }
1700
1701 /*
1702 * Special node, for STOS and LODS. Fake a displacement of zero on the
1703 * destination register.
1704 */
1705 static int
1706 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1707 {
1708 const struct x86_opcode *opcode = instr->opcode;
1709 struct x86_store *stlo, *streg;
1710 size_t adrsize, regsize;
1711
1712 adrsize = instr->address_size;
1713 regsize = instr->operand_size;
1714
1715 if (opcode->stos) {
1716 streg = &instr->src;
1717 stlo = &instr->dst;
1718 } else {
1719 streg = &instr->dst;
1720 stlo = &instr->src;
1721 }
1722
1723 streg->type = STORE_REG;
1724 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1725
1726 stlo->type = STORE_REG;
1727 if (opcode->stos) {
1728 /* ES:RDI, force ES */
1729 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1730 stlo->hardseg = NVMM_X64_SEG_ES;
1731 } else {
1732 /* DS:RSI */
1733 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1734 }
1735 stlo->disp.type = DISP_0;
1736
1737 fsm_advance(fsm, 0, NULL);
1738
1739 return 0;
1740 }
1741
1742 static int
1743 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1744 {
1745 const struct x86_opcode *opcode = instr->opcode;
1746 struct x86_store *stdmo, *streg;
1747 size_t adrsize, regsize;
1748
1749 adrsize = instr->address_size;
1750 regsize = instr->operand_size;
1751
1752 if (opcode->todmo) {
1753 streg = &instr->src;
1754 stdmo = &instr->dst;
1755 } else {
1756 streg = &instr->dst;
1757 stdmo = &instr->src;
1758 }
1759
1760 streg->type = STORE_REG;
1761 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1762
1763 stdmo->type = STORE_DMO;
1764 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1765 return -1;
1766 }
1767 fsm_advance(fsm, adrsize, NULL);
1768
1769 return 0;
1770 }
1771
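/*
 * Sign-extend 'val' from 'size' bytes to 64 bits. For example
 * sign_extend(0x80, 1) == 0xFFFFFFFFFFFFFF80, while sign_extend(0x7F, 1)
 * stays 0x7F.
 */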
1772 static uint64_t
1773 sign_extend(uint64_t val, int size)
1774 {
1775 if (size == 1) {
1776 if (val & __BIT(7))
1777 val |= 0xFFFFFFFFFFFFFF00;
1778 } else if (size == 2) {
1779 if (val & __BIT(15))
1780 val |= 0xFFFFFFFFFFFF0000;
1781 } else if (size == 4) {
1782 if (val & __BIT(31))
1783 val |= 0xFFFFFFFF00000000;
1784 }
1785 return val;
1786 }
1787
1788 static int
1789 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1790 {
1791 const struct x86_opcode *opcode = instr->opcode;
1792 struct x86_store *store;
1793 uint8_t immsize;
1794 size_t sesize = 0;
1795
1796 /* The immediate is the source */
1797 store = &instr->src;
1798 immsize = instr->operand_size;
1799
1800 if (opcode->flags & FLAG_imm8) {
1801 sesize = immsize;
1802 immsize = 1;
1803 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1804 sesize = immsize;
1805 immsize = 4;
1806 }
1807
1808 store->type = STORE_IMM;
1809 store->u.imm.size = immsize;
1810 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1811 return -1;
1812 }
1813 fsm_advance(fsm, store->u.imm.size, NULL);
1814
1815 if (sesize != 0) {
1816 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1817 store->u.imm.size = sesize;
1818 }
1819
1820 return 0;
1821 }
1822
1823 static int
1824 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1825 {
1826 const struct x86_opcode *opcode = instr->opcode;
1827 uint64_t data = 0;
1828 size_t n;
1829
1830 if (instr->strm->disp.type == DISP_1) {
1831 n = 1;
1832 } else { /* DISP4 */
1833 n = 4;
1834 }
1835
1836 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1837 return -1;
1838 }
1839
1840 if (__predict_true(fsm->is64bit)) {
1841 data = sign_extend(data, n);
1842 }
1843
1844 instr->strm->disp.data = data;
1845
1846 if (opcode->immediate) {
1847 fsm_advance(fsm, n, node_immediate);
1848 } else {
1849 fsm_advance(fsm, n, NULL);
1850 }
1851
1852 return 0;
1853 }
1854
1855 static const struct x86_reg *
1856 get_register_idx(struct x86_instr *instr, uint8_t index)
1857 {
1858 uint8_t enc = index;
1859 const struct x86_reg *reg;
1860 size_t regsize;
1861
1862 regsize = instr->address_size;
1863 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1864
1865 if (reg->num == -1) {
1866 reg = resolve_special_register(instr, enc, regsize);
1867 }
1868
1869 return reg;
1870 }
1871
1872 static const struct x86_reg *
1873 get_register_bas(struct x86_instr *instr, uint8_t base)
1874 {
1875 uint8_t enc = base;
1876 const struct x86_reg *reg;
1877 size_t regsize;
1878
1879 regsize = instr->address_size;
1880 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1881 if (reg->num == -1) {
1882 reg = resolve_special_register(instr, enc, regsize);
1883 }
1884
1885 return reg;
1886 }
1887
1888 static int
1889 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1890 {
1891 const struct x86_opcode *opcode;
1892 uint8_t scale, index, base;
1893 bool noindex, nobase;
1894 uint8_t byte;
1895
1896 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
1897 return -1;
1898 }
1899
1900 scale = ((byte & 0b11000000) >> 6);
1901 index = ((byte & 0b00111000) >> 3);
1902 base = ((byte & 0b00000111) >> 0);
1903
1904 opcode = instr->opcode;
1905
1906 noindex = false;
1907 nobase = false;
1908
1909 if (index == 0b100 && !instr->rexpref.x) {
1910 /* Special case: the index is null */
1911 noindex = true;
1912 }
1913
1914 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
1915 /* Special case: the base is null + disp32 */
1916 instr->strm->disp.type = DISP_4;
1917 nobase = true;
1918 }
1919
1920 instr->strm->type = STORE_SIB;
1921 instr->strm->u.sib.scale = (1 << scale);
1922 if (!noindex)
1923 instr->strm->u.sib.idx = get_register_idx(instr, index);
1924 if (!nobase)
1925 instr->strm->u.sib.bas = get_register_bas(instr, base);
1926
1927 /* May have a displacement, or an immediate */
1928 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
1929 fsm_advance(fsm, 1, node_disp);
1930 } else if (opcode->immediate) {
1931 fsm_advance(fsm, 1, node_immediate);
1932 } else {
1933 fsm_advance(fsm, 1, NULL);
1934 }
1935
1936 return 0;
1937 }
1938
1939 static const struct x86_reg *
1940 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
1941 {
1942 uint8_t enc = instr->regmodrm.reg;
1943 const struct x86_reg *reg;
1944 size_t regsize;
1945
1946 regsize = instr->operand_size;
1947
1948 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
1949 if (reg->num == -1) {
1950 reg = resolve_special_register(instr, enc, regsize);
1951 }
1952
1953 return reg;
1954 }
1955
1956 static const struct x86_reg *
1957 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
1958 {
1959 uint8_t enc = instr->regmodrm.rm;
1960 const struct x86_reg *reg;
1961 size_t regsize;
1962
1963 if (instr->strm->disp.type == DISP_NONE) {
1964 regsize = instr->operand_size;
1965 } else {
1966 /* Indirect access, the size is that of the address. */
1967 regsize = instr->address_size;
1968 }
1969
1970 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1971 if (reg->num == -1) {
1972 reg = resolve_special_register(instr, enc, regsize);
1973 }
1974
1975 return reg;
1976 }
1977
1978 static inline bool
1979 has_sib(struct x86_instr *instr)
1980 {
1981 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
1982 }
1983
1984 static inline bool
1985 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1986 {
1987 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
1988 instr->regmodrm.rm == RM_RBP_DISP32);
1989 }
1990
1991 static inline bool
1992 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1993 {
1994 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
1995 instr->regmodrm.rm == RM_RBP_DISP32);
1996 }
1997
1998 static enum x86_disp_type
1999 get_disp_type(struct x86_instr *instr)
2000 {
2001 switch (instr->regmodrm.mod) {
2002 case MOD_DIS0: /* indirect */
2003 return DISP_0;
2004 case MOD_DIS1: /* indirect+1 */
2005 return DISP_1;
2006 case MOD_DIS4: /* indirect+4 */
2007 return DISP_4;
2008 case MOD_REG: /* direct */
2009 default: /* gcc */
2010 return DISP_NONE;
2011 }
2012 }
2013
2014 static int
2015 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2016 {
2017 struct x86_store *strg, *strm;
2018 const struct x86_opcode *opcode;
2019 const struct x86_reg *reg;
2020 uint8_t byte;
2021
2022 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2023 return -1;
2024 }
2025
2026 opcode = instr->opcode;
2027
2028 instr->regmodrm.present = true;
2029 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2030 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2031 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2032
2033 if (opcode->regtorm) {
2034 strg = &instr->src;
2035 strm = &instr->dst;
2036 } else { /* RM to REG */
2037 strm = &instr->src;
2038 strg = &instr->dst;
2039 }
2040
2041 /* Save for later use. */
2042 instr->strm = strm;
2043
	/*
	 * Special cases: Groups. The REG field of REGMODRM is the index in
	 * the group. The source operand gets overwritten later by the
	 * Immediate node, if any.
	 */
2048 if (opcode->group1) {
2049 if (group1[instr->regmodrm.reg].emul == NULL) {
2050 return -1;
2051 }
2052 instr->emul = group1[instr->regmodrm.reg].emul;
2053 } else if (opcode->group11) {
2054 if (group11[instr->regmodrm.reg].emul == NULL) {
2055 return -1;
2056 }
2057 instr->emul = group11[instr->regmodrm.reg].emul;
2058 }
2059
2060 reg = get_register_reg(instr, opcode);
2061 if (reg == NULL) {
2062 return -1;
2063 }
2064 strg->type = STORE_REG;
2065 strg->u.reg = reg;
2066
	/* The displacement applies to RM. */
	strm->disp.type = get_disp_type(instr);

	if (has_sib(instr)) {
		/* Overwrites RM */
		fsm_advance(fsm, 1, node_sib);
		return 0;
	}
2075
2076 if (is_rip_relative(fsm, instr)) {
2077 /* Overwrites RM */
2078 strm->type = STORE_REG;
2079 strm->u.reg = &gpr_map__rip;
2080 strm->disp.type = DISP_4;
2081 fsm_advance(fsm, 1, node_disp);
2082 return 0;
2083 }
2084
2085 if (is_disp32_only(fsm, instr)) {
2086 /* Overwrites RM */
2087 strm->type = STORE_REG;
2088 strm->u.reg = NULL;
2089 strm->disp.type = DISP_4;
2090 fsm_advance(fsm, 1, node_disp);
2091 return 0;
2092 }
2093
2094 reg = get_register_rm(instr, opcode);
2095 if (reg == NULL) {
2096 return -1;
2097 }
2098 strm->type = STORE_REG;
2099 strm->u.reg = reg;
2100
2101 if (strm->disp.type == DISP_NONE) {
2102 /* Direct register addressing mode */
2103 if (opcode->immediate) {
2104 fsm_advance(fsm, 1, node_immediate);
2105 } else {
2106 fsm_advance(fsm, 1, NULL);
2107 }
2108 } else if (strm->disp.type == DISP_0) {
2109 /* Indirect register addressing mode */
2110 if (opcode->immediate) {
2111 fsm_advance(fsm, 1, node_immediate);
2112 } else {
2113 fsm_advance(fsm, 1, NULL);
2114 }
2115 } else {
2116 fsm_advance(fsm, 1, node_disp);
2117 }
2118
2119 return 0;
2120 }
2121
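/*
 * Effective operand size, in bytes: the opcode's default size wins unless the
 * opcode allows overrides, in which case REX.W selects 8 bytes and the 0x66
 * prefix toggles between 4 and 2 (or 2 and 4 in 16bit mode).
 */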
2122 static size_t
2123 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2124 {
2125 const struct x86_opcode *opcode = instr->opcode;
2126 int opsize;
2127
2128 /* Get the opsize */
2129 if (!opcode->szoverride) {
2130 opsize = opcode->defsize;
2131 } else if (instr->rexpref.present && instr->rexpref.w) {
2132 opsize = 8;
2133 } else {
2134 if (!fsm->is16bit) {
2135 if (instr->legpref.opr_ovr) {
2136 opsize = 2;
2137 } else {
2138 opsize = 4;
2139 }
2140 } else { /* 16bit */
2141 if (instr->legpref.opr_ovr) {
2142 opsize = 4;
2143 } else {
2144 opsize = 2;
2145 }
2146 }
2147 }
2148
2149 /* See if available */
2150 if ((opcode->allsize & opsize) == 0) {
2151 // XXX do we care?
2152 }
2153
2154 return opsize;
2155 }
2156
2157 static size_t
2158 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2159 {
2160 if (fsm->is64bit) {
2161 if (__predict_false(instr->legpref.adr_ovr)) {
2162 return 4;
2163 }
2164 return 8;
2165 }
2166
2167 if (fsm->is32bit) {
2168 if (__predict_false(instr->legpref.adr_ovr)) {
2169 return 2;
2170 }
2171 return 4;
2172 }
2173
2174 /* 16bit. */
2175 if (__predict_false(instr->legpref.adr_ovr)) {
2176 return 4;
2177 }
2178 return 2;
2179 }
2180
2181 static int
2182 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2183 {
2184 const struct x86_opcode *opcode;
2185 uint8_t byte;
2186 size_t i, n;
2187
2188 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2189 return -1;
2190 }
2191
2192 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2193 for (i = 0; i < n; i++) {
2194 if (primary_opcode_table[i].byte == byte)
2195 break;
2196 }
2197 if (i == n) {
2198 return -1;
2199 }
2200 opcode = &primary_opcode_table[i];
2201
2202 instr->opcode = opcode;
2203 instr->emul = opcode->emul;
2204 instr->operand_size = get_operand_size(fsm, instr);
2205 instr->address_size = get_address_size(fsm, instr);
2206
2207 if (opcode->regmodrm) {
2208 fsm_advance(fsm, 1, node_regmodrm);
2209 } else if (opcode->dmo) {
2210 /* Direct-Memory Offsets */
2211 fsm_advance(fsm, 1, node_dmo);
2212 } else if (opcode->stos || opcode->lods) {
2213 fsm_advance(fsm, 1, node_stlo);
2214 } else if (opcode->movs) {
2215 fsm_advance(fsm, 1, node_movs);
2216 } else {
2217 return -1;
2218 }
2219
2220 return 0;
2221 }
2222
2223 static uint64_t
2224 size_to_mask(size_t size)
2225 {
2226 switch (size) {
2227 case 1:
2228 return 0x00000000000000FF;
2229 case 2:
2230 return 0x000000000000FFFF;
2231 case 4:
2232 return 0x00000000FFFFFFFF;
2233 case 8:
2234 default:
2235 return 0xFFFFFFFFFFFFFFFF;
2236 }
2237 }
2238
2239 static int
2240 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2241 {
2242 const struct x86_opcode *opcode;
2243 uint8_t byte;
2244 size_t i, n;
2245
2246 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2247 return -1;
2248 }
2249
2250 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2251 for (i = 0; i < n; i++) {
2252 if (secondary_opcode_table[i].byte == byte)
2253 break;
2254 }
2255 if (i == n) {
2256 return -1;
2257 }
2258 opcode = &secondary_opcode_table[i];
2259
2260 instr->opcode = opcode;
2261 instr->emul = opcode->emul;
2262 instr->operand_size = get_operand_size(fsm, instr);
2263 instr->address_size = get_address_size(fsm, instr);
2264
2265 if (opcode->flags & FLAG_ze) {
2266 /*
2267 * Compute the mask for zero-extension, and shrink the operand
2268 * size accordingly: we move fewer bytes than the full operand.
2269 */
2270 instr->zeroextend_mask = size_to_mask(instr->operand_size);
2271 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2272 instr->operand_size = opcode->defsize;
2273 }
2274
2275 if (opcode->regmodrm) {
2276 fsm_advance(fsm, 1, node_regmodrm);
2277 } else {
2278 return -1;
2279 }
2280
2281 return 0;
2282 }
2283
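/*
 * First byte after the prefixes: 0x0F escapes to the two-byte opcode
 * table, VEX-encoded instructions are rejected, and everything else is
 * handled as a one-byte opcode.
 */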
2284 static int
2285 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2286 {
2287 uint8_t byte;
2288
2289 #define ESCAPE 0x0F
2290 #define VEX_1 0xC5
2291 #define VEX_2 0xC4
2292 #define XOP 0x8F
2293
2294 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2295 return -1;
2296 }
2297
2298 /*
2299 * We don't handle XOP: it is AMD-specific, and was dropped shortly
2300 * after being introduced. VEX-encoded instructions are rejected too.
2301 */
2302 if (byte == ESCAPE) {
2303 fsm_advance(fsm, 1, node_secondary_opcode);
2304 } else if (!instr->rexpref.present) {
2305 if (byte == VEX_1) {
2306 return -1;
2307 } else if (byte == VEX_2) {
2308 return -1;
2309 } else {
2310 fsm->fn = node_primary_opcode;
2311 }
2312 } else {
2313 fsm->fn = node_primary_opcode;
2314 }
2315
2316 return 0;
2317 }
2318
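/*
 * Optional REX prefix: a single byte in the 0x40-0x4F range, laid out
 * as 0100WRXB. It exists only in 64-bit mode (elsewhere these bytes
 * encode INC/DEC, which we don't emulate); W selects a 64-bit operand,
 * and R/X/B extend the ModRM/SIB register fields.
 */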
2319 static int
2320 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2321 {
2322 struct x86_rexpref *rexpref = &instr->rexpref;
2323 uint8_t byte;
2324 size_t n = 0;
2325
2326 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2327 return -1;
2328 }
2329
2330 if (byte >= 0x40 && byte <= 0x4F) {
2331 if (__predict_false(!fsm->is64bit)) {
2332 return -1;
2333 }
2334 rexpref->present = true;
2335 rexpref->w = ((byte & 0x8) != 0);
2336 rexpref->r = ((byte & 0x4) != 0);
2337 rexpref->x = ((byte & 0x2) != 0);
2338 rexpref->b = ((byte & 0x1) != 0);
2339 n = 1;
2340 }
2341
2342 fsm_advance(fsm, n, node_main);
2343 return 0;
2344 }
2345
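/*
 * Legacy prefixes: consume size overrides, segment overrides, REP/REPN
 * and LOCK one byte at a time, looping back into this node until a
 * non-prefix byte is found, then hand over to the REX node.
 */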
2346 static int
2347 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2348 {
2349 uint8_t byte;
2350
2351 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2352 return -1;
2353 }
2354
2355 if (byte == LEG_OPR_OVR) {
2356 instr->legpref.opr_ovr = 1;
2357 } else if (byte == LEG_OVR_DS) {
2358 instr->legpref.seg = NVMM_X64_SEG_DS;
2359 } else if (byte == LEG_OVR_ES) {
2360 instr->legpref.seg = NVMM_X64_SEG_ES;
2361 } else if (byte == LEG_REP) {
2362 instr->legpref.rep = 1;
2363 } else if (byte == LEG_OVR_GS) {
2364 instr->legpref.seg = NVMM_X64_SEG_GS;
2365 } else if (byte == LEG_OVR_FS) {
2366 instr->legpref.seg = NVMM_X64_SEG_FS;
2367 } else if (byte == LEG_ADR_OVR) {
2368 instr->legpref.adr_ovr = 1;
2369 } else if (byte == LEG_OVR_CS) {
2370 instr->legpref.seg = NVMM_X64_SEG_CS;
2371 } else if (byte == LEG_OVR_SS) {
2372 instr->legpref.seg = NVMM_X64_SEG_SS;
2373 } else if (byte == LEG_REPN) {
2374 instr->legpref.repn = 1;
2375 } else if (byte == LEG_LOCK) {
2376 /* ignore */
2377 } else {
2378 /* not a legacy prefix */
2379 fsm_advance(fsm, 0, node_rex_prefix);
2380 return 0;
2381 }
2382
2383 fsm_advance(fsm, 1, node_legacy_prefix);
2384 return 0;
2385 }
2386
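/*
 * Decode one instruction by walking a small FSM over the fetched bytes:
 * legacy prefixes -> REX -> opcode (one- or two-byte) -> ModRM, SIB,
 * displacement and immediate. Each node consumes its bytes with
 * fsm_advance() and installs the next node; decoding ends when a node
 * installs NULL, and fails when a node returns -1.
 *
 * As an illustration, a typical MMIO store such as "88 18"
 * (mov %bl,(%rax)) has no prefixes, matches MOV r/m8,r8 in the primary
 * table, and its ModRM byte 0x18 selects BL as the source register and
 * [RAX] as the memory destination.
 */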
2387 static int
2388 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2389 struct nvmm_x64_state *state)
2390 {
2391 struct x86_decode_fsm fsm;
2392 int ret;
2393
2394 memset(instr, 0, sizeof(*instr));
2395 instr->legpref.seg = -1;
2396
2397 fsm.is64bit = is_64bit(state);
2398 fsm.is32bit = is_32bit(state);
2399 fsm.is16bit = is_16bit(state);
2400
2401 fsm.fn = node_legacy_prefix;
2402 fsm.buf = inst_bytes;
2403 fsm.end = inst_bytes + inst_len;
2404
2405 while (fsm.fn != NULL) {
2406 ret = (*fsm.fn)(&fsm, instr);
2407 if (ret == -1)
2408 return -1;
2409 }
2410
2411 instr->len = fsm.buf - inst_bytes;
2412
2413 return 0;
2414 }
2415
2416 /* -------------------------------------------------------------------------- */
2417
2418 static inline uint8_t
2419 compute_parity(uint8_t *data)
2420 {
2421 /* PF is computed on the least significant byte of the result only. */
2422 uint8_t val = data[0];
2423
2424 val ^= val >> 4;
2425 val ^= val >> 2;
2426 val ^= val >> 1;
2427 return (~val) & 1;
2428 }
2432
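/*
 * OR/AND/XOR emulation: the current memory value is fetched through the
 * callback, combined with the value saved from mem->data, and written
 * back only if the original access was a write. OF and CF are cleared;
 * ZF, SF and PF are recomputed from the result; AF is left alone (it is
 * undefined for these instructions).
 */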
2433 static void
2434 x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2435 uint64_t *gprs)
2436 {
2437 const bool write = mem->write;
2438 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2439 uint8_t data[8];
2440 size_t i;
2441
2442 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2443
2444 memcpy(data, mem->data, sizeof(data));
2445
2446 /* Fetch the value to be OR'ed. */
2447 mem->write = false;
2448 (*cb)(mem);
2449
2450 fl |= PSL_Z; /* Perform the OR; ZF is cleared below if any result byte is nonzero. */
2451 for (i = 0; i < mem->size; i++) {
2452 mem->data[i] |= data[i];
2453 if (mem->data[i] != 0)
2454 fl &= ~PSL_Z;
2455 }
2456 if (mem->data[mem->size-1] & __BIT(7))
2457 fl |= PSL_N;
2458 if (compute_parity(mem->data))
2459 fl |= PSL_PF;
2460
2461 if (write) {
2462 /* Write back the result. */
2463 mem->write = true;
2464 (*cb)(mem);
2465 }
2466
2467 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2468 }
2469
2470 static void
2471 x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2472 uint64_t *gprs)
2473 {
2474 const bool write = mem->write;
2475 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2476 uint8_t data[8];
2477 size_t i;
2478
2479 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2480
2481 memcpy(data, mem->data, sizeof(data));
2482
2483 /* Fetch the value to be AND'ed. */
2484 mem->write = false;
2485 (*cb)(mem);
2486
2487 fl |= PSL_Z; /* Perform the AND; ZF is cleared below if any result byte is nonzero. */
2488 for (i = 0; i < mem->size; i++) {
2489 mem->data[i] &= data[i];
2490 if (mem->data[i] != 0)
2491 fl &= ~PSL_Z;
2492 }
2493 if (mem->data[mem->size-1] & __BIT(7))
2494 fl |= PSL_N;
2495 if (compute_parity(mem->data))
2496 fl |= PSL_PF;
2497
2498 if (write) {
2499 /* Write back the result. */
2500 mem->write = true;
2501 (*cb)(mem);
2502 }
2503
2504 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2505 }
2506
2507 static void
2508 x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2509 uint64_t *gprs)
2510 {
2511 const bool write = mem->write;
2512 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2513 uint8_t data[8];
2514 size_t i;
2515
2516 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2517
2518 memcpy(data, mem->data, sizeof(data));
2519
2520 /* Fetch the value to be XOR'ed. */
2521 mem->write = false;
2522 (*cb)(mem);
2523
2524 fl |= PSL_Z; /* Perform the XOR; ZF is cleared below if any result byte is nonzero. */
2525 for (i = 0; i < mem->size; i++) {
2526 mem->data[i] ^= data[i];
2527 if (mem->data[i] != 0)
2528 fl &= ~PSL_Z;
2529 }
2530 if (mem->data[mem->size-1] & __BIT(7))
2531 fl |= PSL_N;
2532 if (compute_parity(mem->data))
2533 fl |= PSL_PF;
2534
2535 if (write) {
2536 /* Write back the result. */
2537 mem->write = true;
2538 (*cb)(mem);
2539 }
2540
2541 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2542 }
2543
2544 static void
2545 x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2546 uint64_t *gprs)
2547 {
2548 /*
2549 * Nothing special: just forward the access to the callback.
2550 */
2551 (*cb)(mem);
2552 }
2553
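/*
 * String instructions: the access itself goes through the callback
 * (except for MOVS, handled separately), then RDI and/or RSI are
 * advanced by the operand size, backwards if the direction flag (PSL_D)
 * is set.
 */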
2554 static void
2555 x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2556 uint64_t *gprs)
2557 {
2558 /*
2559 * Just move, and update RDI.
2560 */
2561 (*cb)(mem);
2562
2563 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2564 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2565 } else {
2566 gprs[NVMM_X64_GPR_RDI] += mem->size;
2567 }
2568 }
2569
2570 static void
2571 x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2572 uint64_t *gprs)
2573 {
2574 /*
2575 * Just move, and update RSI.
2576 */
2577 (*cb)(mem);
2578
2579 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2580 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2581 } else {
2582 gprs[NVMM_X64_GPR_RSI] += mem->size;
2583 }
2584 }
2585
2586 static void
2587 x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2588 uint64_t *gprs)
2589 {
2590 /*
2591 * Special instruction: two memory operands. Don't call the cb; the
2592 * copy was already performed by the caller. Only update RSI/RDI here.
2593 */
2594
2595 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2596 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2597 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2598 } else {
2599 gprs[NVMM_X64_GPR_RSI] += mem->size;
2600 gprs[NVMM_X64_GPR_RDI] += mem->size;
2601 }
2602 }
2603
2604 /* -------------------------------------------------------------------------- */
2605
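/*
 * Read a GPR for address computation, truncated to the effective
 * address size of the instruction.
 */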
2606 static inline uint64_t
2607 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2608 {
2609 uint64_t val;
2610
2611 val = state->gprs[gpr];
2612 if (__predict_false(instr->address_size == 4)) {
2613 val &= 0x00000000FFFFFFFF;
2614 } else if (__predict_false(instr->address_size == 2)) {
2615 val &= 0x000000000000FFFF;
2616 }
2617
2618 return val;
2619 }
2620
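/*
 * Compute the guest virtual address of a decoded operand:
 *
 *	SIB form:	gva = base + scale * index + disp
 *	register form:	gva = reg + disp
 *	DMO form:	gva = offset
 *
 * Outside of long mode, segmentation is then applied: the segment comes
 * from the operand's hardseg if set, an explicit override prefix, or DS
 * by default.
 */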
2621 static int
2622 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2623 struct x86_store *store, gvaddr_t *gvap, size_t size)
2624 {
2625 struct x86_sib *sib;
2626 gvaddr_t gva = 0;
2627 uint64_t reg;
2628 int ret, seg;
2629
2630 if (store->type == STORE_SIB) {
2631 sib = &store->u.sib;
2632 if (sib->bas != NULL)
2633 gva += gpr_read_address(instr, state, sib->bas->num);
2634 if (sib->idx != NULL) {
2635 reg = gpr_read_address(instr, state, sib->idx->num);
2636 gva += sib->scale * reg;
2637 }
2638 } else if (store->type == STORE_REG) {
2639 if (store->u.reg == NULL) {
2640 /* The base is null. Happens with disp32-only. */
2641 } else {
2642 gva = gpr_read_address(instr, state, store->u.reg->num);
2643 }
2644 } else {
2645 gva = store->u.dmo;
2646 }
2647
2648 if (store->disp.type != DISP_NONE) {
2649 gva += store->disp.data;
2650 }
2651
2652 if (!is_long_mode(state)) {
2653 if (store->hardseg != 0) {
2654 seg = store->hardseg;
2655 } else {
2656 if (__predict_false(instr->legpref.seg != -1)) {
2657 seg = instr->legpref.seg;
2658 } else {
2659 seg = NVMM_X64_SEG_DS;
2660 }
2661 }
2662
2663 ret = segment_apply(&state->segs[seg], &gva, size);
2664 if (ret == -1)
2665 return -1;
2666 }
2667
2668 *gvap = gva;
2669 return 0;
2670 }
2671
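/*
 * Pre-decode just enough of the instruction at RIP to know which
 * segment its memory operand uses: scan the prefix bytes for a segment
 * override and fall back to DS, so callers that don't run the full
 * decoder can still honor overrides.
 */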
2672 static int
2673 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2674 {
2675 uint8_t inst_bytes[15], byte;
2676 size_t i, fetchsize;
2677 gvaddr_t gva;
2678 int ret, seg;
2679
2680 fetchsize = sizeof(inst_bytes);
2681
2682 gva = state->gprs[NVMM_X64_GPR_RIP];
2683 if (!is_long_mode(state)) {
2684 ret = segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva,
2685 fetchsize);
2686 if (ret == -1)
2687 return -1;
2688 }
2689
2690 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2691 if (ret == -1)
2692 return -1;
2693
2694 seg = NVMM_X64_SEG_DS;
2695 for (i = 0; i < fetchsize; i++) {
2696 byte = inst_bytes[i];
2697
2698 if (byte == LEG_OVR_DS) {
2699 seg = NVMM_X64_SEG_DS;
2700 } else if (byte == LEG_OVR_ES) {
2701 seg = NVMM_X64_SEG_ES;
2702 } else if (byte == LEG_OVR_GS) {
2703 seg = NVMM_X64_SEG_GS;
2704 } else if (byte == LEG_OVR_FS) {
2705 seg = NVMM_X64_SEG_FS;
2706 } else if (byte == LEG_OVR_CS) {
2707 seg = NVMM_X64_SEG_CS;
2708 } else if (byte == LEG_OVR_SS) {
2709 seg = NVMM_X64_SEG_SS;
2710 } else if (byte == LEG_OPR_OVR) {
2711 /* nothing */
2712 } else if (byte == LEG_ADR_OVR) {
2713 /* nothing */
2714 } else if (byte == LEG_REP) {
2715 /* nothing */
2716 } else if (byte == LEG_REPN) {
2717 /* nothing */
2718 } else if (byte == LEG_LOCK) {
2719 /* nothing */
2720 } else {
2721 return seg;
2722 }
2723 }
2724
2725 return seg;
2726 }
2727
2728 static int
2729 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2730 struct nvmm_exit *exit)
2731 {
2732 size_t fetchsize;
2733 gvaddr_t gva;
2734 int ret;
2735
2736 fetchsize = sizeof(exit->u.mem.inst_bytes);
2737
2738 gva = state->gprs[NVMM_X64_GPR_RIP];
2739 if (!is_long_mode(state)) {
2740 ret = segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva,
2741 fetchsize);
2742 if (ret == -1)
2743 return -1;
2744 }
2745
2746 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2747 fetchsize);
2748 if (ret == -1)
2749 return -1;
2750
2751 exit->u.mem.inst_len = fetchsize;
2752
2753 return 0;
2754 }
2755
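/*
 * MOVS is the only handled instruction with two memory operands: both
 * addresses are guest memory, so the data is copied between them here,
 * and the emul hook only updates RSI/RDI. Note that only mem.size is
 * filled in for that call, since x86_emul_movs doesn't access the data
 * or the callback.
 */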
2756 static int
2757 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2758 struct x86_instr *instr)
2759 {
2760 struct nvmm_mem mem;
2761 uint8_t data[8];
2762 gvaddr_t gva;
2763 size_t size;
2764 int ret;
2765
2766 size = instr->operand_size;
2767
2768 /* Source. */
2769 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2770 if (ret == -1)
2771 return -1;
2772 ret = read_guest_memory(mach, state, gva, data, size);
2773 if (ret == -1)
2774 return -1;
2775
2776 /* Destination. */
2777 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
2778 if (ret == -1)
2779 return -1;
2780 ret = write_guest_memory(mach, state, gva, data, size);
2781 if (ret == -1)
2782 return -1;
2783
2784 mem.size = size;
2785 (*instr->emul)(&mem, NULL, state->gprs);
2786
2787 return 0;
2788 }
2789
2790 #define DISASSEMBLER_BUG() \
2791 do { \
2792 errno = EINVAL; \
2793 return -1; \
2794 } while (0)
2795
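/*
 * Single memory operand: the direction is inferred from the source
 * operand (a plain register or an immediate means a store to memory, a
 * memory-form operand means a load), the nvmm_mem descriptor is built,
 * and the emul hook runs with the VMM's registered memory callback. On
 * a load, the result is merged back into the destination register
 * through its mask, then the zero-extend mask (if any) is applied.
 */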
2796 static int
2797 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2798 struct x86_instr *instr, struct nvmm_exit *exit)
2799 {
2800 struct nvmm_mem mem;
2801 uint8_t membuf[8];
2802 uint64_t val;
2803
2804 memset(membuf, 0, sizeof(membuf));
2805
2806 mem.gpa = exit->u.mem.gpa;
2807 mem.size = instr->operand_size;
2808 mem.data = membuf;
2809
2810 /* Determine the direction. */
2811 switch (instr->src.type) {
2812 case STORE_REG:
2813 if (instr->src.disp.type != DISP_NONE) {
2814 /* Indirect: the source is in memory, so this is a load. */
2815 mem.write = false;
2816 } else {
2817 /* Direct: the source is the register itself, so this is a store. */
2818 mem.write = true;
2819 }
2820 break;
2821 case STORE_IMM:
2822 mem.write = true;
2823 break;
2824 case STORE_SIB:
2825 mem.write = false;
2826 break;
2827 case STORE_DMO:
2828 mem.write = false;
2829 break;
2830 default:
2831 DISASSEMBLER_BUG();
2832 }
2833
2834 if (mem.write) {
2835 switch (instr->src.type) {
2836 case STORE_REG:
2837 if (instr->src.disp.type != DISP_NONE) {
2838 DISASSEMBLER_BUG();
2839 }
2840 val = state->gprs[instr->src.u.reg->num];
2841 val = __SHIFTOUT(val, instr->src.u.reg->mask);
2842 memcpy(mem.data, &val, mem.size);
2843 break;
2844 case STORE_IMM:
2845 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
2846 break;
2847 default:
2848 DISASSEMBLER_BUG();
2849 }
2850 }
2851
2852 (*instr->emul)(&mem, __callbacks.mem, state->gprs);
2853
2854 if (!mem.write) {
2855 if (instr->dst.type != STORE_REG) {
2856 DISASSEMBLER_BUG();
2857 }
2858 memcpy(&val, mem.data, sizeof(uint64_t));
2859 val = __SHIFTIN(val, instr->dst.u.reg->mask);
2860 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
2861 state->gprs[instr->dst.u.reg->num] |= val;
2862 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
2863 }
2864
2865 return 0;
2866 }
2867
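/*
 * Memory-assist entry point, called by the VMM on an NVMM_EXIT_MEMORY
 * exit: fetch the VCPU state and, if the kernel didn't provide them,
 * the instruction bytes; decode and emulate the access through the
 * registered callbacks; advance RIP (for REP/REPN only once the count
 * is exhausted, or when REPN terminates on ZF); and write the GPRs
 * back.
 *
 * A minimal usage sketch from a VMM run loop (assuming "mach", "cpuid"
 * and a configured memory callback):
 *
 *	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
 *		if (exit.reason == NVMM_EXIT_MEMORY) {
 *			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
 *				err(EXIT_FAILURE, "mem assist");
 *		}
 *		... handle other exit reasons ...
 *	}
 */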
2868 int
2869 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
2870 struct nvmm_exit *exit)
2871 {
2872 struct nvmm_x64_state state;
2873 struct x86_instr instr;
2874 uint64_t cnt;
2875 int ret;
2876
2877 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
2878 errno = EINVAL;
2879 return -1;
2880 }
2881
2882 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
2883 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | NVMM_X64_STATE_CRS |
2884 NVMM_X64_STATE_MSRS);
2885 if (ret == -1)
2886 return -1;
2887
2888 if (exit->u.mem.inst_len == 0) {
2889 /*
2890 * The instruction was not fetched from the kernel. Fetch
2891 * it ourselves.
2892 */
2893 ret = fetch_instruction(mach, &state, exit);
2894 if (ret == -1)
2895 return -1;
2896 }
2897
2898 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
2899 &instr, &state);
2900 if (ret == -1) {
2901 errno = ENODEV;
2902 return -1;
2903 }
2904
2905 if (instr.opcode->movs) {
2906 ret = assist_mem_double(mach, &state, &instr);
2907 } else {
2908 ret = assist_mem_single(mach, &state, &instr, exit);
2909 }
2910 if (ret == -1) {
2911 errno = ENODEV;
2912 return -1;
2913 }
2914
2915 if (instr.legpref.rep || instr.legpref.repn) {
2916 cnt = rep_dec_apply(&state, instr.address_size);
2917 if (cnt == 0) {
2918 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2919 } else if (__predict_false(instr.legpref.repn)) {
2920 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
2921 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2922 }
2923 }
2924 } else {
2925 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2926 }
2927
2928 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
2929 if (ret == -1)
2930 return -1;
2931
2932 return 0;
2933 }
2934