 1 /* $NetBSD: libnvmm_x86.c,v 1.11 2019/01/07 13:47:33 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
56 /*
57 * Undocumented debugging function. Helpful.
58 */
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 size_t i;
64 int ret;
65
66 const char *segnames[] = {
67 "CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
68 };
69
70 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
71 if (ret == -1)
72 return -1;
73
74 printf("+ VCPU id=%d\n", (int)cpuid);
75 printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
76 printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
77 printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
78 printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
79 printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
80 for (i = 0; i < NVMM_X64_NSEG; i++) {
81 printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d\n",
82 segnames[i],
83 state.segs[i].selector,
84 (void *)state.segs[i].base,
85 (void *)state.segs[i].limit,
86 state.segs[i].attrib.p, state.segs[i].attrib.def32);
87 }
88 printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
89 printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
90 printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
91 printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
92 printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
93 printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);
94
95 return 0;
96 }
97
98 /* -------------------------------------------------------------------------- */
99
100 #define PTE32_L1_SHIFT 12
101 #define PTE32_L2_SHIFT 22
102
103 #define PTE32_L2_MASK 0xffc00000
104 #define PTE32_L1_MASK 0x003ff000
105
106 #define PTE32_L2_FRAME (PTE32_L2_MASK)
107 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
108
109 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
110 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
111
112 typedef uint32_t pte_32bit_t;
113
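/*
 * Two-level page walk, for 32-bit (non-PAE) paging. The PTEs are 32 bits
 * wide. 4MB superpages (PG_PS) are handled at L2, but only if the guest
 * has PSE enabled.
 */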
114 static int
115 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
116 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
117 {
118 gpaddr_t L2gpa, L1gpa;
119 uintptr_t L2hva, L1hva;
120 pte_32bit_t *pdir, pte;
121
122 /* We begin with an RWXU access. */
123 *prot = NVMM_PROT_ALL;
124
125 /* Parse L2. */
126 L2gpa = (cr3 & PG_FRAME);
127 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
128 return -1;
129 pdir = (pte_32bit_t *)L2hva;
130 pte = pdir[pte32_l2idx(gva)];
131 if ((pte & PG_V) == 0)
132 return -1;
133 if ((pte & PG_u) == 0)
134 *prot &= ~NVMM_PROT_USER;
135 if ((pte & PG_KW) == 0)
136 *prot &= ~NVMM_PROT_WRITE;
137 if ((pte & PG_PS) && !has_pse)
138 return -1;
139 if (pte & PG_PS) {
140 *gpa = (pte & PTE32_L2_FRAME);
141 *gpa = *gpa + (gva & PTE32_L1_MASK);
142 return 0;
143 }
144
145 /* Parse L1. */
146 L1gpa = (pte & PG_FRAME);
147 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
148 return -1;
149 pdir = (pte_32bit_t *)L1hva;
150 pte = pdir[pte32_l1idx(gva)];
151 if ((pte & PG_V) == 0)
152 return -1;
153 if ((pte & PG_u) == 0)
154 *prot &= ~NVMM_PROT_USER;
155 if ((pte & PG_KW) == 0)
156 *prot &= ~NVMM_PROT_WRITE;
157 if (pte & PG_PS)
158 return -1;
159
160 *gpa = (pte & PG_FRAME);
161 return 0;
162 }
163
164 /* -------------------------------------------------------------------------- */
165
166 #define PTE32_PAE_L1_SHIFT 12
167 #define PTE32_PAE_L2_SHIFT 21
168 #define PTE32_PAE_L3_SHIFT 30
169
170 #define PTE32_PAE_L3_MASK 0xc0000000
171 #define PTE32_PAE_L2_MASK 0x3fe00000
172 #define PTE32_PAE_L1_MASK 0x001ff000
173
174 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
175 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
176 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
177
178 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
179 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
180 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
181
182 typedef uint64_t pte_32bit_pae_t;
183
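/*
 * Three-level page walk, for 32-bit PAE paging. The PTEs are 64 bits wide,
 * so NX is honored. 2MB superpages (PG_PS) are handled at L2.
 */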
184 static int
185 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
186 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
187 {
188 gpaddr_t L3gpa, L2gpa, L1gpa;
189 uintptr_t L3hva, L2hva, L1hva;
190 pte_32bit_pae_t *pdir, pte;
191
192 /* We begin with an RWXU access. */
193 *prot = NVMM_PROT_ALL;
194
195 /* Parse L3. */
196 L3gpa = (cr3 & PG_FRAME);
197 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
198 return -1;
199 pdir = (pte_32bit_pae_t *)L3hva;
200 pte = pdir[pte32_pae_l3idx(gva)];
201 if ((pte & PG_V) == 0)
202 return -1;
203 if (pte & PG_NX)
204 *prot &= ~NVMM_PROT_EXEC;
205 if (pte & PG_PS)
206 return -1;
207
208 /* Parse L2. */
209 L2gpa = (pte & PG_FRAME);
210 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
211 return -1;
212 pdir = (pte_32bit_pae_t *)L2hva;
213 pte = pdir[pte32_pae_l2idx(gva)];
214 if ((pte & PG_V) == 0)
215 return -1;
216 if ((pte & PG_u) == 0)
217 *prot &= ~NVMM_PROT_USER;
218 if ((pte & PG_KW) == 0)
219 *prot &= ~NVMM_PROT_WRITE;
220 if (pte & PG_NX)
221 *prot &= ~NVMM_PROT_EXEC;
222 if ((pte & PG_PS) && !has_pse)
223 return -1;
224 if (pte & PG_PS) {
225 *gpa = (pte & PTE32_PAE_L2_FRAME);
226 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
227 return 0;
228 }
229
230 /* Parse L1. */
231 L1gpa = (pte & PG_FRAME);
232 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
233 return -1;
234 pdir = (pte_32bit_pae_t *)L1hva;
235 pte = pdir[pte32_pae_l1idx(gva)];
236 if ((pte & PG_V) == 0)
237 return -1;
238 if ((pte & PG_u) == 0)
239 *prot &= ~NVMM_PROT_USER;
240 if ((pte & PG_KW) == 0)
241 *prot &= ~NVMM_PROT_WRITE;
242 if (pte & PG_NX)
243 *prot &= ~NVMM_PROT_EXEC;
244 if (pte & PG_PS)
245 return -1;
246
247 *gpa = (pte & PG_FRAME);
248 return 0;
249 }
250
251 /* -------------------------------------------------------------------------- */
252
253 #define PTE64_L1_SHIFT 12
254 #define PTE64_L2_SHIFT 21
255 #define PTE64_L3_SHIFT 30
256 #define PTE64_L4_SHIFT 39
257
258 #define PTE64_L4_MASK 0x0000ff8000000000
259 #define PTE64_L3_MASK 0x0000007fc0000000
260 #define PTE64_L2_MASK 0x000000003fe00000
261 #define PTE64_L1_MASK 0x00000000001ff000
262
263 #define PTE64_L4_FRAME PTE64_L4_MASK
264 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
265 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
266 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
267
268 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
269 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
270 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
271 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
272
273 typedef uint64_t pte_64bit_t;
274
275 static inline bool
276 x86_gva_64bit_canonical(gvaddr_t gva)
277 {
278 /* Bits 63:47 must have the same value. */
279 #define SIGN_EXTEND 0xffff800000000000ULL
280 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
281 }
282
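/*
 * Four-level page walk, for 64-bit (long mode) paging. The GVA must be
 * canonical. 1GB superpages are handled at L3, 2MB superpages at L2.
 */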
283 static int
284 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
285 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
286 {
287 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
288 uintptr_t L4hva, L3hva, L2hva, L1hva;
289 pte_64bit_t *pdir, pte;
290
291 /* We begin with an RWXU access. */
292 *prot = NVMM_PROT_ALL;
293
294 if (!x86_gva_64bit_canonical(gva))
295 return -1;
296
297 /* Parse L4. */
298 L4gpa = (cr3 & PG_FRAME);
299 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
300 return -1;
301 pdir = (pte_64bit_t *)L4hva;
302 pte = pdir[pte64_l4idx(gva)];
303 if ((pte & PG_V) == 0)
304 return -1;
305 if ((pte & PG_u) == 0)
306 *prot &= ~NVMM_PROT_USER;
307 if ((pte & PG_KW) == 0)
308 *prot &= ~NVMM_PROT_WRITE;
309 if (pte & PG_NX)
310 *prot &= ~NVMM_PROT_EXEC;
311 if (pte & PG_PS)
312 return -1;
313
314 /* Parse L3. */
315 L3gpa = (pte & PG_FRAME);
316 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
317 return -1;
318 pdir = (pte_64bit_t *)L3hva;
319 pte = pdir[pte64_l3idx(gva)];
320 if ((pte & PG_V) == 0)
321 return -1;
322 if ((pte & PG_u) == 0)
323 *prot &= ~NVMM_PROT_USER;
324 if ((pte & PG_KW) == 0)
325 *prot &= ~NVMM_PROT_WRITE;
326 if (pte & PG_NX)
327 *prot &= ~NVMM_PROT_EXEC;
328 if (pte & PG_PS) {
329 *gpa = (pte & PTE64_L3_FRAME);
330 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
331 return 0;
332 }
333
334 /* Parse L2. */
335 L2gpa = (pte & PG_FRAME);
336 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
337 return -1;
338 pdir = (pte_64bit_t *)L2hva;
339 pte = pdir[pte64_l2idx(gva)];
340 if ((pte & PG_V) == 0)
341 return -1;
342 if ((pte & PG_u) == 0)
343 *prot &= ~NVMM_PROT_USER;
344 if ((pte & PG_KW) == 0)
345 *prot &= ~NVMM_PROT_WRITE;
346 if (pte & PG_NX)
347 *prot &= ~NVMM_PROT_EXEC;
348 if (pte & PG_PS) {
349 *gpa = (pte & PTE64_L2_FRAME);
350 *gpa = *gpa + (gva & PTE64_L1_MASK);
351 return 0;
352 }
353
354 /* Parse L1. */
355 L1gpa = (pte & PG_FRAME);
356 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
357 return -1;
358 pdir = (pte_64bit_t *)L1hva;
359 pte = pdir[pte64_l1idx(gva)];
360 if ((pte & PG_V) == 0)
361 return -1;
362 if ((pte & PG_u) == 0)
363 *prot &= ~NVMM_PROT_USER;
364 if ((pte & PG_KW) == 0)
365 *prot &= ~NVMM_PROT_WRITE;
366 if (pte & PG_NX)
367 *prot &= ~NVMM_PROT_EXEC;
368 if (pte & PG_PS)
369 return -1;
370
371 *gpa = (pte & PG_FRAME);
372 return 0;
373 }
374
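/*
 * Translate a GVA into a GPA, picking the page walk from the guest's paging
 * mode: no translation if CR0.PG is clear, otherwise CR4.PAE and EFER.LME
 * select the 32-bit, 32-bit PAE or 64-bit walk. The page offset is re-added
 * at the end, *prot collects the cumulative protections, and errno is set
 * to EFAULT on failure.
 */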
375 static inline int
376 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
377 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
378 {
379 bool is_pae, is_lng, has_pse;
380 uint64_t cr3;
381 size_t off;
382 int ret;
383
384 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
385 /* No paging. */
386 *prot = NVMM_PROT_ALL;
387 *gpa = gva;
388 return 0;
389 }
390
391 off = (gva & PAGE_MASK);
392 gva &= ~PAGE_MASK;
393
394 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
395 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
396 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
397 cr3 = state->crs[NVMM_X64_CR_CR3];
398
399 if (is_pae && is_lng) {
400 /* 64bit */
401 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
402 } else if (is_pae && !is_lng) {
403 /* 32bit PAE */
404 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
405 prot);
406 } else if (!is_pae && !is_lng) {
407 /* 32bit */
408 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
409 } else {
410 ret = -1;
411 }
412
413 if (ret == -1) {
414 errno = EFAULT;
415 }
416
417 *gpa = *gpa + off;
418
419 return ret;
420 }
421
422 int
423 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
424 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
425 {
426 struct nvmm_x64_state state;
427 int ret;
428
429 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
430 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
431 if (ret == -1)
432 return -1;
433
434 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
435 }
436
437 /* -------------------------------------------------------------------------- */
438
439 static inline bool
440 is_64bit(struct nvmm_x64_state *state)
441 {
442 return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
443 }
444
445 static inline bool
446 is_32bit(struct nvmm_x64_state *state)
447 {
448 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
449 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
450 }
451
452 static inline bool
453 is_16bit(struct nvmm_x64_state *state)
454 {
455 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
456 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
457 }
458
459 static inline bool
460 is_long_mode(struct nvmm_x64_state *state)
461 {
462 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LME) != 0;
463 }
464
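/*
 * Apply segmentation: check that the segment is present and that the access
 * fits below its limit (scaled by the granularity bit), then add the
 * segment base to the GVA.
 */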
465 static int
466 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva, size_t size)
467 {
468 uint64_t limit;
469
 470 	/*
 471 	 * This is incomplete. We should also handle expand-down segments
 472 	 * and the remaining attribute checks.
 473 	 */
474 if (__predict_false(!seg->attrib.p)) {
475 goto error;
476 }
477
478 limit = (seg->limit + 1);
479 if (__predict_true(seg->attrib.gran)) {
480 limit *= PAGE_SIZE;
481 }
482
483 if (__predict_false(*gva + size > limit)) {
484 goto error;
485 }
486
487 *gva += seg->base;
488 return 0;
489
490 error:
491 errno = EFAULT;
492 return -1;
493 }
494
495 static uint64_t
496 mask_from_adsize(size_t adsize)
497 {
498 switch (adsize) {
499 case 8:
500 return 0xFFFFFFFFFFFFFFFF;
501 case 4:
502 return 0x00000000FFFFFFFF;
503 case 2:
504 default: /* impossible */
505 return 0x000000000000FFFF;
506 }
507 }
508
509 static uint64_t
510 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
511 {
512 uint64_t mask, cnt;
513
514 mask = mask_from_adsize(adsize);
515 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
516
517 return cnt;
518 }
519
520 static void
521 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
522 {
523 uint64_t mask;
524
525 mask = mask_from_adsize(adsize);
526 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
527 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
528 }
529
530 static uint64_t
531 rep_dec_apply(struct nvmm_x64_state *state, size_t adsize)
532 {
533 uint64_t mask, cnt;
534
535 mask = mask_from_adsize(adsize);
536
537 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
538 cnt -= 1;
539 cnt &= mask;
540
541 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
542 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
543
544 return cnt;
545 }
546
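/*
 * Read 'size' bytes of guest memory at 'gva'. Accesses that cross a page
 * boundary are split and handled recursively. If the GPA does not resolve
 * to host memory, the access is treated as MMIO and forwarded to the
 * registered memory callback.
 */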
547 static int
548 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
549 gvaddr_t gva, uint8_t *data, size_t size)
550 {
551 struct nvmm_mem mem;
552 nvmm_prot_t prot;
553 gpaddr_t gpa;
554 uintptr_t hva;
555 bool is_mmio;
556 int ret, remain;
557
558 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
559 if (__predict_false(ret == -1)) {
560 return -1;
561 }
562 if (__predict_false(!(prot & NVMM_PROT_READ))) {
563 errno = EFAULT;
564 return -1;
565 }
566
567 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
568 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
569 } else {
570 remain = 0;
571 }
572 size -= remain;
573
574 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
575 is_mmio = (ret == -1);
576
577 if (is_mmio) {
578 mem.data = data;
579 mem.gva = gva;
580 mem.gpa = gpa;
581 mem.write = false;
582 mem.size = size;
583 (*__callbacks.mem)(&mem);
584 } else {
585 memcpy(data, (uint8_t *)hva, size);
586 }
587
588 if (remain > 0) {
589 ret = read_guest_memory(mach, state, gva + size,
590 data + size, remain);
591 } else {
592 ret = 0;
593 }
594
595 return ret;
596 }
597
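/*
 * Same as read_guest_memory(), but in the write direction.
 */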
598 static int
599 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
600 gvaddr_t gva, uint8_t *data, size_t size)
601 {
602 struct nvmm_mem mem;
603 nvmm_prot_t prot;
604 gpaddr_t gpa;
605 uintptr_t hva;
606 bool is_mmio;
607 int ret, remain;
608
609 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
610 if (__predict_false(ret == -1)) {
611 return -1;
612 }
613 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
614 errno = EFAULT;
615 return -1;
616 }
617
618 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
619 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
620 } else {
621 remain = 0;
622 }
623 size -= remain;
624
625 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
626 is_mmio = (ret == -1);
627
628 if (is_mmio) {
629 mem.data = data;
630 mem.gva = gva;
631 mem.gpa = gpa;
632 mem.write = true;
633 mem.size = size;
634 (*__callbacks.mem)(&mem);
635 } else {
636 memcpy((uint8_t *)hva, data, size);
637 }
638
639 if (remain > 0) {
640 ret = write_guest_memory(mach, state, gva + size,
641 data + size, remain);
642 } else {
643 ret = 0;
644 }
645
646 return ret;
647 }
648
649 /* -------------------------------------------------------------------------- */
650
651 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
652
653 #define NVMM_IO_BATCH_SIZE 32
654
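/*
 * Batch a REP INS/OUTS: perform up to NVMM_IO_BATCH_SIZE bytes worth of
 * consecutive I/O operations in one go, copying between guest memory and a
 * local buffer around the I/O callback. Returns the number of operations
 * performed, or -1 on error.
 */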
655 static int
656 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
657 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
658 {
659 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
660 size_t i, iosize, iocnt;
661 int ret;
662
663 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
664 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
665 iocnt = iosize / io->size;
666
667 io->data = iobuf;
668
669 if (!io->in) {
670 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
671 if (ret == -1)
672 return -1;
673 }
674
675 for (i = 0; i < iocnt; i++) {
676 (*__callbacks.io)(io);
677 io->data += io->size;
678 }
679
680 if (io->in) {
681 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
682 if (ret == -1)
683 return -1;
684 }
685
686 return iocnt;
687 }
688
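/*
 * I/O Assist. Decode the information attached to an NVMM_EXIT_IO exit,
 * compute the GVA for string operations, perform the I/O through the
 * registered callback (batched for REP when the direction flag is clear),
 * and update RAX/RSI/RDI/RCX/RIP before committing the GPRs back.
 */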
689 int
690 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
691 struct nvmm_exit *exit)
692 {
693 struct nvmm_x64_state state;
694 struct nvmm_io io;
695 uint64_t cnt = 0; /* GCC */
696 uint8_t iobuf[8];
697 int iocnt = 1;
698 gvaddr_t gva;
699 int reg = 0; /* GCC */
700 int ret, seg;
701 bool psld = false;
702
703 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
704 errno = EINVAL;
705 return -1;
706 }
707
708 io.port = exit->u.io.port;
709 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
710 io.size = exit->u.io.operand_size;
711 io.data = iobuf;
712
713 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
714 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
715 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
716 if (ret == -1)
717 return -1;
718
719 if (exit->u.io.rep) {
720 cnt = rep_get_cnt(&state, exit->u.io.address_size);
721 if (__predict_false(cnt == 0)) {
722 return 0;
723 }
724 }
725
726 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
727 psld = true;
728 }
729
730 /*
731 * Determine GVA.
732 */
733 if (exit->u.io.str) {
734 if (io.in) {
735 reg = NVMM_X64_GPR_RDI;
736 } else {
737 reg = NVMM_X64_GPR_RSI;
738 }
739
740 gva = state.gprs[reg];
741 gva &= mask_from_adsize(exit->u.io.address_size);
742
743 if (!is_long_mode(&state)) {
744 if (exit->u.io.seg != -1) {
745 seg = exit->u.io.seg;
746 } else {
747 if (io.in) {
748 seg = NVMM_X64_SEG_ES;
749 } else {
750 seg = fetch_segment(mach, &state);
751 if (seg == -1)
752 return -1;
753 }
754 }
755
756 ret = segment_apply(&state.segs[seg], &gva, io.size);
757 if (ret == -1)
758 return -1;
759 }
760
761 if (exit->u.io.rep && !psld) {
762 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
763 if (iocnt == -1)
764 return -1;
765 goto done;
766 }
767 }
768
769 if (!io.in) {
770 if (!exit->u.io.str) {
771 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
772 } else {
773 ret = read_guest_memory(mach, &state, gva, io.data,
774 io.size);
775 if (ret == -1)
776 return -1;
777 }
778 }
779
780 (*__callbacks.io)(&io);
781
782 if (io.in) {
783 if (!exit->u.io.str) {
784 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
785 } else {
786 ret = write_guest_memory(mach, &state, gva, io.data,
787 io.size);
788 if (ret == -1)
789 return -1;
790 }
791 }
792
793 done:
794 if (exit->u.io.str) {
795 if (__predict_false(psld)) {
796 state.gprs[reg] -= iocnt * io.size;
797 } else {
798 state.gprs[reg] += iocnt * io.size;
799 }
800 }
801
802 if (exit->u.io.rep) {
803 cnt -= iocnt;
804 rep_set_cnt(&state, exit->u.io.address_size, cnt);
805 if (cnt == 0) {
806 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
807 }
808 } else {
809 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
810 }
811
812 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
813 if (ret == -1)
814 return -1;
815
816 return 0;
817 }
818
819 /* -------------------------------------------------------------------------- */
820
821 static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
822 static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
823 static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
824 static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
825 static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
826 static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
827 static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
828
829 enum x86_legpref {
830 /* Group 1 */
831 LEG_LOCK = 0,
832 LEG_REPN, /* REPNE/REPNZ */
833 LEG_REP, /* REP/REPE/REPZ */
834 /* Group 2 */
835 LEG_OVR_CS,
836 LEG_OVR_SS,
837 LEG_OVR_DS,
838 LEG_OVR_ES,
839 LEG_OVR_FS,
840 LEG_OVR_GS,
841 LEG_BRN_TAKEN,
842 LEG_BRN_NTAKEN,
843 /* Group 3 */
844 LEG_OPR_OVR,
845 /* Group 4 */
846 LEG_ADR_OVR,
847
848 NLEG
849 };
850
851 struct x86_rexpref {
852 bool present;
853 bool w;
854 bool r;
855 bool x;
856 bool b;
857 };
858
859 struct x86_reg {
860 int num; /* NVMM GPR state index */
861 uint64_t mask;
862 };
863
864 enum x86_disp_type {
865 DISP_NONE,
866 DISP_0,
867 DISP_1,
868 DISP_4
869 };
870
871 struct x86_disp {
872 enum x86_disp_type type;
873 uint64_t data; /* 4 bytes, but can be sign-extended */
874 };
875
876 enum REGMODRM__Mod {
877 MOD_DIS0, /* also, register indirect */
878 MOD_DIS1,
879 MOD_DIS4,
880 MOD_REG
881 };
882
883 enum REGMODRM__Reg {
884 REG_000, /* these fields are indexes to the register map */
885 REG_001,
886 REG_010,
887 REG_011,
888 REG_100,
889 REG_101,
890 REG_110,
891 REG_111
892 };
893
894 enum REGMODRM__Rm {
895 RM_000, /* reg */
896 RM_001, /* reg */
897 RM_010, /* reg */
898 RM_011, /* reg */
899 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
900 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
901 RM_110,
902 RM_111
903 };
904
905 struct x86_regmodrm {
906 bool present;
907 enum REGMODRM__Mod mod;
908 enum REGMODRM__Reg reg;
909 enum REGMODRM__Rm rm;
910 };
911
912 struct x86_immediate {
913 size_t size; /* 1/2/4/8 */
914 uint64_t data;
915 };
916
917 struct x86_sib {
918 uint8_t scale;
919 const struct x86_reg *idx;
920 const struct x86_reg *bas;
921 };
922
923 enum x86_store_type {
924 STORE_NONE,
925 STORE_REG,
926 STORE_IMM,
927 STORE_SIB,
928 STORE_DMO
929 };
930
931 struct x86_store {
932 enum x86_store_type type;
933 union {
934 const struct x86_reg *reg;
935 struct x86_immediate imm;
936 struct x86_sib sib;
937 uint64_t dmo;
938 } u;
939 struct x86_disp disp;
940 int hardseg;
941 };
942
943 struct x86_instr {
944 size_t len;
945 bool legpref[NLEG];
946 struct x86_rexpref rexpref;
947 size_t operand_size;
948 size_t address_size;
949 uint64_t zeroextend_mask;
950
951 struct x86_regmodrm regmodrm;
952
953 const struct x86_opcode *opcode;
954
955 struct x86_store src;
956 struct x86_store dst;
957
958 struct x86_store *strm;
959
960 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
961 };
962
963 struct x86_decode_fsm {
964 /* vcpu */
965 bool is64bit;
966 bool is32bit;
967 bool is16bit;
968
969 /* fsm */
970 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
971 uint8_t *buf;
972 uint8_t *end;
973 };
974
975 struct x86_opcode {
976 uint8_t byte;
977 bool regmodrm;
978 bool regtorm;
979 bool dmo;
980 bool todmo;
981 bool movs;
982 bool stos;
983 bool lods;
984 bool szoverride;
985 int defsize;
986 int allsize;
987 bool group1;
988 bool group11;
989 bool immediate;
990 int flags;
991 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
992 };
993
994 struct x86_group_entry {
995 void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
996 };
997
998 #define OPSIZE_BYTE 0x01
999 #define OPSIZE_WORD 0x02 /* 2 bytes */
1000 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1001 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1002
1003 #define FLAG_imm8 0x01
1004 #define FLAG_immz 0x02
1005 #define FLAG_ze 0x04
1006
1007 static const struct x86_group_entry group1[8] = {
1008 [1] = { .emul = x86_emul_or },
1009 [4] = { .emul = x86_emul_and },
1010 [6] = { .emul = x86_emul_xor }
1011 };
1012
1013 static const struct x86_group_entry group11[8] = {
1014 [0] = { .emul = x86_emul_mov }
1015 };
1016
1017 static const struct x86_opcode primary_opcode_table[] = {
1018 /*
1019 * Group1
1020 */
1021 {
1022 /* Ev, Ib */
1023 .byte = 0x83,
1024 .regmodrm = true,
1025 .regtorm = true,
1026 .szoverride = true,
1027 .defsize = -1,
1028 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1029 .group1 = true,
1030 .immediate = true,
1031 .flags = FLAG_imm8,
1032 .emul = NULL /* group1 */
1033 },
1034
1035 /*
1036 * Group11
1037 */
1038 {
1039 /* Eb, Ib */
1040 .byte = 0xC6,
1041 .regmodrm = true,
1042 .regtorm = true,
1043 .szoverride = false,
1044 .defsize = OPSIZE_BYTE,
1045 .allsize = -1,
1046 .group11 = true,
1047 .immediate = true,
1048 .emul = NULL /* group11 */
1049 },
1050 {
1051 /* Ev, Iz */
1052 .byte = 0xC7,
1053 .regmodrm = true,
1054 .regtorm = true,
1055 .szoverride = true,
1056 .defsize = -1,
1057 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1058 .group11 = true,
1059 .immediate = true,
1060 .flags = FLAG_immz,
1061 .emul = NULL /* group11 */
1062 },
1063
1064 /*
1065 * OR
1066 */
1067 {
1068 /* Eb, Gb */
1069 .byte = 0x08,
1070 .regmodrm = true,
1071 .regtorm = true,
1072 .szoverride = false,
1073 .defsize = OPSIZE_BYTE,
1074 .allsize = -1,
1075 .emul = x86_emul_or
1076 },
1077 {
1078 /* Ev, Gv */
1079 .byte = 0x09,
1080 .regmodrm = true,
1081 .regtorm = true,
1082 .szoverride = true,
1083 .defsize = -1,
1084 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1085 .emul = x86_emul_or
1086 },
1087 {
1088 /* Gb, Eb */
1089 .byte = 0x0A,
1090 .regmodrm = true,
1091 .regtorm = false,
1092 .szoverride = false,
1093 .defsize = OPSIZE_BYTE,
1094 .allsize = -1,
1095 .emul = x86_emul_or
1096 },
1097 {
1098 /* Gv, Ev */
1099 .byte = 0x0B,
1100 .regmodrm = true,
1101 .regtorm = false,
1102 .szoverride = true,
1103 .defsize = -1,
1104 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1105 .emul = x86_emul_or
1106 },
1107
1108 /*
1109 * AND
1110 */
1111 {
1112 /* Eb, Gb */
1113 .byte = 0x20,
1114 .regmodrm = true,
1115 .regtorm = true,
1116 .szoverride = false,
1117 .defsize = OPSIZE_BYTE,
1118 .allsize = -1,
1119 .emul = x86_emul_and
1120 },
1121 {
1122 /* Ev, Gv */
1123 .byte = 0x21,
1124 .regmodrm = true,
1125 .regtorm = true,
1126 .szoverride = true,
1127 .defsize = -1,
1128 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1129 .emul = x86_emul_and
1130 },
1131 {
1132 /* Gb, Eb */
1133 .byte = 0x22,
1134 .regmodrm = true,
1135 .regtorm = false,
1136 .szoverride = false,
1137 .defsize = OPSIZE_BYTE,
1138 .allsize = -1,
1139 .emul = x86_emul_and
1140 },
1141 {
1142 /* Gv, Ev */
1143 .byte = 0x23,
1144 .regmodrm = true,
1145 .regtorm = false,
1146 .szoverride = true,
1147 .defsize = -1,
1148 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1149 .emul = x86_emul_and
1150 },
1151
1152 /*
1153 * XOR
1154 */
1155 {
1156 /* Eb, Gb */
1157 .byte = 0x30,
1158 .regmodrm = true,
1159 .regtorm = true,
1160 .szoverride = false,
1161 .defsize = OPSIZE_BYTE,
1162 .allsize = -1,
1163 .emul = x86_emul_xor
1164 },
1165 {
1166 /* Ev, Gv */
1167 .byte = 0x31,
1168 .regmodrm = true,
1169 .regtorm = true,
1170 .szoverride = true,
1171 .defsize = -1,
1172 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1173 .emul = x86_emul_xor
1174 },
1175 {
1176 /* Gb, Eb */
1177 .byte = 0x32,
1178 .regmodrm = true,
1179 .regtorm = false,
1180 .szoverride = false,
1181 .defsize = OPSIZE_BYTE,
1182 .allsize = -1,
1183 .emul = x86_emul_xor
1184 },
1185 {
1186 /* Gv, Ev */
1187 .byte = 0x33,
1188 .regmodrm = true,
1189 .regtorm = false,
1190 .szoverride = true,
1191 .defsize = -1,
1192 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1193 .emul = x86_emul_xor
1194 },
1195
1196 /*
1197 * MOV
1198 */
1199 {
1200 /* Eb, Gb */
1201 .byte = 0x88,
1202 .regmodrm = true,
1203 .regtorm = true,
1204 .szoverride = false,
1205 .defsize = OPSIZE_BYTE,
1206 .allsize = -1,
1207 .emul = x86_emul_mov
1208 },
1209 {
1210 /* Ev, Gv */
1211 .byte = 0x89,
1212 .regmodrm = true,
1213 .regtorm = true,
1214 .szoverride = true,
1215 .defsize = -1,
1216 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1217 .emul = x86_emul_mov
1218 },
1219 {
1220 /* Gb, Eb */
1221 .byte = 0x8A,
1222 .regmodrm = true,
1223 .regtorm = false,
1224 .szoverride = false,
1225 .defsize = OPSIZE_BYTE,
1226 .allsize = -1,
1227 .emul = x86_emul_mov
1228 },
1229 {
1230 /* Gv, Ev */
1231 .byte = 0x8B,
1232 .regmodrm = true,
1233 .regtorm = false,
1234 .szoverride = true,
1235 .defsize = -1,
1236 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1237 .emul = x86_emul_mov
1238 },
1239 {
1240 /* AL, Ob */
1241 .byte = 0xA0,
1242 .dmo = true,
1243 .todmo = false,
1244 .szoverride = false,
1245 .defsize = OPSIZE_BYTE,
1246 .allsize = -1,
1247 .emul = x86_emul_mov
1248 },
1249 {
1250 /* rAX, Ov */
1251 .byte = 0xA1,
1252 .dmo = true,
1253 .todmo = false,
1254 .szoverride = true,
1255 .defsize = -1,
1256 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1257 .emul = x86_emul_mov
1258 },
1259 {
1260 /* Ob, AL */
1261 .byte = 0xA2,
1262 .dmo = true,
1263 .todmo = true,
1264 .szoverride = false,
1265 .defsize = OPSIZE_BYTE,
1266 .allsize = -1,
1267 .emul = x86_emul_mov
1268 },
1269 {
1270 /* Ov, rAX */
1271 .byte = 0xA3,
1272 .dmo = true,
1273 .todmo = true,
1274 .szoverride = true,
1275 .defsize = -1,
1276 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1277 .emul = x86_emul_mov
1278 },
1279
1280 /*
1281 * MOVS
1282 */
1283 {
1284 /* Yb, Xb */
1285 .byte = 0xA4,
1286 .movs = true,
1287 .szoverride = false,
1288 .defsize = OPSIZE_BYTE,
1289 .allsize = -1,
1290 .emul = x86_emul_movs
1291 },
1292 {
1293 /* Yv, Xv */
1294 .byte = 0xA5,
1295 .movs = true,
1296 .szoverride = true,
1297 .defsize = -1,
1298 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1299 .emul = x86_emul_movs
1300 },
1301
1302 /*
1303 * STOS
1304 */
1305 {
1306 /* Yb, AL */
1307 .byte = 0xAA,
1308 .stos = true,
1309 .szoverride = false,
1310 .defsize = OPSIZE_BYTE,
1311 .allsize = -1,
1312 .emul = x86_emul_stos
1313 },
1314 {
1315 /* Yv, rAX */
1316 .byte = 0xAB,
1317 .stos = true,
1318 .szoverride = true,
1319 .defsize = -1,
1320 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1321 .emul = x86_emul_stos
1322 },
1323
1324 /*
1325 * LODS
1326 */
1327 {
1328 /* AL, Xb */
1329 .byte = 0xAC,
1330 .lods = true,
1331 .szoverride = false,
1332 .defsize = OPSIZE_BYTE,
1333 .allsize = -1,
1334 .emul = x86_emul_lods
1335 },
1336 {
1337 /* rAX, Xv */
1338 .byte = 0xAD,
1339 .lods = true,
1340 .szoverride = true,
1341 .defsize = -1,
1342 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1343 .emul = x86_emul_lods
1344 },
1345 };
1346
1347 static const struct x86_opcode secondary_opcode_table[] = {
1348 /*
1349 * MOVZX
1350 */
1351 {
1352 /* Gv, Eb */
1353 .byte = 0xB6,
1354 .regmodrm = true,
1355 .regtorm = false,
1356 .szoverride = true,
1357 .defsize = OPSIZE_BYTE,
1358 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1359 .flags = FLAG_ze,
1360 .emul = x86_emul_mov
1361 },
1362 {
1363 /* Gv, Ew */
1364 .byte = 0xB7,
1365 .regmodrm = true,
1366 .regtorm = false,
1367 .szoverride = true,
1368 .defsize = OPSIZE_WORD,
1369 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1370 .flags = FLAG_ze,
1371 .emul = x86_emul_mov
1372 },
1373 };
1374
1375 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1376
1377 /* [REX-present][enc][opsize] */
1378 static const struct x86_reg gpr_map__special[2][4][8] = {
1379 [false] = {
1380 /* No REX prefix. */
1381 [0b00] = {
1382 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1383 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1384 [2] = { -1, 0 },
1385 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1386 [4] = { -1, 0 },
1387 [5] = { -1, 0 },
1388 [6] = { -1, 0 },
1389 [7] = { -1, 0 },
1390 },
1391 [0b01] = {
1392 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1393 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1394 [2] = { -1, 0 },
1395 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1396 [4] = { -1, 0 },
1397 [5] = { -1, 0 },
1398 [6] = { -1, 0 },
1399 [7] = { -1, 0 },
1400 },
1401 [0b10] = {
1402 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1403 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1404 [2] = { -1, 0 },
1405 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1406 [4] = { -1, 0 },
1407 [5] = { -1, 0 },
1408 [6] = { -1, 0 },
1409 [7] = { -1, 0 },
1410 },
1411 [0b11] = {
1412 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1413 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1414 [2] = { -1, 0 },
1415 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1416 [4] = { -1, 0 },
1417 [5] = { -1, 0 },
1418 [6] = { -1, 0 },
1419 [7] = { -1, 0 },
1420 }
1421 },
1422 [true] = {
1423 /* Has REX prefix. */
1424 [0b00] = {
1425 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1426 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1427 [2] = { -1, 0 },
1428 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1429 [4] = { -1, 0 },
1430 [5] = { -1, 0 },
1431 [6] = { -1, 0 },
1432 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1433 },
1434 [0b01] = {
1435 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1436 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1437 [2] = { -1, 0 },
1438 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1439 [4] = { -1, 0 },
1440 [5] = { -1, 0 },
1441 [6] = { -1, 0 },
1442 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1443 },
1444 [0b10] = {
1445 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1446 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1447 [2] = { -1, 0 },
1448 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1449 [4] = { -1, 0 },
1450 [5] = { -1, 0 },
1451 [6] = { -1, 0 },
1452 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1453 },
1454 [0b11] = {
1455 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1456 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1457 [2] = { -1, 0 },
1458 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1459 [4] = { -1, 0 },
1460 [5] = { -1, 0 },
1461 [6] = { -1, 0 },
1462 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1463 }
1464 }
1465 };
1466
1467 /* [depends][enc][size] */
1468 static const struct x86_reg gpr_map[2][8][8] = {
1469 [false] = {
1470 /* Not extended. */
1471 [0b000] = {
1472 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1473 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1474 [2] = { -1, 0 },
1475 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1476 [4] = { -1, 0 },
1477 [5] = { -1, 0 },
1478 [6] = { -1, 0 },
 1479 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1480 },
1481 [0b001] = {
1482 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1483 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1484 [2] = { -1, 0 },
1485 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1486 [4] = { -1, 0 },
1487 [5] = { -1, 0 },
1488 [6] = { -1, 0 },
 1489 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1490 },
1491 [0b010] = {
1492 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1493 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1494 [2] = { -1, 0 },
1495 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1496 [4] = { -1, 0 },
1497 [5] = { -1, 0 },
1498 [6] = { -1, 0 },
 1499 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1500 },
1501 [0b011] = {
1502 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1503 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1504 [2] = { -1, 0 },
1505 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1506 [4] = { -1, 0 },
1507 [5] = { -1, 0 },
1508 [6] = { -1, 0 },
 1509 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1510 },
1511 [0b100] = {
1512 [0] = { -1, 0 }, /* SPECIAL */
1513 [1] = { -1, 0 }, /* SPECIAL */
1514 [2] = { -1, 0 },
1515 [3] = { -1, 0 }, /* SPECIAL */
1516 [4] = { -1, 0 },
1517 [5] = { -1, 0 },
1518 [6] = { -1, 0 },
1519 [7] = { -1, 0 }, /* SPECIAL */
1520 },
1521 [0b101] = {
1522 [0] = { -1, 0 }, /* SPECIAL */
1523 [1] = { -1, 0 }, /* SPECIAL */
1524 [2] = { -1, 0 },
1525 [3] = { -1, 0 }, /* SPECIAL */
1526 [4] = { -1, 0 },
1527 [5] = { -1, 0 },
1528 [6] = { -1, 0 },
1529 [7] = { -1, 0 }, /* SPECIAL */
1530 },
1531 [0b110] = {
1532 [0] = { -1, 0 }, /* SPECIAL */
1533 [1] = { -1, 0 }, /* SPECIAL */
1534 [2] = { -1, 0 },
1535 [3] = { -1, 0 }, /* SPECIAL */
1536 [4] = { -1, 0 },
1537 [5] = { -1, 0 },
1538 [6] = { -1, 0 },
1539 [7] = { -1, 0 }, /* SPECIAL */
1540 },
1541 [0b111] = {
1542 [0] = { -1, 0 }, /* SPECIAL */
1543 [1] = { -1, 0 }, /* SPECIAL */
1544 [2] = { -1, 0 },
1545 [3] = { -1, 0 }, /* SPECIAL */
1546 [4] = { -1, 0 },
1547 [5] = { -1, 0 },
1548 [6] = { -1, 0 },
1549 [7] = { -1, 0 }, /* SPECIAL */
1550 },
1551 },
1552 [true] = {
1553 /* Extended. */
1554 [0b000] = {
1555 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1556 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1557 [2] = { -1, 0 },
1558 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1559 [4] = { -1, 0 },
1560 [5] = { -1, 0 },
1561 [6] = { -1, 0 },
 1562 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1563 },
1564 [0b001] = {
1565 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1566 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1567 [2] = { -1, 0 },
1568 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1569 [4] = { -1, 0 },
1570 [5] = { -1, 0 },
1571 [6] = { -1, 0 },
 1572 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1573 },
1574 [0b010] = {
1575 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1576 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1577 [2] = { -1, 0 },
1578 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1579 [4] = { -1, 0 },
1580 [5] = { -1, 0 },
1581 [6] = { -1, 0 },
 1582 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1583 },
1584 [0b011] = {
1585 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1586 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1587 [2] = { -1, 0 },
1588 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1589 [4] = { -1, 0 },
1590 [5] = { -1, 0 },
1591 [6] = { -1, 0 },
 1592 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1593 },
1594 [0b100] = {
1595 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1596 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1597 [2] = { -1, 0 },
1598 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1599 [4] = { -1, 0 },
1600 [5] = { -1, 0 },
1601 [6] = { -1, 0 },
 1602 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1603 },
1604 [0b101] = {
1605 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1606 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1607 [2] = { -1, 0 },
1608 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1609 [4] = { -1, 0 },
1610 [5] = { -1, 0 },
1611 [6] = { -1, 0 },
 1612 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1613 },
1614 [0b110] = {
1615 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1616 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1617 [2] = { -1, 0 },
1618 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1619 [4] = { -1, 0 },
1620 [5] = { -1, 0 },
1621 [6] = { -1, 0 },
 1622 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1623 },
1624 [0b111] = {
1625 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1626 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1627 [2] = { -1, 0 },
1628 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1629 [4] = { -1, 0 },
1630 [5] = { -1, 0 },
1631 [6] = { -1, 0 },
 1632 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1633 },
1634 }
1635 };
1636
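/*
 * The instruction decoder below is a small FSM. Each node consumes bytes
 * from the buffer, fills in the relevant parts of 'instr', and selects the
 * next node via fsm_advance(); a NULL next node marks the end of the parse,
 * and node_overflow gets installed if an advance would run past the buffer.
 */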
1637 static int
1638 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1639 {
1640 fsm->fn = NULL;
1641 return -1;
1642 }
1643
1644 static int
1645 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1646 {
1647 if (fsm->buf + n > fsm->end) {
1648 return -1;
1649 }
1650 memcpy(bytes, fsm->buf, n);
1651 return 0;
1652 }
1653
1654 static void
1655 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1656 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1657 {
1658 fsm->buf += n;
1659 if (fsm->buf > fsm->end) {
1660 fsm->fn = node_overflow;
1661 } else {
1662 fsm->fn = fn;
1663 }
1664 }
1665
1666 static const struct x86_reg *
1667 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1668 {
1669 enc &= 0b11;
1670 if (regsize == 8) {
 1671 /* Can be a 64bit register even without a REX prefix; use the REX map. */
1672 return &gpr_map__special[1][enc][regsize-1];
1673 }
1674 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1675 }
1676
1677 /*
1678 * Special node, for MOVS. Fake two displacements of zero on the source and
1679 * destination registers.
1680 */
1681 static int
1682 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1683 {
1684 size_t adrsize;
1685
1686 adrsize = instr->address_size;
1687
1688 /* DS:RSI */
1689 instr->src.type = STORE_REG;
1690 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1691 instr->src.disp.type = DISP_0;
1692
1693 /* ES:RDI, force ES */
1694 instr->dst.type = STORE_REG;
1695 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1696 instr->dst.disp.type = DISP_0;
1697 instr->dst.hardseg = NVMM_X64_SEG_ES;
1698
1699 fsm_advance(fsm, 0, NULL);
1700
1701 return 0;
1702 }
1703
1704 /*
1705 * Special node, for STOS and LODS. Fake a displacement of zero on the
1706 * destination register.
1707 */
1708 static int
1709 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1710 {
1711 const struct x86_opcode *opcode = instr->opcode;
1712 struct x86_store *stlo, *streg;
1713 size_t adrsize, regsize;
1714
1715 adrsize = instr->address_size;
1716 regsize = instr->operand_size;
1717
1718 if (opcode->stos) {
1719 streg = &instr->src;
1720 stlo = &instr->dst;
1721 } else {
1722 streg = &instr->dst;
1723 stlo = &instr->src;
1724 }
1725
1726 streg->type = STORE_REG;
1727 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1728
1729 stlo->type = STORE_REG;
1730 if (opcode->stos) {
1731 /* ES:RDI, force ES */
1732 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1733 stlo->hardseg = NVMM_X64_SEG_ES;
1734 } else {
1735 /* DS:RSI */
1736 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1737 }
1738 stlo->disp.type = DISP_0;
1739
1740 fsm_advance(fsm, 0, NULL);
1741
1742 return 0;
1743 }
1744
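/*
 * Direct Memory Offsets (MOV AL/rAX <-> moffs): one operand is ?AX, the
 * other is an absolute offset encoded directly in the instruction.
 */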
1745 static int
1746 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1747 {
1748 const struct x86_opcode *opcode = instr->opcode;
1749 struct x86_store *stdmo, *streg;
1750 size_t adrsize, regsize;
1751
1752 adrsize = instr->address_size;
1753 regsize = instr->operand_size;
1754
1755 if (opcode->todmo) {
1756 streg = &instr->src;
1757 stdmo = &instr->dst;
1758 } else {
1759 streg = &instr->dst;
1760 stdmo = &instr->src;
1761 }
1762
1763 streg->type = STORE_REG;
1764 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1765
1766 stdmo->type = STORE_DMO;
1767 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1768 return -1;
1769 }
1770 fsm_advance(fsm, adrsize, NULL);
1771
1772 return 0;
1773 }
1774
1775 static uint64_t
1776 sign_extend(uint64_t val, int size)
1777 {
1778 if (size == 1) {
1779 if (val & __BIT(7))
1780 val |= 0xFFFFFFFFFFFFFF00;
1781 } else if (size == 2) {
1782 if (val & __BIT(15))
1783 val |= 0xFFFFFFFFFFFF0000;
1784 } else if (size == 4) {
1785 if (val & __BIT(31))
1786 val |= 0xFFFFFFFF00000000;
1787 }
1788 return val;
1789 }
1790
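/*
 * Fetch the immediate operand (always the source). FLAG_imm8 shrinks the
 * fetch to one byte, FLAG_immz caps it at four bytes for 64-bit operands;
 * in both cases the value is then sign-extended back to the operand size.
 */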
1791 static int
1792 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1793 {
1794 const struct x86_opcode *opcode = instr->opcode;
1795 struct x86_store *store;
1796 uint8_t immsize;
1797 size_t sesize = 0;
1798
1799 /* The immediate is the source */
1800 store = &instr->src;
1801 immsize = instr->operand_size;
1802
1803 if (opcode->flags & FLAG_imm8) {
1804 sesize = immsize;
1805 immsize = 1;
1806 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1807 sesize = immsize;
1808 immsize = 4;
1809 }
1810
1811 store->type = STORE_IMM;
1812 store->u.imm.size = immsize;
1813 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1814 return -1;
1815 }
1816 fsm_advance(fsm, store->u.imm.size, NULL);
1817
1818 if (sesize != 0) {
 1819 store->u.imm.data = sign_extend(store->u.imm.data, immsize);
1820 store->u.imm.size = sesize;
1821 }
1822
1823 return 0;
1824 }
1825
1826 static int
1827 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1828 {
1829 const struct x86_opcode *opcode = instr->opcode;
1830 uint64_t data = 0;
1831 size_t n;
1832
1833 if (instr->strm->disp.type == DISP_1) {
1834 n = 1;
1835 } else { /* DISP4 */
1836 n = 4;
1837 }
1838
1839 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1840 return -1;
1841 }
1842
1843 if (__predict_true(fsm->is64bit)) {
1844 data = sign_extend(data, n);
1845 }
1846
1847 instr->strm->disp.data = data;
1848
1849 if (opcode->immediate) {
1850 fsm_advance(fsm, n, node_immediate);
1851 } else {
1852 fsm_advance(fsm, n, NULL);
1853 }
1854
1855 return 0;
1856 }
1857
1858 static const struct x86_reg *
1859 get_register_idx(struct x86_instr *instr, uint8_t index)
1860 {
1861 uint8_t enc = index;
1862 const struct x86_reg *reg;
1863 size_t regsize;
1864
1865 regsize = instr->address_size;
1866 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
1867
1868 if (reg->num == -1) {
1869 reg = resolve_special_register(instr, enc, regsize);
1870 }
1871
1872 return reg;
1873 }
1874
1875 static const struct x86_reg *
1876 get_register_bas(struct x86_instr *instr, uint8_t base)
1877 {
1878 uint8_t enc = base;
1879 const struct x86_reg *reg;
1880 size_t regsize;
1881
1882 regsize = instr->address_size;
1883 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1884 if (reg->num == -1) {
1885 reg = resolve_special_register(instr, enc, regsize);
1886 }
1887
1888 return reg;
1889 }
1890
1891 static int
1892 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1893 {
1894 const struct x86_opcode *opcode;
1895 uint8_t scale, index, base;
1896 bool noindex, nobase;
1897 uint8_t byte;
1898
1899 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
1900 return -1;
1901 }
1902
1903 scale = ((byte & 0b11000000) >> 6);
1904 index = ((byte & 0b00111000) >> 3);
1905 base = ((byte & 0b00000111) >> 0);
1906
1907 opcode = instr->opcode;
1908
1909 noindex = false;
1910 nobase = false;
1911
1912 if (index == 0b100 && !instr->rexpref.x) {
1913 /* Special case: the index is null */
1914 noindex = true;
1915 }
1916
1917 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
1918 /* Special case: the base is null + disp32 */
1919 instr->strm->disp.type = DISP_4;
1920 nobase = true;
1921 }
1922
1923 instr->strm->type = STORE_SIB;
1924 instr->strm->u.sib.scale = (1 << scale);
1925 if (!noindex)
1926 instr->strm->u.sib.idx = get_register_idx(instr, index);
1927 if (!nobase)
1928 instr->strm->u.sib.bas = get_register_bas(instr, base);
1929
1930 /* May have a displacement, or an immediate */
1931 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
1932 fsm_advance(fsm, 1, node_disp);
1933 } else if (opcode->immediate) {
1934 fsm_advance(fsm, 1, node_immediate);
1935 } else {
1936 fsm_advance(fsm, 1, NULL);
1937 }
1938
1939 return 0;
1940 }
1941
1942 static const struct x86_reg *
1943 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
1944 {
1945 uint8_t enc = instr->regmodrm.reg;
1946 const struct x86_reg *reg;
1947 size_t regsize;
1948
1949 regsize = instr->operand_size;
1950
1951 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
1952 if (reg->num == -1) {
1953 reg = resolve_special_register(instr, enc, regsize);
1954 }
1955
1956 return reg;
1957 }
1958
1959 static const struct x86_reg *
1960 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
1961 {
1962 uint8_t enc = instr->regmodrm.rm;
1963 const struct x86_reg *reg;
1964 size_t regsize;
1965
1966 if (instr->strm->disp.type == DISP_NONE) {
1967 regsize = instr->operand_size;
1968 } else {
1969 /* Indirect access, the size is that of the address. */
1970 regsize = instr->address_size;
1971 }
1972
1973 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
1974 if (reg->num == -1) {
1975 reg = resolve_special_register(instr, enc, regsize);
1976 }
1977
1978 return reg;
1979 }
1980
1981 static inline bool
1982 has_sib(struct x86_instr *instr)
1983 {
1984 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
1985 }
1986
1987 static inline bool
1988 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1989 {
1990 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
1991 instr->regmodrm.rm == RM_RBP_DISP32);
1992 }
1993
1994 static inline bool
1995 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1996 {
1997 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
1998 instr->regmodrm.rm == RM_RBP_DISP32);
1999 }
2000
2001 static enum x86_disp_type
2002 get_disp_type(struct x86_instr *instr)
2003 {
2004 switch (instr->regmodrm.mod) {
2005 case MOD_DIS0: /* indirect */
2006 return DISP_0;
2007 case MOD_DIS1: /* indirect+1 */
2008 return DISP_1;
2009 case MOD_DIS4: /* indirect+4 */
2010 return DISP_4;
2011 case MOD_REG: /* direct */
2012 default: /* gcc */
2013 return DISP_NONE;
2014 }
2015 }
2016
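/*
 * Parse the ModRM byte. The REG field selects one operand, the RM field
 * (possibly combined with a SIB byte, a displacement, or RIP-relative
 * addressing) selects the other; opcode->regtorm gives the direction.
 */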
2017 static int
2018 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2019 {
2020 struct x86_store *strg, *strm;
2021 const struct x86_opcode *opcode;
2022 const struct x86_reg *reg;
2023 uint8_t byte;
2024
2025 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2026 return -1;
2027 }
2028
2029 opcode = instr->opcode;
2030
2031 instr->regmodrm.present = true;
2032 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2033 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2034 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2035
2036 if (opcode->regtorm) {
2037 strg = &instr->src;
2038 strm = &instr->dst;
2039 } else { /* RM to REG */
2040 strm = &instr->src;
2041 strg = &instr->dst;
2042 }
2043
2044 /* Save for later use. */
2045 instr->strm = strm;
2046
2047 /*
2048 * Special cases: Groups. The REG field of REGMODRM is the index in
 2049 * the group. The src operand gets overwritten by the Immediate node, if any.
2050 */
2051 if (opcode->group1) {
2052 if (group1[instr->regmodrm.reg].emul == NULL) {
2053 return -1;
2054 }
2055 instr->emul = group1[instr->regmodrm.reg].emul;
2056 } else if (opcode->group11) {
2057 if (group11[instr->regmodrm.reg].emul == NULL) {
2058 return -1;
2059 }
2060 instr->emul = group11[instr->regmodrm.reg].emul;
2061 }
2062
2063 reg = get_register_reg(instr, opcode);
2064 if (reg == NULL) {
2065 return -1;
2066 }
2067 strg->type = STORE_REG;
2068 strg->u.reg = reg;
2069
2070 if (has_sib(instr)) {
2071 /* Overwrites RM */
2072 fsm_advance(fsm, 1, node_sib);
2073 return 0;
2074 }
2075
2076 /* The displacement applies to RM. */
2077 strm->disp.type = get_disp_type(instr);
2078
2079 if (is_rip_relative(fsm, instr)) {
2080 /* Overwrites RM */
2081 strm->type = STORE_REG;
2082 strm->u.reg = &gpr_map__rip;
2083 strm->disp.type = DISP_4;
2084 fsm_advance(fsm, 1, node_disp);
2085 return 0;
2086 }
2087
2088 if (is_disp32_only(fsm, instr)) {
2089 /* Overwrites RM */
2090 strm->type = STORE_REG;
2091 strm->u.reg = NULL;
2092 strm->disp.type = DISP_4;
2093 fsm_advance(fsm, 1, node_disp);
2094 return 0;
2095 }
2096
2097 reg = get_register_rm(instr, opcode);
2098 if (reg == NULL) {
2099 return -1;
2100 }
2101 strm->type = STORE_REG;
2102 strm->u.reg = reg;
2103
2104 if (strm->disp.type == DISP_NONE) {
2105 /* Direct register addressing mode */
2106 if (opcode->immediate) {
2107 fsm_advance(fsm, 1, node_immediate);
2108 } else {
2109 fsm_advance(fsm, 1, NULL);
2110 }
2111 } else if (strm->disp.type == DISP_0) {
2112 /* Indirect register addressing mode */
2113 if (opcode->immediate) {
2114 fsm_advance(fsm, 1, node_immediate);
2115 } else {
2116 fsm_advance(fsm, 1, NULL);
2117 }
2118 } else {
2119 fsm_advance(fsm, 1, node_disp);
2120 }
2121
2122 return 0;
2123 }
2124
2125 static size_t
2126 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2127 {
2128 const struct x86_opcode *opcode = instr->opcode;
2129 int opsize;
2130
2131 /* Get the opsize */
2132 if (!opcode->szoverride) {
2133 opsize = opcode->defsize;
2134 } else if (instr->rexpref.present && instr->rexpref.w) {
2135 opsize = 8;
2136 } else {
2137 if (!fsm->is16bit) {
2138 if (instr->legpref[LEG_OPR_OVR]) {
2139 opsize = 2;
2140 } else {
2141 opsize = 4;
2142 }
2143 } else { /* 16bit */
2144 if (instr->legpref[LEG_OPR_OVR]) {
2145 opsize = 4;
2146 } else {
2147 opsize = 2;
2148 }
2149 }
2150 }
2151
2152 /* See if available */
2153 if ((opcode->allsize & opsize) == 0) {
2154 // XXX do we care?
2155 }
2156
2157 return opsize;
2158 }
2159
2160 static size_t
2161 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2162 {
2163 if (fsm->is64bit) {
2164 if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
2165 return 4;
2166 }
2167 return 8;
2168 }
2169
2170 if (fsm->is32bit) {
2171 if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
2172 return 2;
2173 }
2174 return 4;
2175 }
2176
2177 /* 16bit. */
2178 if (__predict_false(instr->legpref[LEG_ADR_OVR])) {
2179 return 4;
2180 }
2181 return 2;
2182 }
2183
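/*
 * Match the byte against the one-byte opcode table, then branch to the node
 * that parses the rest of the encoding (ModRM, direct memory offset, or the
 * fake operands of the string instructions).
 */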
2184 static int
2185 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2186 {
2187 const struct x86_opcode *opcode;
2188 uint8_t byte;
2189 size_t i, n;
2190
2191 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2192 return -1;
2193 }
2194
2195 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2196 for (i = 0; i < n; i++) {
2197 if (primary_opcode_table[i].byte == byte)
2198 break;
2199 }
2200 if (i == n) {
2201 return -1;
2202 }
2203 opcode = &primary_opcode_table[i];
2204
2205 instr->opcode = opcode;
2206 instr->emul = opcode->emul;
2207 instr->operand_size = get_operand_size(fsm, instr);
2208 instr->address_size = get_address_size(fsm, instr);
2209
2210 if (opcode->regmodrm) {
2211 fsm_advance(fsm, 1, node_regmodrm);
2212 } else if (opcode->dmo) {
2213 /* Direct-Memory Offsets */
2214 fsm_advance(fsm, 1, node_dmo);
2215 } else if (opcode->stos || opcode->lods) {
2216 fsm_advance(fsm, 1, node_stlo);
2217 } else if (opcode->movs) {
2218 fsm_advance(fsm, 1, node_movs);
2219 } else {
2220 return -1;
2221 }
2222
2223 return 0;
2224 }
2225
2226 static uint64_t
2227 size_to_mask(size_t size)
2228 {
2229 switch (size) {
2230 case 1:
2231 return 0x00000000000000FF;
2232 case 2:
2233 return 0x000000000000FFFF;
2234 case 4:
2235 return 0x00000000FFFFFFFF;
2236 case 8:
2237 default:
2238 return 0xFFFFFFFFFFFFFFFF;
2239 }
2240 }
2241
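/*
 * Match the byte following the 0x0F escape against the two-byte opcode
 * table. For the zero-extending opcodes (FLAG_ze), precompute the
 * zero-extend mask and shrink the operand size to the source width.
 */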
2242 static int
2243 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2244 {
2245 const struct x86_opcode *opcode;
2246 uint8_t byte;
2247 size_t i, n;
2248
2249 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2250 return -1;
2251 }
2252
2253 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2254 for (i = 0; i < n; i++) {
2255 if (secondary_opcode_table[i].byte == byte)
2256 break;
2257 }
2258 if (i == n) {
2259 return -1;
2260 }
2261 opcode = &secondary_opcode_table[i];
2262
2263 instr->opcode = opcode;
2264 instr->emul = opcode->emul;
2265 instr->operand_size = get_operand_size(fsm, instr);
2266 instr->address_size = get_address_size(fsm, instr);
2267
2268 if (opcode->flags & FLAG_ze) {
		/*
		 * Compute the mask for zero-extension, and shrink the operand
		 * size accordingly: we move fewer bytes than the destination
		 * width.
		 */
2273 instr->zeroextend_mask = size_to_mask(instr->operand_size);
2274 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2275 instr->operand_size = opcode->defsize;
2276 }
2277
2278 if (opcode->regmodrm) {
2279 fsm_advance(fsm, 1, node_regmodrm);
2280 } else {
2281 return -1;
2282 }
2283
2284 return 0;
2285 }
2286
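/*
 * First opcode byte. 0x0F escapes to the two-byte table; VEX-encoded
 * instructions (0xC4/0xC5 with no REX prefix) are rejected; everything else
 * is handed to the primary table without consuming the byte here.
 */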
2287 static int
2288 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2289 {
2290 uint8_t byte;
2291
2292 #define ESCAPE 0x0F
2293 #define VEX_1 0xC5
2294 #define VEX_2 0xC4
2295 #define XOP 0x8F
2296
2297 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2298 return -1;
2299 }
2300
	/*
	 * We don't handle XOP: it is AMD-specific, and it was removed shortly
	 * after being introduced.
	 */
2305 if (byte == ESCAPE) {
2306 fsm_advance(fsm, 1, node_secondary_opcode);
2307 } else if (!instr->rexpref.present) {
2308 if (byte == VEX_1) {
2309 return -1;
2310 } else if (byte == VEX_2) {
2311 return -1;
2312 } else {
2313 fsm->fn = node_primary_opcode;
2314 }
2315 } else {
2316 fsm->fn = node_primary_opcode;
2317 }
2318
2319 return 0;
2320 }
2321
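/*
 * Consume an optional REX prefix (0x40-0x4F). REX is only legal in 64-bit
 * mode; the W/R/X/B bits are recorded, and decoding continues with node_main.
 */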
2322 static int
2323 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2324 {
2325 struct x86_rexpref *rexpref = &instr->rexpref;
2326 uint8_t byte;
2327 size_t n = 0;
2328
2329 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2330 return -1;
2331 }
2332
2333 if (byte >= 0x40 && byte <= 0x4F) {
2334 if (__predict_false(!fsm->is64bit)) {
2335 return -1;
2336 }
2337 rexpref->present = true;
2338 rexpref->w = ((byte & 0x8) != 0);
2339 rexpref->r = ((byte & 0x4) != 0);
2340 rexpref->x = ((byte & 0x2) != 0);
2341 rexpref->b = ((byte & 0x1) != 0);
2342 n = 1;
2343 }
2344
2345 fsm_advance(fsm, n, node_main);
2346 return 0;
2347 }
2348
2349 static const struct {
2350 uint8_t byte;
2351 int seg;
2352 } legpref_table[NLEG] = {
2353 /* Group 1 */
2354 [LEG_LOCK] = { 0xF0, -1 },
2355 [LEG_REPN] = { 0xF2, -1 },
2356 [LEG_REP] = { 0xF3, -1 },
2357 /* Group 2 */
2358 [LEG_OVR_CS] = { 0x2E, NVMM_X64_SEG_CS },
2359 [LEG_OVR_SS] = { 0x36, NVMM_X64_SEG_SS },
2360 [LEG_OVR_DS] = { 0x3E, NVMM_X64_SEG_DS },
2361 [LEG_OVR_ES] = { 0x26, NVMM_X64_SEG_ES },
2362 [LEG_OVR_FS] = { 0x64, NVMM_X64_SEG_FS },
2363 [LEG_OVR_GS] = { 0x65, NVMM_X64_SEG_GS },
2364 [LEG_BRN_TAKEN] = { 0x2E, -1 },
2365 [LEG_BRN_NTAKEN] = { 0x3E, -1 },
2366 /* Group 3 */
2367 [LEG_OPR_OVR] = { 0x66, -1 },
2368 /* Group 4 */
2369 [LEG_ADR_OVR] = { 0x67, -1 },
2370 };
2371
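/*
 * Entry node of the decoder FSM: consume any number of legacy prefixes, then
 * hand the first non-prefix byte to the REX node without consuming it.
 */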
2372 static int
2373 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2374 {
2375 uint8_t byte;
2376 size_t i;
2377
2378 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2379 return -1;
2380 }
2381
2382 for (i = 0; i < NLEG; i++) {
2383 if (byte == legpref_table[i].byte)
2384 break;
2385 }
2386
2387 if (i == NLEG) {
2388 fsm->fn = node_rex_prefix;
2389 } else {
2390 instr->legpref[i] = true;
2391 fsm_advance(fsm, 1, node_legacy_prefix);
2392 }
2393
2394 return 0;
2395 }
2396
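/*
 * Decode the given instruction bytes. The FSM starts at node_legacy_prefix;
 * each node consumes part of the buffer and selects its successor, until one
 * of them sets fsm.fn to NULL (done) or fails. The number of bytes consumed
 * is recorded in instr->len.
 */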
2397 static int
2398 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2399 struct nvmm_x64_state *state)
2400 {
2401 struct x86_decode_fsm fsm;
2402 int ret;
2403
2404 memset(instr, 0, sizeof(*instr));
2405
2406 fsm.is64bit = is_64bit(state);
2407 fsm.is32bit = is_32bit(state);
2408 fsm.is16bit = is_16bit(state);
2409
2410 fsm.fn = node_legacy_prefix;
2411 fsm.buf = inst_bytes;
2412 fsm.end = inst_bytes + inst_len;
2413
2414 while (fsm.fn != NULL) {
2415 ret = (*fsm.fn)(&fsm, instr);
2416 if (ret == -1)
2417 return -1;
2418 }
2419
2420 instr->len = fsm.buf - inst_bytes;
2421
2422 return 0;
2423 }
2424
2425 /* -------------------------------------------------------------------------- */
2426
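/*
 * PF covers only the least significant byte of the result, and is set when
 * that byte contains an even number of 1 bits.
 */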
2427 static inline uint8_t
2428 compute_parity(uint8_t *data)
2429 {
	uint8_t val = data[0];

	val ^= val >> 4;
	val ^= val >> 2;
	val ^= val >> 1;
2439 return (~val) & 1;
2440 }
2441
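/*
 * The x86_emul_* functions perform the actual operation once the operands
 * have been resolved. 'mem' describes the access (gva, gpa, size, direction)
 * and its data buffer holds the register or immediate operand, if any; 'cb'
 * is the VMM's registered memory callback, which performs the device access;
 * the guest GPRs, including RFLAGS, are updated in place.
 */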
2442 static void
2443 x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2444 uint64_t *gprs)
2445 {
2446 const bool write = mem->write;
2447 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2448 uint8_t data[8];
2449 size_t i;
2450
2451 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2452
2453 memcpy(data, mem->data, sizeof(data));
2454
2455 /* Fetch the value to be OR'ed. */
2456 mem->write = false;
2457 (*cb)(mem);
2458
	/* Perform the OR. ZF is set only if the whole result is zero. */
	fl |= PSL_Z;
	for (i = 0; i < mem->size; i++) {
		mem->data[i] |= data[i];
		if (mem->data[i] != 0)
			fl &= ~PSL_Z;
	}
2465 if (mem->data[mem->size-1] & __BIT(7))
2466 fl |= PSL_N;
2467 if (compute_parity(mem->data))
2468 fl |= PSL_PF;
2469
2470 if (write) {
2471 /* Write back the result. */
2472 mem->write = true;
2473 (*cb)(mem);
2474 }
2475
2476 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2477 }
2478
2479 static void
2480 x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2481 uint64_t *gprs)
2482 {
2483 const bool write = mem->write;
2484 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2485 uint8_t data[8];
2486 size_t i;
2487
2488 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2489
2490 memcpy(data, mem->data, sizeof(data));
2491
2492 /* Fetch the value to be AND'ed. */
2493 mem->write = false;
2494 (*cb)(mem);
2495
	/* Perform the AND. ZF is set only if the whole result is zero. */
	fl |= PSL_Z;
	for (i = 0; i < mem->size; i++) {
		mem->data[i] &= data[i];
		if (mem->data[i] != 0)
			fl &= ~PSL_Z;
	}
2502 if (mem->data[mem->size-1] & __BIT(7))
2503 fl |= PSL_N;
2504 if (compute_parity(mem->data))
2505 fl |= PSL_PF;
2506
2507 if (write) {
2508 /* Write back the result. */
2509 mem->write = true;
2510 (*cb)(mem);
2511 }
2512
2513 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2514 }
2515
2516 static void
2517 x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2518 uint64_t *gprs)
2519 {
2520 const bool write = mem->write;
2521 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2522 uint8_t data[8];
2523 size_t i;
2524
2525 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2526
2527 memcpy(data, mem->data, sizeof(data));
2528
2529 /* Fetch the value to be XOR'ed. */
2530 mem->write = false;
2531 (*cb)(mem);
2532
	/* Perform the XOR. ZF is set only if the whole result is zero. */
	fl |= PSL_Z;
	for (i = 0; i < mem->size; i++) {
		mem->data[i] ^= data[i];
		if (mem->data[i] != 0)
			fl &= ~PSL_Z;
	}
2539 if (mem->data[mem->size-1] & __BIT(7))
2540 fl |= PSL_N;
2541 if (compute_parity(mem->data))
2542 fl |= PSL_PF;
2543
2544 if (write) {
2545 /* Write back the result. */
2546 mem->write = true;
2547 (*cb)(mem);
2548 }
2549
2550 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2551 }
2552
2553 static void
2554 x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2555 uint64_t *gprs)
2556 {
	/*
	 * Nothing to compute: just forward the access to the callback.
	 */
2560 (*cb)(mem);
2561 }
2562
2563 static void
2564 x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2565 uint64_t *gprs)
2566 {
2567 /*
2568 * Just move, and update RDI.
2569 */
2570 (*cb)(mem);
2571
2572 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2573 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2574 } else {
2575 gprs[NVMM_X64_GPR_RDI] += mem->size;
2576 }
2577 }
2578
2579 static void
2580 x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2581 uint64_t *gprs)
2582 {
2583 /*
2584 * Just move, and update RSI.
2585 */
2586 (*cb)(mem);
2587
2588 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2589 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2590 } else {
2591 gprs[NVMM_X64_GPR_RSI] += mem->size;
2592 }
2593 }
2594
2595 static void
2596 x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2597 uint64_t *gprs)
2598 {
	/*
	 * Special instruction: double memory operand. Don't call the cb:
	 * the memory-to-memory copy has already been performed by
	 * assist_mem_double(); only RSI and RDI need to be updated here.
	 */
2603
2604 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2605 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2606 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2607 } else {
2608 gprs[NVMM_X64_GPR_RSI] += mem->size;
2609 gprs[NVMM_X64_GPR_RDI] += mem->size;
2610 }
2611 }
2612
2613 /* -------------------------------------------------------------------------- */
2614
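/* Read a GPR used in an address computation, truncated to the address size. */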
2615 static inline uint64_t
2616 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2617 {
2618 uint64_t val;
2619
2620 val = state->gprs[gpr];
2621 if (__predict_false(instr->address_size == 4)) {
2622 val &= 0x00000000FFFFFFFF;
2623 } else if (__predict_false(instr->address_size == 2)) {
2624 val &= 0x000000000000FFFF;
2625 }
2626
2627 return val;
2628 }
2629
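/*
 * Compute the guest virtual address of the given operand: SIB
 * (base + scale * index), plain register, or direct memory offset, plus the
 * displacement. Outside long mode, apply segmentation, honoring an explicit
 * segment override or the hard-coded segment of string instructions.
 */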
2630 static int
2631 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2632 struct x86_store *store, gvaddr_t *gvap, size_t size)
2633 {
2634 struct x86_sib *sib;
2635 gvaddr_t gva = 0;
2636 uint64_t reg;
2637 int ret, seg;
2638
2639 if (store->type == STORE_SIB) {
2640 sib = &store->u.sib;
2641 if (sib->bas != NULL)
2642 gva += gpr_read_address(instr, state, sib->bas->num);
2643 if (sib->idx != NULL) {
2644 reg = gpr_read_address(instr, state, sib->idx->num);
2645 gva += sib->scale * reg;
2646 }
2647 } else if (store->type == STORE_REG) {
2648 if (store->u.reg == NULL) {
2649 /* The base is null. Happens with disp32-only. */
2650 } else {
2651 gva = gpr_read_address(instr, state, store->u.reg->num);
2652 }
2653 } else {
2654 gva = store->u.dmo;
2655 }
2656
2657 if (store->disp.type != DISP_NONE) {
2658 gva += store->disp.data;
2659 }
2660
2661 if (!is_long_mode(state)) {
2662 if (store->hardseg != 0) {
2663 seg = store->hardseg;
2664 } else {
2665 if (instr->legpref[LEG_OVR_CS]) {
2666 seg = NVMM_X64_SEG_CS;
2667 } else if (instr->legpref[LEG_OVR_SS]) {
2668 seg = NVMM_X64_SEG_SS;
2669 } else if (instr->legpref[LEG_OVR_ES]) {
2670 seg = NVMM_X64_SEG_ES;
2671 } else if (instr->legpref[LEG_OVR_FS]) {
2672 seg = NVMM_X64_SEG_FS;
2673 } else if (instr->legpref[LEG_OVR_GS]) {
2674 seg = NVMM_X64_SEG_GS;
2675 } else {
2676 seg = NVMM_X64_SEG_DS;
2677 }
2678 }
2679
2680 ret = segment_apply(&state->segs[seg], &gva, size);
2681 if (ret == -1)
2682 return -1;
2683 }
2684
2685 *gvap = gva;
2686 return 0;
2687 }
2688
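/*
 * Resolve an operand down to a guest physical address: compute the GVA,
 * refuse MMIO accesses that would cross a page boundary, and translate the
 * GVA to a GPA.
 */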
2689 static int
2690 store_to_mem(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2691 struct x86_instr *instr, struct x86_store *store, struct nvmm_mem *mem)
2692 {
2693 nvmm_prot_t prot;
2694 int ret;
2695
2696 ret = store_to_gva(state, instr, store, &mem->gva, mem->size);
2697 if (ret == -1)
2698 return -1;
2699
2700 if ((mem->gva & PAGE_MASK) + mem->size > PAGE_SIZE) {
2701 /* Don't allow a cross-page MMIO. */
2702 errno = EINVAL;
2703 return -1;
2704 }
2705
2706 ret = x86_gva_to_gpa(mach, state, mem->gva, &mem->gpa, &prot);
2707 if (ret == -1)
2708 return -1;
2709
2710 return 0;
2711 }
2712
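/*
 * Prefetch the instruction bytes at CS:RIP and scan the legacy prefixes to
 * determine which segment the memory operand uses; DS if there is no
 * override.
 */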
2713 static int
2714 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2715 {
2716 uint8_t inst_bytes[15], byte;
2717 size_t i, n, fetchsize;
2718 gvaddr_t gva;
2719 int ret, seg;
2720
2721 fetchsize = sizeof(inst_bytes);
2722
2723 gva = state->gprs[NVMM_X64_GPR_RIP];
2724 if (!is_long_mode(state)) {
2725 ret = segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva,
2726 fetchsize);
2727 if (ret == -1)
2728 return -1;
2729 }
2730
2731 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2732 if (ret == -1)
2733 return -1;
2734
2735 seg = NVMM_X64_SEG_DS;
2736 for (n = 0; n < fetchsize; n++) {
2737 byte = inst_bytes[n];
2738 for (i = 0; i < NLEG; i++) {
2739 if (byte != legpref_table[i].byte)
2740 continue;
2741 if (i >= LEG_OVR_CS && i <= LEG_OVR_GS)
2742 seg = legpref_table[i].seg;
2743 break;
2744 }
2745 if (i == NLEG) {
2746 break;
2747 }
2748 }
2749
2750 return seg;
2751 }
2752
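/*
 * Fetch the instruction bytes at CS:RIP into the exit structure, for the case
 * where the kernel did not provide them.
 */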
2753 static int
2754 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2755 struct nvmm_exit *exit)
2756 {
2757 size_t fetchsize;
2758 gvaddr_t gva;
2759 int ret;
2760
2761 fetchsize = sizeof(exit->u.mem.inst_bytes);
2762
2763 gva = state->gprs[NVMM_X64_GPR_RIP];
2764 if (!is_long_mode(state)) {
2765 ret = segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva,
2766 fetchsize);
2767 if (ret == -1)
2768 return -1;
2769 }
2770
2771 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2772 fetchsize);
2773 if (ret == -1)
2774 return -1;
2775
2776 exit->u.mem.inst_len = fetchsize;
2777
2778 return 0;
2779 }
2780
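/*
 * MOVS: both operands are in guest memory. Copy the data from the source GVA
 * to the destination GVA ourselves, then let the emul function update RSI and
 * RDI.
 */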
2781 static int
2782 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2783 struct x86_instr *instr)
2784 {
2785 struct nvmm_mem mem;
2786 uint8_t data[8];
2787 gvaddr_t gva;
2788 size_t size;
2789 int ret;
2790
2791 size = instr->operand_size;
2792
2793 /* Source. */
2794 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2795 if (ret == -1)
2796 return -1;
2797 ret = read_guest_memory(mach, state, gva, data, size);
2798 if (ret == -1)
2799 return -1;
2800
2801 /* Destination. */
2802 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
2803 if (ret == -1)
2804 return -1;
2805 ret = write_guest_memory(mach, state, gva, data, size);
2806 if (ret == -1)
2807 return -1;
2808
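	/*
	 * The copy itself was done above; x86_emul_movs only consumes
	 * mem.size, to update RSI and RDI.
	 */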
2809 mem.size = size;
2810 (*instr->emul)(&mem, NULL, state->gprs);
2811
2812 return 0;
2813 }
2814
#define DISASSEMBLER_BUG() \
	do { \
		errno = EINVAL; \
		return -1; \
	} while (0)
2820
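/*
 * Single memory operand. Build the nvmm_mem descriptor from the source and
 * destination operands, run the emul function with the VMM's registered
 * memory callback, and, for loads, merge the result back into the destination
 * register (applying the zero-extend mask).
 */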
2821 static int
2822 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2823 struct x86_instr *instr)
2824 {
2825 struct nvmm_mem mem;
2826 uint8_t membuf[8];
2827 uint64_t val;
2828 int ret;
2829
2830 memset(membuf, 0, sizeof(membuf));
2831 mem.data = membuf;
2832
2833 switch (instr->src.type) {
2834 case STORE_REG:
2835 if (instr->src.disp.type != DISP_NONE) {
2836 /* Indirect access. */
2837 mem.write = false;
2838 mem.size = instr->operand_size;
2839 ret = store_to_mem(mach, state, instr, &instr->src,
2840 &mem);
2841 if (ret == -1)
2842 return -1;
2843 } else {
2844 /* Direct access. */
2845 mem.write = true;
2846 mem.size = instr->operand_size;
2847 val = state->gprs[instr->src.u.reg->num];
2848 val = __SHIFTOUT(val, instr->src.u.reg->mask);
2849 memcpy(mem.data, &val, mem.size);
2850 }
2851 break;
2852
2853 case STORE_IMM:
2854 mem.write = true;
2855 mem.size = instr->src.u.imm.size;
2856 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
2857 break;
2858
2859 case STORE_SIB:
2860 mem.write = false;
2861 mem.size = instr->operand_size;
2862 ret = store_to_mem(mach, state, instr, &instr->src, &mem);
2863 if (ret == -1)
2864 return -1;
2865 break;
2866
2867 case STORE_DMO:
2868 mem.write = false;
2869 mem.size = instr->operand_size;
2870 ret = store_to_mem(mach, state, instr, &instr->src, &mem);
2871 if (ret == -1)
2872 return -1;
2873 break;
2874
2875 default:
2876 return -1;
2877 }
2878
2879 switch (instr->dst.type) {
2880 case STORE_REG:
2881 if (instr->dst.disp.type != DISP_NONE) {
2882 if (__predict_false(!mem.write)) {
2883 DISASSEMBLER_BUG();
2884 }
2885 mem.size = instr->operand_size;
2886 ret = store_to_mem(mach, state, instr, &instr->dst,
2887 &mem);
2888 if (ret == -1)
2889 return -1;
		} else {
			/*
			 * Direct register destination: the result is merged
			 * into the GPR after the emul call, below.
			 */
		}
2893 break;
2894
2895 case STORE_IMM:
2896 /* The dst can't be an immediate. */
2897 DISASSEMBLER_BUG();
2898
2899 case STORE_SIB:
2900 if (__predict_false(!mem.write)) {
2901 DISASSEMBLER_BUG();
2902 }
2903 mem.size = instr->operand_size;
2904 ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
2905 if (ret == -1)
2906 return -1;
2907 break;
2908
2909 case STORE_DMO:
2910 if (__predict_false(!mem.write)) {
2911 DISASSEMBLER_BUG();
2912 }
2913 mem.size = instr->operand_size;
2914 ret = store_to_mem(mach, state, instr, &instr->dst, &mem);
2915 if (ret == -1)
2916 return -1;
2917 break;
2918
2919 default:
2920 return -1;
2921 }
2922
2923 (*instr->emul)(&mem, __callbacks.mem, state->gprs);
2924
2925 if (!mem.write) {
2926 /* instr->dst.type == STORE_REG */
2927 memcpy(&val, mem.data, sizeof(uint64_t));
2928 val = __SHIFTIN(val, instr->dst.u.reg->mask);
2929 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
2930 state->gprs[instr->dst.u.reg->num] |= val;
2931 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
2932 }
2933
2934 return 0;
2935 }
2936
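/*
 * Emulate the memory access that caused an NVMM_EXIT_MEMORY exit: fetch the
 * VCPU state, decode (and if necessary fetch) the faulting instruction, run
 * the emulation through the registered memory callback, advance RIP (honoring
 * REP), and push the modified GPRs back to the kernel.
 *
 * A minimal caller-side sketch -- not part of this file, and it assumes the
 * VMM has already created 'mach'/'cpuid' and registered its callbacks:
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(mach, cpuid, &exit) == -1)
 *				errx(EXIT_FAILURE, "mem assist failed");
 *			break;
 *		default:
 *			break;
 *		}
 *	}
 */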
2937 int
2938 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
2939 struct nvmm_exit *exit)
2940 {
2941 struct nvmm_x64_state state;
2942 struct x86_instr instr;
2943 uint64_t cnt;
2944 int ret;
2945
2946 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
2947 errno = EINVAL;
2948 return -1;
2949 }
2950
2951 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
2952 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | NVMM_X64_STATE_CRS |
2953 NVMM_X64_STATE_MSRS);
2954 if (ret == -1)
2955 return -1;
2956
2957 if (exit->u.mem.inst_len == 0) {
2958 /*
2959 * The instruction was not fetched from the kernel. Fetch
2960 * it ourselves.
2961 */
2962 ret = fetch_instruction(mach, &state, exit);
2963 if (ret == -1)
2964 return -1;
2965 }
2966
2967 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
2968 &instr, &state);
2969 if (ret == -1) {
2970 errno = ENODEV;
2971 return -1;
2972 }
2973
2974 if (__predict_false(instr.legpref[LEG_REPN])) {
2975 errno = ENODEV;
2976 return -1;
2977 }
2978
2979 if (instr.opcode->movs) {
2980 ret = assist_mem_double(mach, &state, &instr);
2981 } else {
2982 ret = assist_mem_single(mach, &state, &instr);
2983 }
2984 if (ret == -1) {
2985 errno = ENODEV;
2986 return -1;
2987 }
2988
2989 if (instr.legpref[LEG_REP]) {
2990 cnt = rep_dec_apply(&state, instr.address_size);
2991 if (cnt == 0) {
2992 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2993 }
2994 } else {
2995 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2996 }
2997
2998 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
2999 if (ret == -1)
3000 return -1;
3001
3002 return 0;
3003 }
3004