/*	$NetBSD: libnvmm_x86.c,v 1.17 2019/01/27 02:08:35 pgoyette Exp $	*/

/*
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmparam.h>
#include <machine/pte.h>
#include <machine/psl.h>

#include "nvmm.h"

#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))

#include <x86/specialreg.h>

extern struct nvmm_callbacks __callbacks;

/* -------------------------------------------------------------------------- */

/*
 * Undocumented debugging function. Helpful.
 */
int
nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
{
	struct nvmm_x64_state state;
	size_t i;
	int ret;

	const char *segnames[] = {
		"CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
	};

	ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
	if (ret == -1)
		return -1;

	printf("+ VCPU id=%d\n", (int)cpuid);
	printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
	printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
	printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
	printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
	printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
	printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
	for (i = 0; i < NVMM_X64_NSEG; i++) {
		printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d L=%d\n",
		    segnames[i],
		    state.segs[i].selector,
		    (void *)state.segs[i].base,
		    (void *)state.segs[i].limit,
		    state.segs[i].attrib.p, state.segs[i].attrib.def32,
		    state.segs[i].attrib.lng);
	}
	printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
	printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
	printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
	printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
	printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
	printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);

	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE32_L1_SHIFT	12
#define PTE32_L2_SHIFT	22

#define PTE32_L2_MASK	0xffc00000
#define PTE32_L1_MASK	0x003ff000

#define PTE32_L2_FRAME	(PTE32_L2_MASK)
#define PTE32_L1_FRAME	(PTE32_L2_FRAME|PTE32_L1_MASK)

#define pte32_l1idx(va)	(((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
#define pte32_l2idx(va)	(((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)

typedef uint32_t pte_32bit_t;

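/*
 * Walk the two-level 32bit page tables pointed to by CR3 and translate
 * 'gva' into a guest-physical address. The protections accumulated along
 * the walk are returned in 'prot'. A PG_PS (4MB) mapping is rejected
 * unless the guest has PSE enabled.
 */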
static int
x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L2gpa, L1gpa;
	uintptr_t L2hva, L1hva;
	pte_32bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L2. */
	L2gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L2hva;
	pte = pdir[pte32_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_t *)L1hva;
	pte = pdir[pte32_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE32_PAE_L1_SHIFT	12
#define PTE32_PAE_L2_SHIFT	21
#define PTE32_PAE_L3_SHIFT	30

#define PTE32_PAE_L3_MASK	0xc0000000
#define PTE32_PAE_L2_MASK	0x3fe00000
#define PTE32_PAE_L1_MASK	0x001ff000

#define PTE32_PAE_L3_FRAME	(PTE32_PAE_L3_MASK)
#define PTE32_PAE_L2_FRAME	(PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
#define PTE32_PAE_L1_FRAME	(PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)

#define pte32_pae_l1idx(va)	(((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
#define pte32_pae_l2idx(va)	(((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
#define pte32_pae_l3idx(va)	(((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)

typedef uint64_t pte_32bit_pae_t;

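/*
 * Same walk as above, but for the three-level 32bit PAE format: one extra
 * PDPT (L3) lookup, 64bit PTEs, NX bits, and 2MB large pages at L2.
 */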
static int
x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
{
	gpaddr_t L3gpa, L2gpa, L1gpa;
	uintptr_t L3hva, L2hva, L1hva;
	pte_32bit_pae_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	/* Parse L3. */
	L3gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L3hva;
	pte = pdir[pte32_pae_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L2hva;
	pte = pdir[pte32_pae_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if ((pte & PG_PS) && !has_pse)
		return -1;
	if (pte & PG_PS) {
		*gpa = (pte & PTE32_PAE_L2_FRAME);
		*gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_32bit_pae_t *)L1hva;
	pte = pdir[pte32_pae_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

/* -------------------------------------------------------------------------- */

#define PTE64_L1_SHIFT	12
#define PTE64_L2_SHIFT	21
#define PTE64_L3_SHIFT	30
#define PTE64_L4_SHIFT	39

#define PTE64_L4_MASK	0x0000ff8000000000
#define PTE64_L3_MASK	0x0000007fc0000000
#define PTE64_L2_MASK	0x000000003fe00000
#define PTE64_L1_MASK	0x00000000001ff000

#define PTE64_L4_FRAME	PTE64_L4_MASK
#define PTE64_L3_FRAME	(PTE64_L4_FRAME|PTE64_L3_MASK)
#define PTE64_L2_FRAME	(PTE64_L3_FRAME|PTE64_L2_MASK)
#define PTE64_L1_FRAME	(PTE64_L2_FRAME|PTE64_L1_MASK)

#define pte64_l1idx(va)	(((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
#define pte64_l2idx(va)	(((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
#define pte64_l3idx(va)	(((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
#define pte64_l4idx(va)	(((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)

typedef uint64_t pte_64bit_t;

static inline bool
x86_gva_64bit_canonical(gvaddr_t gva)
{
	/* Bits 63:47 must have the same value. */
#define SIGN_EXTEND	0xffff800000000000ULL
	return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
}

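/*
 * Four-level walk for 64bit (long mode) guests. The GVA must be canonical;
 * 1GB and 2MB large pages are handled at L3 and L2 respectively.
 */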
static int
x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
	uintptr_t L4hva, L3hva, L2hva, L1hva;
	pte_64bit_t *pdir, pte;

	/* We begin with an RWXU access. */
	*prot = NVMM_PROT_ALL;

	if (!x86_gva_64bit_canonical(gva))
		return -1;

	/* Parse L4. */
	L4gpa = (cr3 & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L4hva;
	pte = pdir[pte64_l4idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	/* Parse L3. */
	L3gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L3hva;
	pte = pdir[pte64_l3idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L3_FRAME);
		*gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
		return 0;
	}

	/* Parse L2. */
	L2gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L2hva;
	pte = pdir[pte64_l2idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS) {
		*gpa = (pte & PTE64_L2_FRAME);
		*gpa = *gpa + (gva & PTE64_L1_MASK);
		return 0;
	}

	/* Parse L1. */
	L1gpa = (pte & PG_FRAME);
	if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
		return -1;
	pdir = (pte_64bit_t *)L1hva;
	pte = pdir[pte64_l1idx(gva)];
	if ((pte & PG_V) == 0)
		return -1;
	if ((pte & PG_u) == 0)
		*prot &= ~NVMM_PROT_USER;
	if ((pte & PG_KW) == 0)
		*prot &= ~NVMM_PROT_WRITE;
	if (pte & PG_NX)
		*prot &= ~NVMM_PROT_EXEC;
	if (pte & PG_PS)
		return -1;

	*gpa = (pte & PG_FRAME);
	return 0;
}

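/*
 * Translate a GVA into a GPA using whichever paging mode the guest is in
 * (none, 32bit, 32bit PAE, 64bit), as selected by CR0.PG, CR4.PAE and
 * EFER.LMA. The page offset is added back at the end.
 */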
static inline int
x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	bool is_pae, is_lng, has_pse;
	uint64_t cr3;
	size_t off;
	int ret;

	if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
		/* No paging. */
		*prot = NVMM_PROT_ALL;
		*gpa = gva;
		return 0;
	}

	off = (gva & PAGE_MASK);
	gva &= ~PAGE_MASK;

	is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
	is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
	has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
	cr3 = state->crs[NVMM_X64_CR_CR3];

	if (is_pae && is_lng) {
		/* 64bit */
		ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
	} else if (is_pae && !is_lng) {
		/* 32bit PAE */
		ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
		    prot);
	} else if (!is_pae && !is_lng) {
		/* 32bit */
		ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
	} else {
		ret = -1;
	}

	if (ret == -1) {
		errno = EFAULT;
	}

	*gpa = *gpa + off;

	return ret;
}

int
nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
{
	struct nvmm_x64_state state;
	int ret;

	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
}

/* -------------------------------------------------------------------------- */

static inline bool
is_long_mode(struct nvmm_x64_state *state)
{
	return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
}

static inline bool
is_64bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
}

static inline bool
is_32bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
}

static inline bool
is_16bit(struct nvmm_x64_state *state)
{
	return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
	    (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
}

static int
segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
{
	uint64_t limit;

	/*
	 * This is incomplete. We should check topdown, etc, really that's
	 * tiring.
	 */
	if (__predict_false(!seg->attrib.p)) {
		goto error;
	}

	limit = (seg->limit + 1);
	if (__predict_true(seg->attrib.gran)) {
		limit *= PAGE_SIZE;
	}

	if (__predict_false(gva + size > limit)) {
		goto error;
	}

	return 0;

error:
	errno = EFAULT;
	return -1;
}

static inline void
segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
{
	*gva += seg->base;
}

static inline uint64_t
size_to_mask(size_t size)
{
	switch (size) {
	case 1:
		return 0x00000000000000FF;
	case 2:
		return 0x000000000000FFFF;
	case 4:
		return 0x00000000FFFFFFFF;
	case 8:
	default:
		return 0xFFFFFFFFFFFFFFFF;
	}
}

static uint64_t
rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
{
	uint64_t mask, cnt;

	mask = size_to_mask(adsize);
	cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;

	return cnt;
}

static void
rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
{
	uint64_t mask;

	/* XXX: should we zero-extend? */
	mask = size_to_mask(adsize);
	state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
	state->gprs[NVMM_X64_GPR_RCX] |= cnt;
}

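/*
 * Read guest memory at 'gva'. Accesses that cross a page boundary are
 * split and the remainder handled recursively; ranges that have no HVA
 * mapping are treated as MMIO and forwarded to the memory callback.
 */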
static int
read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_READ))) {
		errno = EFAULT;
		return -1;
	}

	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.data = data;
		mem.gpa = gpa;
		mem.write = false;
		mem.size = size;
		(*__callbacks.mem)(&mem);
	} else {
		memcpy(data, (uint8_t *)hva, size);
	}

	if (remain > 0) {
		ret = read_guest_memory(mach, state, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}

static int
write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    gvaddr_t gva, uint8_t *data, size_t size)
{
	struct nvmm_mem mem;
	nvmm_prot_t prot;
	gpaddr_t gpa;
	uintptr_t hva;
	bool is_mmio;
	int ret, remain;

	ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
	if (__predict_false(ret == -1)) {
		return -1;
	}
	if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
		errno = EFAULT;
		return -1;
	}

	if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
		remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
	} else {
		remain = 0;
	}
	size -= remain;

	ret = nvmm_gpa_to_hva(mach, gpa, &hva);
	is_mmio = (ret == -1);

	if (is_mmio) {
		mem.data = data;
		mem.gpa = gpa;
		mem.write = true;
		mem.size = size;
		(*__callbacks.mem)(&mem);
	} else {
		memcpy((uint8_t *)hva, data, size);
	}

	if (remain > 0) {
		ret = write_guest_memory(mach, state, gva + size,
		    data + size, remain);
	} else {
		ret = 0;
	}

	return ret;
}

/* -------------------------------------------------------------------------- */

static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);

#define NVMM_IO_BATCH_SIZE	32

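/*
 * Batch several iterations of a REP INS/OUTS into a single guest memory
 * access, issuing the I/O callback once per element. Returns the number
 * of elements actually processed, or -1 on error.
 */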
static int
assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
    struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
{
	uint8_t iobuf[NVMM_IO_BATCH_SIZE];
	size_t i, iosize, iocnt;
	int ret;

	cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
	iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
	iocnt = iosize / io->size;

	io->data = iobuf;

	if (!io->in) {
		ret = read_guest_memory(mach, state, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	for (i = 0; i < iocnt; i++) {
		(*__callbacks.io)(io);
		io->data += io->size;
	}

	if (io->in) {
		ret = write_guest_memory(mach, state, gva, iobuf, iosize);
		if (ret == -1)
			return -1;
	}

	return iocnt;
}

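/*
 * Emulate the I/O instruction that caused an NVMM_EXIT_IO exit, including
 * the string (INS/OUTS) and REP variants. A typical caller (sketch only,
 * error handling and the other exit reasons omitted) would look like:
 *
 *	while (1) {
 *		nvmm_vcpu_run(mach, cpuid, &exit);
 *		switch (exit.reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(mach, cpuid, &exit) == -1)
 *				return -1;
 *			break;
 *		...
 *		}
 *	}
 */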
int
nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
    struct nvmm_exit *exit)
{
	struct nvmm_x64_state state;
	struct nvmm_io io;
	uint64_t cnt = 0; /* GCC */
	uint8_t iobuf[8];
	int iocnt = 1;
	gvaddr_t gva = 0; /* GCC */
	int reg = 0; /* GCC */
	int ret, seg;
	bool psld = false;

	if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
		errno = EINVAL;
		return -1;
	}

	io.port = exit->u.io.port;
	io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
	io.size = exit->u.io.operand_size;
	io.data = iobuf;

	ret = nvmm_vcpu_getstate(mach, cpuid, &state,
	    NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
	    NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
	if (ret == -1)
		return -1;

	if (exit->u.io.rep) {
		cnt = rep_get_cnt(&state, exit->u.io.address_size);
		if (__predict_false(cnt == 0)) {
			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
			goto out;
		}
	}

	if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
		psld = true;
	}

	/*
	 * Determine GVA.
	 */
	if (exit->u.io.str) {
		if (io.in) {
			reg = NVMM_X64_GPR_RDI;
		} else {
			reg = NVMM_X64_GPR_RSI;
		}

		gva = state.gprs[reg];
		gva &= size_to_mask(exit->u.io.address_size);

		if (exit->u.io.seg != -1) {
			seg = exit->u.io.seg;
		} else {
			if (io.in) {
				seg = NVMM_X64_SEG_ES;
			} else {
				seg = fetch_segment(mach, &state);
				if (seg == -1)
					return -1;
			}
		}

		if (__predict_true(is_long_mode(&state))) {
			if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
				segment_apply(&state.segs[seg], &gva);
			}
		} else {
			ret = segment_check(&state.segs[seg], gva, io.size);
			if (ret == -1)
				return -1;
			segment_apply(&state.segs[seg], &gva);
		}

		if (exit->u.io.rep && !psld) {
			iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
			if (iocnt == -1)
				return -1;
			goto done;
		}
	}

	if (!io.in) {
		if (!exit->u.io.str) {
			memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
		} else {
			ret = read_guest_memory(mach, &state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

	(*__callbacks.io)(&io);

	if (io.in) {
		if (!exit->u.io.str) {
			memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
			if (io.size == 4) {
				/* Zero-extend to 64 bits. */
				state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
			}
		} else {
			ret = write_guest_memory(mach, &state, gva, io.data,
			    io.size);
			if (ret == -1)
				return -1;
		}
	}

done:
	if (exit->u.io.str) {
		if (__predict_false(psld)) {
			state.gprs[reg] -= iocnt * io.size;
		} else {
			state.gprs[reg] += iocnt * io.size;
		}
	}

	if (exit->u.io.rep) {
		cnt -= iocnt;
		rep_set_cnt(&state, exit->u.io.address_size, cnt);
		if (cnt == 0) {
			state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
		}
	} else {
		state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
	}

out:
	ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
	if (ret == -1)
		return -1;

	return 0;
}

/* -------------------------------------------------------------------------- */

static void x86_emul_or(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_and(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_xor(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_mov(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_stos(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_lods(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
static void x86_emul_movs(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);

/* Legacy prefixes. */
#define LEG_LOCK	0xF0
#define LEG_REPN	0xF2
#define LEG_REP		0xF3
#define LEG_OVR_CS	0x2E
#define LEG_OVR_SS	0x36
#define LEG_OVR_DS	0x3E
#define LEG_OVR_ES	0x26
#define LEG_OVR_FS	0x64
#define LEG_OVR_GS	0x65
#define LEG_OPR_OVR	0x66
#define LEG_ADR_OVR	0x67

struct x86_legpref {
	bool opr_ovr:1;
	bool adr_ovr:1;
	bool rep:1;
	bool repn:1;
	int seg;
};

struct x86_rexpref {
	bool present;
	bool w;
	bool r;
	bool x;
	bool b;
};

struct x86_reg {
	int num;	/* NVMM GPR state index */
	uint64_t mask;
};

enum x86_disp_type {
	DISP_NONE,
	DISP_0,
	DISP_1,
	DISP_4
};

struct x86_disp {
	enum x86_disp_type type;
	uint64_t data;	/* 4 bytes, but can be sign-extended */
};

enum REGMODRM__Mod {
	MOD_DIS0,	/* also, register indirect */
	MOD_DIS1,
	MOD_DIS4,
	MOD_REG
};

enum REGMODRM__Reg {
	REG_000,	/* these fields are indexes to the register map */
	REG_001,
	REG_010,
	REG_011,
	REG_100,
	REG_101,
	REG_110,
	REG_111
};

enum REGMODRM__Rm {
	RM_000,		/* reg */
	RM_001,		/* reg */
	RM_010,		/* reg */
	RM_011,		/* reg */
	RM_RSP_SIB,	/* reg or SIB, depending on the MOD */
	RM_RBP_DISP32,	/* reg or displacement-only (= RIP-relative on amd64) */
	RM_110,
	RM_111
};

struct x86_regmodrm {
	bool present;
	enum REGMODRM__Mod mod;
	enum REGMODRM__Reg reg;
	enum REGMODRM__Rm rm;
};

struct x86_immediate {
	uint64_t data;
};

struct x86_sib {
	uint8_t scale;
	const struct x86_reg *idx;
	const struct x86_reg *bas;
};

enum x86_store_type {
	STORE_NONE,
	STORE_REG,
	STORE_IMM,
	STORE_SIB,
	STORE_DMO
};

struct x86_store {
	enum x86_store_type type;
	union {
		const struct x86_reg *reg;
		struct x86_immediate imm;
		struct x86_sib sib;
		uint64_t dmo;
	} u;
	struct x86_disp disp;
	int hardseg;
};

struct x86_instr {
	size_t len;
	struct x86_legpref legpref;
	struct x86_rexpref rexpref;
	size_t operand_size;
	size_t address_size;
	uint64_t zeroextend_mask;

	struct x86_regmodrm regmodrm;

	const struct x86_opcode *opcode;

	struct x86_store src;
	struct x86_store dst;

	struct x86_store *strm;

	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

struct x86_decode_fsm {
	/* vcpu */
	bool is64bit;
	bool is32bit;
	bool is16bit;

	/* fsm */
	int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
	uint8_t *buf;
	uint8_t *end;
};

struct x86_opcode {
	uint8_t byte;
	bool regmodrm;
	bool regtorm;
	bool dmo;
	bool todmo;
	bool movs;
	bool stos;
	bool lods;
	bool szoverride;
	int defsize;
	int allsize;
	bool group1;
	bool group11;
	bool immediate;
	int flags;
	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

struct x86_group_entry {
	void (*emul)(struct nvmm_mem *, void (*)(struct nvmm_mem *), uint64_t *);
};

#define OPSIZE_BYTE 0x01
#define OPSIZE_WORD 0x02 /* 2 bytes */
#define OPSIZE_DOUB 0x04 /* 4 bytes */
#define OPSIZE_QUAD 0x08 /* 8 bytes */

#define FLAG_imm8	0x01
#define FLAG_immz	0x02
#define FLAG_ze		0x04

static const struct x86_group_entry group1[8] = {
	[1] = { .emul = x86_emul_or },
	[4] = { .emul = x86_emul_and },
	[6] = { .emul = x86_emul_xor }
};

static const struct x86_group_entry group11[8] = {
	[0] = { .emul = x86_emul_mov }
};

static const struct x86_opcode primary_opcode_table[] = {
	/*
	 * Group1
	 */
	{
		/* Ev, Iz */
		.byte = 0x81,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.group1 = true,
		.immediate = true,
		.flags = FLAG_immz,
		.emul = NULL /* group1 */
	},
	{
		/* Ev, Ib */
		.byte = 0x83,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.group1 = true,
		.immediate = true,
		.flags = FLAG_imm8,
		.emul = NULL /* group1 */
	},

	/*
	 * Group11
	 */
	{
		/* Eb, Ib */
		.byte = 0xC6,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.group11 = true,
		.immediate = true,
		.emul = NULL /* group11 */
	},
	{
		/* Ev, Iz */
		.byte = 0xC7,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.group11 = true,
		.immediate = true,
		.flags = FLAG_immz,
		.emul = NULL /* group11 */
	},

	/*
	 * OR
	 */
	{
		/* Eb, Gb */
		.byte = 0x08,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_or
	},
	{
		/* Ev, Gv */
		.byte = 0x09,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_or
	},
	{
		/* Gb, Eb */
		.byte = 0x0A,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_or
	},
	{
		/* Gv, Ev */
		.byte = 0x0B,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_or
	},

	/*
	 * AND
	 */
	{
		/* Eb, Gb */
		.byte = 0x20,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_and
	},
	{
		/* Ev, Gv */
		.byte = 0x21,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_and
	},
	{
		/* Gb, Eb */
		.byte = 0x22,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_and
	},
	{
		/* Gv, Ev */
		.byte = 0x23,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_and
	},

	/*
	 * XOR
	 */
	{
		/* Eb, Gb */
		.byte = 0x30,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_xor
	},
	{
		/* Ev, Gv */
		.byte = 0x31,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_xor
	},
	{
		/* Gb, Eb */
		.byte = 0x32,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_xor
	},
	{
		/* Gv, Ev */
		.byte = 0x33,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_xor
	},

	/*
	 * MOV
	 */
	{
		/* Eb, Gb */
		.byte = 0x88,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Ev, Gv */
		.byte = 0x89,
		.regmodrm = true,
		.regtorm = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* Gb, Eb */
		.byte = 0x8A,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Gv, Ev */
		.byte = 0x8B,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* AL, Ob */
		.byte = 0xA0,
		.dmo = true,
		.todmo = false,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* rAX, Ov */
		.byte = 0xA1,
		.dmo = true,
		.todmo = false,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},
	{
		/* Ob, AL */
		.byte = 0xA2,
		.dmo = true,
		.todmo = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_mov
	},
	{
		/* Ov, rAX */
		.byte = 0xA3,
		.dmo = true,
		.todmo = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_mov
	},

	/*
	 * MOVS
	 */
	{
		/* Yb, Xb */
		.byte = 0xA4,
		.movs = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_movs
	},
	{
		/* Yv, Xv */
		.byte = 0xA5,
		.movs = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_movs
	},

	/*
	 * STOS
	 */
	{
		/* Yb, AL */
		.byte = 0xAA,
		.stos = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_stos
	},
	{
		/* Yv, rAX */
		.byte = 0xAB,
		.stos = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_stos
	},

	/*
	 * LODS
	 */
	{
		/* AL, Xb */
		.byte = 0xAC,
		.lods = true,
		.szoverride = false,
		.defsize = OPSIZE_BYTE,
		.allsize = -1,
		.emul = x86_emul_lods
	},
	{
		/* rAX, Xv */
		.byte = 0xAD,
		.lods = true,
		.szoverride = true,
		.defsize = -1,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.emul = x86_emul_lods
	},
};

static const struct x86_opcode secondary_opcode_table[] = {
	/*
	 * MOVZX
	 */
	{
		/* Gv, Eb */
		.byte = 0xB6,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = OPSIZE_BYTE,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.flags = FLAG_ze,
		.emul = x86_emul_mov
	},
	{
		/* Gv, Ew */
		.byte = 0xB7,
		.regmodrm = true,
		.regtorm = false,
		.szoverride = true,
		.defsize = OPSIZE_WORD,
		.allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
		.flags = FLAG_ze,
		.emul = x86_emul_mov
	},
};

static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };

/* [REX-present][enc][opsize] */
static const struct x86_reg gpr_map__special[2][4][8] = {
	[false] = {
		/* No REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 },
		}
	},
	[true] = {
		/* Has REX prefix. */
		[0b00] = {
			[0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
			[1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
		},
		[0b01] = {
			[0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
			[1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
		},
		[0b10] = {
			[0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
			[1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
		},
		[0b11] = {
			[0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
			[1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
		}
	}
};

/* [depends][enc][size] */
static const struct x86_reg gpr_map[2][8][8] = {
	[false] = {
		/* Not extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
			[1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* RAX */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
			[1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* RCX */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
			[1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* RDX */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
			[1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* RBX */
		},
		[0b100] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b101] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b110] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
		[0b111] = {
			[0] = { -1, 0 }, /* SPECIAL */
			[1] = { -1, 0 }, /* SPECIAL */
			[2] = { -1, 0 },
			[3] = { -1, 0 }, /* SPECIAL */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { -1, 0 }, /* SPECIAL */
		},
	},
	[true] = {
		/* Extended. */
		[0b000] = {
			[0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
			[1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8 */
		},
		[0b001] = {
			[0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
			[1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9 */
		},
		[0b010] = {
			[0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
			[1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10 */
		},
		[0b011] = {
			[0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
			[1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11 */
		},
		[0b100] = {
			[0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
			[1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12 */
		},
		[0b101] = {
			[0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
			[1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13 */
		},
		[0b110] = {
			[0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
			[1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14 */
		},
		[0b111] = {
			[0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
			[1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
			[2] = { -1, 0 },
			[3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
			[4] = { -1, 0 },
			[5] = { -1, 0 },
			[6] = { -1, 0 },
			[7] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15 */
		},
	}
};

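/*
 * Instruction decoder, implemented as a small FSM. Each node consumes
 * bytes from the instruction buffer with fsm_read()/fsm_advance() and
 * selects the next node, until a node sets fsm->fn to NULL (done) or
 * returns -1 (decoding error). Prefixes are decoded first, then the
 * opcode, then the ModRM/SIB/displacement/immediate bytes.
 */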
static int
node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	fsm->fn = NULL;
	return -1;
}

static int
fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
{
	if (fsm->buf + n > fsm->end) {
		return -1;
	}
	memcpy(bytes, fsm->buf, n);
	return 0;
}

static void
fsm_advance(struct x86_decode_fsm *fsm, size_t n,
    int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
{
	fsm->buf += n;
	if (fsm->buf > fsm->end) {
		fsm->fn = node_overflow;
	} else {
		fsm->fn = fn;
	}
}

static const struct x86_reg *
resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
{
	enc &= 0b11;
	if (regsize == 8) {
		/* May be 64bit without REX */
		return &gpr_map__special[1][enc][regsize-1];
	}
	return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
}

/*
 * Special node, for MOVS. Fake two displacements of zero on the source and
 * destination registers.
 */
static int
node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	size_t adrsize;

	adrsize = instr->address_size;

	/* DS:RSI */
	instr->src.type = STORE_REG;
	instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
	instr->src.disp.type = DISP_0;

	/* ES:RDI, force ES */
	instr->dst.type = STORE_REG;
	instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
	instr->dst.disp.type = DISP_0;
	instr->dst.hardseg = NVMM_X64_SEG_ES;

	fsm_advance(fsm, 0, NULL);

	return 0;
}

/*
 * Special node, for STOS and LODS. Fake a displacement of zero on the
 * destination register.
 */
static int
node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *stlo, *streg;
	size_t adrsize, regsize;

	adrsize = instr->address_size;
	regsize = instr->operand_size;

	if (opcode->stos) {
		streg = &instr->src;
		stlo = &instr->dst;
	} else {
		streg = &instr->dst;
		stlo = &instr->src;
	}

	streg->type = STORE_REG;
	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */

	stlo->type = STORE_REG;
	if (opcode->stos) {
		/* ES:RDI, force ES */
		stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
		stlo->hardseg = NVMM_X64_SEG_ES;
	} else {
		/* DS:RSI */
		stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
	}
	stlo->disp.type = DISP_0;

	fsm_advance(fsm, 0, NULL);

	return 0;
}

static int
node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *stdmo, *streg;
	size_t adrsize, regsize;

	adrsize = instr->address_size;
	regsize = instr->operand_size;

	if (opcode->todmo) {
		streg = &instr->src;
		stdmo = &instr->dst;
	} else {
		streg = &instr->dst;
		stdmo = &instr->src;
	}

	streg->type = STORE_REG;
	streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */

	stdmo->type = STORE_DMO;
	if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
		return -1;
	}
	fsm_advance(fsm, adrsize, NULL);

	return 0;
}

static inline uint64_t
sign_extend(uint64_t val, int size)
{
	if (size == 1) {
		if (val & __BIT(7))
			val |= 0xFFFFFFFFFFFFFF00;
	} else if (size == 2) {
		if (val & __BIT(15))
			val |= 0xFFFFFFFFFFFF0000;
	} else if (size == 4) {
		if (val & __BIT(31))
			val |= 0xFFFFFFFF00000000;
	}
	return val;
}

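/*
 * Fetch the immediate operand; it is always the source. imm8 and 64bit
 * immz operands are encoded shorter than the operand size and then go
 * through sign_extend().
 */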
static int
node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	struct x86_store *store;
	uint8_t immsize;
	size_t sesize = 0;

	/* The immediate is the source */
	store = &instr->src;
	immsize = instr->operand_size;

	if (opcode->flags & FLAG_imm8) {
		sesize = immsize;
		immsize = 1;
	} else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
		sesize = immsize;
		immsize = 4;
	}

	store->type = STORE_IMM;
	if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
		return -1;
	}
	fsm_advance(fsm, immsize, NULL);

	if (sesize != 0) {
		store->u.imm.data = sign_extend(store->u.imm.data, sesize);
	}

	return 0;
}

static int
node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	uint64_t data = 0;
	size_t n;

	if (instr->strm->disp.type == DISP_1) {
		n = 1;
	} else { /* DISP4 */
		n = 4;
	}

	if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
		return -1;
	}

	if (__predict_true(fsm->is64bit)) {
		data = sign_extend(data, n);
	}

	instr->strm->disp.data = data;

	if (opcode->immediate) {
		fsm_advance(fsm, n, node_immediate);
	} else {
		fsm_advance(fsm, n, NULL);
	}

	return 0;
}

static const struct x86_reg *
get_register_idx(struct x86_instr *instr, uint8_t index)
{
	uint8_t enc = index;
	const struct x86_reg *reg;
	size_t regsize;

	regsize = instr->address_size;
	reg = &gpr_map[instr->rexpref.x][enc][regsize-1];

	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static const struct x86_reg *
get_register_bas(struct x86_instr *instr, uint8_t base)
{
	uint8_t enc = base;
	const struct x86_reg *reg;
	size_t regsize;

	regsize = instr->address_size;
	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

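/*
 * Decode the SIB byte. The index and base registers are resolved from the
 * scale/index/base fields; index 0b100 without REX.X means "no index",
 * and base 0b101 with mod=00 means "no base, a disp32 follows".
 */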
static int
node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode;
	uint8_t scale, index, base;
	bool noindex, nobase;
	uint8_t byte;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	scale = ((byte & 0b11000000) >> 6);
	index = ((byte & 0b00111000) >> 3);
	base = ((byte & 0b00000111) >> 0);

	opcode = instr->opcode;

	noindex = false;
	nobase = false;

	if (index == 0b100 && !instr->rexpref.x) {
		/* Special case: the index is null */
		noindex = true;
	}

	if (instr->regmodrm.mod == 0b00 && base == 0b101) {
		/* Special case: the base is null + disp32 */
		instr->strm->disp.type = DISP_4;
		nobase = true;
	}

	instr->strm->type = STORE_SIB;
	instr->strm->u.sib.scale = (1 << scale);
	if (!noindex)
		instr->strm->u.sib.idx = get_register_idx(instr, index);
	if (!nobase)
		instr->strm->u.sib.bas = get_register_bas(instr, base);

	/* May have a displacement, or an immediate */
	if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
		fsm_advance(fsm, 1, node_disp);
	} else if (opcode->immediate) {
		fsm_advance(fsm, 1, node_immediate);
	} else {
		fsm_advance(fsm, 1, NULL);
	}

	return 0;
}

static const struct x86_reg *
get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
{
	uint8_t enc = instr->regmodrm.reg;
	const struct x86_reg *reg;
	size_t regsize;

	regsize = instr->operand_size;

	reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static const struct x86_reg *
get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
{
	uint8_t enc = instr->regmodrm.rm;
	const struct x86_reg *reg;
	size_t regsize;

	if (instr->strm->disp.type == DISP_NONE) {
		regsize = instr->operand_size;
	} else {
		/* Indirect access, the size is that of the address. */
		regsize = instr->address_size;
	}

	reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
	if (reg->num == -1) {
		reg = resolve_special_register(instr, enc, regsize);
	}

	return reg;
}

static inline bool
has_sib(struct x86_instr *instr)
{
	return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
}

static inline bool
is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
	    instr->regmodrm.rm == RM_RBP_DISP32);
}

static inline bool
is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
	    instr->regmodrm.rm == RM_RBP_DISP32);
}

static enum x86_disp_type
get_disp_type(struct x86_instr *instr)
{
	switch (instr->regmodrm.mod) {
	case MOD_DIS0:	/* indirect */
		return DISP_0;
	case MOD_DIS1:	/* indirect+1 */
		return DISP_1;
	case MOD_DIS4:	/* indirect+4 */
		return DISP_4;
	case MOD_REG:	/* direct */
	default:	/* gcc */
		return DISP_NONE;
	}
}

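/*
 * Decode the ModRM byte and assign the REG and RM operands to src/dst
 * according to the opcode direction. Handles the SIB, RIP-relative and
 * disp32-only special cases, and resolves the group1/group11 encodings.
 */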
static int
node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	struct x86_store *strg, *strm;
	const struct x86_opcode *opcode;
	const struct x86_reg *reg;
	uint8_t byte;

	if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
		return -1;
	}

	opcode = instr->opcode;

	instr->regmodrm.present = true;
	instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
	instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
	instr->regmodrm.rm = ((byte & 0b00000111) >> 0);

	if (opcode->regtorm) {
		strg = &instr->src;
		strm = &instr->dst;
	} else { /* RM to REG */
		strm = &instr->src;
		strg = &instr->dst;
	}

	/* Save for later use. */
	instr->strm = strm;

	/*
	 * Special cases: Groups. The REG field of REGMODRM is the index in
	 * the group. op1 gets overwritten in the Immediate node, if any.
	 */
	if (opcode->group1) {
		if (group1[instr->regmodrm.reg].emul == NULL) {
			return -1;
		}
		instr->emul = group1[instr->regmodrm.reg].emul;
	} else if (opcode->group11) {
		if (group11[instr->regmodrm.reg].emul == NULL) {
			return -1;
		}
		instr->emul = group11[instr->regmodrm.reg].emul;
	}

	if (!opcode->immediate) {
		reg = get_register_reg(instr, opcode);
		if (reg == NULL) {
			return -1;
		}
		strg->type = STORE_REG;
		strg->u.reg = reg;
	}

	if (has_sib(instr)) {
		/* Overwrites RM */
		fsm_advance(fsm, 1, node_sib);
		return 0;
	}

	/* The displacement applies to RM. */
	strm->disp.type = get_disp_type(instr);

	if (is_rip_relative(fsm, instr)) {
		/* Overwrites RM */
		strm->type = STORE_REG;
		strm->u.reg = &gpr_map__rip;
		strm->disp.type = DISP_4;
		fsm_advance(fsm, 1, node_disp);
		return 0;
	}

	if (is_disp32_only(fsm, instr)) {
		/* Overwrites RM */
		strm->type = STORE_REG;
		strm->u.reg = NULL;
		strm->disp.type = DISP_4;
		fsm_advance(fsm, 1, node_disp);
		return 0;
	}

	reg = get_register_rm(instr, opcode);
	if (reg == NULL) {
		return -1;
	}
	strm->type = STORE_REG;
	strm->u.reg = reg;

	if (strm->disp.type == DISP_NONE) {
		/* Direct register addressing mode */
		if (opcode->immediate) {
			fsm_advance(fsm, 1, node_immediate);
		} else {
			fsm_advance(fsm, 1, NULL);
		}
	} else if (strm->disp.type == DISP_0) {
		/* Indirect register addressing mode */
		if (opcode->immediate) {
			fsm_advance(fsm, 1, node_immediate);
		} else {
			fsm_advance(fsm, 1, NULL);
		}
	} else {
		fsm_advance(fsm, 1, node_disp);
	}

	return 0;
}

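/*
 * Compute the operand size, in bytes: either the opcode's implicit size,
 * or 8 with REX.W, otherwise 4 or 2 depending on the mode and on a 0x66
 * operand-size override.
 */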
static size_t
get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	const struct x86_opcode *opcode = instr->opcode;
	int opsize;

	/* Get the opsize */
	if (!opcode->szoverride) {
		opsize = opcode->defsize;
	} else if (instr->rexpref.present && instr->rexpref.w) {
		opsize = 8;
	} else {
		if (!fsm->is16bit) {
			if (instr->legpref.opr_ovr) {
				opsize = 2;
			} else {
				opsize = 4;
			}
		} else { /* 16bit */
			if (instr->legpref.opr_ovr) {
				opsize = 4;
			} else {
				opsize = 2;
			}
		}
	}

	/* See if available */
	if ((opcode->allsize & opsize) == 0) {
		// XXX do we care?
	}

	return opsize;
}

static size_t
get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
{
	if (fsm->is64bit) {
		if (__predict_false(instr->legpref.adr_ovr)) {
			return 4;
		}
		return 8;
	}

	if (fsm->is32bit) {
		if (__predict_false(instr->legpref.adr_ovr)) {
			return 2;
		}
		return 4;
	}

	/* 16bit. */
	if (__predict_false(instr->legpref.adr_ovr)) {
		return 4;
	}
	return 2;
}

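/*
 * Match the byte against the one-byte opcode table and branch to the
 * ModRM, direct-memory-offset, STOS/LODS or MOVS node accordingly.
 */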
2197 static int
2198 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2199 {
2200 const struct x86_opcode *opcode;
2201 uint8_t byte;
2202 size_t i, n;
2203
2204 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2205 return -1;
2206 }
2207
2208 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2209 for (i = 0; i < n; i++) {
2210 if (primary_opcode_table[i].byte == byte)
2211 break;
2212 }
2213 if (i == n) {
2214 return -1;
2215 }
2216 opcode = &primary_opcode_table[i];
2217
2218 instr->opcode = opcode;
2219 instr->emul = opcode->emul;
2220 instr->operand_size = get_operand_size(fsm, instr);
2221 instr->address_size = get_address_size(fsm, instr);
2222
2223 if (fsm->is64bit && (instr->operand_size == 4)) {
2224 /* Zero-extend to 64 bits. */
2225 instr->zeroextend_mask = ~size_to_mask(4);
2226 }
2227
2228 if (opcode->regmodrm) {
2229 fsm_advance(fsm, 1, node_regmodrm);
2230 } else if (opcode->dmo) {
2231 /* Direct-Memory Offsets */
2232 fsm_advance(fsm, 1, node_dmo);
2233 } else if (opcode->stos || opcode->lods) {
2234 fsm_advance(fsm, 1, node_stlo);
2235 } else if (opcode->movs) {
2236 fsm_advance(fsm, 1, node_movs);
2237 } else {
2238 return -1;
2239 }
2240
2241 return 0;
2242 }
2243
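/*
 * Look the byte up in the two-byte (0x0F-escaped) opcode table. Opcodes
 * flagged FLAG_ze get a zero-extend mask computed from their default size.
 */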
2244 static int
2245 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2246 {
2247 const struct x86_opcode *opcode;
2248 uint8_t byte;
2249 size_t i, n;
2250
2251 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2252 return -1;
2253 }
2254
2255 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2256 for (i = 0; i < n; i++) {
2257 if (secondary_opcode_table[i].byte == byte)
2258 break;
2259 }
2260 if (i == n) {
2261 return -1;
2262 }
2263 opcode = &secondary_opcode_table[i];
2264
2265 instr->opcode = opcode;
2266 instr->emul = opcode->emul;
2267 instr->operand_size = get_operand_size(fsm, instr);
2268 instr->address_size = get_address_size(fsm, instr);
2269
2270 if (opcode->flags & FLAG_ze) {
2271 /*
2272 		 * Compute the mask for zero-extension. Update the operand size,
2273 		 * since we move fewer bytes.
2274 */
2275 instr->zeroextend_mask = size_to_mask(instr->operand_size);
2276 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2277 instr->operand_size = opcode->defsize;
2278 }
2279
2280 if (opcode->regmodrm) {
2281 fsm_advance(fsm, 1, node_regmodrm);
2282 } else {
2283 return -1;
2284 }
2285
2286 return 0;
2287 }
2288
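/*
 * First non-prefix byte: either the 0x0F escape to the secondary opcode
 * table, a VEX prefix (which we reject), or a primary opcode.
 */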
2289 static int
2290 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2291 {
2292 uint8_t byte;
2293
2294 #define ESCAPE 0x0F
2295 #define VEX_1 0xC5
2296 #define VEX_2 0xC4
2297 #define XOP 0x8F
2298
2299 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2300 return -1;
2301 }
2302
2303 /*
2304 	 * We don't take XOP. It is AMD-specific, and it was removed shortly
2305 	 * after being introduced. We reject VEX as well.
2306 */
2307 if (byte == ESCAPE) {
2308 fsm_advance(fsm, 1, node_secondary_opcode);
2309 } else if (!instr->rexpref.present) {
2310 if (byte == VEX_1) {
2311 return -1;
2312 } else if (byte == VEX_2) {
2313 return -1;
2314 } else {
2315 fsm->fn = node_primary_opcode;
2316 }
2317 } else {
2318 fsm->fn = node_primary_opcode;
2319 }
2320
2321 return 0;
2322 }
2323
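/*
 * Decode an optional REX prefix (0x40-0x4F). Only valid in 64-bit mode; if
 * the byte is not a REX prefix it is left in place for node_main.
 */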
2324 static int
2325 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2326 {
2327 struct x86_rexpref *rexpref = &instr->rexpref;
2328 uint8_t byte;
2329 size_t n = 0;
2330
2331 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2332 return -1;
2333 }
2334
2335 if (byte >= 0x40 && byte <= 0x4F) {
2336 if (__predict_false(!fsm->is64bit)) {
2337 return -1;
2338 }
2339 rexpref->present = true;
2340 rexpref->w = ((byte & 0x8) != 0);
2341 rexpref->r = ((byte & 0x4) != 0);
2342 rexpref->x = ((byte & 0x2) != 0);
2343 rexpref->b = ((byte & 0x1) != 0);
2344 n = 1;
2345 }
2346
2347 fsm_advance(fsm, n, node_main);
2348 return 0;
2349 }
2350
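/*
 * Consume the legacy prefixes one byte at a time, recording operand/address
 * size overrides, segment overrides and REP/REPN. Any other byte ends the
 * prefix run and moves the FSM to the REX node.
 */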
2351 static int
2352 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2353 {
2354 uint8_t byte;
2355
2356 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2357 return -1;
2358 }
2359
2360 if (byte == LEG_OPR_OVR) {
2361 instr->legpref.opr_ovr = 1;
2362 } else if (byte == LEG_OVR_DS) {
2363 instr->legpref.seg = NVMM_X64_SEG_DS;
2364 } else if (byte == LEG_OVR_ES) {
2365 instr->legpref.seg = NVMM_X64_SEG_ES;
2366 } else if (byte == LEG_REP) {
2367 instr->legpref.rep = 1;
2368 } else if (byte == LEG_OVR_GS) {
2369 instr->legpref.seg = NVMM_X64_SEG_GS;
2370 } else if (byte == LEG_OVR_FS) {
2371 instr->legpref.seg = NVMM_X64_SEG_FS;
2372 } else if (byte == LEG_ADR_OVR) {
2373 instr->legpref.adr_ovr = 1;
2374 } else if (byte == LEG_OVR_CS) {
2375 instr->legpref.seg = NVMM_X64_SEG_CS;
2376 } else if (byte == LEG_OVR_SS) {
2377 instr->legpref.seg = NVMM_X64_SEG_SS;
2378 } else if (byte == LEG_REPN) {
2379 instr->legpref.repn = 1;
2380 } else if (byte == LEG_LOCK) {
2381 /* ignore */
2382 } else {
2383 /* not a legacy prefix */
2384 fsm_advance(fsm, 0, node_rex_prefix);
2385 return 0;
2386 }
2387
2388 fsm_advance(fsm, 1, node_legacy_prefix);
2389 return 0;
2390 }
2391
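/*
 * Decode the instruction in inst_bytes into *instr, by running the decoding
 * FSM from the legacy-prefix node until completion. The CPU mode is taken
 * from the given VCPU state.
 */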
2392 static int
2393 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2394 struct nvmm_x64_state *state)
2395 {
2396 struct x86_decode_fsm fsm;
2397 int ret;
2398
2399 memset(instr, 0, sizeof(*instr));
2400 instr->legpref.seg = -1;
2401
2402 fsm.is64bit = is_64bit(state);
2403 fsm.is32bit = is_32bit(state);
2404 fsm.is16bit = is_16bit(state);
2405
2406 fsm.fn = node_legacy_prefix;
2407 fsm.buf = inst_bytes;
2408 fsm.end = inst_bytes + inst_len;
2409
2410 while (fsm.fn != NULL) {
2411 ret = (*fsm.fn)(&fsm, instr);
2412 if (ret == -1)
2413 return -1;
2414 }
2415
2416 instr->len = fsm.buf - inst_bytes;
2417
2418 return 0;
2419 }
2420
2421 /* -------------------------------------------------------------------------- */
2422
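/*
 * Return the value of PF for the given result: 1 for even parity, 0 for odd
 * parity.
 */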
2423 static inline uint8_t
2424 compute_parity(uint8_t *data)
2425 {
2426 uint64_t *ptr = (uint64_t *)data;
2427 uint64_t val = *ptr;
2428
2429 	/* PF is defined on the least significant byte of the result only. */
2430 	val &= 0xFF;
2431 
2432 val ^= val >> 4;
2433 val ^= val >> 2;
2434 val ^= val >> 1;
2435 return (~val) & 1;
2436 }
2437
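/*
 * OR on a memory operand: fetch the current value through the callback, OR
 * it with the bytes in mem->data, update RFLAGS, and write the result back
 * if the access was a write. x86_emul_and() and x86_emul_xor() below follow
 * the same pattern.
 */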
2438 static void
2439 x86_emul_or(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2440 uint64_t *gprs)
2441 {
2442 const bool write = mem->write;
2443 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2444 uint8_t data[8];
2445 size_t i;
2446
2447 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2448
2449 memcpy(data, mem->data, sizeof(data));
2450
2451 /* Fetch the value to be OR'ed. */
2452 mem->write = false;
2453 (*cb)(mem);
2454
2455 	/* Perform the OR. ZF is set only if the whole result is zero. */
2456 	fl |= PSL_Z;
2457 	for (i = 0; i < mem->size; i++) {
2458 		if ((mem->data[i] |= data[i]) != 0)
2459 			fl &= ~PSL_Z;
2460 	}
2461 if (mem->data[mem->size-1] & __BIT(7))
2462 fl |= PSL_N;
2463 if (compute_parity(mem->data))
2464 fl |= PSL_PF;
2465
2466 if (write) {
2467 /* Write back the result. */
2468 mem->write = true;
2469 (*cb)(mem);
2470 }
2471
2472 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2473 }
2474
2475 static void
2476 x86_emul_and(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2477 uint64_t *gprs)
2478 {
2479 const bool write = mem->write;
2480 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2481 uint8_t data[8];
2482 size_t i;
2483
2484 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2485
2486 memcpy(data, mem->data, sizeof(data));
2487
2488 /* Fetch the value to be AND'ed. */
2489 mem->write = false;
2490 (*cb)(mem);
2491
2492 	/* Perform the AND. ZF is set only if the whole result is zero. */
2493 	fl |= PSL_Z;
2494 	for (i = 0; i < mem->size; i++) {
2495 		if ((mem->data[i] &= data[i]) != 0)
2496 			fl &= ~PSL_Z;
2497 	}
2498 if (mem->data[mem->size-1] & __BIT(7))
2499 fl |= PSL_N;
2500 if (compute_parity(mem->data))
2501 fl |= PSL_PF;
2502
2503 if (write) {
2504 /* Write back the result. */
2505 mem->write = true;
2506 (*cb)(mem);
2507 }
2508
2509 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2510 }
2511
2512 static void
2513 x86_emul_xor(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2514 uint64_t *gprs)
2515 {
2516 const bool write = mem->write;
2517 uint64_t fl = gprs[NVMM_X64_GPR_RFLAGS];
2518 uint8_t data[8];
2519 size_t i;
2520
2521 fl &= ~(PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF);
2522
2523 memcpy(data, mem->data, sizeof(data));
2524
2525 /* Fetch the value to be XOR'ed. */
2526 mem->write = false;
2527 (*cb)(mem);
2528
2529 	/* Perform the XOR. ZF is set only if the whole result is zero. */
2530 	fl |= PSL_Z;
2531 	for (i = 0; i < mem->size; i++) {
2532 		if ((mem->data[i] ^= data[i]) != 0)
2533 			fl &= ~PSL_Z;
2534 	}
2535 if (mem->data[mem->size-1] & __BIT(7))
2536 fl |= PSL_N;
2537 if (compute_parity(mem->data))
2538 fl |= PSL_PF;
2539
2540 if (write) {
2541 /* Write back the result. */
2542 mem->write = true;
2543 (*cb)(mem);
2544 }
2545
2546 gprs[NVMM_X64_GPR_RFLAGS] = fl;
2547 }
2548
2549 static void
2550 x86_emul_mov(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2551 uint64_t *gprs)
2552 {
2553 /*
2554 * Nothing special, just move without emulation.
2555 */
2556 (*cb)(mem);
2557 }
2558
2559 static void
2560 x86_emul_stos(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2561 uint64_t *gprs)
2562 {
2563 /*
2564 * Just move, and update RDI.
2565 */
2566 (*cb)(mem);
2567
2568 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2569 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2570 } else {
2571 gprs[NVMM_X64_GPR_RDI] += mem->size;
2572 }
2573 }
2574
2575 static void
2576 x86_emul_lods(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2577 uint64_t *gprs)
2578 {
2579 /*
2580 * Just move, and update RSI.
2581 */
2582 (*cb)(mem);
2583
2584 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2585 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2586 } else {
2587 gprs[NVMM_X64_GPR_RSI] += mem->size;
2588 }
2589 }
2590
2591 static void
2592 x86_emul_movs(struct nvmm_mem *mem, void (*cb)(struct nvmm_mem *),
2593 uint64_t *gprs)
2594 {
2595 /*
2596 	 * Special instruction: double memory operand. Don't call the cb:
2597 	 * the memory accesses were already performed by assist_mem_double().
2598 */
2599
2600 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2601 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2602 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2603 } else {
2604 gprs[NVMM_X64_GPR_RSI] += mem->size;
2605 gprs[NVMM_X64_GPR_RDI] += mem->size;
2606 }
2607 }
2608
2609 /* -------------------------------------------------------------------------- */
2610
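/* Read a GPR, truncated to the address size of the instruction. */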
2611 static inline uint64_t
2612 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2613 {
2614 uint64_t val;
2615
2616 val = state->gprs[gpr];
2617 val &= size_to_mask(instr->address_size);
2618
2619 return val;
2620 }
2621
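/*
 * Compute the guest virtual address referenced by a decoded operand: SIB,
 * register-indirect or direct memory offset, plus displacement, with the
 * segment base applied according to the current mode.
 */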
2622 static int
2623 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2624 struct x86_store *store, gvaddr_t *gvap, size_t size)
2625 {
2626 struct x86_sib *sib;
2627 gvaddr_t gva = 0;
2628 uint64_t reg;
2629 int ret, seg;
2630
2631 if (store->type == STORE_SIB) {
2632 sib = &store->u.sib;
2633 if (sib->bas != NULL)
2634 gva += gpr_read_address(instr, state, sib->bas->num);
2635 if (sib->idx != NULL) {
2636 reg = gpr_read_address(instr, state, sib->idx->num);
2637 gva += sib->scale * reg;
2638 }
2639 } else if (store->type == STORE_REG) {
2640 if (store->u.reg == NULL) {
2641 /* The base is null. Happens with disp32-only. */
2642 } else {
2643 gva = gpr_read_address(instr, state, store->u.reg->num);
2644 }
2645 } else {
2646 gva = store->u.dmo;
2647 }
2648
2649 if (store->disp.type != DISP_NONE) {
2650 gva += store->disp.data;
2651 }
2652
2653 if (store->hardseg != 0) {
2654 seg = store->hardseg;
2655 } else {
2656 if (__predict_false(instr->legpref.seg != -1)) {
2657 seg = instr->legpref.seg;
2658 } else {
2659 seg = NVMM_X64_SEG_DS;
2660 }
2661 }
2662
2663 if (__predict_true(is_long_mode(state))) {
2664 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2665 segment_apply(&state->segs[seg], &gva);
2666 }
2667 } else {
2668 ret = segment_check(&state->segs[seg], gva, size);
2669 if (ret == -1)
2670 return -1;
2671 segment_apply(&state->segs[seg], &gva);
2672 }
2673
2674 *gvap = gva;
2675 return 0;
2676 }
2677
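/*
 * Fetch the instruction bytes at CS:RIP and scan the legacy prefixes to
 * determine which segment the memory access uses. Defaults to DS.
 */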
2678 static int
2679 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2680 {
2681 uint8_t inst_bytes[15], byte;
2682 size_t i, fetchsize;
2683 gvaddr_t gva;
2684 int ret, seg;
2685
2686 fetchsize = sizeof(inst_bytes);
2687
2688 gva = state->gprs[NVMM_X64_GPR_RIP];
2689 if (__predict_false(!is_long_mode(state))) {
2690 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2691 fetchsize);
2692 if (ret == -1)
2693 return -1;
2694 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2695 }
2696
2697 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2698 if (ret == -1)
2699 return -1;
2700
2701 seg = NVMM_X64_SEG_DS;
2702 for (i = 0; i < fetchsize; i++) {
2703 byte = inst_bytes[i];
2704
2705 if (byte == LEG_OVR_DS) {
2706 seg = NVMM_X64_SEG_DS;
2707 } else if (byte == LEG_OVR_ES) {
2708 seg = NVMM_X64_SEG_ES;
2709 } else if (byte == LEG_OVR_GS) {
2710 seg = NVMM_X64_SEG_GS;
2711 } else if (byte == LEG_OVR_FS) {
2712 seg = NVMM_X64_SEG_FS;
2713 } else if (byte == LEG_OVR_CS) {
2714 seg = NVMM_X64_SEG_CS;
2715 } else if (byte == LEG_OVR_SS) {
2716 seg = NVMM_X64_SEG_SS;
2717 } else if (byte == LEG_OPR_OVR) {
2718 /* nothing */
2719 } else if (byte == LEG_ADR_OVR) {
2720 /* nothing */
2721 } else if (byte == LEG_REP) {
2722 /* nothing */
2723 } else if (byte == LEG_REPN) {
2724 /* nothing */
2725 } else if (byte == LEG_LOCK) {
2726 /* nothing */
2727 } else {
2728 return seg;
2729 }
2730 }
2731
2732 return seg;
2733 }
2734
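/*
 * Fetch the instruction bytes at CS:RIP into the exit structure, for the
 * case where the kernel did not provide them.
 */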
2735 static int
2736 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2737 struct nvmm_exit *exit)
2738 {
2739 size_t fetchsize;
2740 gvaddr_t gva;
2741 int ret;
2742
2743 fetchsize = sizeof(exit->u.mem.inst_bytes);
2744
2745 gva = state->gprs[NVMM_X64_GPR_RIP];
2746 if (__predict_false(!is_long_mode(state))) {
2747 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2748 fetchsize);
2749 if (ret == -1)
2750 return -1;
2751 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2752 }
2753
2754 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
2755 fetchsize);
2756 if (ret == -1)
2757 return -1;
2758
2759 exit->u.mem.inst_len = fetchsize;
2760
2761 return 0;
2762 }
2763
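/*
 * MOVS: two memory operands. Read the source, write the destination, then
 * let the emulation function update RSI/RDI.
 */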
2764 static int
2765 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2766 struct x86_instr *instr)
2767 {
2768 struct nvmm_mem mem;
2769 uint8_t data[8];
2770 gvaddr_t gva;
2771 size_t size;
2772 int ret;
2773
2774 size = instr->operand_size;
2775
2776 /* Source. */
2777 ret = store_to_gva(state, instr, &instr->src, &gva, size);
2778 if (ret == -1)
2779 return -1;
2780 ret = read_guest_memory(mach, state, gva, data, size);
2781 if (ret == -1)
2782 return -1;
2783
2784 /* Destination. */
2785 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
2786 if (ret == -1)
2787 return -1;
2788 ret = write_guest_memory(mach, state, gva, data, size);
2789 if (ret == -1)
2790 return -1;
2791
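	/* Only the size matters here: x86_emul_movs() does not use the cb. */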
2792 mem.size = size;
2793 (*instr->emul)(&mem, NULL, state->gprs);
2794
2795 return 0;
2796 }
2797
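/* The decoder and the emulator disagree: give up with EINVAL. */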
2798 #define DISASSEMBLER_BUG() \
2799 do { \
2800 errno = EINVAL; \
2801 return -1; \
2802 	} while (0)
2803
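/*
 * Single memory operand. Determine the direction of the access, fill in the
 * nvmm_mem descriptor, run the emulation function with the user's memory
 * callback, and store a read result into the destination register.
 */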
2804 static int
2805 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
2806 struct x86_instr *instr, struct nvmm_exit *exit)
2807 {
2808 struct nvmm_mem mem;
2809 uint8_t membuf[8];
2810 uint64_t val;
2811
2812 memset(membuf, 0, sizeof(membuf));
2813
2814 mem.gpa = exit->u.mem.gpa;
2815 mem.size = instr->operand_size;
2816 mem.data = membuf;
2817
2818 /* Determine the direction. */
2819 switch (instr->src.type) {
2820 case STORE_REG:
2821 if (instr->src.disp.type != DISP_NONE) {
2822 /* Indirect access. */
2823 mem.write = false;
2824 } else {
2825 /* Direct access. */
2826 mem.write = true;
2827 }
2828 break;
2829 case STORE_IMM:
2830 mem.write = true;
2831 break;
2832 case STORE_SIB:
2833 mem.write = false;
2834 break;
2835 case STORE_DMO:
2836 mem.write = false;
2837 break;
2838 default:
2839 DISASSEMBLER_BUG();
2840 }
2841
2842 if (mem.write) {
2843 switch (instr->src.type) {
2844 case STORE_REG:
2845 if (instr->src.disp.type != DISP_NONE) {
2846 DISASSEMBLER_BUG();
2847 }
2848 val = state->gprs[instr->src.u.reg->num];
2849 val = __SHIFTOUT(val, instr->src.u.reg->mask);
2850 memcpy(mem.data, &val, mem.size);
2851 break;
2852 case STORE_IMM:
2853 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
2854 break;
2855 default:
2856 DISASSEMBLER_BUG();
2857 }
2858 }
2859
2860 (*instr->emul)(&mem, __callbacks.mem, state->gprs);
2861
2862 if (!mem.write) {
2863 if (instr->dst.type != STORE_REG) {
2864 DISASSEMBLER_BUG();
2865 }
2866 memcpy(&val, mem.data, sizeof(uint64_t));
2867 val = __SHIFTIN(val, instr->dst.u.reg->mask);
2868 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
2869 state->gprs[instr->dst.u.reg->num] |= val;
2870 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
2871 }
2872
2873 return 0;
2874 }
2875
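/*
 * Assist the virtualizer on a memory exit: decode the faulting instruction
 * and emulate its memory access through the registered __callbacks.mem
 * handler, honoring REP/REPN, then advance RIP.
 *
 * Minimal caller sketch (assuming the usual VMM exit loop around
 * nvmm_vcpu_run(); error handling abbreviated, other exit reasons omitted):
 *
 *	struct nvmm_exit exit;
 *
 *	while (nvmm_vcpu_run(&mach, cpuid, &exit) == 0) {
 *		switch (exit.reason) {
 *		case NVMM_EXIT_MEMORY:
 *			if (nvmm_assist_mem(&mach, cpuid, &exit) == -1)
 *				err(EXIT_FAILURE, "nvmm_assist_mem");
 *			break;
 *		case NVMM_EXIT_NONE:
 *			break;
 *		default:
 *			errx(EXIT_FAILURE, "unhandled exit reason");
 *		}
 *	}
 */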
2876 int
2877 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
2878 struct nvmm_exit *exit)
2879 {
2880 struct nvmm_x64_state state;
2881 struct x86_instr instr;
2882 	uint64_t cnt = 0; /* appease GCC: only set when REP/REPN is present */
2883 int ret;
2884
2885 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
2886 errno = EINVAL;
2887 return -1;
2888 }
2889
2890 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
2891 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
2892 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
2893 if (ret == -1)
2894 return -1;
2895
2896 if (exit->u.mem.inst_len == 0) {
2897 /*
2898 * The instruction was not fetched from the kernel. Fetch
2899 * it ourselves.
2900 */
2901 ret = fetch_instruction(mach, &state, exit);
2902 if (ret == -1)
2903 return -1;
2904 }
2905
2906 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
2907 &instr, &state);
2908 if (ret == -1) {
2909 errno = ENODEV;
2910 return -1;
2911 }
2912
2913 if (instr.legpref.rep || instr.legpref.repn) {
2914 cnt = rep_get_cnt(&state, instr.address_size);
2915 if (__predict_false(cnt == 0)) {
2916 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2917 goto out;
2918 }
2919 }
2920
2921 if (instr.opcode->movs) {
2922 ret = assist_mem_double(mach, &state, &instr);
2923 } else {
2924 ret = assist_mem_single(mach, &state, &instr, exit);
2925 }
2926 if (ret == -1) {
2927 errno = ENODEV;
2928 return -1;
2929 }
2930
2931 if (instr.legpref.rep || instr.legpref.repn) {
2932 cnt -= 1;
2933 rep_set_cnt(&state, instr.address_size, cnt);
2934 if (cnt == 0) {
2935 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2936 } else if (__predict_false(instr.legpref.repn)) {
2937 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
2938 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2939 }
2940 }
2941 } else {
2942 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
2943 }
2944
2945 out:
2946 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
2947 if (ret == -1)
2948 return -1;
2949
2950 return 0;
2951 }
2952