1 /* $NetBSD: libnvmm_x86.c,v 1.23 2019/02/15 16:42:27 maxv Exp $ */
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
56 /*
57 * Undocumented debugging function. Helpful.
58 */
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 size_t i;
64 int ret;
65
66 const char *segnames[] = {
67 "CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
68 };
69
70 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
71 if (ret == -1)
72 return -1;
73
74 printf("+ VCPU id=%d\n", (int)cpuid);
75 printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
76 printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
77 printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
78 printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
79 printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
80 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
81 for (i = 0; i < NVMM_X64_NSEG; i++) {
82 printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d L=%d\n",
83 segnames[i],
84 state.segs[i].selector,
85 (void *)state.segs[i].base,
86 (void *)state.segs[i].limit,
87 state.segs[i].attrib.p, state.segs[i].attrib.def32,
88 state.segs[i].attrib.lng);
89 }
90 printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
91 printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
92 printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
93 printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
94 printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
95
96 return 0;
97 }
98
99 /* -------------------------------------------------------------------------- */
100
101 #define PTE32_L1_SHIFT 12
102 #define PTE32_L2_SHIFT 22
103
104 #define PTE32_L2_MASK 0xffc00000
105 #define PTE32_L1_MASK 0x003ff000
106
107 #define PTE32_L2_FRAME (PTE32_L2_MASK)
108 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
109
110 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
111 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
112
113 #define CR3_FRAME_32BIT PG_FRAME
114
115 typedef uint32_t pte_32bit_t;
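/*
 * Worked example (illustrative): for gva 0xbfc01234, pte32_l2idx() yields
 * 0x2ff (bits 31:22) and pte32_l1idx() yields 0x001 (bits 21:12); the low
 * 12 bits, 0x234, are the page offset re-added by the caller.
 */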
116
117 static int
118 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
119 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
120 {
121 gpaddr_t L2gpa, L1gpa;
122 uintptr_t L2hva, L1hva;
123 pte_32bit_t *pdir, pte;
124
125 /* We begin with an RWXU access. */
126 *prot = NVMM_PROT_ALL;
127
128 /* Parse L2. */
129 L2gpa = (cr3 & CR3_FRAME_32BIT);
130 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
131 return -1;
132 pdir = (pte_32bit_t *)L2hva;
133 pte = pdir[pte32_l2idx(gva)];
134 if ((pte & PG_V) == 0)
135 return -1;
136 if ((pte & PG_u) == 0)
137 *prot &= ~NVMM_PROT_USER;
138 if ((pte & PG_KW) == 0)
139 *prot &= ~NVMM_PROT_WRITE;
140 if ((pte & PG_PS) && !has_pse)
141 return -1;
142 if (pte & PG_PS) {
143 *gpa = (pte & PTE32_L2_FRAME);
144 *gpa = *gpa + (gva & PTE32_L1_MASK);
145 return 0;
146 }
147
148 /* Parse L1. */
149 L1gpa = (pte & PG_FRAME);
150 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
151 return -1;
152 pdir = (pte_32bit_t *)L1hva;
153 pte = pdir[pte32_l1idx(gva)];
154 if ((pte & PG_V) == 0)
155 return -1;
156 if ((pte & PG_u) == 0)
157 *prot &= ~NVMM_PROT_USER;
158 if ((pte & PG_KW) == 0)
159 *prot &= ~NVMM_PROT_WRITE;
160 if (pte & PG_PS)
161 return -1;
162
163 *gpa = (pte & PG_FRAME);
164 return 0;
165 }
166
167 /* -------------------------------------------------------------------------- */
168
169 #define PTE32_PAE_L1_SHIFT 12
170 #define PTE32_PAE_L2_SHIFT 21
171 #define PTE32_PAE_L3_SHIFT 30
172
173 #define PTE32_PAE_L3_MASK 0xc0000000
174 #define PTE32_PAE_L2_MASK 0x3fe00000
175 #define PTE32_PAE_L1_MASK 0x001ff000
176
177 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
178 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
179 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
180
181 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
182 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
183 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
184
185 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
186
187 typedef uint64_t pte_32bit_pae_t;
188
189 static int
190 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
191 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
192 {
193 gpaddr_t L3gpa, L2gpa, L1gpa;
194 uintptr_t L3hva, L2hva, L1hva;
195 pte_32bit_pae_t *pdir, pte;
196
197 /* We begin with an RWXU access. */
198 *prot = NVMM_PROT_ALL;
199
200 /* Parse L3. */
201 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
202 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
203 return -1;
204 pdir = (pte_32bit_pae_t *)L3hva;
205 pte = pdir[pte32_pae_l3idx(gva)];
206 if ((pte & PG_V) == 0)
207 return -1;
208 if (pte & PG_NX)
209 *prot &= ~NVMM_PROT_EXEC;
210 if (pte & PG_PS)
211 return -1;
212
213 /* Parse L2. */
214 L2gpa = (pte & PG_FRAME);
215 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
216 return -1;
217 pdir = (pte_32bit_pae_t *)L2hva;
218 pte = pdir[pte32_pae_l2idx(gva)];
219 if ((pte & PG_V) == 0)
220 return -1;
221 if ((pte & PG_u) == 0)
222 *prot &= ~NVMM_PROT_USER;
223 if ((pte & PG_KW) == 0)
224 *prot &= ~NVMM_PROT_WRITE;
225 if (pte & PG_NX)
226 *prot &= ~NVMM_PROT_EXEC;
227 if (pte & PG_PS) {
228 *gpa = (pte & PTE32_PAE_L2_FRAME);
229 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
230 return 0;
231 }
232
233 /* Parse L1. */
234 L1gpa = (pte & PG_FRAME);
235 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
236 return -1;
237 pdir = (pte_32bit_pae_t *)L1hva;
238 pte = pdir[pte32_pae_l1idx(gva)];
239 if ((pte & PG_V) == 0)
240 return -1;
241 if ((pte & PG_u) == 0)
242 *prot &= ~NVMM_PROT_USER;
243 if ((pte & PG_KW) == 0)
244 *prot &= ~NVMM_PROT_WRITE;
245 if (pte & PG_NX)
246 *prot &= ~NVMM_PROT_EXEC;
247 if (pte & PG_PS)
248 return -1;
249
250 *gpa = (pte & PG_FRAME);
251 return 0;
252 }
253
254 /* -------------------------------------------------------------------------- */
255
256 #define PTE64_L1_SHIFT 12
257 #define PTE64_L2_SHIFT 21
258 #define PTE64_L3_SHIFT 30
259 #define PTE64_L4_SHIFT 39
260
261 #define PTE64_L4_MASK 0x0000ff8000000000
262 #define PTE64_L3_MASK 0x0000007fc0000000
263 #define PTE64_L2_MASK 0x000000003fe00000
264 #define PTE64_L1_MASK 0x00000000001ff000
265
266 #define PTE64_L4_FRAME PTE64_L4_MASK
267 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
268 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
269 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
270
271 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
272 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
273 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
274 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
275
276 #define CR3_FRAME_64BIT PG_FRAME
277
278 typedef uint64_t pte_64bit_t;
279
280 static inline bool
281 x86_gva_64bit_canonical(gvaddr_t gva)
282 {
283 	/* Bits 63:47 must all have the same value (sign-extension of bit 47). */
284 #define SIGN_EXTEND 0xffff800000000000ULL
285 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
286 }
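/*
 * For instance, 0x00007fffffffe000 and 0xffff800000000000 are canonical
 * (bits 63:47 all-zero, resp. all-one), while 0x0000800000000000 is not
 * and makes the walk below fail.
 */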
287
288 static int
289 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
290 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
291 {
292 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
293 uintptr_t L4hva, L3hva, L2hva, L1hva;
294 pte_64bit_t *pdir, pte;
295
296 /* We begin with an RWXU access. */
297 *prot = NVMM_PROT_ALL;
298
299 if (!x86_gva_64bit_canonical(gva))
300 return -1;
301
302 /* Parse L4. */
303 L4gpa = (cr3 & CR3_FRAME_64BIT);
304 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
305 return -1;
306 pdir = (pte_64bit_t *)L4hva;
307 pte = pdir[pte64_l4idx(gva)];
308 if ((pte & PG_V) == 0)
309 return -1;
310 if ((pte & PG_u) == 0)
311 *prot &= ~NVMM_PROT_USER;
312 if ((pte & PG_KW) == 0)
313 *prot &= ~NVMM_PROT_WRITE;
314 if (pte & PG_NX)
315 *prot &= ~NVMM_PROT_EXEC;
316 if (pte & PG_PS)
317 return -1;
318
319 /* Parse L3. */
320 L3gpa = (pte & PG_FRAME);
321 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
322 return -1;
323 pdir = (pte_64bit_t *)L3hva;
324 pte = pdir[pte64_l3idx(gva)];
325 if ((pte & PG_V) == 0)
326 return -1;
327 if ((pte & PG_u) == 0)
328 *prot &= ~NVMM_PROT_USER;
329 if ((pte & PG_KW) == 0)
330 *prot &= ~NVMM_PROT_WRITE;
331 if (pte & PG_NX)
332 *prot &= ~NVMM_PROT_EXEC;
333 if (pte & PG_PS) {
334 *gpa = (pte & PTE64_L3_FRAME);
335 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
336 return 0;
337 }
338
339 /* Parse L2. */
340 L2gpa = (pte & PG_FRAME);
341 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
342 return -1;
343 pdir = (pte_64bit_t *)L2hva;
344 pte = pdir[pte64_l2idx(gva)];
345 if ((pte & PG_V) == 0)
346 return -1;
347 if ((pte & PG_u) == 0)
348 *prot &= ~NVMM_PROT_USER;
349 if ((pte & PG_KW) == 0)
350 *prot &= ~NVMM_PROT_WRITE;
351 if (pte & PG_NX)
352 *prot &= ~NVMM_PROT_EXEC;
353 if (pte & PG_PS) {
354 *gpa = (pte & PTE64_L2_FRAME);
355 *gpa = *gpa + (gva & PTE64_L1_MASK);
356 return 0;
357 }
358
359 /* Parse L1. */
360 L1gpa = (pte & PG_FRAME);
361 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
362 return -1;
363 pdir = (pte_64bit_t *)L1hva;
364 pte = pdir[pte64_l1idx(gva)];
365 if ((pte & PG_V) == 0)
366 return -1;
367 if ((pte & PG_u) == 0)
368 *prot &= ~NVMM_PROT_USER;
369 if ((pte & PG_KW) == 0)
370 *prot &= ~NVMM_PROT_WRITE;
371 if (pte & PG_NX)
372 *prot &= ~NVMM_PROT_EXEC;
373 if (pte & PG_PS)
374 return -1;
375
376 *gpa = (pte & PG_FRAME);
377 return 0;
378 }
379
380 static inline int
381 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
382 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
383 {
384 bool is_pae, is_lng, has_pse;
385 uint64_t cr3;
386 size_t off;
387 int ret;
388
389 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
390 /* No paging. */
391 *prot = NVMM_PROT_ALL;
392 *gpa = gva;
393 return 0;
394 }
395
396 off = (gva & PAGE_MASK);
397 gva &= ~PAGE_MASK;
398
399 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
400 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
401 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
402 cr3 = state->crs[NVMM_X64_CR_CR3];
403
404 if (is_pae && is_lng) {
405 /* 64bit */
406 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
407 } else if (is_pae && !is_lng) {
408 /* 32bit PAE */
409 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
410 } else if (!is_pae && !is_lng) {
411 /* 32bit */
412 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
413 } else {
414 ret = -1;
415 }
416
417 if (ret == -1) {
418 errno = EFAULT;
419 }
420
421 *gpa = *gpa + off;
422
423 return ret;
424 }
425
426 int
427 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
428 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
429 {
430 struct nvmm_x64_state state;
431 int ret;
432
433 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
434 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
435 if (ret == -1)
436 return -1;
437
438 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
439 }
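/*
 * Typical use from a VMM, sketched under the assumption that 'mach' and
 * 'cpuid' designate an already created machine and VCPU:
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *	if (nvmm_gva_to_gpa(mach, cpuid, gva, &gpa, &prot) == 0 &&
 *	    (prot & NVMM_PROT_READ))
 *		... 'gva' is mapped readable in the guest, backed by 'gpa' ...
 */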
440
441 /* -------------------------------------------------------------------------- */
442
443 static inline bool
444 is_long_mode(struct nvmm_x64_state *state)
445 {
446 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
447 }
448
449 static inline bool
450 is_64bit(struct nvmm_x64_state *state)
451 {
452 return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
453 }
454
455 static inline bool
456 is_32bit(struct nvmm_x64_state *state)
457 {
458 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
459 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
460 }
461
462 static inline bool
463 is_16bit(struct nvmm_x64_state *state)
464 {
465 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
466 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
467 }
468
469 static int
470 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
471 {
472 uint64_t limit;
473
474 	/*
475 	 * This check is incomplete: expand-down segments and other corner
476 	 * cases are not handled yet.
477 	 */
478 if (__predict_false(!seg->attrib.p)) {
479 goto error;
480 }
481
482 limit = (seg->limit + 1);
483 if (__predict_true(seg->attrib.gran)) {
484 limit *= PAGE_SIZE;
485 }
486
487 if (__predict_false(gva + size > limit)) {
488 goto error;
489 }
490
491 return 0;
492
493 error:
494 errno = EFAULT;
495 return -1;
496 }
497
498 static inline void
499 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
500 {
501 *gva += seg->base;
502 }
503
504 static inline uint64_t
505 size_to_mask(size_t size)
506 {
507 switch (size) {
508 case 1:
509 return 0x00000000000000FF;
510 case 2:
511 return 0x000000000000FFFF;
512 case 4:
513 return 0x00000000FFFFFFFF;
514 case 8:
515 default:
516 return 0xFFFFFFFFFFFFFFFF;
517 }
518 }
519
520 static uint64_t
521 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
522 {
523 uint64_t mask, cnt;
524
525 mask = size_to_mask(adsize);
526 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
527
528 return cnt;
529 }
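/*
 * For example, with a 16-bit address size only CX is significant:
 * RCX = 0x12340005 gives a repeat count of 5.
 */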
530
531 static void
532 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
533 {
534 uint64_t mask;
535
536 /* XXX: should we zero-extend? */
537 mask = size_to_mask(adsize);
538 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
539 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
540 }
541
542 static int
543 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
544 gvaddr_t gva, uint8_t *data, size_t size)
545 {
546 struct nvmm_mem mem;
547 nvmm_prot_t prot;
548 gpaddr_t gpa;
549 uintptr_t hva;
550 bool is_mmio;
551 int ret, remain;
552
553 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
554 if (__predict_false(ret == -1)) {
555 return -1;
556 }
557 if (__predict_false(!(prot & NVMM_PROT_READ))) {
558 errno = EFAULT;
559 return -1;
560 }
561
562 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
563 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
564 } else {
565 remain = 0;
566 }
567 size -= remain;
568
569 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
570 is_mmio = (ret == -1);
571
572 if (is_mmio) {
573 mem.data = data;
574 mem.gpa = gpa;
575 mem.write = false;
576 mem.size = size;
577 (*__callbacks.mem)(&mem);
578 } else {
579 memcpy(data, (uint8_t *)hva, size);
580 }
581
582 if (remain > 0) {
583 ret = read_guest_memory(mach, state, gva + size,
584 data + size, remain);
585 } else {
586 ret = 0;
587 }
588
589 return ret;
590 }
591
592 static int
593 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
594 gvaddr_t gva, uint8_t *data, size_t size)
595 {
596 struct nvmm_mem mem;
597 nvmm_prot_t prot;
598 gpaddr_t gpa;
599 uintptr_t hva;
600 bool is_mmio;
601 int ret, remain;
602
603 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
604 if (__predict_false(ret == -1)) {
605 return -1;
606 }
607 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
608 errno = EFAULT;
609 return -1;
610 }
611
612 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
613 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
614 } else {
615 remain = 0;
616 }
617 size -= remain;
618
619 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
620 is_mmio = (ret == -1);
621
622 if (is_mmio) {
623 mem.data = data;
624 mem.gpa = gpa;
625 mem.write = true;
626 mem.size = size;
627 (*__callbacks.mem)(&mem);
628 } else {
629 memcpy((uint8_t *)hva, data, size);
630 }
631
632 if (remain > 0) {
633 ret = write_guest_memory(mach, state, gva + size,
634 data + size, remain);
635 } else {
636 ret = 0;
637 }
638
639 return ret;
640 }
641
642 /* -------------------------------------------------------------------------- */
643
644 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
645
646 #define NVMM_IO_BATCH_SIZE 32
647
648 static int
649 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
650 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
651 {
652 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
653 size_t i, iosize, iocnt;
654 int ret;
655
656 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
657 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
658 iocnt = iosize / io->size;
659
660 io->data = iobuf;
661
662 if (!io->in) {
663 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
664 if (ret == -1)
665 return -1;
666 }
667
668 for (i = 0; i < iocnt; i++) {
669 (*__callbacks.io)(io);
670 io->data += io->size;
671 }
672
673 if (io->in) {
674 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
675 if (ret == -1)
676 return -1;
677 }
678
679 return iocnt;
680 }
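/*
 * Example: a "rep outsb" with RCX=100 is serviced at most NVMM_IO_BATCH_SIZE
 * bytes at a time; the first pass performs 32 one-byte transfers and returns
 * 32, and the remaining iterations are handled on subsequent exits, since
 * RIP is only advanced once the count reaches zero.
 */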
681
682 int
683 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
684 struct nvmm_exit *exit)
685 {
686 struct nvmm_x64_state state;
687 struct nvmm_io io;
688 uint64_t cnt = 0; /* GCC */
689 uint8_t iobuf[8];
690 int iocnt = 1;
691 gvaddr_t gva = 0; /* GCC */
692 int reg = 0; /* GCC */
693 int ret, seg;
694 bool psld = false;
695
696 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
697 errno = EINVAL;
698 return -1;
699 }
700
701 io.port = exit->u.io.port;
702 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
703 io.size = exit->u.io.operand_size;
704 io.data = iobuf;
705
706 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
707 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
708 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
709 if (ret == -1)
710 return -1;
711
712 if (exit->u.io.rep) {
713 cnt = rep_get_cnt(&state, exit->u.io.address_size);
714 if (__predict_false(cnt == 0)) {
715 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
716 goto out;
717 }
718 }
719
720 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
721 psld = true;
722 }
723
724 /*
725 * Determine GVA.
726 */
727 if (exit->u.io.str) {
728 if (io.in) {
729 reg = NVMM_X64_GPR_RDI;
730 } else {
731 reg = NVMM_X64_GPR_RSI;
732 }
733
734 gva = state.gprs[reg];
735 gva &= size_to_mask(exit->u.io.address_size);
736
737 if (exit->u.io.seg != -1) {
738 seg = exit->u.io.seg;
739 } else {
740 if (io.in) {
741 seg = NVMM_X64_SEG_ES;
742 } else {
743 seg = fetch_segment(mach, &state);
744 if (seg == -1)
745 return -1;
746 }
747 }
748
749 if (__predict_true(is_long_mode(&state))) {
750 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
751 segment_apply(&state.segs[seg], &gva);
752 }
753 } else {
754 ret = segment_check(&state.segs[seg], gva, io.size);
755 if (ret == -1)
756 return -1;
757 segment_apply(&state.segs[seg], &gva);
758 }
759
760 if (exit->u.io.rep && !psld) {
761 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
762 if (iocnt == -1)
763 return -1;
764 goto done;
765 }
766 }
767
768 if (!io.in) {
769 if (!exit->u.io.str) {
770 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
771 } else {
772 ret = read_guest_memory(mach, &state, gva, io.data,
773 io.size);
774 if (ret == -1)
775 return -1;
776 }
777 }
778
779 (*__callbacks.io)(&io);
780
781 if (io.in) {
782 if (!exit->u.io.str) {
783 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
784 if (io.size == 4) {
785 /* Zero-extend to 64 bits. */
786 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
787 }
788 } else {
789 ret = write_guest_memory(mach, &state, gva, io.data,
790 io.size);
791 if (ret == -1)
792 return -1;
793 }
794 }
795
796 done:
797 if (exit->u.io.str) {
798 if (__predict_false(psld)) {
799 state.gprs[reg] -= iocnt * io.size;
800 } else {
801 state.gprs[reg] += iocnt * io.size;
802 }
803 }
804
805 if (exit->u.io.rep) {
806 cnt -= iocnt;
807 rep_set_cnt(&state, exit->u.io.address_size, cnt);
808 if (cnt == 0) {
809 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
810 }
811 } else {
812 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
813 }
814
815 out:
816 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
817 if (ret == -1)
818 return -1;
819
820 return 0;
821 }
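/*
 * Sketch of the expected call site in a VMM run loop (the surrounding
 * switch on exit.reason and the error handling are assumed):
 *
 *	case NVMM_EXIT_IO:
 *		if (nvmm_assist_io(mach, cpuid, &exit) == -1)
 *			err(EXIT_FAILURE, "nvmm_assist_io");
 *		break;
 *
 * The assist invokes the __callbacks.io handler and updates RIP, RCX and
 * RSI/RDI in the VCPU state as needed.
 */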
822
823 /* -------------------------------------------------------------------------- */
824
825 struct x86_emul {
826 bool read;
827 bool notouch;
828 void (*func)(struct nvmm_mem *, uint64_t *);
829 };
830
831 static void x86_func_or(struct nvmm_mem *, uint64_t *);
832 static void x86_func_and(struct nvmm_mem *, uint64_t *);
833 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
834 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
835 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
836 static void x86_func_test(struct nvmm_mem *, uint64_t *);
837 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
838 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
839 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
840 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
841
842 static const struct x86_emul x86_emul_or = {
843 .read = true,
844 .func = x86_func_or
845 };
846
847 static const struct x86_emul x86_emul_and = {
848 .read = true,
849 .func = x86_func_and
850 };
851
852 static const struct x86_emul x86_emul_sub = {
853 .read = true,
854 .func = x86_func_sub
855 };
856
857 static const struct x86_emul x86_emul_xor = {
858 .read = true,
859 .func = x86_func_xor
860 };
861
862 static const struct x86_emul x86_emul_cmp = {
863 .notouch = true,
864 .func = x86_func_cmp
865 };
866
867 static const struct x86_emul x86_emul_test = {
868 .notouch = true,
869 .func = x86_func_test
870 };
871
872 static const struct x86_emul x86_emul_mov = {
873 .func = x86_func_mov
874 };
875
876 static const struct x86_emul x86_emul_stos = {
877 .func = x86_func_stos
878 };
879
880 static const struct x86_emul x86_emul_lods = {
881 .func = x86_func_lods
882 };
883
884 static const struct x86_emul x86_emul_movs = {
885 .func = x86_func_movs
886 };
887
888 /* Legacy prefixes. */
889 #define LEG_LOCK 0xF0
890 #define LEG_REPN 0xF2
891 #define LEG_REP 0xF3
892 #define LEG_OVR_CS 0x2E
893 #define LEG_OVR_SS 0x36
894 #define LEG_OVR_DS 0x3E
895 #define LEG_OVR_ES 0x26
896 #define LEG_OVR_FS 0x64
897 #define LEG_OVR_GS 0x65
898 #define LEG_OPR_OVR 0x66
899 #define LEG_ADR_OVR 0x67
900
901 struct x86_legpref {
902 bool opr_ovr:1;
903 bool adr_ovr:1;
904 bool rep:1;
905 bool repn:1;
906 int seg;
907 };
908
909 struct x86_rexpref {
910 bool present;
911 bool w;
912 bool r;
913 bool x;
914 bool b;
915 };
916
917 struct x86_reg {
918 int num; /* NVMM GPR state index */
919 uint64_t mask;
920 };
921
922 enum x86_disp_type {
923 DISP_NONE,
924 DISP_0,
925 DISP_1,
926 DISP_4
927 };
928
929 struct x86_disp {
930 enum x86_disp_type type;
931 uint64_t data; /* 4 bytes, but can be sign-extended */
932 };
933
934 enum REGMODRM__Mod {
935 MOD_DIS0, /* also, register indirect */
936 MOD_DIS1,
937 MOD_DIS4,
938 MOD_REG
939 };
940
941 enum REGMODRM__Reg {
942 REG_000, /* these fields are indexes to the register map */
943 REG_001,
944 REG_010,
945 REG_011,
946 REG_100,
947 REG_101,
948 REG_110,
949 REG_111
950 };
951
952 enum REGMODRM__Rm {
953 RM_000, /* reg */
954 RM_001, /* reg */
955 RM_010, /* reg */
956 RM_011, /* reg */
957 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
958 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
959 RM_110,
960 RM_111
961 };
962
963 struct x86_regmodrm {
964 bool present;
965 enum REGMODRM__Mod mod;
966 enum REGMODRM__Reg reg;
967 enum REGMODRM__Rm rm;
968 };
969
970 struct x86_immediate {
971 uint64_t data;
972 };
973
974 struct x86_sib {
975 uint8_t scale;
976 const struct x86_reg *idx;
977 const struct x86_reg *bas;
978 };
979
980 enum x86_store_type {
981 STORE_NONE,
982 STORE_REG,
983 STORE_IMM,
984 STORE_SIB,
985 STORE_DMO
986 };
987
988 struct x86_store {
989 enum x86_store_type type;
990 union {
991 const struct x86_reg *reg;
992 struct x86_immediate imm;
993 struct x86_sib sib;
994 uint64_t dmo;
995 } u;
996 struct x86_disp disp;
997 int hardseg;
998 };
999
1000 struct x86_instr {
1001 size_t len;
1002 struct x86_legpref legpref;
1003 struct x86_rexpref rexpref;
1004 size_t operand_size;
1005 size_t address_size;
1006 uint64_t zeroextend_mask;
1007
1008 struct x86_regmodrm regmodrm;
1009
1010 const struct x86_opcode *opcode;
1011
1012 struct x86_store src;
1013 struct x86_store dst;
1014 struct x86_store *strm;
1015
1016 const struct x86_emul *emul;
1017 };
1018
1019 struct x86_decode_fsm {
1020 /* vcpu */
1021 bool is64bit;
1022 bool is32bit;
1023 bool is16bit;
1024
1025 /* fsm */
1026 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1027 uint8_t *buf;
1028 uint8_t *end;
1029 };
1030
1031 struct x86_opcode {
1032 uint8_t byte;
1033 bool regmodrm;
1034 bool regtorm;
1035 bool dmo;
1036 bool todmo;
1037 bool movs;
1038 bool stos;
1039 bool lods;
1040 bool szoverride;
1041 int defsize;
1042 int allsize;
1043 bool group1;
1044 bool group3;
1045 bool group11;
1046 bool immediate;
1047 int flags;
1048 const struct x86_emul *emul;
1049 };
1050
1051 struct x86_group_entry {
1052 const struct x86_emul *emul;
1053 };
1054
1055 #define OPSIZE_BYTE 0x01
1056 #define OPSIZE_WORD 0x02 /* 2 bytes */
1057 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1058 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1059
1060 #define FLAG_imm8 0x01
1061 #define FLAG_immz 0x02
1062 #define FLAG_ze 0x04
1063
1064 static const struct x86_group_entry group1[8] = {
1065 [1] = { .emul = &x86_emul_or },
1066 [4] = { .emul = &x86_emul_and },
1067 [6] = { .emul = &x86_emul_xor },
1068 [7] = { .emul = &x86_emul_cmp }
1069 };
1070
1071 static const struct x86_group_entry group3[8] = {
1072 [0] = { .emul = &x86_emul_test },
1073 [1] = { .emul = &x86_emul_test }
1074 };
1075
1076 static const struct x86_group_entry group11[8] = {
1077 [0] = { .emul = &x86_emul_mov }
1078 };
1079
1080 static const struct x86_opcode primary_opcode_table[] = {
1081 /*
1082 * Group1
1083 */
1084 {
1085 /* Eb, Ib */
1086 .byte = 0x80,
1087 .regmodrm = true,
1088 .regtorm = true,
1089 .szoverride = false,
1090 .defsize = OPSIZE_BYTE,
1091 .allsize = -1,
1092 .group1 = true,
1093 .immediate = true,
1094 .emul = NULL /* group1 */
1095 },
1096 {
1097 /* Ev, Iz */
1098 .byte = 0x81,
1099 .regmodrm = true,
1100 .regtorm = true,
1101 .szoverride = true,
1102 .defsize = -1,
1103 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1104 .group1 = true,
1105 .immediate = true,
1106 .flags = FLAG_immz,
1107 .emul = NULL /* group1 */
1108 },
1109 {
1110 /* Ev, Ib */
1111 .byte = 0x83,
1112 .regmodrm = true,
1113 .regtorm = true,
1114 .szoverride = true,
1115 .defsize = -1,
1116 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1117 .group1 = true,
1118 .immediate = true,
1119 .flags = FLAG_imm8,
1120 .emul = NULL /* group1 */
1121 },
1122
1123 /*
1124 * Group3
1125 */
1126 {
1127 /* Eb, Ib */
1128 .byte = 0xF6,
1129 .regmodrm = true,
1130 .regtorm = true,
1131 .szoverride = false,
1132 .defsize = OPSIZE_BYTE,
1133 .allsize = -1,
1134 .group3 = true,
1135 .immediate = true,
1136 .emul = NULL /* group3 */
1137 },
1138 {
1139 /* Ev, Iz */
1140 .byte = 0xF7,
1141 .regmodrm = true,
1142 .regtorm = true,
1143 .szoverride = true,
1144 .defsize = -1,
1145 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1146 .group3 = true,
1147 .immediate = true,
1148 .flags = FLAG_immz,
1149 .emul = NULL /* group3 */
1150 },
1151
1152 /*
1153 * Group11
1154 */
1155 {
1156 /* Eb, Ib */
1157 .byte = 0xC6,
1158 .regmodrm = true,
1159 .regtorm = true,
1160 .szoverride = false,
1161 .defsize = OPSIZE_BYTE,
1162 .allsize = -1,
1163 .group11 = true,
1164 .immediate = true,
1165 .emul = NULL /* group11 */
1166 },
1167 {
1168 /* Ev, Iz */
1169 .byte = 0xC7,
1170 .regmodrm = true,
1171 .regtorm = true,
1172 .szoverride = true,
1173 .defsize = -1,
1174 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1175 .group11 = true,
1176 .immediate = true,
1177 .flags = FLAG_immz,
1178 .emul = NULL /* group11 */
1179 },
1180
1181 /*
1182 * OR
1183 */
1184 {
1185 /* Eb, Gb */
1186 .byte = 0x08,
1187 .regmodrm = true,
1188 .regtorm = true,
1189 .szoverride = false,
1190 .defsize = OPSIZE_BYTE,
1191 .allsize = -1,
1192 .emul = &x86_emul_or
1193 },
1194 {
1195 /* Ev, Gv */
1196 .byte = 0x09,
1197 .regmodrm = true,
1198 .regtorm = true,
1199 .szoverride = true,
1200 .defsize = -1,
1201 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1202 .emul = &x86_emul_or
1203 },
1204 {
1205 /* Gb, Eb */
1206 .byte = 0x0A,
1207 .regmodrm = true,
1208 .regtorm = false,
1209 .szoverride = false,
1210 .defsize = OPSIZE_BYTE,
1211 .allsize = -1,
1212 .emul = &x86_emul_or
1213 },
1214 {
1215 /* Gv, Ev */
1216 .byte = 0x0B,
1217 .regmodrm = true,
1218 .regtorm = false,
1219 .szoverride = true,
1220 .defsize = -1,
1221 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1222 .emul = &x86_emul_or
1223 },
1224
1225 /*
1226 * AND
1227 */
1228 {
1229 /* Eb, Gb */
1230 .byte = 0x20,
1231 .regmodrm = true,
1232 .regtorm = true,
1233 .szoverride = false,
1234 .defsize = OPSIZE_BYTE,
1235 .allsize = -1,
1236 .emul = &x86_emul_and
1237 },
1238 {
1239 /* Ev, Gv */
1240 .byte = 0x21,
1241 .regmodrm = true,
1242 .regtorm = true,
1243 .szoverride = true,
1244 .defsize = -1,
1245 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1246 .emul = &x86_emul_and
1247 },
1248 {
1249 /* Gb, Eb */
1250 .byte = 0x22,
1251 .regmodrm = true,
1252 .regtorm = false,
1253 .szoverride = false,
1254 .defsize = OPSIZE_BYTE,
1255 .allsize = -1,
1256 .emul = &x86_emul_and
1257 },
1258 {
1259 /* Gv, Ev */
1260 .byte = 0x23,
1261 .regmodrm = true,
1262 .regtorm = false,
1263 .szoverride = true,
1264 .defsize = -1,
1265 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1266 .emul = &x86_emul_and
1267 },
1268
1269 /*
1270 * SUB
1271 */
1272 {
1273 /* Eb, Gb */
1274 .byte = 0x28,
1275 .regmodrm = true,
1276 .regtorm = true,
1277 .szoverride = false,
1278 .defsize = OPSIZE_BYTE,
1279 .allsize = -1,
1280 .emul = &x86_emul_sub
1281 },
1282 {
1283 /* Ev, Gv */
1284 .byte = 0x29,
1285 .regmodrm = true,
1286 .regtorm = true,
1287 .szoverride = true,
1288 .defsize = -1,
1289 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1290 .emul = &x86_emul_sub
1291 },
1292 {
1293 /* Gb, Eb */
1294 .byte = 0x2A,
1295 .regmodrm = true,
1296 .regtorm = false,
1297 .szoverride = false,
1298 .defsize = OPSIZE_BYTE,
1299 .allsize = -1,
1300 .emul = &x86_emul_sub
1301 },
1302 {
1303 /* Gv, Ev */
1304 .byte = 0x2B,
1305 .regmodrm = true,
1306 .regtorm = false,
1307 .szoverride = true,
1308 .defsize = -1,
1309 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1310 .emul = &x86_emul_sub
1311 },
1312
1313 /*
1314 * XOR
1315 */
1316 {
1317 /* Eb, Gb */
1318 .byte = 0x30,
1319 .regmodrm = true,
1320 .regtorm = true,
1321 .szoverride = false,
1322 .defsize = OPSIZE_BYTE,
1323 .allsize = -1,
1324 .emul = &x86_emul_xor
1325 },
1326 {
1327 /* Ev, Gv */
1328 .byte = 0x31,
1329 .regmodrm = true,
1330 .regtorm = true,
1331 .szoverride = true,
1332 .defsize = -1,
1333 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1334 .emul = &x86_emul_xor
1335 },
1336 {
1337 /* Gb, Eb */
1338 .byte = 0x32,
1339 .regmodrm = true,
1340 .regtorm = false,
1341 .szoverride = false,
1342 .defsize = OPSIZE_BYTE,
1343 .allsize = -1,
1344 .emul = &x86_emul_xor
1345 },
1346 {
1347 /* Gv, Ev */
1348 .byte = 0x33,
1349 .regmodrm = true,
1350 .regtorm = false,
1351 .szoverride = true,
1352 .defsize = -1,
1353 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1354 .emul = &x86_emul_xor
1355 },
1356
1357 /*
1358 * MOV
1359 */
1360 {
1361 /* Eb, Gb */
1362 .byte = 0x88,
1363 .regmodrm = true,
1364 .regtorm = true,
1365 .szoverride = false,
1366 .defsize = OPSIZE_BYTE,
1367 .allsize = -1,
1368 .emul = &x86_emul_mov
1369 },
1370 {
1371 /* Ev, Gv */
1372 .byte = 0x89,
1373 .regmodrm = true,
1374 .regtorm = true,
1375 .szoverride = true,
1376 .defsize = -1,
1377 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1378 .emul = &x86_emul_mov
1379 },
1380 {
1381 /* Gb, Eb */
1382 .byte = 0x8A,
1383 .regmodrm = true,
1384 .regtorm = false,
1385 .szoverride = false,
1386 .defsize = OPSIZE_BYTE,
1387 .allsize = -1,
1388 .emul = &x86_emul_mov
1389 },
1390 {
1391 /* Gv, Ev */
1392 .byte = 0x8B,
1393 .regmodrm = true,
1394 .regtorm = false,
1395 .szoverride = true,
1396 .defsize = -1,
1397 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1398 .emul = &x86_emul_mov
1399 },
1400 {
1401 /* AL, Ob */
1402 .byte = 0xA0,
1403 .dmo = true,
1404 .todmo = false,
1405 .szoverride = false,
1406 .defsize = OPSIZE_BYTE,
1407 .allsize = -1,
1408 .emul = &x86_emul_mov
1409 },
1410 {
1411 /* rAX, Ov */
1412 .byte = 0xA1,
1413 .dmo = true,
1414 .todmo = false,
1415 .szoverride = true,
1416 .defsize = -1,
1417 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1418 .emul = &x86_emul_mov
1419 },
1420 {
1421 /* Ob, AL */
1422 .byte = 0xA2,
1423 .dmo = true,
1424 .todmo = true,
1425 .szoverride = false,
1426 .defsize = OPSIZE_BYTE,
1427 .allsize = -1,
1428 .emul = &x86_emul_mov
1429 },
1430 {
1431 /* Ov, rAX */
1432 .byte = 0xA3,
1433 .dmo = true,
1434 .todmo = true,
1435 .szoverride = true,
1436 .defsize = -1,
1437 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1438 .emul = &x86_emul_mov
1439 },
1440
1441 /*
1442 * MOVS
1443 */
1444 {
1445 /* Yb, Xb */
1446 .byte = 0xA4,
1447 .movs = true,
1448 .szoverride = false,
1449 .defsize = OPSIZE_BYTE,
1450 .allsize = -1,
1451 .emul = &x86_emul_movs
1452 },
1453 {
1454 /* Yv, Xv */
1455 .byte = 0xA5,
1456 .movs = true,
1457 .szoverride = true,
1458 .defsize = -1,
1459 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1460 .emul = &x86_emul_movs
1461 },
1462
1463 /*
1464 * STOS
1465 */
1466 {
1467 /* Yb, AL */
1468 .byte = 0xAA,
1469 .stos = true,
1470 .szoverride = false,
1471 .defsize = OPSIZE_BYTE,
1472 .allsize = -1,
1473 .emul = &x86_emul_stos
1474 },
1475 {
1476 /* Yv, rAX */
1477 .byte = 0xAB,
1478 .stos = true,
1479 .szoverride = true,
1480 .defsize = -1,
1481 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1482 .emul = &x86_emul_stos
1483 },
1484
1485 /*
1486 * LODS
1487 */
1488 {
1489 /* AL, Xb */
1490 .byte = 0xAC,
1491 .lods = true,
1492 .szoverride = false,
1493 .defsize = OPSIZE_BYTE,
1494 .allsize = -1,
1495 .emul = &x86_emul_lods
1496 },
1497 {
1498 /* rAX, Xv */
1499 .byte = 0xAD,
1500 .lods = true,
1501 .szoverride = true,
1502 .defsize = -1,
1503 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1504 .emul = &x86_emul_lods
1505 },
1506 };
1507
1508 static const struct x86_opcode secondary_opcode_table[] = {
1509 /*
1510 * MOVZX
1511 */
1512 {
1513 /* Gv, Eb */
1514 .byte = 0xB6,
1515 .regmodrm = true,
1516 .regtorm = false,
1517 .szoverride = true,
1518 .defsize = OPSIZE_BYTE,
1519 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1520 .flags = FLAG_ze,
1521 .emul = &x86_emul_mov
1522 },
1523 {
1524 /* Gv, Ew */
1525 .byte = 0xB7,
1526 .regmodrm = true,
1527 .regtorm = false,
1528 .szoverride = true,
1529 .defsize = OPSIZE_WORD,
1530 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1531 .flags = FLAG_ze,
1532 .emul = &x86_emul_mov
1533 },
1534 };
1535
1536 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1537
1538 /* [REX-present][enc][opsize] */
1539 static const struct x86_reg gpr_map__special[2][4][8] = {
1540 [false] = {
1541 /* No REX prefix. */
1542 [0b00] = {
1543 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1544 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1545 [2] = { -1, 0 },
1546 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1547 [4] = { -1, 0 },
1548 [5] = { -1, 0 },
1549 [6] = { -1, 0 },
1550 [7] = { -1, 0 },
1551 },
1552 [0b01] = {
1553 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1554 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1555 [2] = { -1, 0 },
1556 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1557 [4] = { -1, 0 },
1558 [5] = { -1, 0 },
1559 [6] = { -1, 0 },
1560 [7] = { -1, 0 },
1561 },
1562 [0b10] = {
1563 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1564 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1565 [2] = { -1, 0 },
1566 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1567 [4] = { -1, 0 },
1568 [5] = { -1, 0 },
1569 [6] = { -1, 0 },
1570 [7] = { -1, 0 },
1571 },
1572 [0b11] = {
1573 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1574 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1575 [2] = { -1, 0 },
1576 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1577 [4] = { -1, 0 },
1578 [5] = { -1, 0 },
1579 [6] = { -1, 0 },
1580 [7] = { -1, 0 },
1581 }
1582 },
1583 [true] = {
1584 /* Has REX prefix. */
1585 [0b00] = {
1586 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1587 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1588 [2] = { -1, 0 },
1589 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1590 [4] = { -1, 0 },
1591 [5] = { -1, 0 },
1592 [6] = { -1, 0 },
1593 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1594 },
1595 [0b01] = {
1596 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1597 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1598 [2] = { -1, 0 },
1599 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1600 [4] = { -1, 0 },
1601 [5] = { -1, 0 },
1602 [6] = { -1, 0 },
1603 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1604 },
1605 [0b10] = {
1606 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1607 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1608 [2] = { -1, 0 },
1609 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1610 [4] = { -1, 0 },
1611 [5] = { -1, 0 },
1612 [6] = { -1, 0 },
1613 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1614 },
1615 [0b11] = {
1616 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1617 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1618 [2] = { -1, 0 },
1619 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1620 [4] = { -1, 0 },
1621 [5] = { -1, 0 },
1622 [6] = { -1, 0 },
1623 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1624 }
1625 }
1626 };
1627
1628 /* [REX.R/X/B bit][enc][size] */
1629 static const struct x86_reg gpr_map[2][8][8] = {
1630 [false] = {
1631 /* Not extended. */
1632 [0b000] = {
1633 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1634 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1635 [2] = { -1, 0 },
1636 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1637 [4] = { -1, 0 },
1638 [5] = { -1, 0 },
1639 [6] = { -1, 0 },
1640 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1641 },
1642 [0b001] = {
1643 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1644 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1645 [2] = { -1, 0 },
1646 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1647 [4] = { -1, 0 },
1648 [5] = { -1, 0 },
1649 [6] = { -1, 0 },
1650 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1651 },
1652 [0b010] = {
1653 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1654 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1655 [2] = { -1, 0 },
1656 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1657 [4] = { -1, 0 },
1658 [5] = { -1, 0 },
1659 [6] = { -1, 0 },
1660 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1661 },
1662 [0b011] = {
1663 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1664 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1665 [2] = { -1, 0 },
1666 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1667 [4] = { -1, 0 },
1668 [5] = { -1, 0 },
1669 [6] = { -1, 0 },
1670 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1671 },
1672 [0b100] = {
1673 [0] = { -1, 0 }, /* SPECIAL */
1674 [1] = { -1, 0 }, /* SPECIAL */
1675 [2] = { -1, 0 },
1676 [3] = { -1, 0 }, /* SPECIAL */
1677 [4] = { -1, 0 },
1678 [5] = { -1, 0 },
1679 [6] = { -1, 0 },
1680 [7] = { -1, 0 }, /* SPECIAL */
1681 },
1682 [0b101] = {
1683 [0] = { -1, 0 }, /* SPECIAL */
1684 [1] = { -1, 0 }, /* SPECIAL */
1685 [2] = { -1, 0 },
1686 [3] = { -1, 0 }, /* SPECIAL */
1687 [4] = { -1, 0 },
1688 [5] = { -1, 0 },
1689 [6] = { -1, 0 },
1690 [7] = { -1, 0 }, /* SPECIAL */
1691 },
1692 [0b110] = {
1693 [0] = { -1, 0 }, /* SPECIAL */
1694 [1] = { -1, 0 }, /* SPECIAL */
1695 [2] = { -1, 0 },
1696 [3] = { -1, 0 }, /* SPECIAL */
1697 [4] = { -1, 0 },
1698 [5] = { -1, 0 },
1699 [6] = { -1, 0 },
1700 [7] = { -1, 0 }, /* SPECIAL */
1701 },
1702 [0b111] = {
1703 [0] = { -1, 0 }, /* SPECIAL */
1704 [1] = { -1, 0 }, /* SPECIAL */
1705 [2] = { -1, 0 },
1706 [3] = { -1, 0 }, /* SPECIAL */
1707 [4] = { -1, 0 },
1708 [5] = { -1, 0 },
1709 [6] = { -1, 0 },
1710 [7] = { -1, 0 }, /* SPECIAL */
1711 },
1712 },
1713 [true] = {
1714 /* Extended. */
1715 [0b000] = {
1716 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1717 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1718 [2] = { -1, 0 },
1719 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1720 [4] = { -1, 0 },
1721 [5] = { -1, 0 },
1722 [6] = { -1, 0 },
1723 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1724 },
1725 [0b001] = {
1726 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1727 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1728 [2] = { -1, 0 },
1729 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1730 [4] = { -1, 0 },
1731 [5] = { -1, 0 },
1732 [6] = { -1, 0 },
1733 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1734 },
1735 [0b010] = {
1736 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1737 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1738 [2] = { -1, 0 },
1739 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1740 [4] = { -1, 0 },
1741 [5] = { -1, 0 },
1742 [6] = { -1, 0 },
1743 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1744 },
1745 [0b011] = {
1746 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1747 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1748 [2] = { -1, 0 },
1749 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1750 [4] = { -1, 0 },
1751 [5] = { -1, 0 },
1752 [6] = { -1, 0 },
1753 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1754 },
1755 [0b100] = {
1756 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1757 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1758 [2] = { -1, 0 },
1759 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1760 [4] = { -1, 0 },
1761 [5] = { -1, 0 },
1762 [6] = { -1, 0 },
1763 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1764 },
1765 [0b101] = {
1766 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1767 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1768 [2] = { -1, 0 },
1769 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1770 [4] = { -1, 0 },
1771 [5] = { -1, 0 },
1772 [6] = { -1, 0 },
1773 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1774 },
1775 [0b110] = {
1776 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1777 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1778 [2] = { -1, 0 },
1779 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1780 [4] = { -1, 0 },
1781 [5] = { -1, 0 },
1782 [6] = { -1, 0 },
1783 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1784 },
1785 [0b111] = {
1786 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1787 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1788 [2] = { -1, 0 },
1789 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1790 [4] = { -1, 0 },
1791 [5] = { -1, 0 },
1792 [6] = { -1, 0 },
1793 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1794 },
1795 }
1796 };
1797
1798 static int
1799 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1800 {
1801 fsm->fn = NULL;
1802 return -1;
1803 }
1804
1805 static int
1806 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1807 {
1808 if (fsm->buf + n > fsm->end) {
1809 return -1;
1810 }
1811 memcpy(bytes, fsm->buf, n);
1812 return 0;
1813 }
1814
1815 static void
1816 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1817 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1818 {
1819 fsm->buf += n;
1820 if (fsm->buf > fsm->end) {
1821 fsm->fn = node_overflow;
1822 } else {
1823 fsm->fn = fn;
1824 }
1825 }
1826
1827 static const struct x86_reg *
1828 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1829 {
1830 enc &= 0b11;
1831 if (regsize == 8) {
1832 /* May be 64bit without REX */
1833 return &gpr_map__special[1][enc][regsize-1];
1834 }
1835 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1836 }
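/*
 * For instance, byte-register encoding 0b100 resolves to AH when no REX
 * prefix is present, but to SPL when one is.
 */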
1837
1838 /*
1839 * Special node, for MOVS. Fake two displacements of zero on the source and
1840 * destination registers.
1841 */
1842 static int
1843 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1844 {
1845 size_t adrsize;
1846
1847 adrsize = instr->address_size;
1848
1849 /* DS:RSI */
1850 instr->src.type = STORE_REG;
1851 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1852 instr->src.disp.type = DISP_0;
1853
1854 /* ES:RDI, force ES */
1855 instr->dst.type = STORE_REG;
1856 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1857 instr->dst.disp.type = DISP_0;
1858 instr->dst.hardseg = NVMM_X64_SEG_ES;
1859
1860 fsm_advance(fsm, 0, NULL);
1861
1862 return 0;
1863 }
1864
1865 /*
1866 * Special node, for STOS and LODS. Fake a displacement of zero on the
1867 * destination register.
1868 */
1869 static int
1870 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1871 {
1872 const struct x86_opcode *opcode = instr->opcode;
1873 struct x86_store *stlo, *streg;
1874 size_t adrsize, regsize;
1875
1876 adrsize = instr->address_size;
1877 regsize = instr->operand_size;
1878
1879 if (opcode->stos) {
1880 streg = &instr->src;
1881 stlo = &instr->dst;
1882 } else {
1883 streg = &instr->dst;
1884 stlo = &instr->src;
1885 }
1886
1887 streg->type = STORE_REG;
1888 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1889
1890 stlo->type = STORE_REG;
1891 if (opcode->stos) {
1892 /* ES:RDI, force ES */
1893 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1894 stlo->hardseg = NVMM_X64_SEG_ES;
1895 } else {
1896 /* DS:RSI */
1897 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1898 }
1899 stlo->disp.type = DISP_0;
1900
1901 fsm_advance(fsm, 0, NULL);
1902
1903 return 0;
1904 }
1905
1906 static int
1907 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1908 {
1909 const struct x86_opcode *opcode = instr->opcode;
1910 struct x86_store *stdmo, *streg;
1911 size_t adrsize, regsize;
1912
1913 adrsize = instr->address_size;
1914 regsize = instr->operand_size;
1915
1916 if (opcode->todmo) {
1917 streg = &instr->src;
1918 stdmo = &instr->dst;
1919 } else {
1920 streg = &instr->dst;
1921 stdmo = &instr->src;
1922 }
1923
1924 streg->type = STORE_REG;
1925 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1926
1927 stdmo->type = STORE_DMO;
1928 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1929 return -1;
1930 }
1931 fsm_advance(fsm, adrsize, NULL);
1932
1933 return 0;
1934 }
1935
1936 static inline uint64_t
1937 sign_extend(uint64_t val, int size)
1938 {
1939 if (size == 1) {
1940 if (val & __BIT(7))
1941 val |= 0xFFFFFFFFFFFFFF00;
1942 } else if (size == 2) {
1943 if (val & __BIT(15))
1944 val |= 0xFFFFFFFFFFFF0000;
1945 } else if (size == 4) {
1946 if (val & __BIT(31))
1947 val |= 0xFFFFFFFF00000000;
1948 }
1949 return val;
1950 }
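/*
 * E.g. sign_extend(0x80, 1) == 0xffffffffffffff80, while
 * sign_extend(0x7f, 1) == 0x7f.
 */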
1951
1952 static int
1953 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1954 {
1955 const struct x86_opcode *opcode = instr->opcode;
1956 struct x86_store *store;
1957 uint8_t immsize;
1958 size_t sesize = 0;
1959
1960 /* The immediate is the source */
1961 store = &instr->src;
1962 immsize = instr->operand_size;
1963
1964 if (opcode->flags & FLAG_imm8) {
1965 sesize = immsize;
1966 immsize = 1;
1967 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1968 sesize = immsize;
1969 immsize = 4;
1970 }
1971
1972 store->type = STORE_IMM;
1973 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1974 return -1;
1975 }
1976 fsm_advance(fsm, immsize, NULL);
1977
1978 if (sesize != 0) {
1979 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1980 }
1981
1982 return 0;
1983 }
1984
1985 static int
1986 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1987 {
1988 const struct x86_opcode *opcode = instr->opcode;
1989 uint64_t data = 0;
1990 size_t n;
1991
1992 if (instr->strm->disp.type == DISP_1) {
1993 n = 1;
1994 } else { /* DISP4 */
1995 n = 4;
1996 }
1997
1998 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1999 return -1;
2000 }
2001
2002 if (__predict_true(fsm->is64bit)) {
2003 data = sign_extend(data, n);
2004 }
2005
2006 instr->strm->disp.data = data;
2007
2008 if (opcode->immediate) {
2009 fsm_advance(fsm, n, node_immediate);
2010 } else {
2011 fsm_advance(fsm, n, NULL);
2012 }
2013
2014 return 0;
2015 }
2016
2017 static const struct x86_reg *
2018 get_register_idx(struct x86_instr *instr, uint8_t index)
2019 {
2020 uint8_t enc = index;
2021 const struct x86_reg *reg;
2022 size_t regsize;
2023
2024 regsize = instr->address_size;
2025 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2026
2027 if (reg->num == -1) {
2028 reg = resolve_special_register(instr, enc, regsize);
2029 }
2030
2031 return reg;
2032 }
2033
2034 static const struct x86_reg *
2035 get_register_bas(struct x86_instr *instr, uint8_t base)
2036 {
2037 uint8_t enc = base;
2038 const struct x86_reg *reg;
2039 size_t regsize;
2040
2041 regsize = instr->address_size;
2042 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2043 if (reg->num == -1) {
2044 reg = resolve_special_register(instr, enc, regsize);
2045 }
2046
2047 return reg;
2048 }
2049
2050 static int
2051 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2052 {
2053 const struct x86_opcode *opcode;
2054 uint8_t scale, index, base;
2055 bool noindex, nobase;
2056 uint8_t byte;
2057
2058 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2059 return -1;
2060 }
2061
2062 scale = ((byte & 0b11000000) >> 6);
2063 index = ((byte & 0b00111000) >> 3);
2064 base = ((byte & 0b00000111) >> 0);
2065
2066 opcode = instr->opcode;
2067
2068 noindex = false;
2069 nobase = false;
2070
2071 if (index == 0b100 && !instr->rexpref.x) {
2072 /* Special case: the index is null */
2073 noindex = true;
2074 }
2075
2076 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2077 /* Special case: the base is null + disp32 */
2078 instr->strm->disp.type = DISP_4;
2079 nobase = true;
2080 }
2081
2082 instr->strm->type = STORE_SIB;
2083 instr->strm->u.sib.scale = (1 << scale);
2084 if (!noindex)
2085 instr->strm->u.sib.idx = get_register_idx(instr, index);
2086 if (!nobase)
2087 instr->strm->u.sib.bas = get_register_bas(instr, base);
2088
2089 /* May have a displacement, or an immediate */
2090 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2091 fsm_advance(fsm, 1, node_disp);
2092 } else if (opcode->immediate) {
2093 fsm_advance(fsm, 1, node_immediate);
2094 } else {
2095 fsm_advance(fsm, 1, NULL);
2096 }
2097
2098 return 0;
2099 }
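/*
 * Example: in 64-bit mode without REX, a SIB byte of 0x88 encodes scale=4,
 * index=RCX, base=RAX, i.e. an effective address of RAX + 4*RCX plus any
 * displacement.
 */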
2100
2101 static const struct x86_reg *
2102 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2103 {
2104 uint8_t enc = instr->regmodrm.reg;
2105 const struct x86_reg *reg;
2106 size_t regsize;
2107
2108 regsize = instr->operand_size;
2109
2110 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2111 if (reg->num == -1) {
2112 reg = resolve_special_register(instr, enc, regsize);
2113 }
2114
2115 return reg;
2116 }
2117
2118 static const struct x86_reg *
2119 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2120 {
2121 uint8_t enc = instr->regmodrm.rm;
2122 const struct x86_reg *reg;
2123 size_t regsize;
2124
2125 if (instr->strm->disp.type == DISP_NONE) {
2126 regsize = instr->operand_size;
2127 } else {
2128 /* Indirect access, the size is that of the address. */
2129 regsize = instr->address_size;
2130 }
2131
2132 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2133 if (reg->num == -1) {
2134 reg = resolve_special_register(instr, enc, regsize);
2135 }
2136
2137 return reg;
2138 }
2139
2140 static inline bool
2141 has_sib(struct x86_instr *instr)
2142 {
2143 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2144 }
2145
2146 static inline bool
2147 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2148 {
2149 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2150 instr->regmodrm.rm == RM_RBP_DISP32);
2151 }
2152
2153 static inline bool
2154 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2155 {
2156 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2157 instr->regmodrm.rm == RM_RBP_DISP32);
2158 }
2159
2160 static enum x86_disp_type
2161 get_disp_type(struct x86_instr *instr)
2162 {
2163 switch (instr->regmodrm.mod) {
2164 case MOD_DIS0: /* indirect */
2165 return DISP_0;
2166 case MOD_DIS1: /* indirect+1 */
2167 return DISP_1;
2168 case MOD_DIS4: /* indirect+4 */
2169 return DISP_4;
2170 case MOD_REG: /* direct */
2171 default: /* gcc */
2172 return DISP_NONE;
2173 }
2174 }
2175
2176 static int
2177 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2178 {
2179 struct x86_store *strg, *strm;
2180 const struct x86_opcode *opcode;
2181 const struct x86_reg *reg;
2182 uint8_t byte;
2183
2184 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2185 return -1;
2186 }
2187
2188 opcode = instr->opcode;
2189
2190 instr->regmodrm.present = true;
2191 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2192 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2193 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2194
2195 if (opcode->regtorm) {
2196 strg = &instr->src;
2197 strm = &instr->dst;
2198 } else { /* RM to REG */
2199 strm = &instr->src;
2200 strg = &instr->dst;
2201 }
2202
2203 /* Save for later use. */
2204 instr->strm = strm;
2205
2206 	/*
2207 	 * Special cases: Groups. The REG field of REGMODRM is the index in the
2208 	 * group; if an immediate follows, the source operand is filled in by node_immediate.
2209 	 */
2210 if (opcode->group1) {
2211 if (group1[instr->regmodrm.reg].emul == NULL) {
2212 return -1;
2213 }
2214 instr->emul = group1[instr->regmodrm.reg].emul;
2215 } else if (opcode->group3) {
2216 if (group3[instr->regmodrm.reg].emul == NULL) {
2217 return -1;
2218 }
2219 instr->emul = group3[instr->regmodrm.reg].emul;
2220 } else if (opcode->group11) {
2221 if (group11[instr->regmodrm.reg].emul == NULL) {
2222 return -1;
2223 }
2224 instr->emul = group11[instr->regmodrm.reg].emul;
2225 }
2226
2227 if (!opcode->immediate) {
2228 reg = get_register_reg(instr, opcode);
2229 if (reg == NULL) {
2230 return -1;
2231 }
2232 strg->type = STORE_REG;
2233 strg->u.reg = reg;
2234 }
2235
2236 if (has_sib(instr)) {
2237 /* Overwrites RM */
2238 fsm_advance(fsm, 1, node_sib);
2239 return 0;
2240 }
2241
2242 /* The displacement applies to RM. */
2243 strm->disp.type = get_disp_type(instr);
2244
2245 if (is_rip_relative(fsm, instr)) {
2246 /* Overwrites RM */
2247 strm->type = STORE_REG;
2248 strm->u.reg = &gpr_map__rip;
2249 strm->disp.type = DISP_4;
2250 fsm_advance(fsm, 1, node_disp);
2251 return 0;
2252 }
2253
2254 if (is_disp32_only(fsm, instr)) {
2255 /* Overwrites RM */
2256 strm->type = STORE_REG;
2257 strm->u.reg = NULL;
2258 strm->disp.type = DISP_4;
2259 fsm_advance(fsm, 1, node_disp);
2260 return 0;
2261 }
2262
2263 reg = get_register_rm(instr, opcode);
2264 if (reg == NULL) {
2265 return -1;
2266 }
2267 strm->type = STORE_REG;
2268 strm->u.reg = reg;
2269
2270 if (strm->disp.type == DISP_NONE) {
2271 /* Direct register addressing mode */
2272 if (opcode->immediate) {
2273 fsm_advance(fsm, 1, node_immediate);
2274 } else {
2275 fsm_advance(fsm, 1, NULL);
2276 }
2277 } else if (strm->disp.type == DISP_0) {
2278 /* Indirect register addressing mode */
2279 if (opcode->immediate) {
2280 fsm_advance(fsm, 1, node_immediate);
2281 } else {
2282 fsm_advance(fsm, 1, NULL);
2283 }
2284 } else {
2285 fsm_advance(fsm, 1, node_disp);
2286 }
2287
2288 return 0;
2289 }
2290
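/*
 * Effective operand size. REX.W forces 8 bytes when the opcode allows an
 * override; otherwise the 0x66 prefix toggles between 4 and 2 bytes in
 * 32/64-bit mode, and between 2 and 4 bytes in 16-bit mode. Opcodes without
 * szoverride keep their default size.
 */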
2291 static size_t
2292 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2293 {
2294 const struct x86_opcode *opcode = instr->opcode;
2295 int opsize;
2296
2297 /* Get the opsize */
2298 if (!opcode->szoverride) {
2299 opsize = opcode->defsize;
2300 } else if (instr->rexpref.present && instr->rexpref.w) {
2301 opsize = 8;
2302 } else {
2303 if (!fsm->is16bit) {
2304 if (instr->legpref.opr_ovr) {
2305 opsize = 2;
2306 } else {
2307 opsize = 4;
2308 }
2309 } else { /* 16bit */
2310 if (instr->legpref.opr_ovr) {
2311 opsize = 4;
2312 } else {
2313 opsize = 2;
2314 }
2315 }
2316 }
2317
2318 /* See if available */
2319 if ((opcode->allsize & opsize) == 0) {
2320 /* XXX: the opcode does not list this operand size; do we care? */
2321 }
2322
2323 return opsize;
2324 }
2325
2326 static size_t
2327 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2328 {
2329 if (fsm->is64bit) {
2330 if (__predict_false(instr->legpref.adr_ovr)) {
2331 return 4;
2332 }
2333 return 8;
2334 }
2335
2336 if (fsm->is32bit) {
2337 if (__predict_false(instr->legpref.adr_ovr)) {
2338 return 2;
2339 }
2340 return 4;
2341 }
2342
2343 /* 16bit. */
2344 if (__predict_false(instr->legpref.adr_ovr)) {
2345 return 4;
2346 }
2347 return 2;
2348 }
2349
2350 static int
2351 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2352 {
2353 const struct x86_opcode *opcode;
2354 uint8_t byte;
2355 size_t i, n;
2356
2357 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2358 return -1;
2359 }
2360
2361 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2362 for (i = 0; i < n; i++) {
2363 if (primary_opcode_table[i].byte == byte)
2364 break;
2365 }
2366 if (i == n) {
2367 return -1;
2368 }
2369 opcode = &primary_opcode_table[i];
2370
2371 instr->opcode = opcode;
2372 instr->emul = opcode->emul;
2373 instr->operand_size = get_operand_size(fsm, instr);
2374 instr->address_size = get_address_size(fsm, instr);
2375
2376 if (fsm->is64bit && (instr->operand_size == 4)) {
2377 /* Zero-extend to 64 bits. */
2378 instr->zeroextend_mask = ~size_to_mask(4);
2379 }
2380
2381 if (opcode->regmodrm) {
2382 fsm_advance(fsm, 1, node_regmodrm);
2383 } else if (opcode->dmo) {
2384 /* Direct-Memory Offsets */
2385 fsm_advance(fsm, 1, node_dmo);
2386 } else if (opcode->stos || opcode->lods) {
2387 fsm_advance(fsm, 1, node_stlo);
2388 } else if (opcode->movs) {
2389 fsm_advance(fsm, 1, node_movs);
2390 } else {
2391 return -1;
2392 }
2393
2394 return 0;
2395 }
2396
2397 static int
2398 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2399 {
2400 const struct x86_opcode *opcode;
2401 uint8_t byte;
2402 size_t i, n;
2403
2404 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2405 return -1;
2406 }
2407
2408 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2409 for (i = 0; i < n; i++) {
2410 if (secondary_opcode_table[i].byte == byte)
2411 break;
2412 }
2413 if (i == n) {
2414 return -1;
2415 }
2416 opcode = &secondary_opcode_table[i];
2417
2418 instr->opcode = opcode;
2419 instr->emul = opcode->emul;
2420 instr->operand_size = get_operand_size(fsm, instr);
2421 instr->address_size = get_address_size(fsm, instr);
2422
2423 if (fsm->is64bit && (instr->operand_size == 4)) {
2424 /* Zero-extend to 64 bits. */
2425 instr->zeroextend_mask = ~size_to_mask(4);
2426 }
2427
2428 if (opcode->flags & FLAG_ze) {
2429 /*
2430 * Compute the mask for zero-extend, and shrink the operand size
2431 * accordingly: we move fewer bytes.
2432 */
2433 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2434 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2435 instr->operand_size = opcode->defsize;
2436 }
2437
2438 if (opcode->regmodrm) {
2439 fsm_advance(fsm, 1, node_regmodrm);
2440 } else {
2441 return -1;
2442 }
2443
2444 return 0;
2445 }
2446
2447 static int
2448 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2449 {
2450 uint8_t byte;
2451
2452 #define ESCAPE 0x0F
2453 #define VEX_1 0xC5
2454 #define VEX_2 0xC4
2455 #define XOP 0x8F
2456
2457 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2458 return -1;
2459 }
2460
2461 /*
2462 * We don't handle XOP. It is AMD-specific, and it was dropped shortly
2463 * after being introduced.
2464 */
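/*
 * Also note that a VEX-encoded instruction cannot carry a REX prefix, so
 * when a REX prefix was seen a 0xC4/0xC5 byte is handed to the primary
 * opcode table rather than being rejected as VEX.
 */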
2465 if (byte == ESCAPE) {
2466 fsm_advance(fsm, 1, node_secondary_opcode);
2467 } else if (!instr->rexpref.present) {
2468 if (byte == VEX_1) {
2469 return -1;
2470 } else if (byte == VEX_2) {
2471 return -1;
2472 } else {
2473 fsm->fn = node_primary_opcode;
2474 }
2475 } else {
2476 fsm->fn = node_primary_opcode;
2477 }
2478
2479 return 0;
2480 }
2481
2482 static int
2483 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2484 {
2485 struct x86_rexpref *rexpref = &instr->rexpref;
2486 uint8_t byte;
2487 size_t n = 0;
2488
2489 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2490 return -1;
2491 }
2492
2493 if (byte >= 0x40 && byte <= 0x4F) {
2494 if (__predict_false(!fsm->is64bit)) {
2495 return -1;
2496 }
2497 rexpref->present = true;
2498 rexpref->w = ((byte & 0x8) != 0);
2499 rexpref->r = ((byte & 0x4) != 0);
2500 rexpref->x = ((byte & 0x2) != 0);
2501 rexpref->b = ((byte & 0x1) != 0);
2502 n = 1;
2503 }
2504
2505 fsm_advance(fsm, n, node_main);
2506 return 0;
2507 }
2508
2509 static int
2510 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2511 {
2512 uint8_t byte;
2513
2514 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2515 return -1;
2516 }
2517
2518 if (byte == LEG_OPR_OVR) {
2519 instr->legpref.opr_ovr = 1;
2520 } else if (byte == LEG_OVR_DS) {
2521 instr->legpref.seg = NVMM_X64_SEG_DS;
2522 } else if (byte == LEG_OVR_ES) {
2523 instr->legpref.seg = NVMM_X64_SEG_ES;
2524 } else if (byte == LEG_REP) {
2525 instr->legpref.rep = 1;
2526 } else if (byte == LEG_OVR_GS) {
2527 instr->legpref.seg = NVMM_X64_SEG_GS;
2528 } else if (byte == LEG_OVR_FS) {
2529 instr->legpref.seg = NVMM_X64_SEG_FS;
2530 } else if (byte == LEG_ADR_OVR) {
2531 instr->legpref.adr_ovr = 1;
2532 } else if (byte == LEG_OVR_CS) {
2533 instr->legpref.seg = NVMM_X64_SEG_CS;
2534 } else if (byte == LEG_OVR_SS) {
2535 instr->legpref.seg = NVMM_X64_SEG_SS;
2536 } else if (byte == LEG_REPN) {
2537 instr->legpref.repn = 1;
2538 } else if (byte == LEG_LOCK) {
2539 /* ignore */
2540 } else {
2541 /* not a legacy prefix */
2542 fsm_advance(fsm, 0, node_rex_prefix);
2543 return 0;
2544 }
2545
2546 fsm_advance(fsm, 1, node_legacy_prefix);
2547 return 0;
2548 }
2549
2550 static int
2551 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2552 struct nvmm_x64_state *state)
2553 {
2554 struct x86_decode_fsm fsm;
2555 int ret;
2556
2557 memset(instr, 0, sizeof(*instr));
2558 instr->legpref.seg = -1;
2559
2560 fsm.is64bit = is_64bit(state);
2561 fsm.is32bit = is_32bit(state);
2562 fsm.is16bit = is_16bit(state);
2563
2564 fsm.fn = node_legacy_prefix;
2565 fsm.buf = inst_bytes;
2566 fsm.end = inst_bytes + inst_len;
2567
2568 while (fsm.fn != NULL) {
2569 ret = (*fsm.fn)(&fsm, instr);
2570 if (ret == -1)
2571 return -1;
2572 }
2573
2574 instr->len = fsm.buf - inst_bytes;
2575
2576 return 0;
2577 }
2578
2579 /* -------------------------------------------------------------------------- */
2580
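/*
 * EXEC_INSTR(sz, instr) generates exec_<instr><sz>(op1, op2, rflags): it
 * runs the ALU instruction natively on the host, in AT&T operand order
 * (op2 is the destination), and stores the host RFLAGS resulting from the
 * operation into *rflags. EXEC_DISPATCHER(instr) then picks the right
 * width at run time from the operand size.
 */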
2581 #define EXEC_INSTR(sz, instr) \
2582 static uint##sz##_t \
2583 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2584 { \
2585 uint##sz##_t res; \
2586 __asm __volatile ( \
2587 #instr " %2, %3;" \
2588 "mov %3, %1;" \
2589 "pushfq;" \
2590 "popq %0" \
2591 : "=r" (*rflags), "=r" (res) \
2592 : "r" (op1), "r" (op2)); \
2593 return res; \
2594 }
2595
2596 #define EXEC_DISPATCHER(instr) \
2597 static uint64_t \
2598 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2599 { \
2600 switch (opsize) { \
2601 case 1: \
2602 return exec_##instr##8(op1, op2, rflags); \
2603 case 2: \
2604 return exec_##instr##16(op1, op2, rflags); \
2605 case 4: \
2606 return exec_##instr##32(op1, op2, rflags); \
2607 default: \
2608 return exec_##instr##64(op1, op2, rflags); \
2609 } \
2610 }
2611
2612 /* SUB: ret = op2 - op1 (op2 is the destination, AT&T operand order) */
2613 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2614 EXEC_INSTR(8, sub)
2615 EXEC_INSTR(16, sub)
2616 EXEC_INSTR(32, sub)
2617 EXEC_INSTR(64, sub)
2618 EXEC_DISPATCHER(sub)
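
/*
 * Illustration: exec_sub(0x10, 0x30, &fl, 4) performs a 32-bit SUB on the
 * host and returns 0x30 - 0x10 = 0x20, with the resulting RFLAGS (PSL_C
 * and PSL_Z clear here) stored in fl. Only the bits selected by
 * PSL_SUB_MASK are merged back into the guest RFLAGS.
 */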
2619
2620 /* OR: ret = op1 | op2 */
2621 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2622 EXEC_INSTR(8, or)
2623 EXEC_INSTR(16, or)
2624 EXEC_INSTR(32, or)
2625 EXEC_INSTR(64, or)
2626 EXEC_DISPATCHER(or)
2627
2628 /* AND: ret = op1 & op2 */
2629 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2630 EXEC_INSTR(8, and)
2631 EXEC_INSTR(16, and)
2632 EXEC_INSTR(32, and)
2633 EXEC_INSTR(64, and)
2634 EXEC_DISPATCHER(and)
2635
2636 /* XOR: ret = op1 ^ op2 */
2637 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2638 EXEC_INSTR(8, xor)
2639 EXEC_INSTR(16, xor)
2640 EXEC_INSTR(32, xor)
2641 EXEC_INSTR(64, xor)
2642 EXEC_DISPATCHER(xor)
2643
2644 /* -------------------------------------------------------------------------- */
2645
2646 /*
2647 * Emulation functions. We don't care about the order of the operands, except
2648 * for SUB, CMP and TEST. For those we look at mem->write to determine which
2649 * operand is op1 and which is op2.
2650 */
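/*
 * Example with SUB, in AT&T syntax: for "sub %rax, (%rbx)" the memory
 * operand is the destination, mem->write is true, so op1 is the register
 * value already present in mem->data and op2 is the value fetched from
 * memory; for "sub (%rbx), %rax" it is the other way around. In both cases
 * the result is op2 - op1, i.e. destination minus source.
 */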
2651
2652 static void
2653 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2654 {
2655 uint64_t *retval = (uint64_t *)mem->data;
2656 const bool write = mem->write;
2657 uint64_t *op1, op2, fl, ret;
2658
2659 op1 = (uint64_t *)mem->data;
2660 op2 = 0;
2661
2662 /* Fetch the value to be OR'ed (op2). */
2663 mem->data = (uint8_t *)&op2;
2664 mem->write = false;
2665 (*__callbacks.mem)(mem);
2666
2667 /* Perform the OR. */
2668 ret = exec_or(*op1, op2, &fl, mem->size);
2669
2670 if (write) {
2671 /* Write back the result. */
2672 mem->data = (uint8_t *)&ret;
2673 mem->write = true;
2674 (*__callbacks.mem)(mem);
2675 } else {
2676 /* Return data to the caller. */
2677 *retval = ret;
2678 }
2679
2680 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2681 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2682 }
2683
2684 static void
2685 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2686 {
2687 uint64_t *retval = (uint64_t *)mem->data;
2688 const bool write = mem->write;
2689 uint64_t *op1, op2, fl, ret;
2690
2691 op1 = (uint64_t *)mem->data;
2692 op2 = 0;
2693
2694 /* Fetch the value to be AND'ed (op2). */
2695 mem->data = (uint8_t *)&op2;
2696 mem->write = false;
2697 (*__callbacks.mem)(mem);
2698
2699 /* Perform the AND. */
2700 ret = exec_and(*op1, op2, &fl, mem->size);
2701
2702 if (write) {
2703 /* Write back the result. */
2704 mem->data = (uint8_t *)&ret;
2705 mem->write = true;
2706 (*__callbacks.mem)(mem);
2707 } else {
2708 /* Return data to the caller. */
2709 *retval = ret;
2710 }
2711
2712 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2713 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2714 }
2715
2716 static void
2717 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2718 {
2719 uint64_t *retval = (uint64_t *)mem->data;
2720 const bool write = mem->write;
2721 uint64_t *op1, *op2, fl, ret;
2722 uint64_t tmp;
2723 bool memop1;
2724
2725 memop1 = !mem->write;
2726 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2727 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2728
2729 /* Fetch the value to be SUB'ed (op1 or op2). */
2730 mem->data = (uint8_t *)&tmp;
2731 mem->write = false;
2732 (*__callbacks.mem)(mem);
2733
2734 /* Perform the SUB. */
2735 ret = exec_sub(*op1, *op2, &fl, mem->size);
2736
2737 if (write) {
2738 /* Write back the result. */
2739 mem->data = (uint8_t *)&ret;
2740 mem->write = true;
2741 (*__callbacks.mem)(mem);
2742 } else {
2743 /* Return data to the caller. */
2744 *retval = ret;
2745 }
2746
2747 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2748 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2749 }
2750
2751 static void
2752 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2753 {
2754 uint64_t *retval = (uint64_t *)mem->data;
2755 const bool write = mem->write;
2756 uint64_t *op1, op2, fl, ret;
2757
2758 op1 = (uint64_t *)mem->data;
2759 op2 = 0;
2760
2761 /* Fetch the value to be XOR'ed (op2). */
2762 mem->data = (uint8_t *)&op2;
2763 mem->write = false;
2764 (*__callbacks.mem)(mem);
2765
2766 /* Perform the XOR. */
2767 ret = exec_xor(*op1, op2, &fl, mem->size);
2768
2769 if (write) {
2770 /* Write back the result. */
2771 mem->data = (uint8_t *)&ret;
2772 mem->write = true;
2773 (*__callbacks.mem)(mem);
2774 } else {
2775 /* Return data to the caller. */
2776 *retval = ret;
2777 }
2778
2779 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2780 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2781 }
2782
2783 static void
2784 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2785 {
2786 uint64_t *op1, *op2, fl;
2787 uint64_t tmp;
2788 bool memop1;
2789
2790 memop1 = !mem->write;
2791 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2792 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2793
2794 /* Fetch the value to be CMP'ed (op1 or op2). */
2795 mem->data = (uint8_t *)&tmp;
2796 mem->write = false;
2797 (*__callbacks.mem)(mem);
2798
2799 /* Perform the CMP. */
2800 exec_sub(*op1, *op2, &fl, mem->size);
2801
2802 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2803 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2804 }
2805
2806 static void
2807 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2808 {
2809 uint64_t *op1, *op2, fl;
2810 uint64_t tmp;
2811 bool memop1;
2812
2813 memop1 = !mem->write;
2814 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2815 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2816
2817 /* Fetch the value to be TEST'ed (op1 or op2). */
2818 mem->data = (uint8_t *)&tmp;
2819 mem->write = false;
2820 (*__callbacks.mem)(mem);
2821
2822 /* Perform the TEST. */
2823 exec_and(*op1, *op2, &fl, mem->size);
2824
2825 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2826 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2827 }
2828
2829 static void
2830 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2831 {
2832 /*
2833 * Nothing special, just move without emulation.
2834 */
2835 (*__callbacks.mem)(mem);
2836 }
2837
2838 static void
2839 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2840 {
2841 /*
2842 * Just move, and update RDI.
2843 */
2844 (*__callbacks.mem)(mem);
2845
2846 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2847 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2848 } else {
2849 gprs[NVMM_X64_GPR_RDI] += mem->size;
2850 }
2851 }
2852
2853 static void
2854 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2855 {
2856 /*
2857 * Just move, and update RSI.
2858 */
2859 (*__callbacks.mem)(mem);
2860
2861 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2862 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2863 } else {
2864 gprs[NVMM_X64_GPR_RSI] += mem->size;
2865 }
2866 }
2867
2868 static void
2869 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2870 {
2871 /*
2872 * Special instruction: double memory operand. Don't call the callback;
2873 * the memory accesses have already been performed by the caller.
2874 */
2875
2876 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2877 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2878 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2879 } else {
2880 gprs[NVMM_X64_GPR_RSI] += mem->size;
2881 gprs[NVMM_X64_GPR_RDI] += mem->size;
2882 }
2883 }
2884
2885 /* -------------------------------------------------------------------------- */
2886
2887 static inline uint64_t
2888 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2889 {
2890 uint64_t val;
2891
2892 val = state->gprs[gpr];
2893 val &= size_to_mask(instr->address_size);
2894
2895 return val;
2896 }
2897
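/*
 * Compute the guest virtual address referenced by a store: base plus scaled
 * index (for SIB), or a single base register, or a direct memory offset,
 * plus the displacement, with each register value truncated to the address
 * size; the segment override (DS by default) is then applied, and its limit
 * checked outside of long mode. For instance "movl %eax, 0x10(%rbx,%rcx,4)"
 * yields gva = RBX + 4*RCX + 0x10 in the DS segment.
 */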
2898 static int
2899 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2900 struct x86_store *store, gvaddr_t *gvap, size_t size)
2901 {
2902 struct x86_sib *sib;
2903 gvaddr_t gva = 0;
2904 uint64_t reg;
2905 int ret, seg;
2906
2907 if (store->type == STORE_SIB) {
2908 sib = &store->u.sib;
2909 if (sib->bas != NULL)
2910 gva += gpr_read_address(instr, state, sib->bas->num);
2911 if (sib->idx != NULL) {
2912 reg = gpr_read_address(instr, state, sib->idx->num);
2913 gva += sib->scale * reg;
2914 }
2915 } else if (store->type == STORE_REG) {
2916 if (store->u.reg == NULL) {
2917 /* The base is null. Happens with disp32-only. */
2918 } else {
2919 gva = gpr_read_address(instr, state, store->u.reg->num);
2920 }
2921 } else {
2922 gva = store->u.dmo;
2923 }
2924
2925 if (store->disp.type != DISP_NONE) {
2926 gva += store->disp.data;
2927 }
2928
2929 if (store->hardseg != 0) {
2930 seg = store->hardseg;
2931 } else {
2932 if (__predict_false(instr->legpref.seg != -1)) {
2933 seg = instr->legpref.seg;
2934 } else {
2935 seg = NVMM_X64_SEG_DS;
2936 }
2937 }
2938
2939 if (__predict_true(is_long_mode(state))) {
2940 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2941 segment_apply(&state->segs[seg], &gva);
2942 }
2943 } else {
2944 ret = segment_check(&state->segs[seg], gva, size);
2945 if (ret == -1)
2946 return -1;
2947 segment_apply(&state->segs[seg], &gva);
2948 }
2949
2950 *gvap = gva;
2951 return 0;
2952 }
2953
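/*
 * Pre-fetch the first bytes of the instruction at CS:RIP and scan its
 * legacy prefixes for a segment override; DS is returned when no override
 * is present.
 */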
2954 static int
2955 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2956 {
2957 uint8_t inst_bytes[5], byte;
2958 size_t i, fetchsize;
2959 gvaddr_t gva;
2960 int ret, seg;
2961
2962 fetchsize = sizeof(inst_bytes);
2963
2964 gva = state->gprs[NVMM_X64_GPR_RIP];
2965 if (__predict_false(!is_long_mode(state))) {
2966 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2967 fetchsize);
2968 if (ret == -1)
2969 return -1;
2970 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2971 }
2972
2973 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2974 if (ret == -1)
2975 return -1;
2976
2977 seg = NVMM_X64_SEG_DS;
2978 for (i = 0; i < fetchsize; i++) {
2979 byte = inst_bytes[i];
2980
2981 if (byte == LEG_OVR_DS) {
2982 seg = NVMM_X64_SEG_DS;
2983 } else if (byte == LEG_OVR_ES) {
2984 seg = NVMM_X64_SEG_ES;
2985 } else if (byte == LEG_OVR_GS) {
2986 seg = NVMM_X64_SEG_GS;
2987 } else if (byte == LEG_OVR_FS) {
2988 seg = NVMM_X64_SEG_FS;
2989 } else if (byte == LEG_OVR_CS) {
2990 seg = NVMM_X64_SEG_CS;
2991 } else if (byte == LEG_OVR_SS) {
2992 seg = NVMM_X64_SEG_SS;
2993 } else if (byte == LEG_OPR_OVR) {
2994 /* nothing */
2995 } else if (byte == LEG_ADR_OVR) {
2996 /* nothing */
2997 } else if (byte == LEG_REP) {
2998 /* nothing */
2999 } else if (byte == LEG_REPN) {
3000 /* nothing */
3001 } else if (byte == LEG_LOCK) {
3002 /* nothing */
3003 } else {
3004 return seg;
3005 }
3006 }
3007
3008 return seg;
3009 }
3010
3011 static int
3012 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3013 struct nvmm_exit *exit)
3014 {
3015 size_t fetchsize;
3016 gvaddr_t gva;
3017 int ret;
3018
3019 fetchsize = sizeof(exit->u.mem.inst_bytes);
3020
3021 gva = state->gprs[NVMM_X64_GPR_RIP];
3022 if (__predict_false(!is_long_mode(state))) {
3023 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3024 fetchsize);
3025 if (ret == -1)
3026 return -1;
3027 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3028 }
3029
3030 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3031 fetchsize);
3032 if (ret == -1)
3033 return -1;
3034
3035 exit->u.mem.inst_len = fetchsize;
3036
3037 return 0;
3038 }
3039
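/*
 * MOVS has two memory operands. Read the source into a local buffer, write
 * it back to the destination, then let the emulation function advance RSI
 * and RDI.
 */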
3040 static int
3041 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3042 struct x86_instr *instr)
3043 {
3044 struct nvmm_mem mem;
3045 uint8_t data[8];
3046 gvaddr_t gva;
3047 size_t size;
3048 int ret;
3049
3050 size = instr->operand_size;
3051
3052 /* Source. */
3053 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3054 if (ret == -1)
3055 return -1;
3056 ret = read_guest_memory(mach, state, gva, data, size);
3057 if (ret == -1)
3058 return -1;
3059
3060 /* Destination. */
3061 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3062 if (ret == -1)
3063 return -1;
3064 ret = write_guest_memory(mach, state, gva, data, size);
3065 if (ret == -1)
3066 return -1;
3067
3068 mem.size = size;
3069 (*instr->emul->func)(&mem, state->gprs);
3070
3071 return 0;
3072 }
3073
3074 #define DISASSEMBLER_BUG() \
3075 do { \
3076 errno = EINVAL; \
3077 return -1; \
3078 } while (0)
3079
3080 static int
3081 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3082 struct x86_instr *instr, struct nvmm_exit *exit)
3083 {
3084 struct nvmm_mem mem;
3085 uint8_t membuf[8];
3086 uint64_t val;
3087
3088 memset(membuf, 0, sizeof(membuf));
3089
3090 mem.gpa = exit->u.mem.gpa;
3091 mem.size = instr->operand_size;
3092 mem.data = membuf;
3093
3094 /* Determine the direction. */
3095 switch (instr->src.type) {
3096 case STORE_REG:
3097 if (instr->src.disp.type != DISP_NONE) {
3098 /* Indirect access. */
3099 mem.write = false;
3100 } else {
3101 /* Direct access. */
3102 mem.write = true;
3103 }
3104 break;
3105 case STORE_IMM:
3106 mem.write = true;
3107 break;
3108 case STORE_SIB:
3109 mem.write = false;
3110 break;
3111 case STORE_DMO:
3112 mem.write = false;
3113 break;
3114 default:
3115 DISASSEMBLER_BUG();
3116 }
3117
3118 if (mem.write) {
3119 switch (instr->src.type) {
3120 case STORE_REG:
3121 if (instr->src.disp.type != DISP_NONE) {
3122 DISASSEMBLER_BUG();
3123 }
3124 val = state->gprs[instr->src.u.reg->num];
3125 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3126 memcpy(mem.data, &val, mem.size);
3127 break;
3128 case STORE_IMM:
3129 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3130 break;
3131 default:
3132 DISASSEMBLER_BUG();
3133 }
3134 } else if (instr->emul->read) {
3135 if (instr->dst.type != STORE_REG) {
3136 DISASSEMBLER_BUG();
3137 }
3138 if (instr->dst.disp.type != DISP_NONE) {
3139 DISASSEMBLER_BUG();
3140 }
3141 val = state->gprs[instr->dst.u.reg->num];
3142 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3143 memcpy(mem.data, &val, mem.size);
3144 }
3145
3146 (*instr->emul->func)(&mem, state->gprs);
3147
3148 if (!instr->emul->notouch && !mem.write) {
3149 if (instr->dst.type != STORE_REG) {
3150 DISASSEMBLER_BUG();
3151 }
3152 memcpy(&val, membuf, sizeof(uint64_t));
3153 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3154 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3155 state->gprs[instr->dst.u.reg->num] |= val;
3156 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3157 }
3158
3159 return 0;
3160 }
3161
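/*
 * Assist the kernel on a memory exit: fetch the VCPU state, fetch the
 * instruction bytes if the kernel did not provide them, decode and emulate
 * the instruction (single or double memory operand), handle one REP
 * iteration per call, advance RIP when appropriate, and write the GPRs
 * back.
 */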
3162 int
3163 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3164 struct nvmm_exit *exit)
3165 {
3166 struct nvmm_x64_state state;
3167 struct x86_instr instr;
3168 uint64_t cnt = 0; /* GCC */
3169 int ret;
3170
3171 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3172 errno = EINVAL;
3173 return -1;
3174 }
3175
3176 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3177 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3178 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3179 if (ret == -1)
3180 return -1;
3181
3182 if (exit->u.mem.inst_len == 0) {
3183 /*
3184 * The kernel did not provide the instruction bytes. Fetch
3185 * them ourselves.
3186 */
3187 ret = fetch_instruction(mach, &state, exit);
3188 if (ret == -1)
3189 return -1;
3190 }
3191
3192 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3193 &instr, &state);
3194 if (ret == -1) {
3195 errno = ENODEV;
3196 return -1;
3197 }
3198
3199 if (instr.legpref.rep || instr.legpref.repn) {
3200 cnt = rep_get_cnt(&state, instr.address_size);
3201 if (__predict_false(cnt == 0)) {
3202 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3203 goto out;
3204 }
3205 }
3206
3207 if (instr.opcode->movs) {
3208 ret = assist_mem_double(mach, &state, &instr);
3209 } else {
3210 ret = assist_mem_single(mach, &state, &instr, exit);
3211 }
3212 if (ret == -1) {
3213 errno = ENODEV;
3214 return -1;
3215 }
3216
3217 if (instr.legpref.rep || instr.legpref.repn) {
3218 cnt -= 1;
3219 rep_set_cnt(&state, instr.address_size, cnt);
3220 if (cnt == 0) {
3221 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3222 } else if (__predict_false(instr.legpref.repn)) {
3223 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3224 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3225 }
3226 }
3227 } else {
3228 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3229 }
3230
3231 out:
3232 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3233 if (ret == -1)
3234 return -1;
3235
3236 return 0;
3237 }
3238