1 /*	$NetBSD: libnvmm_x86.c,v 1.19 2019/02/07 10:58:45 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #include "nvmm.h"
47
48 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
49
50 #include <x86/specialreg.h>
51
52 extern struct nvmm_callbacks __callbacks;
53
54 /* -------------------------------------------------------------------------- */
55
56 /*
57 * Undocumented debugging function. Helpful.
58 */
59 int
60 nvmm_vcpu_dump(struct nvmm_machine *mach, nvmm_cpuid_t cpuid)
61 {
62 struct nvmm_x64_state state;
63 size_t i;
64 int ret;
65
66 const char *segnames[] = {
67 "CS", "DS", "ES", "FS", "GS", "SS", "GDT", "IDT", "LDT", "TR"
68 };
69
70 ret = nvmm_vcpu_getstate(mach, cpuid, &state, NVMM_X64_STATE_ALL);
71 if (ret == -1)
72 return -1;
73
74 printf("+ VCPU id=%d\n", (int)cpuid);
75 printf("| -> RIP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RIP]);
76 printf("| -> RSP=%p\n", (void *)state.gprs[NVMM_X64_GPR_RSP]);
77 printf("| -> RAX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RAX]);
78 printf("| -> RBX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RBX]);
79 printf("| -> RCX=%p\n", (void *)state.gprs[NVMM_X64_GPR_RCX]);
80 printf("| -> RFLAGS=%p\n", (void *)state.gprs[NVMM_X64_GPR_RFLAGS]);
81 for (i = 0; i < NVMM_X64_NSEG; i++) {
82 printf("| -> %s: sel=0x%lx base=%p, limit=%p, P=%d, D=%d L=%d\n",
83 segnames[i],
84 state.segs[i].selector,
85 (void *)state.segs[i].base,
86 (void *)state.segs[i].limit,
87 state.segs[i].attrib.p, state.segs[i].attrib.def32,
88 state.segs[i].attrib.lng);
89 }
90 printf("| -> MSR_EFER=%p\n", (void *)state.msrs[NVMM_X64_MSR_EFER]);
91 printf("| -> CR0=%p\n", (void *)state.crs[NVMM_X64_CR_CR0]);
92 printf("| -> CR3=%p\n", (void *)state.crs[NVMM_X64_CR_CR3]);
93 printf("| -> CR4=%p\n", (void *)state.crs[NVMM_X64_CR_CR4]);
94 printf("| -> CR8=%p\n", (void *)state.crs[NVMM_X64_CR_CR8]);
95 printf("| -> CPL=%p\n", (void *)state.misc[NVMM_X64_MISC_CPL]);
96
97 return 0;
98 }
99
100 /* -------------------------------------------------------------------------- */
101
102 #define PTE32_L1_SHIFT 12
103 #define PTE32_L2_SHIFT 22
104
105 #define PTE32_L2_MASK 0xffc00000
106 #define PTE32_L1_MASK 0x003ff000
107
108 #define PTE32_L2_FRAME (PTE32_L2_MASK)
109 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
110
111 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
112 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
113
114 #define CR3_FRAME_32BIT PG_FRAME
115
116 typedef uint32_t pte_32bit_t;
117
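/*
 * Translate a GVA to a GPA through the guest's 2-level, non-PAE page tables
 * (CR4.PAE=0). Each level is walked by converting the table's GPA to an HVA
 * with nvmm_gpa_to_hva() and reading the 32-bit PTE directly. The access
 * rights are accumulated in *prot along the way, and a 4MB superpage (PG_PS)
 * at L2 terminates the walk early when PSE is enabled.
 */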
118 static int
119 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
120 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
121 {
122 gpaddr_t L2gpa, L1gpa;
123 uintptr_t L2hva, L1hva;
124 pte_32bit_t *pdir, pte;
125
126 /* We begin with an RWXU access. */
127 *prot = NVMM_PROT_ALL;
128
129 /* Parse L2. */
130 L2gpa = (cr3 & CR3_FRAME_32BIT);
131 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
132 return -1;
133 pdir = (pte_32bit_t *)L2hva;
134 pte = pdir[pte32_l2idx(gva)];
135 if ((pte & PG_V) == 0)
136 return -1;
137 if ((pte & PG_u) == 0)
138 *prot &= ~NVMM_PROT_USER;
139 if ((pte & PG_KW) == 0)
140 *prot &= ~NVMM_PROT_WRITE;
141 if ((pte & PG_PS) && !has_pse)
142 return -1;
143 if (pte & PG_PS) {
144 *gpa = (pte & PTE32_L2_FRAME);
145 *gpa = *gpa + (gva & PTE32_L1_MASK);
146 return 0;
147 }
148
149 /* Parse L1. */
150 L1gpa = (pte & PG_FRAME);
151 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
152 return -1;
153 pdir = (pte_32bit_t *)L1hva;
154 pte = pdir[pte32_l1idx(gva)];
155 if ((pte & PG_V) == 0)
156 return -1;
157 if ((pte & PG_u) == 0)
158 *prot &= ~NVMM_PROT_USER;
159 if ((pte & PG_KW) == 0)
160 *prot &= ~NVMM_PROT_WRITE;
161 if (pte & PG_PS)
162 return -1;
163
164 *gpa = (pte & PG_FRAME);
165 return 0;
166 }
167
168 /* -------------------------------------------------------------------------- */
169
170 #define PTE32_PAE_L1_SHIFT 12
171 #define PTE32_PAE_L2_SHIFT 21
172 #define PTE32_PAE_L3_SHIFT 30
173
174 #define PTE32_PAE_L3_MASK 0xc0000000
175 #define PTE32_PAE_L2_MASK 0x3fe00000
176 #define PTE32_PAE_L1_MASK 0x001ff000
177
178 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
179 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
180 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
181
182 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
183 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
184 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
185
186 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
187
188 typedef uint64_t pte_32bit_pae_t;
189
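/*
 * Same walk, but for the 3-level 32-bit PAE format (CR4.PAE=1, EFER.LMA=0):
 * 64-bit PTEs, a 4-entry PDPT at L3, and 2MB superpages at L2. PG_NX is
 * honored here, unlike in the non-PAE walk.
 */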
190 static int
191 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
192 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
193 {
194 gpaddr_t L3gpa, L2gpa, L1gpa;
195 uintptr_t L3hva, L2hva, L1hva;
196 pte_32bit_pae_t *pdir, pte;
197
198 /* We begin with an RWXU access. */
199 *prot = NVMM_PROT_ALL;
200
201 /* Parse L3. */
202 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
203 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
204 return -1;
205 pdir = (pte_32bit_pae_t *)L3hva;
206 pte = pdir[pte32_pae_l3idx(gva)];
207 if ((pte & PG_V) == 0)
208 return -1;
209 if (pte & PG_NX)
210 *prot &= ~NVMM_PROT_EXEC;
211 if (pte & PG_PS)
212 return -1;
213
214 /* Parse L2. */
215 L2gpa = (pte & PG_FRAME);
216 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
217 return -1;
218 pdir = (pte_32bit_pae_t *)L2hva;
219 pte = pdir[pte32_pae_l2idx(gva)];
220 if ((pte & PG_V) == 0)
221 return -1;
222 if ((pte & PG_u) == 0)
223 *prot &= ~NVMM_PROT_USER;
224 if ((pte & PG_KW) == 0)
225 *prot &= ~NVMM_PROT_WRITE;
226 if (pte & PG_NX)
227 *prot &= ~NVMM_PROT_EXEC;
228 if ((pte & PG_PS) && !has_pse)
229 return -1;
230 if (pte & PG_PS) {
231 *gpa = (pte & PTE32_PAE_L2_FRAME);
232 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
233 return 0;
234 }
235
236 /* Parse L1. */
237 L1gpa = (pte & PG_FRAME);
238 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
239 return -1;
240 pdir = (pte_32bit_pae_t *)L1hva;
241 pte = pdir[pte32_pae_l1idx(gva)];
242 if ((pte & PG_V) == 0)
243 return -1;
244 if ((pte & PG_u) == 0)
245 *prot &= ~NVMM_PROT_USER;
246 if ((pte & PG_KW) == 0)
247 *prot &= ~NVMM_PROT_WRITE;
248 if (pte & PG_NX)
249 *prot &= ~NVMM_PROT_EXEC;
250 if (pte & PG_PS)
251 return -1;
252
253 *gpa = (pte & PG_FRAME);
254 return 0;
255 }
256
257 /* -------------------------------------------------------------------------- */
258
259 #define PTE64_L1_SHIFT 12
260 #define PTE64_L2_SHIFT 21
261 #define PTE64_L3_SHIFT 30
262 #define PTE64_L4_SHIFT 39
263
264 #define PTE64_L4_MASK 0x0000ff8000000000
265 #define PTE64_L3_MASK 0x0000007fc0000000
266 #define PTE64_L2_MASK 0x000000003fe00000
267 #define PTE64_L1_MASK 0x00000000001ff000
268
269 #define PTE64_L4_FRAME PTE64_L4_MASK
270 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
271 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
272 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
273
274 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
275 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
276 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
277 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
278
279 #define CR3_FRAME_64BIT PG_FRAME
280
281 typedef uint64_t pte_64bit_t;
282
283 static inline bool
284 x86_gva_64bit_canonical(gvaddr_t gva)
285 {
286 /* Bits 63:47 must have the same value. */
287 #define SIGN_EXTEND 0xffff800000000000ULL
288 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
289 }
290
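/*
 * 4-level (long mode) walk: the GVA must be canonical, PG_NX is honored at
 * each level, and superpages can terminate the walk at L3 (1GB) or L2 (2MB).
 */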
291 static int
292 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
293 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
294 {
295 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
296 uintptr_t L4hva, L3hva, L2hva, L1hva;
297 pte_64bit_t *pdir, pte;
298
299 /* We begin with an RWXU access. */
300 *prot = NVMM_PROT_ALL;
301
302 if (!x86_gva_64bit_canonical(gva))
303 return -1;
304
305 /* Parse L4. */
306 L4gpa = (cr3 & CR3_FRAME_64BIT);
307 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva) == -1)
308 return -1;
309 pdir = (pte_64bit_t *)L4hva;
310 pte = pdir[pte64_l4idx(gva)];
311 if ((pte & PG_V) == 0)
312 return -1;
313 if ((pte & PG_u) == 0)
314 *prot &= ~NVMM_PROT_USER;
315 if ((pte & PG_KW) == 0)
316 *prot &= ~NVMM_PROT_WRITE;
317 if (pte & PG_NX)
318 *prot &= ~NVMM_PROT_EXEC;
319 if (pte & PG_PS)
320 return -1;
321
322 /* Parse L3. */
323 L3gpa = (pte & PG_FRAME);
324 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva) == -1)
325 return -1;
326 pdir = (pte_64bit_t *)L3hva;
327 pte = pdir[pte64_l3idx(gva)];
328 if ((pte & PG_V) == 0)
329 return -1;
330 if ((pte & PG_u) == 0)
331 *prot &= ~NVMM_PROT_USER;
332 if ((pte & PG_KW) == 0)
333 *prot &= ~NVMM_PROT_WRITE;
334 if (pte & PG_NX)
335 *prot &= ~NVMM_PROT_EXEC;
336 if (pte & PG_PS) {
337 *gpa = (pte & PTE64_L3_FRAME);
338 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
339 return 0;
340 }
341
342 /* Parse L2. */
343 L2gpa = (pte & PG_FRAME);
344 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva) == -1)
345 return -1;
346 pdir = (pte_64bit_t *)L2hva;
347 pte = pdir[pte64_l2idx(gva)];
348 if ((pte & PG_V) == 0)
349 return -1;
350 if ((pte & PG_u) == 0)
351 *prot &= ~NVMM_PROT_USER;
352 if ((pte & PG_KW) == 0)
353 *prot &= ~NVMM_PROT_WRITE;
354 if (pte & PG_NX)
355 *prot &= ~NVMM_PROT_EXEC;
356 if (pte & PG_PS) {
357 *gpa = (pte & PTE64_L2_FRAME);
358 *gpa = *gpa + (gva & PTE64_L1_MASK);
359 return 0;
360 }
361
362 /* Parse L1. */
363 L1gpa = (pte & PG_FRAME);
364 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva) == -1)
365 return -1;
366 pdir = (pte_64bit_t *)L1hva;
367 pte = pdir[pte64_l1idx(gva)];
368 if ((pte & PG_V) == 0)
369 return -1;
370 if ((pte & PG_u) == 0)
371 *prot &= ~NVMM_PROT_USER;
372 if ((pte & PG_KW) == 0)
373 *prot &= ~NVMM_PROT_WRITE;
374 if (pte & PG_NX)
375 *prot &= ~NVMM_PROT_EXEC;
376 if (pte & PG_PS)
377 return -1;
378
379 *gpa = (pte & PG_FRAME);
380 return 0;
381 }
382
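/*
 * Select the walk that matches the guest's current paging mode: no
 * translation if CR0.PG is clear, otherwise 64-bit (PAE+LMA), 32-bit PAE
 * (PAE only) or plain 32-bit. The intra-page offset is added back to the
 * resulting GPA at the end.
 */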
383 static inline int
384 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
385 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
386 {
387 bool is_pae, is_lng, has_pse;
388 uint64_t cr3;
389 size_t off;
390 int ret;
391
392 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
393 /* No paging. */
394 *prot = NVMM_PROT_ALL;
395 *gpa = gva;
396 return 0;
397 }
398
399 off = (gva & PAGE_MASK);
400 gva &= ~PAGE_MASK;
401
402 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
403 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
404 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
405 cr3 = state->crs[NVMM_X64_CR_CR3];
406
407 if (is_pae && is_lng) {
408 /* 64bit */
409 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
410 } else if (is_pae && !is_lng) {
411 /* 32bit PAE */
412 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, has_pse,
413 prot);
414 } else if (!is_pae && !is_lng) {
415 /* 32bit */
416 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
417 } else {
418 ret = -1;
419 }
420
421 if (ret == -1) {
422 errno = EFAULT;
423 }
424
425 *gpa = *gpa + off;
426
427 return ret;
428 }
429
430 int
431 nvmm_gva_to_gpa(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
432 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
433 {
434 struct nvmm_x64_state state;
435 int ret;
436
437 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
438 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
439 if (ret == -1)
440 return -1;
441
442 return x86_gva_to_gpa(mach, &state, gva, gpa, prot);
443 }
444
445 /* -------------------------------------------------------------------------- */
446
447 static inline bool
448 is_long_mode(struct nvmm_x64_state *state)
449 {
450 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
451 }
452
453 static inline bool
454 is_64bit(struct nvmm_x64_state *state)
455 {
456 return (state->segs[NVMM_X64_SEG_CS].attrib.lng != 0);
457 }
458
459 static inline bool
460 is_32bit(struct nvmm_x64_state *state)
461 {
462 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
463 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 1);
464 }
465
466 static inline bool
467 is_16bit(struct nvmm_x64_state *state)
468 {
469 return (state->segs[NVMM_X64_SEG_CS].attrib.lng == 0) &&
470 (state->segs[NVMM_X64_SEG_CS].attrib.def32 == 0);
471 }
472
473 static int
474 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
475 {
476 uint64_t limit;
477
478 	/*
479 	 * This is incomplete. We should also check expand-down segments, etc.,
480 	 * but really that's tiring.
481 	 */
482 if (__predict_false(!seg->attrib.p)) {
483 goto error;
484 }
485
486 limit = (seg->limit + 1);
487 if (__predict_true(seg->attrib.gran)) {
488 limit *= PAGE_SIZE;
489 }
490
491 if (__predict_false(gva + size > limit)) {
492 goto error;
493 }
494
495 return 0;
496
497 error:
498 errno = EFAULT;
499 return -1;
500 }
501
502 static inline void
503 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
504 {
505 *gva += seg->base;
506 }
507
508 static inline uint64_t
509 size_to_mask(size_t size)
510 {
511 switch (size) {
512 case 1:
513 return 0x00000000000000FF;
514 case 2:
515 return 0x000000000000FFFF;
516 case 4:
517 return 0x00000000FFFFFFFF;
518 case 8:
519 default:
520 return 0xFFFFFFFFFFFFFFFF;
521 }
522 }
523
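/*
 * The REP iteration count is held in rCX, truncated to the effective address
 * size of the instruction (CX, ECX or RCX).
 */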
524 static uint64_t
525 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
526 {
527 uint64_t mask, cnt;
528
529 mask = size_to_mask(adsize);
530 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
531
532 return cnt;
533 }
534
535 static void
536 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
537 {
538 uint64_t mask;
539
540 /* XXX: should we zero-extend? */
541 mask = size_to_mask(adsize);
542 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
543 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
544 }
545
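/*
 * Copy guest memory at 'gva' into 'data'. Accesses that cross a page boundary
 * are split, the remainder being handled by a recursive call on the next
 * page. A GPA with no HVA mapping is treated as MMIO and forwarded to the
 * registered memory callback. write_guest_memory() below is the symmetric
 * write path.
 */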
546 static int
547 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
548 gvaddr_t gva, uint8_t *data, size_t size)
549 {
550 struct nvmm_mem mem;
551 nvmm_prot_t prot;
552 gpaddr_t gpa;
553 uintptr_t hva;
554 bool is_mmio;
555 int ret, remain;
556
557 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
558 if (__predict_false(ret == -1)) {
559 return -1;
560 }
561 if (__predict_false(!(prot & NVMM_PROT_READ))) {
562 errno = EFAULT;
563 return -1;
564 }
565
566 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
567 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
568 } else {
569 remain = 0;
570 }
571 size -= remain;
572
573 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
574 is_mmio = (ret == -1);
575
576 if (is_mmio) {
577 mem.data = data;
578 mem.gpa = gpa;
579 mem.write = false;
580 mem.size = size;
581 (*__callbacks.mem)(&mem);
582 } else {
583 memcpy(data, (uint8_t *)hva, size);
584 }
585
586 if (remain > 0) {
587 ret = read_guest_memory(mach, state, gva + size,
588 data + size, remain);
589 } else {
590 ret = 0;
591 }
592
593 return ret;
594 }
595
596 static int
597 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
598 gvaddr_t gva, uint8_t *data, size_t size)
599 {
600 struct nvmm_mem mem;
601 nvmm_prot_t prot;
602 gpaddr_t gpa;
603 uintptr_t hva;
604 bool is_mmio;
605 int ret, remain;
606
607 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
608 if (__predict_false(ret == -1)) {
609 return -1;
610 }
611 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
612 errno = EFAULT;
613 return -1;
614 }
615
616 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
617 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
618 } else {
619 remain = 0;
620 }
621 size -= remain;
622
623 ret = nvmm_gpa_to_hva(mach, gpa, &hva);
624 is_mmio = (ret == -1);
625
626 if (is_mmio) {
627 mem.data = data;
628 mem.gpa = gpa;
629 mem.write = true;
630 mem.size = size;
631 (*__callbacks.mem)(&mem);
632 } else {
633 memcpy((uint8_t *)hva, data, size);
634 }
635
636 if (remain > 0) {
637 ret = write_guest_memory(mach, state, gva + size,
638 data + size, remain);
639 } else {
640 ret = 0;
641 }
642
643 return ret;
644 }
645
646 /* -------------------------------------------------------------------------- */
647
648 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
649
650 #define NVMM_IO_BATCH_SIZE 32
651
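/*
 * Process up to NVMM_IO_BATCH_SIZE bytes of a REP INS/OUTS at once: the guest
 * buffer is read (OUTS) or written (INS) in a single pass, and the I/O
 * callback is invoked once per element. Returns the number of elements
 * handled.
 */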
652 static int
653 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
654 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
655 {
656 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
657 size_t i, iosize, iocnt;
658 int ret;
659
660 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
661 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
662 iocnt = iosize / io->size;
663
664 io->data = iobuf;
665
666 if (!io->in) {
667 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
668 if (ret == -1)
669 return -1;
670 }
671
672 for (i = 0; i < iocnt; i++) {
673 (*__callbacks.io)(io);
674 io->data += io->size;
675 }
676
677 if (io->in) {
678 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
679 if (ret == -1)
680 return -1;
681 }
682
683 return iocnt;
684 }
685
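/*
 * I/O assist. Emulates the IN/OUT/INS/OUTS instruction that caused the exit:
 * the port accesses go through the I/O callback, rAX or the string pointers
 * (rSI/rDI, honoring the direction flag) are updated, the REP count in rCX is
 * maintained, and RIP is advanced once the instruction has fully completed.
 */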
686 int
687 nvmm_assist_io(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
688 struct nvmm_exit *exit)
689 {
690 struct nvmm_x64_state state;
691 struct nvmm_io io;
692 uint64_t cnt = 0; /* GCC */
693 uint8_t iobuf[8];
694 int iocnt = 1;
695 gvaddr_t gva = 0; /* GCC */
696 int reg = 0; /* GCC */
697 int ret, seg;
698 bool psld = false;
699
700 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
701 errno = EINVAL;
702 return -1;
703 }
704
705 io.port = exit->u.io.port;
706 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
707 io.size = exit->u.io.operand_size;
708 io.data = iobuf;
709
710 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
711 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
712 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
713 if (ret == -1)
714 return -1;
715
716 if (exit->u.io.rep) {
717 cnt = rep_get_cnt(&state, exit->u.io.address_size);
718 if (__predict_false(cnt == 0)) {
719 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
720 goto out;
721 }
722 }
723
724 if (__predict_false(state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
725 psld = true;
726 }
727
728 /*
729 * Determine GVA.
730 */
731 if (exit->u.io.str) {
732 if (io.in) {
733 reg = NVMM_X64_GPR_RDI;
734 } else {
735 reg = NVMM_X64_GPR_RSI;
736 }
737
738 gva = state.gprs[reg];
739 gva &= size_to_mask(exit->u.io.address_size);
740
741 if (exit->u.io.seg != -1) {
742 seg = exit->u.io.seg;
743 } else {
744 if (io.in) {
745 seg = NVMM_X64_SEG_ES;
746 } else {
747 seg = fetch_segment(mach, &state);
748 if (seg == -1)
749 return -1;
750 }
751 }
752
753 if (__predict_true(is_long_mode(&state))) {
754 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
755 segment_apply(&state.segs[seg], &gva);
756 }
757 } else {
758 ret = segment_check(&state.segs[seg], gva, io.size);
759 if (ret == -1)
760 return -1;
761 segment_apply(&state.segs[seg], &gva);
762 }
763
764 if (exit->u.io.rep && !psld) {
765 iocnt = assist_io_batch(mach, &state, &io, gva, cnt);
766 if (iocnt == -1)
767 return -1;
768 goto done;
769 }
770 }
771
772 if (!io.in) {
773 if (!exit->u.io.str) {
774 memcpy(io.data, &state.gprs[NVMM_X64_GPR_RAX], io.size);
775 } else {
776 ret = read_guest_memory(mach, &state, gva, io.data,
777 io.size);
778 if (ret == -1)
779 return -1;
780 }
781 }
782
783 (*__callbacks.io)(&io);
784
785 if (io.in) {
786 if (!exit->u.io.str) {
787 memcpy(&state.gprs[NVMM_X64_GPR_RAX], io.data, io.size);
788 if (io.size == 4) {
789 /* Zero-extend to 64 bits. */
790 state.gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
791 }
792 } else {
793 ret = write_guest_memory(mach, &state, gva, io.data,
794 io.size);
795 if (ret == -1)
796 return -1;
797 }
798 }
799
800 done:
801 if (exit->u.io.str) {
802 if (__predict_false(psld)) {
803 state.gprs[reg] -= iocnt * io.size;
804 } else {
805 state.gprs[reg] += iocnt * io.size;
806 }
807 }
808
809 if (exit->u.io.rep) {
810 cnt -= iocnt;
811 rep_set_cnt(&state, exit->u.io.address_size, cnt);
812 if (cnt == 0) {
813 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
814 }
815 } else {
816 state.gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
817 }
818
819 out:
820 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
821 if (ret == -1)
822 return -1;
823
824 return 0;
825 }
826
827 /* -------------------------------------------------------------------------- */
828
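/*
 * Emulation callbacks attached to the decoded instructions. 'read' is set for
 * the instructions that consume the previous value of the memory operand
 * (OR, AND, SUB, XOR); 'notouch' is set for CMP and TEST, which only update
 * RFLAGS and never write their result back.
 */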
829 struct x86_emul {
830 bool read;
831 bool notouch;
832 void (*func)(struct nvmm_mem *, uint64_t *);
833 };
834
835 static void x86_func_or(struct nvmm_mem *, uint64_t *);
836 static void x86_func_and(struct nvmm_mem *, uint64_t *);
837 static void x86_func_sub(struct nvmm_mem *, uint64_t *);
838 static void x86_func_xor(struct nvmm_mem *, uint64_t *);
839 static void x86_func_cmp(struct nvmm_mem *, uint64_t *);
840 static void x86_func_test(struct nvmm_mem *, uint64_t *);
841 static void x86_func_mov(struct nvmm_mem *, uint64_t *);
842 static void x86_func_stos(struct nvmm_mem *, uint64_t *);
843 static void x86_func_lods(struct nvmm_mem *, uint64_t *);
844 static void x86_func_movs(struct nvmm_mem *, uint64_t *);
845
846 static const struct x86_emul x86_emul_or = {
847 .read = true,
848 .func = x86_func_or
849 };
850
851 static const struct x86_emul x86_emul_and = {
852 .read = true,
853 .func = x86_func_and
854 };
855
856 static const struct x86_emul x86_emul_sub = {
857 .read = true,
858 .func = x86_func_sub
859 };
860
861 static const struct x86_emul x86_emul_xor = {
862 .read = true,
863 .func = x86_func_xor
864 };
865
866 static const struct x86_emul x86_emul_cmp = {
867 .notouch = true,
868 .func = x86_func_cmp
869 };
870
871 static const struct x86_emul x86_emul_test = {
872 .notouch = true,
873 .func = x86_func_test
874 };
875
876 static const struct x86_emul x86_emul_mov = {
877 .func = x86_func_mov
878 };
879
880 static const struct x86_emul x86_emul_stos = {
881 .func = x86_func_stos
882 };
883
884 static const struct x86_emul x86_emul_lods = {
885 .func = x86_func_lods
886 };
887
888 static const struct x86_emul x86_emul_movs = {
889 .func = x86_func_movs
890 };
891
892 /* Legacy prefixes. */
893 #define LEG_LOCK 0xF0
894 #define LEG_REPN 0xF2
895 #define LEG_REP 0xF3
896 #define LEG_OVR_CS 0x2E
897 #define LEG_OVR_SS 0x36
898 #define LEG_OVR_DS 0x3E
899 #define LEG_OVR_ES 0x26
900 #define LEG_OVR_FS 0x64
901 #define LEG_OVR_GS 0x65
902 #define LEG_OPR_OVR 0x66
903 #define LEG_ADR_OVR 0x67
904
905 struct x86_legpref {
906 bool opr_ovr:1;
907 bool adr_ovr:1;
908 bool rep:1;
909 bool repn:1;
910 int seg;
911 };
912
913 struct x86_rexpref {
914 bool present;
915 bool w;
916 bool r;
917 bool x;
918 bool b;
919 };
920
921 struct x86_reg {
922 int num; /* NVMM GPR state index */
923 uint64_t mask;
924 };
925
926 enum x86_disp_type {
927 DISP_NONE,
928 DISP_0,
929 DISP_1,
930 DISP_4
931 };
932
933 struct x86_disp {
934 enum x86_disp_type type;
935 uint64_t data; /* 4 bytes, but can be sign-extended */
936 };
937
938 enum REGMODRM__Mod {
939 MOD_DIS0, /* also, register indirect */
940 MOD_DIS1,
941 MOD_DIS4,
942 MOD_REG
943 };
944
945 enum REGMODRM__Reg {
946 REG_000, /* these fields are indexes to the register map */
947 REG_001,
948 REG_010,
949 REG_011,
950 REG_100,
951 REG_101,
952 REG_110,
953 REG_111
954 };
955
956 enum REGMODRM__Rm {
957 RM_000, /* reg */
958 RM_001, /* reg */
959 RM_010, /* reg */
960 RM_011, /* reg */
961 RM_RSP_SIB, /* reg or SIB, depending on the MOD */
962 RM_RBP_DISP32, /* reg or displacement-only (= RIP-relative on amd64) */
963 RM_110,
964 RM_111
965 };
966
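/*
 * The mod/reg/rm fields of the ModRM byte, unpacked by node_regmodrm().
 * For example 0x48 = 01 001 000 in binary gives mod=MOD_DIS1 (indirect with
 * an 8-bit displacement), reg=001, rm=000.
 */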
967 struct x86_regmodrm {
968 bool present;
969 enum REGMODRM__Mod mod;
970 enum REGMODRM__Reg reg;
971 enum REGMODRM__Rm rm;
972 };
973
974 struct x86_immediate {
975 uint64_t data;
976 };
977
978 struct x86_sib {
979 uint8_t scale;
980 const struct x86_reg *idx;
981 const struct x86_reg *bas;
982 };
983
984 enum x86_store_type {
985 STORE_NONE,
986 STORE_REG,
987 STORE_IMM,
988 STORE_SIB,
989 STORE_DMO
990 };
991
992 struct x86_store {
993 enum x86_store_type type;
994 union {
995 const struct x86_reg *reg;
996 struct x86_immediate imm;
997 struct x86_sib sib;
998 uint64_t dmo;
999 } u;
1000 struct x86_disp disp;
1001 int hardseg;
1002 };
1003
1004 struct x86_instr {
1005 size_t len;
1006 struct x86_legpref legpref;
1007 struct x86_rexpref rexpref;
1008 size_t operand_size;
1009 size_t address_size;
1010 uint64_t zeroextend_mask;
1011
1012 struct x86_regmodrm regmodrm;
1013
1014 const struct x86_opcode *opcode;
1015
1016 struct x86_store src;
1017 struct x86_store dst;
1018 struct x86_store *strm;
1019
1020 const struct x86_emul *emul;
1021 };
1022
1023 struct x86_decode_fsm {
1024 /* vcpu */
1025 bool is64bit;
1026 bool is32bit;
1027 bool is16bit;
1028
1029 /* fsm */
1030 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1031 uint8_t *buf;
1032 uint8_t *end;
1033 };
1034
1035 struct x86_opcode {
1036 uint8_t byte;
1037 bool regmodrm;
1038 bool regtorm;
1039 bool dmo;
1040 bool todmo;
1041 bool movs;
1042 bool stos;
1043 bool lods;
1044 bool szoverride;
1045 int defsize;
1046 int allsize;
1047 bool group1;
1048 bool group3;
1049 bool group11;
1050 bool immediate;
1051 int flags;
1052 const struct x86_emul *emul;
1053 };
1054
1055 struct x86_group_entry {
1056 const struct x86_emul *emul;
1057 };
1058
1059 #define OPSIZE_BYTE 0x01
1060 #define OPSIZE_WORD 0x02 /* 2 bytes */
1061 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1062 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1063
1064 #define FLAG_imm8 0x01
1065 #define FLAG_immz 0x02
1066 #define FLAG_ze 0x04
1067
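/*
 * Group opcodes (0x80/0x81/0x83, 0xF6/0xF7, 0xC6/0xC7) encode the actual
 * operation in the REG field of ModRM. Entries left empty here are not
 * emulated.
 */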
1068 static const struct x86_group_entry group1[8] = {
1069 [1] = { .emul = &x86_emul_or },
1070 [4] = { .emul = &x86_emul_and },
1071 [6] = { .emul = &x86_emul_xor },
1072 [7] = { .emul = &x86_emul_cmp }
1073 };
1074
1075 static const struct x86_group_entry group3[8] = {
1076 [0] = { .emul = &x86_emul_test },
1077 [1] = { .emul = &x86_emul_test }
1078 };
1079
1080 static const struct x86_group_entry group11[8] = {
1081 [0] = { .emul = &x86_emul_mov }
1082 };
1083
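/*
 * One-byte opcodes recognized by the decoder. The operand comments use the
 * usual Intel/AMD notation: E = ModRM r/m (register or memory), G = ModRM
 * reg, I = immediate, O = direct memory offset, X/Y = DS:rSI/ES:rDI string
 * operands; b, w, v and z give the operand width.
 */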
1084 static const struct x86_opcode primary_opcode_table[] = {
1085 /*
1086 * Group1
1087 */
1088 {
1089 /* Eb, Ib */
1090 .byte = 0x80,
1091 .regmodrm = true,
1092 .regtorm = true,
1093 .szoverride = false,
1094 .defsize = OPSIZE_BYTE,
1095 .allsize = -1,
1096 .group1 = true,
1097 .immediate = true,
1098 .emul = NULL /* group1 */
1099 },
1100 {
1101 /* Ev, Iz */
1102 .byte = 0x81,
1103 .regmodrm = true,
1104 .regtorm = true,
1105 .szoverride = true,
1106 .defsize = -1,
1107 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1108 .group1 = true,
1109 .immediate = true,
1110 .flags = FLAG_immz,
1111 .emul = NULL /* group1 */
1112 },
1113 {
1114 /* Ev, Ib */
1115 .byte = 0x83,
1116 .regmodrm = true,
1117 .regtorm = true,
1118 .szoverride = true,
1119 .defsize = -1,
1120 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1121 .group1 = true,
1122 .immediate = true,
1123 .flags = FLAG_imm8,
1124 .emul = NULL /* group1 */
1125 },
1126
1127 /*
1128 * Group3
1129 */
1130 {
1131 /* Eb, Ib */
1132 .byte = 0xF6,
1133 .regmodrm = true,
1134 .regtorm = true,
1135 .szoverride = false,
1136 .defsize = OPSIZE_BYTE,
1137 .allsize = -1,
1138 .group3 = true,
1139 .immediate = true,
1140 .emul = NULL /* group3 */
1141 },
1142 {
1143 /* Ev, Iz */
1144 .byte = 0xF7,
1145 .regmodrm = true,
1146 .regtorm = true,
1147 .szoverride = true,
1148 .defsize = -1,
1149 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1150 .group3 = true,
1151 .immediate = true,
1152 .flags = FLAG_immz,
1153 .emul = NULL /* group3 */
1154 },
1155
1156 /*
1157 * Group11
1158 */
1159 {
1160 /* Eb, Ib */
1161 .byte = 0xC6,
1162 .regmodrm = true,
1163 .regtorm = true,
1164 .szoverride = false,
1165 .defsize = OPSIZE_BYTE,
1166 .allsize = -1,
1167 .group11 = true,
1168 .immediate = true,
1169 .emul = NULL /* group11 */
1170 },
1171 {
1172 /* Ev, Iz */
1173 .byte = 0xC7,
1174 .regmodrm = true,
1175 .regtorm = true,
1176 .szoverride = true,
1177 .defsize = -1,
1178 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1179 .group11 = true,
1180 .immediate = true,
1181 .flags = FLAG_immz,
1182 .emul = NULL /* group11 */
1183 },
1184
1185 /*
1186 * OR
1187 */
1188 {
1189 /* Eb, Gb */
1190 .byte = 0x08,
1191 .regmodrm = true,
1192 .regtorm = true,
1193 .szoverride = false,
1194 .defsize = OPSIZE_BYTE,
1195 .allsize = -1,
1196 .emul = &x86_emul_or
1197 },
1198 {
1199 /* Ev, Gv */
1200 .byte = 0x09,
1201 .regmodrm = true,
1202 .regtorm = true,
1203 .szoverride = true,
1204 .defsize = -1,
1205 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1206 .emul = &x86_emul_or
1207 },
1208 {
1209 /* Gb, Eb */
1210 .byte = 0x0A,
1211 .regmodrm = true,
1212 .regtorm = false,
1213 .szoverride = false,
1214 .defsize = OPSIZE_BYTE,
1215 .allsize = -1,
1216 .emul = &x86_emul_or
1217 },
1218 {
1219 /* Gv, Ev */
1220 .byte = 0x0B,
1221 .regmodrm = true,
1222 .regtorm = false,
1223 .szoverride = true,
1224 .defsize = -1,
1225 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1226 .emul = &x86_emul_or
1227 },
1228
1229 /*
1230 * AND
1231 */
1232 {
1233 /* Eb, Gb */
1234 .byte = 0x20,
1235 .regmodrm = true,
1236 .regtorm = true,
1237 .szoverride = false,
1238 .defsize = OPSIZE_BYTE,
1239 .allsize = -1,
1240 .emul = &x86_emul_and
1241 },
1242 {
1243 /* Ev, Gv */
1244 .byte = 0x21,
1245 .regmodrm = true,
1246 .regtorm = true,
1247 .szoverride = true,
1248 .defsize = -1,
1249 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1250 .emul = &x86_emul_and
1251 },
1252 {
1253 /* Gb, Eb */
1254 .byte = 0x22,
1255 .regmodrm = true,
1256 .regtorm = false,
1257 .szoverride = false,
1258 .defsize = OPSIZE_BYTE,
1259 .allsize = -1,
1260 .emul = &x86_emul_and
1261 },
1262 {
1263 /* Gv, Ev */
1264 .byte = 0x23,
1265 .regmodrm = true,
1266 .regtorm = false,
1267 .szoverride = true,
1268 .defsize = -1,
1269 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1270 .emul = &x86_emul_and
1271 },
1272
1273 /*
1274 * SUB
1275 */
1276 {
1277 /* Eb, Gb */
1278 .byte = 0x28,
1279 .regmodrm = true,
1280 .regtorm = true,
1281 .szoverride = false,
1282 .defsize = OPSIZE_BYTE,
1283 .allsize = -1,
1284 .emul = &x86_emul_sub
1285 },
1286 {
1287 /* Ev, Gv */
1288 .byte = 0x29,
1289 .regmodrm = true,
1290 .regtorm = true,
1291 .szoverride = true,
1292 .defsize = -1,
1293 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1294 .emul = &x86_emul_sub
1295 },
1296 {
1297 /* Gb, Eb */
1298 .byte = 0x2A,
1299 .regmodrm = true,
1300 .regtorm = false,
1301 .szoverride = false,
1302 .defsize = OPSIZE_BYTE,
1303 .allsize = -1,
1304 .emul = &x86_emul_sub
1305 },
1306 {
1307 /* Gv, Ev */
1308 .byte = 0x2B,
1309 .regmodrm = true,
1310 .regtorm = false,
1311 .szoverride = true,
1312 .defsize = -1,
1313 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1314 .emul = &x86_emul_sub
1315 },
1316
1317 /*
1318 * XOR
1319 */
1320 {
1321 /* Eb, Gb */
1322 .byte = 0x30,
1323 .regmodrm = true,
1324 .regtorm = true,
1325 .szoverride = false,
1326 .defsize = OPSIZE_BYTE,
1327 .allsize = -1,
1328 .emul = &x86_emul_xor
1329 },
1330 {
1331 /* Ev, Gv */
1332 .byte = 0x31,
1333 .regmodrm = true,
1334 .regtorm = true,
1335 .szoverride = true,
1336 .defsize = -1,
1337 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1338 .emul = &x86_emul_xor
1339 },
1340 {
1341 /* Gb, Eb */
1342 .byte = 0x32,
1343 .regmodrm = true,
1344 .regtorm = false,
1345 .szoverride = false,
1346 .defsize = OPSIZE_BYTE,
1347 .allsize = -1,
1348 .emul = &x86_emul_xor
1349 },
1350 {
1351 /* Gv, Ev */
1352 .byte = 0x33,
1353 .regmodrm = true,
1354 .regtorm = false,
1355 .szoverride = true,
1356 .defsize = -1,
1357 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1358 .emul = &x86_emul_xor
1359 },
1360
1361 /*
1362 * MOV
1363 */
1364 {
1365 /* Eb, Gb */
1366 .byte = 0x88,
1367 .regmodrm = true,
1368 .regtorm = true,
1369 .szoverride = false,
1370 .defsize = OPSIZE_BYTE,
1371 .allsize = -1,
1372 .emul = &x86_emul_mov
1373 },
1374 {
1375 /* Ev, Gv */
1376 .byte = 0x89,
1377 .regmodrm = true,
1378 .regtorm = true,
1379 .szoverride = true,
1380 .defsize = -1,
1381 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1382 .emul = &x86_emul_mov
1383 },
1384 {
1385 /* Gb, Eb */
1386 .byte = 0x8A,
1387 .regmodrm = true,
1388 .regtorm = false,
1389 .szoverride = false,
1390 .defsize = OPSIZE_BYTE,
1391 .allsize = -1,
1392 .emul = &x86_emul_mov
1393 },
1394 {
1395 /* Gv, Ev */
1396 .byte = 0x8B,
1397 .regmodrm = true,
1398 .regtorm = false,
1399 .szoverride = true,
1400 .defsize = -1,
1401 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1402 .emul = &x86_emul_mov
1403 },
1404 {
1405 /* AL, Ob */
1406 .byte = 0xA0,
1407 .dmo = true,
1408 .todmo = false,
1409 .szoverride = false,
1410 .defsize = OPSIZE_BYTE,
1411 .allsize = -1,
1412 .emul = &x86_emul_mov
1413 },
1414 {
1415 /* rAX, Ov */
1416 .byte = 0xA1,
1417 .dmo = true,
1418 .todmo = false,
1419 .szoverride = true,
1420 .defsize = -1,
1421 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1422 .emul = &x86_emul_mov
1423 },
1424 {
1425 /* Ob, AL */
1426 .byte = 0xA2,
1427 .dmo = true,
1428 .todmo = true,
1429 .szoverride = false,
1430 .defsize = OPSIZE_BYTE,
1431 .allsize = -1,
1432 .emul = &x86_emul_mov
1433 },
1434 {
1435 /* Ov, rAX */
1436 .byte = 0xA3,
1437 .dmo = true,
1438 .todmo = true,
1439 .szoverride = true,
1440 .defsize = -1,
1441 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1442 .emul = &x86_emul_mov
1443 },
1444
1445 /*
1446 * MOVS
1447 */
1448 {
1449 /* Yb, Xb */
1450 .byte = 0xA4,
1451 .movs = true,
1452 .szoverride = false,
1453 .defsize = OPSIZE_BYTE,
1454 .allsize = -1,
1455 .emul = &x86_emul_movs
1456 },
1457 {
1458 /* Yv, Xv */
1459 .byte = 0xA5,
1460 .movs = true,
1461 .szoverride = true,
1462 .defsize = -1,
1463 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1464 .emul = &x86_emul_movs
1465 },
1466
1467 /*
1468 * STOS
1469 */
1470 {
1471 /* Yb, AL */
1472 .byte = 0xAA,
1473 .stos = true,
1474 .szoverride = false,
1475 .defsize = OPSIZE_BYTE,
1476 .allsize = -1,
1477 .emul = &x86_emul_stos
1478 },
1479 {
1480 /* Yv, rAX */
1481 .byte = 0xAB,
1482 .stos = true,
1483 .szoverride = true,
1484 .defsize = -1,
1485 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1486 .emul = &x86_emul_stos
1487 },
1488
1489 /*
1490 * LODS
1491 */
1492 {
1493 /* AL, Xb */
1494 .byte = 0xAC,
1495 .lods = true,
1496 .szoverride = false,
1497 .defsize = OPSIZE_BYTE,
1498 .allsize = -1,
1499 .emul = &x86_emul_lods
1500 },
1501 {
1502 /* rAX, Xv */
1503 .byte = 0xAD,
1504 .lods = true,
1505 .szoverride = true,
1506 .defsize = -1,
1507 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1508 .emul = &x86_emul_lods
1509 },
1510 };
1511
1512 static const struct x86_opcode secondary_opcode_table[] = {
1513 /*
1514 * MOVZX
1515 */
1516 {
1517 /* Gv, Eb */
1518 .byte = 0xB6,
1519 .regmodrm = true,
1520 .regtorm = false,
1521 .szoverride = true,
1522 .defsize = OPSIZE_BYTE,
1523 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1524 .flags = FLAG_ze,
1525 .emul = &x86_emul_mov
1526 },
1527 {
1528 /* Gv, Ew */
1529 .byte = 0xB7,
1530 .regmodrm = true,
1531 .regtorm = false,
1532 .szoverride = true,
1533 .defsize = OPSIZE_WORD,
1534 .allsize = OPSIZE_WORD|OPSIZE_DOUB|OPSIZE_QUAD,
1535 .flags = FLAG_ze,
1536 .emul = &x86_emul_mov
1537 },
1538 };
1539
1540 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1541
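/*
 * Map for the register encodings whose meaning depends on the REX prefix
 * (encodings 4-7, taken modulo 4 before indexing this map): as byte registers
 * they are AH/CH/DH/BH without REX but SPL/BPL/SIL/DIL with REX; the larger
 * sizes are SP/BP/SI/DI, ESP/EBP/ESI/EDI and RSP/RBP/RSI/RDI.
 */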
1542 /* [REX-present][enc][opsize] */
1543 static const struct x86_reg gpr_map__special[2][4][8] = {
1544 [false] = {
1545 /* No REX prefix. */
1546 [0b00] = {
1547 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1548 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1549 [2] = { -1, 0 },
1550 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1551 [4] = { -1, 0 },
1552 [5] = { -1, 0 },
1553 [6] = { -1, 0 },
1554 [7] = { -1, 0 },
1555 },
1556 [0b01] = {
1557 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1558 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1559 [2] = { -1, 0 },
1560 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1561 [4] = { -1, 0 },
1562 [5] = { -1, 0 },
1563 [6] = { -1, 0 },
1564 [7] = { -1, 0 },
1565 },
1566 [0b10] = {
1567 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1568 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1569 [2] = { -1, 0 },
1570 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1571 [4] = { -1, 0 },
1572 [5] = { -1, 0 },
1573 [6] = { -1, 0 },
1574 [7] = { -1, 0 },
1575 },
1576 [0b11] = {
1577 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1578 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1579 [2] = { -1, 0 },
1580 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1581 [4] = { -1, 0 },
1582 [5] = { -1, 0 },
1583 [6] = { -1, 0 },
1584 [7] = { -1, 0 },
1585 }
1586 },
1587 [true] = {
1588 /* Has REX prefix. */
1589 [0b00] = {
1590 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1591 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1592 [2] = { -1, 0 },
1593 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1594 [4] = { -1, 0 },
1595 [5] = { -1, 0 },
1596 [6] = { -1, 0 },
1597 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1598 },
1599 [0b01] = {
1600 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1601 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1602 [2] = { -1, 0 },
1603 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1604 [4] = { -1, 0 },
1605 [5] = { -1, 0 },
1606 [6] = { -1, 0 },
1607 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1608 },
1609 [0b10] = {
1610 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1611 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1612 [2] = { -1, 0 },
1613 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1614 [4] = { -1, 0 },
1615 [5] = { -1, 0 },
1616 [6] = { -1, 0 },
1617 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1618 },
1619 [0b11] = {
1620 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1621 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1622 [2] = { -1, 0 },
1623 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1624 [4] = { -1, 0 },
1625 [5] = { -1, 0 },
1626 [6] = { -1, 0 },
1627 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1628 }
1629 }
1630 };
1631
1632 /* [depends][enc][size] */
1633 static const struct x86_reg gpr_map[2][8][8] = {
1634 [false] = {
1635 /* Not extended. */
1636 [0b000] = {
1637 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1638 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1639 [2] = { -1, 0 },
1640 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1641 [4] = { -1, 0 },
1642 [5] = { -1, 0 },
1643 [6] = { -1, 0 },
1644 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1645 },
1646 [0b001] = {
1647 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1648 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1649 [2] = { -1, 0 },
1650 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1651 [4] = { -1, 0 },
1652 [5] = { -1, 0 },
1653 [6] = { -1, 0 },
1654 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1655 },
1656 [0b010] = {
1657 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1658 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1659 [2] = { -1, 0 },
1660 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1661 [4] = { -1, 0 },
1662 [5] = { -1, 0 },
1663 [6] = { -1, 0 },
1664 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1665 },
1666 [0b011] = {
1667 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1668 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1669 [2] = { -1, 0 },
1670 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1671 [4] = { -1, 0 },
1672 [5] = { -1, 0 },
1673 [6] = { -1, 0 },
1674 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1675 },
1676 [0b100] = {
1677 [0] = { -1, 0 }, /* SPECIAL */
1678 [1] = { -1, 0 }, /* SPECIAL */
1679 [2] = { -1, 0 },
1680 [3] = { -1, 0 }, /* SPECIAL */
1681 [4] = { -1, 0 },
1682 [5] = { -1, 0 },
1683 [6] = { -1, 0 },
1684 [7] = { -1, 0 }, /* SPECIAL */
1685 },
1686 [0b101] = {
1687 [0] = { -1, 0 }, /* SPECIAL */
1688 [1] = { -1, 0 }, /* SPECIAL */
1689 [2] = { -1, 0 },
1690 [3] = { -1, 0 }, /* SPECIAL */
1691 [4] = { -1, 0 },
1692 [5] = { -1, 0 },
1693 [6] = { -1, 0 },
1694 [7] = { -1, 0 }, /* SPECIAL */
1695 },
1696 [0b110] = {
1697 [0] = { -1, 0 }, /* SPECIAL */
1698 [1] = { -1, 0 }, /* SPECIAL */
1699 [2] = { -1, 0 },
1700 [3] = { -1, 0 }, /* SPECIAL */
1701 [4] = { -1, 0 },
1702 [5] = { -1, 0 },
1703 [6] = { -1, 0 },
1704 [7] = { -1, 0 }, /* SPECIAL */
1705 },
1706 [0b111] = {
1707 [0] = { -1, 0 }, /* SPECIAL */
1708 [1] = { -1, 0 }, /* SPECIAL */
1709 [2] = { -1, 0 },
1710 [3] = { -1, 0 }, /* SPECIAL */
1711 [4] = { -1, 0 },
1712 [5] = { -1, 0 },
1713 [6] = { -1, 0 },
1714 [7] = { -1, 0 }, /* SPECIAL */
1715 },
1716 },
1717 [true] = {
1718 /* Extended. */
1719 [0b000] = {
1720 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1721 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1722 [2] = { -1, 0 },
1723 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1724 [4] = { -1, 0 },
1725 [5] = { -1, 0 },
1726 [6] = { -1, 0 },
1727 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1728 },
1729 [0b001] = {
1730 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1731 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1732 [2] = { -1, 0 },
1733 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1734 [4] = { -1, 0 },
1735 [5] = { -1, 0 },
1736 [6] = { -1, 0 },
1737 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1738 },
1739 [0b010] = {
1740 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1741 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1742 [2] = { -1, 0 },
1743 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1744 [4] = { -1, 0 },
1745 [5] = { -1, 0 },
1746 [6] = { -1, 0 },
1747 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1748 },
1749 [0b011] = {
1750 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1751 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1752 [2] = { -1, 0 },
1753 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1754 [4] = { -1, 0 },
1755 [5] = { -1, 0 },
1756 [6] = { -1, 0 },
1757 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1758 },
1759 [0b100] = {
1760 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1761 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1762 [2] = { -1, 0 },
1763 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1764 [4] = { -1, 0 },
1765 [5] = { -1, 0 },
1766 [6] = { -1, 0 },
1767 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1768 },
1769 [0b101] = {
1770 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1771 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1772 [2] = { -1, 0 },
1773 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1774 [4] = { -1, 0 },
1775 [5] = { -1, 0 },
1776 [6] = { -1, 0 },
1777 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1778 },
1779 [0b110] = {
1780 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1781 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1782 [2] = { -1, 0 },
1783 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1784 [4] = { -1, 0 },
1785 [5] = { -1, 0 },
1786 [6] = { -1, 0 },
1787 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1788 },
1789 [0b111] = {
1790 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1791 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1792 [2] = { -1, 0 },
1793 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1794 [4] = { -1, 0 },
1795 [5] = { -1, 0 },
1796 [6] = { -1, 0 },
1797 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1798 },
1799 }
1800 };
1801
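/*
 * Instruction decoder, structured as a small FSM: each node_* function
 * consumes bytes from fsm->buf, fills the relevant part of the x86_instr, and
 * picks the next node via fsm_advance(). A node returning -1 aborts the
 * decoding; reaching a NULL node terminates it.
 */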
1802 static int
1803 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1804 {
1805 fsm->fn = NULL;
1806 return -1;
1807 }
1808
1809 static int
1810 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1811 {
1812 if (fsm->buf + n > fsm->end) {
1813 return -1;
1814 }
1815 memcpy(bytes, fsm->buf, n);
1816 return 0;
1817 }
1818
1819 static void
1820 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1821 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1822 {
1823 fsm->buf += n;
1824 if (fsm->buf > fsm->end) {
1825 fsm->fn = node_overflow;
1826 } else {
1827 fsm->fn = fn;
1828 }
1829 }
1830
1831 static const struct x86_reg *
1832 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1833 {
1834 enc &= 0b11;
1835 if (regsize == 8) {
1836 /* May be 64bit without REX */
1837 return &gpr_map__special[1][enc][regsize-1];
1838 }
1839 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1840 }
1841
1842 /*
1843 * Special node, for MOVS. Fake two displacements of zero on the source and
1844 * destination registers.
1845 */
1846 static int
1847 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1848 {
1849 size_t adrsize;
1850
1851 adrsize = instr->address_size;
1852
1853 /* DS:RSI */
1854 instr->src.type = STORE_REG;
1855 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1856 instr->src.disp.type = DISP_0;
1857
1858 /* ES:RDI, force ES */
1859 instr->dst.type = STORE_REG;
1860 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1861 instr->dst.disp.type = DISP_0;
1862 instr->dst.hardseg = NVMM_X64_SEG_ES;
1863
1864 fsm_advance(fsm, 0, NULL);
1865
1866 return 0;
1867 }
1868
1869 /*
1870 * Special node, for STOS and LODS. Fake a displacement of zero on the
1871 * destination register.
1872 */
1873 static int
1874 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1875 {
1876 const struct x86_opcode *opcode = instr->opcode;
1877 struct x86_store *stlo, *streg;
1878 size_t adrsize, regsize;
1879
1880 adrsize = instr->address_size;
1881 regsize = instr->operand_size;
1882
1883 if (opcode->stos) {
1884 streg = &instr->src;
1885 stlo = &instr->dst;
1886 } else {
1887 streg = &instr->dst;
1888 stlo = &instr->src;
1889 }
1890
1891 streg->type = STORE_REG;
1892 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1893
1894 stlo->type = STORE_REG;
1895 if (opcode->stos) {
1896 /* ES:RDI, force ES */
1897 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1898 stlo->hardseg = NVMM_X64_SEG_ES;
1899 } else {
1900 /* DS:RSI */
1901 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1902 }
1903 stlo->disp.type = DISP_0;
1904
1905 fsm_advance(fsm, 0, NULL);
1906
1907 return 0;
1908 }
1909
1910 static int
1911 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1912 {
1913 const struct x86_opcode *opcode = instr->opcode;
1914 struct x86_store *stdmo, *streg;
1915 size_t adrsize, regsize;
1916
1917 adrsize = instr->address_size;
1918 regsize = instr->operand_size;
1919
1920 if (opcode->todmo) {
1921 streg = &instr->src;
1922 stdmo = &instr->dst;
1923 } else {
1924 streg = &instr->dst;
1925 stdmo = &instr->src;
1926 }
1927
1928 streg->type = STORE_REG;
1929 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1930
1931 stdmo->type = STORE_DMO;
1932 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1933 return -1;
1934 }
1935 fsm_advance(fsm, adrsize, NULL);
1936
1937 return 0;
1938 }
1939
1940 static inline uint64_t
1941 sign_extend(uint64_t val, int size)
1942 {
1943 if (size == 1) {
1944 if (val & __BIT(7))
1945 val |= 0xFFFFFFFFFFFFFF00;
1946 } else if (size == 2) {
1947 if (val & __BIT(15))
1948 val |= 0xFFFFFFFFFFFF0000;
1949 } else if (size == 4) {
1950 if (val & __BIT(31))
1951 val |= 0xFFFFFFFF00000000;
1952 }
1953 return val;
1954 }
1955
1956 static int
1957 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1958 {
1959 const struct x86_opcode *opcode = instr->opcode;
1960 struct x86_store *store;
1961 uint8_t immsize;
1962 size_t sesize = 0;
1963
1964 /* The immediate is the source */
1965 store = &instr->src;
1966 immsize = instr->operand_size;
1967
1968 if (opcode->flags & FLAG_imm8) {
1969 sesize = immsize;
1970 immsize = 1;
1971 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1972 sesize = immsize;
1973 immsize = 4;
1974 }
1975
1976 store->type = STORE_IMM;
1977 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1978 return -1;
1979 }
1980 fsm_advance(fsm, immsize, NULL);
1981
1982 if (sesize != 0) {
1983 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1984 }
1985
1986 return 0;
1987 }
1988
1989 static int
1990 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1991 {
1992 const struct x86_opcode *opcode = instr->opcode;
1993 uint64_t data = 0;
1994 size_t n;
1995
1996 if (instr->strm->disp.type == DISP_1) {
1997 n = 1;
1998 } else { /* DISP4 */
1999 n = 4;
2000 }
2001
2002 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
2003 return -1;
2004 }
2005
2006 if (__predict_true(fsm->is64bit)) {
2007 data = sign_extend(data, n);
2008 }
2009
2010 instr->strm->disp.data = data;
2011
2012 if (opcode->immediate) {
2013 fsm_advance(fsm, n, node_immediate);
2014 } else {
2015 fsm_advance(fsm, n, NULL);
2016 }
2017
2018 return 0;
2019 }
2020
2021 static const struct x86_reg *
2022 get_register_idx(struct x86_instr *instr, uint8_t index)
2023 {
2024 uint8_t enc = index;
2025 const struct x86_reg *reg;
2026 size_t regsize;
2027
2028 regsize = instr->address_size;
2029 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2030
2031 if (reg->num == -1) {
2032 reg = resolve_special_register(instr, enc, regsize);
2033 }
2034
2035 return reg;
2036 }
2037
2038 static const struct x86_reg *
2039 get_register_bas(struct x86_instr *instr, uint8_t base)
2040 {
2041 uint8_t enc = base;
2042 const struct x86_reg *reg;
2043 size_t regsize;
2044
2045 regsize = instr->address_size;
2046 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2047 if (reg->num == -1) {
2048 reg = resolve_special_register(instr, enc, regsize);
2049 }
2050
2051 return reg;
2052 }
2053
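/*
 * Decode the SIB byte: scale (2 bits), index (3 bits), base (3 bits).
 * index=100 with REX.X clear means "no index"; base=101 with mod=00 means
 * "no base", a disp32 follows instead.
 */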
2054 static int
2055 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2056 {
2057 const struct x86_opcode *opcode;
2058 uint8_t scale, index, base;
2059 bool noindex, nobase;
2060 uint8_t byte;
2061
2062 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2063 return -1;
2064 }
2065
2066 scale = ((byte & 0b11000000) >> 6);
2067 index = ((byte & 0b00111000) >> 3);
2068 base = ((byte & 0b00000111) >> 0);
2069
2070 opcode = instr->opcode;
2071
2072 noindex = false;
2073 nobase = false;
2074
2075 if (index == 0b100 && !instr->rexpref.x) {
2076 /* Special case: the index is null */
2077 noindex = true;
2078 }
2079
2080 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2081 /* Special case: the base is null + disp32 */
2082 instr->strm->disp.type = DISP_4;
2083 nobase = true;
2084 }
2085
2086 instr->strm->type = STORE_SIB;
2087 instr->strm->u.sib.scale = (1 << scale);
2088 if (!noindex)
2089 instr->strm->u.sib.idx = get_register_idx(instr, index);
2090 if (!nobase)
2091 instr->strm->u.sib.bas = get_register_bas(instr, base);
2092
2093 /* May have a displacement, or an immediate */
2094 if (instr->strm->disp.type == DISP_1 || instr->strm->disp.type == DISP_4) {
2095 fsm_advance(fsm, 1, node_disp);
2096 } else if (opcode->immediate) {
2097 fsm_advance(fsm, 1, node_immediate);
2098 } else {
2099 fsm_advance(fsm, 1, NULL);
2100 }
2101
2102 return 0;
2103 }
2104
2105 static const struct x86_reg *
2106 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2107 {
2108 uint8_t enc = instr->regmodrm.reg;
2109 const struct x86_reg *reg;
2110 size_t regsize;
2111
2112 regsize = instr->operand_size;
2113
2114 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2115 if (reg->num == -1) {
2116 reg = resolve_special_register(instr, enc, regsize);
2117 }
2118
2119 return reg;
2120 }
2121
2122 static const struct x86_reg *
2123 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2124 {
2125 uint8_t enc = instr->regmodrm.rm;
2126 const struct x86_reg *reg;
2127 size_t regsize;
2128
2129 if (instr->strm->disp.type == DISP_NONE) {
2130 regsize = instr->operand_size;
2131 } else {
2132 /* Indirect access, the size is that of the address. */
2133 regsize = instr->address_size;
2134 }
2135
2136 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2137 if (reg->num == -1) {
2138 reg = resolve_special_register(instr, enc, regsize);
2139 }
2140
2141 return reg;
2142 }
2143
2144 static inline bool
2145 has_sib(struct x86_instr *instr)
2146 {
2147 return (instr->regmodrm.mod != 3 && instr->regmodrm.rm == 4);
2148 }
2149
2150 static inline bool
2151 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2152 {
2153 return (fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2154 instr->regmodrm.rm == RM_RBP_DISP32);
2155 }
2156
2157 static inline bool
2158 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2159 {
2160 return (!fsm->is64bit && instr->strm->disp.type == DISP_0 &&
2161 instr->regmodrm.rm == RM_RBP_DISP32);
2162 }
2163
2164 static enum x86_disp_type
2165 get_disp_type(struct x86_instr *instr)
2166 {
2167 switch (instr->regmodrm.mod) {
2168 case MOD_DIS0: /* indirect */
2169 return DISP_0;
2170 case MOD_DIS1: /* indirect+1 */
2171 return DISP_1;
2172 case MOD_DIS4: /* indirect+4 */
2173 return DISP_4;
2174 case MOD_REG: /* direct */
2175 default: /* gcc */
2176 return DISP_NONE;
2177 }
2178 }
2179
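/*
 * Decode the ModRM byte and bind the REG and RM operands to the source and
 * destination slots according to the opcode direction. The RM operand, saved
 * in instr->strm, may then be refined into a SIB, RIP-relative or
 * displacement-only form.
 */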
2180 static int
2181 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2182 {
2183 struct x86_store *strg, *strm;
2184 const struct x86_opcode *opcode;
2185 const struct x86_reg *reg;
2186 uint8_t byte;
2187
2188 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2189 return -1;
2190 }
2191
2192 opcode = instr->opcode;
2193
2194 instr->regmodrm.present = true;
2195 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2196 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2197 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2198
2199 if (opcode->regtorm) {
2200 strg = &instr->src;
2201 strm = &instr->dst;
2202 } else { /* RM to REG */
2203 strm = &instr->src;
2204 strg = &instr->dst;
2205 }
2206
2207 /* Save for later use. */
2208 instr->strm = strm;
2209
2210 /*
2211 * Special cases: Groups. The REG field of REGMODRM is the index in
2212 * the group. op1 gets overwritten in the Immediate node, if any.
2213 */
2214 if (opcode->group1) {
2215 if (group1[instr->regmodrm.reg].emul == NULL) {
2216 return -1;
2217 }
2218 instr->emul = group1[instr->regmodrm.reg].emul;
2219 } else if (opcode->group3) {
2220 if (group3[instr->regmodrm.reg].emul == NULL) {
2221 return -1;
2222 }
2223 instr->emul = group3[instr->regmodrm.reg].emul;
2224 } else if (opcode->group11) {
2225 if (group11[instr->regmodrm.reg].emul == NULL) {
2226 return -1;
2227 }
2228 instr->emul = group11[instr->regmodrm.reg].emul;
2229 }
2230
2231 if (!opcode->immediate) {
2232 reg = get_register_reg(instr, opcode);
2233 if (reg == NULL) {
2234 return -1;
2235 }
2236 strg->type = STORE_REG;
2237 strg->u.reg = reg;
2238 }
2239
2240 if (has_sib(instr)) {
2241 /* Overwrites RM */
2242 fsm_advance(fsm, 1, node_sib);
2243 return 0;
2244 }
2245
2246 /* The displacement applies to RM. */
2247 strm->disp.type = get_disp_type(instr);
2248
2249 if (is_rip_relative(fsm, instr)) {
2250 /* Overwrites RM */
2251 strm->type = STORE_REG;
2252 strm->u.reg = &gpr_map__rip;
2253 strm->disp.type = DISP_4;
2254 fsm_advance(fsm, 1, node_disp);
2255 return 0;
2256 }
2257
2258 if (is_disp32_only(fsm, instr)) {
2259 /* Overwrites RM */
2260 strm->type = STORE_REG;
2261 strm->u.reg = NULL;
2262 strm->disp.type = DISP_4;
2263 fsm_advance(fsm, 1, node_disp);
2264 return 0;
2265 }
2266
2267 reg = get_register_rm(instr, opcode);
2268 if (reg == NULL) {
2269 return -1;
2270 }
2271 strm->type = STORE_REG;
2272 strm->u.reg = reg;
2273
2274 if (strm->disp.type == DISP_NONE) {
2275 /* Direct register addressing mode */
2276 if (opcode->immediate) {
2277 fsm_advance(fsm, 1, node_immediate);
2278 } else {
2279 fsm_advance(fsm, 1, NULL);
2280 }
2281 } else if (strm->disp.type == DISP_0) {
2282 /* Indirect register addressing mode */
2283 if (opcode->immediate) {
2284 fsm_advance(fsm, 1, node_immediate);
2285 } else {
2286 fsm_advance(fsm, 1, NULL);
2287 }
2288 } else {
2289 fsm_advance(fsm, 1, node_disp);
2290 }
2291
2292 return 0;
2293 }
2294
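/*
 * Operand size selection: opcodes with a fixed size use defsize. Otherwise
 * REX.W forces 64-bit operands, and the 0x66 prefix toggles between 16-bit
 * and 32-bit depending on the default operating mode.
 */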
2295 static size_t
2296 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2297 {
2298 const struct x86_opcode *opcode = instr->opcode;
2299 int opsize;
2300
2301 /* Get the opsize */
2302 if (!opcode->szoverride) {
2303 opsize = opcode->defsize;
2304 } else if (instr->rexpref.present && instr->rexpref.w) {
2305 opsize = 8;
2306 } else {
2307 if (!fsm->is16bit) {
2308 if (instr->legpref.opr_ovr) {
2309 opsize = 2;
2310 } else {
2311 opsize = 4;
2312 }
2313 } else { /* 16bit */
2314 if (instr->legpref.opr_ovr) {
2315 opsize = 4;
2316 } else {
2317 opsize = 2;
2318 }
2319 }
2320 }
2321
2322 /* See if available */
2323 if ((opcode->allsize & opsize) == 0) {
2324 		/* XXX do we care? */
2325 }
2326
2327 return opsize;
2328 }
2329
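/*
 * Address size selection: the 0x67 prefix switches 64-bit code to 32-bit
 * addressing, 32-bit code to 16-bit addressing, and 16-bit code to 32-bit
 * addressing.
 */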
2330 static size_t
2331 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2332 {
2333 if (fsm->is64bit) {
2334 if (__predict_false(instr->legpref.adr_ovr)) {
2335 return 4;
2336 }
2337 return 8;
2338 }
2339
2340 if (fsm->is32bit) {
2341 if (__predict_false(instr->legpref.adr_ovr)) {
2342 return 2;
2343 }
2344 return 4;
2345 }
2346
2347 /* 16bit. */
2348 if (__predict_false(instr->legpref.adr_ovr)) {
2349 return 4;
2350 }
2351 return 2;
2352 }
2353
2354 static int
2355 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2356 {
2357 const struct x86_opcode *opcode;
2358 uint8_t byte;
2359 size_t i, n;
2360
2361 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2362 return -1;
2363 }
2364
2365 n = sizeof(primary_opcode_table) / sizeof(primary_opcode_table[0]);
2366 for (i = 0; i < n; i++) {
2367 if (primary_opcode_table[i].byte == byte)
2368 break;
2369 }
2370 if (i == n) {
2371 return -1;
2372 }
2373 opcode = &primary_opcode_table[i];
2374
2375 instr->opcode = opcode;
2376 instr->emul = opcode->emul;
2377 instr->operand_size = get_operand_size(fsm, instr);
2378 instr->address_size = get_address_size(fsm, instr);
2379
2380 if (fsm->is64bit && (instr->operand_size == 4)) {
2381 /* Zero-extend to 64 bits. */
2382 instr->zeroextend_mask = ~size_to_mask(4);
2383 }
2384
2385 if (opcode->regmodrm) {
2386 fsm_advance(fsm, 1, node_regmodrm);
2387 } else if (opcode->dmo) {
2388 /* Direct-Memory Offsets */
2389 fsm_advance(fsm, 1, node_dmo);
2390 } else if (opcode->stos || opcode->lods) {
2391 fsm_advance(fsm, 1, node_stlo);
2392 } else if (opcode->movs) {
2393 fsm_advance(fsm, 1, node_movs);
2394 } else {
2395 return -1;
2396 }
2397
2398 return 0;
2399 }
2400
2401 static int
2402 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2403 {
2404 const struct x86_opcode *opcode;
2405 uint8_t byte;
2406 size_t i, n;
2407
2408 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2409 return -1;
2410 }
2411
2412 n = sizeof(secondary_opcode_table) / sizeof(secondary_opcode_table[0]);
2413 for (i = 0; i < n; i++) {
2414 if (secondary_opcode_table[i].byte == byte)
2415 break;
2416 }
2417 if (i == n) {
2418 return -1;
2419 }
2420 opcode = &secondary_opcode_table[i];
2421
2422 instr->opcode = opcode;
2423 instr->emul = opcode->emul;
2424 instr->operand_size = get_operand_size(fsm, instr);
2425 instr->address_size = get_address_size(fsm, instr);
2426
2427 if (fsm->is64bit && (instr->operand_size == 4)) {
2428 /* Zero-extend to 64 bits. */
2429 instr->zeroextend_mask = ~size_to_mask(4);
2430 }
2431
2432 if (opcode->flags & FLAG_ze) {
2433 /*
2434 		 * Compute the mask for zero-extension, and shrink the operand
2435 		 * size: we move fewer bytes.
2436 */
2437 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2438 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2439 instr->operand_size = opcode->defsize;
2440 }
2441
2442 if (opcode->regmodrm) {
2443 fsm_advance(fsm, 1, node_regmodrm);
2444 } else {
2445 return -1;
2446 }
2447
2448 return 0;
2449 }
2450
2451 static int
2452 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2453 {
2454 uint8_t byte;
2455
2456 #define ESCAPE 0x0F
2457 #define VEX_1 0xC5
2458 #define VEX_2 0xC4
2459 #define XOP 0x8F
2460
2461 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2462 return -1;
2463 }
2464
2465 /*
2466 	 * We don't take XOP (AMD-specific, and removed shortly after being
2467 	 * introduced), and we reject VEX-encoded instructions below as well.
2468 */
2469 if (byte == ESCAPE) {
2470 fsm_advance(fsm, 1, node_secondary_opcode);
2471 } else if (!instr->rexpref.present) {
2472 if (byte == VEX_1) {
2473 return -1;
2474 } else if (byte == VEX_2) {
2475 return -1;
2476 } else {
2477 fsm->fn = node_primary_opcode;
2478 }
2479 } else {
2480 fsm->fn = node_primary_opcode;
2481 }
2482
2483 return 0;
2484 }
2485
2486 static int
2487 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2488 {
2489 struct x86_rexpref *rexpref = &instr->rexpref;
2490 uint8_t byte;
2491 size_t n = 0;
2492
2493 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2494 return -1;
2495 }
2496
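	/*
	 * 0x40-0x4F is a REX prefix only in 64-bit mode. In 32-bit and 16-bit
	 * modes these bytes are INC/DEC opcodes, which we don't emulate, so
	 * decoding fails there.
	 */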
2497 if (byte >= 0x40 && byte <= 0x4F) {
2498 if (__predict_false(!fsm->is64bit)) {
2499 return -1;
2500 }
2501 rexpref->present = true;
2502 rexpref->w = ((byte & 0x8) != 0);
2503 rexpref->r = ((byte & 0x4) != 0);
2504 rexpref->x = ((byte & 0x2) != 0);
2505 rexpref->b = ((byte & 0x1) != 0);
2506 n = 1;
2507 }
2508
2509 fsm_advance(fsm, n, node_main);
2510 return 0;
2511 }
2512
2513 static int
2514 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2515 {
2516 uint8_t byte;
2517
2518 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2519 return -1;
2520 }
2521
2522 if (byte == LEG_OPR_OVR) {
2523 instr->legpref.opr_ovr = 1;
2524 } else if (byte == LEG_OVR_DS) {
2525 instr->legpref.seg = NVMM_X64_SEG_DS;
2526 } else if (byte == LEG_OVR_ES) {
2527 instr->legpref.seg = NVMM_X64_SEG_ES;
2528 } else if (byte == LEG_REP) {
2529 instr->legpref.rep = 1;
2530 } else if (byte == LEG_OVR_GS) {
2531 instr->legpref.seg = NVMM_X64_SEG_GS;
2532 } else if (byte == LEG_OVR_FS) {
2533 instr->legpref.seg = NVMM_X64_SEG_FS;
2534 } else if (byte == LEG_ADR_OVR) {
2535 instr->legpref.adr_ovr = 1;
2536 } else if (byte == LEG_OVR_CS) {
2537 instr->legpref.seg = NVMM_X64_SEG_CS;
2538 } else if (byte == LEG_OVR_SS) {
2539 instr->legpref.seg = NVMM_X64_SEG_SS;
2540 } else if (byte == LEG_REPN) {
2541 instr->legpref.repn = 1;
2542 } else if (byte == LEG_LOCK) {
2543 /* ignore */
2544 } else {
2545 /* not a legacy prefix */
2546 fsm_advance(fsm, 0, node_rex_prefix);
2547 return 0;
2548 }
2549
2550 fsm_advance(fsm, 1, node_legacy_prefix);
2551 return 0;
2552 }
2553
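/*
 * Decoder entry point: a small FSM where each node consumes bytes from the
 * instruction buffer and selects the next node. Decoding ends when a node
 * sets fsm.fn to NULL.
 */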
2554 static int
2555 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2556 struct nvmm_x64_state *state)
2557 {
2558 struct x86_decode_fsm fsm;
2559 int ret;
2560
2561 memset(instr, 0, sizeof(*instr));
2562 instr->legpref.seg = -1;
2563
2564 fsm.is64bit = is_64bit(state);
2565 fsm.is32bit = is_32bit(state);
2566 fsm.is16bit = is_16bit(state);
2567
2568 fsm.fn = node_legacy_prefix;
2569 fsm.buf = inst_bytes;
2570 fsm.end = inst_bytes + inst_len;
2571
2572 while (fsm.fn != NULL) {
2573 ret = (*fsm.fn)(&fsm, instr);
2574 if (ret == -1)
2575 return -1;
2576 }
2577
2578 instr->len = fsm.buf - inst_bytes;
2579
2580 return 0;
2581 }
2582
2583 /* -------------------------------------------------------------------------- */
2584
2585 #define EXEC_INSTR(sz, instr) \
2586 static uint##sz##_t \
2587 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags)\
2588 { \
2589 uint##sz##_t res; \
2590 __asm __volatile ( \
2591 #instr " %2, %3;" \
2592 "mov %3, %1;" \
2593 "pushfq;" \
2594 "popq %0" \
2595 : "=r" (*rflags), "=r" (res) \
2596 : "r" (op1), "r" (op2)); \
2597 return res; \
2598 }
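/*
 * For example, EXEC_INSTR(8, sub) generates roughly:
 *
 *	static uint8_t
 *	exec_sub8(uint8_t op1, uint8_t op2, uint64_t *rflags)
 *	{
 *		... execute a real 'sub' on the host and capture the
 *		    resulting RFLAGS with PUSHFQ/POPQ ...
 *	}
 *
 * EXEC_DISPATCHER(sub) then provides exec_sub(), which selects the variant
 * matching the operand size.
 */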
2599
2600 #define EXEC_DISPATCHER(instr) \
2601 static uint64_t \
2602 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2603 { \
2604 switch (opsize) { \
2605 case 1: \
2606 return exec_##instr##8(op1, op2, rflags); \
2607 case 2: \
2608 return exec_##instr##16(op1, op2, rflags); \
2609 case 4: \
2610 return exec_##instr##32(op1, op2, rflags); \
2611 default: \
2612 return exec_##instr##64(op1, op2, rflags); \
2613 } \
2614 }
2615
2616 /* SUB: ret = op1 - op2 */
2617 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2618 EXEC_INSTR(8, sub)
2619 EXEC_INSTR(16, sub)
2620 EXEC_INSTR(32, sub)
2621 EXEC_INSTR(64, sub)
2622 EXEC_DISPATCHER(sub)
2623
2624 /* OR: ret = op1 | op2 */
2625 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2626 EXEC_INSTR(8, or)
2627 EXEC_INSTR(16, or)
2628 EXEC_INSTR(32, or)
2629 EXEC_INSTR(64, or)
2630 EXEC_DISPATCHER(or)
2631
2632 /* AND: ret = op1 & op2 */
2633 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2634 EXEC_INSTR(8, and)
2635 EXEC_INSTR(16, and)
2636 EXEC_INSTR(32, and)
2637 EXEC_INSTR(64, and)
2638 EXEC_DISPATCHER(and)
2639
2640 /* XOR: ret = op1 ^ op2 */
2641 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2642 EXEC_INSTR(8, xor)
2643 EXEC_INSTR(16, xor)
2644 EXEC_INSTR(32, xor)
2645 EXEC_INSTR(64, xor)
2646 EXEC_DISPATCHER(xor)
2647
2648 /* -------------------------------------------------------------------------- */
2649
2650 /*
2651 * Emulation functions. We don't care about the order of the operands, except
2652  * for SUB, CMP and TEST. For those, we look at mem->write to determine
2653  * which operand is op1 and which is op2.
2654 */
2655
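/*
 * Common pattern of the arithmetic/logic emulators below: on entry,
 * mem->data holds the non-memory operand (a guest register value or an
 * immediate). The callback is invoked once as a read to fetch the memory
 * operand into a local, the operation is executed on the host, and the
 * result is either written back through the callback (memory destination)
 * or returned to the caller through the original buffer (register
 * destination).
 */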
2656 static void
2657 x86_func_or(struct nvmm_mem *mem, uint64_t *gprs)
2658 {
2659 uint64_t *retval = (uint64_t *)mem->data;
2660 const bool write = mem->write;
2661 uint64_t *op1, op2, fl, ret;
2662
2663 op1 = (uint64_t *)mem->data;
2664 op2 = 0;
2665
2666 /* Fetch the value to be OR'ed (op2). */
2667 mem->data = (uint8_t *)&op2;
2668 mem->write = false;
2669 (*__callbacks.mem)(mem);
2670
2671 /* Perform the OR. */
2672 ret = exec_or(*op1, op2, &fl, mem->size);
2673
2674 if (write) {
2675 /* Write back the result. */
2676 mem->data = (uint8_t *)&ret;
2677 mem->write = true;
2678 (*__callbacks.mem)(mem);
2679 } else {
2680 /* Return data to the caller. */
2681 *retval = ret;
2682 }
2683
2684 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2685 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2686 }
2687
2688 static void
2689 x86_func_and(struct nvmm_mem *mem, uint64_t *gprs)
2690 {
2691 uint64_t *retval = (uint64_t *)mem->data;
2692 const bool write = mem->write;
2693 uint64_t *op1, op2, fl, ret;
2694
2695 op1 = (uint64_t *)mem->data;
2696 op2 = 0;
2697
2698 /* Fetch the value to be AND'ed (op2). */
2699 mem->data = (uint8_t *)&op2;
2700 mem->write = false;
2701 (*__callbacks.mem)(mem);
2702
2703 /* Perform the AND. */
2704 ret = exec_and(*op1, op2, &fl, mem->size);
2705
2706 if (write) {
2707 /* Write back the result. */
2708 mem->data = (uint8_t *)&ret;
2709 mem->write = true;
2710 (*__callbacks.mem)(mem);
2711 } else {
2712 /* Return data to the caller. */
2713 *retval = ret;
2714 }
2715
2716 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2717 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2718 }
2719
2720 static void
2721 x86_func_sub(struct nvmm_mem *mem, uint64_t *gprs)
2722 {
2723 uint64_t *retval = (uint64_t *)mem->data;
2724 const bool write = mem->write;
2725 uint64_t *op1, *op2, fl, ret;
2726 uint64_t tmp;
2727 bool memop1;
2728
2729 memop1 = !mem->write;
2730 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2731 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2732
2733 /* Fetch the value to be SUB'ed (op1 or op2). */
2734 mem->data = (uint8_t *)&tmp;
2735 mem->write = false;
2736 (*__callbacks.mem)(mem);
2737
2738 /* Perform the SUB. */
2739 ret = exec_sub(*op1, *op2, &fl, mem->size);
2740
2741 if (write) {
2742 /* Write back the result. */
2743 mem->data = (uint8_t *)&ret;
2744 mem->write = true;
2745 (*__callbacks.mem)(mem);
2746 } else {
2747 /* Return data to the caller. */
2748 *retval = ret;
2749 }
2750
2751 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2752 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2753 }
2754
2755 static void
2756 x86_func_xor(struct nvmm_mem *mem, uint64_t *gprs)
2757 {
2758 uint64_t *retval = (uint64_t *)mem->data;
2759 const bool write = mem->write;
2760 uint64_t *op1, op2, fl, ret;
2761
2762 op1 = (uint64_t *)mem->data;
2763 op2 = 0;
2764
2765 /* Fetch the value to be XOR'ed (op2). */
2766 mem->data = (uint8_t *)&op2;
2767 mem->write = false;
2768 (*__callbacks.mem)(mem);
2769
2770 /* Perform the XOR. */
2771 ret = exec_xor(*op1, op2, &fl, mem->size);
2772
2773 if (write) {
2774 /* Write back the result. */
2775 mem->data = (uint8_t *)&ret;
2776 mem->write = true;
2777 (*__callbacks.mem)(mem);
2778 } else {
2779 /* Return data to the caller. */
2780 *retval = ret;
2781 }
2782
2783 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2784 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2785 }
2786
2787 static void
2788 x86_func_cmp(struct nvmm_mem *mem, uint64_t *gprs)
2789 {
2790 uint64_t *op1, *op2, fl;
2791 uint64_t tmp;
2792 bool memop1;
2793
2794 memop1 = !mem->write;
2795 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2796 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2797
2798 /* Fetch the value to be CMP'ed (op1 or op2). */
2799 mem->data = (uint8_t *)&tmp;
2800 mem->write = false;
2801 (*__callbacks.mem)(mem);
2802
2803 /* Perform the CMP. */
2804 exec_sub(*op1, *op2, &fl, mem->size);
2805
2806 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2807 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2808 }
2809
2810 static void
2811 x86_func_test(struct nvmm_mem *mem, uint64_t *gprs)
2812 {
2813 uint64_t *op1, *op2, fl;
2814 uint64_t tmp;
2815 bool memop1;
2816
2817 memop1 = !mem->write;
2818 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2819 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2820
2821 /* Fetch the value to be TEST'ed (op1 or op2). */
2822 mem->data = (uint8_t *)&tmp;
2823 mem->write = false;
2824 (*__callbacks.mem)(mem);
2825
2826 /* Perform the TEST. */
2827 exec_and(*op1, *op2, &fl, mem->size);
2828
2829 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2830 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2831 }
2832
2833 static void
2834 x86_func_mov(struct nvmm_mem *mem, uint64_t *gprs)
2835 {
2836 /*
2837 * Nothing special, just move without emulation.
2838 */
2839 (*__callbacks.mem)(mem);
2840 }
2841
2842 static void
2843 x86_func_stos(struct nvmm_mem *mem, uint64_t *gprs)
2844 {
2845 /*
2846 * Just move, and update RDI.
2847 */
2848 (*__callbacks.mem)(mem);
2849
2850 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2851 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2852 } else {
2853 gprs[NVMM_X64_GPR_RDI] += mem->size;
2854 }
2855 }
2856
2857 static void
2858 x86_func_lods(struct nvmm_mem *mem, uint64_t *gprs)
2859 {
2860 /*
2861 * Just move, and update RSI.
2862 */
2863 (*__callbacks.mem)(mem);
2864
2865 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2866 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2867 } else {
2868 gprs[NVMM_X64_GPR_RSI] += mem->size;
2869 }
2870 }
2871
2872 static void
2873 x86_func_movs(struct nvmm_mem *mem, uint64_t *gprs)
2874 {
2875 /*
2876 * Special instruction: double memory operand. Don't call the cb,
2877 	 * because the memory transfer has already been performed earlier.
2878 */
2879
2880 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2881 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2882 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2883 } else {
2884 gprs[NVMM_X64_GPR_RSI] += mem->size;
2885 gprs[NVMM_X64_GPR_RDI] += mem->size;
2886 }
2887 }
2888
2889 /* -------------------------------------------------------------------------- */
2890
2891 static inline uint64_t
2892 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2893 {
2894 uint64_t val;
2895
2896 val = state->gprs[gpr];
2897 val &= size_to_mask(instr->address_size);
2898
2899 return val;
2900 }
2901
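/*
 * Compute the guest virtual address of a memory operand:
 * base + index*scale + displacement, then apply the segment. In long mode
 * only FS and GS can carry a non-zero base and no limit check is done;
 * otherwise the access is first checked against the segment limit.
 */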
2902 static int
2903 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2904 struct x86_store *store, gvaddr_t *gvap, size_t size)
2905 {
2906 struct x86_sib *sib;
2907 gvaddr_t gva = 0;
2908 uint64_t reg;
2909 int ret, seg;
2910
2911 if (store->type == STORE_SIB) {
2912 sib = &store->u.sib;
2913 if (sib->bas != NULL)
2914 gva += gpr_read_address(instr, state, sib->bas->num);
2915 if (sib->idx != NULL) {
2916 reg = gpr_read_address(instr, state, sib->idx->num);
2917 gva += sib->scale * reg;
2918 }
2919 } else if (store->type == STORE_REG) {
2920 if (store->u.reg == NULL) {
2921 /* The base is null. Happens with disp32-only. */
2922 } else {
2923 gva = gpr_read_address(instr, state, store->u.reg->num);
2924 }
2925 } else {
2926 gva = store->u.dmo;
2927 }
2928
2929 if (store->disp.type != DISP_NONE) {
2930 gva += store->disp.data;
2931 }
2932
2933 if (store->hardseg != 0) {
2934 seg = store->hardseg;
2935 } else {
2936 if (__predict_false(instr->legpref.seg != -1)) {
2937 seg = instr->legpref.seg;
2938 } else {
2939 seg = NVMM_X64_SEG_DS;
2940 }
2941 }
2942
2943 if (__predict_true(is_long_mode(state))) {
2944 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
2945 segment_apply(&state->segs[seg], &gva);
2946 }
2947 } else {
2948 ret = segment_check(&state->segs[seg], gva, size);
2949 if (ret == -1)
2950 return -1;
2951 segment_apply(&state->segs[seg], &gva);
2952 }
2953
2954 *gvap = gva;
2955 return 0;
2956 }
2957
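/*
 * Pre-scan the instruction's legacy prefixes to find the effective segment
 * of its memory operand. Defaults to DS when no segment override prefix is
 * present.
 */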
2958 static int
2959 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
2960 {
2961 uint8_t inst_bytes[15], byte;
2962 size_t i, fetchsize;
2963 gvaddr_t gva;
2964 int ret, seg;
2965
2966 fetchsize = sizeof(inst_bytes);
2967
2968 gva = state->gprs[NVMM_X64_GPR_RIP];
2969 if (__predict_false(!is_long_mode(state))) {
2970 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
2971 fetchsize);
2972 if (ret == -1)
2973 return -1;
2974 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
2975 }
2976
2977 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
2978 if (ret == -1)
2979 return -1;
2980
2981 seg = NVMM_X64_SEG_DS;
2982 for (i = 0; i < fetchsize; i++) {
2983 byte = inst_bytes[i];
2984
2985 if (byte == LEG_OVR_DS) {
2986 seg = NVMM_X64_SEG_DS;
2987 } else if (byte == LEG_OVR_ES) {
2988 seg = NVMM_X64_SEG_ES;
2989 } else if (byte == LEG_OVR_GS) {
2990 seg = NVMM_X64_SEG_GS;
2991 } else if (byte == LEG_OVR_FS) {
2992 seg = NVMM_X64_SEG_FS;
2993 } else if (byte == LEG_OVR_CS) {
2994 seg = NVMM_X64_SEG_CS;
2995 } else if (byte == LEG_OVR_SS) {
2996 seg = NVMM_X64_SEG_SS;
2997 } else if (byte == LEG_OPR_OVR) {
2998 /* nothing */
2999 } else if (byte == LEG_ADR_OVR) {
3000 /* nothing */
3001 } else if (byte == LEG_REP) {
3002 /* nothing */
3003 } else if (byte == LEG_REPN) {
3004 /* nothing */
3005 } else if (byte == LEG_LOCK) {
3006 /* nothing */
3007 } else {
3008 return seg;
3009 }
3010 }
3011
3012 return seg;
3013 }
3014
3015 static int
3016 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3017 struct nvmm_exit *exit)
3018 {
3019 size_t fetchsize;
3020 gvaddr_t gva;
3021 int ret;
3022
3023 fetchsize = sizeof(exit->u.mem.inst_bytes);
3024
3025 gva = state->gprs[NVMM_X64_GPR_RIP];
3026 if (__predict_false(!is_long_mode(state))) {
3027 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3028 fetchsize);
3029 if (ret == -1)
3030 return -1;
3031 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3032 }
3033
3034 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3035 fetchsize);
3036 if (ret == -1)
3037 return -1;
3038
3039 exit->u.mem.inst_len = fetchsize;
3040
3041 return 0;
3042 }
3043
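/*
 * Emulate MOVS: both operands are in memory, so the data is copied from the
 * source GVA to the destination GVA here, and the emul function afterwards
 * only updates RSI/RDI.
 */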
3044 static int
3045 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3046 struct x86_instr *instr)
3047 {
3048 struct nvmm_mem mem;
3049 uint8_t data[8];
3050 gvaddr_t gva;
3051 size_t size;
3052 int ret;
3053
3054 size = instr->operand_size;
3055
3056 /* Source. */
3057 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3058 if (ret == -1)
3059 return -1;
3060 ret = read_guest_memory(mach, state, gva, data, size);
3061 if (ret == -1)
3062 return -1;
3063
3064 /* Destination. */
3065 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3066 if (ret == -1)
3067 return -1;
3068 ret = write_guest_memory(mach, state, gva, data, size);
3069 if (ret == -1)
3070 return -1;
3071
3072 mem.size = size;
3073 (*instr->emul->func)(&mem, state->gprs);
3074
3075 return 0;
3076 }
3077
3078 #define DISASSEMBLER_BUG() \
3079 do { \
3080 errno = EINVAL; \
3081 return -1; \
3082 	} while (0)
3083
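/*
 * Emulate an instruction with a single memory operand. Whether the memory
 * is the destination (mem.write) is inferred from the type of the source
 * operand: a direct register or an immediate source means the guest writes
 * to memory, while an indirect, SIB or DMO source means it reads from it.
 */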
3084 static int
3085 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3086 struct x86_instr *instr, struct nvmm_exit *exit)
3087 {
3088 struct nvmm_mem mem;
3089 uint8_t membuf[8];
3090 uint64_t val;
3091
3092 memset(membuf, 0, sizeof(membuf));
3093
3094 mem.gpa = exit->u.mem.gpa;
3095 mem.size = instr->operand_size;
3096 mem.data = membuf;
3097
3098 /* Determine the direction. */
3099 switch (instr->src.type) {
3100 case STORE_REG:
3101 if (instr->src.disp.type != DISP_NONE) {
3102 /* Indirect access. */
3103 mem.write = false;
3104 } else {
3105 /* Direct access. */
3106 mem.write = true;
3107 }
3108 break;
3109 case STORE_IMM:
3110 mem.write = true;
3111 break;
3112 case STORE_SIB:
3113 mem.write = false;
3114 break;
3115 case STORE_DMO:
3116 mem.write = false;
3117 break;
3118 default:
3119 DISASSEMBLER_BUG();
3120 }
3121
3122 if (mem.write) {
3123 switch (instr->src.type) {
3124 case STORE_REG:
3125 if (instr->src.disp.type != DISP_NONE) {
3126 DISASSEMBLER_BUG();
3127 }
3128 val = state->gprs[instr->src.u.reg->num];
3129 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3130 memcpy(mem.data, &val, mem.size);
3131 break;
3132 case STORE_IMM:
3133 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3134 break;
3135 default:
3136 DISASSEMBLER_BUG();
3137 }
3138 } else if (instr->emul->read) {
3139 if (instr->dst.type != STORE_REG) {
3140 DISASSEMBLER_BUG();
3141 }
3142 if (instr->dst.disp.type != DISP_NONE) {
3143 DISASSEMBLER_BUG();
3144 }
3145 val = state->gprs[instr->dst.u.reg->num];
3146 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3147 memcpy(mem.data, &val, mem.size);
3148 }
3149
3150 (*instr->emul->func)(&mem, state->gprs);
3151
3152 if (!instr->emul->notouch && !mem.write) {
3153 if (instr->dst.type != STORE_REG) {
3154 DISASSEMBLER_BUG();
3155 }
3156 memcpy(&val, membuf, sizeof(uint64_t));
3157 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3158 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3159 state->gprs[instr->dst.u.reg->num] |= val;
3160 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3161 }
3162
3163 return 0;
3164 }
3165
3166 int
3167 nvmm_assist_mem(struct nvmm_machine *mach, nvmm_cpuid_t cpuid,
3168 struct nvmm_exit *exit)
3169 {
3170 struct nvmm_x64_state state;
3171 struct x86_instr instr;
3172 uint64_t cnt = 0; /* GCC */
3173 int ret;
3174
3175 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3176 errno = EINVAL;
3177 return -1;
3178 }
3179
3180 ret = nvmm_vcpu_getstate(mach, cpuid, &state,
3181 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3182 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3183 if (ret == -1)
3184 return -1;
3185
3186 if (exit->u.mem.inst_len == 0) {
3187 /*
3188 * The instruction was not fetched from the kernel. Fetch
3189 * it ourselves.
3190 */
3191 ret = fetch_instruction(mach, &state, exit);
3192 if (ret == -1)
3193 return -1;
3194 }
3195
3196 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3197 &instr, &state);
3198 if (ret == -1) {
3199 errno = ENODEV;
3200 return -1;
3201 }
3202
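	/*
	 * With a REP/REPN prefix, the iteration count is held in RCX, ECX or
	 * CX depending on the address size. A count of zero means the
	 * instruction performs no iteration, so we simply skip over it.
	 */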
3203 if (instr.legpref.rep || instr.legpref.repn) {
3204 cnt = rep_get_cnt(&state, instr.address_size);
3205 if (__predict_false(cnt == 0)) {
3206 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3207 goto out;
3208 }
3209 }
3210
3211 if (instr.opcode->movs) {
3212 ret = assist_mem_double(mach, &state, &instr);
3213 } else {
3214 ret = assist_mem_single(mach, &state, &instr, exit);
3215 }
3216 if (ret == -1) {
3217 errno = ENODEV;
3218 return -1;
3219 }
3220
3221 if (instr.legpref.rep || instr.legpref.repn) {
3222 cnt -= 1;
3223 rep_set_cnt(&state, instr.address_size, cnt);
3224 if (cnt == 0) {
3225 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3226 } else if (__predict_false(instr.legpref.repn)) {
3227 if (state.gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3228 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3229 }
3230 }
3231 } else {
3232 state.gprs[NVMM_X64_GPR_RIP] += instr.len;
3233 }
3234
3235 out:
3236 ret = nvmm_vcpu_setstate(mach, cpuid, &state, NVMM_X64_STATE_GPRS);
3237 if (ret == -1)
3238 return -1;
3239
3240 return 0;
3241 }
3242