1 /*	$NetBSD: libnvmm_x86.c,v 1.33 2019/10/14 10:39:24 maxv Exp $	*/
2
3 /*
4 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Maxime Villard.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <errno.h>
40 #include <sys/ioctl.h>
41 #include <sys/mman.h>
42 #include <machine/vmparam.h>
43 #include <machine/pte.h>
44 #include <machine/psl.h>
45
46 #define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
47 #define __cacheline_aligned __attribute__((__aligned__(64)))
48
49 #include <x86/specialreg.h>
50
51 /* -------------------------------------------------------------------------- */
52
53 /*
54 * Undocumented debugging function. Helpful.
55 */
56 int
57 nvmm_vcpu_dump(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
58 {
59 struct nvmm_x64_state *state = vcpu->state;
60 uint16_t *attr;
61 size_t i;
62 int ret;
63
64 const char *segnames[] = {
65 "ES", "CS", "SS", "DS", "FS", "GS", "GDT", "IDT", "LDT", "TR"
66 };
67
68 ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_ALL);
69 if (ret == -1)
70 return -1;
71
72 printf("+ VCPU id=%d\n", (int)vcpu->cpuid);
73 printf("| -> RIP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RIP]);
74 printf("| -> RSP=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RSP]);
75 printf("| -> RAX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RAX]);
76 printf("| -> RBX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RBX]);
77 printf("| -> RCX=%"PRIx64"\n", state->gprs[NVMM_X64_GPR_RCX]);
78 printf("| -> RFLAGS=%p\n", (void *)state->gprs[NVMM_X64_GPR_RFLAGS]);
79 for (i = 0; i < NVMM_X64_NSEG; i++) {
80 attr = (uint16_t *)&state->segs[i].attrib;
81 printf("| -> %s: sel=0x%x base=%"PRIx64", limit=%x, attrib=%x\n",
82 segnames[i],
83 state->segs[i].selector,
84 state->segs[i].base,
85 state->segs[i].limit,
86 *attr);
87 }
88 printf("| -> MSR_EFER=%"PRIx64"\n", state->msrs[NVMM_X64_MSR_EFER]);
89 printf("| -> CR0=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR0]);
90 printf("| -> CR3=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR3]);
91 printf("| -> CR4=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR4]);
92 printf("| -> CR8=%"PRIx64"\n", state->crs[NVMM_X64_CR_CR8]);
93
94 return 0;
95 }
96
97 /* -------------------------------------------------------------------------- */
98
99 #define PTE32_L1_SHIFT 12
100 #define PTE32_L2_SHIFT 22
101
102 #define PTE32_L2_MASK 0xffc00000
103 #define PTE32_L1_MASK 0x003ff000
104
105 #define PTE32_L2_FRAME (PTE32_L2_MASK)
106 #define PTE32_L1_FRAME (PTE32_L2_FRAME|PTE32_L1_MASK)
107
108 #define pte32_l1idx(va) (((va) & PTE32_L1_MASK) >> PTE32_L1_SHIFT)
109 #define pte32_l2idx(va) (((va) & PTE32_L2_MASK) >> PTE32_L2_SHIFT)
110
111 #define CR3_FRAME_32BIT PG_FRAME
112
113 typedef uint32_t pte_32bit_t;
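/*
 * Worked example for the two-level (non-PAE) walk below: the GVA
 * 0x00403025 has L2 index 0x001 (bits 31:22), L1 index 0x003
 * (bits 21:12) and page offset 0x025 (bits 11:0). If the L2 entry has
 * PG_PS set (4MB page, which requires CR4.PSE), the translation stops
 * there and bits 21:0 of the GVA become the offset into the large frame.
 */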
114
115 static int
116 x86_gva_to_gpa_32bit(struct nvmm_machine *mach, uint64_t cr3,
117 gvaddr_t gva, gpaddr_t *gpa, bool has_pse, nvmm_prot_t *prot)
118 {
119 gpaddr_t L2gpa, L1gpa;
120 uintptr_t L2hva, L1hva;
121 pte_32bit_t *pdir, pte;
122 nvmm_prot_t pageprot;
123
124 /* We begin with an RWXU access. */
125 *prot = NVMM_PROT_ALL;
126
127 /* Parse L2. */
128 L2gpa = (cr3 & CR3_FRAME_32BIT);
129 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
130 return -1;
131 pdir = (pte_32bit_t *)L2hva;
132 pte = pdir[pte32_l2idx(gva)];
133 if ((pte & PG_V) == 0)
134 return -1;
135 if ((pte & PG_u) == 0)
136 *prot &= ~NVMM_PROT_USER;
137 if ((pte & PG_KW) == 0)
138 *prot &= ~NVMM_PROT_WRITE;
139 if ((pte & PG_PS) && !has_pse)
140 return -1;
141 if (pte & PG_PS) {
142 *gpa = (pte & PTE32_L2_FRAME);
143 *gpa = *gpa + (gva & PTE32_L1_MASK);
144 return 0;
145 }
146
147 /* Parse L1. */
148 L1gpa = (pte & PG_FRAME);
149 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
150 return -1;
151 pdir = (pte_32bit_t *)L1hva;
152 pte = pdir[pte32_l1idx(gva)];
153 if ((pte & PG_V) == 0)
154 return -1;
155 if ((pte & PG_u) == 0)
156 *prot &= ~NVMM_PROT_USER;
157 if ((pte & PG_KW) == 0)
158 *prot &= ~NVMM_PROT_WRITE;
159 if (pte & PG_PS)
160 return -1;
161
162 *gpa = (pte & PG_FRAME);
163 return 0;
164 }
165
166 /* -------------------------------------------------------------------------- */
167
168 #define PTE32_PAE_L1_SHIFT 12
169 #define PTE32_PAE_L2_SHIFT 21
170 #define PTE32_PAE_L3_SHIFT 30
171
172 #define PTE32_PAE_L3_MASK 0xc0000000
173 #define PTE32_PAE_L2_MASK 0x3fe00000
174 #define PTE32_PAE_L1_MASK 0x001ff000
175
176 #define PTE32_PAE_L3_FRAME (PTE32_PAE_L3_MASK)
177 #define PTE32_PAE_L2_FRAME (PTE32_PAE_L3_FRAME|PTE32_PAE_L2_MASK)
178 #define PTE32_PAE_L1_FRAME (PTE32_PAE_L2_FRAME|PTE32_PAE_L1_MASK)
179
180 #define pte32_pae_l1idx(va) (((va) & PTE32_PAE_L1_MASK) >> PTE32_PAE_L1_SHIFT)
181 #define pte32_pae_l2idx(va) (((va) & PTE32_PAE_L2_MASK) >> PTE32_PAE_L2_SHIFT)
182 #define pte32_pae_l3idx(va) (((va) & PTE32_PAE_L3_MASK) >> PTE32_PAE_L3_SHIFT)
183
184 #define CR3_FRAME_32BIT_PAE __BITS(31, 5)
185
186 typedef uint64_t pte_32bit_pae_t;
187
188 static int
189 x86_gva_to_gpa_32bit_pae(struct nvmm_machine *mach, uint64_t cr3,
190 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
191 {
192 gpaddr_t L3gpa, L2gpa, L1gpa;
193 uintptr_t L3hva, L2hva, L1hva;
194 pte_32bit_pae_t *pdir, pte;
195 nvmm_prot_t pageprot;
196
197 /* We begin with an RWXU access. */
198 *prot = NVMM_PROT_ALL;
199
200 /* Parse L3. */
201 L3gpa = (cr3 & CR3_FRAME_32BIT_PAE);
202 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
203 return -1;
204 pdir = (pte_32bit_pae_t *)L3hva;
205 pte = pdir[pte32_pae_l3idx(gva)];
206 if ((pte & PG_V) == 0)
207 return -1;
208 if (pte & PG_NX)
209 *prot &= ~NVMM_PROT_EXEC;
210 if (pte & PG_PS)
211 return -1;
212
213 /* Parse L2. */
214 L2gpa = (pte & PG_FRAME);
215 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
216 return -1;
217 pdir = (pte_32bit_pae_t *)L2hva;
218 pte = pdir[pte32_pae_l2idx(gva)];
219 if ((pte & PG_V) == 0)
220 return -1;
221 if ((pte & PG_u) == 0)
222 *prot &= ~NVMM_PROT_USER;
223 if ((pte & PG_KW) == 0)
224 *prot &= ~NVMM_PROT_WRITE;
225 if (pte & PG_NX)
226 *prot &= ~NVMM_PROT_EXEC;
227 if (pte & PG_PS) {
228 *gpa = (pte & PTE32_PAE_L2_FRAME);
229 *gpa = *gpa + (gva & PTE32_PAE_L1_MASK);
230 return 0;
231 }
232
233 /* Parse L1. */
234 L1gpa = (pte & PG_FRAME);
235 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
236 return -1;
237 pdir = (pte_32bit_pae_t *)L1hva;
238 pte = pdir[pte32_pae_l1idx(gva)];
239 if ((pte & PG_V) == 0)
240 return -1;
241 if ((pte & PG_u) == 0)
242 *prot &= ~NVMM_PROT_USER;
243 if ((pte & PG_KW) == 0)
244 *prot &= ~NVMM_PROT_WRITE;
245 if (pte & PG_NX)
246 *prot &= ~NVMM_PROT_EXEC;
247 if (pte & PG_PS)
248 return -1;
249
250 *gpa = (pte & PG_FRAME);
251 return 0;
252 }
253
254 /* -------------------------------------------------------------------------- */
255
256 #define PTE64_L1_SHIFT 12
257 #define PTE64_L2_SHIFT 21
258 #define PTE64_L3_SHIFT 30
259 #define PTE64_L4_SHIFT 39
260
261 #define PTE64_L4_MASK 0x0000ff8000000000
262 #define PTE64_L3_MASK 0x0000007fc0000000
263 #define PTE64_L2_MASK 0x000000003fe00000
264 #define PTE64_L1_MASK 0x00000000001ff000
265
266 #define PTE64_L4_FRAME PTE64_L4_MASK
267 #define PTE64_L3_FRAME (PTE64_L4_FRAME|PTE64_L3_MASK)
268 #define PTE64_L2_FRAME (PTE64_L3_FRAME|PTE64_L2_MASK)
269 #define PTE64_L1_FRAME (PTE64_L2_FRAME|PTE64_L1_MASK)
270
271 #define pte64_l1idx(va) (((va) & PTE64_L1_MASK) >> PTE64_L1_SHIFT)
272 #define pte64_l2idx(va) (((va) & PTE64_L2_MASK) >> PTE64_L2_SHIFT)
273 #define pte64_l3idx(va) (((va) & PTE64_L3_MASK) >> PTE64_L3_SHIFT)
274 #define pte64_l4idx(va) (((va) & PTE64_L4_MASK) >> PTE64_L4_SHIFT)
275
276 #define CR3_FRAME_64BIT PG_FRAME
277
278 typedef uint64_t pte_64bit_t;
279
280 static inline bool
281 x86_gva_64bit_canonical(gvaddr_t gva)
282 {
283 /* Bits 63:47 must have the same value. */
284 #define SIGN_EXTEND 0xffff800000000000ULL
285 return (gva & SIGN_EXTEND) == 0 || (gva & SIGN_EXTEND) == SIGN_EXTEND;
286 }
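/*
 * Illustration: 0x00007fffffffe000 and 0xffff800000000000 are canonical
 * (bits 63:47 all clear, respectively all set), while 0x0000800000000000
 * is not, and makes the caller fail the translation with EFAULT.
 */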
287
288 static int
289 x86_gva_to_gpa_64bit(struct nvmm_machine *mach, uint64_t cr3,
290 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
291 {
292 gpaddr_t L4gpa, L3gpa, L2gpa, L1gpa;
293 uintptr_t L4hva, L3hva, L2hva, L1hva;
294 pte_64bit_t *pdir, pte;
295 nvmm_prot_t pageprot;
296
297 /* We begin with an RWXU access. */
298 *prot = NVMM_PROT_ALL;
299
300 if (!x86_gva_64bit_canonical(gva))
301 return -1;
302
303 /* Parse L4. */
304 L4gpa = (cr3 & CR3_FRAME_64BIT);
305 if (nvmm_gpa_to_hva(mach, L4gpa, &L4hva, &pageprot) == -1)
306 return -1;
307 pdir = (pte_64bit_t *)L4hva;
308 pte = pdir[pte64_l4idx(gva)];
309 if ((pte & PG_V) == 0)
310 return -1;
311 if ((pte & PG_u) == 0)
312 *prot &= ~NVMM_PROT_USER;
313 if ((pte & PG_KW) == 0)
314 *prot &= ~NVMM_PROT_WRITE;
315 if (pte & PG_NX)
316 *prot &= ~NVMM_PROT_EXEC;
317 if (pte & PG_PS)
318 return -1;
319
320 /* Parse L3. */
321 L3gpa = (pte & PG_FRAME);
322 if (nvmm_gpa_to_hva(mach, L3gpa, &L3hva, &pageprot) == -1)
323 return -1;
324 pdir = (pte_64bit_t *)L3hva;
325 pte = pdir[pte64_l3idx(gva)];
326 if ((pte & PG_V) == 0)
327 return -1;
328 if ((pte & PG_u) == 0)
329 *prot &= ~NVMM_PROT_USER;
330 if ((pte & PG_KW) == 0)
331 *prot &= ~NVMM_PROT_WRITE;
332 if (pte & PG_NX)
333 *prot &= ~NVMM_PROT_EXEC;
334 if (pte & PG_PS) {
335 *gpa = (pte & PTE64_L3_FRAME);
336 *gpa = *gpa + (gva & (PTE64_L2_MASK|PTE64_L1_MASK));
337 return 0;
338 }
339
340 /* Parse L2. */
341 L2gpa = (pte & PG_FRAME);
342 if (nvmm_gpa_to_hva(mach, L2gpa, &L2hva, &pageprot) == -1)
343 return -1;
344 pdir = (pte_64bit_t *)L2hva;
345 pte = pdir[pte64_l2idx(gva)];
346 if ((pte & PG_V) == 0)
347 return -1;
348 if ((pte & PG_u) == 0)
349 *prot &= ~NVMM_PROT_USER;
350 if ((pte & PG_KW) == 0)
351 *prot &= ~NVMM_PROT_WRITE;
352 if (pte & PG_NX)
353 *prot &= ~NVMM_PROT_EXEC;
354 if (pte & PG_PS) {
355 *gpa = (pte & PTE64_L2_FRAME);
356 *gpa = *gpa + (gva & PTE64_L1_MASK);
357 return 0;
358 }
359
360 /* Parse L1. */
361 L1gpa = (pte & PG_FRAME);
362 if (nvmm_gpa_to_hva(mach, L1gpa, &L1hva, &pageprot) == -1)
363 return -1;
364 pdir = (pte_64bit_t *)L1hva;
365 pte = pdir[pte64_l1idx(gva)];
366 if ((pte & PG_V) == 0)
367 return -1;
368 if ((pte & PG_u) == 0)
369 *prot &= ~NVMM_PROT_USER;
370 if ((pte & PG_KW) == 0)
371 *prot &= ~NVMM_PROT_WRITE;
372 if (pte & PG_NX)
373 *prot &= ~NVMM_PROT_EXEC;
374 if (pte & PG_PS)
375 return -1;
376
377 *gpa = (pte & PG_FRAME);
378 return 0;
379 }
380
381 static inline int
382 x86_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_x64_state *state,
383 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
384 {
385 bool is_pae, is_lng, has_pse;
386 uint64_t cr3;
387 size_t off;
388 int ret;
389
390 if ((state->crs[NVMM_X64_CR_CR0] & CR0_PG) == 0) {
391 /* No paging. */
392 *prot = NVMM_PROT_ALL;
393 *gpa = gva;
394 return 0;
395 }
396
397 off = (gva & PAGE_MASK);
398 gva &= ~PAGE_MASK;
399
400 is_pae = (state->crs[NVMM_X64_CR_CR4] & CR4_PAE) != 0;
401 is_lng = (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
402 has_pse = (state->crs[NVMM_X64_CR_CR4] & CR4_PSE) != 0;
403 cr3 = state->crs[NVMM_X64_CR_CR3];
404
405 if (is_pae && is_lng) {
406 /* 64bit */
407 ret = x86_gva_to_gpa_64bit(mach, cr3, gva, gpa, prot);
408 } else if (is_pae && !is_lng) {
409 /* 32bit PAE */
410 ret = x86_gva_to_gpa_32bit_pae(mach, cr3, gva, gpa, prot);
411 } else if (!is_pae && !is_lng) {
412 /* 32bit */
413 ret = x86_gva_to_gpa_32bit(mach, cr3, gva, gpa, has_pse, prot);
414 } else {
415 ret = -1;
416 }
417
418 if (ret == -1) {
419 errno = EFAULT;
420 }
421
422 *gpa = *gpa + off;
423
424 return ret;
425 }
426
427 int
428 nvmm_gva_to_gpa(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu,
429 gvaddr_t gva, gpaddr_t *gpa, nvmm_prot_t *prot)
430 {
431 struct nvmm_x64_state *state = vcpu->state;
432 int ret;
433
434 ret = nvmm_vcpu_getstate(mach, vcpu,
435 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
436 if (ret == -1)
437 return -1;
438
439 return x86_gva_to_gpa(mach, state, gva, gpa, prot);
440 }
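/*
 * Usage sketch (hypothetical caller, error handling omitted): translate
 * a guest virtual address from the VMM before touching guest memory.
 *
 *	gpaddr_t gpa;
 *	nvmm_prot_t prot;
 *
 *	if (nvmm_gva_to_gpa(mach, vcpu, gva, &gpa, &prot) == 0 &&
 *	    (prot & NVMM_PROT_READ))
 *		... feed gpa to nvmm_gpa_to_hva(), etc.
 */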
441
442 /* -------------------------------------------------------------------------- */
443
444 #define DISASSEMBLER_BUG()	\
445 	do {			\
446 		errno = EINVAL;	\
447 		return -1;	\
448 	} while (0)
449
450 static inline bool
451 is_long_mode(struct nvmm_x64_state *state)
452 {
453 return (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) != 0;
454 }
455
456 static inline bool
457 is_64bit(struct nvmm_x64_state *state)
458 {
459 return (state->segs[NVMM_X64_SEG_CS].attrib.l != 0);
460 }
461
462 static inline bool
463 is_32bit(struct nvmm_x64_state *state)
464 {
465 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
466 (state->segs[NVMM_X64_SEG_CS].attrib.def == 1);
467 }
468
469 static inline bool
470 is_16bit(struct nvmm_x64_state *state)
471 {
472 return (state->segs[NVMM_X64_SEG_CS].attrib.l == 0) &&
473 (state->segs[NVMM_X64_SEG_CS].attrib.def == 0);
474 }
475
476 static int
477 segment_check(struct nvmm_x64_state_seg *seg, gvaddr_t gva, size_t size)
478 {
479 uint64_t limit;
480
481 /*
482 * This is incomplete. We should check topdown, etc, really that's
483 * tiring.
484 */
485 if (__predict_false(!seg->attrib.p)) {
486 goto error;
487 }
488
489 limit = (uint64_t)seg->limit + 1;
490 if (__predict_true(seg->attrib.g)) {
491 limit *= PAGE_SIZE;
492 }
493
494 if (__predict_false(gva + size > limit)) {
495 goto error;
496 }
497
498 return 0;
499
500 error:
501 errno = EFAULT;
502 return -1;
503 }
504
505 static inline void
506 segment_apply(struct nvmm_x64_state_seg *seg, gvaddr_t *gva)
507 {
508 *gva += seg->base;
509 }
510
511 static inline uint64_t
512 size_to_mask(size_t size)
513 {
514 switch (size) {
515 case 1:
516 return 0x00000000000000FF;
517 case 2:
518 return 0x000000000000FFFF;
519 case 4:
520 return 0x00000000FFFFFFFF;
521 case 8:
522 default:
523 return 0xFFFFFFFFFFFFFFFF;
524 }
525 }
526
527 static uint64_t
528 rep_get_cnt(struct nvmm_x64_state *state, size_t adsize)
529 {
530 uint64_t mask, cnt;
531
532 mask = size_to_mask(adsize);
533 cnt = state->gprs[NVMM_X64_GPR_RCX] & mask;
534
535 return cnt;
536 }
537
538 static void
539 rep_set_cnt(struct nvmm_x64_state *state, size_t adsize, uint64_t cnt)
540 {
541 uint64_t mask;
542
543 /* XXX: should we zero-extend? */
544 mask = size_to_mask(adsize);
545 state->gprs[NVMM_X64_GPR_RCX] &= ~mask;
546 state->gprs[NVMM_X64_GPR_RCX] |= cnt;
547 }
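/*
 * Illustration: for a REP-prefixed string I/O instruction with a 4-byte
 * address size and RCX=0xffffffff00000005, rep_get_cnt() returns 5; once
 * the five transfers are done, rep_set_cnt() clears the low 32 bits of
 * RCX and leaves the upper half untouched (see the XXX above about
 * zero-extension).
 */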
548
549 static int
550 read_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
551 gvaddr_t gva, uint8_t *data, size_t size)
552 {
553 struct nvmm_mem mem;
554 nvmm_prot_t prot;
555 gpaddr_t gpa;
556 uintptr_t hva;
557 bool is_mmio;
558 int ret, remain;
559
560 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
561 if (__predict_false(ret == -1)) {
562 return -1;
563 }
564 if (__predict_false(!(prot & NVMM_PROT_READ))) {
565 errno = EFAULT;
566 return -1;
567 }
568
569 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
570 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
571 } else {
572 remain = 0;
573 }
574 size -= remain;
575
576 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
577 is_mmio = (ret == -1);
578
579 if (is_mmio) {
580 mem.data = data;
581 mem.gpa = gpa;
582 mem.write = false;
583 mem.size = size;
584 (*mach->cbs.mem)(&mem);
585 } else {
586 if (__predict_false(!(prot & NVMM_PROT_READ))) {
587 errno = EFAULT;
588 return -1;
589 }
590 memcpy(data, (uint8_t *)hva, size);
591 }
592
593 if (remain > 0) {
594 ret = read_guest_memory(mach, state, gva + size,
595 data + size, remain);
596 } else {
597 ret = 0;
598 }
599
600 return ret;
601 }
602
603 static int
604 write_guest_memory(struct nvmm_machine *mach, struct nvmm_x64_state *state,
605 gvaddr_t gva, uint8_t *data, size_t size)
606 {
607 struct nvmm_mem mem;
608 nvmm_prot_t prot;
609 gpaddr_t gpa;
610 uintptr_t hva;
611 bool is_mmio;
612 int ret, remain;
613
614 ret = x86_gva_to_gpa(mach, state, gva, &gpa, &prot);
615 if (__predict_false(ret == -1)) {
616 return -1;
617 }
618 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
619 errno = EFAULT;
620 return -1;
621 }
622
623 if ((gva & PAGE_MASK) + size > PAGE_SIZE) {
624 remain = ((gva & PAGE_MASK) + size - PAGE_SIZE);
625 } else {
626 remain = 0;
627 }
628 size -= remain;
629
630 ret = nvmm_gpa_to_hva(mach, gpa, &hva, &prot);
631 is_mmio = (ret == -1);
632
633 if (is_mmio) {
634 mem.data = data;
635 mem.gpa = gpa;
636 mem.write = true;
637 mem.size = size;
638 (*mach->cbs.mem)(&mem);
639 } else {
640 if (__predict_false(!(prot & NVMM_PROT_WRITE))) {
641 errno = EFAULT;
642 return -1;
643 }
644 memcpy((uint8_t *)hva, data, size);
645 }
646
647 if (remain > 0) {
648 ret = write_guest_memory(mach, state, gva + size,
649 data + size, remain);
650 } else {
651 ret = 0;
652 }
653
654 return ret;
655 }
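/*
 * Both accessors above split accesses that cross a page boundary and
 * recurse for the remainder. Example: a 4-byte read at a GVA whose page
 * offset is 0xffe is performed as 2 bytes from the first page plus
 * 2 bytes from the next one, each chunk getting its own GVA->GPA
 * translation; the two halves may therefore target unrelated guest
 * pages, or MMIO.
 */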
656
657 /* -------------------------------------------------------------------------- */
658
659 static int fetch_segment(struct nvmm_machine *, struct nvmm_x64_state *);
660
661 #define NVMM_IO_BATCH_SIZE 32
662
663 static int
664 assist_io_batch(struct nvmm_machine *mach, struct nvmm_x64_state *state,
665 struct nvmm_io *io, gvaddr_t gva, uint64_t cnt)
666 {
667 uint8_t iobuf[NVMM_IO_BATCH_SIZE];
668 size_t i, iosize, iocnt;
669 int ret;
670
671 cnt = MIN(cnt, NVMM_IO_BATCH_SIZE);
672 iosize = MIN(io->size * cnt, NVMM_IO_BATCH_SIZE);
673 iocnt = iosize / io->size;
674
675 io->data = iobuf;
676
677 if (!io->in) {
678 ret = read_guest_memory(mach, state, gva, iobuf, iosize);
679 if (ret == -1)
680 return -1;
681 }
682
683 for (i = 0; i < iocnt; i++) {
684 (*mach->cbs.io)(io);
685 io->data += io->size;
686 }
687
688 if (io->in) {
689 ret = write_guest_memory(mach, state, gva, iobuf, iosize);
690 if (ret == -1)
691 return -1;
692 }
693
694 return iocnt;
695 }
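/*
 * Example: "rep outsb" with RCX=100 and a 1-byte operand is serviced in
 * batches of at most NVMM_IO_BATCH_SIZE bytes: the guest buffer is read
 * once, the I/O callback is invoked 32 times, and the caller then
 * updates RSI/RCX. Since RCX has not reached zero, RIP is left
 * unchanged and the guest re-executes the instruction, triggering the
 * next batch.
 */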
696
697 int
698 nvmm_assist_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
699 {
700 struct nvmm_x64_state *state = vcpu->state;
701 struct nvmm_exit *exit = vcpu->exit;
702 struct nvmm_io io;
703 uint64_t cnt = 0; /* GCC */
704 uint8_t iobuf[8];
705 int iocnt = 1;
706 gvaddr_t gva = 0; /* GCC */
707 int reg = 0; /* GCC */
708 int ret, seg;
709 bool psld = false;
710
711 if (__predict_false(exit->reason != NVMM_EXIT_IO)) {
712 errno = EINVAL;
713 return -1;
714 }
715
716 io.port = exit->u.io.port;
717 io.in = (exit->u.io.type == NVMM_EXIT_IO_IN);
718 io.size = exit->u.io.operand_size;
719 io.data = iobuf;
720
721 ret = nvmm_vcpu_getstate(mach, vcpu,
722 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
723 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
724 if (ret == -1)
725 return -1;
726
727 if (exit->u.io.rep) {
728 cnt = rep_get_cnt(state, exit->u.io.address_size);
729 if (__predict_false(cnt == 0)) {
730 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
731 goto out;
732 }
733 }
734
735 if (__predict_false(state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_D)) {
736 psld = true;
737 }
738
739 /*
740 * Determine GVA.
741 */
742 if (exit->u.io.str) {
743 if (io.in) {
744 reg = NVMM_X64_GPR_RDI;
745 } else {
746 reg = NVMM_X64_GPR_RSI;
747 }
748
749 gva = state->gprs[reg];
750 gva &= size_to_mask(exit->u.io.address_size);
751
752 if (exit->u.io.seg != -1) {
753 seg = exit->u.io.seg;
754 } else {
755 if (io.in) {
756 seg = NVMM_X64_SEG_ES;
757 } else {
758 seg = fetch_segment(mach, state);
759 if (seg == -1)
760 return -1;
761 }
762 }
763
764 if (__predict_true(is_long_mode(state))) {
765 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
766 segment_apply(&state->segs[seg], &gva);
767 }
768 } else {
769 ret = segment_check(&state->segs[seg], gva, io.size);
770 if (ret == -1)
771 return -1;
772 segment_apply(&state->segs[seg], &gva);
773 }
774
775 if (exit->u.io.rep && !psld) {
776 iocnt = assist_io_batch(mach, state, &io, gva, cnt);
777 if (iocnt == -1)
778 return -1;
779 goto done;
780 }
781 }
782
783 if (!io.in) {
784 if (!exit->u.io.str) {
785 memcpy(io.data, &state->gprs[NVMM_X64_GPR_RAX], io.size);
786 } else {
787 ret = read_guest_memory(mach, state, gva, io.data,
788 io.size);
789 if (ret == -1)
790 return -1;
791 }
792 }
793
794 (*mach->cbs.io)(&io);
795
796 if (io.in) {
797 if (!exit->u.io.str) {
798 memcpy(&state->gprs[NVMM_X64_GPR_RAX], io.data, io.size);
799 if (io.size == 4) {
800 /* Zero-extend to 64 bits. */
801 state->gprs[NVMM_X64_GPR_RAX] &= size_to_mask(4);
802 }
803 } else {
804 ret = write_guest_memory(mach, state, gva, io.data,
805 io.size);
806 if (ret == -1)
807 return -1;
808 }
809 }
810
811 done:
812 if (exit->u.io.str) {
813 if (__predict_false(psld)) {
814 state->gprs[reg] -= iocnt * io.size;
815 } else {
816 state->gprs[reg] += iocnt * io.size;
817 }
818 }
819
820 if (exit->u.io.rep) {
821 cnt -= iocnt;
822 rep_set_cnt(state, exit->u.io.address_size, cnt);
823 if (cnt == 0) {
824 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
825 }
826 } else {
827 state->gprs[NVMM_X64_GPR_RIP] = exit->u.io.npc;
828 }
829
830 out:
831 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
832 if (ret == -1)
833 return -1;
834
835 return 0;
836 }
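/*
 * Hypothetical run-loop sketch (the exact shape is up to the VMM)
 * showing where the I/O assist fits:
 *
 *	while (nvmm_vcpu_run(mach, vcpu) == 0) {
 *		switch (vcpu->exit->reason) {
 *		case NVMM_EXIT_IO:
 *			if (nvmm_assist_io(mach, vcpu) == -1)
 *				err(EXIT_FAILURE, "I/O assist failed");
 *			break;
 *		...
 *		}
 *	}
 */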
837
838 /* -------------------------------------------------------------------------- */
839
840 struct x86_emul {
841 bool readreg;
842 bool backprop;
843 bool notouch;
844 void (*func)(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
845 };
846
847 static void x86_func_or(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
848 static void x86_func_and(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
849 static void x86_func_xchg(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
850 static void x86_func_sub(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
851 static void x86_func_xor(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
852 static void x86_func_cmp(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
853 static void x86_func_test(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
854 static void x86_func_mov(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
855 static void x86_func_stos(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
856 static void x86_func_lods(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
857 static void x86_func_movs(struct nvmm_machine *, struct nvmm_mem *, uint64_t *);
858
859 static const struct x86_emul x86_emul_or = {
860 .readreg = true,
861 .func = x86_func_or
862 };
863
864 static const struct x86_emul x86_emul_and = {
865 .readreg = true,
866 .func = x86_func_and
867 };
868
869 static const struct x86_emul x86_emul_xchg = {
870 .readreg = true,
871 .backprop = true,
872 .func = x86_func_xchg
873 };
874
875 static const struct x86_emul x86_emul_sub = {
876 .readreg = true,
877 .func = x86_func_sub
878 };
879
880 static const struct x86_emul x86_emul_xor = {
881 .readreg = true,
882 .func = x86_func_xor
883 };
884
885 static const struct x86_emul x86_emul_cmp = {
886 .notouch = true,
887 .func = x86_func_cmp
888 };
889
890 static const struct x86_emul x86_emul_test = {
891 .notouch = true,
892 .func = x86_func_test
893 };
894
895 static const struct x86_emul x86_emul_mov = {
896 .func = x86_func_mov
897 };
898
899 static const struct x86_emul x86_emul_stos = {
900 .func = x86_func_stos
901 };
902
903 static const struct x86_emul x86_emul_lods = {
904 .func = x86_func_lods
905 };
906
907 static const struct x86_emul x86_emul_movs = {
908 .func = x86_func_movs
909 };
910
911 /* Legacy prefixes. */
912 #define LEG_LOCK 0xF0
913 #define LEG_REPN 0xF2
914 #define LEG_REP 0xF3
915 #define LEG_OVR_CS 0x2E
916 #define LEG_OVR_SS 0x36
917 #define LEG_OVR_DS 0x3E
918 #define LEG_OVR_ES 0x26
919 #define LEG_OVR_FS 0x64
920 #define LEG_OVR_GS 0x65
921 #define LEG_OPR_OVR 0x66
922 #define LEG_ADR_OVR 0x67
923
924 struct x86_legpref {
925 bool opr_ovr:1;
926 bool adr_ovr:1;
927 bool rep:1;
928 bool repn:1;
929 int8_t seg;
930 };
931
932 struct x86_rexpref {
933 bool b:1;
934 bool x:1;
935 bool r:1;
936 bool w:1;
937 bool present:1;
938 };
939
940 struct x86_reg {
941 int num; /* NVMM GPR state index */
942 uint64_t mask;
943 };
944
945 struct x86_dualreg {
946 int reg1;
947 int reg2;
948 };
949
950 enum x86_disp_type {
951 DISP_NONE,
952 DISP_0,
953 DISP_1,
954 DISP_2,
955 DISP_4
956 };
957
958 struct x86_disp {
959 enum x86_disp_type type;
960 uint64_t data; /* 4 bytes, but can be sign-extended */
961 };
962
963 struct x86_regmodrm {
964 uint8_t mod:2;
965 uint8_t reg:3;
966 uint8_t rm:3;
967 };
968
969 struct x86_immediate {
970 uint64_t data;
971 };
972
973 struct x86_sib {
974 uint8_t scale;
975 const struct x86_reg *idx;
976 const struct x86_reg *bas;
977 };
978
979 enum x86_store_type {
980 STORE_NONE,
981 STORE_REG,
982 STORE_DUALREG,
983 STORE_IMM,
984 STORE_SIB,
985 STORE_DMO
986 };
987
988 struct x86_store {
989 enum x86_store_type type;
990 union {
991 const struct x86_reg *reg;
992 struct x86_dualreg dualreg;
993 struct x86_immediate imm;
994 struct x86_sib sib;
995 uint64_t dmo;
996 } u;
997 struct x86_disp disp;
998 int hardseg;
999 };
1000
1001 struct x86_instr {
1002 uint8_t len;
1003 struct x86_legpref legpref;
1004 struct x86_rexpref rexpref;
1005 struct x86_regmodrm regmodrm;
1006 uint8_t operand_size;
1007 uint8_t address_size;
1008 uint64_t zeroextend_mask;
1009
1010 const struct x86_opcode *opcode;
1011 const struct x86_emul *emul;
1012
1013 struct x86_store src;
1014 struct x86_store dst;
1015 struct x86_store *strm;
1016 };
1017
1018 struct x86_decode_fsm {
1019 /* vcpu */
1020 bool is64bit;
1021 bool is32bit;
1022 bool is16bit;
1023
1024 /* fsm */
1025 int (*fn)(struct x86_decode_fsm *, struct x86_instr *);
1026 uint8_t *buf;
1027 uint8_t *end;
1028 };
1029
1030 struct x86_opcode {
1031 bool valid:1;
1032 bool regmodrm:1;
1033 bool regtorm:1;
1034 bool dmo:1;
1035 bool todmo:1;
1036 bool movs:1;
1037 bool stos:1;
1038 bool lods:1;
1039 bool szoverride:1;
1040 bool group1:1;
1041 bool group3:1;
1042 bool group11:1;
1043 bool immediate:1;
1044 uint8_t defsize;
1045 uint8_t flags;
1046 const struct x86_emul *emul;
1047 };
1048
1049 struct x86_group_entry {
1050 const struct x86_emul *emul;
1051 };
1052
1053 #define OPSIZE_BYTE 0x01
1054 #define OPSIZE_WORD 0x02 /* 2 bytes */
1055 #define OPSIZE_DOUB 0x04 /* 4 bytes */
1056 #define OPSIZE_QUAD 0x08 /* 8 bytes */
1057
1058 #define FLAG_imm8 0x01
1059 #define FLAG_immz 0x02
1060 #define FLAG_ze 0x04
1061
1062 static const struct x86_group_entry group1[8] __cacheline_aligned = {
1063 [1] = { .emul = &x86_emul_or },
1064 [4] = { .emul = &x86_emul_and },
1065 [6] = { .emul = &x86_emul_xor },
1066 [7] = { .emul = &x86_emul_cmp }
1067 };
1068
1069 static const struct x86_group_entry group3[8] __cacheline_aligned = {
1070 [0] = { .emul = &x86_emul_test },
1071 [1] = { .emul = &x86_emul_test }
1072 };
1073
1074 static const struct x86_group_entry group11[8] __cacheline_aligned = {
1075 [0] = { .emul = &x86_emul_mov }
1076 };
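/*
 * For the "group" opcodes the 3-bit REG field of ModRM selects the
 * actual operation. Example: 0x81 /1 is "OR Ev,Iz" and resolves to
 * group1[1] (x86_emul_or), while 0x81 /7 is "CMP Ev,Iz" and resolves to
 * group1[7] (x86_emul_cmp). The unpopulated slots make the decoder bail
 * out.
 */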
1077
1078 static const struct x86_opcode primary_opcode_table[256] __cacheline_aligned = {
1079 /*
1080 * Group1
1081 */
1082 [0x80] = {
1083 /* Eb, Ib */
1084 .valid = true,
1085 .regmodrm = true,
1086 .regtorm = true,
1087 .szoverride = false,
1088 .defsize = OPSIZE_BYTE,
1089 .group1 = true,
1090 .immediate = true,
1091 .emul = NULL /* group1 */
1092 },
1093 [0x81] = {
1094 /* Ev, Iz */
1095 .valid = true,
1096 .regmodrm = true,
1097 .regtorm = true,
1098 .szoverride = true,
1099 .defsize = -1,
1100 .group1 = true,
1101 .immediate = true,
1102 .flags = FLAG_immz,
1103 .emul = NULL /* group1 */
1104 },
1105 [0x83] = {
1106 /* Ev, Ib */
1107 .valid = true,
1108 .regmodrm = true,
1109 .regtorm = true,
1110 .szoverride = true,
1111 .defsize = -1,
1112 .group1 = true,
1113 .immediate = true,
1114 .flags = FLAG_imm8,
1115 .emul = NULL /* group1 */
1116 },
1117
1118 /*
1119 * Group3
1120 */
1121 [0xF6] = {
1122 /* Eb, Ib */
1123 .valid = true,
1124 .regmodrm = true,
1125 .regtorm = true,
1126 .szoverride = false,
1127 .defsize = OPSIZE_BYTE,
1128 .group3 = true,
1129 .immediate = true,
1130 .emul = NULL /* group3 */
1131 },
1132 [0xF7] = {
1133 /* Ev, Iz */
1134 .valid = true,
1135 .regmodrm = true,
1136 .regtorm = true,
1137 .szoverride = true,
1138 .defsize = -1,
1139 .group3 = true,
1140 .immediate = true,
1141 .flags = FLAG_immz,
1142 .emul = NULL /* group3 */
1143 },
1144
1145 /*
1146 * Group11
1147 */
1148 [0xC6] = {
1149 /* Eb, Ib */
1150 .valid = true,
1151 .regmodrm = true,
1152 .regtorm = true,
1153 .szoverride = false,
1154 .defsize = OPSIZE_BYTE,
1155 .group11 = true,
1156 .immediate = true,
1157 .emul = NULL /* group11 */
1158 },
1159 [0xC7] = {
1160 /* Ev, Iz */
1161 .valid = true,
1162 .regmodrm = true,
1163 .regtorm = true,
1164 .szoverride = true,
1165 .defsize = -1,
1166 .group11 = true,
1167 .immediate = true,
1168 .flags = FLAG_immz,
1169 .emul = NULL /* group11 */
1170 },
1171
1172 /*
1173 * OR
1174 */
1175 [0x08] = {
1176 /* Eb, Gb */
1177 .valid = true,
1178 .regmodrm = true,
1179 .regtorm = true,
1180 .szoverride = false,
1181 .defsize = OPSIZE_BYTE,
1182 .emul = &x86_emul_or
1183 },
1184 [0x09] = {
1185 /* Ev, Gv */
1186 .valid = true,
1187 .regmodrm = true,
1188 .regtorm = true,
1189 .szoverride = true,
1190 .defsize = -1,
1191 .emul = &x86_emul_or
1192 },
1193 [0x0A] = {
1194 /* Gb, Eb */
1195 .valid = true,
1196 .regmodrm = true,
1197 .regtorm = false,
1198 .szoverride = false,
1199 .defsize = OPSIZE_BYTE,
1200 .emul = &x86_emul_or
1201 },
1202 [0x0B] = {
1203 /* Gv, Ev */
1204 .valid = true,
1205 .regmodrm = true,
1206 .regtorm = false,
1207 .szoverride = true,
1208 .defsize = -1,
1209 .emul = &x86_emul_or
1210 },
1211
1212 /*
1213 * AND
1214 */
1215 [0x20] = {
1216 /* Eb, Gb */
1217 .valid = true,
1218 .regmodrm = true,
1219 .regtorm = true,
1220 .szoverride = false,
1221 .defsize = OPSIZE_BYTE,
1222 .emul = &x86_emul_and
1223 },
1224 [0x21] = {
1225 /* Ev, Gv */
1226 .valid = true,
1227 .regmodrm = true,
1228 .regtorm = true,
1229 .szoverride = true,
1230 .defsize = -1,
1231 .emul = &x86_emul_and
1232 },
1233 [0x22] = {
1234 /* Gb, Eb */
1235 .valid = true,
1236 .regmodrm = true,
1237 .regtorm = false,
1238 .szoverride = false,
1239 .defsize = OPSIZE_BYTE,
1240 .emul = &x86_emul_and
1241 },
1242 [0x23] = {
1243 /* Gv, Ev */
1244 .valid = true,
1245 .regmodrm = true,
1246 .regtorm = false,
1247 .szoverride = true,
1248 .defsize = -1,
1249 .emul = &x86_emul_and
1250 },
1251
1252 /*
1253 * SUB
1254 */
1255 [0x28] = {
1256 /* Eb, Gb */
1257 .valid = true,
1258 .regmodrm = true,
1259 .regtorm = true,
1260 .szoverride = false,
1261 .defsize = OPSIZE_BYTE,
1262 .emul = &x86_emul_sub
1263 },
1264 [0x29] = {
1265 /* Ev, Gv */
1266 .valid = true,
1267 .regmodrm = true,
1268 .regtorm = true,
1269 .szoverride = true,
1270 .defsize = -1,
1271 .emul = &x86_emul_sub
1272 },
1273 [0x2A] = {
1274 /* Gb, Eb */
1275 .valid = true,
1276 .regmodrm = true,
1277 .regtorm = false,
1278 .szoverride = false,
1279 .defsize = OPSIZE_BYTE,
1280 .emul = &x86_emul_sub
1281 },
1282 [0x2B] = {
1283 /* Gv, Ev */
1284 .valid = true,
1285 .regmodrm = true,
1286 .regtorm = false,
1287 .szoverride = true,
1288 .defsize = -1,
1289 .emul = &x86_emul_sub
1290 },
1291
1292 /*
1293 * XOR
1294 */
1295 [0x30] = {
1296 /* Eb, Gb */
1297 .valid = true,
1298 .regmodrm = true,
1299 .regtorm = true,
1300 .szoverride = false,
1301 .defsize = OPSIZE_BYTE,
1302 .emul = &x86_emul_xor
1303 },
1304 [0x31] = {
1305 /* Ev, Gv */
1306 .valid = true,
1307 .regmodrm = true,
1308 .regtorm = true,
1309 .szoverride = true,
1310 .defsize = -1,
1311 .emul = &x86_emul_xor
1312 },
1313 [0x32] = {
1314 /* Gb, Eb */
1315 .valid = true,
1316 .regmodrm = true,
1317 .regtorm = false,
1318 .szoverride = false,
1319 .defsize = OPSIZE_BYTE,
1320 .emul = &x86_emul_xor
1321 },
1322 [0x33] = {
1323 /* Gv, Ev */
1324 .valid = true,
1325 .regmodrm = true,
1326 .regtorm = false,
1327 .szoverride = true,
1328 .defsize = -1,
1329 .emul = &x86_emul_xor
1330 },
1331
1332 /*
1333 * XCHG
1334 */
1335 [0x86] = {
1336 /* Eb, Gb */
1337 .valid = true,
1338 .regmodrm = true,
1339 .regtorm = true,
1340 .szoverride = false,
1341 .defsize = OPSIZE_BYTE,
1342 .emul = &x86_emul_xchg
1343 },
1344 [0x87] = {
1345 /* Ev, Gv */
1346 .valid = true,
1347 .regmodrm = true,
1348 .regtorm = true,
1349 .szoverride = true,
1350 .defsize = -1,
1351 .emul = &x86_emul_xchg
1352 },
1353
1354 /*
1355 * MOV
1356 */
1357 [0x88] = {
1358 /* Eb, Gb */
1359 .valid = true,
1360 .regmodrm = true,
1361 .regtorm = true,
1362 .szoverride = false,
1363 .defsize = OPSIZE_BYTE,
1364 .emul = &x86_emul_mov
1365 },
1366 [0x89] = {
1367 /* Ev, Gv */
1368 .valid = true,
1369 .regmodrm = true,
1370 .regtorm = true,
1371 .szoverride = true,
1372 .defsize = -1,
1373 .emul = &x86_emul_mov
1374 },
1375 [0x8A] = {
1376 /* Gb, Eb */
1377 .valid = true,
1378 .regmodrm = true,
1379 .regtorm = false,
1380 .szoverride = false,
1381 .defsize = OPSIZE_BYTE,
1382 .emul = &x86_emul_mov
1383 },
1384 [0x8B] = {
1385 /* Gv, Ev */
1386 .valid = true,
1387 .regmodrm = true,
1388 .regtorm = false,
1389 .szoverride = true,
1390 .defsize = -1,
1391 .emul = &x86_emul_mov
1392 },
1393 [0xA0] = {
1394 /* AL, Ob */
1395 .valid = true,
1396 .dmo = true,
1397 .todmo = false,
1398 .szoverride = false,
1399 .defsize = OPSIZE_BYTE,
1400 .emul = &x86_emul_mov
1401 },
1402 [0xA1] = {
1403 /* rAX, Ov */
1404 .valid = true,
1405 .dmo = true,
1406 .todmo = false,
1407 .szoverride = true,
1408 .defsize = -1,
1409 .emul = &x86_emul_mov
1410 },
1411 [0xA2] = {
1412 /* Ob, AL */
1413 .valid = true,
1414 .dmo = true,
1415 .todmo = true,
1416 .szoverride = false,
1417 .defsize = OPSIZE_BYTE,
1418 .emul = &x86_emul_mov
1419 },
1420 [0xA3] = {
1421 /* Ov, rAX */
1422 .valid = true,
1423 .dmo = true,
1424 .todmo = true,
1425 .szoverride = true,
1426 .defsize = -1,
1427 .emul = &x86_emul_mov
1428 },
1429
1430 /*
1431 * MOVS
1432 */
1433 [0xA4] = {
1434 /* Yb, Xb */
1435 .valid = true,
1436 .movs = true,
1437 .szoverride = false,
1438 .defsize = OPSIZE_BYTE,
1439 .emul = &x86_emul_movs
1440 },
1441 [0xA5] = {
1442 /* Yv, Xv */
1443 .valid = true,
1444 .movs = true,
1445 .szoverride = true,
1446 .defsize = -1,
1447 .emul = &x86_emul_movs
1448 },
1449
1450 /*
1451 * STOS
1452 */
1453 [0xAA] = {
1454 /* Yb, AL */
1455 .valid = true,
1456 .stos = true,
1457 .szoverride = false,
1458 .defsize = OPSIZE_BYTE,
1459 .emul = &x86_emul_stos
1460 },
1461 [0xAB] = {
1462 /* Yv, rAX */
1463 .valid = true,
1464 .stos = true,
1465 .szoverride = true,
1466 .defsize = -1,
1467 .emul = &x86_emul_stos
1468 },
1469
1470 /*
1471 * LODS
1472 */
1473 [0xAC] = {
1474 /* AL, Xb */
1475 .valid = true,
1476 .lods = true,
1477 .szoverride = false,
1478 .defsize = OPSIZE_BYTE,
1479 .emul = &x86_emul_lods
1480 },
1481 [0xAD] = {
1482 /* rAX, Xv */
1483 .valid = true,
1484 .lods = true,
1485 .szoverride = true,
1486 .defsize = -1,
1487 .emul = &x86_emul_lods
1488 },
1489 };
1490
1491 static const struct x86_opcode secondary_opcode_table[256] __cacheline_aligned = {
1492 /*
1493 * MOVZX
1494 */
1495 [0xB6] = {
1496 /* Gv, Eb */
1497 .valid = true,
1498 .regmodrm = true,
1499 .regtorm = false,
1500 .szoverride = true,
1501 .defsize = OPSIZE_BYTE,
1502 .flags = FLAG_ze,
1503 .emul = &x86_emul_mov
1504 },
1505 [0xB7] = {
1506 /* Gv, Ew */
1507 .valid = true,
1508 .regmodrm = true,
1509 .regtorm = false,
1510 .szoverride = true,
1511 .defsize = OPSIZE_WORD,
1512 .flags = FLAG_ze,
1513 .emul = &x86_emul_mov
1514 },
1515 };
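/*
 * The secondary (0x0F-escaped) table only covers MOVZX. Example:
 * "movzx eax, byte ptr [rbx]" encodes as 0F B6 03; defsize forces the
 * memory operand to one byte, and FLAG_ze requests zero-extension of
 * the result into the full destination register.
 */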
1516
1517 static const struct x86_reg gpr_map__rip = { NVMM_X64_GPR_RIP, 0xFFFFFFFFFFFFFFFF };
1518
1519 /* [REX-present][enc][opsize] */
1520 static const struct x86_reg gpr_map__special[2][4][8] __cacheline_aligned = {
1521 [false] = {
1522 /* No REX prefix. */
1523 [0b00] = {
1524 [0] = { NVMM_X64_GPR_RAX, 0x000000000000FF00 }, /* AH */
1525 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1526 [2] = { -1, 0 },
1527 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1528 [4] = { -1, 0 },
1529 [5] = { -1, 0 },
1530 [6] = { -1, 0 },
1531 [7] = { -1, 0 },
1532 },
1533 [0b01] = {
1534 [0] = { NVMM_X64_GPR_RCX, 0x000000000000FF00 }, /* CH */
1535 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1536 [2] = { -1, 0 },
1537 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1538 [4] = { -1, 0 },
1539 [5] = { -1, 0 },
1540 [6] = { -1, 0 },
1541 [7] = { -1, 0 },
1542 },
1543 [0b10] = {
1544 [0] = { NVMM_X64_GPR_RDX, 0x000000000000FF00 }, /* DH */
1545 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1546 [2] = { -1, 0 },
1547 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1548 [4] = { -1, 0 },
1549 [5] = { -1, 0 },
1550 [6] = { -1, 0 },
1551 [7] = { -1, 0 },
1552 },
1553 [0b11] = {
1554 [0] = { NVMM_X64_GPR_RBX, 0x000000000000FF00 }, /* BH */
1555 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1556 [2] = { -1, 0 },
1557 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1558 [4] = { -1, 0 },
1559 [5] = { -1, 0 },
1560 [6] = { -1, 0 },
1561 [7] = { -1, 0 },
1562 }
1563 },
1564 [true] = {
1565 /* Has REX prefix. */
1566 [0b00] = {
1567 [0] = { NVMM_X64_GPR_RSP, 0x00000000000000FF }, /* SPL */
1568 [1] = { NVMM_X64_GPR_RSP, 0x000000000000FFFF }, /* SP */
1569 [2] = { -1, 0 },
1570 [3] = { NVMM_X64_GPR_RSP, 0x00000000FFFFFFFF }, /* ESP */
1571 [4] = { -1, 0 },
1572 [5] = { -1, 0 },
1573 [6] = { -1, 0 },
1574 [7] = { NVMM_X64_GPR_RSP, 0xFFFFFFFFFFFFFFFF }, /* RSP */
1575 },
1576 [0b01] = {
1577 [0] = { NVMM_X64_GPR_RBP, 0x00000000000000FF }, /* BPL */
1578 [1] = { NVMM_X64_GPR_RBP, 0x000000000000FFFF }, /* BP */
1579 [2] = { -1, 0 },
1580 [3] = { NVMM_X64_GPR_RBP, 0x00000000FFFFFFFF }, /* EBP */
1581 [4] = { -1, 0 },
1582 [5] = { -1, 0 },
1583 [6] = { -1, 0 },
1584 [7] = { NVMM_X64_GPR_RBP, 0xFFFFFFFFFFFFFFFF }, /* RBP */
1585 },
1586 [0b10] = {
1587 [0] = { NVMM_X64_GPR_RSI, 0x00000000000000FF }, /* SIL */
1588 [1] = { NVMM_X64_GPR_RSI, 0x000000000000FFFF }, /* SI */
1589 [2] = { -1, 0 },
1590 [3] = { NVMM_X64_GPR_RSI, 0x00000000FFFFFFFF }, /* ESI */
1591 [4] = { -1, 0 },
1592 [5] = { -1, 0 },
1593 [6] = { -1, 0 },
1594 [7] = { NVMM_X64_GPR_RSI, 0xFFFFFFFFFFFFFFFF }, /* RSI */
1595 },
1596 [0b11] = {
1597 [0] = { NVMM_X64_GPR_RDI, 0x00000000000000FF }, /* DIL */
1598 [1] = { NVMM_X64_GPR_RDI, 0x000000000000FFFF }, /* DI */
1599 [2] = { -1, 0 },
1600 [3] = { NVMM_X64_GPR_RDI, 0x00000000FFFFFFFF }, /* EDI */
1601 [4] = { -1, 0 },
1602 [5] = { -1, 0 },
1603 [6] = { -1, 0 },
1604 [7] = { NVMM_X64_GPR_RDI, 0xFFFFFFFFFFFFFFFF }, /* RDI */
1605 }
1606 }
1607 };
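/*
 * Example of the "special" encodings: with no REX prefix, a 1-byte
 * register operand encoded as 0b100 is AH (bits 15:8 of RAX); with any
 * REX prefix present, the same encoding is SPL (bits 7:0 of RSP). The
 * table is indexed with (enc & 0b11), hence only four rows per case.
 */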
1608
1609 /* [depends][enc][size] */
1610 static const struct x86_reg gpr_map[2][8][8] __cacheline_aligned = {
1611 [false] = {
1612 /* Not extended. */
1613 [0b000] = {
1614 [0] = { NVMM_X64_GPR_RAX, 0x00000000000000FF }, /* AL */
1615 [1] = { NVMM_X64_GPR_RAX, 0x000000000000FFFF }, /* AX */
1616 [2] = { -1, 0 },
1617 [3] = { NVMM_X64_GPR_RAX, 0x00000000FFFFFFFF }, /* EAX */
1618 [4] = { -1, 0 },
1619 [5] = { -1, 0 },
1620 [6] = { -1, 0 },
1621 [7] = { NVMM_X64_GPR_RAX, 0xFFFFFFFFFFFFFFFF }, /* RAX */
1622 },
1623 [0b001] = {
1624 [0] = { NVMM_X64_GPR_RCX, 0x00000000000000FF }, /* CL */
1625 [1] = { NVMM_X64_GPR_RCX, 0x000000000000FFFF }, /* CX */
1626 [2] = { -1, 0 },
1627 [3] = { NVMM_X64_GPR_RCX, 0x00000000FFFFFFFF }, /* ECX */
1628 [4] = { -1, 0 },
1629 [5] = { -1, 0 },
1630 [6] = { -1, 0 },
1631 [7] = { NVMM_X64_GPR_RCX, 0xFFFFFFFFFFFFFFFF }, /* RCX */
1632 },
1633 [0b010] = {
1634 [0] = { NVMM_X64_GPR_RDX, 0x00000000000000FF }, /* DL */
1635 [1] = { NVMM_X64_GPR_RDX, 0x000000000000FFFF }, /* DX */
1636 [2] = { -1, 0 },
1637 [3] = { NVMM_X64_GPR_RDX, 0x00000000FFFFFFFF }, /* EDX */
1638 [4] = { -1, 0 },
1639 [5] = { -1, 0 },
1640 [6] = { -1, 0 },
1641 [7] = { NVMM_X64_GPR_RDX, 0xFFFFFFFFFFFFFFFF }, /* RDX */
1642 },
1643 [0b011] = {
1644 [0] = { NVMM_X64_GPR_RBX, 0x00000000000000FF }, /* BL */
1645 [1] = { NVMM_X64_GPR_RBX, 0x000000000000FFFF }, /* BX */
1646 [2] = { -1, 0 },
1647 [3] = { NVMM_X64_GPR_RBX, 0x00000000FFFFFFFF }, /* EBX */
1648 [4] = { -1, 0 },
1649 [5] = { -1, 0 },
1650 [6] = { -1, 0 },
1651 [7] = { NVMM_X64_GPR_RBX, 0xFFFFFFFFFFFFFFFF }, /* RBX */
1652 },
1653 [0b100] = {
1654 [0] = { -1, 0 }, /* SPECIAL */
1655 [1] = { -1, 0 }, /* SPECIAL */
1656 [2] = { -1, 0 },
1657 [3] = { -1, 0 }, /* SPECIAL */
1658 [4] = { -1, 0 },
1659 [5] = { -1, 0 },
1660 [6] = { -1, 0 },
1661 [7] = { -1, 0 }, /* SPECIAL */
1662 },
1663 [0b101] = {
1664 [0] = { -1, 0 }, /* SPECIAL */
1665 [1] = { -1, 0 }, /* SPECIAL */
1666 [2] = { -1, 0 },
1667 [3] = { -1, 0 }, /* SPECIAL */
1668 [4] = { -1, 0 },
1669 [5] = { -1, 0 },
1670 [6] = { -1, 0 },
1671 [7] = { -1, 0 }, /* SPECIAL */
1672 },
1673 [0b110] = {
1674 [0] = { -1, 0 }, /* SPECIAL */
1675 [1] = { -1, 0 }, /* SPECIAL */
1676 [2] = { -1, 0 },
1677 [3] = { -1, 0 }, /* SPECIAL */
1678 [4] = { -1, 0 },
1679 [5] = { -1, 0 },
1680 [6] = { -1, 0 },
1681 [7] = { -1, 0 }, /* SPECIAL */
1682 },
1683 [0b111] = {
1684 [0] = { -1, 0 }, /* SPECIAL */
1685 [1] = { -1, 0 }, /* SPECIAL */
1686 [2] = { -1, 0 },
1687 [3] = { -1, 0 }, /* SPECIAL */
1688 [4] = { -1, 0 },
1689 [5] = { -1, 0 },
1690 [6] = { -1, 0 },
1691 [7] = { -1, 0 }, /* SPECIAL */
1692 },
1693 },
1694 [true] = {
1695 /* Extended. */
1696 [0b000] = {
1697 [0] = { NVMM_X64_GPR_R8, 0x00000000000000FF }, /* R8B */
1698 [1] = { NVMM_X64_GPR_R8, 0x000000000000FFFF }, /* R8W */
1699 [2] = { -1, 0 },
1700 [3] = { NVMM_X64_GPR_R8, 0x00000000FFFFFFFF }, /* R8D */
1701 [4] = { -1, 0 },
1702 [5] = { -1, 0 },
1703 [6] = { -1, 0 },
1704 [7] = { NVMM_X64_GPR_R8, 0xFFFFFFFFFFFFFFFF }, /* R8 */
1705 },
1706 [0b001] = {
1707 [0] = { NVMM_X64_GPR_R9, 0x00000000000000FF }, /* R9B */
1708 [1] = { NVMM_X64_GPR_R9, 0x000000000000FFFF }, /* R9W */
1709 [2] = { -1, 0 },
1710 [3] = { NVMM_X64_GPR_R9, 0x00000000FFFFFFFF }, /* R9D */
1711 [4] = { -1, 0 },
1712 [5] = { -1, 0 },
1713 [6] = { -1, 0 },
1714 [7] = { NVMM_X64_GPR_R9, 0xFFFFFFFFFFFFFFFF }, /* R9 */
1715 },
1716 [0b010] = {
1717 [0] = { NVMM_X64_GPR_R10, 0x00000000000000FF }, /* R10B */
1718 [1] = { NVMM_X64_GPR_R10, 0x000000000000FFFF }, /* R10W */
1719 [2] = { -1, 0 },
1720 [3] = { NVMM_X64_GPR_R10, 0x00000000FFFFFFFF }, /* R10D */
1721 [4] = { -1, 0 },
1722 [5] = { -1, 0 },
1723 [6] = { -1, 0 },
1724 [7] = { NVMM_X64_GPR_R10, 0xFFFFFFFFFFFFFFFF }, /* R10 */
1725 },
1726 [0b011] = {
1727 [0] = { NVMM_X64_GPR_R11, 0x00000000000000FF }, /* R11B */
1728 [1] = { NVMM_X64_GPR_R11, 0x000000000000FFFF }, /* R11W */
1729 [2] = { -1, 0 },
1730 [3] = { NVMM_X64_GPR_R11, 0x00000000FFFFFFFF }, /* R11D */
1731 [4] = { -1, 0 },
1732 [5] = { -1, 0 },
1733 [6] = { -1, 0 },
1734 [7] = { NVMM_X64_GPR_R11, 0xFFFFFFFFFFFFFFFF }, /* R11 */
1735 },
1736 [0b100] = {
1737 [0] = { NVMM_X64_GPR_R12, 0x00000000000000FF }, /* R12B */
1738 [1] = { NVMM_X64_GPR_R12, 0x000000000000FFFF }, /* R12W */
1739 [2] = { -1, 0 },
1740 [3] = { NVMM_X64_GPR_R12, 0x00000000FFFFFFFF }, /* R12D */
1741 [4] = { -1, 0 },
1742 [5] = { -1, 0 },
1743 [6] = { -1, 0 },
1744 [7] = { NVMM_X64_GPR_R12, 0xFFFFFFFFFFFFFFFF }, /* R12 */
1745 },
1746 [0b101] = {
1747 [0] = { NVMM_X64_GPR_R13, 0x00000000000000FF }, /* R13B */
1748 [1] = { NVMM_X64_GPR_R13, 0x000000000000FFFF }, /* R13W */
1749 [2] = { -1, 0 },
1750 [3] = { NVMM_X64_GPR_R13, 0x00000000FFFFFFFF }, /* R13D */
1751 [4] = { -1, 0 },
1752 [5] = { -1, 0 },
1753 [6] = { -1, 0 },
1754 [7] = { NVMM_X64_GPR_R13, 0xFFFFFFFFFFFFFFFF }, /* R13 */
1755 },
1756 [0b110] = {
1757 [0] = { NVMM_X64_GPR_R14, 0x00000000000000FF }, /* R14B */
1758 [1] = { NVMM_X64_GPR_R14, 0x000000000000FFFF }, /* R14W */
1759 [2] = { -1, 0 },
1760 [3] = { NVMM_X64_GPR_R14, 0x00000000FFFFFFFF }, /* R14D */
1761 [4] = { -1, 0 },
1762 [5] = { -1, 0 },
1763 [6] = { -1, 0 },
1764 [7] = { NVMM_X64_GPR_R14, 0xFFFFFFFFFFFFFFFF }, /* R14 */
1765 },
1766 [0b111] = {
1767 [0] = { NVMM_X64_GPR_R15, 0x00000000000000FF }, /* R15B */
1768 [1] = { NVMM_X64_GPR_R15, 0x000000000000FFFF }, /* R15W */
1769 [2] = { -1, 0 },
1770 [3] = { NVMM_X64_GPR_R15, 0x00000000FFFFFFFF }, /* R15D */
1771 [4] = { -1, 0 },
1772 [5] = { -1, 0 },
1773 [6] = { -1, 0 },
1774 [7] = { NVMM_X64_GPR_R15, 0xFFFFFFFFFFFFFFFF }, /* R15 */
1775 },
1776 }
1777 };
1778
1779 /* [enc] */
1780 static const int gpr_dual_reg1_rm[8] __cacheline_aligned = {
1781 [0b000] = NVMM_X64_GPR_RBX, /* BX (+SI) */
1782 [0b001] = NVMM_X64_GPR_RBX, /* BX (+DI) */
1783 [0b010] = NVMM_X64_GPR_RBP, /* BP (+SI) */
1784 [0b011] = NVMM_X64_GPR_RBP, /* BP (+DI) */
1785 [0b100] = NVMM_X64_GPR_RSI, /* SI */
1786 [0b101] = NVMM_X64_GPR_RDI, /* DI */
1787 [0b110] = NVMM_X64_GPR_RBP, /* BP */
1788 [0b111] = NVMM_X64_GPR_RBX, /* BX */
1789 };
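/*
 * 16-bit addressing: RM encodings 0b000-0b011 denote a register pair.
 * Example: RM=0b000 is [BX+SI(+disp)], which node_dual() decodes as
 * reg1=RBX and reg2=RSI, with the displacement (possibly zero) applied
 * on top.
 */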
1790
1791 static int
1792 node_overflow(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1793 {
1794 fsm->fn = NULL;
1795 return -1;
1796 }
1797
1798 static int
1799 fsm_read(struct x86_decode_fsm *fsm, uint8_t *bytes, size_t n)
1800 {
1801 if (fsm->buf + n > fsm->end) {
1802 return -1;
1803 }
1804 memcpy(bytes, fsm->buf, n);
1805 return 0;
1806 }
1807
1808 static inline void
1809 fsm_advance(struct x86_decode_fsm *fsm, size_t n,
1810 int (*fn)(struct x86_decode_fsm *, struct x86_instr *))
1811 {
1812 fsm->buf += n;
1813 if (fsm->buf > fsm->end) {
1814 fsm->fn = node_overflow;
1815 } else {
1816 fsm->fn = fn;
1817 }
1818 }
1819
1820 static const struct x86_reg *
1821 resolve_special_register(struct x86_instr *instr, uint8_t enc, size_t regsize)
1822 {
1823 enc &= 0b11;
1824 if (regsize == 8) {
1825 		/* A 64-bit size can occur without a REX prefix; use the REX-aware map. */
1826 return &gpr_map__special[1][enc][regsize-1];
1827 }
1828 return &gpr_map__special[instr->rexpref.present][enc][regsize-1];
1829 }
1830
1831 /*
1832 * Special node, for MOVS. Fake two displacements of zero on the source and
1833 * destination registers.
1834 */
1835 static int
1836 node_movs(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1837 {
1838 size_t adrsize;
1839
1840 adrsize = instr->address_size;
1841
1842 /* DS:RSI */
1843 instr->src.type = STORE_REG;
1844 instr->src.u.reg = &gpr_map__special[1][2][adrsize-1];
1845 instr->src.disp.type = DISP_0;
1846
1847 /* ES:RDI, force ES */
1848 instr->dst.type = STORE_REG;
1849 instr->dst.u.reg = &gpr_map__special[1][3][adrsize-1];
1850 instr->dst.disp.type = DISP_0;
1851 instr->dst.hardseg = NVMM_X64_SEG_ES;
1852
1853 fsm_advance(fsm, 0, NULL);
1854
1855 return 0;
1856 }
1857
1858 /*
1859  * Special node, for STOS and LODS. Fake a displacement of zero on the
1860  * string-memory operand (the destination for STOS, the source for LODS).
1861 */
1862 static int
1863 node_stlo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1864 {
1865 const struct x86_opcode *opcode = instr->opcode;
1866 struct x86_store *stlo, *streg;
1867 size_t adrsize, regsize;
1868
1869 adrsize = instr->address_size;
1870 regsize = instr->operand_size;
1871
1872 if (opcode->stos) {
1873 streg = &instr->src;
1874 stlo = &instr->dst;
1875 } else {
1876 streg = &instr->dst;
1877 stlo = &instr->src;
1878 }
1879
1880 streg->type = STORE_REG;
1881 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1882
1883 stlo->type = STORE_REG;
1884 if (opcode->stos) {
1885 /* ES:RDI, force ES */
1886 stlo->u.reg = &gpr_map__special[1][3][adrsize-1];
1887 stlo->hardseg = NVMM_X64_SEG_ES;
1888 } else {
1889 /* DS:RSI */
1890 stlo->u.reg = &gpr_map__special[1][2][adrsize-1];
1891 }
1892 stlo->disp.type = DISP_0;
1893
1894 fsm_advance(fsm, 0, NULL);
1895
1896 return 0;
1897 }
1898
1899 static int
1900 node_dmo(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1901 {
1902 const struct x86_opcode *opcode = instr->opcode;
1903 struct x86_store *stdmo, *streg;
1904 size_t adrsize, regsize;
1905
1906 adrsize = instr->address_size;
1907 regsize = instr->operand_size;
1908
1909 if (opcode->todmo) {
1910 streg = &instr->src;
1911 stdmo = &instr->dst;
1912 } else {
1913 streg = &instr->dst;
1914 stdmo = &instr->src;
1915 }
1916
1917 streg->type = STORE_REG;
1918 streg->u.reg = &gpr_map[0][0][regsize-1]; /* ?AX */
1919
1920 stdmo->type = STORE_DMO;
1921 if (fsm_read(fsm, (uint8_t *)&stdmo->u.dmo, adrsize) == -1) {
1922 return -1;
1923 }
1924 fsm_advance(fsm, adrsize, NULL);
1925
1926 return 0;
1927 }
1928
1929 static inline uint64_t
1930 sign_extend(uint64_t val, int size)
1931 {
1932 if (size == 1) {
1933 if (val & __BIT(7))
1934 val |= 0xFFFFFFFFFFFFFF00;
1935 } else if (size == 2) {
1936 if (val & __BIT(15))
1937 val |= 0xFFFFFFFFFFFF0000;
1938 } else if (size == 4) {
1939 if (val & __BIT(31))
1940 val |= 0xFFFFFFFF00000000;
1941 }
1942 return val;
1943 }
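/*
 * Example: sign_extend(0x80, 1) == 0xFFFFFFFFFFFFFF80 while
 * sign_extend(0x7F, 1) == 0x7F. Used for displacements and for the
 * imm8/immz immediate forms.
 */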
1944
1945 static int
1946 node_immediate(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1947 {
1948 const struct x86_opcode *opcode = instr->opcode;
1949 struct x86_store *store;
1950 uint8_t immsize;
1951 size_t sesize = 0;
1952
1953 /* The immediate is the source */
1954 store = &instr->src;
1955 immsize = instr->operand_size;
1956
1957 if (opcode->flags & FLAG_imm8) {
1958 sesize = immsize;
1959 immsize = 1;
1960 } else if ((opcode->flags & FLAG_immz) && (immsize == 8)) {
1961 sesize = immsize;
1962 immsize = 4;
1963 }
1964
1965 store->type = STORE_IMM;
1966 if (fsm_read(fsm, (uint8_t *)&store->u.imm.data, immsize) == -1) {
1967 return -1;
1968 }
1969 fsm_advance(fsm, immsize, NULL);
1970
1971 if (sesize != 0) {
1972 store->u.imm.data = sign_extend(store->u.imm.data, sesize);
1973 }
1974
1975 return 0;
1976 }
1977
1978 static int
1979 node_disp(struct x86_decode_fsm *fsm, struct x86_instr *instr)
1980 {
1981 const struct x86_opcode *opcode = instr->opcode;
1982 uint64_t data = 0;
1983 size_t n;
1984
1985 if (instr->strm->disp.type == DISP_1) {
1986 n = 1;
1987 } else if (instr->strm->disp.type == DISP_2) {
1988 n = 2;
1989 } else if (instr->strm->disp.type == DISP_4) {
1990 n = 4;
1991 } else {
1992 DISASSEMBLER_BUG();
1993 }
1994
1995 if (fsm_read(fsm, (uint8_t *)&data, n) == -1) {
1996 return -1;
1997 }
1998
1999 if (__predict_true(fsm->is64bit)) {
2000 data = sign_extend(data, n);
2001 }
2002
2003 instr->strm->disp.data = data;
2004
2005 if (opcode->immediate) {
2006 fsm_advance(fsm, n, node_immediate);
2007 } else {
2008 fsm_advance(fsm, n, NULL);
2009 }
2010
2011 return 0;
2012 }
2013
2014 /*
2015 * Special node to handle 16bit addressing encoding, which can reference two
2016 * registers at once.
2017 */
2018 static int
2019 node_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2020 {
2021 int reg1, reg2;
2022
2023 reg1 = gpr_dual_reg1_rm[instr->regmodrm.rm];
2024
2025 if (instr->regmodrm.rm == 0b000 ||
2026 instr->regmodrm.rm == 0b010) {
2027 reg2 = NVMM_X64_GPR_RSI;
2028 } else if (instr->regmodrm.rm == 0b001 ||
2029 instr->regmodrm.rm == 0b011) {
2030 reg2 = NVMM_X64_GPR_RDI;
2031 } else {
2032 DISASSEMBLER_BUG();
2033 }
2034
2035 instr->strm->type = STORE_DUALREG;
2036 instr->strm->u.dualreg.reg1 = reg1;
2037 instr->strm->u.dualreg.reg2 = reg2;
2038
2039 if (instr->strm->disp.type == DISP_NONE) {
2040 DISASSEMBLER_BUG();
2041 } else if (instr->strm->disp.type == DISP_0) {
2042 /* Indirect register addressing mode */
2043 if (instr->opcode->immediate) {
2044 fsm_advance(fsm, 1, node_immediate);
2045 } else {
2046 fsm_advance(fsm, 1, NULL);
2047 }
2048 } else {
2049 fsm_advance(fsm, 1, node_disp);
2050 }
2051
2052 return 0;
2053 }
2054
2055 static const struct x86_reg *
2056 get_register_idx(struct x86_instr *instr, uint8_t index)
2057 {
2058 uint8_t enc = index;
2059 const struct x86_reg *reg;
2060 size_t regsize;
2061
2062 regsize = instr->address_size;
2063 reg = &gpr_map[instr->rexpref.x][enc][regsize-1];
2064
2065 if (reg->num == -1) {
2066 reg = resolve_special_register(instr, enc, regsize);
2067 }
2068
2069 return reg;
2070 }
2071
2072 static const struct x86_reg *
2073 get_register_bas(struct x86_instr *instr, uint8_t base)
2074 {
2075 uint8_t enc = base;
2076 const struct x86_reg *reg;
2077 size_t regsize;
2078
2079 regsize = instr->address_size;
2080 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2081 if (reg->num == -1) {
2082 reg = resolve_special_register(instr, enc, regsize);
2083 }
2084
2085 return reg;
2086 }
2087
2088 static int
2089 node_sib(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2090 {
2091 const struct x86_opcode *opcode;
2092 uint8_t scale, index, base;
2093 bool noindex, nobase;
2094 uint8_t byte;
2095
2096 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2097 return -1;
2098 }
2099
2100 scale = ((byte & 0b11000000) >> 6);
2101 index = ((byte & 0b00111000) >> 3);
2102 base = ((byte & 0b00000111) >> 0);
2103
2104 opcode = instr->opcode;
2105
2106 noindex = false;
2107 nobase = false;
2108
2109 if (index == 0b100 && !instr->rexpref.x) {
2110 /* Special case: the index is null */
2111 noindex = true;
2112 }
2113
2114 if (instr->regmodrm.mod == 0b00 && base == 0b101) {
2115 /* Special case: the base is null + disp32 */
2116 instr->strm->disp.type = DISP_4;
2117 nobase = true;
2118 }
2119
2120 instr->strm->type = STORE_SIB;
2121 instr->strm->u.sib.scale = (1 << scale);
2122 if (!noindex)
2123 instr->strm->u.sib.idx = get_register_idx(instr, index);
2124 if (!nobase)
2125 instr->strm->u.sib.bas = get_register_bas(instr, base);
2126
2127 /* May have a displacement, or an immediate */
2128 if (instr->strm->disp.type == DISP_1 ||
2129 instr->strm->disp.type == DISP_2 ||
2130 instr->strm->disp.type == DISP_4) {
2131 fsm_advance(fsm, 1, node_disp);
2132 } else if (opcode->immediate) {
2133 fsm_advance(fsm, 1, node_immediate);
2134 } else {
2135 fsm_advance(fsm, 1, NULL);
2136 }
2137
2138 return 0;
2139 }
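/*
 * SIB example: the byte 0x58 encodes scale=0b01 (factor 2), index=0b011
 * and base=0b000, i.e. [RAX + RBX*2] in 64-bit mode without REX. Index
 * 0b100 with REX.X clear means "no index", and base 0b101 with mod=00
 * means "no base, a disp32 follows".
 */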
2140
2141 static const struct x86_reg *
2142 get_register_reg(struct x86_instr *instr, const struct x86_opcode *opcode)
2143 {
2144 uint8_t enc = instr->regmodrm.reg;
2145 const struct x86_reg *reg;
2146 size_t regsize;
2147
2148 regsize = instr->operand_size;
2149
2150 reg = &gpr_map[instr->rexpref.r][enc][regsize-1];
2151 if (reg->num == -1) {
2152 reg = resolve_special_register(instr, enc, regsize);
2153 }
2154
2155 return reg;
2156 }
2157
2158 static const struct x86_reg *
2159 get_register_rm(struct x86_instr *instr, const struct x86_opcode *opcode)
2160 {
2161 uint8_t enc = instr->regmodrm.rm;
2162 const struct x86_reg *reg;
2163 size_t regsize;
2164
2165 if (instr->strm->disp.type == DISP_NONE) {
2166 regsize = instr->operand_size;
2167 } else {
2168 /* Indirect access, the size is that of the address. */
2169 regsize = instr->address_size;
2170 }
2171
2172 reg = &gpr_map[instr->rexpref.b][enc][regsize-1];
2173 if (reg->num == -1) {
2174 reg = resolve_special_register(instr, enc, regsize);
2175 }
2176
2177 return reg;
2178 }
2179
2180 static inline bool
2181 has_sib(struct x86_instr *instr)
2182 {
2183 return (instr->address_size != 2 && /* no SIB in 16bit addressing */
2184 instr->regmodrm.mod != 0b11 &&
2185 instr->regmodrm.rm == 0b100);
2186 }
2187
2188 static inline bool
2189 is_rip_relative(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2190 {
2191 return (fsm->is64bit && /* RIP-relative only in 64bit mode */
2192 instr->regmodrm.mod == 0b00 &&
2193 instr->regmodrm.rm == 0b101);
2194 }
2195
2196 static inline bool
2197 is_disp32_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2198 {
2199 return (!fsm->is64bit && /* no disp32-only in 64bit mode */
2200 instr->address_size != 2 && /* no disp32-only in 16bit addressing */
2201 instr->regmodrm.mod == 0b00 &&
2202 instr->regmodrm.rm == 0b101);
2203 }
2204
2205 static inline bool
2206 is_disp16_only(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2207 {
2208 return (instr->address_size == 2 && /* disp16-only only in 16bit addr */
2209 instr->regmodrm.mod == 0b00 &&
2210 instr->regmodrm.rm == 0b110);
2211 }
2212
2213 static inline bool
2214 is_dual(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2215 {
2216 return (instr->address_size == 2 &&
2217 instr->regmodrm.mod != 0b11 &&
2218 instr->regmodrm.rm <= 0b011);
2219 }
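/*
 * ModRM special cases, for illustration: in 64-bit mode the ModRM byte
 * 0x05 (mod=00, reg=000, rm=101) selects RIP-relative addressing with a
 * disp32; the same byte outside 64-bit mode is an absolute disp32. With
 * a 16-bit address size, mod=00 rm=110 is the absolute disp16 form.
 */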
2220
2221 static enum x86_disp_type
2222 get_disp_type(struct x86_instr *instr)
2223 {
2224 switch (instr->regmodrm.mod) {
2225 case 0b00: /* indirect */
2226 return DISP_0;
2227 case 0b01: /* indirect+1 */
2228 return DISP_1;
2229 case 0b10: /* indirect+{2,4} */
2230 if (__predict_false(instr->address_size == 2)) {
2231 return DISP_2;
2232 }
2233 return DISP_4;
2234 	case 0b11: /* direct */
	default: /* unreachable, mod is a 2-bit field */
2235 		return DISP_NONE;
2236 	}
2237 }
2238
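/*
 * ModRM byte. Pick the source and destination stores, resolve group
 * opcodes, resolve the REG operand, then dispatch to the SIB,
 * displacement or immediate nodes depending on the addressing form.
 */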
2239 static int
2240 node_regmodrm(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2241 {
2242 struct x86_store *strg, *strm;
2243 const struct x86_opcode *opcode;
2244 const struct x86_reg *reg;
2245 uint8_t byte;
2246
2247 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2248 return -1;
2249 }
2250
2251 opcode = instr->opcode;
2252
2253 instr->regmodrm.rm = ((byte & 0b00000111) >> 0);
2254 instr->regmodrm.reg = ((byte & 0b00111000) >> 3);
2255 instr->regmodrm.mod = ((byte & 0b11000000) >> 6);
2256
2257 if (opcode->regtorm) {
2258 strg = &instr->src;
2259 strm = &instr->dst;
2260 } else { /* RM to REG */
2261 strm = &instr->src;
2262 strg = &instr->dst;
2263 }
2264
2265 /* Save for later use. */
2266 instr->strm = strm;
2267
2268 /*
2269 * Special cases: Groups. The REG field of REGMODRM is the index in
2270 * the group. op1 gets overwritten in the Immediate node, if any.
2271 */
2272 if (opcode->group1) {
2273 if (group1[instr->regmodrm.reg].emul == NULL) {
2274 return -1;
2275 }
2276 instr->emul = group1[instr->regmodrm.reg].emul;
2277 } else if (opcode->group3) {
2278 if (group3[instr->regmodrm.reg].emul == NULL) {
2279 return -1;
2280 }
2281 instr->emul = group3[instr->regmodrm.reg].emul;
2282 } else if (opcode->group11) {
2283 if (group11[instr->regmodrm.reg].emul == NULL) {
2284 return -1;
2285 }
2286 instr->emul = group11[instr->regmodrm.reg].emul;
2287 }
2288
2289 if (!opcode->immediate) {
2290 reg = get_register_reg(instr, opcode);
2291 if (reg == NULL) {
2292 return -1;
2293 }
2294 strg->type = STORE_REG;
2295 strg->u.reg = reg;
2296 }
2297
2298 /* The displacement applies to RM. */
2299 strm->disp.type = get_disp_type(instr);
2300
2301 if (has_sib(instr)) {
2302 /* Overwrites RM */
2303 fsm_advance(fsm, 1, node_sib);
2304 return 0;
2305 }
2306
2307 if (is_rip_relative(fsm, instr)) {
2308 /* Overwrites RM */
2309 strm->type = STORE_REG;
2310 strm->u.reg = &gpr_map__rip;
2311 strm->disp.type = DISP_4;
2312 fsm_advance(fsm, 1, node_disp);
2313 return 0;
2314 }
2315
2316 if (is_disp32_only(fsm, instr)) {
2317 /* Overwrites RM */
2318 strm->type = STORE_REG;
2319 strm->u.reg = NULL;
2320 strm->disp.type = DISP_4;
2321 fsm_advance(fsm, 1, node_disp);
2322 return 0;
2323 }
2324
2325 if (__predict_false(is_disp16_only(fsm, instr))) {
2326 /* Overwrites RM */
2327 strm->type = STORE_REG;
2328 strm->u.reg = NULL;
2329 strm->disp.type = DISP_2;
2330 fsm_advance(fsm, 1, node_disp);
2331 return 0;
2332 }
2333
2334 if (__predict_false(is_dual(fsm, instr))) {
2335 /* Overwrites RM */
2336 fsm_advance(fsm, 0, node_dual);
2337 return 0;
2338 }
2339
2340 reg = get_register_rm(instr, opcode);
2341 if (reg == NULL) {
2342 return -1;
2343 }
2344 strm->type = STORE_REG;
2345 strm->u.reg = reg;
2346
2347 if (strm->disp.type == DISP_NONE) {
2348 /* Direct register addressing mode */
2349 if (opcode->immediate) {
2350 fsm_advance(fsm, 1, node_immediate);
2351 } else {
2352 fsm_advance(fsm, 1, NULL);
2353 }
2354 } else if (strm->disp.type == DISP_0) {
2355 /* Indirect register addressing mode */
2356 if (opcode->immediate) {
2357 fsm_advance(fsm, 1, node_immediate);
2358 } else {
2359 fsm_advance(fsm, 1, NULL);
2360 }
2361 } else {
2362 fsm_advance(fsm, 1, node_disp);
2363 }
2364
2365 return 0;
2366 }
2367
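/*
 * Effective operand size: the opcode's default size unless it can be
 * overridden, in which case REX.W selects 8 bytes and the 0x66 prefix
 * toggles between 2 and 4 bytes (e.g. 0x66 selects a 16-bit operand in
 * 32/64-bit mode).
 */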
2368 static size_t
2369 get_operand_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2370 {
2371 const struct x86_opcode *opcode = instr->opcode;
2372 int opsize;
2373
2374 /* Get the opsize */
2375 if (!opcode->szoverride) {
2376 opsize = opcode->defsize;
2377 } else if (instr->rexpref.present && instr->rexpref.w) {
2378 opsize = 8;
2379 } else {
2380 if (!fsm->is16bit) {
2381 if (instr->legpref.opr_ovr) {
2382 opsize = 2;
2383 } else {
2384 opsize = 4;
2385 }
2386 } else { /* 16bit */
2387 if (instr->legpref.opr_ovr) {
2388 opsize = 4;
2389 } else {
2390 opsize = 2;
2391 }
2392 }
2393 }
2394
2395 return opsize;
2396 }
2397
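/*
 * Effective address size: 8, 4 or 2 bytes in 64, 32 or 16-bit mode
 * respectively; the 0x67 prefix selects the alternate size (4 in 64-bit
 * mode, 2 in 32-bit mode, 4 in 16-bit mode).
 */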
2398 static size_t
2399 get_address_size(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2400 {
2401 if (fsm->is64bit) {
2402 if (__predict_false(instr->legpref.adr_ovr)) {
2403 return 4;
2404 }
2405 return 8;
2406 }
2407
2408 if (fsm->is32bit) {
2409 if (__predict_false(instr->legpref.adr_ovr)) {
2410 return 2;
2411 }
2412 return 4;
2413 }
2414
2415 /* 16bit. */
2416 if (__predict_false(instr->legpref.adr_ovr)) {
2417 return 4;
2418 }
2419 return 2;
2420 }
2421
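/*
 * Primary opcode byte. Look it up in the one-byte opcode table, record
 * the operand and address sizes, then dispatch to the ModRM, DMO or
 * string-instruction (STOS/LODS/MOVS) nodes.
 */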
2422 static int
2423 node_primary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2424 {
2425 const struct x86_opcode *opcode;
2426 uint8_t byte;
2427
2428 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2429 return -1;
2430 }
2431
2432 opcode = &primary_opcode_table[byte];
2433 if (__predict_false(!opcode->valid)) {
2434 return -1;
2435 }
2436
2437 instr->opcode = opcode;
2438 instr->emul = opcode->emul;
2439 instr->operand_size = get_operand_size(fsm, instr);
2440 instr->address_size = get_address_size(fsm, instr);
2441
2442 if (fsm->is64bit && (instr->operand_size == 4)) {
2443 /* Zero-extend to 64 bits. */
2444 instr->zeroextend_mask = ~size_to_mask(4);
2445 }
2446
2447 if (opcode->regmodrm) {
2448 fsm_advance(fsm, 1, node_regmodrm);
2449 } else if (opcode->dmo) {
2450 /* Direct-Memory Offsets */
2451 fsm_advance(fsm, 1, node_dmo);
2452 } else if (opcode->stos || opcode->lods) {
2453 fsm_advance(fsm, 1, node_stlo);
2454 } else if (opcode->movs) {
2455 fsm_advance(fsm, 1, node_movs);
2456 } else {
2457 return -1;
2458 }
2459
2460 return 0;
2461 }
2462
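/*
 * Secondary opcode byte, reached through the 0x0F escape. Only the
 * ModRM-based two-byte opcodes are handled.
 */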
2463 static int
2464 node_secondary_opcode(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2465 {
2466 const struct x86_opcode *opcode;
2467 uint8_t byte;
2468
2469 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2470 return -1;
2471 }
2472
2473 opcode = &secondary_opcode_table[byte];
2474 if (__predict_false(!opcode->valid)) {
2475 return -1;
2476 }
2477
2478 instr->opcode = opcode;
2479 instr->emul = opcode->emul;
2480 instr->operand_size = get_operand_size(fsm, instr);
2481 instr->address_size = get_address_size(fsm, instr);
2482
2483 if (fsm->is64bit && (instr->operand_size == 4)) {
2484 /* Zero-extend to 64 bits. */
2485 instr->zeroextend_mask = ~size_to_mask(4);
2486 }
2487
2488 if (opcode->flags & FLAG_ze) {
2489 		/*
2490 		 * Compute the mask for zero-extend. Update the operand size:
2491 		 * we move fewer bytes.
2492 		 */
2493 instr->zeroextend_mask |= size_to_mask(instr->operand_size);
2494 instr->zeroextend_mask &= ~size_to_mask(opcode->defsize);
2495 instr->operand_size = opcode->defsize;
2496 }
2497
2498 if (opcode->regmodrm) {
2499 fsm_advance(fsm, 1, node_regmodrm);
2500 } else {
2501 return -1;
2502 }
2503
2504 return 0;
2505 }
2506
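/*
 * First opcode byte after the prefixes: either the 0x0F escape to the
 * secondary table, a VEX prefix (rejected), or a primary opcode.
 */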
2507 static int
2508 node_main(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2509 {
2510 uint8_t byte;
2511
2512 #define ESCAPE 0x0F
2513 #define VEX_1 0xC5
2514 #define VEX_2 0xC4
2515 #define XOP 0x8F
2516
2517 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2518 return -1;
2519 }
2520
2521 	/*
2522 	 * We don't handle XOP. It is AMD-specific, and it was dropped
2523 	 * shortly after being introduced.
2524 	 */
2525 if (byte == ESCAPE) {
2526 fsm_advance(fsm, 1, node_secondary_opcode);
2527 } else if (!instr->rexpref.present) {
2528 if (byte == VEX_1) {
2529 return -1;
2530 } else if (byte == VEX_2) {
2531 return -1;
2532 } else {
2533 fsm->fn = node_primary_opcode;
2534 }
2535 } else {
2536 fsm->fn = node_primary_opcode;
2537 }
2538
2539 return 0;
2540 }
2541
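/*
 * Optional REX prefix (0x40-0x4F), valid only in 64-bit mode. If the
 * byte is not a REX prefix, it is left unconsumed and handled by
 * node_main.
 */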
2542 static int
2543 node_rex_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2544 {
2545 struct x86_rexpref *rexpref = &instr->rexpref;
2546 uint8_t byte;
2547 size_t n = 0;
2548
2549 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2550 return -1;
2551 }
2552
2553 if (byte >= 0x40 && byte <= 0x4F) {
2554 if (__predict_false(!fsm->is64bit)) {
2555 return -1;
2556 }
2557 rexpref->b = ((byte & 0x1) != 0);
2558 rexpref->x = ((byte & 0x2) != 0);
2559 rexpref->r = ((byte & 0x4) != 0);
2560 rexpref->w = ((byte & 0x8) != 0);
2561 rexpref->present = true;
2562 n = 1;
2563 }
2564
2565 fsm_advance(fsm, n, node_main);
2566 return 0;
2567 }
2568
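/*
 * Legacy prefixes: segment overrides, operand/address-size overrides,
 * REP/REPN and LOCK. Several may be present; loop until a non-prefix
 * byte is found, then move on to the REX node.
 */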
2569 static int
2570 node_legacy_prefix(struct x86_decode_fsm *fsm, struct x86_instr *instr)
2571 {
2572 uint8_t byte;
2573
2574 if (fsm_read(fsm, &byte, sizeof(byte)) == -1) {
2575 return -1;
2576 }
2577
2578 if (byte == LEG_OPR_OVR) {
2579 instr->legpref.opr_ovr = 1;
2580 } else if (byte == LEG_OVR_DS) {
2581 instr->legpref.seg = NVMM_X64_SEG_DS;
2582 } else if (byte == LEG_OVR_ES) {
2583 instr->legpref.seg = NVMM_X64_SEG_ES;
2584 } else if (byte == LEG_REP) {
2585 instr->legpref.rep = 1;
2586 } else if (byte == LEG_OVR_GS) {
2587 instr->legpref.seg = NVMM_X64_SEG_GS;
2588 } else if (byte == LEG_OVR_FS) {
2589 instr->legpref.seg = NVMM_X64_SEG_FS;
2590 } else if (byte == LEG_ADR_OVR) {
2591 instr->legpref.adr_ovr = 1;
2592 } else if (byte == LEG_OVR_CS) {
2593 instr->legpref.seg = NVMM_X64_SEG_CS;
2594 } else if (byte == LEG_OVR_SS) {
2595 instr->legpref.seg = NVMM_X64_SEG_SS;
2596 } else if (byte == LEG_REPN) {
2597 instr->legpref.repn = 1;
2598 } else if (byte == LEG_LOCK) {
2599 /* ignore */
2600 } else {
2601 /* not a legacy prefix */
2602 fsm_advance(fsm, 0, node_rex_prefix);
2603 return 0;
2604 }
2605
2606 fsm_advance(fsm, 1, node_legacy_prefix);
2607 return 0;
2608 }
2609
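/*
 * Decode the instruction bytes by running the FSM, starting at the
 * legacy-prefix node. Each node consumes bytes and selects the next
 * node; decoding ends when a node sets fsm->fn to NULL, or fails.
 */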
2610 static int
2611 x86_decode(uint8_t *inst_bytes, size_t inst_len, struct x86_instr *instr,
2612 struct nvmm_x64_state *state)
2613 {
2614 struct x86_decode_fsm fsm;
2615 int ret;
2616
2617 memset(instr, 0, sizeof(*instr));
2618 instr->legpref.seg = -1;
2619 instr->src.hardseg = -1;
2620 instr->dst.hardseg = -1;
2621
2622 fsm.is64bit = is_64bit(state);
2623 fsm.is32bit = is_32bit(state);
2624 fsm.is16bit = is_16bit(state);
2625
2626 fsm.fn = node_legacy_prefix;
2627 fsm.buf = inst_bytes;
2628 fsm.end = inst_bytes + inst_len;
2629
2630 while (fsm.fn != NULL) {
2631 ret = (*fsm.fn)(&fsm, instr);
2632 if (ret == -1)
2633 return -1;
2634 }
2635
2636 instr->len = fsm.buf - inst_bytes;
2637
2638 return 0;
2639 }
2640
2641 /* -------------------------------------------------------------------------- */
2642
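/*
 * Generate exec_<instr><size>(): execute the instruction on the host with
 * the given operands (AT&T syntax, so "<instr> %2, %3" computes op2 <op>=
 * op1), copy the result out, and capture the resulting RFLAGS with
 * PUSHFQ/POPQ.
 */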
2643 #define EXEC_INSTR(sz, instr) \
2644 static uint##sz##_t \
2645 exec_##instr##sz(uint##sz##_t op1, uint##sz##_t op2, uint64_t *rflags) \
2646 { \
2647 uint##sz##_t res; \
2648 __asm __volatile ( \
2649 #instr" %2, %3;" \
2650 "mov %3, %1;" \
2651 "pushfq;" \
2652 "popq %0" \
2653 : "=r" (*rflags), "=r" (res) \
2654 : "r" (op1), "r" (op2)); \
2655 return res; \
2656 }
2657
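/*
 * Generate exec_<instr>(): dispatch to the width-specific variant, based
 * on the operand size of the emulated access.
 */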
2658 #define EXEC_DISPATCHER(instr) \
2659 static uint64_t \
2660 exec_##instr(uint64_t op1, uint64_t op2, uint64_t *rflags, size_t opsize) \
2661 { \
2662 switch (opsize) { \
2663 case 1: \
2664 return exec_##instr##8(op1, op2, rflags); \
2665 case 2: \
2666 return exec_##instr##16(op1, op2, rflags); \
2667 case 4: \
2668 return exec_##instr##32(op1, op2, rflags); \
2669 default: \
2670 return exec_##instr##64(op1, op2, rflags); \
2671 } \
2672 }
2673
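/*
 * For each instruction, PSL_<INSTR>_MASK lists the RFLAGS bits it defines;
 * only those bits are copied back into the guest RFLAGS by the emulation
 * functions below.
 */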
2674 /* SUB: ret = op1 - op2 */
2675 #define PSL_SUB_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF|PSL_AF)
2676 EXEC_INSTR(8, sub)
2677 EXEC_INSTR(16, sub)
2678 EXEC_INSTR(32, sub)
2679 EXEC_INSTR(64, sub)
2680 EXEC_DISPATCHER(sub)
2681
2682 /* OR: ret = op1 | op2 */
2683 #define PSL_OR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2684 EXEC_INSTR(8, or)
2685 EXEC_INSTR(16, or)
2686 EXEC_INSTR(32, or)
2687 EXEC_INSTR(64, or)
2688 EXEC_DISPATCHER(or)
2689
2690 /* AND: ret = op1 & op2 */
2691 #define PSL_AND_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2692 EXEC_INSTR(8, and)
2693 EXEC_INSTR(16, and)
2694 EXEC_INSTR(32, and)
2695 EXEC_INSTR(64, and)
2696 EXEC_DISPATCHER(and)
2697
2698 /* XOR: ret = op1 ^ op2 */
2699 #define PSL_XOR_MASK (PSL_V|PSL_C|PSL_Z|PSL_N|PSL_PF)
2700 EXEC_INSTR(8, xor)
2701 EXEC_INSTR(16, xor)
2702 EXEC_INSTR(32, xor)
2703 EXEC_INSTR(64, xor)
2704 EXEC_DISPATCHER(xor)
2705
2706 /* -------------------------------------------------------------------------- */
2707
2708 /*
2709  * Emulation functions. We don't care about the order of the operands, except
2710  * for SUB, CMP and TEST. For those we look at mem->write to determine which
2711  * operand is op1 and which is op2.
2712  */
2713
2714 static void
2715 x86_func_or(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2716 {
2717 uint64_t *retval = (uint64_t *)mem->data;
2718 const bool write = mem->write;
2719 uint64_t *op1, op2, fl, ret;
2720
2721 op1 = (uint64_t *)mem->data;
2722 op2 = 0;
2723
2724 /* Fetch the value to be OR'ed (op2). */
2725 mem->data = (uint8_t *)&op2;
2726 mem->write = false;
2727 (*mach->cbs.mem)(mem);
2728
2729 /* Perform the OR. */
2730 ret = exec_or(*op1, op2, &fl, mem->size);
2731
2732 if (write) {
2733 /* Write back the result. */
2734 mem->data = (uint8_t *)&ret;
2735 mem->write = true;
2736 (*mach->cbs.mem)(mem);
2737 } else {
2738 /* Return data to the caller. */
2739 *retval = ret;
2740 }
2741
2742 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_OR_MASK;
2743 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_OR_MASK);
2744 }
2745
2746 static void
2747 x86_func_and(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2748 {
2749 uint64_t *retval = (uint64_t *)mem->data;
2750 const bool write = mem->write;
2751 uint64_t *op1, op2, fl, ret;
2752
2753 op1 = (uint64_t *)mem->data;
2754 op2 = 0;
2755
2756 /* Fetch the value to be AND'ed (op2). */
2757 mem->data = (uint8_t *)&op2;
2758 mem->write = false;
2759 (*mach->cbs.mem)(mem);
2760
2761 /* Perform the AND. */
2762 ret = exec_and(*op1, op2, &fl, mem->size);
2763
2764 if (write) {
2765 /* Write back the result. */
2766 mem->data = (uint8_t *)&ret;
2767 mem->write = true;
2768 (*mach->cbs.mem)(mem);
2769 } else {
2770 /* Return data to the caller. */
2771 *retval = ret;
2772 }
2773
2774 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2775 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2776 }
2777
2778 static void
2779 x86_func_xchg(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2780 {
2781 uint64_t *op1, op2;
2782
2783 op1 = (uint64_t *)mem->data;
2784 op2 = 0;
2785
2786 /* Fetch op2. */
2787 mem->data = (uint8_t *)&op2;
2788 mem->write = false;
2789 (*mach->cbs.mem)(mem);
2790
2791 /* Write op1 in op2. */
2792 mem->data = (uint8_t *)op1;
2793 mem->write = true;
2794 (*mach->cbs.mem)(mem);
2795
2796 /* Write op2 in op1. */
2797 *op1 = op2;
2798 }
2799
2800 static void
2801 x86_func_sub(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2802 {
2803 uint64_t *retval = (uint64_t *)mem->data;
2804 const bool write = mem->write;
2805 uint64_t *op1, *op2, fl, ret;
2806 uint64_t tmp;
2807 bool memop1;
2808
2809 memop1 = !mem->write;
2810 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2811 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2812
2813 /* Fetch the value to be SUB'ed (op1 or op2). */
2814 mem->data = (uint8_t *)&tmp;
2815 mem->write = false;
2816 (*mach->cbs.mem)(mem);
2817
2818 /* Perform the SUB. */
2819 ret = exec_sub(*op1, *op2, &fl, mem->size);
2820
2821 if (write) {
2822 /* Write back the result. */
2823 mem->data = (uint8_t *)&ret;
2824 mem->write = true;
2825 (*mach->cbs.mem)(mem);
2826 } else {
2827 /* Return data to the caller. */
2828 *retval = ret;
2829 }
2830
2831 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2832 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2833 }
2834
2835 static void
2836 x86_func_xor(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2837 {
2838 uint64_t *retval = (uint64_t *)mem->data;
2839 const bool write = mem->write;
2840 uint64_t *op1, op2, fl, ret;
2841
2842 op1 = (uint64_t *)mem->data;
2843 op2 = 0;
2844
2845 /* Fetch the value to be XOR'ed (op2). */
2846 mem->data = (uint8_t *)&op2;
2847 mem->write = false;
2848 (*mach->cbs.mem)(mem);
2849
2850 /* Perform the XOR. */
2851 ret = exec_xor(*op1, op2, &fl, mem->size);
2852
2853 if (write) {
2854 /* Write back the result. */
2855 mem->data = (uint8_t *)&ret;
2856 mem->write = true;
2857 (*mach->cbs.mem)(mem);
2858 } else {
2859 /* Return data to the caller. */
2860 *retval = ret;
2861 }
2862
2863 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_XOR_MASK;
2864 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_XOR_MASK);
2865 }
2866
2867 static void
2868 x86_func_cmp(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2869 {
2870 uint64_t *op1, *op2, fl;
2871 uint64_t tmp;
2872 bool memop1;
2873
2874 memop1 = !mem->write;
2875 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2876 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2877
2878 /* Fetch the value to be CMP'ed (op1 or op2). */
2879 mem->data = (uint8_t *)&tmp;
2880 mem->write = false;
2881 (*mach->cbs.mem)(mem);
2882
2883 /* Perform the CMP. */
2884 exec_sub(*op1, *op2, &fl, mem->size);
2885
2886 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_SUB_MASK;
2887 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_SUB_MASK);
2888 }
2889
2890 static void
2891 x86_func_test(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2892 {
2893 uint64_t *op1, *op2, fl;
2894 uint64_t tmp;
2895 bool memop1;
2896
2897 memop1 = !mem->write;
2898 op1 = memop1 ? &tmp : (uint64_t *)mem->data;
2899 op2 = memop1 ? (uint64_t *)mem->data : &tmp;
2900
2901 /* Fetch the value to be TEST'ed (op1 or op2). */
2902 mem->data = (uint8_t *)&tmp;
2903 mem->write = false;
2904 (*mach->cbs.mem)(mem);
2905
2906 /* Perform the TEST. */
2907 exec_and(*op1, *op2, &fl, mem->size);
2908
2909 gprs[NVMM_X64_GPR_RFLAGS] &= ~PSL_AND_MASK;
2910 gprs[NVMM_X64_GPR_RFLAGS] |= (fl & PSL_AND_MASK);
2911 }
2912
2913 static void
2914 x86_func_mov(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2915 {
2916 /*
2917 * Nothing special, just move without emulation.
2918 */
2919 (*mach->cbs.mem)(mem);
2920 }
2921
2922 static void
2923 x86_func_stos(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2924 {
2925 /*
2926 * Just move, and update RDI.
2927 */
2928 (*mach->cbs.mem)(mem);
2929
2930 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2931 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2932 } else {
2933 gprs[NVMM_X64_GPR_RDI] += mem->size;
2934 }
2935 }
2936
2937 static void
2938 x86_func_lods(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2939 {
2940 /*
2941 * Just move, and update RSI.
2942 */
2943 (*mach->cbs.mem)(mem);
2944
2945 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2946 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2947 } else {
2948 gprs[NVMM_X64_GPR_RSI] += mem->size;
2949 }
2950 }
2951
2952 static void
2953 x86_func_movs(struct nvmm_machine *mach, struct nvmm_mem *mem, uint64_t *gprs)
2954 {
2955 	/*
2956 	 * Special instruction: double memory operand. Don't call the cb,
2957 	 * because the memory transfer was already done by assist_mem_double().
2958 	 */
2959
2960 if (gprs[NVMM_X64_GPR_RFLAGS] & PSL_D) {
2961 gprs[NVMM_X64_GPR_RSI] -= mem->size;
2962 gprs[NVMM_X64_GPR_RDI] -= mem->size;
2963 } else {
2964 gprs[NVMM_X64_GPR_RSI] += mem->size;
2965 gprs[NVMM_X64_GPR_RDI] += mem->size;
2966 }
2967 }
2968
2969 /* -------------------------------------------------------------------------- */
2970
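/*
 * Read a GPR used as (part of) an address, truncated to the effective
 * address size of the instruction.
 */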
2971 static inline uint64_t
2972 gpr_read_address(struct x86_instr *instr, struct nvmm_x64_state *state, int gpr)
2973 {
2974 uint64_t val;
2975
2976 val = state->gprs[gpr];
2977 val &= size_to_mask(instr->address_size);
2978
2979 return val;
2980 }
2981
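/*
 * Compute the guest virtual address referenced by a decoded store:
 * base, index*scale and displacement as applicable, plus the segment
 * base (explicit hard segment, legacy override, or DS by default).
 * Segment limits are checked only outside of long mode.
 */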
2982 static int
2983 store_to_gva(struct nvmm_x64_state *state, struct x86_instr *instr,
2984 struct x86_store *store, gvaddr_t *gvap, size_t size)
2985 {
2986 struct x86_sib *sib;
2987 gvaddr_t gva = 0;
2988 uint64_t reg;
2989 int ret, seg;
2990
2991 if (store->type == STORE_SIB) {
2992 sib = &store->u.sib;
2993 if (sib->bas != NULL)
2994 gva += gpr_read_address(instr, state, sib->bas->num);
2995 if (sib->idx != NULL) {
2996 reg = gpr_read_address(instr, state, sib->idx->num);
2997 gva += sib->scale * reg;
2998 }
2999 } else if (store->type == STORE_REG) {
3000 if (store->u.reg == NULL) {
3001 /* The base is null. Happens with disp32-only and
3002 * disp16-only. */
3003 } else {
3004 gva = gpr_read_address(instr, state, store->u.reg->num);
3005 }
3006 } else if (store->type == STORE_DUALREG) {
3007 gva = gpr_read_address(instr, state, store->u.dualreg.reg1) +
3008 gpr_read_address(instr, state, store->u.dualreg.reg2);
3009 } else {
3010 gva = store->u.dmo;
3011 }
3012
3013 if (store->disp.type != DISP_NONE) {
3014 gva += store->disp.data;
3015 }
3016
3017 if (store->hardseg != -1) {
3018 seg = store->hardseg;
3019 } else {
3020 if (__predict_false(instr->legpref.seg != -1)) {
3021 seg = instr->legpref.seg;
3022 } else {
3023 seg = NVMM_X64_SEG_DS;
3024 }
3025 }
3026
3027 if (__predict_true(is_long_mode(state))) {
3028 if (seg == NVMM_X64_SEG_GS || seg == NVMM_X64_SEG_FS) {
3029 segment_apply(&state->segs[seg], &gva);
3030 }
3031 } else {
3032 ret = segment_check(&state->segs[seg], gva, size);
3033 if (ret == -1)
3034 return -1;
3035 segment_apply(&state->segs[seg], &gva);
3036 }
3037
3038 *gvap = gva;
3039 return 0;
3040 }
3041
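/*
 * Pre-scan the instruction at RIP: skip over the legacy prefixes and
 * return the effective segment selected by any segment-override prefix,
 * defaulting to DS.
 */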
3042 static int
3043 fetch_segment(struct nvmm_machine *mach, struct nvmm_x64_state *state)
3044 {
3045 uint8_t inst_bytes[5], byte;
3046 size_t i, fetchsize;
3047 gvaddr_t gva;
3048 int ret, seg;
3049
3050 fetchsize = sizeof(inst_bytes);
3051
3052 gva = state->gprs[NVMM_X64_GPR_RIP];
3053 if (__predict_false(!is_long_mode(state))) {
3054 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3055 fetchsize);
3056 if (ret == -1)
3057 return -1;
3058 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3059 }
3060
3061 ret = read_guest_memory(mach, state, gva, inst_bytes, fetchsize);
3062 if (ret == -1)
3063 return -1;
3064
3065 seg = NVMM_X64_SEG_DS;
3066 for (i = 0; i < fetchsize; i++) {
3067 byte = inst_bytes[i];
3068
3069 if (byte == LEG_OVR_DS) {
3070 seg = NVMM_X64_SEG_DS;
3071 } else if (byte == LEG_OVR_ES) {
3072 seg = NVMM_X64_SEG_ES;
3073 } else if (byte == LEG_OVR_GS) {
3074 seg = NVMM_X64_SEG_GS;
3075 } else if (byte == LEG_OVR_FS) {
3076 seg = NVMM_X64_SEG_FS;
3077 } else if (byte == LEG_OVR_CS) {
3078 seg = NVMM_X64_SEG_CS;
3079 } else if (byte == LEG_OVR_SS) {
3080 seg = NVMM_X64_SEG_SS;
3081 } else if (byte == LEG_OPR_OVR) {
3082 /* nothing */
3083 } else if (byte == LEG_ADR_OVR) {
3084 /* nothing */
3085 } else if (byte == LEG_REP) {
3086 /* nothing */
3087 } else if (byte == LEG_REPN) {
3088 /* nothing */
3089 } else if (byte == LEG_LOCK) {
3090 /* nothing */
3091 } else {
3092 return seg;
3093 }
3094 }
3095
3096 return seg;
3097 }
3098
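/*
 * Fetch the instruction bytes at RIP into the exit structure, applying
 * the CS base and limit when not in long mode.
 */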
3099 static int
3100 fetch_instruction(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3101 struct nvmm_exit *exit)
3102 {
3103 size_t fetchsize;
3104 gvaddr_t gva;
3105 int ret;
3106
3107 fetchsize = sizeof(exit->u.mem.inst_bytes);
3108
3109 gva = state->gprs[NVMM_X64_GPR_RIP];
3110 if (__predict_false(!is_long_mode(state))) {
3111 ret = segment_check(&state->segs[NVMM_X64_SEG_CS], gva,
3112 fetchsize);
3113 if (ret == -1)
3114 return -1;
3115 segment_apply(&state->segs[NVMM_X64_SEG_CS], &gva);
3116 }
3117
3118 ret = read_guest_memory(mach, state, gva, exit->u.mem.inst_bytes,
3119 fetchsize);
3120 if (ret == -1)
3121 return -1;
3122
3123 exit->u.mem.inst_len = fetchsize;
3124
3125 return 0;
3126 }
3127
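/*
 * Assist for instructions with two memory operands (MOVS): read from the
 * source GVA, write to the destination GVA, then let the emul function
 * update RSI/RDI.
 */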
3128 static int
3129 assist_mem_double(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3130 struct x86_instr *instr)
3131 {
3132 struct nvmm_mem mem;
3133 uint8_t data[8];
3134 gvaddr_t gva;
3135 size_t size;
3136 int ret;
3137
3138 size = instr->operand_size;
3139
3140 /* Source. */
3141 ret = store_to_gva(state, instr, &instr->src, &gva, size);
3142 if (ret == -1)
3143 return -1;
3144 ret = read_guest_memory(mach, state, gva, data, size);
3145 if (ret == -1)
3146 return -1;
3147
3148 /* Destination. */
3149 ret = store_to_gva(state, instr, &instr->dst, &gva, size);
3150 if (ret == -1)
3151 return -1;
3152 ret = write_guest_memory(mach, state, gva, data, size);
3153 if (ret == -1)
3154 return -1;
3155
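	/* Only the size matters here: the MOVS emul function just updates RSI/RDI. */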
3156 mem.size = size;
3157 (*instr->emul->func)(mach, &mem, state->gprs);
3158
3159 return 0;
3160 }
3161
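/*
 * Assist for instructions with one memory operand: determine the access
 * direction from the decoded source, gather the register or immediate
 * value for a write, run the emul function with the mem callback, and
 * install the result into the destination register for a read (or
 * back-propagate it into the source register when requested).
 */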
3162 static int
3163 assist_mem_single(struct nvmm_machine *mach, struct nvmm_x64_state *state,
3164 struct x86_instr *instr, struct nvmm_exit *exit)
3165 {
3166 struct nvmm_mem mem;
3167 uint8_t membuf[8];
3168 uint64_t val;
3169
3170 memset(membuf, 0, sizeof(membuf));
3171
3172 mem.gpa = exit->u.mem.gpa;
3173 mem.size = instr->operand_size;
3174 mem.data = membuf;
3175
3176 /* Determine the direction. */
3177 switch (instr->src.type) {
3178 case STORE_REG:
3179 if (instr->src.disp.type != DISP_NONE) {
3180 /* Indirect access. */
3181 mem.write = false;
3182 } else {
3183 /* Direct access. */
3184 mem.write = true;
3185 }
3186 break;
3187 case STORE_DUALREG:
3188 if (instr->src.disp.type == DISP_NONE) {
3189 DISASSEMBLER_BUG();
3190 }
3191 mem.write = false;
3192 break;
3193 case STORE_IMM:
3194 mem.write = true;
3195 break;
3196 case STORE_SIB:
3197 mem.write = false;
3198 break;
3199 case STORE_DMO:
3200 mem.write = false;
3201 break;
3202 default:
3203 DISASSEMBLER_BUG();
3204 }
3205
3206 if (mem.write) {
3207 switch (instr->src.type) {
3208 case STORE_REG:
3209 			/* The instruction was "reg -> mem". Fetch the register
3210 			 * into membuf. */
3211 if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3212 DISASSEMBLER_BUG();
3213 }
3214 val = state->gprs[instr->src.u.reg->num];
3215 val = __SHIFTOUT(val, instr->src.u.reg->mask);
3216 memcpy(mem.data, &val, mem.size);
3217 break;
3218 case STORE_IMM:
3219 			/* The instruction was "imm -> mem". Fetch the immediate
3220 			 * into membuf. */
3221 memcpy(mem.data, &instr->src.u.imm.data, mem.size);
3222 break;
3223 default:
3224 DISASSEMBLER_BUG();
3225 }
3226 } else if (instr->emul->readreg) {
3227 		/* The instruction was "mem -> reg", but the value of the
3228 		 * register matters to the emul func. Fetch it into membuf. */
3229 if (__predict_false(instr->dst.type != STORE_REG)) {
3230 DISASSEMBLER_BUG();
3231 }
3232 if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3233 DISASSEMBLER_BUG();
3234 }
3235 val = state->gprs[instr->dst.u.reg->num];
3236 val = __SHIFTOUT(val, instr->dst.u.reg->mask);
3237 memcpy(mem.data, &val, mem.size);
3238 }
3239
3240 (*instr->emul->func)(mach, &mem, state->gprs);
3241
3242 if (instr->emul->notouch) {
3243 /* We're done. */
3244 return 0;
3245 }
3246
3247 if (!mem.write) {
3248 		/* The instruction was "mem -> reg". The emul func has filled
3249 		 * membuf with the memory content. Install membuf into the
3250 		 * register. */
3251 if (__predict_false(instr->dst.type != STORE_REG)) {
3252 DISASSEMBLER_BUG();
3253 }
3254 if (__predict_false(instr->dst.disp.type != DISP_NONE)) {
3255 DISASSEMBLER_BUG();
3256 }
3257 memcpy(&val, membuf, sizeof(uint64_t));
3258 val = __SHIFTIN(val, instr->dst.u.reg->mask);
3259 state->gprs[instr->dst.u.reg->num] &= ~instr->dst.u.reg->mask;
3260 state->gprs[instr->dst.u.reg->num] |= val;
3261 state->gprs[instr->dst.u.reg->num] &= ~instr->zeroextend_mask;
3262 } else if (instr->emul->backprop) {
3263 		/* The instruction was "reg -> mem", but the memory must be
3264 		 * back-propagated to the register. Install membuf into the
3265 		 * register. */
3266 if (__predict_false(instr->src.type != STORE_REG)) {
3267 DISASSEMBLER_BUG();
3268 }
3269 if (__predict_false(instr->src.disp.type != DISP_NONE)) {
3270 DISASSEMBLER_BUG();
3271 }
3272 memcpy(&val, membuf, sizeof(uint64_t));
3273 val = __SHIFTIN(val, instr->src.u.reg->mask);
3274 state->gprs[instr->src.u.reg->num] &= ~instr->src.u.reg->mask;
3275 state->gprs[instr->src.u.reg->num] |= val;
3276 state->gprs[instr->src.u.reg->num] &= ~instr->zeroextend_mask;
3277 }
3278
3279 return 0;
3280 }
3281
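/*
 * Entry point for memory (MMIO) assists: fetch the instruction bytes if the
 * kernel did not provide them, decode the instruction, emulate its memory
 * access through the registered mem callback, handle REP/REPN counting, and
 * advance RIP once the instruction is complete.
 */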
3282 int
3283 nvmm_assist_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu)
3284 {
3285 struct nvmm_x64_state *state = vcpu->state;
3286 struct nvmm_exit *exit = vcpu->exit;
3287 struct x86_instr instr;
3288 uint64_t cnt = 0; /* GCC */
3289 int ret;
3290
3291 if (__predict_false(exit->reason != NVMM_EXIT_MEMORY)) {
3292 errno = EINVAL;
3293 return -1;
3294 }
3295
3296 ret = nvmm_vcpu_getstate(mach, vcpu,
3297 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS |
3298 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS);
3299 if (ret == -1)
3300 return -1;
3301
3302 if (exit->u.mem.inst_len == 0) {
3303 		/*
3304 		 * The kernel did not provide the instruction bytes. Fetch
3305 		 * them ourselves.
3306 		 */
3307 ret = fetch_instruction(mach, state, exit);
3308 if (ret == -1)
3309 return -1;
3310 }
3311
3312 ret = x86_decode(exit->u.mem.inst_bytes, exit->u.mem.inst_len,
3313 &instr, state);
3314 if (ret == -1) {
3315 errno = ENODEV;
3316 return -1;
3317 }
3318
3319 if (instr.legpref.rep || instr.legpref.repn) {
3320 cnt = rep_get_cnt(state, instr.address_size);
3321 if (__predict_false(cnt == 0)) {
3322 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3323 goto out;
3324 }
3325 }
3326
3327 if (instr.opcode->movs) {
3328 ret = assist_mem_double(mach, state, &instr);
3329 } else {
3330 ret = assist_mem_single(mach, state, &instr, exit);
3331 }
3332 if (ret == -1) {
3333 errno = ENODEV;
3334 return -1;
3335 }
3336
3337 if (instr.legpref.rep || instr.legpref.repn) {
3338 cnt -= 1;
3339 rep_set_cnt(state, instr.address_size, cnt);
3340 if (cnt == 0) {
3341 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3342 } else if (__predict_false(instr.legpref.repn)) {
3343 if (state->gprs[NVMM_X64_GPR_RFLAGS] & PSL_Z) {
3344 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3345 }
3346 }
3347 } else {
3348 state->gprs[NVMM_X64_GPR_RIP] += instr.len;
3349 }
3350
3351 out:
3352 ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS);
3353 if (ret == -1)
3354 return -1;
3355
3356 return 0;
3357 }
3358