/*	$NetBSD: sys_machdep.c,v 1.32 2017/02/14 09:11:05 maxv Exp $	*/

/*-
 * Copyright (c) 1998, 2007, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.32 2017/02/14 09:11:05 maxv Exp $");

#include "opt_mtrr.h"
#include "opt_perfctrs.h"
#include "opt_user_ldt.h"
#include "opt_compat_netbsd.h"
#ifdef i386
#include "opt_vm86.h"
#endif
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signal.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/kauth.h>
#include <sys/cpu.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/gdt.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/sysarch.h>
#include <machine/mtrr.h>
#ifdef __x86_64__
/*
 * The code for USER_LDT on amd64 is mostly functional, but it is still not
 * enabled.
 *
 * On amd64 we allow only 8-byte-sized entries in the LDT, and we do not
 * allow the user to overwrite the existing entries (below LDT_SIZE).
 * Note that USER_LDT is used only by 32-bit applications, under
 * compat_netbsd32.  This is theoretically enough for Wine to work.
 *
 * However, letting the segment registers refer to descriptors with
 * different bases breaks amd64's Thread Local Storage: %fs and %gs must
 * be reloaded when returning to userland.  See the tech-kern@ archive
 * from February 2017.  A patch has been proposed to fix that, but Wine
 * still randomly crashes; it is not clear whether the issues come from
 * Wine, from netbsd32 or from the patch itself.
 */
#undef	USER_LDT
/* These still need to be checked. */
#undef	PERFCTRS
#undef	IOPERM
#else
#if defined(XEN)
#undef	IOPERM
#else /* defined(XEN) */
#define	IOPERM
#endif /* defined(XEN) */
#endif

#ifdef VM86
#include <machine/vm86.h>
#endif

#ifdef PERFCTRS
#include <machine/pmc.h>
#endif

extern struct vm_map *kernel_map;

int x86_get_ioperm(struct lwp *, void *, register_t *);
int x86_set_ioperm(struct lwp *, void *, register_t *);
int x86_get_mtrr(struct lwp *, void *, register_t *);
int x86_set_mtrr(struct lwp *, void *, register_t *);
int x86_set_sdbase32(void *, char, lwp_t *, bool);
int x86_set_sdbase(void *, char, lwp_t *, bool);
int x86_get_sdbase32(void *, char);
int x86_get_sdbase(void *, char);

#if defined(USER_LDT) && defined(LDT_DEBUG)
static void x86_print_ldt(int, const struct segment_descriptor *);

static void
x86_print_ldt(int i, const struct segment_descriptor *d)
{
	printf("[%d] lolimit=0x%x, lobase=0x%x, type=%u, dpl=%u, p=%u, "
	    "hilimit=0x%x, xx=%x, def32=%u, gran=%u, hibase=0x%x\n",
	    i, d->sd_lolimit, d->sd_lobase, d->sd_type, d->sd_dpl, d->sd_p,
	    d->sd_hilimit, d->sd_xx, d->sd_def32, d->sd_gran, d->sd_hibase);
}
#endif

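/*
 * sysarch(2) X86_GET_LDT handler: copy LDT entries out to userland.
 * Illustrative userland usage (a sketch only; it assumes a kernel built
 * with USER_LDT, and <machine/sysarch.h> for the structures):
 *
 *	union descriptor descs[4];
 *	struct x86_get_ldt_args gl = { .start = 0, .num = 4, .desc = descs };
 *	if (sysarch(X86_GET_LDT, &gl) == -1)
 *		err(EXIT_FAILURE, "X86_GET_LDT");
 */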
int
x86_get_ldt(struct lwp *l, void *args, register_t *retval)
{
#ifndef USER_LDT
	return EINVAL;
#else
	struct x86_get_ldt_args ua;
	union descriptor *cp;
	int error;

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return error;

	if (ua.num < 0 || ua.num > 8192)
		return EINVAL;

	cp = malloc(ua.num * sizeof(union descriptor), M_TEMP, M_WAITOK);
	if (cp == NULL)
		return ENOMEM;

	error = x86_get_ldt1(l, &ua, cp);
	*retval = ua.num;
	if (error == 0)
		error = copyout(cp, ua.desc, ua.num * sizeof(*cp));

	free(cp, M_TEMP);
	return error;
#endif
}

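/*
 * Fetch up to ua->num descriptors starting at slot ua->start into cp,
 * reading from either the pmap's private LDT or the default one.
 * ua->num is clamped to what is actually present and reported back to
 * the caller.
 */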
int
x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
{
#ifndef USER_LDT
	return EINVAL;
#else
	int error;
	struct proc *p = l->l_proc;
	pmap_t pmap = p->p_vmspace->vm_map.pmap;
	int nldt, num;
	union descriptor *lp;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

#ifdef	LDT_DEBUG
	printf("x86_get_ldt: start=%d num=%d descs=%p\n", ua->start,
	    ua->num, ua->desc);
#endif

	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
	    ua->start + ua->num > 8192)
		return (EINVAL);

#ifdef __x86_64__
	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
		return EINVAL;
#endif

	mutex_enter(&cpu_lock);

	if (pmap->pm_ldt != NULL) {
		nldt = pmap->pm_ldt_len / sizeof(*lp);
		lp = pmap->pm_ldt;
	} else {
#ifdef __x86_64__
		nldt = LDT_SIZE / sizeof(*lp);
#else
		nldt = NLDT;
#endif
		lp = (union descriptor *)ldtstore;
	}

	if (ua->start > nldt) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	lp += ua->start;
	num = min(ua->num, nldt - ua->start);
	ua->num = num;
#ifdef LDT_DEBUG
	{
		int i;
		for (i = 0; i < num; i++)
			x86_print_ldt(i, &lp[i].sd);
	}
#endif

	memcpy(cp, lp, num * sizeof(union descriptor));
	mutex_exit(&cpu_lock);

	return 0;
#endif
}

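/*
 * sysarch(2) X86_SET_LDT handler: install user-supplied LDT entries.
 * Illustrative userland usage (a sketch only; the descriptor setup is
 * elided and would normally build a ring-3 memory segment):
 *
 *	union descriptor d;
 *	memset(&d, 0, sizeof(d));
 *	... fill in d.sd as a ring-3 (SEL_UPL) memory descriptor ...
 *	struct x86_set_ldt_args sl = { .start = slot, .num = 1, .desc = &d };
 *	if (sysarch(X86_SET_LDT, &sl) == -1)
 *		err(EXIT_FAILURE, "X86_SET_LDT");
 */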
int
x86_set_ldt(struct lwp *l, void *args, register_t *retval)
{
#ifndef USER_LDT
	return EINVAL;
#else
	struct x86_set_ldt_args ua;
	union descriptor *descv;
	int error;

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	if (ua.num < 0 || ua.num > 8192)
		return EINVAL;

	descv = malloc(sizeof (*descv) * ua.num, M_TEMP, M_NOWAIT);
	if (descv == NULL)
		return ENOMEM;

	error = copyin(ua.desc, descv, sizeof (*descv) * ua.num);
	if (error == 0)
		error = x86_set_ldt1(l, &ua, descv);
	*retval = ua.start;

	free(descv, M_TEMP);
	return error;
#endif
}

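/*
 * Validate the user-supplied descriptors, then build a new LDT image and
 * atomically swap it in: allocate, copy the old entries, apply the new
 * ones, allocate a selector, and only then publish it in the pmap.
 */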
int
x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *ua,
    union descriptor *descv)
{
#ifndef USER_LDT
	return EINVAL;
#else
	int error, i, n, old_sel, new_sel;
	struct proc *p = l->l_proc;
	pmap_t pmap = p->p_vmspace->vm_map.pmap;
	size_t old_len, new_len;
	union descriptor *old_ldt, *new_ldt;

#ifdef __x86_64__
	const size_t min_ldt_size = LDT_SIZE;
#else
	const size_t min_ldt_size = NLDT * sizeof(union descriptor);
#endif

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
	    ua->start + ua->num > 8192)
		return (EINVAL);

#ifdef __x86_64__
	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
		return EINVAL;
#endif

	/* Check descriptors for access violations. */
	for (i = 0; i < ua->num; i++) {
		union descriptor *desc = &descv[i];

		switch (desc->sd.sd_type) {
		case SDT_SYSNULL:
			desc->sd.sd_p = 0;
			break;
#ifdef __x86_64__
		case SDT_SYS286CGT:
		case SDT_SYS386CGT:
			/* We don't allow these on amd64. */
			return EACCES;
#else
		case SDT_SYS286CGT:
		case SDT_SYS386CGT:
			/*
			 * Only allow call gates targeting a segment
			 * in the LDT or a user segment in the fixed
			 * part of the gdt.  Segments in the LDT are
			 * constrained (below) to be user segments.
			 */
			if (desc->gd.gd_p != 0 &&
			    !ISLDT(desc->gd.gd_selector) &&
			    ((IDXSEL(desc->gd.gd_selector) >= NGDT) ||
			     (gdtstore[IDXSEL(desc->gd.gd_selector)].sd.sd_dpl !=
				 SEL_UPL))) {
				return EACCES;
			}
			break;
#endif
		case SDT_MEMEC:
		case SDT_MEMEAC:
		case SDT_MEMERC:
		case SDT_MEMERAC:
			/* Must be "present" if executable and conforming. */
			if (desc->sd.sd_p == 0)
				return EACCES;
			break;
		case SDT_MEMRO:
		case SDT_MEMROA:
		case SDT_MEMRW:
		case SDT_MEMRWA:
		case SDT_MEMROD:
		case SDT_MEMRODA:
		case SDT_MEMRWD:
		case SDT_MEMRWDA:
		case SDT_MEME:
		case SDT_MEMEA:
		case SDT_MEMER:
		case SDT_MEMERA:
			break;
		default:
			/*
			 * Make sure that unknown descriptor types are
			 * not marked present.
			 */
			if (desc->sd.sd_p != 0)
				return EACCES;
			break;
		}

		if (desc->sd.sd_p != 0) {
			/* Only user (ring-3) descriptors may be present. */
			if (desc->sd.sd_dpl != SEL_UPL)
				return EACCES;
		}
	}

	/*
	 * Install selected changes.  We perform a copy, write, swap dance
	 * here to ensure that all updates happen atomically.
	 */

	/* Allocate a new LDT. */
	for (;;) {
		new_len = (ua->start + ua->num) * sizeof(union descriptor);
		new_len = max(new_len, pmap->pm_ldt_len);
		new_len = max(new_len, min_ldt_size);
		new_len = round_page(new_len);
		new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
		    new_len, 0, UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_WAITVA);
		mutex_enter(&cpu_lock);
		if (pmap->pm_ldt_len <= new_len) {
			break;
		}
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
		    UVM_KMF_WIRED);
	}

	/* Copy existing entries, if any. */
	if (pmap->pm_ldt != NULL) {
		old_ldt = pmap->pm_ldt;
		old_len = pmap->pm_ldt_len;
		old_sel = pmap->pm_ldt_sel;
		memcpy(new_ldt, old_ldt, old_len);
	} else {
		old_ldt = NULL;
		old_len = 0;
		old_sel = -1;
		memcpy(new_ldt, ldtstore, min_ldt_size);
	}

	/* Apply requested changes. */
	for (i = 0, n = ua->start; i < ua->num; i++, n++) {
		new_ldt[n] = descv[i];
	}

	/* Allocate LDT selector. */
	new_sel = ldt_alloc(new_ldt, new_len);
	if (new_sel == -1) {
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
		    UVM_KMF_WIRED);
		return ENOMEM;
	}

	/* All changes are now globally visible.  Swap in the new LDT. */
	pmap->pm_ldt_len = new_len;
	pmap->pm_ldt_sel = new_sel;
	/*
	 * Store-store barrier: make the stores above visible before the
	 * store to pm_ldt, so that pmap_fork() can safely read these
	 * fields unlocked.
	 */
	membar_producer();
	pmap->pm_ldt = new_ldt;

	/* Switch existing users onto the new LDT. */
	pmap_ldt_sync(pmap);

	/* Free the existing LDT (if any). */
	if (old_ldt != NULL) {
		ldt_free(old_sel);
		/* Release the mutex before freeing. */
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len,
		    UVM_KMF_WIRED);
	} else {
		mutex_exit(&cpu_lock);
	}

	return error;
#endif
}

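/*
 * sysarch(2) X86_IOPL handler: raise or drop the I/O privilege level of
 * the calling lwp.  Illustrative userland usage (a sketch; the caller
 * must pass the kauth(9) KAUTH_MACHDEP_IOPL check, typically root):
 *
 *	struct x86_iopl_args io = { .iopl = 1 };
 *	if (sysarch(X86_IOPL, &io) == -1)
 *		err(EXIT_FAILURE, "X86_IOPL");
 */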
int
x86_iopl(struct lwp *l, void *args, register_t *retval)
{
	int error;
	struct x86_iopl_args ua;
#ifdef XEN
	int iopl;
#else
	struct trapframe *tf = l->l_md.md_regs;
#endif

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return error;

#ifdef XEN
	if (ua.iopl)
		iopl = SEL_UPL;
	else
		iopl = SEL_KPL;

    {
	struct physdev_op physop;
	struct pcb *pcb;

	pcb = lwp_getpcb(l);
	pcb->pcb_iopl = iopl;

	/* Force the change at ring 0. */
	physop.cmd = PHYSDEVOP_SET_IOPL;
	physop.u.set_iopl.iopl = iopl;
	HYPERVISOR_physdev_op(&physop);
    }
#elif defined(__x86_64__)
	if (ua.iopl)
		tf->tf_rflags |= PSL_IOPL;
	else
		tf->tf_rflags &= ~PSL_IOPL;
#else
	if (ua.iopl)
		tf->tf_eflags |= PSL_IOPL;
	else
		tf->tf_eflags &= ~PSL_IOPL;
#endif

	return 0;
}

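/*
 * sysarch(2) X86_GET_IOPERM handler: copy the lwp's I/O bitmap out to
 * userland.  If the lwp has no bitmap yet, a temporary all-ones map
 * (all ports denied) is synthesized for the copyout.
 */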
int
x86_get_ioperm(struct lwp *l, void *args, register_t *retval)
{
#ifdef IOPERM
	int error;
	struct pcb *pcb = lwp_getpcb(l);
	struct x86_get_ioperm_args ua;
	void *dummymap = NULL;
	void *iomap;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	iomap = pcb->pcb_iomap;
	if (iomap == NULL) {
		iomap = dummymap = kmem_alloc(IOMAPSIZE, KM_SLEEP);
		memset(dummymap, 0xff, IOMAPSIZE);
	}
	error = copyout(iomap, ua.iomap, IOMAPSIZE);
	if (dummymap != NULL) {
		kmem_free(dummymap, IOMAPSIZE);
	}
	return error;
#else
	return EINVAL;
#endif
}

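/*
 * sysarch(2) X86_SET_IOPERM handler: install a new I/O bitmap for the
 * lwp and activate it on the current CPU by copying it into the per-CPU
 * map and repointing the TSS I/O base at it.
 */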
int
x86_set_ioperm(struct lwp *l, void *args, register_t *retval)
{
#ifdef IOPERM
	struct cpu_info *ci;
	int error;
	struct pcb *pcb = lwp_getpcb(l);
	struct x86_set_ioperm_args ua;
	void *new;
	void *old;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	new = kmem_alloc(IOMAPSIZE, KM_SLEEP);
	error = copyin(ua.iomap, new, IOMAPSIZE);
	if (error) {
		kmem_free(new, IOMAPSIZE);
		return error;
	}
	old = pcb->pcb_iomap;
	pcb->pcb_iomap = new;
	if (old != NULL) {
		kmem_free(old, IOMAPSIZE);
	}

	kpreempt_disable();
	ci = curcpu();
	memcpy(ci->ci_iomap, pcb->pcb_iomap, sizeof(ci->ci_iomap));
	ci->ci_tss.tss_iobase =
	    ((uintptr_t)ci->ci_iomap - (uintptr_t)&ci->ci_tss) << 16;
	kpreempt_enable();

	return error;
#else
	return EINVAL;
#endif
}

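/*
 * sysarch(2) X86_GET_MTRR handler: fetch memory type range registers.
 * ua.n points to an int holding, on entry, the number of entries the
 * ua.mtrrp buffer can take; it is updated by mtrr_get() and copied back.
 */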
int
x86_get_mtrr(struct lwp *l, void *args, register_t *retval)
{
#ifdef MTRR
	struct x86_get_mtrr_args ua;
	int error, error2, n;

	if (mtrr_funcs == NULL)
		return ENOSYS;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	error = copyin(args, &ua, sizeof ua);
	if (error != 0)
		return error;

	error = copyin(ua.n, &n, sizeof n);
	if (error != 0)
		return error;

	KERNEL_LOCK(1, NULL);
	error = mtrr_get(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
	KERNEL_UNLOCK_ONE(NULL);

	/* Report the updated count back, without masking an earlier error. */
	error2 = copyout(&n, ua.n, sizeof n);
	if (error == 0)
		error = error2;

	return error;
#else
	return EINVAL;
#endif
}

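/*
 * sysarch(2) X86_SET_MTRR handler: set memory type range registers and,
 * if any were accepted, commit them to the hardware.
 */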
int
x86_set_mtrr(struct lwp *l, void *args, register_t *retval)
{
#ifdef MTRR
	int error, error2, n;
	struct x86_set_mtrr_args ua;

	if (mtrr_funcs == NULL)
		return ENOSYS;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	error = copyin(args, &ua, sizeof ua);
	if (error != 0)
		return error;

	error = copyin(ua.n, &n, sizeof n);
	if (error != 0)
		return error;

	KERNEL_LOCK(1, NULL);
	error = mtrr_set(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
	if (n != 0)
		mtrr_commit();
	KERNEL_UNLOCK_ONE(NULL);

	/* Report the number set back, without masking an earlier error. */
	error2 = copyout(&n, ua.n, sizeof n);
	if (error == 0)
		error = error2;

	return error;
#else
	return EINVAL;
#endif
}

#ifdef __x86_64__
#define pcb_fsd pcb_fs
#define pcb_gsd pcb_gs
#define segment_descriptor mem_segment_descriptor
#endif

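/*
 * Set the %fs ('f') or %gs ('g') segment base of a 32-bit lwp by
 * rewriting the corresponding user segment descriptor in the PCB and,
 * for the current lwp, in the live GDT.  With "direct", arg is the base
 * itself rather than a user pointer to it.
 */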
int
x86_set_sdbase32(void *arg, char which, lwp_t *l, bool direct)
{
	struct trapframe *tf = l->l_md.md_regs;
	union descriptor usd;
	struct pcb *pcb;
	uint32_t base;
	int error;

	if (direct) {
		base = (vaddr_t)arg;
	} else {
		error = copyin(arg, &base, sizeof(base));
		if (error != 0)
			return error;
	}

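	/*
	 * Build a flat 32-bit data descriptor: the base is split over the
	 * 24-bit lobase and 8-bit hibase fields, and the 20-bit limit of
	 * 0xfffff with 4KB granularity covers the full 4GB address space.
	 */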
	memset(&usd, 0, sizeof(usd));
	usd.sd.sd_lobase = base & 0xffffff;
	usd.sd.sd_hibase = (base >> 24) & 0xff;
	usd.sd.sd_lolimit = 0xffff;
	usd.sd.sd_hilimit = 0xf;
	usd.sd.sd_type = SDT_MEMRWA;
	usd.sd.sd_dpl = SEL_UPL;
	usd.sd.sd_p = 1;
	usd.sd.sd_def32 = 1;
	usd.sd.sd_gran = 1;

	pcb = lwp_getpcb(l);
	kpreempt_disable();
	if (which == 'f') {
		memcpy(&pcb->pcb_fsd, &usd.sd,
		    sizeof(struct segment_descriptor));
		if (l == curlwp) {
			update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &usd);
#ifdef __x86_64__
			setfs(GSEL(GUFS_SEL, SEL_UPL));
#endif
		}
		tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
	} else /* which == 'g' */ {
		memcpy(&pcb->pcb_gsd, &usd.sd,
		    sizeof(struct segment_descriptor));
		if (l == curlwp) {
			update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &usd);
#ifdef __x86_64__
#ifndef XEN
			setusergs(GSEL(GUGS_SEL, SEL_UPL));
#else
			HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
						    GSEL(GUGS_SEL, SEL_UPL));
#endif
#endif
		}
		tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
	}
	kpreempt_enable();
	return 0;
}

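/*
 * Set the %fs ('f') or %gs ('g') segment base of a native lwp.  On
 * amd64 this writes the FSBASE/KERNELGSBASE MSRs directly; 32-bit lwps
 * (and all of i386) go through the descriptor-based path above.
 */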
int
x86_set_sdbase(void *arg, char which, lwp_t *l, bool direct)
{
#ifdef i386
	return x86_set_sdbase32(arg, which, l, direct);
#else
	struct pcb *pcb;
	vaddr_t base;

	if (l->l_proc->p_flag & PK_32) {
		return x86_set_sdbase32(arg, which, l, direct);
	}

	if (direct) {
		base = (vaddr_t)arg;
	} else {
		int error = copyin(arg, &base, sizeof(base));
		if (error != 0)
			return error;
	}

	if (base >= VM_MAXUSER_ADDRESS)
		return EINVAL;

	pcb = lwp_getpcb(l);

	kpreempt_disable();
	switch (which) {
	case 'f':
		pcb->pcb_fs = base;
		if (l == curlwp)
			wrmsr(MSR_FSBASE, pcb->pcb_fs);
		break;
	case 'g':
		pcb->pcb_gs = base;
		if (l == curlwp)
			wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
		break;
	default:
		panic("x86_set_sdbase");
	}
	kpreempt_enable();

	return 0;
#endif
}

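/*
 * Return the %fs ('f') or %gs ('g') segment base of a 32-bit lwp,
 * reassembled from the split descriptor fields in the current PCB.
 */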
int
x86_get_sdbase32(void *arg, char which)
{
	struct segment_descriptor *sd;
	uint32_t base;

	switch (which) {
	case 'f':
		sd = (void *)&curpcb->pcb_fsd;
		break;
	case 'g':
		sd = (void *)&curpcb->pcb_gsd;
		break;
	default:
		panic("x86_get_sdbase32");
	}

	base = sd->sd_hibase << 24 | sd->sd_lobase;
	return copyout(&base, arg, sizeof(base));
}

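/*
 * Return the %fs ('f') or %gs ('g') segment base of the current lwp,
 * copying out either the 64-bit value from the PCB or, for 32-bit lwps,
 * the 32-bit value recovered from the descriptor.
 */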
int
x86_get_sdbase(void *arg, char which)
{
#ifdef i386
	return x86_get_sdbase32(arg, which);
#else
	vaddr_t base;
	struct pcb *pcb;

	if (curproc->p_flag & PK_32) {
		return x86_get_sdbase32(arg, which);
	}

	pcb = lwp_getpcb(curlwp);

	switch (which) {
	case 'f':
		base = pcb->pcb_fs;
		break;
	case 'g':
		base = pcb->pcb_gs;
		break;
	default:
		panic("x86_get_sdbase");
	}

	return copyout(&base, arg, sizeof(base));
#endif
}

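/*
 * The sysarch(2) system call: dispatch the machine-dependent operations
 * above.  Illustrative userland usage for the segment-base operations
 * (a sketch only; parms points at the base value itself):
 *
 *	void *tls = ...;
 *	if (sysarch(X86_SET_GSBASE, &tls) == -1)
 *		err(EXIT_FAILURE, "X86_SET_GSBASE");
 */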
int
sys_sysarch(struct lwp *l, const struct sys_sysarch_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(void *) parms;
	} */
	int error = 0;

	switch (SCARG(uap, op)) {
	case X86_IOPL:
		error = x86_iopl(l, SCARG(uap, parms), retval);
		break;

#ifdef i386
	/*
	 * On amd64, this is done via netbsd32_sysarch.
	 */
	case X86_GET_LDT:
		error = x86_get_ldt(l, SCARG(uap, parms), retval);
		break;

	case X86_SET_LDT:
		error = x86_set_ldt(l, SCARG(uap, parms), retval);
		break;
#endif

	case X86_GET_IOPERM:
		error = x86_get_ioperm(l, SCARG(uap, parms), retval);
		break;

	case X86_SET_IOPERM:
		error = x86_set_ioperm(l, SCARG(uap, parms), retval);
		break;

	case X86_GET_MTRR:
		error = x86_get_mtrr(l, SCARG(uap, parms), retval);
		break;
	case X86_SET_MTRR:
		error = x86_set_mtrr(l, SCARG(uap, parms), retval);
		break;

#ifdef VM86
	case X86_VM86:
		error = x86_vm86(l, SCARG(uap, parms), retval);
		break;
	case X86_OLD_VM86:
		error = compat_16_x86_vm86(l, SCARG(uap, parms), retval);
		break;
#endif

#ifdef PERFCTRS
	case X86_PMC_INFO:
		KERNEL_LOCK(1, NULL);
		error = pmc_info(l, SCARG(uap, parms), retval);
		KERNEL_UNLOCK_ONE(NULL);
		break;

	case X86_PMC_STARTSTOP:
		KERNEL_LOCK(1, NULL);
		error = pmc_startstop(l, SCARG(uap, parms), retval);
		KERNEL_UNLOCK_ONE(NULL);
		break;

	case X86_PMC_READ:
		KERNEL_LOCK(1, NULL);
		error = pmc_read(l, SCARG(uap, parms), retval);
		KERNEL_UNLOCK_ONE(NULL);
		break;
#endif

	case X86_SET_FSBASE:
		error = x86_set_sdbase(SCARG(uap, parms), 'f', curlwp, false);
		break;

	case X86_SET_GSBASE:
		error = x86_set_sdbase(SCARG(uap, parms), 'g', curlwp, false);
		break;

	case X86_GET_FSBASE:
		error = x86_get_sdbase(SCARG(uap, parms), 'f');
		break;

	case X86_GET_GSBASE:
		error = x86_get_sdbase(SCARG(uap, parms), 'g');
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

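/*
 * Set the lwp's MI "private" (TLS) pointer.  Native 64-bit lwps keep it
 * in the %fs base; 32-bit lwps (and all of i386) keep it in %gs.
 */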
int
cpu_lwp_setprivate(lwp_t *l, void *addr)
{

#ifdef __x86_64__
	if ((l->l_proc->p_flag & PK_32) == 0) {
		return x86_set_sdbase(addr, 'f', l, true);
	}
#endif
	return x86_set_sdbase(addr, 'g', l, true);
}