/*	$NetBSD: sys_machdep.c,v 1.36 2017/07/12 16:59:41 maxv Exp $	*/

/*-
 * Copyright (c) 1998, 2007, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.36 2017/07/12 16:59:41 maxv Exp $");

#include "opt_mtrr.h"
#include "opt_pmc.h"
#include "opt_user_ldt.h"
#include "opt_compat_netbsd.h"
#ifdef i386
#include "opt_vm86.h"
#endif
#include "opt_xen.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/ioctl.h>
#include <sys/file.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signal.h>
#include <sys/malloc.h>
#include <sys/kmem.h>
#include <sys/kauth.h>
#include <sys/cpu.h>
#include <sys/mount.h>
#include <sys/syscallargs.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/gdt.h>
#include <machine/psl.h>
#include <machine/reg.h>
#include <machine/sysarch.h>
#include <machine/mtrr.h>

#ifdef __x86_64__
/*
 * The code for USER_LDT on amd64 is mostly functional, but it is still not
 * enabled.
 *
 * On amd64 we allow only 8-byte-sized entries in the LDT, and we do not
 * allow the user to overwrite the existing entries (below LDT_SIZE).
 * Note that USER_LDT is used only by 32-bit applications, under
 * compat_netbsd32.  This is theoretically enough for Wine to work.
 *
 * However, letting the segment registers point at a different location
 * breaks amd64's Thread Local Storage: %fs and %gs must be reloaded when
 * returning to userland.  See the tech-kern@ archive from February 2017.
 * A patch has been proposed to fix that, but Wine still randomly crashes;
 * it is not clear whether the issues come from Wine, from netbsd32 or from
 * the patch itself.
 */
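
/*
 * A sketch of the intended (and, with USER_LDT disabled below, currently
 * unreachable) userland usage.  Illustrative only: "slot" and "base" are
 * hypothetical, the argument layout is struct x86_set_ldt_args from
 * <machine/sysarch.h>, and error handling is omitted.
 *
 *	union descriptor d;
 *	struct x86_set_ldt_args ua;
 *
 *	memset(&d, 0, sizeof(d));
 *	d.sd.sd_lobase = base & 0xffffff;
 *	d.sd.sd_hibase = (base >> 24) & 0xff;
 *	d.sd.sd_lolimit = 0xffff;
 *	d.sd.sd_hilimit = 0xf;
 *	d.sd.sd_type = SDT_MEMRWA;	(read/write data)
 *	d.sd.sd_dpl = SEL_UPL;		(ring 3; enforced by x86_set_ldt1())
 *	d.sd.sd_p = 1;
 *	d.sd.sd_def32 = 1;
 *	d.sd.sd_gran = 1;
 *	ua.start = slot;		(on amd64, at or past the LDT_SIZE
 *					 default entries, per the checks below)
 *	ua.num = 1;
 *	ua.desc = &d;
 *	sysarch(X86_SET_LDT, &ua);
 */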
#undef	USER_LDT
/* Needs to be checked. */
#undef	IOPERM
#else
#if defined(XEN)
#undef	IOPERM
#else /* defined(XEN) */
#define	IOPERM
#endif /* defined(XEN) */
#endif

#ifdef XEN
#undef	PMC
#endif

#ifdef VM86
#include <machine/vm86.h>
#endif

#ifdef PMC
#include <machine/pmc.h>
#endif

extern struct vm_map *kernel_map;

int x86_get_ioperm(struct lwp *, void *, register_t *);
int x86_set_ioperm(struct lwp *, void *, register_t *);
int x86_get_mtrr(struct lwp *, void *, register_t *);
int x86_set_mtrr(struct lwp *, void *, register_t *);
int x86_set_sdbase32(void *, char, lwp_t *, bool);
int x86_set_sdbase(void *, char, lwp_t *, bool);
int x86_get_sdbase32(void *, char);
int x86_get_sdbase(void *, char);

#if defined(USER_LDT) && defined(LDT_DEBUG)
static void x86_print_ldt(int, const struct segment_descriptor *);

static void
x86_print_ldt(int i, const struct segment_descriptor *d)
{
	printf("[%d] lolimit=0x%x, lobase=0x%x, type=%u, dpl=%u, p=%u, "
	    "hilimit=0x%x, xx=%x, def32=%u, gran=%u, hibase=0x%x\n",
	    i, d->sd_lolimit, d->sd_lobase, d->sd_type, d->sd_dpl, d->sd_p,
	    d->sd_hilimit, d->sd_xx, d->sd_def32, d->sd_gran, d->sd_hibase);
}
#endif

int
x86_get_ldt(struct lwp *l, void *args, register_t *retval)
{
#ifndef USER_LDT
	return EINVAL;
#else
	struct x86_get_ldt_args ua;
	union descriptor *cp;
	int error;

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return error;

	if (ua.num < 0 || ua.num > 8192)
		return EINVAL;

	cp = malloc(ua.num * sizeof(union descriptor), M_TEMP, M_WAITOK);
	if (cp == NULL)
		return ENOMEM;

	error = x86_get_ldt1(l, &ua, cp);
	*retval = ua.num;
	if (error == 0)
		error = copyout(cp, ua.desc, ua.num * sizeof(*cp));

	free(cp, M_TEMP);
	return error;
#endif
}

int
x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
{
#ifndef USER_LDT
	return EINVAL;
#else
	int error;
	struct proc *p = l->l_proc;
	pmap_t pmap = p->p_vmspace->vm_map.pmap;
	int nldt, num;
	union descriptor *lp;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

#ifdef	LDT_DEBUG
	printf("x86_get_ldt: start=%d num=%d descs=%p\n", ua->start,
	    ua->num, ua->desc);
#endif

	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
	    ua->start + ua->num > 8192)
		return (EINVAL);

#ifdef __x86_64__
	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
		return EINVAL;
#endif

	mutex_enter(&cpu_lock);

	if (pmap->pm_ldt != NULL) {
		nldt = pmap->pm_ldt_len / sizeof(*lp);
		lp = pmap->pm_ldt;
	} else {
#ifdef __x86_64__
		nldt = LDT_SIZE / sizeof(*lp);
#else
		nldt = NLDT;
#endif
		lp = (union descriptor *)ldtstore;
	}

	if (ua->start > nldt) {
		mutex_exit(&cpu_lock);
		return (EINVAL);
	}

	lp += ua->start;
	num = min(ua->num, nldt - ua->start);
	ua->num = num;
#ifdef LDT_DEBUG
	{
		int i;
		for (i = 0; i < num; i++)
			x86_print_ldt(i, &lp[i].sd);
	}
#endif

	memcpy(cp, lp, num * sizeof(union descriptor));
	mutex_exit(&cpu_lock);

	return 0;
#endif
}

int
x86_set_ldt(struct lwp *l, void *args, register_t *retval)
{
#ifndef USER_LDT
	return EINVAL;
#else
	struct x86_set_ldt_args ua;
	union descriptor *descv;
	int error;

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	if (ua.num < 0 || ua.num > 8192)
		return EINVAL;

	descv = malloc(sizeof (*descv) * ua.num, M_TEMP, M_NOWAIT);
	if (descv == NULL)
		return ENOMEM;

	error = copyin(ua.desc, descv, sizeof (*descv) * ua.num);
	if (error == 0)
		error = x86_set_ldt1(l, &ua, descv);
	*retval = ua.start;

	free(descv, M_TEMP);
	return error;
#endif
}

int
x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *ua,
    union descriptor *descv)
{
#ifndef USER_LDT
	return EINVAL;
#else
	int error, i, n, old_sel, new_sel;
	struct proc *p = l->l_proc;
	pmap_t pmap = p->p_vmspace->vm_map.pmap;
	size_t old_len, new_len;
	union descriptor *old_ldt, *new_ldt;

#ifdef __x86_64__
	const size_t min_ldt_size = LDT_SIZE;
#else
	const size_t min_ldt_size = NLDT * sizeof(union descriptor);
#endif

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
	    ua->start + ua->num > 8192)
		return (EINVAL);

#ifdef __x86_64__
	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
		return EINVAL;
#endif

	/* Check descriptors for access violations. */
	for (i = 0; i < ua->num; i++) {
		union descriptor *desc = &descv[i];

		switch (desc->sd.sd_type) {
		case SDT_SYSNULL:
			desc->sd.sd_p = 0;
			break;
#ifdef __x86_64__
		case SDT_SYS286CGT:
		case SDT_SYS386CGT:
			/* We don't allow these on amd64. */
			return EACCES;
#else
		case SDT_SYS286CGT:
		case SDT_SYS386CGT:
			/*
			 * Only allow call gates targeting a segment
			 * in the LDT or a user segment in the fixed
			 * part of the gdt.  Segments in the LDT are
			 * constrained (below) to be user segments.
			 */
			if (desc->gd.gd_p != 0 &&
			    !ISLDT(desc->gd.gd_selector) &&
			    ((IDXSEL(desc->gd.gd_selector) >= NGDT) ||
			     (gdtstore[IDXSEL(desc->gd.gd_selector)].sd.sd_dpl !=
				 SEL_UPL))) {
				return EACCES;
			}
			break;
#endif
		case SDT_MEMEC:
		case SDT_MEMEAC:
		case SDT_MEMERC:
		case SDT_MEMERAC:
			/* Must be "present" if executable and conforming. */
			if (desc->sd.sd_p == 0)
				return EACCES;
			break;
		case SDT_MEMRO:
		case SDT_MEMROA:
		case SDT_MEMRW:
		case SDT_MEMRWA:
		case SDT_MEMROD:
		case SDT_MEMRODA:
		case SDT_MEMRWD:
		case SDT_MEMRWDA:
		case SDT_MEME:
		case SDT_MEMEA:
		case SDT_MEMER:
		case SDT_MEMERA:
			break;
		default:
			/*
			 * Make sure that unknown descriptor types are
			 * not marked present.
			 */
			if (desc->sd.sd_p != 0)
				return EACCES;
			break;
		}

		if (desc->sd.sd_p != 0) {
			/* Only user (ring-3) descriptors may be present. */
			if (desc->sd.sd_dpl != SEL_UPL)
				return EACCES;
		}
	}

	/*
	 * Install selected changes.  We perform a copy, write, swap dance
	 * here to ensure that all updates happen atomically.
	 */

	/* Allocate a new LDT. */
	for (;;) {
		new_len = (ua->start + ua->num) * sizeof(union descriptor);
		new_len = max(new_len, pmap->pm_ldt_len);
		new_len = max(new_len, min_ldt_size);
		new_len = round_page(new_len);
		new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
		    new_len, 0, UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_WAITVA);
		mutex_enter(&cpu_lock);
		if (pmap->pm_ldt_len <= new_len) {
			break;
		}
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
		    UVM_KMF_WIRED);
	}

	/* Copy existing entries, if any. */
	if (pmap->pm_ldt != NULL) {
		old_ldt = pmap->pm_ldt;
		old_len = pmap->pm_ldt_len;
		old_sel = pmap->pm_ldt_sel;
		memcpy(new_ldt, old_ldt, old_len);
	} else {
		old_ldt = NULL;
		old_len = 0;
		old_sel = -1;
		memcpy(new_ldt, ldtstore, min_ldt_size);
	}

	/* Apply requested changes. */
	for (i = 0, n = ua->start; i < ua->num; i++, n++) {
		new_ldt[n] = descv[i];
	}

	/* Allocate LDT selector. */
	new_sel = ldt_alloc(new_ldt, new_len);
	if (new_sel == -1) {
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
		    UVM_KMF_WIRED);
		return ENOMEM;
	}

	/* All changes are now globally visible.  Swap in the new LDT. */
	pmap->pm_ldt_len = new_len;
	pmap->pm_ldt_sel = new_sel;
	/* membar_store_store for pmap_fork() to read these unlocked safely */
	membar_producer();
	pmap->pm_ldt = new_ldt;

	/* Switch existing users onto new LDT. */
	pmap_ldt_sync(pmap);

	/* Free existing LDT (if any). */
	if (old_ldt != NULL) {
		ldt_free(old_sel);
		/* exit the mutex before free */
		mutex_exit(&cpu_lock);
		uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len,
		    UVM_KMF_WIRED);
	} else {
		mutex_exit(&cpu_lock);
	}

	return error;
#endif
}

int
x86_iopl(struct lwp *l, void *args, register_t *retval)
{
	int error;
	struct x86_iopl_args ua;
#ifdef XEN
	int iopl;
#else
	struct trapframe *tf = l->l_md.md_regs;
#endif

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return error;

#ifdef XEN
	if (ua.iopl)
		iopl = SEL_UPL;
	else
		iopl = SEL_KPL;

    {
	struct physdev_op physop;
	struct pcb *pcb;

	pcb = lwp_getpcb(l);
	pcb->pcb_iopl = iopl;

	/* Force the change at ring 0. */
	physop.cmd = PHYSDEVOP_SET_IOPL;
	physop.u.set_iopl.iopl = iopl;
	HYPERVISOR_physdev_op(&physop);
    }
#elif defined(__x86_64__)
	if (ua.iopl)
		tf->tf_rflags |= PSL_IOPL;
	else
		tf->tf_rflags &= ~PSL_IOPL;
#else
	if (ua.iopl)
		tf->tf_eflags |= PSL_IOPL;
	else
		tf->tf_eflags &= ~PSL_IOPL;
#endif

	return 0;
}
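
/*
 * Illustrative sketch, not compiled here (error handling beyond err(3)
 * and the machine-dependent port-access helpers are assumed): userland
 * raises its I/O privilege level with the one-field argument structure
 * used above, usually through the i386_iopl(3) wrapper.
 *
 *	struct x86_iopl_args ua;
 *
 *	ua.iopl = 1;
 *	if (sysarch(X86_IOPL, &ua) == -1)
 *		err(EXIT_FAILURE, "sysarch");
 *	(direct port access is now permitted for this lwp)
 */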

int
x86_get_ioperm(struct lwp *l, void *args, register_t *retval)
{
#ifdef IOPERM
	int error;
	struct pcb *pcb = lwp_getpcb(l);
	struct x86_get_ioperm_args ua;
	void *dummymap = NULL;
	void *iomap;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	iomap = pcb->pcb_iomap;
	if (iomap == NULL) {
		iomap = dummymap = kmem_alloc(IOMAPSIZE, KM_SLEEP);
		memset(dummymap, 0xff, IOMAPSIZE);
	}
	error = copyout(iomap, ua.iomap, IOMAPSIZE);
	if (dummymap != NULL) {
		kmem_free(dummymap, IOMAPSIZE);
	}
	return error;
#else
	return EINVAL;
#endif
}

int
x86_set_ioperm(struct lwp *l, void *args, register_t *retval)
{
#ifdef IOPERM
	struct cpu_info *ci;
	int error;
	struct pcb *pcb = lwp_getpcb(l);
	struct x86_set_ioperm_args ua;
	void *new;
	void *old;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
		return (error);

	new = kmem_alloc(IOMAPSIZE, KM_SLEEP);
	error = copyin(ua.iomap, new, IOMAPSIZE);
	if (error) {
		kmem_free(new, IOMAPSIZE);
		return error;
	}
	old = pcb->pcb_iomap;
	pcb->pcb_iomap = new;
	if (old != NULL) {
		kmem_free(old, IOMAPSIZE);
	}

	kpreempt_disable();
	ci = curcpu();
	memcpy(ci->ci_iomap, pcb->pcb_iomap, sizeof(ci->ci_iomap));
	ci->ci_tss.tss_iobase =
	    ((uintptr_t)ci->ci_iomap - (uintptr_t)&ci->ci_tss) << 16;
	kpreempt_enable();

	return error;
#else
	return EINVAL;
#endif
}
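
/*
 * Illustrative sketch, not compiled here (IOMAPSIZE comes from the
 * machine headers): the map holds one bit per I/O port, and a set bit
 * denies access, which is why the dummy map in x86_get_ioperm() above
 * is all 0xff.  Granting access to port 0x70 only might look like:
 *
 *	uint8_t map[IOMAPSIZE];
 *	struct x86_set_ioperm_args ua;
 *
 *	memset(map, 0xff, sizeof(map));		(deny everything)
 *	map[0x70 / 8] &= ~(1 << (0x70 % 8));	(clear one bit to allow)
 *	ua.iomap = map;
 *	sysarch(X86_SET_IOPERM, &ua);
 */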

int
x86_get_mtrr(struct lwp *l, void *args, register_t *retval)
{
#ifdef MTRR
	struct x86_get_mtrr_args ua;
	int error, n;

	if (mtrr_funcs == NULL)
		return ENOSYS;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_GET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	error = copyin(args, &ua, sizeof ua);
	if (error != 0)
		return error;

	error = copyin(ua.n, &n, sizeof n);
	if (error != 0)
		return error;

	KERNEL_LOCK(1, NULL);
	error = mtrr_get(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
	KERNEL_UNLOCK_ONE(NULL);

	copyout(&n, ua.n, sizeof (int));

	return error;
#else
	return EINVAL;
#endif
}

int
x86_set_mtrr(struct lwp *l, void *args, register_t *retval)
{
#ifdef MTRR
	int error, n;
	struct x86_set_mtrr_args ua;

	if (mtrr_funcs == NULL)
		return ENOSYS;

	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_SET,
	    NULL, NULL, NULL, NULL);
	if (error)
		return (error);

	error = copyin(args, &ua, sizeof ua);
	if (error != 0)
		return error;

	error = copyin(ua.n, &n, sizeof n);
	if (error != 0)
		return error;

	KERNEL_LOCK(1, NULL);
	error = mtrr_set(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
	if (n != 0)
		mtrr_commit();
	KERNEL_UNLOCK_ONE(NULL);

	copyout(&n, ua.n, sizeof n);

	return error;
#else
	return EINVAL;
#endif
}
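
/*
 * Illustrative sketch, not compiled here (the range is hypothetical,
 * and the field names are those of struct mtrr in <machine/mtrr.h>):
 * marking a framebuffer range write-combining might look like this.
 * The kernel copies the processed count back out through ua.n, as in
 * the copyout() above.
 *
 *	struct mtrr m;
 *	struct x86_set_mtrr_args ua;
 *	int n = 1;
 *
 *	memset(&m, 0, sizeof(m));
 *	m.base = 0xf0000000;
 *	m.len = 0x01000000;
 *	m.type = MTRR_TYPE_WC;
 *	m.flags = MTRR_VALID;
 *	ua.mtrrp = &m;
 *	ua.n = &n;
 *	sysarch(X86_SET_MTRR, &ua);
 */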

#ifdef __x86_64__
#define pcb_fsd pcb_fs
#define pcb_gsd pcb_gs
#define segment_descriptor mem_segment_descriptor
#endif

int
x86_set_sdbase32(void *arg, char which, lwp_t *l, bool direct)
{
	struct trapframe *tf = l->l_md.md_regs;
	union descriptor usd;
	struct pcb *pcb;
	uint32_t base;
	int error;

	if (direct) {
		base = (vaddr_t)arg;
	} else {
		error = copyin(arg, &base, sizeof(base));
		if (error != 0)
			return error;
	}

	/*
	 * Build a present, ring-3, flat (4GB, page-granular) read/write
	 * data segment with the requested base.
	 */
	memset(&usd, 0, sizeof(usd));
	usd.sd.sd_lobase = base & 0xffffff;
	usd.sd.sd_hibase = (base >> 24) & 0xff;
	usd.sd.sd_lolimit = 0xffff;
	usd.sd.sd_hilimit = 0xf;
	usd.sd.sd_type = SDT_MEMRWA;
	usd.sd.sd_dpl = SEL_UPL;
	usd.sd.sd_p = 1;
	usd.sd.sd_def32 = 1;
	usd.sd.sd_gran = 1;

	pcb = lwp_getpcb(l);
	kpreempt_disable();
	if (which == 'f') {
		memcpy(&pcb->pcb_fsd, &usd.sd,
		    sizeof(struct segment_descriptor));
		if (l == curlwp) {
			update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &usd);
#ifdef __x86_64__
			setfs(GSEL(GUFS_SEL, SEL_UPL));
#endif
		}
		tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
	} else /* which == 'g' */ {
		memcpy(&pcb->pcb_gsd, &usd.sd,
		    sizeof(struct segment_descriptor));
		if (l == curlwp) {
			update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &usd);
#ifdef __x86_64__
#ifndef XEN
			setusergs(GSEL(GUGS_SEL, SEL_UPL));
#else
			HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
						    GSEL(GUGS_SEL, SEL_UPL));
#endif
#endif
		}
		tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
	}
	kpreempt_enable();
	return 0;
}

int
x86_set_sdbase(void *arg, char which, lwp_t *l, bool direct)
{
#ifdef i386
	return x86_set_sdbase32(arg, which, l, direct);
#else
	struct pcb *pcb;
	vaddr_t base;

	if (l->l_proc->p_flag & PK_32) {
		return x86_set_sdbase32(arg, which, l, direct);
	}

	if (direct) {
		base = (vaddr_t)arg;
	} else {
		int error = copyin(arg, &base, sizeof(base));
		if (error != 0)
			return error;
	}

	if (base >= VM_MAXUSER_ADDRESS)
		return EINVAL;

	pcb = lwp_getpcb(l);

	kpreempt_disable();
	switch(which) {
	case 'f':
		pcb->pcb_fs = base;
		if (l == curlwp)
			wrmsr(MSR_FSBASE, pcb->pcb_fs);
		break;
	case 'g':
		pcb->pcb_gs = base;
		if (l == curlwp)
			wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
		break;
	default:
		panic("x86_set_sdbase");
	}
	kpreempt_enable();

	return 0;
#endif
}

int
x86_get_sdbase32(void *arg, char which)
{
	struct segment_descriptor *sd;
	uint32_t base;

	switch (which) {
	case 'f':
		sd = (void *)&curpcb->pcb_fsd;
		break;
	case 'g':
		sd = (void *)&curpcb->pcb_gsd;
		break;
	default:
		panic("x86_get_sdbase32");
	}

	base = sd->sd_hibase << 24 | sd->sd_lobase;
	return copyout(&base, arg, sizeof(base));
}

int
x86_get_sdbase(void *arg, char which)
{
#ifdef i386
	return x86_get_sdbase32(arg, which);
#else
	vaddr_t base;
	struct pcb *pcb;

	if (curproc->p_flag & PK_32) {
		return x86_get_sdbase32(arg, which);
	}

	pcb = lwp_getpcb(curlwp);

	switch(which) {
	case 'f':
		base = pcb->pcb_fs;
		break;
	case 'g':
		base = pcb->pcb_gs;
		break;
	default:
		panic("x86_get_sdbase");
	}

	return copyout(&base, arg, sizeof(base));
#endif
}

int
sys_sysarch(struct lwp *l, const struct sys_sysarch_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) op;
		syscallarg(void *) parms;
	} */
	int error = 0;

	switch(SCARG(uap, op)) {
	case X86_IOPL:
		error = x86_iopl(l, SCARG(uap, parms), retval);
		break;

#ifdef i386
	/*
	 * On amd64, this is done via netbsd32_sysarch.
	 */
	case X86_GET_LDT:
		error = x86_get_ldt(l, SCARG(uap, parms), retval);
		break;

	case X86_SET_LDT:
		error = x86_set_ldt(l, SCARG(uap, parms), retval);
		break;
#endif

	case X86_GET_IOPERM:
		error = x86_get_ioperm(l, SCARG(uap, parms), retval);
		break;

	case X86_SET_IOPERM:
		error = x86_set_ioperm(l, SCARG(uap, parms), retval);
		break;

	case X86_GET_MTRR:
		error = x86_get_mtrr(l, SCARG(uap, parms), retval);
		break;
	case X86_SET_MTRR:
		error = x86_set_mtrr(l, SCARG(uap, parms), retval);
		break;

#ifdef VM86
	case X86_VM86:
		error = x86_vm86(l, SCARG(uap, parms), retval);
		break;
	case X86_OLD_VM86:
		error = compat_16_x86_vm86(l, SCARG(uap, parms), retval);
		break;
#endif

#ifdef PMC
	case X86_PMC_INFO:
		error = sys_pmc_info(l, SCARG(uap, parms), retval);
		break;

	case X86_PMC_STARTSTOP:
		error = sys_pmc_startstop(l, SCARG(uap, parms), retval);
		break;

	case X86_PMC_READ:
		error = sys_pmc_read(l, SCARG(uap, parms), retval);
		break;
#endif

	case X86_SET_FSBASE:
		error = x86_set_sdbase(SCARG(uap, parms), 'f', curlwp, false);
		break;

	case X86_SET_GSBASE:
		error = x86_set_sdbase(SCARG(uap, parms), 'g', curlwp, false);
		break;

	case X86_GET_FSBASE:
		error = x86_get_sdbase(SCARG(uap, parms), 'f');
		break;

	case X86_GET_GSBASE:
		error = x86_get_sdbase(SCARG(uap, parms), 'g');
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
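
/*
 * Illustrative sketch, not compiled here: the {GET,SET}_{FS,GS}BASE
 * operations pass the base address indirectly, matching the copyin()
 * in x86_set_sdbase().  A threads library setting and reading back a
 * TLS base by hand (the usual path is _lwp_setprivate(2), which lands
 * in cpu_lwp_setprivate() below) might do:
 *
 *	void *tcb;				(hypothetical TLS block)
 *	void *cur;
 *
 *	sysarch(X86_SET_FSBASE, &tcb);
 *	sysarch(X86_GET_FSBASE, &cur);		(cur == tcb afterwards)
 */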

int
cpu_lwp_setprivate(lwp_t *l, void *addr)
{

#ifdef __x86_64__
	if ((l->l_proc->p_flag & PK_32) == 0) {
		return x86_set_sdbase(addr, 'f', l, true);
	}
#endif
	return x86_set_sdbase(addr, 'g', l, true);
}