Home | History | Annotate | Line # | Download | only in x86
sys_machdep.c revision 1.36
      1  1.36      maxv /*	$NetBSD: sys_machdep.c,v 1.36 2017/07/12 16:59:41 maxv Exp $	*/
      2   1.1        ad 
      3   1.1        ad /*-
      4  1.17        ad  * Copyright (c) 1998, 2007, 2009 The NetBSD Foundation, Inc.
      5   1.1        ad  * All rights reserved.
      6   1.1        ad  *
      7   1.1        ad  * This code is derived from software contributed to The NetBSD Foundation
      8   1.1        ad  * by Charles M. Hannum, and by Andrew Doran.
      9   1.1        ad  *
     10   1.1        ad  * Redistribution and use in source and binary forms, with or without
     11   1.1        ad  * modification, are permitted provided that the following conditions
     12   1.1        ad  * are met:
     13   1.1        ad  * 1. Redistributions of source code must retain the above copyright
     14   1.1        ad  *    notice, this list of conditions and the following disclaimer.
     15   1.1        ad  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1        ad  *    notice, this list of conditions and the following disclaimer in the
     17   1.1        ad  *    documentation and/or other materials provided with the distribution.
     18   1.1        ad  *
     19   1.1        ad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20   1.1        ad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21   1.1        ad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22   1.1        ad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23   1.1        ad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24   1.1        ad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25   1.1        ad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26   1.1        ad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27   1.1        ad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28   1.1        ad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29   1.1        ad  * POSSIBILITY OF SUCH DAMAGE.
     30   1.1        ad  */
     31   1.1        ad 
     32   1.1        ad #include <sys/cdefs.h>
     33  1.36      maxv __KERNEL_RCSID(0, "$NetBSD: sys_machdep.c,v 1.36 2017/07/12 16:59:41 maxv Exp $");
     34   1.1        ad 
     35   1.1        ad #include "opt_mtrr.h"
     36  1.36      maxv #include "opt_pmc.h"
     37   1.1        ad #include "opt_user_ldt.h"
     38  1.27  christos #include "opt_compat_netbsd.h"
     39  1.26       dsl #ifdef i386
     40   1.1        ad #include "opt_vm86.h"
     41  1.26       dsl #endif
     42   1.1        ad #include "opt_xen.h"
     43   1.1        ad 
     44   1.1        ad #include <sys/param.h>
     45   1.1        ad #include <sys/systm.h>
     46   1.1        ad #include <sys/ioctl.h>
     47   1.1        ad #include <sys/file.h>
     48   1.1        ad #include <sys/time.h>
     49   1.1        ad #include <sys/proc.h>
     50   1.1        ad #include <sys/uio.h>
     51   1.1        ad #include <sys/kernel.h>
     52   1.1        ad #include <sys/buf.h>
     53   1.1        ad #include <sys/signal.h>
     54   1.1        ad #include <sys/malloc.h>
     55   1.9      yamt #include <sys/kmem.h>
     56   1.1        ad #include <sys/kauth.h>
     57  1.17        ad #include <sys/cpu.h>
     58   1.1        ad #include <sys/mount.h>
     59   1.1        ad #include <sys/syscallargs.h>
     60   1.1        ad 
     61   1.1        ad #include <uvm/uvm_extern.h>
     62   1.1        ad 
     63   1.1        ad #include <machine/cpufunc.h>
     64   1.1        ad #include <machine/gdt.h>
     65   1.1        ad #include <machine/psl.h>
     66   1.1        ad #include <machine/reg.h>
     67   1.1        ad #include <machine/sysarch.h>
     68   1.1        ad #include <machine/mtrr.h>
     69   1.1        ad 
     70   1.1        ad #ifdef __x86_64__
     71  1.32      maxv /*
     72  1.32      maxv  * The code for USER_LDT on amd64 is mostly functional, but it is still not
     73  1.32      maxv  * enabled.
     74  1.32      maxv  *
     75  1.32      maxv  * On amd64 we are allowing only 8-byte-sized entries in the LDT, and we are
     76  1.32      maxv  * not allowing the user to overwrite the existing entries (below LDT_SIZE).
     77  1.32      maxv  * Note that USER_LDT is used only by 32bit applications, under compat_netbsd32.
     78  1.32      maxv  * This is theoretically enough for Wine to work.
     79  1.32      maxv  *
     80  1.32      maxv  * However, letting segment registers have different location breaks amd64's
     81  1.32      maxv  * Thread Local Storage: %fs and %gs must be reloaded when returning to
     82  1.32      maxv  * userland. See the tech-kern@ archive from February 2017. A patch has been
     83  1.32      maxv  * proposed to fix that, but Wine still randomly crashes; it is not clear
     84  1.32      maxv  * whether the issues come from Wine, from netbsd32 or from the patch itself.
     85  1.32      maxv  */
     86  1.32      maxv #undef	USER_LDT
     87   1.1        ad /* Need to be checked. */
     88   1.1        ad #undef	IOPERM
     89   1.1        ad #else
     90   1.9      yamt #if defined(XEN)
     91   1.9      yamt #undef	IOPERM
     92   1.9      yamt #else /* defined(XEN) */
     93   1.1        ad #define	IOPERM
     94   1.9      yamt #endif /* defined(XEN) */
     95   1.1        ad #endif
     96   1.1        ad 
     97  1.35      maxv #ifdef XEN
     98  1.35      maxv #undef	PMC
     99  1.35      maxv #endif
    100  1.35      maxv 
    101   1.1        ad #ifdef VM86
    102   1.1        ad #include <machine/vm86.h>
    103   1.1        ad #endif
    104   1.1        ad 
    105  1.34      maxv #ifdef PMC
    106   1.1        ad #include <machine/pmc.h>
    107   1.1        ad #endif
    108   1.1        ad 
    109   1.1        ad extern struct vm_map *kernel_map;
    110   1.1        ad 
    111   1.1        ad int x86_get_ioperm(struct lwp *, void *, register_t *);
    112   1.1        ad int x86_set_ioperm(struct lwp *, void *, register_t *);
    113   1.1        ad int x86_get_mtrr(struct lwp *, void *, register_t *);
    114   1.1        ad int x86_set_mtrr(struct lwp *, void *, register_t *);
    115  1.24       chs int x86_set_sdbase32(void *, char, lwp_t *, bool);
    116  1.18        ad int x86_set_sdbase(void *, char, lwp_t *, bool);
    117  1.24       chs int x86_get_sdbase32(void *, char);
    118  1.18        ad int x86_get_sdbase(void *, char);
    119   1.1        ad 
#if defined(USER_LDT) && defined(LDT_DEBUG)
/*
 * Debugging aid: print every field of the segment descriptor `d' on one
 * line, prefixed with the index `i'.  Only built when both USER_LDT and
 * LDT_DEBUG are defined.
 */
static void
x86_print_ldt(int i, const struct segment_descriptor *d)
{
	printf("[%d] lolimit=0x%x, lobase=0x%x, type=%u, dpl=%u, p=%u, "
	    "hilimit=0x%x, xx=%x, def32=%u, gran=%u, hibase=0x%x\n",
	    i,
	    d->sd_lolimit,
	    d->sd_lobase,
	    d->sd_type,
	    d->sd_dpl,
	    d->sd_p,
	    d->sd_hilimit,
	    d->sd_xx,
	    d->sd_def32,
	    d->sd_gran,
	    d->sd_hibase);
}
#endif
    132   1.1        ad 
    133   1.1        ad int
    134   1.1        ad x86_get_ldt(struct lwp *l, void *args, register_t *retval)
    135   1.1        ad {
    136   1.2       dsl #ifndef USER_LDT
    137   1.2       dsl 	return EINVAL;
    138   1.2       dsl #else
    139   1.2       dsl 	struct x86_get_ldt_args ua;
    140   1.2       dsl 	union descriptor *cp;
    141   1.2       dsl 	int error;
    142   1.2       dsl 
    143   1.2       dsl 	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
    144   1.2       dsl 		return error;
    145   1.2       dsl 
    146   1.2       dsl 	if (ua.num < 0 || ua.num > 8192)
    147   1.2       dsl 		return EINVAL;
    148   1.2       dsl 
    149   1.2       dsl 	cp = malloc(ua.num * sizeof(union descriptor), M_TEMP, M_WAITOK);
    150   1.2       dsl 	if (cp == NULL)
    151   1.2       dsl 		return ENOMEM;
    152   1.2       dsl 
    153   1.2       dsl 	error = x86_get_ldt1(l, &ua, cp);
    154   1.2       dsl 	*retval = ua.num;
    155   1.2       dsl 	if (error == 0)
    156   1.2       dsl 		error = copyout(cp, ua.desc, ua.num * sizeof(*cp));
    157   1.2       dsl 
    158   1.2       dsl 	free(cp, M_TEMP);
    159   1.2       dsl 	return error;
    160   1.2       dsl #endif
    161   1.2       dsl }
    162   1.2       dsl 
    163   1.2       dsl int
    164   1.2       dsl x86_get_ldt1(struct lwp *l, struct x86_get_ldt_args *ua, union descriptor *cp)
    165   1.2       dsl {
    166   1.2       dsl #ifndef USER_LDT
    167   1.2       dsl 	return EINVAL;
    168   1.2       dsl #else
    169   1.1        ad 	int error;
    170   1.1        ad 	struct proc *p = l->l_proc;
    171   1.1        ad 	pmap_t pmap = p->p_vmspace->vm_map.pmap;
    172   1.1        ad 	int nldt, num;
    173   1.2       dsl 	union descriptor *lp;
    174   1.1        ad 
    175   1.1        ad 	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_GET,
    176   1.1        ad 	    NULL, NULL, NULL, NULL);
    177   1.1        ad 	if (error)
    178   1.1        ad 		return (error);
    179   1.1        ad 
    180   1.1        ad #ifdef	LDT_DEBUG
    181   1.2       dsl 	printf("x86_get_ldt: start=%d num=%d descs=%p\n", ua->start,
    182   1.2       dsl 	    ua->num, ua->desc);
    183   1.1        ad #endif
    184   1.1        ad 
    185   1.2       dsl 	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
    186   1.2       dsl 	    ua->start + ua->num > 8192)
    187   1.1        ad 		return (EINVAL);
    188   1.1        ad 
    189  1.32      maxv #ifdef __x86_64__
    190  1.32      maxv 	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
    191  1.32      maxv 		return EINVAL;
    192  1.32      maxv #endif
    193  1.32      maxv 
    194  1.17        ad 	mutex_enter(&cpu_lock);
    195   1.1        ad 
    196  1.17        ad 	if (pmap->pm_ldt != NULL) {
    197  1.17        ad 		nldt = pmap->pm_ldt_len / sizeof(*lp);
    198   1.1        ad 		lp = pmap->pm_ldt;
    199   1.1        ad 	} else {
    200  1.32      maxv #ifdef __x86_64__
    201  1.32      maxv 		nldt = LDT_SIZE / sizeof(*lp);
    202  1.32      maxv #else
    203   1.1        ad 		nldt = NLDT;
    204  1.32      maxv #endif
    205  1.32      maxv 		lp = (union descriptor *)ldtstore;
    206   1.1        ad 	}
    207   1.1        ad 
    208   1.2       dsl 	if (ua->start > nldt) {
    209  1.17        ad 		mutex_exit(&cpu_lock);
    210   1.1        ad 		return (EINVAL);
    211   1.1        ad 	}
    212   1.1        ad 
    213   1.2       dsl 	lp += ua->start;
    214   1.2       dsl 	num = min(ua->num, nldt - ua->start);
    215   1.2       dsl 	ua->num = num;
    216   1.1        ad #ifdef LDT_DEBUG
    217   1.1        ad 	{
    218   1.1        ad 		int i;
    219   1.1        ad 		for (i = 0; i < num; i++)
    220   1.1        ad 			x86_print_ldt(i, &lp[i].sd);
    221   1.1        ad 	}
    222   1.1        ad #endif
    223   1.1        ad 
    224   1.1        ad 	memcpy(cp, lp, num * sizeof(union descriptor));
    225  1.17        ad 	mutex_exit(&cpu_lock);
    226   1.1        ad 
    227   1.2       dsl 	return 0;
    228   1.2       dsl #endif
    229   1.2       dsl }
    230   1.2       dsl 
    231   1.2       dsl int
    232   1.2       dsl x86_set_ldt(struct lwp *l, void *args, register_t *retval)
    233   1.2       dsl {
    234   1.2       dsl #ifndef USER_LDT
    235   1.2       dsl 	return EINVAL;
    236   1.2       dsl #else
    237   1.2       dsl 	struct x86_set_ldt_args ua;
    238   1.2       dsl 	union descriptor *descv;
    239   1.2       dsl 	int error;
    240   1.2       dsl 
    241   1.2       dsl 	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
    242   1.2       dsl 		return (error);
    243   1.2       dsl 
    244   1.2       dsl 	if (ua.num < 0 || ua.num > 8192)
    245   1.2       dsl 		return EINVAL;
    246   1.2       dsl 
    247   1.2       dsl 	descv = malloc(sizeof (*descv) * ua.num, M_TEMP, M_NOWAIT);
    248   1.2       dsl 	if (descv == NULL)
    249   1.2       dsl 		return ENOMEM;
    250   1.2       dsl 
    251   1.2       dsl 	error = copyin(ua.desc, descv, sizeof (*descv) * ua.num);
    252   1.1        ad 	if (error == 0)
    253   1.2       dsl 		error = x86_set_ldt1(l, &ua, descv);
    254   1.2       dsl 	*retval = ua.start;
    255   1.1        ad 
    256   1.2       dsl 	free(descv, M_TEMP);
    257   1.2       dsl 	return error;
    258   1.1        ad #endif
    259   1.1        ad }
    260   1.1        ad 
    261   1.1        ad int
    262   1.2       dsl x86_set_ldt1(struct lwp *l, struct x86_set_ldt_args *ua,
    263   1.2       dsl     union descriptor *descv)
    264   1.1        ad {
    265   1.2       dsl #ifndef USER_LDT
    266   1.2       dsl 	return EINVAL;
    267   1.2       dsl #else
    268  1.17        ad 	int error, i, n, old_sel, new_sel;
    269   1.1        ad 	struct proc *p = l->l_proc;
    270   1.1        ad 	pmap_t pmap = p->p_vmspace->vm_map.pmap;
    271  1.17        ad 	size_t old_len, new_len;
    272  1.17        ad 	union descriptor *old_ldt, *new_ldt;
    273   1.1        ad 
    274  1.32      maxv #ifdef __x86_64__
    275  1.32      maxv 	const size_t min_ldt_size = LDT_SIZE;
    276  1.32      maxv #else
    277  1.32      maxv 	const size_t min_ldt_size = NLDT * sizeof(union descriptor);
    278  1.32      maxv #endif
    279  1.32      maxv 
    280   1.1        ad 	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_LDT_SET,
    281   1.1        ad 	    NULL, NULL, NULL, NULL);
    282   1.1        ad 	if (error)
    283   1.1        ad 		return (error);
    284   1.1        ad 
    285   1.2       dsl 	if (ua->start < 0 || ua->num < 0 || ua->start > 8192 || ua->num > 8192 ||
    286   1.2       dsl 	    ua->start + ua->num > 8192)
    287   1.1        ad 		return (EINVAL);
    288   1.1        ad 
    289  1.32      maxv #ifdef __x86_64__
    290  1.32      maxv 	if (ua->start * sizeof(union descriptor) < LDT_SIZE)
    291  1.32      maxv 		return EINVAL;
    292  1.32      maxv #endif
    293  1.32      maxv 
    294   1.1        ad 	/* Check descriptors for access violations. */
    295   1.2       dsl 	for (i = 0; i < ua->num; i++) {
    296   1.1        ad 		union descriptor *desc = &descv[i];
    297   1.1        ad 
    298   1.1        ad 		switch (desc->sd.sd_type) {
    299   1.1        ad 		case SDT_SYSNULL:
    300   1.1        ad 			desc->sd.sd_p = 0;
    301   1.1        ad 			break;
    302  1.32      maxv #ifdef __x86_64__
    303  1.32      maxv 		case SDT_SYS286CGT:
    304  1.32      maxv 		case SDT_SYS386CGT:
    305  1.32      maxv 			/* We don't allow these on amd64. */
    306  1.32      maxv 			return EACCES;
    307  1.32      maxv #else
    308   1.1        ad 		case SDT_SYS286CGT:
    309   1.1        ad 		case SDT_SYS386CGT:
    310   1.1        ad 			/*
    311   1.1        ad 			 * Only allow call gates targeting a segment
    312   1.1        ad 			 * in the LDT or a user segment in the fixed
    313   1.1        ad 			 * part of the gdt.  Segments in the LDT are
    314   1.1        ad 			 * constrained (below) to be user segments.
    315   1.1        ad 			 */
    316   1.1        ad 			if (desc->gd.gd_p != 0 &&
    317   1.1        ad 			    !ISLDT(desc->gd.gd_selector) &&
    318   1.1        ad 			    ((IDXSEL(desc->gd.gd_selector) >= NGDT) ||
    319  1.31      maxv 			     (gdtstore[IDXSEL(desc->gd.gd_selector)].sd.sd_dpl !=
    320   1.1        ad 				 SEL_UPL))) {
    321   1.2       dsl 				return EACCES;
    322   1.1        ad 			}
    323   1.1        ad 			break;
    324  1.32      maxv #endif
    325   1.1        ad 		case SDT_MEMEC:
    326   1.1        ad 		case SDT_MEMEAC:
    327   1.1        ad 		case SDT_MEMERC:
    328   1.1        ad 		case SDT_MEMERAC:
    329   1.1        ad 			/* Must be "present" if executable and conforming. */
    330   1.2       dsl 			if (desc->sd.sd_p == 0)
    331   1.2       dsl 				return EACCES;
    332   1.1        ad 			break;
    333   1.1        ad 		case SDT_MEMRO:
    334   1.1        ad 		case SDT_MEMROA:
    335   1.1        ad 		case SDT_MEMRW:
    336   1.1        ad 		case SDT_MEMRWA:
    337   1.1        ad 		case SDT_MEMROD:
    338   1.1        ad 		case SDT_MEMRODA:
    339   1.1        ad 		case SDT_MEMRWD:
    340   1.1        ad 		case SDT_MEMRWDA:
    341   1.1        ad 		case SDT_MEME:
    342   1.1        ad 		case SDT_MEMEA:
    343   1.1        ad 		case SDT_MEMER:
    344   1.1        ad 		case SDT_MEMERA:
    345   1.1        ad 			break;
    346   1.1        ad 		default:
    347   1.1        ad 			/*
    348   1.1        ad 			 * Make sure that unknown descriptor types are
    349   1.1        ad 			 * not marked present.
    350   1.1        ad 			 */
    351   1.2       dsl 			if (desc->sd.sd_p != 0)
    352   1.2       dsl 				return EACCES;
    353   1.1        ad 			break;
    354   1.1        ad 		}
    355   1.1        ad 
    356   1.1        ad 		if (desc->sd.sd_p != 0) {
    357   1.1        ad 			/* Only user (ring-3) descriptors may be present. */
    358   1.2       dsl 			if (desc->sd.sd_dpl != SEL_UPL)
    359   1.2       dsl 				return EACCES;
    360   1.1        ad 		}
    361   1.1        ad 	}
    362   1.1        ad 
    363  1.17        ad 	/*
    364  1.17        ad 	 * Install selected changes.  We perform a copy, write, swap dance
    365  1.17        ad 	 * here to ensure that all updates happen atomically.
    366  1.17        ad 	 */
    367  1.17        ad 
    368  1.17        ad 	/* Allocate a new LDT. */
    369  1.17        ad 	for (;;) {
    370  1.17        ad 		new_len = (ua->start + ua->num) * sizeof(union descriptor);
    371  1.17        ad 		new_len = max(new_len, pmap->pm_ldt_len);
    372  1.32      maxv 		new_len = max(new_len, min_ldt_size);
    373  1.17        ad 		new_len = round_page(new_len);
    374   1.1        ad 		new_ldt = (union descriptor *)uvm_km_alloc(kernel_map,
    375  1.30  dholland 		    new_len, 0, UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_WAITVA);
    376  1.17        ad 		mutex_enter(&cpu_lock);
    377  1.17        ad 		if (pmap->pm_ldt_len <= new_len) {
    378  1.17        ad 			break;
    379   1.1        ad 		}
    380  1.17        ad 		mutex_exit(&cpu_lock);
    381  1.17        ad 		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
    382  1.17        ad 		    UVM_KMF_WIRED);
    383  1.17        ad 	}
    384   1.1        ad 
    385  1.17        ad 	/* Copy existing entries, if any. */
    386  1.17        ad 	if (pmap->pm_ldt != NULL) {
    387   1.1        ad 		old_ldt = pmap->pm_ldt;
    388  1.17        ad 		old_len = pmap->pm_ldt_len;
    389  1.17        ad 		old_sel = pmap->pm_ldt_sel;
    390   1.1        ad 		memcpy(new_ldt, old_ldt, old_len);
    391  1.17        ad 	} else {
    392  1.17        ad 		old_ldt = NULL;
    393  1.17        ad 		old_len = 0;
    394  1.17        ad 		old_sel = -1;
    395  1.32      maxv 		memcpy(new_ldt, ldtstore, min_ldt_size);
    396  1.17        ad 	}
    397   1.1        ad 
    398  1.17        ad 	/* Apply requested changes. */
    399  1.17        ad 	for (i = 0, n = ua->start; i < ua->num; i++, n++) {
    400  1.17        ad 		new_ldt[n] = descv[i];
    401  1.17        ad 	}
    402   1.1        ad 
    403  1.17        ad 	/* Allocate LDT selector. */
    404  1.17        ad 	new_sel = ldt_alloc(new_ldt, new_len);
    405  1.17        ad 	if (new_sel == -1) {
    406  1.17        ad 		mutex_exit(&cpu_lock);
    407   1.1        ad 		uvm_km_free(kernel_map, (vaddr_t)new_ldt, new_len,
    408   1.1        ad 		    UVM_KMF_WIRED);
    409  1.17        ad 		return ENOMEM;
    410  1.17        ad 	}
    411  1.17        ad 
    412  1.17        ad 	/* All changes are now globally visible.  Swap in the new LDT. */
    413  1.17        ad 	pmap->pm_ldt_len = new_len;
    414  1.17        ad 	pmap->pm_ldt_sel = new_sel;
    415  1.30  dholland 	/* membar_store_store for pmap_fork() to read these unlocked safely */
    416  1.30  dholland 	membar_producer();
    417  1.30  dholland 	pmap->pm_ldt = new_ldt;
    418  1.17        ad 
    419  1.17        ad 	/* Switch existing users onto new LDT. */
    420  1.17        ad 	pmap_ldt_sync(pmap);
    421  1.17        ad 
    422  1.17        ad 	/* Free existing LDT (if any). */
    423  1.17        ad 	if (old_ldt != NULL) {
    424  1.17        ad 		ldt_free(old_sel);
    425  1.30  dholland 		/* exit the mutex before free */
    426  1.30  dholland 		mutex_exit(&cpu_lock);
    427  1.17        ad 		uvm_km_free(kernel_map, (vaddr_t)old_ldt, old_len,
    428   1.1        ad 		    UVM_KMF_WIRED);
    429  1.30  dholland 	} else {
    430  1.30  dholland 		mutex_exit(&cpu_lock);
    431  1.17        ad 	}
    432   1.2       dsl 
    433  1.17        ad 	return error;
    434   1.1        ad #endif
    435   1.1        ad }
    436   1.1        ad 
    437   1.1        ad int
    438   1.1        ad x86_iopl(struct lwp *l, void *args, register_t *retval)
    439   1.1        ad {
    440   1.1        ad 	int error;
    441   1.1        ad 	struct x86_iopl_args ua;
    442   1.1        ad #ifdef XEN
    443   1.9      yamt 	int iopl;
    444   1.1        ad #else
    445   1.1        ad 	struct trapframe *tf = l->l_md.md_regs;
    446   1.1        ad #endif
    447   1.1        ad 
    448   1.1        ad 	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
    449   1.1        ad 	    NULL, NULL, NULL, NULL);
    450   1.1        ad 	if (error)
    451   1.1        ad 		return (error);
    452   1.1        ad 
    453   1.1        ad 	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
    454   1.1        ad 		return error;
    455   1.1        ad 
    456   1.1        ad #ifdef XEN
    457   1.9      yamt 	if (ua.iopl)
    458   1.9      yamt 		iopl = SEL_UPL;
    459   1.9      yamt 	else
    460   1.9      yamt 		iopl = SEL_KPL;
    461  1.22     rmind 
    462  1.22     rmind     {
    463  1.22     rmind 	struct physdev_op physop;
    464  1.22     rmind 	struct pcb *pcb;
    465  1.22     rmind 
    466  1.22     rmind 	pcb = lwp_getpcb(l);
    467  1.22     rmind 	pcb->pcb_iopl = iopl;
    468  1.22     rmind 
    469   1.1        ad 	/* Force the change at ring 0. */
    470  1.22     rmind 	physop.cmd = PHYSDEVOP_SET_IOPL;
    471  1.22     rmind 	physop.u.set_iopl.iopl = iopl;
    472  1.22     rmind 	HYPERVISOR_physdev_op(&physop);
    473  1.22     rmind     }
    474   1.1        ad #elif defined(__x86_64__)
    475   1.1        ad 	if (ua.iopl)
    476   1.1        ad 		tf->tf_rflags |= PSL_IOPL;
    477   1.1        ad 	else
    478   1.1        ad 		tf->tf_rflags &= ~PSL_IOPL;
    479   1.1        ad #else
    480   1.1        ad 	if (ua.iopl)
    481   1.1        ad 		tf->tf_eflags |= PSL_IOPL;
    482   1.1        ad 	else
    483   1.1        ad 		tf->tf_eflags &= ~PSL_IOPL;
    484   1.1        ad #endif
    485   1.1        ad 
    486   1.1        ad 	return 0;
    487   1.1        ad }
    488   1.1        ad 
    489   1.1        ad int
    490   1.1        ad x86_get_ioperm(struct lwp *l, void *args, register_t *retval)
    491   1.1        ad {
    492   1.1        ad #ifdef IOPERM
    493   1.1        ad 	int error;
    494  1.22     rmind 	struct pcb *pcb = lwp_getpcb(l);
    495   1.1        ad 	struct x86_get_ioperm_args ua;
    496   1.9      yamt 	void *dummymap = NULL;
    497   1.9      yamt 	void *iomap;
    498   1.1        ad 
    499   1.1        ad 	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_GET,
    500   1.1        ad 	    NULL, NULL, NULL, NULL);
    501   1.1        ad 	if (error)
    502   1.1        ad 		return (error);
    503   1.1        ad 
    504   1.1        ad 	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
    505   1.1        ad 		return (error);
    506   1.1        ad 
    507   1.9      yamt 	iomap = pcb->pcb_iomap;
    508   1.9      yamt 	if (iomap == NULL) {
    509   1.9      yamt 		iomap = dummymap = kmem_alloc(IOMAPSIZE, KM_SLEEP);
    510   1.9      yamt 		memset(dummymap, 0xff, IOMAPSIZE);
    511   1.9      yamt 	}
    512   1.9      yamt 	error = copyout(iomap, ua.iomap, IOMAPSIZE);
    513   1.9      yamt 	if (dummymap != NULL) {
    514   1.9      yamt 		kmem_free(dummymap, IOMAPSIZE);
    515   1.9      yamt 	}
    516   1.9      yamt 	return error;
    517   1.1        ad #else
    518   1.1        ad 	return EINVAL;
    519   1.1        ad #endif
    520   1.1        ad }
    521   1.1        ad 
    522   1.1        ad int
    523   1.1        ad x86_set_ioperm(struct lwp *l, void *args, register_t *retval)
    524   1.1        ad {
    525   1.1        ad #ifdef IOPERM
    526   1.9      yamt 	struct cpu_info *ci;
    527   1.1        ad 	int error;
    528  1.22     rmind 	struct pcb *pcb = lwp_getpcb(l);
    529   1.1        ad 	struct x86_set_ioperm_args ua;
    530   1.9      yamt 	void *new;
    531   1.9      yamt 	void *old;
    532   1.1        ad 
    533   1.1        ad   	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPERM_SET,
    534   1.1        ad 	    NULL, NULL, NULL, NULL);
    535   1.1        ad 	if (error)
    536   1.1        ad 		return (error);
    537   1.1        ad 
    538   1.1        ad 	if ((error = copyin(args, &ua, sizeof(ua))) != 0)
    539   1.1        ad 		return (error);
    540   1.1        ad 
    541   1.9      yamt 	new = kmem_alloc(IOMAPSIZE, KM_SLEEP);
    542   1.9      yamt 	error = copyin(ua.iomap, new, IOMAPSIZE);
    543   1.9      yamt 	if (error) {
    544   1.9      yamt 		kmem_free(new, IOMAPSIZE);
    545   1.9      yamt 		return error;
    546   1.9      yamt 	}
    547   1.9      yamt 	old = pcb->pcb_iomap;
    548   1.9      yamt 	pcb->pcb_iomap = new;
    549   1.9      yamt 	if (old != NULL) {
    550   1.9      yamt 		kmem_free(old, IOMAPSIZE);
    551   1.9      yamt 	}
    552   1.9      yamt 
    553  1.13        ad 	kpreempt_disable();
    554   1.9      yamt 	ci = curcpu();
    555   1.9      yamt 	memcpy(ci->ci_iomap, pcb->pcb_iomap, sizeof(ci->ci_iomap));
    556   1.9      yamt 	ci->ci_tss.tss_iobase =
    557   1.9      yamt 	    ((uintptr_t)ci->ci_iomap - (uintptr_t)&ci->ci_tss) << 16;
    558  1.13        ad 	kpreempt_enable();
    559   1.9      yamt 
    560   1.9      yamt 	return error;
    561   1.1        ad #else
    562   1.1        ad 	return EINVAL;
    563   1.1        ad #endif
    564   1.1        ad }
    565   1.1        ad 
    566   1.1        ad int
    567   1.1        ad x86_get_mtrr(struct lwp *l, void *args, register_t *retval)
    568   1.1        ad {
    569   1.1        ad #ifdef MTRR
    570   1.1        ad 	struct x86_get_mtrr_args ua;
    571   1.1        ad 	int error, n;
    572   1.1        ad 
    573   1.1        ad 	if (mtrr_funcs == NULL)
    574   1.1        ad 		return ENOSYS;
    575   1.1        ad 
    576   1.1        ad  	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_GET,
    577   1.1        ad 	    NULL, NULL, NULL, NULL);
    578   1.1        ad 	if (error)
    579   1.1        ad 		return (error);
    580   1.1        ad 
    581   1.1        ad 	error = copyin(args, &ua, sizeof ua);
    582   1.1        ad 	if (error != 0)
    583   1.1        ad 		return error;
    584   1.1        ad 
    585   1.1        ad 	error = copyin(ua.n, &n, sizeof n);
    586   1.1        ad 	if (error != 0)
    587   1.1        ad 		return error;
    588   1.1        ad 
    589  1.12        ad 	KERNEL_LOCK(1, NULL);
    590   1.1        ad 	error = mtrr_get(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
    591  1.12        ad 	KERNEL_UNLOCK_ONE(NULL);
    592   1.1        ad 
    593   1.1        ad 	copyout(&n, ua.n, sizeof (int));
    594   1.1        ad 
    595   1.1        ad 	return error;
    596   1.1        ad #else
    597   1.1        ad 	return EINVAL;
    598   1.1        ad #endif
    599   1.1        ad }
    600   1.1        ad 
    601   1.1        ad int
    602   1.1        ad x86_set_mtrr(struct lwp *l, void *args, register_t *retval)
    603   1.1        ad {
    604   1.1        ad #ifdef MTRR
    605   1.1        ad 	int error, n;
    606   1.1        ad 	struct x86_set_mtrr_args ua;
    607   1.1        ad 
    608   1.1        ad 	if (mtrr_funcs == NULL)
    609   1.1        ad 		return ENOSYS;
    610   1.1        ad 
    611   1.1        ad  	error = kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_MTRR_SET,
    612   1.1        ad 	    NULL, NULL, NULL, NULL);
    613   1.1        ad 	if (error)
    614   1.1        ad 		return (error);
    615   1.1        ad 
    616   1.1        ad 	error = copyin(args, &ua, sizeof ua);
    617   1.1        ad 	if (error != 0)
    618   1.1        ad 		return error;
    619   1.1        ad 
    620   1.1        ad 	error = copyin(ua.n, &n, sizeof n);
    621   1.1        ad 	if (error != 0)
    622   1.1        ad 		return error;
    623   1.1        ad 
    624  1.12        ad 	KERNEL_LOCK(1, NULL);
    625   1.1        ad 	error = mtrr_set(ua.mtrrp, &n, l->l_proc, MTRR_GETSET_USER);
    626   1.1        ad 	if (n != 0)
    627   1.1        ad 		mtrr_commit();
    628  1.12        ad 	KERNEL_UNLOCK_ONE(NULL);
    629   1.1        ad 
    630   1.1        ad 	copyout(&n, ua.n, sizeof n);
    631   1.1        ad 
    632   1.1        ad 	return error;
    633   1.1        ad #else
    634   1.1        ad 	return EINVAL;
    635   1.1        ad #endif
    636   1.1        ad }
    637   1.1        ad 
    638  1.24       chs #ifdef __x86_64__
    639  1.24       chs #define pcb_fsd pcb_fs
    640  1.24       chs #define pcb_gsd pcb_gs
    641  1.24       chs #define segment_descriptor mem_segment_descriptor
    642  1.24       chs #endif
    643  1.24       chs 
    644   1.1        ad int
    645  1.24       chs x86_set_sdbase32(void *arg, char which, lwp_t *l, bool direct)
    646   1.5        ad {
    647  1.24       chs 	struct trapframe *tf = l->l_md.md_regs;
    648  1.24       chs 	union descriptor usd;
    649  1.18        ad 	struct pcb *pcb;
    650  1.24       chs 	uint32_t base;
    651   1.6        ad 	int error;
    652   1.5        ad 
    653  1.18        ad 	if (direct) {
    654  1.18        ad 		base = (vaddr_t)arg;
    655  1.18        ad 	} else {
    656  1.18        ad 		error = copyin(arg, &base, sizeof(base));
    657  1.18        ad 		if (error != 0)
    658  1.18        ad 			return error;
    659  1.18        ad 	}
    660   1.5        ad 
    661  1.24       chs 	memset(&usd, 0, sizeof(usd));
    662  1.19    bouyer 	usd.sd.sd_lobase = base & 0xffffff;
    663  1.19    bouyer 	usd.sd.sd_hibase = (base >> 24) & 0xff;
    664  1.19    bouyer 	usd.sd.sd_lolimit = 0xffff;
    665  1.19    bouyer 	usd.sd.sd_hilimit = 0xf;
    666  1.19    bouyer 	usd.sd.sd_type = SDT_MEMRWA;
    667  1.19    bouyer 	usd.sd.sd_dpl = SEL_UPL;
    668  1.19    bouyer 	usd.sd.sd_p = 1;
    669  1.19    bouyer 	usd.sd.sd_def32 = 1;
    670  1.19    bouyer 	usd.sd.sd_gran = 1;
    671   1.6        ad 
    672  1.24       chs 	pcb = lwp_getpcb(l);
    673  1.13        ad 	kpreempt_disable();
    674   1.6        ad 	if (which == 'f') {
    675  1.19    bouyer 		memcpy(&pcb->pcb_fsd, &usd.sd,
    676  1.19    bouyer 		    sizeof(struct segment_descriptor));
    677  1.18        ad 		if (l == curlwp) {
    678  1.19    bouyer 			update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &usd);
    679  1.24       chs #ifdef __x86_64__
    680  1.24       chs 			setfs(GSEL(GUFS_SEL, SEL_UPL));
    681  1.24       chs #endif
    682  1.18        ad 		}
    683  1.24       chs 		tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
    684   1.6        ad 	} else /* which == 'g' */ {
    685  1.19    bouyer 		memcpy(&pcb->pcb_gsd, &usd.sd,
    686  1.19    bouyer 		    sizeof(struct segment_descriptor));
    687  1.18        ad 		if (l == curlwp) {
    688  1.19    bouyer 			update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &usd);
    689  1.24       chs #ifdef __x86_64__
    690  1.24       chs #ifndef XEN
    691  1.24       chs 			setusergs(GSEL(GUGS_SEL, SEL_UPL));
    692  1.24       chs #else
    693  1.24       chs 			HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL,
    694  1.24       chs 						    GSEL(GUGS_SEL, SEL_UPL));
    695  1.24       chs #endif
    696  1.24       chs #endif
    697  1.18        ad 		}
    698  1.24       chs 		tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    699   1.6        ad 	}
    700  1.13        ad 	kpreempt_enable();
    701  1.24       chs 	return 0;
    702  1.24       chs }
    703   1.5        ad 
    704  1.24       chs int
    705  1.24       chs x86_set_sdbase(void *arg, char which, lwp_t *l, bool direct)
    706  1.24       chs {
    707  1.24       chs #ifdef i386
    708  1.24       chs 	return x86_set_sdbase32(arg, which, l, direct);
    709   1.5        ad #else
    710  1.24       chs 	struct pcb *pcb;
    711  1.24       chs 	vaddr_t base;
    712  1.24       chs 
    713  1.24       chs 	if (l->l_proc->p_flag & PK_32) {
    714  1.24       chs 		return x86_set_sdbase32(arg, which, l, direct);
    715  1.24       chs 	}
    716  1.24       chs 
    717  1.24       chs 	if (direct) {
    718  1.24       chs 		base = (vaddr_t)arg;
    719  1.24       chs 	} else {
    720  1.29  christos 		int error = copyin(arg, &base, sizeof(base));
    721  1.24       chs 		if (error != 0)
    722  1.24       chs 			return error;
    723  1.24       chs 	}
    724  1.24       chs 
    725  1.24       chs 	if (base >= VM_MAXUSER_ADDRESS)
    726  1.24       chs 		return EINVAL;
    727  1.24       chs 
    728  1.24       chs 	pcb = lwp_getpcb(l);
    729  1.24       chs 
    730  1.24       chs 	kpreempt_disable();
    731  1.24       chs 	switch(which) {
    732  1.24       chs 	case 'f':
    733  1.24       chs 		pcb->pcb_fs = base;
    734  1.24       chs 		if (l == curlwp)
    735  1.24       chs 			wrmsr(MSR_FSBASE, pcb->pcb_fs);
    736  1.24       chs 		break;
    737  1.24       chs 	case 'g':
    738  1.24       chs 		pcb->pcb_gs = base;
    739  1.24       chs 		if (l == curlwp)
    740  1.24       chs 			wrmsr(MSR_KERNELGSBASE, pcb->pcb_gs);
    741  1.24       chs 		break;
    742  1.24       chs 	default:
    743  1.28  dholland 		panic("x86_set_sdbase");
    744  1.24       chs 	}
    745  1.24       chs 	kpreempt_enable();
    746  1.24       chs 
    747  1.29  christos 	return 0;
    748   1.5        ad #endif
    749   1.5        ad }
    750   1.5        ad 
    751   1.5        ad int
    752  1.24       chs x86_get_sdbase32(void *arg, char which)
    753   1.5        ad {
    754   1.5        ad 	struct segment_descriptor *sd;
    755  1.24       chs 	uint32_t base;
    756   1.5        ad 
    757   1.5        ad 	switch (which) {
    758   1.5        ad 	case 'f':
    759  1.24       chs 		sd = (void *)&curpcb->pcb_fsd;
    760   1.5        ad 		break;
    761   1.5        ad 	case 'g':
    762  1.24       chs 		sd = (void *)&curpcb->pcb_gsd;
    763   1.5        ad 		break;
    764   1.5        ad 	default:
    765  1.28  dholland 		panic("x86_get_sdbase32");
    766   1.5        ad 	}
    767   1.5        ad 
    768   1.5        ad 	base = sd->sd_hibase << 24 | sd->sd_lobase;
    769  1.21      yamt 	return copyout(&base, arg, sizeof(base));
    770  1.24       chs }
    771  1.24       chs 
    772  1.24       chs int
    773  1.24       chs x86_get_sdbase(void *arg, char which)
    774  1.24       chs {
    775  1.24       chs #ifdef i386
    776  1.24       chs 	return x86_get_sdbase32(arg, which);
    777   1.5        ad #else
    778  1.24       chs 	vaddr_t base;
    779  1.24       chs 	struct pcb *pcb;
    780  1.24       chs 
    781  1.24       chs 	if (curproc->p_flag & PK_32) {
    782  1.24       chs 		return x86_get_sdbase32(arg, which);
    783  1.24       chs 	}
    784  1.24       chs 
    785  1.24       chs 	pcb = lwp_getpcb(curlwp);
    786  1.24       chs 
    787  1.24       chs 	switch(which) {
    788  1.24       chs 	case 'f':
    789  1.24       chs 		base = pcb->pcb_fs;
    790  1.24       chs 		break;
    791  1.24       chs 	case 'g':
    792  1.24       chs 		base = pcb->pcb_gs;
    793  1.24       chs 		break;
    794  1.24       chs 	default:
    795  1.24       chs 		panic("x86_get_sdbase");
    796  1.24       chs 	}
    797  1.24       chs 
    798  1.24       chs 	return copyout(&base, arg, sizeof(base));
    799   1.5        ad #endif
    800   1.5        ad }
    801   1.5        ad 
    802   1.5        ad int
    803   1.8       dsl sys_sysarch(struct lwp *l, const struct sys_sysarch_args *uap, register_t *retval)
    804   1.1        ad {
    805   1.8       dsl 	/* {
    806   1.1        ad 		syscallarg(int) op;
    807   1.1        ad 		syscallarg(void *) parms;
    808   1.8       dsl 	} */
    809   1.1        ad 	int error = 0;
    810   1.1        ad 
    811   1.1        ad 	switch(SCARG(uap, op)) {
    812   1.1        ad 	case X86_IOPL:
    813   1.1        ad 		error = x86_iopl(l, SCARG(uap, parms), retval);
    814   1.1        ad 		break;
    815   1.1        ad 
    816  1.32      maxv #ifdef i386
    817  1.32      maxv 	/*
    818  1.32      maxv 	 * On amd64, this is done via netbsd32_sysarch.
    819  1.32      maxv 	 */
    820   1.1        ad 	case X86_GET_LDT:
    821   1.1        ad 		error = x86_get_ldt(l, SCARG(uap, parms), retval);
    822   1.1        ad 		break;
    823   1.1        ad 
    824   1.1        ad 	case X86_SET_LDT:
    825   1.1        ad 		error = x86_set_ldt(l, SCARG(uap, parms), retval);
    826   1.1        ad 		break;
    827  1.32      maxv #endif
    828   1.1        ad 
    829   1.1        ad 	case X86_GET_IOPERM:
    830   1.1        ad 		error = x86_get_ioperm(l, SCARG(uap, parms), retval);
    831   1.1        ad 		break;
    832   1.1        ad 
    833   1.1        ad 	case X86_SET_IOPERM:
    834   1.1        ad 		error = x86_set_ioperm(l, SCARG(uap, parms), retval);
    835   1.1        ad 		break;
    836   1.1        ad 
    837   1.1        ad 	case X86_GET_MTRR:
    838   1.1        ad 		error = x86_get_mtrr(l, SCARG(uap, parms), retval);
    839   1.1        ad 		break;
    840   1.1        ad 	case X86_SET_MTRR:
    841   1.1        ad 		error = x86_set_mtrr(l, SCARG(uap, parms), retval);
    842   1.1        ad 		break;
    843   1.1        ad 
    844   1.1        ad #ifdef VM86
    845   1.1        ad 	case X86_VM86:
    846   1.1        ad 		error = x86_vm86(l, SCARG(uap, parms), retval);
    847   1.1        ad 		break;
    848   1.1        ad 	case X86_OLD_VM86:
    849   1.1        ad 		error = compat_16_x86_vm86(l, SCARG(uap, parms), retval);
    850   1.1        ad 		break;
    851   1.1        ad #endif
    852   1.1        ad 
    853  1.34      maxv #ifdef PMC
    854   1.1        ad 	case X86_PMC_INFO:
    855  1.33      maxv 		error = sys_pmc_info(l, SCARG(uap, parms), retval);
    856   1.1        ad 		break;
    857   1.1        ad 
    858   1.1        ad 	case X86_PMC_STARTSTOP:
    859  1.33      maxv 		error = sys_pmc_startstop(l, SCARG(uap, parms), retval);
    860   1.1        ad 		break;
    861   1.1        ad 
    862   1.1        ad 	case X86_PMC_READ:
    863  1.33      maxv 		error = sys_pmc_read(l, SCARG(uap, parms), retval);
    864   1.1        ad 		break;
    865   1.1        ad #endif
    866   1.1        ad 
    867   1.5        ad 	case X86_SET_FSBASE:
    868  1.18        ad 		error = x86_set_sdbase(SCARG(uap, parms), 'f', curlwp, false);
    869   1.5        ad 		break;
    870   1.5        ad 
    871   1.5        ad 	case X86_SET_GSBASE:
    872  1.18        ad 		error = x86_set_sdbase(SCARG(uap, parms), 'g', curlwp, false);
    873   1.5        ad 		break;
    874   1.5        ad 
    875   1.5        ad 	case X86_GET_FSBASE:
    876   1.5        ad 		error = x86_get_sdbase(SCARG(uap, parms), 'f');
    877   1.5        ad 		break;
    878   1.5        ad 
    879   1.5        ad 	case X86_GET_GSBASE:
    880   1.5        ad 		error = x86_get_sdbase(SCARG(uap, parms), 'g');
    881   1.5        ad 		break;
    882   1.5        ad 
    883   1.1        ad 	default:
    884   1.1        ad 		error = EINVAL;
    885   1.1        ad 		break;
    886   1.1        ad 	}
    887   1.1        ad 	return (error);
    888   1.1        ad }
    889  1.18        ad 
    890  1.18        ad int
    891  1.18        ad cpu_lwp_setprivate(lwp_t *l, void *addr)
    892  1.18        ad {
    893  1.18        ad 
    894  1.24       chs #ifdef __x86_64__
    895  1.24       chs 	if ((l->l_proc->p_flag & PK_32) == 0) {
    896  1.24       chs 		return x86_set_sdbase(addr, 'f', l, true);
    897  1.24       chs 	}
    898  1.24       chs #endif
    899  1.18        ad 	return x86_set_sdbase(addr, 'g', l, true);
    900  1.18        ad }
    901