Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.156
      1 /*	$NetBSD: linux_machdep.c,v 1.156 2014/01/26 19:16:17 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden, and by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.156 2014/01/26 19:16:17 dsl Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_vm86.h"
     37 #include "opt_user_ldt.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/signalvar.h>
     43 #include <sys/kernel.h>
     44 #include <sys/proc.h>
     45 #include <sys/buf.h>
     46 #include <sys/reboot.h>
     47 #include <sys/conf.h>
     48 #include <sys/exec.h>
     49 #include <sys/file.h>
     50 #include <sys/callout.h>
     51 #include <sys/malloc.h>
     52 #include <sys/mbuf.h>
     53 #include <sys/msgbuf.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/device.h>
     57 #include <sys/syscallargs.h>
     58 #include <sys/filedesc.h>
     59 #include <sys/exec_elf.h>
     60 #include <sys/disklabel.h>
     61 #include <sys/ioctl.h>
     62 #include <sys/wait.h>
     63 #include <sys/kauth.h>
     64 #include <sys/kmem.h>
     65 
     66 #include <miscfs/specfs/specdev.h>
     67 
     68 #include <compat/linux/common/linux_types.h>
     69 #include <compat/linux/common/linux_signal.h>
     70 #include <compat/linux/common/linux_util.h>
     71 #include <compat/linux/common/linux_ioctl.h>
     72 #include <compat/linux/common/linux_hdio.h>
     73 #include <compat/linux/common/linux_exec.h>
     74 #include <compat/linux/common/linux_machdep.h>
     75 #include <compat/linux/common/linux_errno.h>
     76 
     77 #include <compat/linux/linux_syscallargs.h>
     78 
     79 #include <sys/cpu.h>
     80 #include <machine/cpufunc.h>
     81 #include <machine/psl.h>
     82 #include <machine/reg.h>
     83 #include <machine/segments.h>
     84 #include <machine/specialreg.h>
     85 #include <machine/sysarch.h>
     86 #include <machine/vm86.h>
     87 #include <machine/vmparam.h>
     88 
     89 /*
     90  * To see whether wscons is configured (for virtual console ioctl calls).
     91  */
     92 #if defined(_KERNEL_OPT)
     93 #include "wsdisplay.h"
     94 #endif
     95 #if (NWSDISPLAY > 0)
     96 #include <dev/wscons/wsconsio.h>
     97 #include <dev/wscons/wsdisplay_usl_io.h>
     98 #if defined(_KERNEL_OPT)
     99 #include "opt_xserver.h"
    100 #endif
    101 #endif
    102 
/*
 * Debug tracing.  With DEBUG_LINUX defined, DPRINTF((fmt, ...)) expands to
 * a uprintf() call; otherwise it expands to nothing.  The macro takes a
 * single argument that must itself be a complete, parenthesised argument
 * list, which is why every call site uses double parentheses.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
    108 
    109 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
    110 extern struct disklist *x86_alldisks;
    111 static void linux_save_ucontext(struct lwp *, struct trapframe *,
    112     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
    113 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    114     const sigset_t *, struct linux_sigcontext *);
    115 static int linux_restore_sigcontext(struct lwp *,
    116     struct linux_sigcontext *, register_t *);
    117 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
    118 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
    119 
    120 extern char linux_sigcode[], linux_rt_sigcode[];
    121 
    122 /*
    123  * Deal with some i386-specific things in the Linux emulation code.
    124  */
    125 
    126 void
    127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
    128 {
    129 	struct pcb *pcb = lwp_getpcb(l);
    130 	struct trapframe *tf;
    131 
    132 	/* If we were using the FPU, forget about it. */
    133 	if (pcb->pcb_fpcpu != NULL)
    134 		fpusave_lwp(l, false);
    135 
    136 
    137 #ifdef USER_LDT
    138 	pmap_ldt_cleanup(l);
    139 #endif
    140 
    141 	l->l_md.md_flags &= ~MDL_USEDFPU;
    142 
    143 	if (i386_use_fxsave) {
    144 		pcb->pcb_savefpu.sv_xmm.fx_cw = __Linux_NPXCW__;
    145 		pcb->pcb_savefpu.sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
    146 	} else
    147 		pcb->pcb_savefpu.sv_87.s87_cw = __Linux_NPXCW__;
    148 
    149 	tf = l->l_md.md_regs;
    150 	tf->tf_gs = 0;
    151 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    152 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    153 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    154 	tf->tf_edi = 0;
    155 	tf->tf_esi = 0;
    156 	tf->tf_ebp = 0;
    157 	tf->tf_ebx = l->l_proc->p_psstrp;
    158 	tf->tf_edx = 0;
    159 	tf->tf_ecx = 0;
    160 	tf->tf_eax = 0;
    161 	tf->tf_eip = epp->ep_entry;
    162 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    163 	tf->tf_eflags = PSL_USERSET;
    164 	tf->tf_esp = stack;
    165 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    166 }
    167 
    168 /*
    169  * Send an interrupt to process.
    170  *
    171  * Stack is set up to allow sigcode stored
    172  * in u. to call routine, followed by kcall
    173  * to sigreturn routine below.  After sigreturn
    174  * resets the signal mask, the stack, and the
    175  * frame pointer, it returns to the user
    176  * specified pc, psl.
    177  */
    178 
    179 void
    180 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    181 {
    182 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    183 		linux_rt_sendsig(ksi, mask);
    184 	else
    185 		linux_old_sendsig(ksi, mask);
    186 }
    187 
    188 
    189 static void
    190 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    191 {
    192 	uc->uc_flags = 0;
    193 	uc->uc_link = NULL;
    194 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    195 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    196 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    197 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    198 }
    199 
    200 static void
    201 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
    202     const sigset_t *mask, struct linux_sigcontext *sc)
    203 {
    204 	struct pcb *pcb = lwp_getpcb(l);
    205 
    206 	/* Save register context. */
    207 #ifdef VM86
    208 	if (tf->tf_eflags & PSL_VM) {
    209 		sc->sc_gs = tf->tf_vm86_gs;
    210 		sc->sc_fs = tf->tf_vm86_fs;
    211 		sc->sc_es = tf->tf_vm86_es;
    212 		sc->sc_ds = tf->tf_vm86_ds;
    213 		sc->sc_eflags = get_vflags(l);
    214 	} else
    215 #endif
    216 	{
    217 		sc->sc_gs = tf->tf_gs;
    218 		sc->sc_fs = tf->tf_fs;
    219 		sc->sc_es = tf->tf_es;
    220 		sc->sc_ds = tf->tf_ds;
    221 		sc->sc_eflags = tf->tf_eflags;
    222 	}
    223 	sc->sc_edi = tf->tf_edi;
    224 	sc->sc_esi = tf->tf_esi;
    225 	sc->sc_esp = tf->tf_esp;
    226 	sc->sc_ebp = tf->tf_ebp;
    227 	sc->sc_ebx = tf->tf_ebx;
    228 	sc->sc_edx = tf->tf_edx;
    229 	sc->sc_ecx = tf->tf_ecx;
    230 	sc->sc_eax = tf->tf_eax;
    231 	sc->sc_eip = tf->tf_eip;
    232 	sc->sc_cs = tf->tf_cs;
    233 	sc->sc_esp_at_signal = tf->tf_esp;
    234 	sc->sc_ss = tf->tf_ss;
    235 	sc->sc_err = tf->tf_err;
    236 	sc->sc_trapno = tf->tf_trapno;
    237 	sc->sc_cr2 = pcb->pcb_cr2;
    238 	sc->sc_387 = NULL;
    239 
    240 	/* Save signal stack. */
    241 	/* Linux doesn't save the onstack flag in sigframe */
    242 
    243 	/* Save signal mask. */
    244 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    245 }
    246 
    247 static void
    248 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    249 {
    250 	struct lwp *l = curlwp;
    251 	struct proc *p = l->l_proc;
    252 	struct trapframe *tf;
    253 	struct linux_rt_sigframe *fp, frame;
    254 	int onstack, error;
    255 	int sig = ksi->ksi_signo;
    256 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    257 	struct sigaltstack *sas = &l->l_sigstk;
    258 
    259 	tf = l->l_md.md_regs;
    260 	/* Do we need to jump onto the signal stack? */
    261 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    262 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    263 
    264 
    265 	/* Allocate space for the signal handler context. */
    266 	if (onstack)
    267 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    268 		    sas->ss_size);
    269 	else
    270 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    271 	fp--;
    272 
    273 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    274 	    onstack, fp, sig, tf->tf_eip,
    275 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
    276 
    277 	/* Build stack frame for signal trampoline. */
    278 	frame.sf_handler = catcher;
    279 	frame.sf_sig = native_to_linux_signo[sig];
    280 	frame.sf_sip = &fp->sf_si;
    281 	frame.sf_ucp = &fp->sf_uc;
    282 
    283 	/*
    284 	 * XXX: the following code assumes that the constants for
    285 	 * siginfo are the same between linux and NetBSD.
    286 	 */
    287 	native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
    288 
    289 	/* Save register context. */
    290 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    291 	sendsig_reset(l, sig);
    292 
    293 	mutex_exit(p->p_lock);
    294 	error = copyout(&frame, fp, sizeof(frame));
    295 	mutex_enter(p->p_lock);
    296 
    297 	if (error != 0) {
    298 		/*
    299 		 * Process has trashed its stack; give it an illegal
    300 		 * instruction to halt it in its tracks.
    301 		 */
    302 		sigexit(l, SIGILL);
    303 		/* NOTREACHED */
    304 	}
    305 
    306 	/*
    307 	 * Build context to run handler in.
    308 	 */
    309 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    310 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    311 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    312 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    313 	    (linux_rt_sigcode - linux_sigcode);
    314 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    315 	tf->tf_eflags &= ~PSL_CLEARSIG;
    316 	tf->tf_esp = (int)fp;
    317 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    318 
    319 	/* Remember that we're now on the signal stack. */
    320 	if (onstack)
    321 		sas->ss_flags |= SS_ONSTACK;
    322 }
    323 
    324 static void
    325 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    326 {
    327 	struct lwp *l = curlwp;
    328 	struct proc *p = l->l_proc;
    329 	struct trapframe *tf;
    330 	struct linux_sigframe *fp, frame;
    331 	int onstack, error;
    332 	int sig = ksi->ksi_signo;
    333 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    334 	struct sigaltstack *sas = &l->l_sigstk;
    335 
    336 	tf = l->l_md.md_regs;
    337 
    338 	/* Do we need to jump onto the signal stack? */
    339 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    340 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    341 
    342 	/* Allocate space for the signal handler context. */
    343 	if (onstack)
    344 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    345 		    sas->ss_size);
    346 	else
    347 		fp = (struct linux_sigframe *)tf->tf_esp;
    348 	fp--;
    349 
    350 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    351 	    onstack, fp, sig, tf->tf_eip,
    352 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
    353 
    354 	/* Build stack frame for signal trampoline. */
    355 	frame.sf_handler = catcher;
    356 	frame.sf_sig = native_to_linux_signo[sig];
    357 
    358 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    359 	sendsig_reset(l, sig);
    360 
    361 	mutex_exit(p->p_lock);
    362 	error = copyout(&frame, fp, sizeof(frame));
    363 	mutex_enter(p->p_lock);
    364 
    365 	if (error != 0) {
    366 		/*
    367 		 * Process has trashed its stack; give it an illegal
    368 		 * instruction to halt it in its tracks.
    369 		 */
    370 		sigexit(l, SIGILL);
    371 		/* NOTREACHED */
    372 	}
    373 
    374 	/*
    375 	 * Build context to run handler in.
    376 	 */
    377 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    378 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    379 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    380 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    381 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    382 	tf->tf_eflags &= ~PSL_CLEARSIG;
    383 	tf->tf_esp = (int)fp;
    384 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    385 
    386 	/* Remember that we're now on the signal stack. */
    387 	if (onstack)
    388 		sas->ss_flags |= SS_ONSTACK;
    389 }
    390 
    391 /*
    392  * System call to cleanup state after a signal
    393  * has been taken.  Reset signal mask and
    394  * stack state from context left by sendsig (above).
    395  * Return to previous pc and psl as specified by
    396  * context left by sendsig. Check carefully to
    397  * make sure that the user has not modified the
    398  * psl to gain improper privileges or to cause
    399  * a machine fault.
    400  */
    401 int
    402 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
    403 {
    404 	/* {
    405 		syscallarg(struct linux_ucontext *) ucp;
    406 	} */
    407 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    408 	int error;
    409 
    410 	/*
    411 	 * The trampoline code hands us the context.
    412 	 * It is unsafe to keep track of it ourselves, in the event that a
    413 	 * program jumps out of a signal handler.
    414 	 */
    415 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    416 		return error;
    417 
    418 	/* XXX XAX we can do better here by using more of the ucontext */
    419 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    420 }
    421 
    422 int
    423 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
    424 {
    425 	/* {
    426 		syscallarg(struct linux_sigcontext *) scp;
    427 	} */
    428 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    429 	int error;
    430 
    431 	/*
    432 	 * The trampoline code hands us the context.
    433 	 * It is unsafe to keep track of it ourselves, in the event that a
    434 	 * program jumps out of a signal handler.
    435 	 */
    436 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    437 		return error;
    438 	return linux_restore_sigcontext(l, &context, retval);
    439 }
    440 
/*
 * Common back end of the two sigreturn system calls.
 *
 * Loads the trapframe of `l' from the (already copied-in) Linux
 * sigcontext `scp', recomputes the SS_ONSTACK flag and reinstalls the
 * saved signal mask.
 *
 * Returns EINVAL if the context asks for protected mode with eflags that
 * differ from the current ones in any PSL_USERSTATIC bit, or with a
 * %cs/eflags pair that is not user mode; otherwise EJUSTRETURN.
 * `retval' is not used.
 */
static int
linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    register_t *retval)
{
	struct proc *p = l->l_proc;
	struct sigaltstack *sas = &l->l_sigstk;
	struct trapframe *tf;
	sigset_t mask;
	ssize_t ss_gap;

	/* Restore register context. */
	tf = l->l_md.md_regs;
	DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));

#ifdef VM86
	if (scp->sc_eflags & PSL_VM) {
		void syscall_vm86(struct trapframe *);

		/* Returning to vm86 mode: use the vm86 segment slots. */
		tf->tf_vm86_gs = scp->sc_gs;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_ds = scp->sc_ds;
		set_vflags(l, scp->sc_eflags);
		p->p_md.md_syscall = syscall_vm86;
	} else
#endif
	{
		/*
		 * Check for security violations.  If we're returning to
		 * protected mode, the CPU will validate the segment registers
		 * automatically and generate a trap on violations.  We handle
		 * the trap, rather than doing all of the checking here.
		 */
		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
		    !USERMODE(scp->sc_cs, scp->sc_eflags))
			return EINVAL;

		tf->tf_gs = scp->sc_gs;
		tf->tf_fs = scp->sc_fs;
		tf->tf_es = scp->sc_es;
		tf->tf_ds = scp->sc_ds;
#ifdef VM86
		/*
		 * The frame was in vm86 mode but the new context is not:
		 * let the emulation reinstall its system call handler.
		 * This must run before tf_eflags is overwritten below.
		 */
		if (tf->tf_eflags & PSL_VM)
			(*p->p_emul->e_syscall_intern)(p);
#endif
		tf->tf_eflags = scp->sc_eflags;
	}
	tf->tf_edi = scp->sc_edi;
	tf->tf_esi = scp->sc_esi;
	tf->tf_ebp = scp->sc_ebp;
	tf->tf_ebx = scp->sc_ebx;
	tf->tf_edx = scp->sc_edx;
	tf->tf_ecx = scp->sc_ecx;
	tf->tf_eax = scp->sc_eax;
	tf->tf_eip = scp->sc_eip;
	tf->tf_cs = scp->sc_cs;
	/* The stack pointer is taken from sc_esp_at_signal, not sc_esp. */
	tf->tf_esp = scp->sc_esp_at_signal;
	tf->tf_ss = scp->sc_ss;

	/* Restore signal stack. */
	/*
	 * Linux really does it this way; it doesn't have space in sigframe
	 * to save the onstack flag.
	 */
	mutex_enter(p->p_lock);
	/*
	 * SS_ONSTACK is derived from where the restored stack pointer
	 * lies: set if it falls in [ss_sp, ss_sp + ss_size), else cleared.
	 */
	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
	if (ss_gap >= 0 && ss_gap < sas->ss_size)
		sas->ss_flags |= SS_ONSTACK;
	else
		sas->ss_flags &= ~SS_ONSTACK;

	/* Restore signal mask. */
	linux_old_to_native_sigset(&mask, &scp->sc_mask);
	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
	mutex_exit(p->p_lock);

	DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
	return EJUSTRETURN;
}
    520 
    521 #ifdef USER_LDT
    522 
    523 static int
    524 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    525     register_t *retval)
    526 {
    527 	struct x86_get_ldt_args gl;
    528 	int error;
    529 	union descriptor *ldt_buf;
    530 	size_t sz;
    531 
    532 	/*
    533 	 * I've checked the linux code - this function is asymetric with
    534 	 * linux_write_ldt, and returns raw ldt entries.
    535 	 * NB, the code I saw zerod the spare parts of the user buffer.
    536 	 */
    537 
    538 	DPRINTF(("linux_read_ldt!"));
    539 
    540 	sz = 8192 * sizeof(*ldt_buf);
    541 	ldt_buf = kmem_zalloc(sz, KM_SLEEP);
    542 	gl.start = 0;
    543 	gl.desc = NULL;
    544 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    545 	error = x86_get_ldt1(l, &gl, ldt_buf);
    546 	/* NB gl.num might have changed */
    547 	if (error == 0) {
    548 		*retval = gl.num * sizeof *ldt;
    549 		error = copyout(ldt_buf, SCARG(uap, ptr),
    550 		    gl.num * sizeof *ldt_buf);
    551 	}
    552 	kmem_free(ldt_buf, sz);
    553 
    554 	return error;
    555 }
    556 
/*
 * Argument block for the LDT write operations of modify_ldt.
 * linux_write_ldt() fills it with a single copyin() of exactly
 * sizeof(struct linux_ldt_info) bytes, so the layout must match what the
 * Linux binary passes (presumably Linux's own modify_ldt descriptor
 * structure -- confirm against the Linux headers before changing it).
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to write; must be < 8192 */
	u_long base_addr;		/* segment base (sd_lobase/sd_hibase) */
	u_int limit;			/* segment limit (sd_lolimit/sd_hilimit) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless oldmode */
};
    568 
    569 static int
    570 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    571     int oldmode)
    572 {
    573 	struct linux_ldt_info ldt_info;
    574 	union descriptor d;
    575 	struct x86_set_ldt_args sl;
    576 	int error;
    577 
    578 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    579 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    580 		return (EINVAL);
    581 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    582 		return error;
    583 	if (ldt_info.entry_number >= 8192)
    584 		return (EINVAL);
    585 	if (ldt_info.contents == 3) {
    586 		if (oldmode)
    587 			return (EINVAL);
    588 		if (ldt_info.seg_not_present)
    589 			return (EINVAL);
    590 	}
    591 
    592 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    593 	    (oldmode || (ldt_info.contents == 0 &&
    594 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    595 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    596 	    ldt_info.useable == 0))) {
    597 		/* this means you should zero the ldt */
    598 		(void)memset(&d, 0, sizeof(d));
    599 	} else {
    600 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    601 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    602 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    603 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    604 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    605 		    (!ldt_info.read_exec_only << 1);
    606 		d.sd.sd_dpl = SEL_UPL;
    607 		d.sd.sd_p = !ldt_info.seg_not_present;
    608 		d.sd.sd_def32 = ldt_info.seg_32bit;
    609 		d.sd.sd_gran = ldt_info.limit_in_pages;
    610 		if (!oldmode)
    611 			d.sd.sd_xx = ldt_info.useable;
    612 		else
    613 			d.sd.sd_xx = 0;
    614 	}
    615 	sl.start = ldt_info.entry_number;
    616 	sl.desc = NULL;
    617 	sl.num = 1;
    618 
    619 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    620 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    621 
    622 	return x86_set_ldt1(l, &sl, &d);
    623 }
    624 
    625 #endif /* USER_LDT */
    626 
    627 int
    628 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
    629 {
    630 	/* {
    631 		syscallarg(int) func;
    632 		syscallarg(void *) ptr;
    633 		syscallarg(size_t) bytecount;
    634 	} */
    635 
    636 	switch (SCARG(uap, func)) {
    637 #ifdef USER_LDT
    638 	case 0:
    639 		return linux_read_ldt(l, (const void *)uap, retval);
    640 	case 1:
    641 		return linux_write_ldt(l, (const void *)uap, 1);
    642 	case 2:
    643 #ifdef notyet
    644 		return linux_read_default_ldt(l, (const void *)uap, retval);
    645 #else
    646 		return (ENOSYS);
    647 #endif
    648 	case 0x11:
    649 		return linux_write_ldt(l, (const void *)uap, 0);
    650 #endif /* USER_LDT */
    651 
    652 	default:
    653 		return (ENOSYS);
    654 	}
    655 }
    656 
    657 /*
    658  * XXX Pathetic hack to make svgalib work. This will fake the major
    659  * device number of an opened VT so that svgalib likes it. grmbl.
    660  * Should probably do it 'wrong the right way' and use a mapping
    661  * array for all major device numbers, and map linux_mknod too.
    662  */
    663 dev_t
    664 linux_fakedev(dev_t dev, int raw)
    665 {
    666 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    667 	const struct cdevsw *cd = cdevsw_lookup(dev);
    668 
    669 	if (raw) {
    670 #if (NWSDISPLAY > 0)
    671 		extern const struct cdevsw wsdisplay_cdevsw;
    672 		if (cd == &wsdisplay_cdevsw)
    673 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    674 #endif
    675 	}
    676 
    677 	if (cd == &ptc_cdevsw)
    678 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    679 	if (cd == &pts_cdevsw)
    680 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    681 
    682 	return dev;
    683 }
    684 
    685 #if (NWSDISPLAY > 0)
/*
 * Key translation tables, NR_KEYS entries each: plain_map, shift_map,
 * altgr_map and ctrl_map.  They are collected in linux_keytabs[].
 * (Presumably these are Linux-format keymap entries, one table per
 * modifier state and indexed by keycode -- confirm against the
 * consumer of linux_keytabs.)
 *
 * That's not complete, but enough to get an X server running.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    759 
/*
 * Exported list of the key tables, in the order plain, shift, altgr,
 * altgr (again), ctrl.
 * NOTE(review): slot 3 repeats altgr_map; presumably it stands in for a
 * shift+altgr table that is not provided -- confirm against the consumer.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    763 #endif
    764 
    765 static struct biosdisk_info *
    766 fd2biosinfo(struct proc *p, struct file *fp)
    767 {
    768 	struct vnode *vp;
    769 	const char *blkname;
    770 	char diskname[16];
    771 	int i;
    772 	struct nativedisk_info *nip;
    773 	struct disklist *dl = x86_alldisks;
    774 
    775 	if (fp->f_type != DTYPE_VNODE)
    776 		return NULL;
    777 	vp = (struct vnode *)fp->f_data;
    778 
    779 	if (vp->v_type != VBLK)
    780 		return NULL;
    781 
    782 	blkname = devsw_blk2name(major(vp->v_rdev));
    783 	snprintf(diskname, sizeof diskname, "%s%llu", blkname,
    784 	    (unsigned long long)DISKUNIT(vp->v_rdev));
    785 
    786 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    787 		nip = &dl->dl_nativedisks[i];
    788 		if (strcmp(diskname, nip->ni_devname))
    789 			continue;
    790 		if (nip->ni_nmatches != 0)
    791 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    792 	}
    793 
    794 	return NULL;
    795 }
    796 
    797 
    798 /*
    799  * We come here in a last attempt to satisfy a Linux ioctl() call
    800  */
    801 int
    802 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
    803 {
    804 	/* {
    805 		syscallarg(int) fd;
    806 		syscallarg(u_long) com;
    807 		syscallarg(void *) data;
    808 	} */
    809 	struct sys_ioctl_args bia;
    810 	u_long com;
    811 	int error, error1;
    812 #if (NWSDISPLAY > 0)
    813 	struct vt_mode lvt;
    814 	struct kbentry kbe;
    815 #endif
    816 	struct linux_hd_geometry hdg;
    817 	struct linux_hd_big_geometry hdg_big;
    818 	struct biosdisk_info *bip;
    819 	file_t *fp;
    820 	int fd;
    821 	struct disklabel label, *labp;
    822 	struct partinfo partp;
    823 	int (*ioctlf)(struct file *, u_long, void *);
    824 	u_long start, biostotal, realtotal;
    825 	u_char heads, sectors;
    826 	u_int cylinders;
    827 	struct ioctl_pt pt;
    828 
    829 	fd = SCARG(uap, fd);
    830 	SCARG(&bia, fd) = fd;
    831 	SCARG(&bia, data) = SCARG(uap, data);
    832 	com = SCARG(uap, com);
    833 
    834 	if ((fp = fd_getfile(fd)) == NULL)
    835 		return (EBADF);
    836 
    837 	switch (com) {
    838 #if (NWSDISPLAY > 0)
    839 	case LINUX_KDGKBMODE:
    840 		com = KDGKBMODE;
    841 		break;
    842 	case LINUX_KDSKBMODE:
    843 		com = KDSKBMODE;
    844 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    845 			SCARG(&bia, data) = (void *)K_RAW;
    846 		break;
    847 	case LINUX_KIOCSOUND:
    848 		SCARG(&bia, data) =
    849 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    850 		/* fall through */
    851 	case LINUX_KDMKTONE:
    852 		com = KDMKTONE;
    853 		break;
    854 	case LINUX_KDSETMODE:
    855 		com = KDSETMODE;
    856 		break;
    857 	case LINUX_KDGETMODE:
    858 		/* KD_* values are equal to the wscons numbers */
    859 		com = WSDISPLAYIO_GMODE;
    860 		break;
    861 	case LINUX_KDENABIO:
    862 		com = KDENABIO;
    863 		break;
    864 	case LINUX_KDDISABIO:
    865 		com = KDDISABIO;
    866 		break;
    867 	case LINUX_KDGETLED:
    868 		com = KDGETLED;
    869 		break;
    870 	case LINUX_KDSETLED:
    871 		com = KDSETLED;
    872 		break;
    873 	case LINUX_VT_OPENQRY:
    874 		com = VT_OPENQRY;
    875 		break;
    876 	case LINUX_VT_GETMODE:
    877 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
    878 		if (error != 0)
    879 			goto out;
    880 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    881 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    882 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    883 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    884 		goto out;
    885 	case LINUX_VT_SETMODE:
    886 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    887 		if (error != 0)
    888 			goto out;
    889 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    890 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    891 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    892 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
    893 		goto out;
    894 	case LINUX_VT_DISALLOCATE:
    895 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    896 		error = 0;
    897 		goto out;
    898 	case LINUX_VT_RELDISP:
    899 		com = VT_RELDISP;
    900 		break;
    901 	case LINUX_VT_ACTIVATE:
    902 		com = VT_ACTIVATE;
    903 		break;
    904 	case LINUX_VT_WAITACTIVE:
    905 		com = VT_WAITACTIVE;
    906 		break;
    907 	case LINUX_VT_GETSTATE:
    908 		com = VT_GETSTATE;
    909 		break;
    910 	case LINUX_KDGKBTYPE:
    911 	    {
    912 		static const u_int8_t kb101 = KB_101;
    913 
    914 		/* This is what Linux does. */
    915 		error = copyout(&kb101, SCARG(uap, data), 1);
    916 		goto out;
    917 	    }
    918 	case LINUX_KDGKBENT:
    919 		/*
    920 		 * The Linux KDGKBENT ioctl is different from the
    921 		 * SYSV original. So we handle it in machdep code.
    922 		 * XXX We should use keyboard mapping information
    923 		 * from wsdisplay, but this would be expensive.
    924 		 */
    925 		if ((error = copyin(SCARG(uap, data), &kbe,
    926 				    sizeof(struct kbentry))))
    927 			goto out;
    928 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    929 		    || kbe.kb_index >= NR_KEYS) {
    930 			error = EINVAL;
    931 			goto out;
    932 		}
    933 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    934 		error = copyout(&kbe, SCARG(uap, data),
    935 				sizeof(struct kbentry));
    936 		goto out;
    937 #endif
    938 	case LINUX_HDIO_GETGEO:
    939 	case LINUX_HDIO_GETGEO_BIG:
    940 		/*
    941 		 * Try to mimic Linux behaviour: return the BIOS geometry
    942 		 * if possible (extending its # of cylinders if it's beyond
    943 		 * the 1023 limit), fall back to the MI geometry (i.e.
    944 		 * the real geometry) if not found, by returning an
    945 		 * error. See common/linux_hdio.c
    946 		 */
    947 		bip = fd2biosinfo(curproc, fp);
    948 		ioctlf = fp->f_ops->fo_ioctl;
    949 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
    950 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
    951 		if (error != 0 && error1 != 0) {
    952 			error = error1;
    953 			goto out;
    954 		}
    955 		labp = error != 0 ? &label : partp.disklab;
    956 		start = error1 != 0 ? partp.part->p_offset : 0;
    957 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    958 		    && bip->bi_cyl != 0) {
    959 			heads = bip->bi_head;
    960 			sectors = bip->bi_sec;
    961 			cylinders = bip->bi_cyl;
    962 			biostotal = heads * sectors * cylinders;
    963 			realtotal = labp->d_ntracks * labp->d_nsectors *
    964 			    labp->d_ncylinders;
    965 			if (realtotal > biostotal)
    966 				cylinders = realtotal / (heads * sectors);
    967 		} else {
    968 			heads = labp->d_ntracks;
    969 			cylinders = labp->d_ncylinders;
    970 			sectors = labp->d_nsectors;
    971 		}
    972 		if (com == LINUX_HDIO_GETGEO) {
    973 			hdg.start = start;
    974 			hdg.heads = heads;
    975 			hdg.cylinders = cylinders;
    976 			hdg.sectors = sectors;
    977 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
    978 			goto out;
    979 		} else {
    980 			hdg_big.start = start;
    981 			hdg_big.heads = heads;
    982 			hdg_big.cylinders = cylinders;
    983 			hdg_big.sectors = sectors;
    984 			error = copyout(&hdg_big, SCARG(uap, data),
    985 			    sizeof hdg_big);
    986 			goto out;
    987 		}
    988 
    989 	default:
    990 		/*
    991 		 * Unknown to us. If it's on a device, just pass it through
    992 		 * using PTIOCLINUX, the device itself might be able to
    993 		 * make some sense of it.
    994 		 * XXX hack: if the function returns EJUSTRETURN,
    995 		 * it has stuffed a sysctl return value in pt.data.
    996 		 */
    997 		ioctlf = fp->f_ops->fo_ioctl;
    998 		pt.com = SCARG(uap, com);
    999 		pt.data = SCARG(uap, data);
   1000 		error = ioctlf(fp, PTIOCLINUX, &pt);
   1001 		if (error == EJUSTRETURN) {
   1002 			retval[0] = (register_t)pt.data;
   1003 			error = 0;
   1004 		}
   1005 
   1006 		if (error == ENOTTY) {
   1007 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1008 			    com));
   1009 		}
   1010 		goto out;
   1011 	}
   1012 	SCARG(&bia, com) = com;
   1013 	error = sys_ioctl(curlwp, &bia, retval);
   1014 out:
   1015 	fd_putfile(fd);
   1016 	return error;
   1017 }
   1018 
   1019 /*
   1020  * Set I/O permissions for a process. Just set the maximum level
   1021  * right away (ignoring the argument), otherwise we would have
   1022  * to rely on I/O permission maps, which are not implemented.
   1023  */
   1024 int
   1025 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
   1026 {
   1027 	/* {
   1028 		syscallarg(int) level;
   1029 	} */
   1030 	struct trapframe *fp = l->l_md.md_regs;
   1031 
   1032 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1033 	    NULL, NULL, NULL, NULL) != 0)
   1034 		return EPERM;
   1035 	fp->tf_eflags |= PSL_IOPL;
   1036 	*retval = 0;
   1037 	return 0;
   1038 }
   1039 
   1040 /*
   1041  * See above. If a root process tries to set access to an I/O port,
   1042  * just let it have the whole range.
   1043  */
   1044 int
   1045 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
   1046 {
   1047 	/* {
   1048 		syscallarg(unsigned int) lo;
   1049 		syscallarg(unsigned int) hi;
   1050 		syscallarg(int) val;
   1051 	} */
   1052 	struct trapframe *fp = l->l_md.md_regs;
   1053 
   1054 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1055 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1056 	    NULL, NULL) != 0)
   1057 		return EPERM;
   1058 	if (SCARG(uap, val))
   1059 		fp->tf_eflags |= PSL_IOPL;
   1060 	*retval = 0;
   1061 	return 0;
   1062 }
   1063 
   1064 int
   1065 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1066     void *arg)
   1067 {
   1068 	return 0;
   1069 }
   1070 
   1071 const char *
   1072 linux_get_uname_arch(void)
   1073 {
   1074 	static char uname_arch[5] = "i386";
   1075 
   1076 	if (uname_arch[1] == '3')
   1077 		uname_arch[1] += cpu_class;
   1078 	return uname_arch;
   1079 }
   1080