Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.151
      1 /*	$NetBSD: linux_machdep.c,v 1.151 2011/11/18 04:07:44 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000, 2008, 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden, and by Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.151 2011/11/18 04:07:44 christos Exp $");
     34 
     35 #if defined(_KERNEL_OPT)
     36 #include "opt_vm86.h"
     37 #include "opt_user_ldt.h"
     38 #endif
     39 
     40 #include <sys/param.h>
     41 #include <sys/systm.h>
     42 #include <sys/signalvar.h>
     43 #include <sys/kernel.h>
     44 #include <sys/proc.h>
     45 #include <sys/buf.h>
     46 #include <sys/reboot.h>
     47 #include <sys/conf.h>
     48 #include <sys/exec.h>
     49 #include <sys/file.h>
     50 #include <sys/callout.h>
     51 #include <sys/malloc.h>
     52 #include <sys/mbuf.h>
     53 #include <sys/msgbuf.h>
     54 #include <sys/mount.h>
     55 #include <sys/vnode.h>
     56 #include <sys/device.h>
     57 #include <sys/syscallargs.h>
     58 #include <sys/filedesc.h>
     59 #include <sys/exec_elf.h>
     60 #include <sys/disklabel.h>
     61 #include <sys/ioctl.h>
     62 #include <sys/wait.h>
     63 #include <sys/kauth.h>
     64 #include <sys/kmem.h>
     65 
     66 #include <miscfs/specfs/specdev.h>
     67 
     68 #include <compat/linux/common/linux_types.h>
     69 #include <compat/linux/common/linux_signal.h>
     70 #include <compat/linux/common/linux_util.h>
     71 #include <compat/linux/common/linux_ioctl.h>
     72 #include <compat/linux/common/linux_hdio.h>
     73 #include <compat/linux/common/linux_exec.h>
     74 #include <compat/linux/common/linux_machdep.h>
     75 #include <compat/linux/common/linux_errno.h>
     76 
     77 #include <compat/linux/linux_syscallargs.h>
     78 
     79 #include <sys/cpu.h>
     80 #include <machine/cpufunc.h>
     81 #include <machine/psl.h>
     82 #include <machine/reg.h>
     83 #include <machine/segments.h>
     84 #include <machine/specialreg.h>
     85 #include <machine/sysarch.h>
     86 #include <machine/vm86.h>
     87 #include <machine/vmparam.h>
     88 
     89 /*
     90  * To see whether wscons is configured (for virtual console ioctl calls).
     91  */
     92 #if defined(_KERNEL_OPT)
     93 #include "wsdisplay.h"
     94 #endif
     95 #if (NWSDISPLAY > 0)
     96 #include <dev/wscons/wsconsio.h>
     97 #include <dev/wscons/wsdisplay_usl_io.h>
     98 #if defined(_KERNEL_OPT)
     99 #include "opt_xserver.h"
    100 #endif
    101 #endif
    102 
/*
 * Debug tracing.  When DEBUG_LINUX is defined, DPRINTF((fmt, ...)) expands
 * to uprintf(fmt, ...); otherwise it expands to nothing.  The single macro
 * argument must itself be a parenthesized argument list.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif
    108 
/* Map an open file to a BIOS disk description; NULL when there is none. */
static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
/* Disk list walked by fd2biosinfo(); only declared here. */
extern struct disklist *x86_alldisks;
/* Helpers that build the Linux-format signal context handed to userland. */
static void linux_save_ucontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *);
/* Shared back end of both sigreturn system calls. */
static int linux_restore_sigcontext(struct lwp *,
    struct linux_sigcontext *, register_t *);
/* Signal delivery for SA_SIGINFO handlers and for old-style handlers. */
static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);

/*
 * Signal trampolines.  linux_rt_sendsig() uses the distance between the two
 * symbols to locate the rt trampoline relative to the process's sigcode.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
    121 
    122 /*
    123  * Deal with some i386-specific things in the Linux emulation code.
    124  */
    125 
    126 void
    127 linux_setregs(struct lwp *l, struct exec_package *epp, vaddr_t stack)
    128 {
    129 	struct pcb *pcb = lwp_getpcb(l);
    130 	struct trapframe *tf;
    131 
    132 #if NNPX > 0
    133 	/* If we were using the FPU, forget about it. */
    134 	if (npxproc == l)
    135 		npxdrop();
    136 #endif
    137 
    138 #ifdef USER_LDT
    139 	pmap_ldt_cleanup(l);
    140 #endif
    141 
    142 	l->l_md.md_flags &= ~MDL_USEDFPU;
    143 
    144 	if (i386_use_fxsave) {
    145 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    146 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    147 	} else
    148 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    149 
    150 	tf = l->l_md.md_regs;
    151 	tf->tf_gs = 0;
    152 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    153 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    154 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    155 	tf->tf_edi = 0;
    156 	tf->tf_esi = 0;
    157 	tf->tf_ebp = 0;
    158 	tf->tf_ebx = l->l_proc->p_psstrp;
    159 	tf->tf_edx = 0;
    160 	tf->tf_ecx = 0;
    161 	tf->tf_eax = 0;
    162 	tf->tf_eip = epp->ep_entry;
    163 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    164 	tf->tf_eflags = PSL_USERSET;
    165 	tf->tf_esp = stack;
    166 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    167 }
    168 
    169 /*
    170  * Send an interrupt to process.
    171  *
    172  * Stack is set up to allow sigcode stored
    173  * in u. to call routine, followed by kcall
    174  * to sigreturn routine below.  After sigreturn
    175  * resets the signal mask, the stack, and the
    176  * frame pointer, it returns to the user
    177  * specified pc, psl.
    178  */
    179 
    180 void
    181 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    182 {
    183 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    184 		linux_rt_sendsig(ksi, mask);
    185 	else
    186 		linux_old_sendsig(ksi, mask);
    187 }
    188 
    189 
    190 static void
    191 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    192 {
    193 	uc->uc_flags = 0;
    194 	uc->uc_link = NULL;
    195 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    196 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    197 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    198 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    199 }
    200 
    201 static void
    202 linux_save_sigcontext(struct lwp *l, struct trapframe *tf,
    203     const sigset_t *mask, struct linux_sigcontext *sc)
    204 {
    205 	struct pcb *pcb = lwp_getpcb(l);
    206 
    207 	/* Save register context. */
    208 #ifdef VM86
    209 	if (tf->tf_eflags & PSL_VM) {
    210 		sc->sc_gs = tf->tf_vm86_gs;
    211 		sc->sc_fs = tf->tf_vm86_fs;
    212 		sc->sc_es = tf->tf_vm86_es;
    213 		sc->sc_ds = tf->tf_vm86_ds;
    214 		sc->sc_eflags = get_vflags(l);
    215 	} else
    216 #endif
    217 	{
    218 		sc->sc_gs = tf->tf_gs;
    219 		sc->sc_fs = tf->tf_fs;
    220 		sc->sc_es = tf->tf_es;
    221 		sc->sc_ds = tf->tf_ds;
    222 		sc->sc_eflags = tf->tf_eflags;
    223 	}
    224 	sc->sc_edi = tf->tf_edi;
    225 	sc->sc_esi = tf->tf_esi;
    226 	sc->sc_esp = tf->tf_esp;
    227 	sc->sc_ebp = tf->tf_ebp;
    228 	sc->sc_ebx = tf->tf_ebx;
    229 	sc->sc_edx = tf->tf_edx;
    230 	sc->sc_ecx = tf->tf_ecx;
    231 	sc->sc_eax = tf->tf_eax;
    232 	sc->sc_eip = tf->tf_eip;
    233 	sc->sc_cs = tf->tf_cs;
    234 	sc->sc_esp_at_signal = tf->tf_esp;
    235 	sc->sc_ss = tf->tf_ss;
    236 	sc->sc_err = tf->tf_err;
    237 	sc->sc_trapno = tf->tf_trapno;
    238 	sc->sc_cr2 = pcb->pcb_cr2;
    239 	sc->sc_387 = NULL;
    240 
    241 	/* Save signal stack. */
    242 	/* Linux doesn't save the onstack flag in sigframe */
    243 
    244 	/* Save signal mask. */
    245 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    246 }
    247 
    248 static void
    249 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    250 {
    251 	struct lwp *l = curlwp;
    252 	struct proc *p = l->l_proc;
    253 	struct trapframe *tf;
    254 	struct linux_rt_sigframe *fp, frame;
    255 	int onstack, error;
    256 	int sig = ksi->ksi_signo;
    257 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    258 	struct sigaltstack *sas = &l->l_sigstk;
    259 
    260 	tf = l->l_md.md_regs;
    261 	/* Do we need to jump onto the signal stack? */
    262 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    263 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    264 
    265 
    266 	/* Allocate space for the signal handler context. */
    267 	if (onstack)
    268 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    269 		    sas->ss_size);
    270 	else
    271 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    272 	fp--;
    273 
    274 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    275 	    onstack, fp, sig, tf->tf_eip,
    276 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
    277 
    278 	/* Build stack frame for signal trampoline. */
    279 	frame.sf_handler = catcher;
    280 	frame.sf_sig = native_to_linux_signo[sig];
    281 	frame.sf_sip = &fp->sf_si;
    282 	frame.sf_ucp = &fp->sf_uc;
    283 
    284 	/*
    285 	 * XXX: the following code assumes that the constants for
    286 	 * siginfo are the same between linux and NetBSD.
    287 	 */
    288 	native_to_linux_siginfo(&frame.sf_si, &ksi->ksi_info);
    289 
    290 	/* Save register context. */
    291 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    292 	sendsig_reset(l, sig);
    293 
    294 	mutex_exit(p->p_lock);
    295 	error = copyout(&frame, fp, sizeof(frame));
    296 	mutex_enter(p->p_lock);
    297 
    298 	if (error != 0) {
    299 		/*
    300 		 * Process has trashed its stack; give it an illegal
    301 		 * instruction to halt it in its tracks.
    302 		 */
    303 		sigexit(l, SIGILL);
    304 		/* NOTREACHED */
    305 	}
    306 
    307 	/*
    308 	 * Build context to run handler in.
    309 	 */
    310 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    311 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    312 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    313 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    314 	    (linux_rt_sigcode - linux_sigcode);
    315 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    316 	tf->tf_eflags &= ~PSL_CLEARSIG;
    317 	tf->tf_esp = (int)fp;
    318 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    319 
    320 	/* Remember that we're now on the signal stack. */
    321 	if (onstack)
    322 		sas->ss_flags |= SS_ONSTACK;
    323 }
    324 
    325 static void
    326 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    327 {
    328 	struct lwp *l = curlwp;
    329 	struct proc *p = l->l_proc;
    330 	struct trapframe *tf;
    331 	struct linux_sigframe *fp, frame;
    332 	int onstack, error;
    333 	int sig = ksi->ksi_signo;
    334 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    335 	struct sigaltstack *sas = &l->l_sigstk;
    336 
    337 	tf = l->l_md.md_regs;
    338 
    339 	/* Do we need to jump onto the signal stack? */
    340 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    341 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    342 
    343 	/* Allocate space for the signal handler context. */
    344 	if (onstack)
    345 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    346 		    sas->ss_size);
    347 	else
    348 		fp = (struct linux_sigframe *)tf->tf_esp;
    349 	fp--;
    350 
    351 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    352 	    onstack, fp, sig, tf->tf_eip,
    353 	    ((struct pcb *)lwp_getpcb(l))->pcb_cr2));
    354 
    355 	/* Build stack frame for signal trampoline. */
    356 	frame.sf_handler = catcher;
    357 	frame.sf_sig = native_to_linux_signo[sig];
    358 
    359 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    360 	sendsig_reset(l, sig);
    361 
    362 	mutex_exit(p->p_lock);
    363 	error = copyout(&frame, fp, sizeof(frame));
    364 	mutex_enter(p->p_lock);
    365 
    366 	if (error != 0) {
    367 		/*
    368 		 * Process has trashed its stack; give it an illegal
    369 		 * instruction to halt it in its tracks.
    370 		 */
    371 		sigexit(l, SIGILL);
    372 		/* NOTREACHED */
    373 	}
    374 
    375 	/*
    376 	 * Build context to run handler in.
    377 	 */
    378 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    379 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    380 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    381 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    382 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    383 	tf->tf_eflags &= ~PSL_CLEARSIG;
    384 	tf->tf_esp = (int)fp;
    385 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    386 
    387 	/* Remember that we're now on the signal stack. */
    388 	if (onstack)
    389 		sas->ss_flags |= SS_ONSTACK;
    390 }
    391 
    392 /*
    393  * System call to cleanup state after a signal
    394  * has been taken.  Reset signal mask and
    395  * stack state from context left by sendsig (above).
    396  * Return to previous pc and psl as specified by
    397  * context left by sendsig. Check carefully to
    398  * make sure that the user has not modified the
    399  * psl to gain improper privileges or to cause
    400  * a machine fault.
    401  */
    402 int
    403 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
    404 {
    405 	/* {
    406 		syscallarg(struct linux_ucontext *) ucp;
    407 	} */
    408 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    409 	int error;
    410 
    411 	/*
    412 	 * The trampoline code hands us the context.
    413 	 * It is unsafe to keep track of it ourselves, in the event that a
    414 	 * program jumps out of a signal handler.
    415 	 */
    416 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    417 		return error;
    418 
    419 	/* XXX XAX we can do better here by using more of the ucontext */
    420 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    421 }
    422 
    423 int
    424 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
    425 {
    426 	/* {
    427 		syscallarg(struct linux_sigcontext *) scp;
    428 	} */
    429 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    430 	int error;
    431 
    432 	/*
    433 	 * The trampoline code hands us the context.
    434 	 * It is unsafe to keep track of it ourselves, in the event that a
    435 	 * program jumps out of a signal handler.
    436 	 */
    437 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    438 		return error;
    439 	return linux_restore_sigcontext(l, &context, retval);
    440 }
    441 
/*
 * Common back end of the two sigreturn system calls: load the trapframe,
 * the on-signal-stack flag and the signal mask from a Linux sigcontext.
 *
 *	l	lwp returning from a signal handler
 *	scp	kernel copy of the sigcontext (already copied in by the caller)
 *	retval	not used by this function
 *
 * Returns EINVAL when the protected-mode sanity check on the supplied
 * eflags/cs fails, and EJUSTRETURN once the context has been installed.
 */
static int
linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    register_t *retval)
{
	struct proc *p = l->l_proc;
	struct sigaltstack *sas = &l->l_sigstk;
	struct trapframe *tf;
	sigset_t mask;
	ssize_t ss_gap;

	/* Restore register context. */
	tf = l->l_md.md_regs;
	DPRINTF(("sigreturn enter esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));

#ifdef VM86
	if (scp->sc_eflags & PSL_VM) {
		void syscall_vm86(struct trapframe *);

		/* Returning into VM86 mode: fill the vm86 segment slots. */
		tf->tf_vm86_gs = scp->sc_gs;
		tf->tf_vm86_fs = scp->sc_fs;
		tf->tf_vm86_es = scp->sc_es;
		tf->tf_vm86_ds = scp->sc_ds;
		set_vflags(l, scp->sc_eflags);
		p->p_md.md_syscall = syscall_vm86;
	} else
#endif
	{
		/*
		 * Check for security violations.  If we're returning to
		 * protected mode, the CPU will validate the segment registers
		 * automatically and generate a trap on violations.  We handle
		 * the trap, rather than doing all of the checking here.
		 */
		/*
		 * Reject contexts that change any PSL_USERSTATIC flag bit
		 * or whose cs/eflags do not pass USERMODE().
		 */
		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
		    !USERMODE(scp->sc_cs, scp->sc_eflags))
			return EINVAL;

		tf->tf_gs = scp->sc_gs;
		tf->tf_fs = scp->sc_fs;
		tf->tf_es = scp->sc_es;
		tf->tf_ds = scp->sc_ds;
#ifdef VM86
		/*
		 * The old frame was in VM86 mode but the new one is not:
		 * let the emulation re-select its syscall entry.
		 */
		if (tf->tf_eflags & PSL_VM)
			(*p->p_emul->e_syscall_intern)(p);
#endif
		tf->tf_eflags = scp->sc_eflags;
	}
	tf->tf_edi = scp->sc_edi;
	tf->tf_esi = scp->sc_esi;
	tf->tf_ebp = scp->sc_ebp;
	tf->tf_ebx = scp->sc_ebx;
	tf->tf_edx = scp->sc_edx;
	tf->tf_ecx = scp->sc_ecx;
	tf->tf_eax = scp->sc_eax;
	tf->tf_eip = scp->sc_eip;
	tf->tf_cs = scp->sc_cs;
	/* The stack pointer is taken from sc_esp_at_signal, not sc_esp. */
	tf->tf_esp = scp->sc_esp_at_signal;
	tf->tf_ss = scp->sc_ss;

	/* Restore signal stack. */
	/*
	 * Linux really does it this way; it doesn't have space in sigframe
	 * to save the onstack flag.
	 */
	mutex_enter(p->p_lock);
	/*
	 * Infer the flag instead: we are on the alternate stack exactly when
	 * the restored stack pointer lies within [ss_sp, ss_sp + ss_size).
	 */
	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
	if (ss_gap >= 0 && ss_gap < sas->ss_size)
		sas->ss_flags |= SS_ONSTACK;
	else
		sas->ss_flags &= ~SS_ONSTACK;

	/* Restore signal mask. */
	linux_old_to_native_sigset(&mask, &scp->sc_mask);
	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
	mutex_exit(p->p_lock);

	DPRINTF(("sigreturn exit esp=0x%x eip=0x%x\n", tf->tf_esp, tf->tf_eip));
	/* Registers were loaded by hand; the syscall return path must not touch them. */
	return EJUSTRETURN;
}
    521 
    522 #ifdef USER_LDT
    523 
    524 static int
    525 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    526     register_t *retval)
    527 {
    528 	struct x86_get_ldt_args gl;
    529 	int error;
    530 	union descriptor *ldt_buf;
    531 	size_t sz;
    532 
    533 	/*
    534 	 * I've checked the linux code - this function is asymetric with
    535 	 * linux_write_ldt, and returns raw ldt entries.
    536 	 * NB, the code I saw zerod the spare parts of the user buffer.
    537 	 */
    538 
    539 	DPRINTF(("linux_read_ldt!"));
    540 
    541 	sz = 8192 * sizeof(*ldt_buf);
    542 	ldt_buf = kmem_zalloc(sz, KM_SLEEP);
    543 	gl.start = 0;
    544 	gl.desc = NULL;
    545 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    546 	error = x86_get_ldt1(l, &gl, ldt_buf);
    547 	/* NB gl.num might have changed */
    548 	if (error == 0) {
    549 		*retval = gl.num * sizeof *ldt;
    550 		error = copyout(ldt_buf, SCARG(uap, ptr),
    551 		    gl.num * sizeof *ldt_buf);
    552 	}
    553 	kmem_free(ldt_buf, sz);
    554 
    555 	return error;
    556 }
    557 
/*
 * Descriptor description copied in from userland by linux_write_ldt(),
 * which translates it into a native segment descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to write; must be < 8192 */
	u_long base_addr;		/* segment base address */
	u_int limit;			/* segment limit (low 20 bits are used) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* placed in bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless in old mode */
};
    569 
    570 static int
    571 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    572     int oldmode)
    573 {
    574 	struct linux_ldt_info ldt_info;
    575 	union descriptor d;
    576 	struct x86_set_ldt_args sl;
    577 	int error;
    578 
    579 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    580 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    581 		return (EINVAL);
    582 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    583 		return error;
    584 	if (ldt_info.entry_number >= 8192)
    585 		return (EINVAL);
    586 	if (ldt_info.contents == 3) {
    587 		if (oldmode)
    588 			return (EINVAL);
    589 		if (ldt_info.seg_not_present)
    590 			return (EINVAL);
    591 	}
    592 
    593 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    594 	    (oldmode || (ldt_info.contents == 0 &&
    595 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    596 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    597 	    ldt_info.useable == 0))) {
    598 		/* this means you should zero the ldt */
    599 		(void)memset(&d, 0, sizeof(d));
    600 	} else {
    601 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    602 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    603 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    604 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    605 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    606 		    (!ldt_info.read_exec_only << 1);
    607 		d.sd.sd_dpl = SEL_UPL;
    608 		d.sd.sd_p = !ldt_info.seg_not_present;
    609 		d.sd.sd_def32 = ldt_info.seg_32bit;
    610 		d.sd.sd_gran = ldt_info.limit_in_pages;
    611 		if (!oldmode)
    612 			d.sd.sd_xx = ldt_info.useable;
    613 		else
    614 			d.sd.sd_xx = 0;
    615 	}
    616 	sl.start = ldt_info.entry_number;
    617 	sl.desc = NULL;
    618 	sl.num = 1;
    619 
    620 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    621 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    622 
    623 	return x86_set_ldt1(l, &sl, &d);
    624 }
    625 
    626 #endif /* USER_LDT */
    627 
    628 int
    629 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
    630 {
    631 	/* {
    632 		syscallarg(int) func;
    633 		syscallarg(void *) ptr;
    634 		syscallarg(size_t) bytecount;
    635 	} */
    636 
    637 	switch (SCARG(uap, func)) {
    638 #ifdef USER_LDT
    639 	case 0:
    640 		return linux_read_ldt(l, (const void *)uap, retval);
    641 	case 1:
    642 		return linux_write_ldt(l, (const void *)uap, 1);
    643 	case 2:
    644 #ifdef notyet
    645 		return linux_read_default_ldt(l, (const void *)uap, retval);
    646 #else
    647 		return (ENOSYS);
    648 #endif
    649 	case 0x11:
    650 		return linux_write_ldt(l, (const void *)uap, 0);
    651 #endif /* USER_LDT */
    652 
    653 	default:
    654 		return (ENOSYS);
    655 	}
    656 }
    657 
    658 /*
    659  * XXX Pathetic hack to make svgalib work. This will fake the major
    660  * device number of an opened VT so that svgalib likes it. grmbl.
    661  * Should probably do it 'wrong the right way' and use a mapping
    662  * array for all major device numbers, and map linux_mknod too.
    663  */
    664 dev_t
    665 linux_fakedev(dev_t dev, int raw)
    666 {
    667 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    668 	const struct cdevsw *cd = cdevsw_lookup(dev);
    669 
    670 	if (raw) {
    671 #if (NWSDISPLAY > 0)
    672 		extern const struct cdevsw wsdisplay_cdevsw;
    673 		if (cd == &wsdisplay_cdevsw)
    674 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    675 #endif
    676 	}
    677 
    678 	if (cd == &ptc_cdevsw)
    679 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    680 	if (cd == &pts_cdevsw)
    681 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    682 
    683 	return dev;
    684 }
    685 
    686 #if (NWSDISPLAY > 0)
/*
 * Keyboard translation tables, one per modifier state (plain, shift,
 * altgr, ctrl), each holding NR_KEYS 16-bit entries.
 * That's not complete, but enough to get an X server running.
 * NOTE(review): the tables are presumably indexed by keycode and hold
 * Linux-format key values - confirm against the code that consumes
 * linux_keytabs.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    760 
/*
 * Exported list of the keymap tables.  Note that altgr_map occupies two
 * consecutive slots (indices 2 and 3).
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    764 #endif
    765 
    766 static struct biosdisk_info *
    767 fd2biosinfo(struct proc *p, struct file *fp)
    768 {
    769 	struct vnode *vp;
    770 	const char *blkname;
    771 	char diskname[16];
    772 	int i;
    773 	struct nativedisk_info *nip;
    774 	struct disklist *dl = x86_alldisks;
    775 
    776 	if (fp->f_type != DTYPE_VNODE)
    777 		return NULL;
    778 	vp = (struct vnode *)fp->f_data;
    779 
    780 	if (vp->v_type != VBLK)
    781 		return NULL;
    782 
    783 	blkname = devsw_blk2name(major(vp->v_rdev));
    784 	snprintf(diskname, sizeof diskname, "%s%llu", blkname,
    785 	    (unsigned long long)DISKUNIT(vp->v_rdev));
    786 
    787 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    788 		nip = &dl->dl_nativedisks[i];
    789 		if (strcmp(diskname, nip->ni_devname))
    790 			continue;
    791 		if (nip->ni_nmatches != 0)
    792 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    793 	}
    794 
    795 	return NULL;
    796 }
    797 
    798 
    799 /*
    800  * We come here in a last attempt to satisfy a Linux ioctl() call
    801  */
    802 int
    803 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
    804 {
    805 	/* {
    806 		syscallarg(int) fd;
    807 		syscallarg(u_long) com;
    808 		syscallarg(void *) data;
    809 	} */
    810 	struct sys_ioctl_args bia;
    811 	u_long com;
    812 	int error, error1;
    813 #if (NWSDISPLAY > 0)
    814 	struct vt_mode lvt;
    815 	struct kbentry kbe;
    816 #endif
    817 	struct linux_hd_geometry hdg;
    818 	struct linux_hd_big_geometry hdg_big;
    819 	struct biosdisk_info *bip;
    820 	file_t *fp;
    821 	int fd;
    822 	struct disklabel label, *labp;
    823 	struct partinfo partp;
    824 	int (*ioctlf)(struct file *, u_long, void *);
    825 	u_long start, biostotal, realtotal;
    826 	u_char heads, sectors;
    827 	u_int cylinders;
    828 	struct ioctl_pt pt;
    829 
    830 	fd = SCARG(uap, fd);
    831 	SCARG(&bia, fd) = fd;
    832 	SCARG(&bia, data) = SCARG(uap, data);
    833 	com = SCARG(uap, com);
    834 
    835 	if ((fp = fd_getfile(fd)) == NULL)
    836 		return (EBADF);
    837 
    838 	switch (com) {
    839 #if (NWSDISPLAY > 0)
    840 	case LINUX_KDGKBMODE:
    841 		com = KDGKBMODE;
    842 		break;
    843 	case LINUX_KDSKBMODE:
    844 		com = KDSKBMODE;
    845 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    846 			SCARG(&bia, data) = (void *)K_RAW;
    847 		break;
    848 	case LINUX_KIOCSOUND:
    849 		SCARG(&bia, data) =
    850 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    851 		/* fall through */
    852 	case LINUX_KDMKTONE:
    853 		com = KDMKTONE;
    854 		break;
    855 	case LINUX_KDSETMODE:
    856 		com = KDSETMODE;
    857 		break;
    858 	case LINUX_KDGETMODE:
    859 		/* KD_* values are equal to the wscons numbers */
    860 		com = WSDISPLAYIO_GMODE;
    861 		break;
    862 	case LINUX_KDENABIO:
    863 		com = KDENABIO;
    864 		break;
    865 	case LINUX_KDDISABIO:
    866 		com = KDDISABIO;
    867 		break;
    868 	case LINUX_KDGETLED:
    869 		com = KDGETLED;
    870 		break;
    871 	case LINUX_KDSETLED:
    872 		com = KDSETLED;
    873 		break;
    874 	case LINUX_VT_OPENQRY:
    875 		com = VT_OPENQRY;
    876 		break;
    877 	case LINUX_VT_GETMODE:
    878 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
    879 		if (error != 0)
    880 			goto out;
    881 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    882 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    883 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    884 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    885 		goto out;
    886 	case LINUX_VT_SETMODE:
    887 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    888 		if (error != 0)
    889 			goto out;
    890 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    891 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    892 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    893 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
    894 		goto out;
    895 	case LINUX_VT_DISALLOCATE:
    896 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    897 		error = 0;
    898 		goto out;
    899 	case LINUX_VT_RELDISP:
    900 		com = VT_RELDISP;
    901 		break;
    902 	case LINUX_VT_ACTIVATE:
    903 		com = VT_ACTIVATE;
    904 		break;
    905 	case LINUX_VT_WAITACTIVE:
    906 		com = VT_WAITACTIVE;
    907 		break;
    908 	case LINUX_VT_GETSTATE:
    909 		com = VT_GETSTATE;
    910 		break;
    911 	case LINUX_KDGKBTYPE:
    912 	    {
    913 		static const u_int8_t kb101 = KB_101;
    914 
    915 		/* This is what Linux does. */
    916 		error = copyout(&kb101, SCARG(uap, data), 1);
    917 		goto out;
    918 	    }
    919 	case LINUX_KDGKBENT:
    920 		/*
    921 		 * The Linux KDGKBENT ioctl is different from the
    922 		 * SYSV original. So we handle it in machdep code.
    923 		 * XXX We should use keyboard mapping information
    924 		 * from wsdisplay, but this would be expensive.
    925 		 */
    926 		if ((error = copyin(SCARG(uap, data), &kbe,
    927 				    sizeof(struct kbentry))))
    928 			goto out;
    929 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    930 		    || kbe.kb_index >= NR_KEYS) {
    931 			error = EINVAL;
    932 			goto out;
    933 		}
    934 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    935 		error = copyout(&kbe, SCARG(uap, data),
    936 				sizeof(struct kbentry));
    937 		goto out;
    938 #endif
    939 	case LINUX_HDIO_GETGEO:
    940 	case LINUX_HDIO_GETGEO_BIG:
    941 		/*
    942 		 * Try to mimic Linux behaviour: return the BIOS geometry
    943 		 * if possible (extending its # of cylinders if it's beyond
    944 		 * the 1023 limit), fall back to the MI geometry (i.e.
    945 		 * the real geometry) if not found, by returning an
    946 		 * error. See common/linux_hdio.c
    947 		 */
    948 		bip = fd2biosinfo(curproc, fp);
    949 		ioctlf = fp->f_ops->fo_ioctl;
    950 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
    951 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
    952 		if (error != 0 && error1 != 0) {
    953 			error = error1;
    954 			goto out;
    955 		}
    956 		labp = error != 0 ? &label : partp.disklab;
    957 		start = error1 != 0 ? partp.part->p_offset : 0;
    958 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
    959 		    && bip->bi_cyl != 0) {
    960 			heads = bip->bi_head;
    961 			sectors = bip->bi_sec;
    962 			cylinders = bip->bi_cyl;
    963 			biostotal = heads * sectors * cylinders;
    964 			realtotal = labp->d_ntracks * labp->d_nsectors *
    965 			    labp->d_ncylinders;
    966 			if (realtotal > biostotal)
    967 				cylinders = realtotal / (heads * sectors);
    968 		} else {
    969 			heads = labp->d_ntracks;
    970 			cylinders = labp->d_ncylinders;
    971 			sectors = labp->d_nsectors;
    972 		}
    973 		if (com == LINUX_HDIO_GETGEO) {
    974 			hdg.start = start;
    975 			hdg.heads = heads;
    976 			hdg.cylinders = cylinders;
    977 			hdg.sectors = sectors;
    978 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
    979 			goto out;
    980 		} else {
    981 			hdg_big.start = start;
    982 			hdg_big.heads = heads;
    983 			hdg_big.cylinders = cylinders;
    984 			hdg_big.sectors = sectors;
    985 			error = copyout(&hdg_big, SCARG(uap, data),
    986 			    sizeof hdg_big);
    987 			goto out;
    988 		}
    989 
    990 	default:
    991 		/*
    992 		 * Unknown to us. If it's on a device, just pass it through
    993 		 * using PTIOCLINUX, the device itself might be able to
    994 		 * make some sense of it.
    995 		 * XXX hack: if the function returns EJUSTRETURN,
    996 		 * it has stuffed a sysctl return value in pt.data.
    997 		 */
    998 		ioctlf = fp->f_ops->fo_ioctl;
    999 		pt.com = SCARG(uap, com);
   1000 		pt.data = SCARG(uap, data);
   1001 		error = ioctlf(fp, PTIOCLINUX, &pt);
   1002 		if (error == EJUSTRETURN) {
   1003 			retval[0] = (register_t)pt.data;
   1004 			error = 0;
   1005 		}
   1006 
   1007 		if (error == ENOTTY) {
   1008 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1009 			    com));
   1010 		}
   1011 		goto out;
   1012 	}
   1013 	SCARG(&bia, com) = com;
   1014 	error = sys_ioctl(curlwp, &bia, retval);
   1015 out:
   1016 	fd_putfile(fd);
   1017 	return error;
   1018 }
   1019 
   1020 /*
   1021  * Set I/O permissions for a process. Just set the maximum level
   1022  * right away (ignoring the argument), otherwise we would have
   1023  * to rely on I/O permission maps, which are not implemented.
   1024  */
   1025 int
   1026 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
   1027 {
   1028 	/* {
   1029 		syscallarg(int) level;
   1030 	} */
   1031 	struct trapframe *fp = l->l_md.md_regs;
   1032 
   1033 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1034 	    NULL, NULL, NULL, NULL) != 0)
   1035 		return EPERM;
   1036 	fp->tf_eflags |= PSL_IOPL;
   1037 	*retval = 0;
   1038 	return 0;
   1039 }
   1040 
   1041 /*
   1042  * See above. If a root process tries to set access to an I/O port,
   1043  * just let it have the whole range.
   1044  */
   1045 int
   1046 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
   1047 {
   1048 	/* {
   1049 		syscallarg(unsigned int) lo;
   1050 		syscallarg(unsigned int) hi;
   1051 		syscallarg(int) val;
   1052 	} */
   1053 	struct trapframe *fp = l->l_md.md_regs;
   1054 
   1055 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1056 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1057 	    NULL, NULL) != 0)
   1058 		return EPERM;
   1059 	if (SCARG(uap, val))
   1060 		fp->tf_eflags |= PSL_IOPL;
   1061 	*retval = 0;
   1062 	return 0;
   1063 }
   1064 
   1065 int
   1066 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1067     void *arg)
   1068 {
   1069 	return 0;
   1070 }
   1071 
   1072 const char *
   1073 linux_get_uname_arch(void)
   1074 {
   1075 	static char uname_arch[5] = "i386";
   1076 
   1077 	if (uname_arch[1] == '3')
   1078 		uname_arch[1] += cpu_class;
   1079 	return uname_arch;
   1080 }
   1081