Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.114.4.5
      1 /*	$NetBSD: linux_machdep.c,v 1.114.4.5 2007/01/19 20:18:46 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.114.4.5 2007/01/19 20:18:46 ad Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
/*
 * LDT helpers used by linux_sys_modify_ldt(); only built when the kernel
 * is configured with USER_LDT.
 */
#ifdef USER_LDT
#include <machine/cpu.h>
int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    register_t *));
#endif

/*
 * Debug tracing.  The argument is a complete, parenthesized uprintf()
 * argument list, e.g. DPRINTF(("x = %d\n", x)); without DEBUG_LINUX the
 * macro expands to nothing.
 */
#ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#else
#define DPRINTF(a)
#endif

/* Forward declarations for the file-local helpers defined below. */
static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
/* Disk list consulted by fd2biosinfo(); defined outside this file. */
extern struct disklist *x86_alldisks;
static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    const sigset_t *, struct linux_sigcontext *));
static int linux_restore_sigcontext __P((struct lwp *,
    struct linux_sigcontext *, register_t *));
static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));

/*
 * Signal trampolines.  linux_rt_sendsig() locates the rt trampoline by
 * adding (linux_rt_sigcode - linux_sigcode) to the process' sigcode address.
 */
extern char linux_sigcode[], linux_rt_sigcode[];
    137 /*
    138  * Deal with some i386-specific things in the Linux emulation code.
    139  */
    140 
    141 void
    142 linux_setregs(l, epp, stack)
    143 	struct lwp *l;
    144 	struct exec_package *epp;
    145 	u_long stack;
    146 {
    147 	struct pcb *pcb = &l->l_addr->u_pcb;
    148 	struct trapframe *tf;
    149 
    150 #if NNPX > 0
    151 	/* If we were using the FPU, forget about it. */
    152 	if (npxproc == l)
    153 		npxdrop();
    154 #endif
    155 
    156 #ifdef USER_LDT
    157 	pmap_ldt_cleanup(l);
    158 #endif
    159 
    160 	l->l_md.md_flags &= ~MDL_USEDFPU;
    161 
    162 	if (i386_use_fxsave) {
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    165 	} else
    166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    167 
    168 	tf = l->l_md.md_regs;
    169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    173 	tf->tf_edi = 0;
    174 	tf->tf_esi = 0;
    175 	tf->tf_ebp = 0;
    176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    177 	tf->tf_edx = 0;
    178 	tf->tf_ecx = 0;
    179 	tf->tf_eax = 0;
    180 	tf->tf_eip = epp->ep_entry;
    181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    182 	tf->tf_eflags = PSL_USERSET;
    183 	tf->tf_esp = stack;
    184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    185 }
    186 
    187 /*
    188  * Send an interrupt to process.
    189  *
    190  * Stack is set up to allow sigcode stored
    191  * in u. to call routine, followed by kcall
    192  * to sigreturn routine below.  After sigreturn
    193  * resets the signal mask, the stack, and the
    194  * frame pointer, it returns to the user
    195  * specified pc, psl.
    196  */
    197 
    198 void
    199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    200 {
    201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    202 		linux_rt_sendsig(ksi, mask);
    203 	else
    204 		linux_old_sendsig(ksi, mask);
    205 }
    206 
    207 
    208 static void
    209 linux_save_ucontext(l, tf, mask, sas, uc)
    210 	struct lwp *l;
    211 	struct trapframe *tf;
    212 	const sigset_t *mask;
    213 	struct sigaltstack *sas;
    214 	struct linux_ucontext *uc;
    215 {
    216 	uc->uc_flags = 0;
    217 	uc->uc_link = NULL;
    218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    222 }
    223 
    224 static void
    225 linux_save_sigcontext(l, tf, mask, sc)
    226 	struct lwp *l;
    227 	struct trapframe *tf;
    228 	const sigset_t *mask;
    229 	struct linux_sigcontext *sc;
    230 {
    231 	/* Save register context. */
    232 #ifdef VM86
    233 	if (tf->tf_eflags & PSL_VM) {
    234 		sc->sc_gs = tf->tf_vm86_gs;
    235 		sc->sc_fs = tf->tf_vm86_fs;
    236 		sc->sc_es = tf->tf_vm86_es;
    237 		sc->sc_ds = tf->tf_vm86_ds;
    238 		sc->sc_eflags = get_vflags(l);
    239 	} else
    240 #endif
    241 	{
    242 		sc->sc_gs = tf->tf_gs;
    243 		sc->sc_fs = tf->tf_fs;
    244 		sc->sc_es = tf->tf_es;
    245 		sc->sc_ds = tf->tf_ds;
    246 		sc->sc_eflags = tf->tf_eflags;
    247 	}
    248 	sc->sc_edi = tf->tf_edi;
    249 	sc->sc_esi = tf->tf_esi;
    250 	sc->sc_esp = tf->tf_esp;
    251 	sc->sc_ebp = tf->tf_ebp;
    252 	sc->sc_ebx = tf->tf_ebx;
    253 	sc->sc_edx = tf->tf_edx;
    254 	sc->sc_ecx = tf->tf_ecx;
    255 	sc->sc_eax = tf->tf_eax;
    256 	sc->sc_eip = tf->tf_eip;
    257 	sc->sc_cs = tf->tf_cs;
    258 	sc->sc_esp_at_signal = tf->tf_esp;
    259 	sc->sc_ss = tf->tf_ss;
    260 	sc->sc_err = tf->tf_err;
    261 	sc->sc_trapno = tf->tf_trapno;
    262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    263 	sc->sc_387 = NULL;
    264 
    265 	/* Save signal stack. */
    266 	/* Linux doesn't save the onstack flag in sigframe */
    267 
    268 	/* Save signal mask. */
    269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    270 }
    271 
    272 static void
    273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    274 {
    275 	struct lwp *l = curlwp;
    276 	struct proc *p = l->l_proc;
    277 	struct trapframe *tf;
    278 	struct linux_rt_sigframe *fp, frame;
    279 	int onstack, error;
    280 	linux_siginfo_t *lsi;
    281 	int sig = ksi->ksi_signo;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = l->l_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	/*
    309 	 * XXX: the following code assumes that the constants for
    310 	 * siginfo are the same between linux and NetBSD.
    311 	 */
    312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    314 	lsi->lsi_code = ksi->ksi_code;
    315 	switch (lsi->lsi_signo = frame.sf_sig) {
    316 	case LINUX_SIGILL:
    317 	case LINUX_SIGFPE:
    318 	case LINUX_SIGSEGV:
    319 	case LINUX_SIGBUS:
    320 	case LINUX_SIGTRAP:
    321 		lsi->lsi_addr = ksi->ksi_addr;
    322 		break;
    323 	case LINUX_SIGCHLD:
    324 		lsi->lsi_uid = ksi->ksi_uid;
    325 		lsi->lsi_pid = ksi->ksi_pid;
    326 		lsi->lsi_utime = ksi->ksi_utime;
    327 		lsi->lsi_stime = ksi->ksi_stime;
    328 
    329 		/* We use the same codes */
    330 		lsi->lsi_code = ksi->ksi_code;
    331 		/* XXX is that right? */
    332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    333 		break;
    334 	case LINUX_SIGIO:
    335 		lsi->lsi_band = ksi->ksi_band;
    336 		lsi->lsi_fd = ksi->ksi_fd;
    337 		break;
    338 	default:
    339 		lsi->lsi_uid = ksi->ksi_uid;
    340 		lsi->lsi_pid = ksi->ksi_pid;
    341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    344 		break;
    345 	}
    346 
    347 	/* Save register context. */
    348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    349 	sendsig_reset(l, sig);
    350 
    351 	mutex_exit(&p->p_smutex);
    352 	error = copyout(&frame, fp, sizeof(frame));
    353 	mutex_enter(&p->p_smutex);
    354 
    355 	if (error != 0) {
    356 		/*
    357 		 * Process has trashed its stack; give it an illegal
    358 		 * instruction to halt it in its tracks.
    359 		 */
    360 		sigexit(l, SIGILL);
    361 		/* NOTREACHED */
    362 	}
    363 
    364 	/*
    365 	 * Build context to run handler in.
    366 	 */
    367 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    370 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    371 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    372 	    (linux_rt_sigcode - linux_sigcode);
    373 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    374 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    375 	tf->tf_esp = (int)fp;
    376 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    377 
    378 	/* Remember that we're now on the signal stack. */
    379 	if (onstack)
    380 		sas->ss_flags |= SS_ONSTACK;
    381 }
    382 
    383 static void
    384 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    385 {
    386 	struct lwp *l = curlwp;
    387 	struct proc *p = l->l_proc;
    388 	struct trapframe *tf;
    389 	struct linux_sigframe *fp, frame;
    390 	int onstack, error;
    391 	int sig = ksi->ksi_signo;
    392 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    393 	struct sigaltstack *sas = l->l_sigstk;
    394 
    395 	tf = l->l_md.md_regs;
    396 
    397 	/* Do we need to jump onto the signal stack? */
    398 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    399 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    400 
    401 	/* Allocate space for the signal handler context. */
    402 	if (onstack)
    403 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    404 		    sas->ss_size);
    405 	else
    406 		fp = (struct linux_sigframe *)tf->tf_esp;
    407 	fp--;
    408 
    409 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    410 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    411 
    412 	/* Build stack frame for signal trampoline. */
    413 	frame.sf_handler = catcher;
    414 	frame.sf_sig = native_to_linux_signo[sig];
    415 
    416 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    417 	sendsig_reset(l, sig);
    418 
    419 	mutex_exit(&p->p_smutex);
    420 	error = copyout(&frame, fp, sizeof(frame));
    421 	mutex_enter(&p->p_smutex);
    422 
    423 	if (error != 0) {
    424 		/*
    425 		 * Process has trashed its stack; give it an illegal
    426 		 * instruction to halt it in its tracks.
    427 		 */
    428 		sigexit(l, SIGILL);
    429 		/* NOTREACHED */
    430 	}
    431 
    432 	/*
    433 	 * Build context to run handler in.
    434 	 */
    435 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    436 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    437 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    438 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    439 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    440 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    441 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    442 	tf->tf_esp = (int)fp;
    443 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    444 
    445 	/* Remember that we're now on the signal stack. */
    446 	if (onstack)
    447 		sas->ss_flags |= SS_ONSTACK;
    448 }
    449 
    450 /*
    451  * System call to cleanup state after a signal
    452  * has been taken.  Reset signal mask and
    453  * stack state from context left by sendsig (above).
    454  * Return to previous pc and psl as specified by
    455  * context left by sendsig. Check carefully to
    456  * make sure that the user has not modified the
    457  * psl to gain improper privileges or to cause
    458  * a machine fault.
    459  */
    460 int
    461 linux_sys_rt_sigreturn(l, v, retval)
    462 	struct lwp *l;
    463 	void *v;
    464 	register_t *retval;
    465 {
    466 	struct linux_sys_rt_sigreturn_args /* {
    467 		syscallarg(struct linux_ucontext *) ucp;
    468 	} */ *uap = v;
    469 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    470 	int error;
    471 
    472 	/*
    473 	 * The trampoline code hands us the context.
    474 	 * It is unsafe to keep track of it ourselves, in the event that a
    475 	 * program jumps out of a signal handler.
    476 	 */
    477 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    478 		return error;
    479 
    480 	/* XXX XAX we can do better here by using more of the ucontext */
    481 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    482 }
    483 
    484 int
    485 linux_sys_sigreturn(l, v, retval)
    486 	struct lwp *l;
    487 	void *v;
    488 	register_t *retval;
    489 {
    490 	struct linux_sys_sigreturn_args /* {
    491 		syscallarg(struct linux_sigcontext *) scp;
    492 	} */ *uap = v;
    493 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    494 	int error;
    495 
    496 	/*
    497 	 * The trampoline code hands us the context.
    498 	 * It is unsafe to keep track of it ourselves, in the event that a
    499 	 * program jumps out of a signal handler.
    500 	 */
    501 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    502 		return error;
    503 	return linux_restore_sigcontext(l, &context, retval);
    504 }
    505 
    506 static int
    507 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    508     register_t *retval)
    509 {
    510 	struct proc *p = l->l_proc;
    511 	struct sigaltstack *sas = l->l_sigstk;
    512 	struct trapframe *tf;
    513 	sigset_t mask;
    514 	ssize_t ss_gap;
    515 	/* Restore register context. */
    516 	tf = l->l_md.md_regs;
    517 
    518 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    519 #ifdef VM86
    520 	if (scp->sc_eflags & PSL_VM) {
    521 		void syscall_vm86 __P((struct trapframe *));
    522 
    523 		tf->tf_vm86_gs = scp->sc_gs;
    524 		tf->tf_vm86_fs = scp->sc_fs;
    525 		tf->tf_vm86_es = scp->sc_es;
    526 		tf->tf_vm86_ds = scp->sc_ds;
    527 		set_vflags(l, scp->sc_eflags);
    528 		p->p_md.md_syscall = syscall_vm86;
    529 	} else
    530 #endif
    531 	{
    532 		/*
    533 		 * Check for security violations.  If we're returning to
    534 		 * protected mode, the CPU will validate the segment registers
    535 		 * automatically and generate a trap on violations.  We handle
    536 		 * the trap, rather than doing all of the checking here.
    537 		 */
    538 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    539 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    540 			return EINVAL;
    541 
    542 		tf->tf_gs = scp->sc_gs;
    543 		tf->tf_fs = scp->sc_fs;
    544 		tf->tf_es = scp->sc_es;
    545 		tf->tf_ds = scp->sc_ds;
    546 #ifdef VM86
    547 		if (tf->tf_eflags & PSL_VM)
    548 			(*p->p_emul->e_syscall_intern)(p);
    549 #endif
    550 		tf->tf_eflags = scp->sc_eflags;
    551 	}
    552 	tf->tf_edi = scp->sc_edi;
    553 	tf->tf_esi = scp->sc_esi;
    554 	tf->tf_ebp = scp->sc_ebp;
    555 	tf->tf_ebx = scp->sc_ebx;
    556 	tf->tf_edx = scp->sc_edx;
    557 	tf->tf_ecx = scp->sc_ecx;
    558 	tf->tf_eax = scp->sc_eax;
    559 	tf->tf_eip = scp->sc_eip;
    560 	tf->tf_cs = scp->sc_cs;
    561 	tf->tf_esp = scp->sc_esp_at_signal;
    562 	tf->tf_ss = scp->sc_ss;
    563 
    564 	/* Restore signal stack. */
    565 	/*
    566 	 * Linux really does it this way; it doesn't have space in sigframe
    567 	 * to save the onstack flag.
    568 	 */
    569 	mutex_enter(&p->p_smutex);
    570 	ss_gap = (ssize_t)
    571 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    572 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    573 		sas->ss_flags |= SS_ONSTACK;
    574 	else
    575 		sas->ss_flags &= ~SS_ONSTACK;
    576 
    577 	/* Restore signal mask. */
    578 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    579 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    580 	mutex_exit(&p->p_smutex);
    581 
    582 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    583 	return EJUSTRETURN;
    584 }
    585 
    586 #ifdef USER_LDT
    587 
    588 int
    589 linux_read_ldt(l, uap, retval)
    590 	struct lwp *l;
    591 	struct linux_sys_modify_ldt_args /* {
    592 		syscallarg(int) func;
    593 		syscallarg(void *) ptr;
    594 		syscallarg(size_t) bytecount;
    595 	} */ *uap;
    596 	register_t *retval;
    597 {
    598 	struct proc *p = l->l_proc;
    599 	struct i386_get_ldt_args gl;
    600 	int error;
    601 	caddr_t sg;
    602 	char *parms;
    603 
    604 	DPRINTF(("linux_read_ldt!"));
    605 	sg = stackgap_init(p, 0);
    606 
    607 	gl.start = 0;
    608 	gl.desc = SCARG(uap, ptr);
    609 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    610 
    611 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    612 
    613 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    614 		return (error);
    615 
    616 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    617 		return (error);
    618 
    619 	*retval *= sizeof(union descriptor);
    620 	return (0);
    621 }
    622 
/*
 * Descriptor request copied in from user space by linux_write_ldt(),
 * which translates it into a native struct segment_descriptor.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* segment base (split into lo/hi base) */
	u_int limit;			/* segment limit (low 20 bits used) */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* becomes bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless in old mode */
};
    634 
    635 int
    636 linux_write_ldt(l, uap, retval)
    637 	struct lwp *l;
    638 	struct linux_sys_modify_ldt_args /* {
    639 		syscallarg(int) func;
    640 		syscallarg(void *) ptr;
    641 		syscallarg(size_t) bytecount;
    642 	} */ *uap;
    643 	register_t *retval;
    644 {
    645 	struct proc *p = l->l_proc;
    646 	struct linux_ldt_info ldt_info;
    647 	struct segment_descriptor sd;
    648 	struct i386_set_ldt_args sl;
    649 	int error;
    650 	caddr_t sg;
    651 	char *parms;
    652 	int oldmode = (int)retval[0];
    653 
    654 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    655 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    656 		return (EINVAL);
    657 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    658 		return error;
    659 	if (ldt_info.entry_number >= 8192)
    660 		return (EINVAL);
    661 	if (ldt_info.contents == 3) {
    662 		if (oldmode)
    663 			return (EINVAL);
    664 		if (ldt_info.seg_not_present)
    665 			return (EINVAL);
    666 	}
    667 
    668 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    669 	    (oldmode || (ldt_info.contents == 0 &&
    670 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    671 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    672 	    ldt_info.useable == 0))) {
    673 		/* this means you should zero the ldt */
    674 		(void)memset(&sd, 0, sizeof(sd));
    675 	} else {
    676 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    677 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    678 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    679 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    680 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    681 		    (!ldt_info.read_exec_only << 1);
    682 		sd.sd_dpl = SEL_UPL;
    683 		sd.sd_p = !ldt_info.seg_not_present;
    684 		sd.sd_def32 = ldt_info.seg_32bit;
    685 		sd.sd_gran = ldt_info.limit_in_pages;
    686 		if (!oldmode)
    687 			sd.sd_xx = ldt_info.useable;
    688 		else
    689 			sd.sd_xx = 0;
    690 	}
    691 	sg = stackgap_init(p, 0);
    692 	sl.start = ldt_info.entry_number;
    693 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    694 	sl.num = 1;
    695 
    696 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    697 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    698 
    699 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    700 
    701 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    702 		return (error);
    703 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    704 		return (error);
    705 
    706 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    707 		return (error);
    708 
    709 	*retval = 0;
    710 	return (0);
    711 }
    712 
    713 #endif /* USER_LDT */
    714 
    715 int
    716 linux_sys_modify_ldt(struct lwp *l, void *v,
    717     register_t *retval)
    718 {
    719 	struct linux_sys_modify_ldt_args /* {
    720 		syscallarg(int) func;
    721 		syscallarg(void *) ptr;
    722 		syscallarg(size_t) bytecount;
    723 	} */ *uap = v;
    724 
    725 	switch (SCARG(uap, func)) {
    726 #ifdef USER_LDT
    727 	case 0:
    728 		return linux_read_ldt(l, uap, retval);
    729 	case 1:
    730 		retval[0] = 1;
    731 		return linux_write_ldt(l, uap, retval);
    732 	case 2:
    733 #ifdef notyet
    734 		return (linux_read_default_ldt(l, uap, retval);
    735 #else
    736 		return (ENOSYS);
    737 #endif
    738 	case 0x11:
    739 		retval[0] = 0;
    740 		return linux_write_ldt(l, uap, retval);
    741 #endif /* USER_LDT */
    742 
    743 	default:
    744 		return (ENOSYS);
    745 	}
    746 }
    747 
    748 /*
    749  * XXX Pathetic hack to make svgalib work. This will fake the major
    750  * device number of an opened VT so that svgalib likes it. grmbl.
    751  * Should probably do it 'wrong the right way' and use a mapping
    752  * array for all major device numbers, and map linux_mknod too.
    753  */
    754 dev_t
    755 linux_fakedev(dev, raw)
    756 	dev_t dev;
    757 	int raw;
    758 {
    759 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    760 	const struct cdevsw *cd = cdevsw_lookup(dev);
    761 
    762 	if (raw) {
    763 #if (NWSDISPLAY > 0)
    764 		extern const struct cdevsw wsdisplay_cdevsw;
    765 		if (cd == &wsdisplay_cdevsw)
    766 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    767 #endif
    768 	}
    769 
    770 	if (cd == &ptc_cdevsw)
    771 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    772 	if (cd == &pts_cdevsw)
    773 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    774 
    775 	return dev;
    776 }
    777 
    778 #if (NWSDISPLAY > 0)
/*
 * Key translation tables handed to Linux programs through linux_keytabs.
 * That's not complete, but enough to get an X server running.
 *
 * There is one NR_KEYS-entry table per modifier state (plain, shift,
 * altgr, ctrl).
 * NOTE(review): the 16-bit entries look like Linux kernel keymap values
 * (key type in the high byte, value in the low byte), indexed by
 * keycode -- confirm against the Linux keyboard ioctl documentation.
 */
#define NR_KEYS 128
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
}, ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};
    852 
    853 const u_short * const linux_keytabs[] = {
    854 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    855 };
    856 #endif
    857 
    858 static struct biosdisk_info *
    859 fd2biosinfo(struct proc *p, struct file *fp)
    860 {
    861 	struct vnode *vp;
    862 	const char *blkname;
    863 	char diskname[16];
    864 	int i;
    865 	struct nativedisk_info *nip;
    866 	struct disklist *dl = x86_alldisks;
    867 
    868 	if (fp->f_type != DTYPE_VNODE)
    869 		return NULL;
    870 	vp = (struct vnode *)fp->f_data;
    871 
    872 	if (vp->v_type != VBLK)
    873 		return NULL;
    874 
    875 	blkname = devsw_blk2name(major(vp->v_rdev));
    876 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    877 	    DISKUNIT(vp->v_rdev));
    878 
    879 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    880 		nip = &dl->dl_nativedisks[i];
    881 		if (strcmp(diskname, nip->ni_devname))
    882 			continue;
    883 		if (nip->ni_nmatches != 0)
    884 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    885 	}
    886 
    887 	return NULL;
    888 }
    889 
    890 
    891 /*
    892  * We come here in a last attempt to satisfy a Linux ioctl() call
    893  */
    894 int
    895 linux_machdepioctl(l, v, retval)
    896 	struct lwp *l;
    897 	void *v;
    898 	register_t *retval;
    899 {
    900 	struct linux_sys_ioctl_args /* {
    901 		syscallarg(int) fd;
    902 		syscallarg(u_long) com;
    903 		syscallarg(caddr_t) data;
    904 	} */ *uap = v;
    905 	struct sys_ioctl_args bia;
    906 	u_long com;
    907 	int error, error1;
    908 #if (NWSDISPLAY > 0)
    909 	struct vt_mode lvt;
    910 	caddr_t bvtp, sg;
    911 	struct kbentry kbe;
    912 #endif
    913 	struct linux_hd_geometry hdg;
    914 	struct linux_hd_big_geometry hdg_big;
    915 	struct biosdisk_info *bip;
    916 	struct filedesc *fdp;
    917 	struct file *fp;
    918 	int fd;
    919 	struct disklabel label, *labp;
    920 	struct partinfo partp;
    921 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    922 	u_long start, biostotal, realtotal;
    923 	u_char heads, sectors;
    924 	u_int cylinders;
    925 	struct ioctl_pt pt;
    926 	struct proc *p = l->l_proc;
    927 
    928 	fd = SCARG(uap, fd);
    929 	SCARG(&bia, fd) = fd;
    930 	SCARG(&bia, data) = SCARG(uap, data);
    931 	com = SCARG(uap, com);
    932 
    933 	fdp = p->p_fd;
    934 
    935 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    936 		return (EBADF);
    937 
    938 	FILE_USE(fp);
    939 
    940 	switch (com) {
    941 #if (NWSDISPLAY > 0)
    942 	case LINUX_KDGKBMODE:
    943 		com = KDGKBMODE;
    944 		break;
    945 	case LINUX_KDSKBMODE:
    946 		com = KDSKBMODE;
    947 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    948 			SCARG(&bia, data) = (caddr_t)K_RAW;
    949 		break;
    950 	case LINUX_KIOCSOUND:
    951 		SCARG(&bia, data) =
    952 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    953 		/* fall through */
    954 	case LINUX_KDMKTONE:
    955 		com = KDMKTONE;
    956 		break;
    957 	case LINUX_KDSETMODE:
    958 		com = KDSETMODE;
    959 		break;
    960 	case LINUX_KDGETMODE:
    961 		/* KD_* values are equal to the wscons numbers */
    962 		com = WSDISPLAYIO_GMODE;
    963 		break;
    964 	case LINUX_KDENABIO:
    965 		com = KDENABIO;
    966 		break;
    967 	case LINUX_KDDISABIO:
    968 		com = KDDISABIO;
    969 		break;
    970 	case LINUX_KDGETLED:
    971 		com = KDGETLED;
    972 		break;
    973 	case LINUX_KDSETLED:
    974 		com = KDSETLED;
    975 		break;
    976 	case LINUX_VT_OPENQRY:
    977 		com = VT_OPENQRY;
    978 		break;
    979 	case LINUX_VT_GETMODE:
    980 		SCARG(&bia, com) = VT_GETMODE;
    981 		/* XXX NJWLWP */
    982 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    983 			goto out;
    984 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    985 		    sizeof (struct vt_mode))))
    986 			goto out;
    987 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    988 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    989 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    990 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    991 		    sizeof (struct vt_mode));
    992 		goto out;
    993 	case LINUX_VT_SETMODE:
    994 		com = VT_SETMODE;
    995 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    996 		    sizeof (struct vt_mode))))
    997 			goto out;
    998 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    999 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
   1000 		lvt.frsig = linux_to_native_signo[lvt.frsig];
   1001 		sg = stackgap_init(p, 0);
   1002 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
   1003 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
   1004 			goto out;
   1005 		SCARG(&bia, data) = bvtp;
   1006 		break;
   1007 	case LINUX_VT_DISALLOCATE:
   1008 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1009 		error = 0;
   1010 		goto out;
   1011 	case LINUX_VT_RELDISP:
   1012 		com = VT_RELDISP;
   1013 		break;
   1014 	case LINUX_VT_ACTIVATE:
   1015 		com = VT_ACTIVATE;
   1016 		break;
   1017 	case LINUX_VT_WAITACTIVE:
   1018 		com = VT_WAITACTIVE;
   1019 		break;
   1020 	case LINUX_VT_GETSTATE:
   1021 		com = VT_GETSTATE;
   1022 		break;
   1023 	case LINUX_KDGKBTYPE:
   1024 	    {
   1025 		static const u_int8_t kb101 = KB_101;
   1026 
   1027 		/* This is what Linux does. */
   1028 		error = copyout(&kb101, SCARG(uap, data), 1);
   1029 		goto out;
   1030 	    }
   1031 	case LINUX_KDGKBENT:
   1032 		/*
   1033 		 * The Linux KDGKBENT ioctl is different from the
   1034 		 * SYSV original. So we handle it in machdep code.
   1035 		 * XXX We should use keyboard mapping information
   1036 		 * from wsdisplay, but this would be expensive.
   1037 		 */
   1038 		if ((error = copyin(SCARG(uap, data), &kbe,
   1039 				    sizeof(struct kbentry))))
   1040 			goto out;
   1041 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1042 		    || kbe.kb_index >= NR_KEYS) {
   1043 			error = EINVAL;
   1044 			goto out;
   1045 		}
   1046 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1047 		error = copyout(&kbe, SCARG(uap, data),
   1048 				sizeof(struct kbentry));
   1049 		goto out;
   1050 #endif
   1051 	case LINUX_HDIO_GETGEO:
   1052 	case LINUX_HDIO_GETGEO_BIG:
   1053 		/*
   1054 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1055 		 * if possible (extending its # of cylinders if it's beyond
   1056 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1057 		 * the real geometry) if not found, by returning an
   1058 		 * error. See common/linux_hdio.c
   1059 		 */
   1060 		bip = fd2biosinfo(p, fp);
   1061 		ioctlf = fp->f_ops->fo_ioctl;
   1062 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1063 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1064 		if (error != 0 && error1 != 0) {
   1065 			error = error1;
   1066 			goto out;
   1067 		}
   1068 		labp = error != 0 ? &label : partp.disklab;
   1069 		start = error1 != 0 ? partp.part->p_offset : 0;
   1070 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1071 		    && bip->bi_cyl != 0) {
   1072 			heads = bip->bi_head;
   1073 			sectors = bip->bi_sec;
   1074 			cylinders = bip->bi_cyl;
   1075 			biostotal = heads * sectors * cylinders;
   1076 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1077 			    labp->d_ncylinders;
   1078 			if (realtotal > biostotal)
   1079 				cylinders = realtotal / (heads * sectors);
   1080 		} else {
   1081 			heads = labp->d_ntracks;
   1082 			cylinders = labp->d_ncylinders;
   1083 			sectors = labp->d_nsectors;
   1084 		}
   1085 		if (com == LINUX_HDIO_GETGEO) {
   1086 			hdg.start = start;
   1087 			hdg.heads = heads;
   1088 			hdg.cylinders = cylinders;
   1089 			hdg.sectors = sectors;
   1090 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1091 			goto out;
   1092 		} else {
   1093 			hdg_big.start = start;
   1094 			hdg_big.heads = heads;
   1095 			hdg_big.cylinders = cylinders;
   1096 			hdg_big.sectors = sectors;
   1097 			error = copyout(&hdg_big, SCARG(uap, data),
   1098 			    sizeof hdg_big);
   1099 			goto out;
   1100 		}
   1101 
   1102 	default:
   1103 		/*
   1104 		 * Unknown to us. If it's on a device, just pass it through
   1105 		 * using PTIOCLINUX, the device itself might be able to
   1106 		 * make some sense of it.
   1107 		 * XXX hack: if the function returns EJUSTRETURN,
   1108 		 * it has stuffed a sysctl return value in pt.data.
   1109 		 */
   1110 		ioctlf = fp->f_ops->fo_ioctl;
   1111 		pt.com = SCARG(uap, com);
   1112 		pt.data = SCARG(uap, data);
   1113 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1114 		if (error == EJUSTRETURN) {
   1115 			retval[0] = (register_t)pt.data;
   1116 			error = 0;
   1117 		}
   1118 
   1119 		if (error == ENOTTY) {
   1120 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1121 			    com));
   1122 		}
   1123 		goto out;
   1124 	}
   1125 	SCARG(&bia, com) = com;
   1126 	/* XXX NJWLWP */
   1127 	error = sys_ioctl(curlwp, &bia, retval);
   1128 out:
   1129 	FILE_UNUSE(fp ,l);
   1130 	return error;
   1131 }
   1132 
   1133 /*
   1134  * Set I/O permissions for a process. Just set the maximum level
   1135  * right away (ignoring the argument), otherwise we would have
   1136  * to rely on I/O permission maps, which are not implemented.
   1137  */
   1138 int
   1139 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1140 {
   1141 #if 0
   1142 	struct linux_sys_iopl_args /* {
   1143 		syscallarg(int) level;
   1144 	} */ *uap = v;
   1145 #endif
   1146 	struct trapframe *fp = l->l_md.md_regs;
   1147 
   1148 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1149 	    NULL, NULL, NULL, NULL) != 0)
   1150 		return EPERM;
   1151 	fp->tf_eflags |= PSL_IOPL;
   1152 	*retval = 0;
   1153 	return 0;
   1154 }
   1155 
   1156 /*
   1157  * See above. If a root process tries to set access to an I/O port,
   1158  * just let it have the whole range.
   1159  */
   1160 int
   1161 linux_sys_ioperm(l, v, retval)
   1162 	struct lwp *l;
   1163 	void *v;
   1164 	register_t *retval;
   1165 {
   1166 	struct linux_sys_ioperm_args /* {
   1167 		syscallarg(unsigned int) lo;
   1168 		syscallarg(unsigned int) hi;
   1169 		syscallarg(int) val;
   1170 	} */ *uap = v;
   1171 	struct trapframe *fp = l->l_md.md_regs;
   1172 
   1173 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1174 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1175 	    NULL, NULL) != 0)
   1176 		return EPERM;
   1177 	if (SCARG(uap, val))
   1178 		fp->tf_eflags |= PSL_IOPL;
   1179 	*retval = 0;
   1180 	return 0;
   1181 }
   1182 
   1183 int
   1184 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1185     void *arg)
   1186 {
   1187 	return 0;
   1188 }
   1189