Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.123
      1 /*	$NetBSD: linux_machdep.c,v 1.123 2007/03/04 06:01:19 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.123 2007/03/04 06:01:19 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <machine/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
    110 #ifdef USER_LDT
    111 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 #endif
    116 
    117 #ifdef DEBUG_LINUX
    118 #define DPRINTF(a) uprintf a
    119 #else
    120 #define DPRINTF(a)
    121 #endif
    122 
    123 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    124 extern struct disklist *x86_alldisks;
    125 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    126     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    127 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct linux_sigcontext *));
    129 static int linux_restore_sigcontext __P((struct lwp *,
    130     struct linux_sigcontext *, register_t *));
    131 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    132 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    133 
    134 extern char linux_sigcode[], linux_rt_sigcode[];
    135 /*
    136  * Deal with some i386-specific things in the Linux emulation code.
    137  */
    138 
    139 void
    140 linux_setregs(l, epp, stack)
    141 	struct lwp *l;
    142 	struct exec_package *epp;
    143 	u_long stack;
    144 {
    145 	struct pcb *pcb = &l->l_addr->u_pcb;
    146 	struct trapframe *tf;
    147 
    148 #if NNPX > 0
    149 	/* If we were using the FPU, forget about it. */
    150 	if (npxproc == l)
    151 		npxdrop();
    152 #endif
    153 
    154 #ifdef USER_LDT
    155 	pmap_ldt_cleanup(l);
    156 #endif
    157 
    158 	l->l_md.md_flags &= ~MDL_USEDFPU;
    159 
    160 	if (i386_use_fxsave) {
    161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    162 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    163 	} else
    164 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    165 
    166 	tf = l->l_md.md_regs;
    167 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_edi = 0;
    172 	tf->tf_esi = 0;
    173 	tf->tf_ebp = 0;
    174 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    175 	tf->tf_edx = 0;
    176 	tf->tf_ecx = 0;
    177 	tf->tf_eax = 0;
    178 	tf->tf_eip = epp->ep_entry;
    179 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    180 	tf->tf_eflags = PSL_USERSET;
    181 	tf->tf_esp = stack;
    182 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    183 }
    184 
    185 /*
    186  * Send an interrupt to process.
    187  *
    188  * Stack is set up to allow sigcode stored
    189  * in u. to call routine, followed by kcall
    190  * to sigreturn routine below.  After sigreturn
    191  * resets the signal mask, the stack, and the
    192  * frame pointer, it returns to the user
    193  * specified pc, psl.
    194  */
    195 
    196 void
    197 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    198 {
    199 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    200 		linux_rt_sendsig(ksi, mask);
    201 	else
    202 		linux_old_sendsig(ksi, mask);
    203 }
    204 
    205 
    206 static void
    207 linux_save_ucontext(l, tf, mask, sas, uc)
    208 	struct lwp *l;
    209 	struct trapframe *tf;
    210 	const sigset_t *mask;
    211 	struct sigaltstack *sas;
    212 	struct linux_ucontext *uc;
    213 {
    214 	uc->uc_flags = 0;
    215 	uc->uc_link = NULL;
    216 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    217 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    218 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    219 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    220 }
    221 
    222 static void
    223 linux_save_sigcontext(l, tf, mask, sc)
    224 	struct lwp *l;
    225 	struct trapframe *tf;
    226 	const sigset_t *mask;
    227 	struct linux_sigcontext *sc;
    228 {
    229 	/* Save register context. */
    230 #ifdef VM86
    231 	if (tf->tf_eflags & PSL_VM) {
    232 		sc->sc_gs = tf->tf_vm86_gs;
    233 		sc->sc_fs = tf->tf_vm86_fs;
    234 		sc->sc_es = tf->tf_vm86_es;
    235 		sc->sc_ds = tf->tf_vm86_ds;
    236 		sc->sc_eflags = get_vflags(l);
    237 	} else
    238 #endif
    239 	{
    240 		sc->sc_gs = tf->tf_gs;
    241 		sc->sc_fs = tf->tf_fs;
    242 		sc->sc_es = tf->tf_es;
    243 		sc->sc_ds = tf->tf_ds;
    244 		sc->sc_eflags = tf->tf_eflags;
    245 	}
    246 	sc->sc_edi = tf->tf_edi;
    247 	sc->sc_esi = tf->tf_esi;
    248 	sc->sc_esp = tf->tf_esp;
    249 	sc->sc_ebp = tf->tf_ebp;
    250 	sc->sc_ebx = tf->tf_ebx;
    251 	sc->sc_edx = tf->tf_edx;
    252 	sc->sc_ecx = tf->tf_ecx;
    253 	sc->sc_eax = tf->tf_eax;
    254 	sc->sc_eip = tf->tf_eip;
    255 	sc->sc_cs = tf->tf_cs;
    256 	sc->sc_esp_at_signal = tf->tf_esp;
    257 	sc->sc_ss = tf->tf_ss;
    258 	sc->sc_err = tf->tf_err;
    259 	sc->sc_trapno = tf->tf_trapno;
    260 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    261 	sc->sc_387 = NULL;
    262 
    263 	/* Save signal stack. */
    264 	/* Linux doesn't save the onstack flag in sigframe */
    265 
    266 	/* Save signal mask. */
    267 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    268 }
    269 
    270 static void
    271 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    272 {
    273 	struct lwp *l = curlwp;
    274 	struct proc *p = l->l_proc;
    275 	struct trapframe *tf;
    276 	struct linux_rt_sigframe *fp, frame;
    277 	int onstack, error;
    278 	linux_siginfo_t *lsi;
    279 	int sig = ksi->ksi_signo;
    280 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    281 	struct sigaltstack *sas = &l->l_sigstk;
    282 
    283 	tf = l->l_md.md_regs;
    284 	/* Do we need to jump onto the signal stack? */
    285 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    286 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    287 
    288 
    289 	/* Allocate space for the signal handler context. */
    290 	if (onstack)
    291 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    292 		    sas->ss_size);
    293 	else
    294 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    295 	fp--;
    296 
    297 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    298 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    299 
    300 	/* Build stack frame for signal trampoline. */
    301 	frame.sf_handler = catcher;
    302 	frame.sf_sig = native_to_linux_signo[sig];
    303 	frame.sf_sip = &fp->sf_si;
    304 	frame.sf_ucp = &fp->sf_uc;
    305 
    306 	/*
    307 	 * XXX: the following code assumes that the constants for
    308 	 * siginfo are the same between linux and NetBSD.
    309 	 */
    310 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    311 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    312 	lsi->lsi_code = ksi->ksi_code;
    313 	switch (lsi->lsi_signo = frame.sf_sig) {
    314 	case LINUX_SIGILL:
    315 	case LINUX_SIGFPE:
    316 	case LINUX_SIGSEGV:
    317 	case LINUX_SIGBUS:
    318 	case LINUX_SIGTRAP:
    319 		lsi->lsi_addr = ksi->ksi_addr;
    320 		break;
    321 	case LINUX_SIGCHLD:
    322 		lsi->lsi_uid = ksi->ksi_uid;
    323 		lsi->lsi_pid = ksi->ksi_pid;
    324 		lsi->lsi_utime = ksi->ksi_utime;
    325 		lsi->lsi_stime = ksi->ksi_stime;
    326 
    327 		/* We use the same codes */
    328 		lsi->lsi_code = ksi->ksi_code;
    329 		/* XXX is that right? */
    330 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    331 		break;
    332 	case LINUX_SIGIO:
    333 		lsi->lsi_band = ksi->ksi_band;
    334 		lsi->lsi_fd = ksi->ksi_fd;
    335 		break;
    336 	default:
    337 		lsi->lsi_uid = ksi->ksi_uid;
    338 		lsi->lsi_pid = ksi->ksi_pid;
    339 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    340 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    341 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    342 		break;
    343 	}
    344 
    345 	/* Save register context. */
    346 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    347 	sendsig_reset(l, sig);
    348 
    349 	mutex_exit(&p->p_smutex);
    350 	error = copyout(&frame, fp, sizeof(frame));
    351 	mutex_enter(&p->p_smutex);
    352 
    353 	if (error != 0) {
    354 		/*
    355 		 * Process has trashed its stack; give it an illegal
    356 		 * instruction to halt it in its tracks.
    357 		 */
    358 		sigexit(l, SIGILL);
    359 		/* NOTREACHED */
    360 	}
    361 
    362 	/*
    363 	 * Build context to run handler in.
    364 	 */
    365 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    366 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    370 	    (linux_rt_sigcode - linux_sigcode);
    371 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    372 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    373 	tf->tf_esp = (int)fp;
    374 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    375 
    376 	/* Remember that we're now on the signal stack. */
    377 	if (onstack)
    378 		sas->ss_flags |= SS_ONSTACK;
    379 }
    380 
    381 static void
    382 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    383 {
    384 	struct lwp *l = curlwp;
    385 	struct proc *p = l->l_proc;
    386 	struct trapframe *tf;
    387 	struct linux_sigframe *fp, frame;
    388 	int onstack, error;
    389 	int sig = ksi->ksi_signo;
    390 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    391 	struct sigaltstack *sas = &l->l_sigstk;
    392 
    393 	tf = l->l_md.md_regs;
    394 
    395 	/* Do we need to jump onto the signal stack? */
    396 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    397 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    398 
    399 	/* Allocate space for the signal handler context. */
    400 	if (onstack)
    401 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    402 		    sas->ss_size);
    403 	else
    404 		fp = (struct linux_sigframe *)tf->tf_esp;
    405 	fp--;
    406 
    407 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    408 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    409 
    410 	/* Build stack frame for signal trampoline. */
    411 	frame.sf_handler = catcher;
    412 	frame.sf_sig = native_to_linux_signo[sig];
    413 
    414 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    415 	sendsig_reset(l, sig);
    416 
    417 	mutex_exit(&p->p_smutex);
    418 	error = copyout(&frame, fp, sizeof(frame));
    419 	mutex_enter(&p->p_smutex);
    420 
    421 	if (error != 0) {
    422 		/*
    423 		 * Process has trashed its stack; give it an illegal
    424 		 * instruction to halt it in its tracks.
    425 		 */
    426 		sigexit(l, SIGILL);
    427 		/* NOTREACHED */
    428 	}
    429 
    430 	/*
    431 	 * Build context to run handler in.
    432 	 */
    433 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    434 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    435 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    436 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    437 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    438 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    439 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    440 	tf->tf_esp = (int)fp;
    441 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    442 
    443 	/* Remember that we're now on the signal stack. */
    444 	if (onstack)
    445 		sas->ss_flags |= SS_ONSTACK;
    446 }
    447 
    448 /*
    449  * System call to cleanup state after a signal
    450  * has been taken.  Reset signal mask and
    451  * stack state from context left by sendsig (above).
    452  * Return to previous pc and psl as specified by
    453  * context left by sendsig. Check carefully to
    454  * make sure that the user has not modified the
    455  * psl to gain improper privileges or to cause
    456  * a machine fault.
    457  */
    458 int
    459 linux_sys_rt_sigreturn(l, v, retval)
    460 	struct lwp *l;
    461 	void *v;
    462 	register_t *retval;
    463 {
    464 	struct linux_sys_rt_sigreturn_args /* {
    465 		syscallarg(struct linux_ucontext *) ucp;
    466 	} */ *uap = v;
    467 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    468 	int error;
    469 
    470 	/*
    471 	 * The trampoline code hands us the context.
    472 	 * It is unsafe to keep track of it ourselves, in the event that a
    473 	 * program jumps out of a signal handler.
    474 	 */
    475 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    476 		return error;
    477 
    478 	/* XXX XAX we can do better here by using more of the ucontext */
    479 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    480 }
    481 
    482 int
    483 linux_sys_sigreturn(l, v, retval)
    484 	struct lwp *l;
    485 	void *v;
    486 	register_t *retval;
    487 {
    488 	struct linux_sys_sigreturn_args /* {
    489 		syscallarg(struct linux_sigcontext *) scp;
    490 	} */ *uap = v;
    491 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    492 	int error;
    493 
    494 	/*
    495 	 * The trampoline code hands us the context.
    496 	 * It is unsafe to keep track of it ourselves, in the event that a
    497 	 * program jumps out of a signal handler.
    498 	 */
    499 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    500 		return error;
    501 	return linux_restore_sigcontext(l, &context, retval);
    502 }
    503 
    504 static int
    505 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    506     register_t *retval)
    507 {
    508 	struct proc *p = l->l_proc;
    509 	struct sigaltstack *sas = &l->l_sigstk;
    510 	struct trapframe *tf;
    511 	sigset_t mask;
    512 	ssize_t ss_gap;
    513 	/* Restore register context. */
    514 	tf = l->l_md.md_regs;
    515 
    516 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    517 #ifdef VM86
    518 	if (scp->sc_eflags & PSL_VM) {
    519 		void syscall_vm86 __P((struct trapframe *));
    520 
    521 		tf->tf_vm86_gs = scp->sc_gs;
    522 		tf->tf_vm86_fs = scp->sc_fs;
    523 		tf->tf_vm86_es = scp->sc_es;
    524 		tf->tf_vm86_ds = scp->sc_ds;
    525 		set_vflags(l, scp->sc_eflags);
    526 		p->p_md.md_syscall = syscall_vm86;
    527 	} else
    528 #endif
    529 	{
    530 		/*
    531 		 * Check for security violations.  If we're returning to
    532 		 * protected mode, the CPU will validate the segment registers
    533 		 * automatically and generate a trap on violations.  We handle
    534 		 * the trap, rather than doing all of the checking here.
    535 		 */
    536 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    537 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    538 			return EINVAL;
    539 
    540 		tf->tf_gs = scp->sc_gs;
    541 		tf->tf_fs = scp->sc_fs;
    542 		tf->tf_es = scp->sc_es;
    543 		tf->tf_ds = scp->sc_ds;
    544 #ifdef VM86
    545 		if (tf->tf_eflags & PSL_VM)
    546 			(*p->p_emul->e_syscall_intern)(p);
    547 #endif
    548 		tf->tf_eflags = scp->sc_eflags;
    549 	}
    550 	tf->tf_edi = scp->sc_edi;
    551 	tf->tf_esi = scp->sc_esi;
    552 	tf->tf_ebp = scp->sc_ebp;
    553 	tf->tf_ebx = scp->sc_ebx;
    554 	tf->tf_edx = scp->sc_edx;
    555 	tf->tf_ecx = scp->sc_ecx;
    556 	tf->tf_eax = scp->sc_eax;
    557 	tf->tf_eip = scp->sc_eip;
    558 	tf->tf_cs = scp->sc_cs;
    559 	tf->tf_esp = scp->sc_esp_at_signal;
    560 	tf->tf_ss = scp->sc_ss;
    561 
    562 	/* Restore signal stack. */
    563 	/*
    564 	 * Linux really does it this way; it doesn't have space in sigframe
    565 	 * to save the onstack flag.
    566 	 */
    567 	mutex_enter(&p->p_smutex);
    568 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    569 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    570 		sas->ss_flags |= SS_ONSTACK;
    571 	else
    572 		sas->ss_flags &= ~SS_ONSTACK;
    573 
    574 	/* Restore signal mask. */
    575 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    576 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    577 	mutex_exit(&p->p_smutex);
    578 
    579 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    580 	return EJUSTRETURN;
    581 }
    582 
    583 #ifdef USER_LDT
    584 
    585 int
    586 linux_read_ldt(l, uap, retval)
    587 	struct lwp *l;
    588 	struct linux_sys_modify_ldt_args /* {
    589 		syscallarg(int) func;
    590 		syscallarg(void *) ptr;
    591 		syscallarg(size_t) bytecount;
    592 	} */ *uap;
    593 	register_t *retval;
    594 {
    595 	struct proc *p = l->l_proc;
    596 	struct i386_get_ldt_args gl;
    597 	int error;
    598 	void *sg;
    599 	char *parms;
    600 
    601 	DPRINTF(("linux_read_ldt!"));
    602 	sg = stackgap_init(p, 0);
    603 
    604 	gl.start = 0;
    605 	gl.desc = SCARG(uap, ptr);
    606 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    607 
    608 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    609 
    610 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    611 		return (error);
    612 
    613 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    614 		return (error);
    615 
    616 	*retval *= sizeof(union descriptor);
    617 	return (0);
    618 }
    619 
    620 struct linux_ldt_info {
    621 	u_int entry_number;
    622 	u_long base_addr;
    623 	u_int limit;
    624 	u_int seg_32bit:1;
    625 	u_int contents:2;
    626 	u_int read_exec_only:1;
    627 	u_int limit_in_pages:1;
    628 	u_int seg_not_present:1;
    629 	u_int useable:1;
    630 };
    631 
    632 int
    633 linux_write_ldt(l, uap, retval)
    634 	struct lwp *l;
    635 	struct linux_sys_modify_ldt_args /* {
    636 		syscallarg(int) func;
    637 		syscallarg(void *) ptr;
    638 		syscallarg(size_t) bytecount;
    639 	} */ *uap;
    640 	register_t *retval;
    641 {
    642 	struct proc *p = l->l_proc;
    643 	struct linux_ldt_info ldt_info;
    644 	struct segment_descriptor sd;
    645 	struct i386_set_ldt_args sl;
    646 	int error;
    647 	void *sg;
    648 	char *parms;
    649 	int oldmode = (int)retval[0];
    650 
    651 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    652 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    653 		return (EINVAL);
    654 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    655 		return error;
    656 	if (ldt_info.entry_number >= 8192)
    657 		return (EINVAL);
    658 	if (ldt_info.contents == 3) {
    659 		if (oldmode)
    660 			return (EINVAL);
    661 		if (ldt_info.seg_not_present)
    662 			return (EINVAL);
    663 	}
    664 
    665 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    666 	    (oldmode || (ldt_info.contents == 0 &&
    667 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    668 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    669 	    ldt_info.useable == 0))) {
    670 		/* this means you should zero the ldt */
    671 		(void)memset(&sd, 0, sizeof(sd));
    672 	} else {
    673 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    674 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    675 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    676 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    677 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    678 		    (!ldt_info.read_exec_only << 1);
    679 		sd.sd_dpl = SEL_UPL;
    680 		sd.sd_p = !ldt_info.seg_not_present;
    681 		sd.sd_def32 = ldt_info.seg_32bit;
    682 		sd.sd_gran = ldt_info.limit_in_pages;
    683 		if (!oldmode)
    684 			sd.sd_xx = ldt_info.useable;
    685 		else
    686 			sd.sd_xx = 0;
    687 	}
    688 	sg = stackgap_init(p, 0);
    689 	sl.start = ldt_info.entry_number;
    690 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    691 	sl.num = 1;
    692 
    693 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    694 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    695 
    696 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    697 
    698 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    699 		return (error);
    700 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    701 		return (error);
    702 
    703 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    704 		return (error);
    705 
    706 	*retval = 0;
    707 	return (0);
    708 }
    709 
    710 #endif /* USER_LDT */
    711 
    712 int
    713 linux_sys_modify_ldt(struct lwp *l, void *v,
    714     register_t *retval)
    715 {
    716 	struct linux_sys_modify_ldt_args /* {
    717 		syscallarg(int) func;
    718 		syscallarg(void *) ptr;
    719 		syscallarg(size_t) bytecount;
    720 	} */ *uap = v;
    721 
    722 	switch (SCARG(uap, func)) {
    723 #ifdef USER_LDT
    724 	case 0:
    725 		return linux_read_ldt(l, uap, retval);
    726 	case 1:
    727 		retval[0] = 1;
    728 		return linux_write_ldt(l, uap, retval);
    729 	case 2:
    730 #ifdef notyet
    731 		return (linux_read_default_ldt(l, uap, retval);
    732 #else
    733 		return (ENOSYS);
    734 #endif
    735 	case 0x11:
    736 		retval[0] = 0;
    737 		return linux_write_ldt(l, uap, retval);
    738 #endif /* USER_LDT */
    739 
    740 	default:
    741 		return (ENOSYS);
    742 	}
    743 }
    744 
    745 /*
    746  * XXX Pathetic hack to make svgalib work. This will fake the major
    747  * device number of an opened VT so that svgalib likes it. grmbl.
    748  * Should probably do it 'wrong the right way' and use a mapping
    749  * array for all major device numbers, and map linux_mknod too.
    750  */
    751 dev_t
    752 linux_fakedev(dev, raw)
    753 	dev_t dev;
    754 	int raw;
    755 {
    756 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    757 	const struct cdevsw *cd = cdevsw_lookup(dev);
    758 
    759 	if (raw) {
    760 #if (NWSDISPLAY > 0)
    761 		extern const struct cdevsw wsdisplay_cdevsw;
    762 		if (cd == &wsdisplay_cdevsw)
    763 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    764 #endif
    765 	}
    766 
    767 	if (cd == &ptc_cdevsw)
    768 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    769 	if (cd == &pts_cdevsw)
    770 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    771 
    772 	return dev;
    773 }
    774 
    775 #if (NWSDISPLAY > 0)
    776 /*
    777  * That's not complete, but enough to get an X server running.
    778  */
/* Number of entries in each key map below. */
#define NR_KEYS 128

/* Key map used when no modifier is active. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of the key maps above.  Slots 2 and 3 both refer to altgr_map.
 * NOTE(review): the slot order presumably follows the Linux keymap table
 * numbering -- confirm against the code that indexes this array.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    853 #endif
    854 
    855 static struct biosdisk_info *
    856 fd2biosinfo(struct proc *p, struct file *fp)
    857 {
    858 	struct vnode *vp;
    859 	const char *blkname;
    860 	char diskname[16];
    861 	int i;
    862 	struct nativedisk_info *nip;
    863 	struct disklist *dl = x86_alldisks;
    864 
    865 	if (fp->f_type != DTYPE_VNODE)
    866 		return NULL;
    867 	vp = (struct vnode *)fp->f_data;
    868 
    869 	if (vp->v_type != VBLK)
    870 		return NULL;
    871 
    872 	blkname = devsw_blk2name(major(vp->v_rdev));
    873 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    874 	    DISKUNIT(vp->v_rdev));
    875 
    876 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    877 		nip = &dl->dl_nativedisks[i];
    878 		if (strcmp(diskname, nip->ni_devname))
    879 			continue;
    880 		if (nip->ni_nmatches != 0)
    881 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    882 	}
    883 
    884 	return NULL;
    885 }
    886 
    887 
    888 /*
    889  * We come here in a last attempt to satisfy a Linux ioctl() call
    890  */
    891 int
    892 linux_machdepioctl(l, v, retval)
    893 	struct lwp *l;
    894 	void *v;
    895 	register_t *retval;
    896 {
    897 	struct linux_sys_ioctl_args /* {
    898 		syscallarg(int) fd;
    899 		syscallarg(u_long) com;
    900 		syscallarg(void *) data;
    901 	} */ *uap = v;
    902 	struct sys_ioctl_args bia;
    903 	u_long com;
    904 	int error, error1;
    905 #if (NWSDISPLAY > 0)
    906 	struct vt_mode lvt;
    907 	void *bvtp, *sg;
    908 	struct kbentry kbe;
    909 #endif
    910 	struct linux_hd_geometry hdg;
    911 	struct linux_hd_big_geometry hdg_big;
    912 	struct biosdisk_info *bip;
    913 	struct filedesc *fdp;
    914 	struct file *fp;
    915 	int fd;
    916 	struct disklabel label, *labp;
    917 	struct partinfo partp;
    918 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    919 	u_long start, biostotal, realtotal;
    920 	u_char heads, sectors;
    921 	u_int cylinders;
    922 	struct ioctl_pt pt;
    923 	struct proc *p = l->l_proc;
    924 
    925 	fd = SCARG(uap, fd);
    926 	SCARG(&bia, fd) = fd;
    927 	SCARG(&bia, data) = SCARG(uap, data);
    928 	com = SCARG(uap, com);
    929 
    930 	fdp = p->p_fd;
    931 
    932 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    933 		return (EBADF);
    934 
    935 	FILE_USE(fp);
    936 
    937 	switch (com) {
    938 #if (NWSDISPLAY > 0)
    939 	case LINUX_KDGKBMODE:
    940 		com = KDGKBMODE;
    941 		break;
    942 	case LINUX_KDSKBMODE:
    943 		com = KDSKBMODE;
    944 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    945 			SCARG(&bia, data) = (void *)K_RAW;
    946 		break;
    947 	case LINUX_KIOCSOUND:
    948 		SCARG(&bia, data) =
    949 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    950 		/* fall through */
    951 	case LINUX_KDMKTONE:
    952 		com = KDMKTONE;
    953 		break;
    954 	case LINUX_KDSETMODE:
    955 		com = KDSETMODE;
    956 		break;
    957 	case LINUX_KDGETMODE:
    958 		/* KD_* values are equal to the wscons numbers */
    959 		com = WSDISPLAYIO_GMODE;
    960 		break;
    961 	case LINUX_KDENABIO:
    962 		com = KDENABIO;
    963 		break;
    964 	case LINUX_KDDISABIO:
    965 		com = KDDISABIO;
    966 		break;
    967 	case LINUX_KDGETLED:
    968 		com = KDGETLED;
    969 		break;
    970 	case LINUX_KDSETLED:
    971 		com = KDSETLED;
    972 		break;
    973 	case LINUX_VT_OPENQRY:
    974 		com = VT_OPENQRY;
    975 		break;
    976 	case LINUX_VT_GETMODE:
    977 		SCARG(&bia, com) = VT_GETMODE;
    978 		/* XXX NJWLWP */
    979 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    980 			goto out;
    981 		if ((error = copyin(SCARG(uap, data), (void *)&lvt,
    982 		    sizeof (struct vt_mode))))
    983 			goto out;
    984 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    985 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    986 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    987 		error = copyout((void *)&lvt, SCARG(uap, data),
    988 		    sizeof (struct vt_mode));
    989 		goto out;
    990 	case LINUX_VT_SETMODE:
    991 		com = VT_SETMODE;
    992 		if ((error = copyin(SCARG(uap, data), (void *)&lvt,
    993 		    sizeof (struct vt_mode))))
    994 			goto out;
    995 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    996 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    997 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    998 		sg = stackgap_init(p, 0);
    999 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
   1000 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
   1001 			goto out;
   1002 		SCARG(&bia, data) = bvtp;
   1003 		break;
   1004 	case LINUX_VT_DISALLOCATE:
   1005 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1006 		error = 0;
   1007 		goto out;
   1008 	case LINUX_VT_RELDISP:
   1009 		com = VT_RELDISP;
   1010 		break;
   1011 	case LINUX_VT_ACTIVATE:
   1012 		com = VT_ACTIVATE;
   1013 		break;
   1014 	case LINUX_VT_WAITACTIVE:
   1015 		com = VT_WAITACTIVE;
   1016 		break;
   1017 	case LINUX_VT_GETSTATE:
   1018 		com = VT_GETSTATE;
   1019 		break;
   1020 	case LINUX_KDGKBTYPE:
   1021 	    {
   1022 		static const u_int8_t kb101 = KB_101;
   1023 
   1024 		/* This is what Linux does. */
   1025 		error = copyout(&kb101, SCARG(uap, data), 1);
   1026 		goto out;
   1027 	    }
   1028 	case LINUX_KDGKBENT:
   1029 		/*
   1030 		 * The Linux KDGKBENT ioctl is different from the
   1031 		 * SYSV original. So we handle it in machdep code.
   1032 		 * XXX We should use keyboard mapping information
   1033 		 * from wsdisplay, but this would be expensive.
   1034 		 */
   1035 		if ((error = copyin(SCARG(uap, data), &kbe,
   1036 				    sizeof(struct kbentry))))
   1037 			goto out;
   1038 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1039 		    || kbe.kb_index >= NR_KEYS) {
   1040 			error = EINVAL;
   1041 			goto out;
   1042 		}
   1043 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1044 		error = copyout(&kbe, SCARG(uap, data),
   1045 				sizeof(struct kbentry));
   1046 		goto out;
   1047 #endif
   1048 	case LINUX_HDIO_GETGEO:
   1049 	case LINUX_HDIO_GETGEO_BIG:
   1050 		/*
   1051 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1052 		 * if possible (extending its # of cylinders if it's beyond
   1053 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1054 		 * the real geometry) if not found, by returning an
   1055 		 * error. See common/linux_hdio.c
   1056 		 */
   1057 		bip = fd2biosinfo(p, fp);
   1058 		ioctlf = fp->f_ops->fo_ioctl;
   1059 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
   1060 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
   1061 		if (error != 0 && error1 != 0) {
   1062 			error = error1;
   1063 			goto out;
   1064 		}
   1065 		labp = error != 0 ? &label : partp.disklab;
   1066 		start = error1 != 0 ? partp.part->p_offset : 0;
   1067 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1068 		    && bip->bi_cyl != 0) {
   1069 			heads = bip->bi_head;
   1070 			sectors = bip->bi_sec;
   1071 			cylinders = bip->bi_cyl;
   1072 			biostotal = heads * sectors * cylinders;
   1073 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1074 			    labp->d_ncylinders;
   1075 			if (realtotal > biostotal)
   1076 				cylinders = realtotal / (heads * sectors);
   1077 		} else {
   1078 			heads = labp->d_ntracks;
   1079 			cylinders = labp->d_ncylinders;
   1080 			sectors = labp->d_nsectors;
   1081 		}
   1082 		if (com == LINUX_HDIO_GETGEO) {
   1083 			hdg.start = start;
   1084 			hdg.heads = heads;
   1085 			hdg.cylinders = cylinders;
   1086 			hdg.sectors = sectors;
   1087 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1088 			goto out;
   1089 		} else {
   1090 			hdg_big.start = start;
   1091 			hdg_big.heads = heads;
   1092 			hdg_big.cylinders = cylinders;
   1093 			hdg_big.sectors = sectors;
   1094 			error = copyout(&hdg_big, SCARG(uap, data),
   1095 			    sizeof hdg_big);
   1096 			goto out;
   1097 		}
   1098 
   1099 	default:
   1100 		/*
   1101 		 * Unknown to us. If it's on a device, just pass it through
   1102 		 * using PTIOCLINUX, the device itself might be able to
   1103 		 * make some sense of it.
   1104 		 * XXX hack: if the function returns EJUSTRETURN,
   1105 		 * it has stuffed a sysctl return value in pt.data.
   1106 		 */
   1107 		ioctlf = fp->f_ops->fo_ioctl;
   1108 		pt.com = SCARG(uap, com);
   1109 		pt.data = SCARG(uap, data);
   1110 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
   1111 		if (error == EJUSTRETURN) {
   1112 			retval[0] = (register_t)pt.data;
   1113 			error = 0;
   1114 		}
   1115 
   1116 		if (error == ENOTTY) {
   1117 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1118 			    com));
   1119 		}
   1120 		goto out;
   1121 	}
   1122 	SCARG(&bia, com) = com;
   1123 	/* XXX NJWLWP */
   1124 	error = sys_ioctl(curlwp, &bia, retval);
   1125 out:
   1126 	FILE_UNUSE(fp ,l);
   1127 	return error;
   1128 }
   1129 
   1130 /*
   1131  * Set I/O permissions for a process. Just set the maximum level
   1132  * right away (ignoring the argument), otherwise we would have
   1133  * to rely on I/O permission maps, which are not implemented.
   1134  */
   1135 int
   1136 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1137 {
   1138 #if 0
   1139 	struct linux_sys_iopl_args /* {
   1140 		syscallarg(int) level;
   1141 	} */ *uap = v;
   1142 #endif
   1143 	struct trapframe *fp = l->l_md.md_regs;
   1144 
   1145 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1146 	    NULL, NULL, NULL, NULL) != 0)
   1147 		return EPERM;
   1148 	fp->tf_eflags |= PSL_IOPL;
   1149 	*retval = 0;
   1150 	return 0;
   1151 }
   1152 
   1153 /*
   1154  * See above. If a root process tries to set access to an I/O port,
   1155  * just let it have the whole range.
   1156  */
   1157 int
   1158 linux_sys_ioperm(l, v, retval)
   1159 	struct lwp *l;
   1160 	void *v;
   1161 	register_t *retval;
   1162 {
   1163 	struct linux_sys_ioperm_args /* {
   1164 		syscallarg(unsigned int) lo;
   1165 		syscallarg(unsigned int) hi;
   1166 		syscallarg(int) val;
   1167 	} */ *uap = v;
   1168 	struct trapframe *fp = l->l_md.md_regs;
   1169 
   1170 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1171 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1172 	    NULL, NULL) != 0)
   1173 		return EPERM;
   1174 	if (SCARG(uap, val))
   1175 		fp->tf_eflags |= PSL_IOPL;
   1176 	*retval = 0;
   1177 	return 0;
   1178 }
   1179 
   1180 int
   1181 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1182     void *arg)
   1183 {
   1184 	return 0;
   1185 }
   1186