Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.121
      1 /*	$NetBSD: linux_machdep.c,v 1.121 2007/01/29 01:52:44 hubertf Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.121 2007/01/29 01:52:44 hubertf Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
    111 #ifdef USER_LDT
    112 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    113     register_t *));
    114 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    115     register_t *));
    116 #endif
    117 
    118 #ifdef DEBUG_LINUX
    119 #define DPRINTF(a) uprintf a
    120 #else
    121 #define DPRINTF(a)
    122 #endif
    123 
    124 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    125 extern struct disklist *x86_alldisks;
    126 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    127     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    128 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    129     const sigset_t *, struct linux_sigcontext *));
    130 static int linux_restore_sigcontext __P((struct lwp *,
    131     struct linux_sigcontext *, register_t *));
    132 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    133 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 
    135 extern char linux_sigcode[], linux_rt_sigcode[];
    136 /*
    137  * Deal with some i386-specific things in the Linux emulation code.
    138  */
    139 
    140 void
    141 linux_setregs(l, epp, stack)
    142 	struct lwp *l;
    143 	struct exec_package *epp;
    144 	u_long stack;
    145 {
    146 	struct pcb *pcb = &l->l_addr->u_pcb;
    147 	struct trapframe *tf;
    148 
    149 #if NNPX > 0
    150 	/* If we were using the FPU, forget about it. */
    151 	if (npxproc == l)
    152 		npxdrop();
    153 #endif
    154 
    155 #ifdef USER_LDT
    156 	pmap_ldt_cleanup(l);
    157 #endif
    158 
    159 	l->l_md.md_flags &= ~MDL_USEDFPU;
    160 
    161 	if (i386_use_fxsave) {
    162 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    164 	} else
    165 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    166 
    167 	tf = l->l_md.md_regs;
    168 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_edi = 0;
    173 	tf->tf_esi = 0;
    174 	tf->tf_ebp = 0;
    175 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    176 	tf->tf_edx = 0;
    177 	tf->tf_ecx = 0;
    178 	tf->tf_eax = 0;
    179 	tf->tf_eip = epp->ep_entry;
    180 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    181 	tf->tf_eflags = PSL_USERSET;
    182 	tf->tf_esp = stack;
    183 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    184 }
    185 
    186 /*
    187  * Send an interrupt to process.
    188  *
    189  * Stack is set up to allow sigcode stored
    190  * in u. to call routine, followed by kcall
    191  * to sigreturn routine below.  After sigreturn
    192  * resets the signal mask, the stack, and the
    193  * frame pointer, it returns to the user
    194  * specified pc, psl.
    195  */
    196 
    197 void
    198 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    199 {
    200 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    201 		linux_rt_sendsig(ksi, mask);
    202 	else
    203 		linux_old_sendsig(ksi, mask);
    204 }
    205 
    206 
    207 static void
    208 linux_save_ucontext(l, tf, mask, sas, uc)
    209 	struct lwp *l;
    210 	struct trapframe *tf;
    211 	const sigset_t *mask;
    212 	struct sigaltstack *sas;
    213 	struct linux_ucontext *uc;
    214 {
    215 	uc->uc_flags = 0;
    216 	uc->uc_link = NULL;
    217 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    218 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    219 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    220 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    221 }
    222 
    223 static void
    224 linux_save_sigcontext(l, tf, mask, sc)
    225 	struct lwp *l;
    226 	struct trapframe *tf;
    227 	const sigset_t *mask;
    228 	struct linux_sigcontext *sc;
    229 {
    230 	/* Save register context. */
    231 #ifdef VM86
    232 	if (tf->tf_eflags & PSL_VM) {
    233 		sc->sc_gs = tf->tf_vm86_gs;
    234 		sc->sc_fs = tf->tf_vm86_fs;
    235 		sc->sc_es = tf->tf_vm86_es;
    236 		sc->sc_ds = tf->tf_vm86_ds;
    237 		sc->sc_eflags = get_vflags(l);
    238 	} else
    239 #endif
    240 	{
    241 		sc->sc_gs = tf->tf_gs;
    242 		sc->sc_fs = tf->tf_fs;
    243 		sc->sc_es = tf->tf_es;
    244 		sc->sc_ds = tf->tf_ds;
    245 		sc->sc_eflags = tf->tf_eflags;
    246 	}
    247 	sc->sc_edi = tf->tf_edi;
    248 	sc->sc_esi = tf->tf_esi;
    249 	sc->sc_esp = tf->tf_esp;
    250 	sc->sc_ebp = tf->tf_ebp;
    251 	sc->sc_ebx = tf->tf_ebx;
    252 	sc->sc_edx = tf->tf_edx;
    253 	sc->sc_ecx = tf->tf_ecx;
    254 	sc->sc_eax = tf->tf_eax;
    255 	sc->sc_eip = tf->tf_eip;
    256 	sc->sc_cs = tf->tf_cs;
    257 	sc->sc_esp_at_signal = tf->tf_esp;
    258 	sc->sc_ss = tf->tf_ss;
    259 	sc->sc_err = tf->tf_err;
    260 	sc->sc_trapno = tf->tf_trapno;
    261 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    262 	sc->sc_387 = NULL;
    263 
    264 	/* Save signal stack. */
    265 	/* Linux doesn't save the onstack flag in sigframe */
    266 
    267 	/* Save signal mask. */
    268 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    269 }
    270 
    271 static void
    272 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    273 {
    274 	struct lwp *l = curlwp;
    275 	struct proc *p = l->l_proc;
    276 	struct trapframe *tf;
    277 	struct linux_rt_sigframe *fp, frame;
    278 	int onstack;
    279 	linux_siginfo_t *lsi;
    280 	int sig = ksi->ksi_signo;
    281 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    282 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    283 
    284 	tf = l->l_md.md_regs;
    285 	/* Do we need to jump onto the signal stack? */
    286 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    287 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    288 
    289 
    290 	/* Allocate space for the signal handler context. */
    291 	if (onstack)
    292 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    293 		    sas->ss_size);
    294 	else
    295 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    296 	fp--;
    297 
    298 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    299 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    300 
    301 	/* Build stack frame for signal trampoline. */
    302 	frame.sf_handler = catcher;
    303 	frame.sf_sig = native_to_linux_signo[sig];
    304 	frame.sf_sip = &fp->sf_si;
    305 	frame.sf_ucp = &fp->sf_uc;
    306 
    307 	/*
    308 	 * XXX: the following code assumes that the constants for
    309 	 * siginfo are the same between linux and NetBSD.
    310 	 */
    311 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    312 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    313 	lsi->lsi_code = ksi->ksi_code;
    314 	switch (lsi->lsi_signo = frame.sf_sig) {
    315 	case LINUX_SIGILL:
    316 	case LINUX_SIGFPE:
    317 	case LINUX_SIGSEGV:
    318 	case LINUX_SIGBUS:
    319 	case LINUX_SIGTRAP:
    320 		lsi->lsi_addr = ksi->ksi_addr;
    321 		break;
    322 	case LINUX_SIGCHLD:
    323 		lsi->lsi_uid = ksi->ksi_uid;
    324 		lsi->lsi_pid = ksi->ksi_pid;
    325 		lsi->lsi_utime = ksi->ksi_utime;
    326 		lsi->lsi_stime = ksi->ksi_stime;
    327 
    328 		/* We use the same codes */
    329 		lsi->lsi_code = ksi->ksi_code;
    330 		/* XXX is that right? */
    331 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    332 		break;
    333 	case LINUX_SIGIO:
    334 		lsi->lsi_band = ksi->ksi_band;
    335 		lsi->lsi_fd = ksi->ksi_fd;
    336 		break;
    337 	default:
    338 		lsi->lsi_uid = ksi->ksi_uid;
    339 		lsi->lsi_pid = ksi->ksi_pid;
    340 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    341 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    342 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    343 		break;
    344 	}
    345 
    346 	/* Save register context. */
    347 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    348 
    349 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    350 		/*
    351 		 * Process has trashed its stack; give it an illegal
    352 		 * instruction to halt it in its tracks.
    353 		 */
    354 		sigexit(l, SIGILL);
    355 		/* NOTREACHED */
    356 	}
    357 
    358 	/*
    359 	 * Build context to run handler in.
    360 	 */
    361 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    362 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    363 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    364 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    365 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    366 	    (linux_rt_sigcode - linux_sigcode);
    367 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    368 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    369 	tf->tf_esp = (int)fp;
    370 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    371 
    372 	/* Remember that we're now on the signal stack. */
    373 	if (onstack)
    374 		sas->ss_flags |= SS_ONSTACK;
    375 }
    376 
    377 static void
    378 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    379 {
    380 	struct lwp *l = curlwp;
    381 	struct proc *p = l->l_proc;
    382 	struct trapframe *tf;
    383 	struct linux_sigframe *fp, frame;
    384 	int onstack;
    385 	int sig = ksi->ksi_signo;
    386 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    387 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    388 
    389 	tf = l->l_md.md_regs;
    390 
    391 	/* Do we need to jump onto the signal stack? */
    392 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    393 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    394 
    395 	/* Allocate space for the signal handler context. */
    396 	if (onstack)
    397 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    398 		    sas->ss_size);
    399 	else
    400 		fp = (struct linux_sigframe *)tf->tf_esp;
    401 	fp--;
    402 
    403 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    404 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    405 
    406 	/* Build stack frame for signal trampoline. */
    407 	frame.sf_handler = catcher;
    408 	frame.sf_sig = native_to_linux_signo[sig];
    409 
    410 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    411 
    412 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    413 		/*
    414 		 * Process has trashed its stack; give it an illegal
    415 		 * instruction to halt it in its tracks.
    416 		 */
    417 		sigexit(l, SIGILL);
    418 		/* NOTREACHED */
    419 	}
    420 
    421 	/*
    422 	 * Build context to run handler in.
    423 	 */
    424 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    425 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    426 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    427 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    428 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    429 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    430 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    431 	tf->tf_esp = (int)fp;
    432 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    433 
    434 	/* Remember that we're now on the signal stack. */
    435 	if (onstack)
    436 		sas->ss_flags |= SS_ONSTACK;
    437 }
    438 
    439 /*
    440  * System call to cleanup state after a signal
    441  * has been taken.  Reset signal mask and
    442  * stack state from context left by sendsig (above).
    443  * Return to previous pc and psl as specified by
    444  * context left by sendsig. Check carefully to
    445  * make sure that the user has not modified the
    446  * psl to gain improper privileges or to cause
    447  * a machine fault.
    448  */
    449 int
    450 linux_sys_rt_sigreturn(l, v, retval)
    451 	struct lwp *l;
    452 	void *v;
    453 	register_t *retval;
    454 {
    455 	struct linux_sys_rt_sigreturn_args /* {
    456 		syscallarg(struct linux_ucontext *) ucp;
    457 	} */ *uap = v;
    458 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    459 	int error;
    460 
    461 	/*
    462 	 * The trampoline code hands us the context.
    463 	 * It is unsafe to keep track of it ourselves, in the event that a
    464 	 * program jumps out of a signal handler.
    465 	 */
    466 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    467 		return error;
    468 
    469 	/* XXX XAX we can do better here by using more of the ucontext */
    470 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    471 }
    472 
    473 int
    474 linux_sys_sigreturn(l, v, retval)
    475 	struct lwp *l;
    476 	void *v;
    477 	register_t *retval;
    478 {
    479 	struct linux_sys_sigreturn_args /* {
    480 		syscallarg(struct linux_sigcontext *) scp;
    481 	} */ *uap = v;
    482 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    483 	int error;
    484 
    485 	/*
    486 	 * The trampoline code hands us the context.
    487 	 * It is unsafe to keep track of it ourselves, in the event that a
    488 	 * program jumps out of a signal handler.
    489 	 */
    490 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    491 		return error;
    492 	return linux_restore_sigcontext(l, &context, retval);
    493 }
    494 
    495 static int
    496 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    497     register_t *retval)
    498 {
    499 	struct proc *p = l->l_proc;
    500 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    501 	struct trapframe *tf;
    502 	sigset_t mask;
    503 	ssize_t ss_gap;
    504 	/* Restore register context. */
    505 	tf = l->l_md.md_regs;
    506 
    507 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    508 #ifdef VM86
    509 	if (scp->sc_eflags & PSL_VM) {
    510 		void syscall_vm86 __P((struct trapframe *));
    511 
    512 		tf->tf_vm86_gs = scp->sc_gs;
    513 		tf->tf_vm86_fs = scp->sc_fs;
    514 		tf->tf_vm86_es = scp->sc_es;
    515 		tf->tf_vm86_ds = scp->sc_ds;
    516 		set_vflags(l, scp->sc_eflags);
    517 		p->p_md.md_syscall = syscall_vm86;
    518 	} else
    519 #endif
    520 	{
    521 		/*
    522 		 * Check for security violations.  If we're returning to
    523 		 * protected mode, the CPU will validate the segment registers
    524 		 * automatically and generate a trap on violations.  We handle
    525 		 * the trap, rather than doing all of the checking here.
    526 		 */
    527 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    528 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    529 			return EINVAL;
    530 
    531 		tf->tf_gs = scp->sc_gs;
    532 		tf->tf_fs = scp->sc_fs;
    533 		tf->tf_es = scp->sc_es;
    534 		tf->tf_ds = scp->sc_ds;
    535 #ifdef VM86
    536 		if (tf->tf_eflags & PSL_VM)
    537 			(*p->p_emul->e_syscall_intern)(p);
    538 #endif
    539 		tf->tf_eflags = scp->sc_eflags;
    540 	}
    541 	tf->tf_edi = scp->sc_edi;
    542 	tf->tf_esi = scp->sc_esi;
    543 	tf->tf_ebp = scp->sc_ebp;
    544 	tf->tf_ebx = scp->sc_ebx;
    545 	tf->tf_edx = scp->sc_edx;
    546 	tf->tf_ecx = scp->sc_ecx;
    547 	tf->tf_eax = scp->sc_eax;
    548 	tf->tf_eip = scp->sc_eip;
    549 	tf->tf_cs = scp->sc_cs;
    550 	tf->tf_esp = scp->sc_esp_at_signal;
    551 	tf->tf_ss = scp->sc_ss;
    552 
    553 	/* Restore signal stack. */
    554 	/*
    555 	 * Linux really does it this way; it doesn't have space in sigframe
    556 	 * to save the onstack flag.
    557 	 */
    558 	ss_gap = (ssize_t)
    559 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    560 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    561 		sas->ss_flags |= SS_ONSTACK;
    562 	else
    563 		sas->ss_flags &= ~SS_ONSTACK;
    564 
    565 	/* Restore signal mask. */
    566 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    567 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    568 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    569 	return EJUSTRETURN;
    570 }
    571 
    572 #ifdef USER_LDT
    573 
    574 int
    575 linux_read_ldt(l, uap, retval)
    576 	struct lwp *l;
    577 	struct linux_sys_modify_ldt_args /* {
    578 		syscallarg(int) func;
    579 		syscallarg(void *) ptr;
    580 		syscallarg(size_t) bytecount;
    581 	} */ *uap;
    582 	register_t *retval;
    583 {
    584 	struct proc *p = l->l_proc;
    585 	struct i386_get_ldt_args gl;
    586 	int error;
    587 	caddr_t sg;
    588 	char *parms;
    589 
    590 	DPRINTF(("linux_read_ldt!"));
    591 	sg = stackgap_init(p, 0);
    592 
    593 	gl.start = 0;
    594 	gl.desc = SCARG(uap, ptr);
    595 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    596 
    597 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    598 
    599 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    600 		return (error);
    601 
    602 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    603 		return (error);
    604 
    605 	*retval *= sizeof(union descriptor);
    606 	return (0);
    607 }
    608 
    609 struct linux_ldt_info {
    610 	u_int entry_number;
    611 	u_long base_addr;
    612 	u_int limit;
    613 	u_int seg_32bit:1;
    614 	u_int contents:2;
    615 	u_int read_exec_only:1;
    616 	u_int limit_in_pages:1;
    617 	u_int seg_not_present:1;
    618 	u_int useable:1;
    619 };
    620 
    621 int
    622 linux_write_ldt(l, uap, retval)
    623 	struct lwp *l;
    624 	struct linux_sys_modify_ldt_args /* {
    625 		syscallarg(int) func;
    626 		syscallarg(void *) ptr;
    627 		syscallarg(size_t) bytecount;
    628 	} */ *uap;
    629 	register_t *retval;
    630 {
    631 	struct proc *p = l->l_proc;
    632 	struct linux_ldt_info ldt_info;
    633 	struct segment_descriptor sd;
    634 	struct i386_set_ldt_args sl;
    635 	int error;
    636 	caddr_t sg;
    637 	char *parms;
    638 	int oldmode = (int)retval[0];
    639 
    640 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    641 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    642 		return (EINVAL);
    643 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    644 		return error;
    645 	if (ldt_info.entry_number >= 8192)
    646 		return (EINVAL);
    647 	if (ldt_info.contents == 3) {
    648 		if (oldmode)
    649 			return (EINVAL);
    650 		if (ldt_info.seg_not_present)
    651 			return (EINVAL);
    652 	}
    653 
    654 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    655 	    (oldmode || (ldt_info.contents == 0 &&
    656 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    657 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    658 	    ldt_info.useable == 0))) {
    659 		/* this means you should zero the ldt */
    660 		(void)memset(&sd, 0, sizeof(sd));
    661 	} else {
    662 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    663 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    664 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    665 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    666 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    667 		    (!ldt_info.read_exec_only << 1);
    668 		sd.sd_dpl = SEL_UPL;
    669 		sd.sd_p = !ldt_info.seg_not_present;
    670 		sd.sd_def32 = ldt_info.seg_32bit;
    671 		sd.sd_gran = ldt_info.limit_in_pages;
    672 		if (!oldmode)
    673 			sd.sd_xx = ldt_info.useable;
    674 		else
    675 			sd.sd_xx = 0;
    676 	}
    677 	sg = stackgap_init(p, 0);
    678 	sl.start = ldt_info.entry_number;
    679 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    680 	sl.num = 1;
    681 
    682 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    683 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    684 
    685 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    686 
    687 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    688 		return (error);
    689 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    690 		return (error);
    691 
    692 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    693 		return (error);
    694 
    695 	*retval = 0;
    696 	return (0);
    697 }
    698 
    699 #endif /* USER_LDT */
    700 
    701 int
    702 linux_sys_modify_ldt(struct lwp *l, void *v,
    703     register_t *retval)
    704 {
    705 	struct linux_sys_modify_ldt_args /* {
    706 		syscallarg(int) func;
    707 		syscallarg(void *) ptr;
    708 		syscallarg(size_t) bytecount;
    709 	} */ *uap = v;
    710 
    711 	switch (SCARG(uap, func)) {
    712 #ifdef USER_LDT
    713 	case 0:
    714 		return linux_read_ldt(l, uap, retval);
    715 	case 1:
    716 		retval[0] = 1;
    717 		return linux_write_ldt(l, uap, retval);
    718 	case 2:
    719 #ifdef notyet
    720 		return (linux_read_default_ldt(l, uap, retval);
    721 #else
    722 		return (ENOSYS);
    723 #endif
    724 	case 0x11:
    725 		retval[0] = 0;
    726 		return linux_write_ldt(l, uap, retval);
    727 #endif /* USER_LDT */
    728 
    729 	default:
    730 		return (ENOSYS);
    731 	}
    732 }
    733 
    734 /*
    735  * XXX Pathetic hack to make svgalib work. This will fake the major
    736  * device number of an opened VT so that svgalib likes it. grmbl.
    737  * Should probably do it 'wrong the right way' and use a mapping
    738  * array for all major device numbers, and map linux_mknod too.
    739  */
    740 dev_t
    741 linux_fakedev(dev, raw)
    742 	dev_t dev;
    743 	int raw;
    744 {
    745 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    746 	const struct cdevsw *cd = cdevsw_lookup(dev);
    747 
    748 	if (raw) {
    749 #if (NWSDISPLAY > 0)
    750 		extern const struct cdevsw wsdisplay_cdevsw;
    751 		if (cd == &wsdisplay_cdevsw)
    752 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    753 #endif
    754 	}
    755 
    756 	if (cd == &ptc_cdevsw)
    757 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    758 	if (cd == &pts_cdevsw)
    759 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    760 
    761 	return dev;
    762 }
    763 
#if (NWSDISPLAY > 0)
/*
 * Keyboard translation tables exported through linux_keytabs[].
 * They are not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries.
 * NOTE(review): entries look like Linux keymap values indexed by key
 * code -- confirm against the consumer of linux_keytabs.
 */
#define NR_KEYS 128

/* No modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of tables, in the order plain, shift, altgr, altgr, ctrl;
 * the AltGr table deliberately fills two consecutive slots.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
    843 
    844 static struct biosdisk_info *
    845 fd2biosinfo(struct proc *p, struct file *fp)
    846 {
    847 	struct vnode *vp;
    848 	const char *blkname;
    849 	char diskname[16];
    850 	int i;
    851 	struct nativedisk_info *nip;
    852 	struct disklist *dl = x86_alldisks;
    853 
    854 	if (fp->f_type != DTYPE_VNODE)
    855 		return NULL;
    856 	vp = (struct vnode *)fp->f_data;
    857 
    858 	if (vp->v_type != VBLK)
    859 		return NULL;
    860 
    861 	blkname = devsw_blk2name(major(vp->v_rdev));
    862 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    863 	    DISKUNIT(vp->v_rdev));
    864 
    865 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    866 		nip = &dl->dl_nativedisks[i];
    867 		if (strcmp(diskname, nip->ni_devname))
    868 			continue;
    869 		if (nip->ni_nmatches != 0)
    870 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    871 	}
    872 
    873 	return NULL;
    874 }
    875 
    876 
    877 /*
    878  * We come here in a last attempt to satisfy a Linux ioctl() call
    879  */
    880 int
    881 linux_machdepioctl(l, v, retval)
    882 	struct lwp *l;
    883 	void *v;
    884 	register_t *retval;
    885 {
    886 	struct linux_sys_ioctl_args /* {
    887 		syscallarg(int) fd;
    888 		syscallarg(u_long) com;
    889 		syscallarg(caddr_t) data;
    890 	} */ *uap = v;
    891 	struct sys_ioctl_args bia;
    892 	u_long com;
    893 	int error, error1;
    894 #if (NWSDISPLAY > 0)
    895 	struct vt_mode lvt;
    896 	caddr_t bvtp, sg;
    897 	struct kbentry kbe;
    898 #endif
    899 	struct linux_hd_geometry hdg;
    900 	struct linux_hd_big_geometry hdg_big;
    901 	struct biosdisk_info *bip;
    902 	struct filedesc *fdp;
    903 	struct file *fp;
    904 	int fd;
    905 	struct disklabel label, *labp;
    906 	struct partinfo partp;
    907 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    908 	u_long start, biostotal, realtotal;
    909 	u_char heads, sectors;
    910 	u_int cylinders;
    911 	struct ioctl_pt pt;
    912 	struct proc *p = l->l_proc;
    913 
    914 	fd = SCARG(uap, fd);
    915 	SCARG(&bia, fd) = fd;
    916 	SCARG(&bia, data) = SCARG(uap, data);
    917 	com = SCARG(uap, com);
    918 
    919 	fdp = p->p_fd;
    920 
    921 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    922 		return (EBADF);
    923 
    924 	FILE_USE(fp);
    925 
    926 	switch (com) {
    927 #if (NWSDISPLAY > 0)
    928 	case LINUX_KDGKBMODE:
    929 		com = KDGKBMODE;
    930 		break;
    931 	case LINUX_KDSKBMODE:
    932 		com = KDSKBMODE;
    933 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    934 			SCARG(&bia, data) = (caddr_t)K_RAW;
    935 		break;
    936 	case LINUX_KIOCSOUND:
    937 		SCARG(&bia, data) =
    938 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    939 		/* fall through */
    940 	case LINUX_KDMKTONE:
    941 		com = KDMKTONE;
    942 		break;
    943 	case LINUX_KDSETMODE:
    944 		com = KDSETMODE;
    945 		break;
    946 	case LINUX_KDGETMODE:
    947 		/* KD_* values are equal to the wscons numbers */
    948 		com = WSDISPLAYIO_GMODE;
    949 		break;
    950 	case LINUX_KDENABIO:
    951 		com = KDENABIO;
    952 		break;
    953 	case LINUX_KDDISABIO:
    954 		com = KDDISABIO;
    955 		break;
    956 	case LINUX_KDGETLED:
    957 		com = KDGETLED;
    958 		break;
    959 	case LINUX_KDSETLED:
    960 		com = KDSETLED;
    961 		break;
    962 	case LINUX_VT_OPENQRY:
    963 		com = VT_OPENQRY;
    964 		break;
    965 	case LINUX_VT_GETMODE:
    966 		SCARG(&bia, com) = VT_GETMODE;
    967 		/* XXX NJWLWP */
    968 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    969 			goto out;
    970 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    971 		    sizeof (struct vt_mode))))
    972 			goto out;
    973 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    974 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    975 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    976 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    977 		    sizeof (struct vt_mode));
    978 		goto out;
    979 	case LINUX_VT_SETMODE:
    980 		com = VT_SETMODE;
    981 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    982 		    sizeof (struct vt_mode))))
    983 			goto out;
    984 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    985 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    986 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    987 		sg = stackgap_init(p, 0);
    988 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    989 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    990 			goto out;
    991 		SCARG(&bia, data) = bvtp;
    992 		break;
    993 	case LINUX_VT_DISALLOCATE:
    994 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    995 		error = 0;
    996 		goto out;
    997 	case LINUX_VT_RELDISP:
    998 		com = VT_RELDISP;
    999 		break;
   1000 	case LINUX_VT_ACTIVATE:
   1001 		com = VT_ACTIVATE;
   1002 		break;
   1003 	case LINUX_VT_WAITACTIVE:
   1004 		com = VT_WAITACTIVE;
   1005 		break;
   1006 	case LINUX_VT_GETSTATE:
   1007 		com = VT_GETSTATE;
   1008 		break;
   1009 	case LINUX_KDGKBTYPE:
   1010 	    {
   1011 		static const u_int8_t kb101 = KB_101;
   1012 
   1013 		/* This is what Linux does. */
   1014 		error = copyout(&kb101, SCARG(uap, data), 1);
   1015 		goto out;
   1016 	    }
   1017 	case LINUX_KDGKBENT:
   1018 		/*
   1019 		 * The Linux KDGKBENT ioctl is different from the
   1020 		 * SYSV original. So we handle it in machdep code.
   1021 		 * XXX We should use keyboard mapping information
   1022 		 * from wsdisplay, but this would be expensive.
   1023 		 */
   1024 		if ((error = copyin(SCARG(uap, data), &kbe,
   1025 				    sizeof(struct kbentry))))
   1026 			goto out;
   1027 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1028 		    || kbe.kb_index >= NR_KEYS) {
   1029 			error = EINVAL;
   1030 			goto out;
   1031 		}
   1032 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1033 		error = copyout(&kbe, SCARG(uap, data),
   1034 				sizeof(struct kbentry));
   1035 		goto out;
   1036 #endif
   1037 	case LINUX_HDIO_GETGEO:
   1038 	case LINUX_HDIO_GETGEO_BIG:
   1039 		/*
   1040 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1041 		 * if possible (extending its # of cylinders if it's beyond
   1042 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1043 		 * the real geometry) if not found, by returning an
   1044 		 * error. See common/linux_hdio.c
   1045 		 */
   1046 		bip = fd2biosinfo(p, fp);
   1047 		ioctlf = fp->f_ops->fo_ioctl;
   1048 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1049 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1050 		if (error != 0 && error1 != 0) {
   1051 			error = error1;
   1052 			goto out;
   1053 		}
   1054 		labp = error != 0 ? &label : partp.disklab;
   1055 		start = error1 != 0 ? partp.part->p_offset : 0;
   1056 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1057 		    && bip->bi_cyl != 0) {
   1058 			heads = bip->bi_head;
   1059 			sectors = bip->bi_sec;
   1060 			cylinders = bip->bi_cyl;
   1061 			biostotal = heads * sectors * cylinders;
   1062 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1063 			    labp->d_ncylinders;
   1064 			if (realtotal > biostotal)
   1065 				cylinders = realtotal / (heads * sectors);
   1066 		} else {
   1067 			heads = labp->d_ntracks;
   1068 			cylinders = labp->d_ncylinders;
   1069 			sectors = labp->d_nsectors;
   1070 		}
   1071 		if (com == LINUX_HDIO_GETGEO) {
   1072 			hdg.start = start;
   1073 			hdg.heads = heads;
   1074 			hdg.cylinders = cylinders;
   1075 			hdg.sectors = sectors;
   1076 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1077 			goto out;
   1078 		} else {
   1079 			hdg_big.start = start;
   1080 			hdg_big.heads = heads;
   1081 			hdg_big.cylinders = cylinders;
   1082 			hdg_big.sectors = sectors;
   1083 			error = copyout(&hdg_big, SCARG(uap, data),
   1084 			    sizeof hdg_big);
   1085 			goto out;
   1086 		}
   1087 
   1088 	default:
   1089 		/*
   1090 		 * Unknown to us. If it's on a device, just pass it through
   1091 		 * using PTIOCLINUX, the device itself might be able to
   1092 		 * make some sense of it.
   1093 		 * XXX hack: if the function returns EJUSTRETURN,
   1094 		 * it has stuffed a sysctl return value in pt.data.
   1095 		 */
   1096 		ioctlf = fp->f_ops->fo_ioctl;
   1097 		pt.com = SCARG(uap, com);
   1098 		pt.data = SCARG(uap, data);
   1099 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1100 		if (error == EJUSTRETURN) {
   1101 			retval[0] = (register_t)pt.data;
   1102 			error = 0;
   1103 		}
   1104 
   1105 		if (error == ENOTTY) {
   1106 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1107 			    com));
   1108 		}
   1109 		goto out;
   1110 	}
   1111 	SCARG(&bia, com) = com;
   1112 	/* XXX NJWLWP */
   1113 	error = sys_ioctl(curlwp, &bia, retval);
   1114 out:
   1115 	FILE_UNUSE(fp ,l);
   1116 	return error;
   1117 }
   1118 
   1119 /*
   1120  * Set I/O permissions for a process. Just set the maximum level
   1121  * right away (ignoring the argument), otherwise we would have
   1122  * to rely on I/O permission maps, which are not implemented.
   1123  */
   1124 int
   1125 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1126 {
   1127 #if 0
   1128 	struct linux_sys_iopl_args /* {
   1129 		syscallarg(int) level;
   1130 	} */ *uap = v;
   1131 #endif
   1132 	struct trapframe *fp = l->l_md.md_regs;
   1133 
   1134 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1135 	    NULL, NULL, NULL, NULL) != 0)
   1136 		return EPERM;
   1137 	fp->tf_eflags |= PSL_IOPL;
   1138 	*retval = 0;
   1139 	return 0;
   1140 }
   1141 
   1142 /*
   1143  * See above. If a root process tries to set access to an I/O port,
   1144  * just let it have the whole range.
   1145  */
   1146 int
   1147 linux_sys_ioperm(l, v, retval)
   1148 	struct lwp *l;
   1149 	void *v;
   1150 	register_t *retval;
   1151 {
   1152 	struct linux_sys_ioperm_args /* {
   1153 		syscallarg(unsigned int) lo;
   1154 		syscallarg(unsigned int) hi;
   1155 		syscallarg(int) val;
   1156 	} */ *uap = v;
   1157 	struct trapframe *fp = l->l_md.md_regs;
   1158 
   1159 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1160 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1161 	    NULL, NULL) != 0)
   1162 		return EPERM;
   1163 	if (SCARG(uap, val))
   1164 		fp->tf_eflags |= PSL_IOPL;
   1165 	*retval = 0;
   1166 	return 0;
   1167 }
   1168 
   1169 int
   1170 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1171     void *arg)
   1172 {
   1173 	return 0;
   1174 }
   1175