Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.120
      1 /*	$NetBSD: linux_machdep.c,v 1.120 2006/12/26 16:42:06 elad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.120 2006/12/26 16:42:06 elad Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
    111 #ifdef USER_LDT
    112 #include <machine/cpu.h>
    113 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    116     register_t *));
    117 #endif
    118 
    119 #ifdef DEBUG_LINUX
    120 #define DPRINTF(a) uprintf a
    121 #else
    122 #define DPRINTF(a)
    123 #endif
    124 
    125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    126 extern struct disklist *x86_alldisks;
    127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    130     const sigset_t *, struct linux_sigcontext *));
    131 static int linux_restore_sigcontext __P((struct lwp *,
    132     struct linux_sigcontext *, register_t *));
    133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    135 
    136 extern char linux_sigcode[], linux_rt_sigcode[];
    137 /*
    138  * Deal with some i386-specific things in the Linux emulation code.
    139  */
    140 
    141 void
    142 linux_setregs(l, epp, stack)
    143 	struct lwp *l;
    144 	struct exec_package *epp;
    145 	u_long stack;
    146 {
    147 	struct pcb *pcb = &l->l_addr->u_pcb;
    148 	struct trapframe *tf;
    149 
    150 #if NNPX > 0
    151 	/* If we were using the FPU, forget about it. */
    152 	if (npxproc == l)
    153 		npxdrop();
    154 #endif
    155 
    156 #ifdef USER_LDT
    157 	pmap_ldt_cleanup(l);
    158 #endif
    159 
    160 	l->l_md.md_flags &= ~MDL_USEDFPU;
    161 
    162 	if (i386_use_fxsave) {
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    165 	} else
    166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    167 
    168 	tf = l->l_md.md_regs;
    169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    173 	tf->tf_edi = 0;
    174 	tf->tf_esi = 0;
    175 	tf->tf_ebp = 0;
    176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    177 	tf->tf_edx = 0;
    178 	tf->tf_ecx = 0;
    179 	tf->tf_eax = 0;
    180 	tf->tf_eip = epp->ep_entry;
    181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    182 	tf->tf_eflags = PSL_USERSET;
    183 	tf->tf_esp = stack;
    184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    185 }
    186 
    187 /*
    188  * Send an interrupt to process.
    189  *
    190  * Stack is set up to allow sigcode stored
    191  * in u. to call routine, followed by kcall
    192  * to sigreturn routine below.  After sigreturn
    193  * resets the signal mask, the stack, and the
    194  * frame pointer, it returns to the user
    195  * specified pc, psl.
    196  */
    197 
    198 void
    199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    200 {
    201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    202 		linux_rt_sendsig(ksi, mask);
    203 	else
    204 		linux_old_sendsig(ksi, mask);
    205 }
    206 
    207 
    208 static void
    209 linux_save_ucontext(l, tf, mask, sas, uc)
    210 	struct lwp *l;
    211 	struct trapframe *tf;
    212 	const sigset_t *mask;
    213 	struct sigaltstack *sas;
    214 	struct linux_ucontext *uc;
    215 {
    216 	uc->uc_flags = 0;
    217 	uc->uc_link = NULL;
    218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    222 }
    223 
    224 static void
    225 linux_save_sigcontext(l, tf, mask, sc)
    226 	struct lwp *l;
    227 	struct trapframe *tf;
    228 	const sigset_t *mask;
    229 	struct linux_sigcontext *sc;
    230 {
    231 	/* Save register context. */
    232 #ifdef VM86
    233 	if (tf->tf_eflags & PSL_VM) {
    234 		sc->sc_gs = tf->tf_vm86_gs;
    235 		sc->sc_fs = tf->tf_vm86_fs;
    236 		sc->sc_es = tf->tf_vm86_es;
    237 		sc->sc_ds = tf->tf_vm86_ds;
    238 		sc->sc_eflags = get_vflags(l);
    239 	} else
    240 #endif
    241 	{
    242 		sc->sc_gs = tf->tf_gs;
    243 		sc->sc_fs = tf->tf_fs;
    244 		sc->sc_es = tf->tf_es;
    245 		sc->sc_ds = tf->tf_ds;
    246 		sc->sc_eflags = tf->tf_eflags;
    247 	}
    248 	sc->sc_edi = tf->tf_edi;
    249 	sc->sc_esi = tf->tf_esi;
    250 	sc->sc_esp = tf->tf_esp;
    251 	sc->sc_ebp = tf->tf_ebp;
    252 	sc->sc_ebx = tf->tf_ebx;
    253 	sc->sc_edx = tf->tf_edx;
    254 	sc->sc_ecx = tf->tf_ecx;
    255 	sc->sc_eax = tf->tf_eax;
    256 	sc->sc_eip = tf->tf_eip;
    257 	sc->sc_cs = tf->tf_cs;
    258 	sc->sc_esp_at_signal = tf->tf_esp;
    259 	sc->sc_ss = tf->tf_ss;
    260 	sc->sc_err = tf->tf_err;
    261 	sc->sc_trapno = tf->tf_trapno;
    262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    263 	sc->sc_387 = NULL;
    264 
    265 	/* Save signal stack. */
    266 	/* Linux doesn't save the onstack flag in sigframe */
    267 
    268 	/* Save signal mask. */
    269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    270 }
    271 
    272 static void
    273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    274 {
    275 	struct lwp *l = curlwp;
    276 	struct proc *p = l->l_proc;
    277 	struct trapframe *tf;
    278 	struct linux_rt_sigframe *fp, frame;
    279 	int onstack;
    280 	linux_siginfo_t *lsi;
    281 	int sig = ksi->ksi_signo;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	/*
    309 	 * XXX: the following code assumes that the constants for
    310 	 * siginfo are the same between linux and NetBSD.
    311 	 */
    312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    314 	lsi->lsi_code = ksi->ksi_code;
    315 	switch (lsi->lsi_signo = frame.sf_sig) {
    316 	case LINUX_SIGILL:
    317 	case LINUX_SIGFPE:
    318 	case LINUX_SIGSEGV:
    319 	case LINUX_SIGBUS:
    320 	case LINUX_SIGTRAP:
    321 		lsi->lsi_addr = ksi->ksi_addr;
    322 		break;
    323 	case LINUX_SIGCHLD:
    324 		lsi->lsi_uid = ksi->ksi_uid;
    325 		lsi->lsi_pid = ksi->ksi_pid;
    326 		lsi->lsi_utime = ksi->ksi_utime;
    327 		lsi->lsi_stime = ksi->ksi_stime;
    328 
    329 		/* We use the same codes */
    330 		lsi->lsi_code = ksi->ksi_code;
    331 		/* XXX is that right? */
    332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    333 		break;
    334 	case LINUX_SIGIO:
    335 		lsi->lsi_band = ksi->ksi_band;
    336 		lsi->lsi_fd = ksi->ksi_fd;
    337 		break;
    338 	default:
    339 		lsi->lsi_uid = ksi->ksi_uid;
    340 		lsi->lsi_pid = ksi->ksi_pid;
    341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    344 		break;
    345 	}
    346 
    347 	/* Save register context. */
    348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    349 
    350 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    351 		/*
    352 		 * Process has trashed its stack; give it an illegal
    353 		 * instruction to halt it in its tracks.
    354 		 */
    355 		sigexit(l, SIGILL);
    356 		/* NOTREACHED */
    357 	}
    358 
    359 	/*
    360 	 * Build context to run handler in.
    361 	 */
    362 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    363 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    364 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    365 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    366 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    367 	    (linux_rt_sigcode - linux_sigcode);
    368 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    369 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    370 	tf->tf_esp = (int)fp;
    371 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    372 
    373 	/* Remember that we're now on the signal stack. */
    374 	if (onstack)
    375 		sas->ss_flags |= SS_ONSTACK;
    376 }
    377 
    378 static void
    379 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    380 {
    381 	struct lwp *l = curlwp;
    382 	struct proc *p = l->l_proc;
    383 	struct trapframe *tf;
    384 	struct linux_sigframe *fp, frame;
    385 	int onstack;
    386 	int sig = ksi->ksi_signo;
    387 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    388 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    389 
    390 	tf = l->l_md.md_regs;
    391 
    392 	/* Do we need to jump onto the signal stack? */
    393 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    394 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    395 
    396 	/* Allocate space for the signal handler context. */
    397 	if (onstack)
    398 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    399 		    sas->ss_size);
    400 	else
    401 		fp = (struct linux_sigframe *)tf->tf_esp;
    402 	fp--;
    403 
    404 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    405 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    406 
    407 	/* Build stack frame for signal trampoline. */
    408 	frame.sf_handler = catcher;
    409 	frame.sf_sig = native_to_linux_signo[sig];
    410 
    411 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    412 
    413 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    414 		/*
    415 		 * Process has trashed its stack; give it an illegal
    416 		 * instruction to halt it in its tracks.
    417 		 */
    418 		sigexit(l, SIGILL);
    419 		/* NOTREACHED */
    420 	}
    421 
    422 	/*
    423 	 * Build context to run handler in.
    424 	 */
    425 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    426 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    427 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    428 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    429 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    430 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    431 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    432 	tf->tf_esp = (int)fp;
    433 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    434 
    435 	/* Remember that we're now on the signal stack. */
    436 	if (onstack)
    437 		sas->ss_flags |= SS_ONSTACK;
    438 }
    439 
    440 /*
    441  * System call to cleanup state after a signal
    442  * has been taken.  Reset signal mask and
    443  * stack state from context left by sendsig (above).
    444  * Return to previous pc and psl as specified by
    445  * context left by sendsig. Check carefully to
    446  * make sure that the user has not modified the
    447  * psl to gain improper privileges or to cause
    448  * a machine fault.
    449  */
    450 int
    451 linux_sys_rt_sigreturn(l, v, retval)
    452 	struct lwp *l;
    453 	void *v;
    454 	register_t *retval;
    455 {
    456 	struct linux_sys_rt_sigreturn_args /* {
    457 		syscallarg(struct linux_ucontext *) ucp;
    458 	} */ *uap = v;
    459 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    460 	int error;
    461 
    462 	/*
    463 	 * The trampoline code hands us the context.
    464 	 * It is unsafe to keep track of it ourselves, in the event that a
    465 	 * program jumps out of a signal handler.
    466 	 */
    467 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    468 		return error;
    469 
    470 	/* XXX XAX we can do better here by using more of the ucontext */
    471 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    472 }
    473 
    474 int
    475 linux_sys_sigreturn(l, v, retval)
    476 	struct lwp *l;
    477 	void *v;
    478 	register_t *retval;
    479 {
    480 	struct linux_sys_sigreturn_args /* {
    481 		syscallarg(struct linux_sigcontext *) scp;
    482 	} */ *uap = v;
    483 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    484 	int error;
    485 
    486 	/*
    487 	 * The trampoline code hands us the context.
    488 	 * It is unsafe to keep track of it ourselves, in the event that a
    489 	 * program jumps out of a signal handler.
    490 	 */
    491 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    492 		return error;
    493 	return linux_restore_sigcontext(l, &context, retval);
    494 }
    495 
    496 static int
    497 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    498     register_t *retval)
    499 {
    500 	struct proc *p = l->l_proc;
    501 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    502 	struct trapframe *tf;
    503 	sigset_t mask;
    504 	ssize_t ss_gap;
    505 	/* Restore register context. */
    506 	tf = l->l_md.md_regs;
    507 
    508 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    509 #ifdef VM86
    510 	if (scp->sc_eflags & PSL_VM) {
    511 		void syscall_vm86 __P((struct trapframe *));
    512 
    513 		tf->tf_vm86_gs = scp->sc_gs;
    514 		tf->tf_vm86_fs = scp->sc_fs;
    515 		tf->tf_vm86_es = scp->sc_es;
    516 		tf->tf_vm86_ds = scp->sc_ds;
    517 		set_vflags(l, scp->sc_eflags);
    518 		p->p_md.md_syscall = syscall_vm86;
    519 	} else
    520 #endif
    521 	{
    522 		/*
    523 		 * Check for security violations.  If we're returning to
    524 		 * protected mode, the CPU will validate the segment registers
    525 		 * automatically and generate a trap on violations.  We handle
    526 		 * the trap, rather than doing all of the checking here.
    527 		 */
    528 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    529 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    530 			return EINVAL;
    531 
    532 		tf->tf_gs = scp->sc_gs;
    533 		tf->tf_fs = scp->sc_fs;
    534 		tf->tf_es = scp->sc_es;
    535 		tf->tf_ds = scp->sc_ds;
    536 #ifdef VM86
    537 		if (tf->tf_eflags & PSL_VM)
    538 			(*p->p_emul->e_syscall_intern)(p);
    539 #endif
    540 		tf->tf_eflags = scp->sc_eflags;
    541 	}
    542 	tf->tf_edi = scp->sc_edi;
    543 	tf->tf_esi = scp->sc_esi;
    544 	tf->tf_ebp = scp->sc_ebp;
    545 	tf->tf_ebx = scp->sc_ebx;
    546 	tf->tf_edx = scp->sc_edx;
    547 	tf->tf_ecx = scp->sc_ecx;
    548 	tf->tf_eax = scp->sc_eax;
    549 	tf->tf_eip = scp->sc_eip;
    550 	tf->tf_cs = scp->sc_cs;
    551 	tf->tf_esp = scp->sc_esp_at_signal;
    552 	tf->tf_ss = scp->sc_ss;
    553 
    554 	/* Restore signal stack. */
    555 	/*
    556 	 * Linux really does it this way; it doesn't have space in sigframe
    557 	 * to save the onstack flag.
    558 	 */
    559 	ss_gap = (ssize_t)
    560 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    561 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    562 		sas->ss_flags |= SS_ONSTACK;
    563 	else
    564 		sas->ss_flags &= ~SS_ONSTACK;
    565 
    566 	/* Restore signal mask. */
    567 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    568 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    569 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    570 	return EJUSTRETURN;
    571 }
    572 
    573 #ifdef USER_LDT
    574 
    575 int
    576 linux_read_ldt(l, uap, retval)
    577 	struct lwp *l;
    578 	struct linux_sys_modify_ldt_args /* {
    579 		syscallarg(int) func;
    580 		syscallarg(void *) ptr;
    581 		syscallarg(size_t) bytecount;
    582 	} */ *uap;
    583 	register_t *retval;
    584 {
    585 	struct proc *p = l->l_proc;
    586 	struct i386_get_ldt_args gl;
    587 	int error;
    588 	caddr_t sg;
    589 	char *parms;
    590 
    591 	DPRINTF(("linux_read_ldt!"));
    592 	sg = stackgap_init(p, 0);
    593 
    594 	gl.start = 0;
    595 	gl.desc = SCARG(uap, ptr);
    596 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    597 
    598 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    599 
    600 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    601 		return (error);
    602 
    603 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    604 		return (error);
    605 
    606 	*retval *= sizeof(union descriptor);
    607 	return (0);
    608 }
    609 
    610 struct linux_ldt_info {
    611 	u_int entry_number;
    612 	u_long base_addr;
    613 	u_int limit;
    614 	u_int seg_32bit:1;
    615 	u_int contents:2;
    616 	u_int read_exec_only:1;
    617 	u_int limit_in_pages:1;
    618 	u_int seg_not_present:1;
    619 	u_int useable:1;
    620 };
    621 
    622 int
    623 linux_write_ldt(l, uap, retval)
    624 	struct lwp *l;
    625 	struct linux_sys_modify_ldt_args /* {
    626 		syscallarg(int) func;
    627 		syscallarg(void *) ptr;
    628 		syscallarg(size_t) bytecount;
    629 	} */ *uap;
    630 	register_t *retval;
    631 {
    632 	struct proc *p = l->l_proc;
    633 	struct linux_ldt_info ldt_info;
    634 	struct segment_descriptor sd;
    635 	struct i386_set_ldt_args sl;
    636 	int error;
    637 	caddr_t sg;
    638 	char *parms;
    639 	int oldmode = (int)retval[0];
    640 
    641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    643 		return (EINVAL);
    644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    645 		return error;
    646 	if (ldt_info.entry_number >= 8192)
    647 		return (EINVAL);
    648 	if (ldt_info.contents == 3) {
    649 		if (oldmode)
    650 			return (EINVAL);
    651 		if (ldt_info.seg_not_present)
    652 			return (EINVAL);
    653 	}
    654 
    655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    656 	    (oldmode || (ldt_info.contents == 0 &&
    657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    659 	    ldt_info.useable == 0))) {
    660 		/* this means you should zero the ldt */
    661 		(void)memset(&sd, 0, sizeof(sd));
    662 	} else {
    663 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    664 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    665 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    666 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    667 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    668 		    (!ldt_info.read_exec_only << 1);
    669 		sd.sd_dpl = SEL_UPL;
    670 		sd.sd_p = !ldt_info.seg_not_present;
    671 		sd.sd_def32 = ldt_info.seg_32bit;
    672 		sd.sd_gran = ldt_info.limit_in_pages;
    673 		if (!oldmode)
    674 			sd.sd_xx = ldt_info.useable;
    675 		else
    676 			sd.sd_xx = 0;
    677 	}
    678 	sg = stackgap_init(p, 0);
    679 	sl.start = ldt_info.entry_number;
    680 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    681 	sl.num = 1;
    682 
    683 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    684 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    685 
    686 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    687 
    688 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    689 		return (error);
    690 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    691 		return (error);
    692 
    693 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    694 		return (error);
    695 
    696 	*retval = 0;
    697 	return (0);
    698 }
    699 
    700 #endif /* USER_LDT */
    701 
    702 int
    703 linux_sys_modify_ldt(struct lwp *l, void *v,
    704     register_t *retval)
    705 {
    706 	struct linux_sys_modify_ldt_args /* {
    707 		syscallarg(int) func;
    708 		syscallarg(void *) ptr;
    709 		syscallarg(size_t) bytecount;
    710 	} */ *uap = v;
    711 
    712 	switch (SCARG(uap, func)) {
    713 #ifdef USER_LDT
    714 	case 0:
    715 		return linux_read_ldt(l, uap, retval);
    716 	case 1:
    717 		retval[0] = 1;
    718 		return linux_write_ldt(l, uap, retval);
    719 	case 2:
    720 #ifdef notyet
    721 		return (linux_read_default_ldt(l, uap, retval);
    722 #else
    723 		return (ENOSYS);
    724 #endif
    725 	case 0x11:
    726 		retval[0] = 0;
    727 		return linux_write_ldt(l, uap, retval);
    728 #endif /* USER_LDT */
    729 
    730 	default:
    731 		return (ENOSYS);
    732 	}
    733 }
    734 
    735 /*
    736  * XXX Pathetic hack to make svgalib work. This will fake the major
    737  * device number of an opened VT so that svgalib likes it. grmbl.
    738  * Should probably do it 'wrong the right way' and use a mapping
    739  * array for all major device numbers, and map linux_mknod too.
    740  */
    741 dev_t
    742 linux_fakedev(dev, raw)
    743 	dev_t dev;
    744 	int raw;
    745 {
    746 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    747 	const struct cdevsw *cd = cdevsw_lookup(dev);
    748 
    749 	if (raw) {
    750 #if (NWSDISPLAY > 0)
    751 		extern const struct cdevsw wsdisplay_cdevsw;
    752 		if (cd == &wsdisplay_cdevsw)
    753 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    754 #endif
    755 	}
    756 
    757 	if (cd == &ptc_cdevsw)
    758 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    759 	if (cd == &pts_cdevsw)
    760 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    761 
    762 	return dev;
    763 }
    764 
    765 #if (NWSDISPLAY > 0)
    766 /*
    767  * That's not complete, but enough to get an X server running.
    768  */
#define NR_KEYS 128

/*
 * One table per modifier state, NR_KEYS entries each.  The comment at
 * the start of every row gives the index of its first entry.
 * NOTE(review): entries look like Linux keymap values (type in the high
 * byte, value in the low byte) -- confirm against the consumer.
 */

/* No modifier. */
static const u_short plain_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036,
	/* 0x08 */ 0x0037, 0x0038, 0x0039, 0x0030, 0x002d, 0x003d, 0x007f, 0x0009,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0b65, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x005b, 0x005d, 0x0201, 0x0702, 0x0b61, 0x0b73,
	/* 0x20 */ 0x0b64, 0x0b66, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x003b,
	/* 0x28 */ 0x0027, 0x0060, 0x0700, 0x005c, 0x0b7a, 0x0b78, 0x0b63, 0x0b76,
	/* 0x30 */ 0x0b62, 0x0b6e, 0x0b6d, 0x002c, 0x002e, 0x002f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0209, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003c, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Shift held. */
static const u_short shift_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x001b, 0x0021, 0x0040, 0x0023, 0x0024, 0x0025, 0x005e,
	/* 0x08 */ 0x0026, 0x002a, 0x0028, 0x0029, 0x005f, 0x002b, 0x007f, 0x0009,
	/* 0x10 */ 0x0b51, 0x0b57, 0x0b45, 0x0b52, 0x0b54, 0x0b59, 0x0b55, 0x0b49,
	/* 0x18 */ 0x0b4f, 0x0b50, 0x007b, 0x007d, 0x0201, 0x0702, 0x0b41, 0x0b53,
	/* 0x20 */ 0x0b44, 0x0b46, 0x0b47, 0x0b48, 0x0b4a, 0x0b4b, 0x0b4c, 0x003a,
	/* 0x28 */ 0x0022, 0x007e, 0x0700, 0x007c, 0x0b5a, 0x0b58, 0x0b43, 0x0b56,
	/* 0x30 */ 0x0b42, 0x0b4e, 0x0b4d, 0x003c, 0x003e, 0x003f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0020, 0x0207, 0x010a, 0x010b, 0x010c, 0x010d, 0x010e,
	/* 0x40 */ 0x010f, 0x0110, 0x0111, 0x0112, 0x0113, 0x0213, 0x0203, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x003e, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x020b, 0x0601, 0x0602, 0x0117, 0x0600, 0x020a, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* AltGr held. */
static const u_short altgr_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0040, 0x0200, 0x0024, 0x0200, 0x0200,
	/* 0x08 */ 0x007b, 0x005b, 0x005d, 0x007d, 0x005c, 0x0200, 0x0200, 0x0200,
	/* 0x10 */ 0x0b71, 0x0b77, 0x0918, 0x0b72, 0x0b74, 0x0b79, 0x0b75, 0x0b69,
	/* 0x18 */ 0x0b6f, 0x0b70, 0x0200, 0x007e, 0x0201, 0x0702, 0x0914, 0x0b73,
	/* 0x20 */ 0x0917, 0x0919, 0x0b67, 0x0b68, 0x0b6a, 0x0b6b, 0x0b6c, 0x0200,
	/* 0x28 */ 0x0200, 0x0200, 0x0700, 0x0200, 0x0b7a, 0x0b78, 0x0916, 0x0b76,
	/* 0x30 */ 0x0915, 0x0b6e, 0x0b6d, 0x0200, 0x0200, 0x0200, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0200, 0x0207, 0x050c, 0x050d, 0x050e, 0x050f, 0x0510,
	/* 0x40 */ 0x0511, 0x0512, 0x0513, 0x0514, 0x0515, 0x0208, 0x0202, 0x0911,
	/* 0x48 */ 0x0912, 0x0913, 0x030b, 0x090e, 0x090f, 0x0910, 0x030a, 0x090b,
	/* 0x50 */ 0x090c, 0x090d, 0x090a, 0x0310, 0x0206, 0x0200, 0x007c, 0x0516,
	/* 0x58 */ 0x0517, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x0200, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/* Control held. */
static const u_short ctrl_map[NR_KEYS] = {
	/* 0x00 */ 0x0200, 0x0200, 0x0200, 0x0000, 0x001b, 0x001c, 0x001d, 0x001e,
	/* 0x08 */ 0x001f, 0x007f, 0x0200, 0x0200, 0x001f, 0x0200, 0x0008, 0x0200,
	/* 0x10 */ 0x0011, 0x0017, 0x0005, 0x0012, 0x0014, 0x0019, 0x0015, 0x0009,
	/* 0x18 */ 0x000f, 0x0010, 0x001b, 0x001d, 0x0201, 0x0702, 0x0001, 0x0013,
	/* 0x20 */ 0x0004, 0x0006, 0x0007, 0x0008, 0x000a, 0x000b, 0x000c, 0x0200,
	/* 0x28 */ 0x0007, 0x0000, 0x0700, 0x001c, 0x001a, 0x0018, 0x0003, 0x0016,
	/* 0x30 */ 0x0002, 0x000e, 0x000d, 0x0200, 0x020e, 0x007f, 0x0700, 0x030c,
	/* 0x38 */ 0x0703, 0x0000, 0x0207, 0x0100, 0x0101, 0x0102, 0x0103, 0x0104,
	/* 0x40 */ 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, 0x0208, 0x0204, 0x0307,
	/* 0x48 */ 0x0308, 0x0309, 0x030b, 0x0304, 0x0305, 0x0306, 0x030a, 0x0301,
	/* 0x50 */ 0x0302, 0x0303, 0x0300, 0x0310, 0x0206, 0x0200, 0x0200, 0x010a,
	/* 0x58 */ 0x010b, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
	/* 0x60 */ 0x030e, 0x0702, 0x030d, 0x001c, 0x0701, 0x0205, 0x0114, 0x0603,
	/* 0x68 */ 0x0118, 0x0601, 0x0602, 0x0117, 0x0600, 0x0119, 0x0115, 0x0116,
	/* 0x70 */ 0x011a, 0x010c, 0x010d, 0x011b, 0x011c, 0x0110, 0x0311, 0x011d,
	/* 0x78 */ 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200, 0x0200,
};

/*
 * Table of tables.  Slots 2 and 3 both refer to altgr_map.
 * NOTE(review): the slot number presumably is the Linux keymap table
 * number (0 plain, 1 shift, 2 altgr, 3 shift+altgr, 4 ctrl) -- confirm.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    843 #endif
    844 
    845 static struct biosdisk_info *
    846 fd2biosinfo(struct proc *p, struct file *fp)
    847 {
    848 	struct vnode *vp;
    849 	const char *blkname;
    850 	char diskname[16];
    851 	int i;
    852 	struct nativedisk_info *nip;
    853 	struct disklist *dl = x86_alldisks;
    854 
    855 	if (fp->f_type != DTYPE_VNODE)
    856 		return NULL;
    857 	vp = (struct vnode *)fp->f_data;
    858 
    859 	if (vp->v_type != VBLK)
    860 		return NULL;
    861 
    862 	blkname = devsw_blk2name(major(vp->v_rdev));
    863 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    864 	    DISKUNIT(vp->v_rdev));
    865 
    866 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    867 		nip = &dl->dl_nativedisks[i];
    868 		if (strcmp(diskname, nip->ni_devname))
    869 			continue;
    870 		if (nip->ni_nmatches != 0)
    871 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    872 	}
    873 
    874 	return NULL;
    875 }
    876 
    877 
    878 /*
    879  * We come here in a last attempt to satisfy a Linux ioctl() call
    880  */
    881 int
    882 linux_machdepioctl(l, v, retval)
    883 	struct lwp *l;
    884 	void *v;
    885 	register_t *retval;
    886 {
    887 	struct linux_sys_ioctl_args /* {
    888 		syscallarg(int) fd;
    889 		syscallarg(u_long) com;
    890 		syscallarg(caddr_t) data;
    891 	} */ *uap = v;
    892 	struct sys_ioctl_args bia;
    893 	u_long com;
    894 	int error, error1;
    895 #if (NWSDISPLAY > 0)
    896 	struct vt_mode lvt;
    897 	caddr_t bvtp, sg;
    898 	struct kbentry kbe;
    899 #endif
    900 	struct linux_hd_geometry hdg;
    901 	struct linux_hd_big_geometry hdg_big;
    902 	struct biosdisk_info *bip;
    903 	struct filedesc *fdp;
    904 	struct file *fp;
    905 	int fd;
    906 	struct disklabel label, *labp;
    907 	struct partinfo partp;
    908 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    909 	u_long start, biostotal, realtotal;
    910 	u_char heads, sectors;
    911 	u_int cylinders;
    912 	struct ioctl_pt pt;
    913 	struct proc *p = l->l_proc;
    914 
    915 	fd = SCARG(uap, fd);
    916 	SCARG(&bia, fd) = fd;
    917 	SCARG(&bia, data) = SCARG(uap, data);
    918 	com = SCARG(uap, com);
    919 
    920 	fdp = p->p_fd;
    921 
    922 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    923 		return (EBADF);
    924 
    925 	FILE_USE(fp);
    926 
    927 	switch (com) {
    928 #if (NWSDISPLAY > 0)
    929 	case LINUX_KDGKBMODE:
    930 		com = KDGKBMODE;
    931 		break;
    932 	case LINUX_KDSKBMODE:
    933 		com = KDSKBMODE;
    934 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    935 			SCARG(&bia, data) = (caddr_t)K_RAW;
    936 		break;
    937 	case LINUX_KIOCSOUND:
    938 		SCARG(&bia, data) =
    939 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    940 		/* fall through */
    941 	case LINUX_KDMKTONE:
    942 		com = KDMKTONE;
    943 		break;
    944 	case LINUX_KDSETMODE:
    945 		com = KDSETMODE;
    946 		break;
    947 	case LINUX_KDGETMODE:
    948 		/* KD_* values are equal to the wscons numbers */
    949 		com = WSDISPLAYIO_GMODE;
    950 		break;
    951 	case LINUX_KDENABIO:
    952 		com = KDENABIO;
    953 		break;
    954 	case LINUX_KDDISABIO:
    955 		com = KDDISABIO;
    956 		break;
    957 	case LINUX_KDGETLED:
    958 		com = KDGETLED;
    959 		break;
    960 	case LINUX_KDSETLED:
    961 		com = KDSETLED;
    962 		break;
    963 	case LINUX_VT_OPENQRY:
    964 		com = VT_OPENQRY;
    965 		break;
    966 	case LINUX_VT_GETMODE:
    967 		SCARG(&bia, com) = VT_GETMODE;
    968 		/* XXX NJWLWP */
    969 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    970 			goto out;
    971 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    972 		    sizeof (struct vt_mode))))
    973 			goto out;
    974 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    975 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    976 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    977 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    978 		    sizeof (struct vt_mode));
    979 		goto out;
    980 	case LINUX_VT_SETMODE:
    981 		com = VT_SETMODE;
    982 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    983 		    sizeof (struct vt_mode))))
    984 			goto out;
    985 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    986 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    987 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    988 		sg = stackgap_init(p, 0);
    989 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    990 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    991 			goto out;
    992 		SCARG(&bia, data) = bvtp;
    993 		break;
    994 	case LINUX_VT_DISALLOCATE:
    995 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    996 		error = 0;
    997 		goto out;
    998 	case LINUX_VT_RELDISP:
    999 		com = VT_RELDISP;
   1000 		break;
   1001 	case LINUX_VT_ACTIVATE:
   1002 		com = VT_ACTIVATE;
   1003 		break;
   1004 	case LINUX_VT_WAITACTIVE:
   1005 		com = VT_WAITACTIVE;
   1006 		break;
   1007 	case LINUX_VT_GETSTATE:
   1008 		com = VT_GETSTATE;
   1009 		break;
   1010 	case LINUX_KDGKBTYPE:
   1011 	    {
   1012 		static const u_int8_t kb101 = KB_101;
   1013 
   1014 		/* This is what Linux does. */
   1015 		error = copyout(&kb101, SCARG(uap, data), 1);
   1016 		goto out;
   1017 	    }
   1018 	case LINUX_KDGKBENT:
   1019 		/*
   1020 		 * The Linux KDGKBENT ioctl is different from the
   1021 		 * SYSV original. So we handle it in machdep code.
   1022 		 * XXX We should use keyboard mapping information
   1023 		 * from wsdisplay, but this would be expensive.
   1024 		 */
   1025 		if ((error = copyin(SCARG(uap, data), &kbe,
   1026 				    sizeof(struct kbentry))))
   1027 			goto out;
   1028 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1029 		    || kbe.kb_index >= NR_KEYS) {
   1030 			error = EINVAL;
   1031 			goto out;
   1032 		}
   1033 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1034 		error = copyout(&kbe, SCARG(uap, data),
   1035 				sizeof(struct kbentry));
   1036 		goto out;
   1037 #endif
   1038 	case LINUX_HDIO_GETGEO:
   1039 	case LINUX_HDIO_GETGEO_BIG:
   1040 		/*
   1041 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1042 		 * if possible (extending its # of cylinders if it's beyond
   1043 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1044 		 * the real geometry) if not found, by returning an
   1045 		 * error. See common/linux_hdio.c
   1046 		 */
   1047 		bip = fd2biosinfo(p, fp);
   1048 		ioctlf = fp->f_ops->fo_ioctl;
   1049 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1050 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1051 		if (error != 0 && error1 != 0) {
   1052 			error = error1;
   1053 			goto out;
   1054 		}
   1055 		labp = error != 0 ? &label : partp.disklab;
   1056 		start = error1 != 0 ? partp.part->p_offset : 0;
   1057 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1058 		    && bip->bi_cyl != 0) {
   1059 			heads = bip->bi_head;
   1060 			sectors = bip->bi_sec;
   1061 			cylinders = bip->bi_cyl;
   1062 			biostotal = heads * sectors * cylinders;
   1063 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1064 			    labp->d_ncylinders;
   1065 			if (realtotal > biostotal)
   1066 				cylinders = realtotal / (heads * sectors);
   1067 		} else {
   1068 			heads = labp->d_ntracks;
   1069 			cylinders = labp->d_ncylinders;
   1070 			sectors = labp->d_nsectors;
   1071 		}
   1072 		if (com == LINUX_HDIO_GETGEO) {
   1073 			hdg.start = start;
   1074 			hdg.heads = heads;
   1075 			hdg.cylinders = cylinders;
   1076 			hdg.sectors = sectors;
   1077 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1078 			goto out;
   1079 		} else {
   1080 			hdg_big.start = start;
   1081 			hdg_big.heads = heads;
   1082 			hdg_big.cylinders = cylinders;
   1083 			hdg_big.sectors = sectors;
   1084 			error = copyout(&hdg_big, SCARG(uap, data),
   1085 			    sizeof hdg_big);
   1086 			goto out;
   1087 		}
   1088 
   1089 	default:
   1090 		/*
   1091 		 * Unknown to us. If it's on a device, just pass it through
   1092 		 * using PTIOCLINUX, the device itself might be able to
   1093 		 * make some sense of it.
   1094 		 * XXX hack: if the function returns EJUSTRETURN,
   1095 		 * it has stuffed a sysctl return value in pt.data.
   1096 		 */
   1097 		ioctlf = fp->f_ops->fo_ioctl;
   1098 		pt.com = SCARG(uap, com);
   1099 		pt.data = SCARG(uap, data);
   1100 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1101 		if (error == EJUSTRETURN) {
   1102 			retval[0] = (register_t)pt.data;
   1103 			error = 0;
   1104 		}
   1105 
   1106 		if (error == ENOTTY) {
   1107 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1108 			    com));
   1109 		}
   1110 		goto out;
   1111 	}
   1112 	SCARG(&bia, com) = com;
   1113 	/* XXX NJWLWP */
   1114 	error = sys_ioctl(curlwp, &bia, retval);
   1115 out:
   1116 	FILE_UNUSE(fp ,l);
   1117 	return error;
   1118 }
   1119 
   1120 /*
   1121  * Set I/O permissions for a process. Just set the maximum level
   1122  * right away (ignoring the argument), otherwise we would have
   1123  * to rely on I/O permission maps, which are not implemented.
   1124  */
   1125 int
   1126 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1127 {
   1128 #if 0
   1129 	struct linux_sys_iopl_args /* {
   1130 		syscallarg(int) level;
   1131 	} */ *uap = v;
   1132 #endif
   1133 	struct trapframe *fp = l->l_md.md_regs;
   1134 
   1135 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1136 	    NULL, NULL, NULL, NULL) != 0)
   1137 		return EPERM;
   1138 	fp->tf_eflags |= PSL_IOPL;
   1139 	*retval = 0;
   1140 	return 0;
   1141 }
   1142 
   1143 /*
   1144  * See above. If a root process tries to set access to an I/O port,
   1145  * just let it have the whole range.
   1146  */
   1147 int
   1148 linux_sys_ioperm(l, v, retval)
   1149 	struct lwp *l;
   1150 	void *v;
   1151 	register_t *retval;
   1152 {
   1153 	struct linux_sys_ioperm_args /* {
   1154 		syscallarg(unsigned int) lo;
   1155 		syscallarg(unsigned int) hi;
   1156 		syscallarg(int) val;
   1157 	} */ *uap = v;
   1158 	struct trapframe *fp = l->l_md.md_regs;
   1159 
   1160 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1161 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1162 	    NULL, NULL) != 0)
   1163 		return EPERM;
   1164 	if (SCARG(uap, val))
   1165 		fp->tf_eflags |= PSL_IOPL;
   1166 	*retval = 0;
   1167 	return 0;
   1168 }
   1169 
   1170 int
   1171 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1172     void *arg)
   1173 {
   1174 	return 0;
   1175 }
   1176