Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.114.4.4
      1 /*	$NetBSD: linux_machdep.c,v 1.114.4.4 2007/01/18 11:32:04 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.114.4.4 2007/01/18 11:32:04 yamt Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
    111 #ifdef USER_LDT
    112 #include <machine/cpu.h>
    113 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    116     register_t *));
    117 #endif
    118 
    119 #ifdef DEBUG_LINUX
    120 #define DPRINTF(a) uprintf a
    121 #else
    122 #define DPRINTF(a)
    123 #endif
    124 
    125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    126 extern struct disklist *x86_alldisks;
    127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    130     const sigset_t *, struct linux_sigcontext *));
    131 static int linux_restore_sigcontext __P((struct lwp *,
    132     struct linux_sigcontext *, register_t *));
    133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    135 
    136 extern char linux_sigcode[], linux_rt_sigcode[];
    137 /*
    138  * Deal with some i386-specific things in the Linux emulation code.
    139  */
    140 
    141 void
    142 linux_setregs(l, epp, stack)
    143 	struct lwp *l;
    144 	struct exec_package *epp;
    145 	u_long stack;
    146 {
    147 	struct pcb *pcb = &l->l_addr->u_pcb;
    148 	struct trapframe *tf;
    149 
    150 #if NNPX > 0
    151 	/* If we were using the FPU, forget about it. */
    152 	if (npxproc == l)
    153 		npxdrop();
    154 #endif
    155 
    156 #ifdef USER_LDT
    157 	pmap_ldt_cleanup(l);
    158 #endif
    159 
    160 	l->l_md.md_flags &= ~MDL_USEDFPU;
    161 
    162 	if (i386_use_fxsave) {
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    165 	} else
    166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    167 
    168 	tf = l->l_md.md_regs;
    169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    173 	tf->tf_edi = 0;
    174 	tf->tf_esi = 0;
    175 	tf->tf_ebp = 0;
    176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    177 	tf->tf_edx = 0;
    178 	tf->tf_ecx = 0;
    179 	tf->tf_eax = 0;
    180 	tf->tf_eip = epp->ep_entry;
    181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    182 	tf->tf_eflags = PSL_USERSET;
    183 	tf->tf_esp = stack;
    184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    185 }
    186 
    187 /*
    188  * Send an interrupt to process.
    189  *
    190  * Stack is set up to allow sigcode stored
    191  * in u. to call routine, followed by kcall
    192  * to sigreturn routine below.  After sigreturn
    193  * resets the signal mask, the stack, and the
    194  * frame pointer, it returns to the user
    195  * specified pc, psl.
    196  */
    197 
    198 void
    199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    200 {
    201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    202 		linux_rt_sendsig(ksi, mask);
    203 	else
    204 		linux_old_sendsig(ksi, mask);
    205 }
    206 
    207 
    208 static void
    209 linux_save_ucontext(l, tf, mask, sas, uc)
    210 	struct lwp *l;
    211 	struct trapframe *tf;
    212 	const sigset_t *mask;
    213 	struct sigaltstack *sas;
    214 	struct linux_ucontext *uc;
    215 {
    216 	uc->uc_flags = 0;
    217 	uc->uc_link = NULL;
    218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    222 }
    223 
    224 static void
    225 linux_save_sigcontext(l, tf, mask, sc)
    226 	struct lwp *l;
    227 	struct trapframe *tf;
    228 	const sigset_t *mask;
    229 	struct linux_sigcontext *sc;
    230 {
    231 	/* Save register context. */
    232 #ifdef VM86
    233 	if (tf->tf_eflags & PSL_VM) {
    234 		sc->sc_gs = tf->tf_vm86_gs;
    235 		sc->sc_fs = tf->tf_vm86_fs;
    236 		sc->sc_es = tf->tf_vm86_es;
    237 		sc->sc_ds = tf->tf_vm86_ds;
    238 		sc->sc_eflags = get_vflags(l);
    239 	} else
    240 #endif
    241 	{
    242 		sc->sc_gs = tf->tf_gs;
    243 		sc->sc_fs = tf->tf_fs;
    244 		sc->sc_es = tf->tf_es;
    245 		sc->sc_ds = tf->tf_ds;
    246 		sc->sc_eflags = tf->tf_eflags;
    247 	}
    248 	sc->sc_edi = tf->tf_edi;
    249 	sc->sc_esi = tf->tf_esi;
    250 	sc->sc_esp = tf->tf_esp;
    251 	sc->sc_ebp = tf->tf_ebp;
    252 	sc->sc_ebx = tf->tf_ebx;
    253 	sc->sc_edx = tf->tf_edx;
    254 	sc->sc_ecx = tf->tf_ecx;
    255 	sc->sc_eax = tf->tf_eax;
    256 	sc->sc_eip = tf->tf_eip;
    257 	sc->sc_cs = tf->tf_cs;
    258 	sc->sc_esp_at_signal = tf->tf_esp;
    259 	sc->sc_ss = tf->tf_ss;
    260 	sc->sc_err = tf->tf_err;
    261 	sc->sc_trapno = tf->tf_trapno;
    262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    263 	sc->sc_387 = NULL;
    264 
    265 	/* Save signal stack. */
    266 	/* Linux doesn't save the onstack flag in sigframe */
    267 
    268 	/* Save signal mask. */
    269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    270 }
    271 
    272 static void
    273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    274 {
    275 	struct lwp *l = curlwp;
    276 	struct proc *p = l->l_proc;
    277 	struct trapframe *tf;
    278 	struct linux_rt_sigframe *fp, frame;
    279 	int onstack, error;
    280 	linux_siginfo_t *lsi;
    281 	int sig = ksi->ksi_signo;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = l->l_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	/*
    309 	 * XXX: the following code assumes that the constants for
    310 	 * siginfo are the same between linux and NetBSD.
    311 	 */
    312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    314 	lsi->lsi_code = ksi->ksi_code;
    315 	switch (lsi->lsi_signo = frame.sf_sig) {
    316 	case LINUX_SIGILL:
    317 	case LINUX_SIGFPE:
    318 	case LINUX_SIGSEGV:
    319 	case LINUX_SIGBUS:
    320 	case LINUX_SIGTRAP:
    321 		lsi->lsi_addr = ksi->ksi_addr;
    322 		break;
    323 	case LINUX_SIGCHLD:
    324 		lsi->lsi_uid = ksi->ksi_uid;
    325 		lsi->lsi_pid = ksi->ksi_pid;
    326 		lsi->lsi_utime = ksi->ksi_utime;
    327 		lsi->lsi_stime = ksi->ksi_stime;
    328 
    329 		/* We use the same codes */
    330 		lsi->lsi_code = ksi->ksi_code;
    331 		/* XXX is that right? */
    332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    333 		break;
    334 	case LINUX_SIGIO:
    335 		lsi->lsi_band = ksi->ksi_band;
    336 		lsi->lsi_fd = ksi->ksi_fd;
    337 		break;
    338 	default:
    339 		lsi->lsi_uid = ksi->ksi_uid;
    340 		lsi->lsi_pid = ksi->ksi_pid;
    341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    344 		break;
    345 	}
    346 
    347 	/* Save register context. */
    348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    349 
    350 	mutex_exit(&p->p_smutex);
    351 	error = copyout(&frame, fp, sizeof(frame));
    352 	mutex_enter(&p->p_smutex);
    353 
    354 	if (error != 0) {
    355 		/*
    356 		 * Process has trashed its stack; give it an illegal
    357 		 * instruction to halt it in its tracks.
    358 		 */
    359 		sigexit(l, SIGILL);
    360 		/* NOTREACHED */
    361 	}
    362 
    363 	/*
    364 	 * Build context to run handler in.
    365 	 */
    366 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    370 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    371 	    (linux_rt_sigcode - linux_sigcode);
    372 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    373 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    374 	tf->tf_esp = (int)fp;
    375 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    376 
    377 	/* Remember that we're now on the signal stack. */
    378 	if (onstack)
    379 		sas->ss_flags |= SS_ONSTACK;
    380 }
    381 
    382 static void
    383 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    384 {
    385 	struct lwp *l = curlwp;
    386 	struct proc *p = l->l_proc;
    387 	struct trapframe *tf;
    388 	struct linux_sigframe *fp, frame;
    389 	int onstack;
    390 	int sig = ksi->ksi_signo;
    391 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    392 	struct sigaltstack *sas = l->l_sigstk;
    393 
    394 	tf = l->l_md.md_regs;
    395 
    396 	/* Do we need to jump onto the signal stack? */
    397 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    398 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    399 
    400 	/* Allocate space for the signal handler context. */
    401 	if (onstack)
    402 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    403 		    sas->ss_size);
    404 	else
    405 		fp = (struct linux_sigframe *)tf->tf_esp;
    406 	fp--;
    407 
    408 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    409 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    410 
    411 	/* Build stack frame for signal trampoline. */
    412 	frame.sf_handler = catcher;
    413 	frame.sf_sig = native_to_linux_signo[sig];
    414 
    415 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    416 
    417 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    418 		/*
    419 		 * Process has trashed its stack; give it an illegal
    420 		 * instruction to halt it in its tracks.
    421 		 */
    422 		sigexit(l, SIGILL);
    423 		/* NOTREACHED */
    424 	}
    425 
    426 	/*
    427 	 * Build context to run handler in.
    428 	 */
    429 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    430 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    431 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    432 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    433 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    434 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    435 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    436 	tf->tf_esp = (int)fp;
    437 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    438 
    439 	/* Remember that we're now on the signal stack. */
    440 	if (onstack)
    441 		sas->ss_flags |= SS_ONSTACK;
    442 }
    443 
    444 /*
    445  * System call to cleanup state after a signal
    446  * has been taken.  Reset signal mask and
    447  * stack state from context left by sendsig (above).
    448  * Return to previous pc and psl as specified by
    449  * context left by sendsig. Check carefully to
    450  * make sure that the user has not modified the
    451  * psl to gain improper privileges or to cause
    452  * a machine fault.
    453  */
    454 int
    455 linux_sys_rt_sigreturn(l, v, retval)
    456 	struct lwp *l;
    457 	void *v;
    458 	register_t *retval;
    459 {
    460 	struct linux_sys_rt_sigreturn_args /* {
    461 		syscallarg(struct linux_ucontext *) ucp;
    462 	} */ *uap = v;
    463 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    464 	int error;
    465 
    466 	/*
    467 	 * The trampoline code hands us the context.
    468 	 * It is unsafe to keep track of it ourselves, in the event that a
    469 	 * program jumps out of a signal handler.
    470 	 */
    471 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    472 		return error;
    473 
    474 	/* XXX XAX we can do better here by using more of the ucontext */
    475 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    476 }
    477 
    478 int
    479 linux_sys_sigreturn(l, v, retval)
    480 	struct lwp *l;
    481 	void *v;
    482 	register_t *retval;
    483 {
    484 	struct linux_sys_sigreturn_args /* {
    485 		syscallarg(struct linux_sigcontext *) scp;
    486 	} */ *uap = v;
    487 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    488 	int error;
    489 
    490 	/*
    491 	 * The trampoline code hands us the context.
    492 	 * It is unsafe to keep track of it ourselves, in the event that a
    493 	 * program jumps out of a signal handler.
    494 	 */
    495 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    496 		return error;
    497 	return linux_restore_sigcontext(l, &context, retval);
    498 }
    499 
    500 static int
    501 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    502     register_t *retval)
    503 {
    504 #if defined(VM86)
    505 	struct proc *p = l->l_proc;
    506 #endif /* defined(VM86) */
    507 	struct sigaltstack *sas = l->l_sigstk;
    508 	struct trapframe *tf;
    509 	sigset_t mask;
    510 	ssize_t ss_gap;
    511 	/* Restore register context. */
    512 	tf = l->l_md.md_regs;
    513 
    514 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    515 #ifdef VM86
    516 	if (scp->sc_eflags & PSL_VM) {
    517 		void syscall_vm86 __P((struct trapframe *));
    518 
    519 		tf->tf_vm86_gs = scp->sc_gs;
    520 		tf->tf_vm86_fs = scp->sc_fs;
    521 		tf->tf_vm86_es = scp->sc_es;
    522 		tf->tf_vm86_ds = scp->sc_ds;
    523 		set_vflags(l, scp->sc_eflags);
    524 		p->p_md.md_syscall = syscall_vm86;
    525 	} else
    526 #endif
    527 	{
    528 		/*
    529 		 * Check for security violations.  If we're returning to
    530 		 * protected mode, the CPU will validate the segment registers
    531 		 * automatically and generate a trap on violations.  We handle
    532 		 * the trap, rather than doing all of the checking here.
    533 		 */
    534 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    535 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    536 			return EINVAL;
    537 
    538 		tf->tf_gs = scp->sc_gs;
    539 		tf->tf_fs = scp->sc_fs;
    540 		tf->tf_es = scp->sc_es;
    541 		tf->tf_ds = scp->sc_ds;
    542 #ifdef VM86
    543 		if (tf->tf_eflags & PSL_VM)
    544 			(*p->p_emul->e_syscall_intern)(p);
    545 #endif
    546 		tf->tf_eflags = scp->sc_eflags;
    547 	}
    548 	tf->tf_edi = scp->sc_edi;
    549 	tf->tf_esi = scp->sc_esi;
    550 	tf->tf_ebp = scp->sc_ebp;
    551 	tf->tf_ebx = scp->sc_ebx;
    552 	tf->tf_edx = scp->sc_edx;
    553 	tf->tf_ecx = scp->sc_ecx;
    554 	tf->tf_eax = scp->sc_eax;
    555 	tf->tf_eip = scp->sc_eip;
    556 	tf->tf_cs = scp->sc_cs;
    557 	tf->tf_esp = scp->sc_esp_at_signal;
    558 	tf->tf_ss = scp->sc_ss;
    559 
    560 	/* Restore signal stack. */
    561 	/*
    562 	 * Linux really does it this way; it doesn't have space in sigframe
    563 	 * to save the onstack flag.
    564 	 */
    565 	ss_gap = (ssize_t)
    566 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    567 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    568 		sas->ss_flags |= SS_ONSTACK;
    569 	else
    570 		sas->ss_flags &= ~SS_ONSTACK;
    571 
    572 	/* Restore signal mask. */
    573 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    574 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    575 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    576 	return EJUSTRETURN;
    577 }
    578 
    579 #ifdef USER_LDT
    580 
    581 int
    582 linux_read_ldt(l, uap, retval)
    583 	struct lwp *l;
    584 	struct linux_sys_modify_ldt_args /* {
    585 		syscallarg(int) func;
    586 		syscallarg(void *) ptr;
    587 		syscallarg(size_t) bytecount;
    588 	} */ *uap;
    589 	register_t *retval;
    590 {
    591 	struct proc *p = l->l_proc;
    592 	struct i386_get_ldt_args gl;
    593 	int error;
    594 	caddr_t sg;
    595 	char *parms;
    596 
    597 	DPRINTF(("linux_read_ldt!"));
    598 	sg = stackgap_init(p, 0);
    599 
    600 	gl.start = 0;
    601 	gl.desc = SCARG(uap, ptr);
    602 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    603 
    604 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    605 
    606 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    607 		return (error);
    608 
    609 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    610 		return (error);
    611 
    612 	*retval *= sizeof(union descriptor);
    613 	return (0);
    614 }
    615 
    616 struct linux_ldt_info {
    617 	u_int entry_number;
    618 	u_long base_addr;
    619 	u_int limit;
    620 	u_int seg_32bit:1;
    621 	u_int contents:2;
    622 	u_int read_exec_only:1;
    623 	u_int limit_in_pages:1;
    624 	u_int seg_not_present:1;
    625 	u_int useable:1;
    626 };
    627 
    628 int
    629 linux_write_ldt(l, uap, retval)
    630 	struct lwp *l;
    631 	struct linux_sys_modify_ldt_args /* {
    632 		syscallarg(int) func;
    633 		syscallarg(void *) ptr;
    634 		syscallarg(size_t) bytecount;
    635 	} */ *uap;
    636 	register_t *retval;
    637 {
    638 	struct proc *p = l->l_proc;
    639 	struct linux_ldt_info ldt_info;
    640 	struct segment_descriptor sd;
    641 	struct i386_set_ldt_args sl;
    642 	int error;
    643 	caddr_t sg;
    644 	char *parms;
    645 	int oldmode = (int)retval[0];
    646 
    647 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    648 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    649 		return (EINVAL);
    650 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    651 		return error;
    652 	if (ldt_info.entry_number >= 8192)
    653 		return (EINVAL);
    654 	if (ldt_info.contents == 3) {
    655 		if (oldmode)
    656 			return (EINVAL);
    657 		if (ldt_info.seg_not_present)
    658 			return (EINVAL);
    659 	}
    660 
    661 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    662 	    (oldmode || (ldt_info.contents == 0 &&
    663 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    664 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    665 	    ldt_info.useable == 0))) {
    666 		/* this means you should zero the ldt */
    667 		(void)memset(&sd, 0, sizeof(sd));
    668 	} else {
    669 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    670 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    671 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    672 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    673 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    674 		    (!ldt_info.read_exec_only << 1);
    675 		sd.sd_dpl = SEL_UPL;
    676 		sd.sd_p = !ldt_info.seg_not_present;
    677 		sd.sd_def32 = ldt_info.seg_32bit;
    678 		sd.sd_gran = ldt_info.limit_in_pages;
    679 		if (!oldmode)
    680 			sd.sd_xx = ldt_info.useable;
    681 		else
    682 			sd.sd_xx = 0;
    683 	}
    684 	sg = stackgap_init(p, 0);
    685 	sl.start = ldt_info.entry_number;
    686 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    687 	sl.num = 1;
    688 
    689 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    690 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    691 
    692 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    693 
    694 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    695 		return (error);
    696 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    697 		return (error);
    698 
    699 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    700 		return (error);
    701 
    702 	*retval = 0;
    703 	return (0);
    704 }
    705 
    706 #endif /* USER_LDT */
    707 
    708 int
    709 linux_sys_modify_ldt(struct lwp *l, void *v,
    710     register_t *retval)
    711 {
    712 	struct linux_sys_modify_ldt_args /* {
    713 		syscallarg(int) func;
    714 		syscallarg(void *) ptr;
    715 		syscallarg(size_t) bytecount;
    716 	} */ *uap = v;
    717 
    718 	switch (SCARG(uap, func)) {
    719 #ifdef USER_LDT
    720 	case 0:
    721 		return linux_read_ldt(l, uap, retval);
    722 	case 1:
    723 		retval[0] = 1;
    724 		return linux_write_ldt(l, uap, retval);
    725 	case 2:
    726 #ifdef notyet
    727 		return (linux_read_default_ldt(l, uap, retval);
    728 #else
    729 		return (ENOSYS);
    730 #endif
    731 	case 0x11:
    732 		retval[0] = 0;
    733 		return linux_write_ldt(l, uap, retval);
    734 #endif /* USER_LDT */
    735 
    736 	default:
    737 		return (ENOSYS);
    738 	}
    739 }
    740 
    741 /*
    742  * XXX Pathetic hack to make svgalib work. This will fake the major
    743  * device number of an opened VT so that svgalib likes it. grmbl.
    744  * Should probably do it 'wrong the right way' and use a mapping
    745  * array for all major device numbers, and map linux_mknod too.
    746  */
    747 dev_t
    748 linux_fakedev(dev, raw)
    749 	dev_t dev;
    750 	int raw;
    751 {
    752 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    753 	const struct cdevsw *cd = cdevsw_lookup(dev);
    754 
    755 	if (raw) {
    756 #if (NWSDISPLAY > 0)
    757 		extern const struct cdevsw wsdisplay_cdevsw;
    758 		if (cd == &wsdisplay_cdevsw)
    759 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    760 #endif
    761 	}
    762 
    763 	if (cd == &ptc_cdevsw)
    764 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    765 	if (cd == &pts_cdevsw)
    766 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    767 
    768 	return dev;
    769 }
    770 
    771 #if (NWSDISPLAY > 0)
    772 /*
    773  * That's not complete, but enough to get an X server running.
    774  */
    775 #define NR_KEYS 128
    776 static const u_short plain_map[NR_KEYS] = {
    777 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    778 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    779 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    780 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    781 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    782 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    783 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    784 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    785 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    786 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    787 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    788 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    789 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    790 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    791 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    792 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    793 }, shift_map[NR_KEYS] = {
    794 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    795 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    796 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    797 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    798 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    799 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    800 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    801 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    802 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    803 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    804 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    805 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    806 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    807 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    808 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    809 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    810 }, altgr_map[NR_KEYS] = {
    811 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    812 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    813 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    814 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    815 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    816 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    817 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    818 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    819 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    820 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    821 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    822 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    823 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    824 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    825 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    826 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    827 }, ctrl_map[NR_KEYS] = {
    828 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    829 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    830 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    831 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    832 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    833 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    834 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    835 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    836 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    837 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    838 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    839 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    840 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    841 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    842 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    843 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    844 };
    845 
    846 const u_short * const linux_keytabs[] = {
    847 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    848 };
    849 #endif
    850 
    851 static struct biosdisk_info *
    852 fd2biosinfo(struct proc *p, struct file *fp)
    853 {
    854 	struct vnode *vp;
    855 	const char *blkname;
    856 	char diskname[16];
    857 	int i;
    858 	struct nativedisk_info *nip;
    859 	struct disklist *dl = x86_alldisks;
    860 
    861 	if (fp->f_type != DTYPE_VNODE)
    862 		return NULL;
    863 	vp = (struct vnode *)fp->f_data;
    864 
    865 	if (vp->v_type != VBLK)
    866 		return NULL;
    867 
    868 	blkname = devsw_blk2name(major(vp->v_rdev));
    869 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    870 	    DISKUNIT(vp->v_rdev));
    871 
    872 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    873 		nip = &dl->dl_nativedisks[i];
    874 		if (strcmp(diskname, nip->ni_devname))
    875 			continue;
    876 		if (nip->ni_nmatches != 0)
    877 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    878 	}
    879 
    880 	return NULL;
    881 }
    882 
    883 
    884 /*
    885  * We come here in a last attempt to satisfy a Linux ioctl() call
    886  */
    887 int
    888 linux_machdepioctl(l, v, retval)
    889 	struct lwp *l;
    890 	void *v;
    891 	register_t *retval;
    892 {
    893 	struct linux_sys_ioctl_args /* {
    894 		syscallarg(int) fd;
    895 		syscallarg(u_long) com;
    896 		syscallarg(caddr_t) data;
    897 	} */ *uap = v;
    898 	struct sys_ioctl_args bia;
    899 	u_long com;
    900 	int error, error1;
    901 #if (NWSDISPLAY > 0)
    902 	struct vt_mode lvt;
    903 	caddr_t bvtp, sg;
    904 	struct kbentry kbe;
    905 #endif
    906 	struct linux_hd_geometry hdg;
    907 	struct linux_hd_big_geometry hdg_big;
    908 	struct biosdisk_info *bip;
    909 	struct filedesc *fdp;
    910 	struct file *fp;
    911 	int fd;
    912 	struct disklabel label, *labp;
    913 	struct partinfo partp;
    914 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    915 	u_long start, biostotal, realtotal;
    916 	u_char heads, sectors;
    917 	u_int cylinders;
    918 	struct ioctl_pt pt;
    919 	struct proc *p = l->l_proc;
    920 
    921 	fd = SCARG(uap, fd);
    922 	SCARG(&bia, fd) = fd;
    923 	SCARG(&bia, data) = SCARG(uap, data);
    924 	com = SCARG(uap, com);
    925 
    926 	fdp = p->p_fd;
    927 
    928 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    929 		return (EBADF);
    930 
    931 	FILE_USE(fp);
    932 
    933 	switch (com) {
    934 #if (NWSDISPLAY > 0)
    935 	case LINUX_KDGKBMODE:
    936 		com = KDGKBMODE;
    937 		break;
    938 	case LINUX_KDSKBMODE:
    939 		com = KDSKBMODE;
    940 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    941 			SCARG(&bia, data) = (caddr_t)K_RAW;
    942 		break;
    943 	case LINUX_KIOCSOUND:
    944 		SCARG(&bia, data) =
    945 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    946 		/* fall through */
    947 	case LINUX_KDMKTONE:
    948 		com = KDMKTONE;
    949 		break;
    950 	case LINUX_KDSETMODE:
    951 		com = KDSETMODE;
    952 		break;
    953 	case LINUX_KDGETMODE:
    954 		/* KD_* values are equal to the wscons numbers */
    955 		com = WSDISPLAYIO_GMODE;
    956 		break;
    957 	case LINUX_KDENABIO:
    958 		com = KDENABIO;
    959 		break;
    960 	case LINUX_KDDISABIO:
    961 		com = KDDISABIO;
    962 		break;
    963 	case LINUX_KDGETLED:
    964 		com = KDGETLED;
    965 		break;
    966 	case LINUX_KDSETLED:
    967 		com = KDSETLED;
    968 		break;
    969 	case LINUX_VT_OPENQRY:
    970 		com = VT_OPENQRY;
    971 		break;
    972 	case LINUX_VT_GETMODE:
    973 		SCARG(&bia, com) = VT_GETMODE;
    974 		/* XXX NJWLWP */
    975 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    976 			goto out;
    977 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    978 		    sizeof (struct vt_mode))))
    979 			goto out;
    980 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    981 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    982 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    983 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    984 		    sizeof (struct vt_mode));
    985 		goto out;
    986 	case LINUX_VT_SETMODE:
    987 		com = VT_SETMODE;
    988 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    989 		    sizeof (struct vt_mode))))
    990 			goto out;
    991 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    992 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    993 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    994 		sg = stackgap_init(p, 0);
    995 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    996 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    997 			goto out;
    998 		SCARG(&bia, data) = bvtp;
    999 		break;
   1000 	case LINUX_VT_DISALLOCATE:
   1001 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1002 		error = 0;
   1003 		goto out;
   1004 	case LINUX_VT_RELDISP:
   1005 		com = VT_RELDISP;
   1006 		break;
   1007 	case LINUX_VT_ACTIVATE:
   1008 		com = VT_ACTIVATE;
   1009 		break;
   1010 	case LINUX_VT_WAITACTIVE:
   1011 		com = VT_WAITACTIVE;
   1012 		break;
   1013 	case LINUX_VT_GETSTATE:
   1014 		com = VT_GETSTATE;
   1015 		break;
   1016 	case LINUX_KDGKBTYPE:
   1017 	    {
   1018 		static const u_int8_t kb101 = KB_101;
   1019 
   1020 		/* This is what Linux does. */
   1021 		error = copyout(&kb101, SCARG(uap, data), 1);
   1022 		goto out;
   1023 	    }
   1024 	case LINUX_KDGKBENT:
   1025 		/*
   1026 		 * The Linux KDGKBENT ioctl is different from the
   1027 		 * SYSV original. So we handle it in machdep code.
   1028 		 * XXX We should use keyboard mapping information
   1029 		 * from wsdisplay, but this would be expensive.
   1030 		 */
   1031 		if ((error = copyin(SCARG(uap, data), &kbe,
   1032 				    sizeof(struct kbentry))))
   1033 			goto out;
   1034 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1035 		    || kbe.kb_index >= NR_KEYS) {
   1036 			error = EINVAL;
   1037 			goto out;
   1038 		}
   1039 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1040 		error = copyout(&kbe, SCARG(uap, data),
   1041 				sizeof(struct kbentry));
   1042 		goto out;
   1043 #endif
   1044 	case LINUX_HDIO_GETGEO:
   1045 	case LINUX_HDIO_GETGEO_BIG:
   1046 		/*
   1047 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1048 		 * if possible (extending its # of cylinders if it's beyond
   1049 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1050 		 * the real geometry) if not found, by returning an
   1051 		 * error. See common/linux_hdio.c
   1052 		 */
   1053 		bip = fd2biosinfo(p, fp);
   1054 		ioctlf = fp->f_ops->fo_ioctl;
   1055 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1056 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1057 		if (error != 0 && error1 != 0) {
   1058 			error = error1;
   1059 			goto out;
   1060 		}
   1061 		labp = error != 0 ? &label : partp.disklab;
   1062 		start = error1 != 0 ? partp.part->p_offset : 0;
   1063 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1064 		    && bip->bi_cyl != 0) {
   1065 			heads = bip->bi_head;
   1066 			sectors = bip->bi_sec;
   1067 			cylinders = bip->bi_cyl;
   1068 			biostotal = heads * sectors * cylinders;
   1069 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1070 			    labp->d_ncylinders;
   1071 			if (realtotal > biostotal)
   1072 				cylinders = realtotal / (heads * sectors);
   1073 		} else {
   1074 			heads = labp->d_ntracks;
   1075 			cylinders = labp->d_ncylinders;
   1076 			sectors = labp->d_nsectors;
   1077 		}
   1078 		if (com == LINUX_HDIO_GETGEO) {
   1079 			hdg.start = start;
   1080 			hdg.heads = heads;
   1081 			hdg.cylinders = cylinders;
   1082 			hdg.sectors = sectors;
   1083 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1084 			goto out;
   1085 		} else {
   1086 			hdg_big.start = start;
   1087 			hdg_big.heads = heads;
   1088 			hdg_big.cylinders = cylinders;
   1089 			hdg_big.sectors = sectors;
   1090 			error = copyout(&hdg_big, SCARG(uap, data),
   1091 			    sizeof hdg_big);
   1092 			goto out;
   1093 		}
   1094 
   1095 	default:
   1096 		/*
   1097 		 * Unknown to us. If it's on a device, just pass it through
   1098 		 * using PTIOCLINUX, the device itself might be able to
   1099 		 * make some sense of it.
   1100 		 * XXX hack: if the function returns EJUSTRETURN,
   1101 		 * it has stuffed a sysctl return value in pt.data.
   1102 		 */
   1103 		ioctlf = fp->f_ops->fo_ioctl;
   1104 		pt.com = SCARG(uap, com);
   1105 		pt.data = SCARG(uap, data);
   1106 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1107 		if (error == EJUSTRETURN) {
   1108 			retval[0] = (register_t)pt.data;
   1109 			error = 0;
   1110 		}
   1111 
   1112 		if (error == ENOTTY) {
   1113 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1114 			    com));
   1115 		}
   1116 		goto out;
   1117 	}
   1118 	SCARG(&bia, com) = com;
   1119 	/* XXX NJWLWP */
   1120 	error = sys_ioctl(curlwp, &bia, retval);
   1121 out:
   1122 	FILE_UNUSE(fp ,l);
   1123 	return error;
   1124 }
   1125 
   1126 /*
   1127  * Set I/O permissions for a process. Just set the maximum level
   1128  * right away (ignoring the argument), otherwise we would have
   1129  * to rely on I/O permission maps, which are not implemented.
   1130  */
   1131 int
   1132 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1133 {
   1134 #if 0
   1135 	struct linux_sys_iopl_args /* {
   1136 		syscallarg(int) level;
   1137 	} */ *uap = v;
   1138 #endif
   1139 	struct trapframe *fp = l->l_md.md_regs;
   1140 
   1141 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1142 	    NULL, NULL, NULL, NULL) != 0)
   1143 		return EPERM;
   1144 	fp->tf_eflags |= PSL_IOPL;
   1145 	*retval = 0;
   1146 	return 0;
   1147 }
   1148 
   1149 /*
   1150  * See above. If a root process tries to set access to an I/O port,
   1151  * just let it have the whole range.
   1152  */
   1153 int
   1154 linux_sys_ioperm(l, v, retval)
   1155 	struct lwp *l;
   1156 	void *v;
   1157 	register_t *retval;
   1158 {
   1159 	struct linux_sys_ioperm_args /* {
   1160 		syscallarg(unsigned int) lo;
   1161 		syscallarg(unsigned int) hi;
   1162 		syscallarg(int) val;
   1163 	} */ *uap = v;
   1164 	struct trapframe *fp = l->l_md.md_regs;
   1165 
   1166 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1167 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1168 	    NULL, NULL) != 0)
   1169 		return EPERM;
   1170 	if (SCARG(uap, val))
   1171 		fp->tf_eflags |= PSL_IOPL;
   1172 	*retval = 0;
   1173 	return 0;
   1174 }
   1175 
   1176 int
   1177 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1178     void *arg)
   1179 {
   1180 	return 0;
   1181 }
   1182