Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.116
      1 /*	$NetBSD: linux_machdep.c,v 1.116 2006/10/12 01:30:48 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.116 2006/10/12 01:30:48 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
    111 #ifdef USER_LDT
    112 #include <machine/cpu.h>
    113 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    116     register_t *));
    117 #endif
    118 
    119 #ifdef DEBUG_LINUX
    120 #define DPRINTF(a) uprintf a
    121 #else
    122 #define DPRINTF(a)
    123 #endif
    124 
    125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    126 extern struct disklist *x86_alldisks;
    127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    130     const sigset_t *, struct linux_sigcontext *));
    131 static int linux_restore_sigcontext __P((struct lwp *,
    132     struct linux_sigcontext *, register_t *));
    133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    135 
    136 extern char linux_sigcode[], linux_rt_sigcode[];
    137 /*
    138  * Deal with some i386-specific things in the Linux emulation code.
    139  */
    140 
    141 void
    142 linux_setregs(l, epp, stack)
    143 	struct lwp *l;
    144 	struct exec_package *epp;
    145 	u_long stack;
    146 {
    147 	struct pcb *pcb = &l->l_addr->u_pcb;
    148 	struct trapframe *tf;
    149 
    150 #if NNPX > 0
    151 	/* If we were using the FPU, forget about it. */
    152 	if (npxproc == l)
    153 		npxdrop();
    154 #endif
    155 
    156 #ifdef USER_LDT
    157 	pmap_ldt_cleanup(l);
    158 #endif
    159 
    160 	l->l_md.md_flags &= ~MDL_USEDFPU;
    161 
    162 	if (i386_use_fxsave) {
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    165 	} else
    166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    167 
    168 	tf = l->l_md.md_regs;
    169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    173 	tf->tf_edi = 0;
    174 	tf->tf_esi = 0;
    175 	tf->tf_ebp = 0;
    176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    177 	tf->tf_edx = 0;
    178 	tf->tf_ecx = 0;
    179 	tf->tf_eax = 0;
    180 	tf->tf_eip = epp->ep_entry;
    181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    182 	tf->tf_eflags = PSL_USERSET;
    183 	tf->tf_esp = stack;
    184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    185 }
    186 
    187 /*
    188  * Send an interrupt to process.
    189  *
    190  * Stack is set up to allow sigcode stored
    191  * in u. to call routine, followed by kcall
    192  * to sigreturn routine below.  After sigreturn
    193  * resets the signal mask, the stack, and the
    194  * frame pointer, it returns to the user
    195  * specified pc, psl.
    196  */
    197 
    198 void
    199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    200 {
    201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    202 		linux_rt_sendsig(ksi, mask);
    203 	else
    204 		linux_old_sendsig(ksi, mask);
    205 }
    206 
    207 
    208 static void
    209 linux_save_ucontext(l, tf, mask, sas, uc)
    210 	struct lwp *l;
    211 	struct trapframe *tf;
    212 	const sigset_t *mask;
    213 	struct sigaltstack *sas;
    214 	struct linux_ucontext *uc;
    215 {
    216 	uc->uc_flags = 0;
    217 	uc->uc_link = NULL;
    218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    222 }
    223 
    224 static void
    225 linux_save_sigcontext(l, tf, mask, sc)
    226 	struct lwp *l;
    227 	struct trapframe *tf;
    228 	const sigset_t *mask;
    229 	struct linux_sigcontext *sc;
    230 {
    231 	/* Save register context. */
    232 #ifdef VM86
    233 	if (tf->tf_eflags & PSL_VM) {
    234 		sc->sc_gs = tf->tf_vm86_gs;
    235 		sc->sc_fs = tf->tf_vm86_fs;
    236 		sc->sc_es = tf->tf_vm86_es;
    237 		sc->sc_ds = tf->tf_vm86_ds;
    238 		sc->sc_eflags = get_vflags(l);
    239 	} else
    240 #endif
    241 	{
    242 		sc->sc_gs = tf->tf_gs;
    243 		sc->sc_fs = tf->tf_fs;
    244 		sc->sc_es = tf->tf_es;
    245 		sc->sc_ds = tf->tf_ds;
    246 		sc->sc_eflags = tf->tf_eflags;
    247 	}
    248 	sc->sc_edi = tf->tf_edi;
    249 	sc->sc_esi = tf->tf_esi;
    250 	sc->sc_esp = tf->tf_esp;
    251 	sc->sc_ebp = tf->tf_ebp;
    252 	sc->sc_ebx = tf->tf_ebx;
    253 	sc->sc_edx = tf->tf_edx;
    254 	sc->sc_ecx = tf->tf_ecx;
    255 	sc->sc_eax = tf->tf_eax;
    256 	sc->sc_eip = tf->tf_eip;
    257 	sc->sc_cs = tf->tf_cs;
    258 	sc->sc_esp_at_signal = tf->tf_esp;
    259 	sc->sc_ss = tf->tf_ss;
    260 	sc->sc_err = tf->tf_err;
    261 	sc->sc_trapno = tf->tf_trapno;
    262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    263 	sc->sc_387 = NULL;
    264 
    265 	/* Save signal stack. */
    266 	/* Linux doesn't save the onstack flag in sigframe */
    267 
    268 	/* Save signal mask. */
    269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    270 }
    271 
    272 static void
    273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    274 {
    275 	struct lwp *l = curlwp;
    276 	struct proc *p = l->l_proc;
    277 	struct trapframe *tf;
    278 	struct linux_rt_sigframe *fp, frame;
    279 	int onstack;
    280 	linux_siginfo_t *lsi;
    281 	int sig = ksi->ksi_signo;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	/*
    309 	 * XXX: the following code assumes that the constants for
    310 	 * siginfo are the same between linux and NetBSD.
    311 	 */
    312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    314 	lsi->lsi_code = ksi->ksi_code;
    315 	switch (lsi->lsi_signo = frame.sf_sig) {
    316 	case LINUX_SIGILL:
    317 	case LINUX_SIGFPE:
    318 	case LINUX_SIGSEGV:
    319 	case LINUX_SIGBUS:
    320 	case LINUX_SIGTRAP:
    321 		lsi->lsi_addr = ksi->ksi_addr;
    322 		break;
    323 	case LINUX_SIGCHLD:
    324 		lsi->lsi_uid = ksi->ksi_uid;
    325 		lsi->lsi_pid = ksi->ksi_pid;
    326 		lsi->lsi_utime = ksi->ksi_utime;
    327 		lsi->lsi_stime = ksi->ksi_stime;
    328 
    329 		/* We use the same codes */
    330 		lsi->lsi_code = ksi->ksi_code;
    331 		/* XXX is that right? */
    332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    333 		break;
    334 	case LINUX_SIGIO:
    335 		lsi->lsi_band = ksi->ksi_band;
    336 		lsi->lsi_fd = ksi->ksi_fd;
    337 		break;
    338 	default:
    339 		lsi->lsi_uid = ksi->ksi_uid;
    340 		lsi->lsi_pid = ksi->ksi_pid;
    341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    344 		break;
    345 	}
    346 
    347 	/* Save register context. */
    348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    349 
    350 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    351 		/*
    352 		 * Process has trashed its stack; give it an illegal
    353 		 * instruction to halt it in its tracks.
    354 		 */
    355 		sigexit(l, SIGILL);
    356 		/* NOTREACHED */
    357 	}
    358 
    359 	/*
    360 	 * Build context to run handler in.
    361 	 */
    362 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    363 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    364 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    365 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    366 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    367 	    (linux_rt_sigcode - linux_sigcode);
    368 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    369 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    370 	tf->tf_esp = (int)fp;
    371 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    372 
    373 	/* Remember that we're now on the signal stack. */
    374 	if (onstack)
    375 		sas->ss_flags |= SS_ONSTACK;
    376 }
    377 
    378 static void
    379 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    380 {
    381 	struct lwp *l = curlwp;
    382 	struct proc *p = l->l_proc;
    383 	struct trapframe *tf;
    384 	struct linux_sigframe *fp, frame;
    385 	int onstack;
    386 	int sig = ksi->ksi_signo;
    387 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    388 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    389 
    390 	tf = l->l_md.md_regs;
    391 
    392 	/* Do we need to jump onto the signal stack? */
    393 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    394 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    395 
    396 	/* Allocate space for the signal handler context. */
    397 	if (onstack)
    398 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    399 		    sas->ss_size);
    400 	else
    401 		fp = (struct linux_sigframe *)tf->tf_esp;
    402 	fp--;
    403 
    404 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    405 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    406 
    407 	/* Build stack frame for signal trampoline. */
    408 	frame.sf_handler = catcher;
    409 	frame.sf_sig = native_to_linux_signo[sig];
    410 
    411 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    412 
    413 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    414 		/*
    415 		 * Process has trashed its stack; give it an illegal
    416 		 * instruction to halt it in its tracks.
    417 		 */
    418 		sigexit(l, SIGILL);
    419 		/* NOTREACHED */
    420 	}
    421 
    422 	/*
    423 	 * Build context to run handler in.
    424 	 */
    425 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    426 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    427 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    428 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    429 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    430 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    431 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    432 	tf->tf_esp = (int)fp;
    433 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    434 
    435 	/* Remember that we're now on the signal stack. */
    436 	if (onstack)
    437 		sas->ss_flags |= SS_ONSTACK;
    438 }
    439 
    440 /*
    441  * System call to cleanup state after a signal
    442  * has been taken.  Reset signal mask and
    443  * stack state from context left by sendsig (above).
    444  * Return to previous pc and psl as specified by
    445  * context left by sendsig. Check carefully to
    446  * make sure that the user has not modified the
    447  * psl to gain improper privileges or to cause
    448  * a machine fault.
    449  */
    450 int
    451 linux_sys_rt_sigreturn(l, v, retval)
    452 	struct lwp *l;
    453 	void *v;
    454 	register_t *retval;
    455 {
    456 	struct linux_sys_rt_sigreturn_args /* {
    457 		syscallarg(struct linux_ucontext *) ucp;
    458 	} */ *uap = v;
    459 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    460 	int error;
    461 
    462 	/*
    463 	 * The trampoline code hands us the context.
    464 	 * It is unsafe to keep track of it ourselves, in the event that a
    465 	 * program jumps out of a signal handler.
    466 	 */
    467 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    468 		return error;
    469 
    470 	/* XXX XAX we can do better here by using more of the ucontext */
    471 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    472 }
    473 
    474 int
    475 linux_sys_sigreturn(l, v, retval)
    476 	struct lwp *l;
    477 	void *v;
    478 	register_t *retval;
    479 {
    480 	struct linux_sys_sigreturn_args /* {
    481 		syscallarg(struct linux_sigcontext *) scp;
    482 	} */ *uap = v;
    483 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    484 	int error;
    485 
    486 	/*
    487 	 * The trampoline code hands us the context.
    488 	 * It is unsafe to keep track of it ourselves, in the event that a
    489 	 * program jumps out of a signal handler.
    490 	 */
    491 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    492 		return error;
    493 	return linux_restore_sigcontext(l, &context, retval);
    494 }
    495 
    496 static int
    497 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    498     register_t *retval __unused)
    499 {
    500 	struct proc *p = l->l_proc;
    501 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    502 	struct trapframe *tf;
    503 	sigset_t mask;
    504 	ssize_t ss_gap;
    505 	/* Restore register context. */
    506 	tf = l->l_md.md_regs;
    507 
    508 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    509 #ifdef VM86
    510 	if (scp->sc_eflags & PSL_VM) {
    511 		void syscall_vm86 __P((struct trapframe *));
    512 
    513 		tf->tf_vm86_gs = scp->sc_gs;
    514 		tf->tf_vm86_fs = scp->sc_fs;
    515 		tf->tf_vm86_es = scp->sc_es;
    516 		tf->tf_vm86_ds = scp->sc_ds;
    517 		set_vflags(l, scp->sc_eflags);
    518 		p->p_md.md_syscall = syscall_vm86;
    519 	} else
    520 #endif
    521 	{
    522 		/*
    523 		 * Check for security violations.  If we're returning to
    524 		 * protected mode, the CPU will validate the segment registers
    525 		 * automatically and generate a trap on violations.  We handle
    526 		 * the trap, rather than doing all of the checking here.
    527 		 */
    528 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    529 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    530 			return EINVAL;
    531 
    532 		tf->tf_gs = scp->sc_gs;
    533 		tf->tf_fs = scp->sc_fs;
    534 		tf->tf_es = scp->sc_es;
    535 		tf->tf_ds = scp->sc_ds;
    536 #ifdef VM86
    537 		if (tf->tf_eflags & PSL_VM)
    538 			(*p->p_emul->e_syscall_intern)(p);
    539 #endif
    540 		tf->tf_eflags = scp->sc_eflags;
    541 	}
    542 	tf->tf_edi = scp->sc_edi;
    543 	tf->tf_esi = scp->sc_esi;
    544 	tf->tf_ebp = scp->sc_ebp;
    545 	tf->tf_ebx = scp->sc_ebx;
    546 	tf->tf_edx = scp->sc_edx;
    547 	tf->tf_ecx = scp->sc_ecx;
    548 	tf->tf_eax = scp->sc_eax;
    549 	tf->tf_eip = scp->sc_eip;
    550 	tf->tf_cs = scp->sc_cs;
    551 	tf->tf_esp = scp->sc_esp_at_signal;
    552 	tf->tf_ss = scp->sc_ss;
    553 
    554 	/* Restore signal stack. */
    555 	/*
    556 	 * Linux really does it this way; it doesn't have space in sigframe
    557 	 * to save the onstack flag.
    558 	 */
    559 	ss_gap = (ssize_t)
    560 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    561 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    562 		sas->ss_flags |= SS_ONSTACK;
    563 	else
    564 		sas->ss_flags &= ~SS_ONSTACK;
    565 
    566 	/* Restore signal mask. */
    567 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    568 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    569 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    570 	return EJUSTRETURN;
    571 }
    572 
    573 #ifdef USER_LDT
    574 
    575 int
    576 linux_read_ldt(l, uap, retval)
    577 	struct lwp *l;
    578 	struct linux_sys_modify_ldt_args /* {
    579 		syscallarg(int) func;
    580 		syscallarg(void *) ptr;
    581 		syscallarg(size_t) bytecount;
    582 	} */ *uap;
    583 	register_t *retval;
    584 {
    585 	struct proc *p = l->l_proc;
    586 	struct i386_get_ldt_args gl;
    587 	int error;
    588 	caddr_t sg;
    589 	char *parms;
    590 
    591 	DPRINTF(("linux_read_ldt!"));
    592 	sg = stackgap_init(p, 0);
    593 
    594 	gl.start = 0;
    595 	gl.desc = SCARG(uap, ptr);
    596 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    597 
    598 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    599 
    600 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    601 		return (error);
    602 
    603 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    604 		return (error);
    605 
    606 	*retval *= sizeof(union descriptor);
    607 	return (0);
    608 }
    609 
    610 struct linux_ldt_info {
    611 	u_int entry_number;
    612 	u_long base_addr;
    613 	u_int limit;
    614 	u_int seg_32bit:1;
    615 	u_int contents:2;
    616 	u_int read_exec_only:1;
    617 	u_int limit_in_pages:1;
    618 	u_int seg_not_present:1;
    619 	u_int useable:1;
    620 };
    621 
    622 int
    623 linux_write_ldt(l, uap, retval)
    624 	struct lwp *l;
    625 	struct linux_sys_modify_ldt_args /* {
    626 		syscallarg(int) func;
    627 		syscallarg(void *) ptr;
    628 		syscallarg(size_t) bytecount;
    629 	} */ *uap;
    630 	register_t *retval;
    631 {
    632 	struct proc *p = l->l_proc;
    633 	struct linux_ldt_info ldt_info;
    634 	struct segment_descriptor sd;
    635 	struct i386_set_ldt_args sl;
    636 	int error;
    637 	caddr_t sg;
    638 	char *parms;
    639 	int oldmode = (int)retval[0];
    640 
    641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    643 		return (EINVAL);
    644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    645 		return error;
    646 	if (ldt_info.entry_number >= 8192)
    647 		return (EINVAL);
    648 	if (ldt_info.contents == 3) {
    649 		if (oldmode)
    650 			return (EINVAL);
    651 		if (ldt_info.seg_not_present)
    652 			return (EINVAL);
    653 	}
    654 
    655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    656 	    (oldmode || (ldt_info.contents == 0 &&
    657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    659 	    ldt_info.useable == 0))) {
    660 		/* this means you should zero the ldt */
    661 		(void)memset(&sd, 0, sizeof(sd));
    662 	} else {
    663 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    664 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    665 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    666 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    667 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    668 		    (!ldt_info.read_exec_only << 1);
    669 		sd.sd_dpl = SEL_UPL;
    670 		sd.sd_p = !ldt_info.seg_not_present;
    671 		sd.sd_def32 = ldt_info.seg_32bit;
    672 		sd.sd_gran = ldt_info.limit_in_pages;
    673 		if (!oldmode)
    674 			sd.sd_xx = ldt_info.useable;
    675 		else
    676 			sd.sd_xx = 0;
    677 	}
    678 	sg = stackgap_init(p, 0);
    679 	sl.start = ldt_info.entry_number;
    680 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    681 	sl.num = 1;
    682 
    683 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    684 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    685 
    686 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    687 
    688 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    689 		return (error);
    690 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    691 		return (error);
    692 
    693 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    694 		return (error);
    695 
    696 	*retval = 0;
    697 	return (0);
    698 }
    699 
    700 #endif /* USER_LDT */
    701 
    702 int
    703 linux_sys_modify_ldt(l, v, retval)
    704 	struct lwp *l;
    705 	void *v;
    706 	register_t *retval;
    707 {
    708 	struct linux_sys_modify_ldt_args /* {
    709 		syscallarg(int) func;
    710 		syscallarg(void *) ptr;
    711 		syscallarg(size_t) bytecount;
    712 	} */ *uap = v;
    713 
    714 	switch (SCARG(uap, func)) {
    715 #ifdef USER_LDT
    716 	case 0:
    717 		return linux_read_ldt(l, uap, retval);
    718 	case 1:
    719 		retval[0] = 1;
    720 		return linux_write_ldt(l, uap, retval);
    721 	case 2:
    722 #ifdef notyet
    723 		return (linux_read_default_ldt(l, uap, retval);
    724 #else
    725 		return (ENOSYS);
    726 #endif
    727 	case 0x11:
    728 		retval[0] = 0;
    729 		return linux_write_ldt(l, uap, retval);
    730 #endif /* USER_LDT */
    731 
    732 	default:
    733 		return (ENOSYS);
    734 	}
    735 }
    736 
    737 /*
    738  * XXX Pathetic hack to make svgalib work. This will fake the major
    739  * device number of an opened VT so that svgalib likes it. grmbl.
    740  * Should probably do it 'wrong the right way' and use a mapping
    741  * array for all major device numbers, and map linux_mknod too.
    742  */
    743 dev_t
    744 linux_fakedev(dev, raw)
    745 	dev_t dev;
    746 	int raw;
    747 {
    748 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    749 	const struct cdevsw *cd = cdevsw_lookup(dev);
    750 
    751 	if (raw) {
    752 #if (NWSDISPLAY > 0)
    753 		extern const struct cdevsw wsdisplay_cdevsw;
    754 		if (cd == &wsdisplay_cdevsw)
    755 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    756 #endif
    757 	}
    758 
    759 	if (cd == &ptc_cdevsw)
    760 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    761 	if (cd == &pts_cdevsw)
    762 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    763 
    764 	return dev;
    765 }
    766 
    767 #if (NWSDISPLAY > 0)
    768 /*
    769  * That's not complete, but enough to get an X server running.
    770  */
/* Number of entries in each key map below. */
#define NR_KEYS 128

/* Key map used with no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with Shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Key map used with Ctrl. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of the key maps above.  Slots 2 and 3 both refer to the AltGr
 * map.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    845 #endif
    846 
    847 static struct biosdisk_info *
    848 fd2biosinfo(struct proc *p __unused, struct file *fp)
    849 {
    850 	struct vnode *vp;
    851 	const char *blkname;
    852 	char diskname[16];
    853 	int i;
    854 	struct nativedisk_info *nip;
    855 	struct disklist *dl = x86_alldisks;
    856 
    857 	if (fp->f_type != DTYPE_VNODE)
    858 		return NULL;
    859 	vp = (struct vnode *)fp->f_data;
    860 
    861 	if (vp->v_type != VBLK)
    862 		return NULL;
    863 
    864 	blkname = devsw_blk2name(major(vp->v_rdev));
    865 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    866 	    DISKUNIT(vp->v_rdev));
    867 
    868 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    869 		nip = &dl->dl_nativedisks[i];
    870 		if (strcmp(diskname, nip->ni_devname))
    871 			continue;
    872 		if (nip->ni_nmatches != 0)
    873 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    874 	}
    875 
    876 	return NULL;
    877 }
    878 
    879 
    880 /*
    881  * We come here in a last attempt to satisfy a Linux ioctl() call
    882  */
    883 int
    884 linux_machdepioctl(l, v, retval)
    885 	struct lwp *l;
    886 	void *v;
    887 	register_t *retval;
    888 {
    889 	struct linux_sys_ioctl_args /* {
    890 		syscallarg(int) fd;
    891 		syscallarg(u_long) com;
    892 		syscallarg(caddr_t) data;
    893 	} */ *uap = v;
    894 	struct sys_ioctl_args bia;
    895 	u_long com;
    896 	int error, error1;
    897 #if (NWSDISPLAY > 0)
    898 	struct vt_mode lvt;
    899 	caddr_t bvtp, sg;
    900 	struct kbentry kbe;
    901 #endif
    902 	struct linux_hd_geometry hdg;
    903 	struct linux_hd_big_geometry hdg_big;
    904 	struct biosdisk_info *bip;
    905 	struct filedesc *fdp;
    906 	struct file *fp;
    907 	int fd;
    908 	struct disklabel label, *labp;
    909 	struct partinfo partp;
    910 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    911 	u_long start, biostotal, realtotal;
    912 	u_char heads, sectors;
    913 	u_int cylinders;
    914 	struct ioctl_pt pt;
    915 	struct proc *p = l->l_proc;
    916 
    917 	fd = SCARG(uap, fd);
    918 	SCARG(&bia, fd) = fd;
    919 	SCARG(&bia, data) = SCARG(uap, data);
    920 	com = SCARG(uap, com);
    921 
    922 	fdp = p->p_fd;
    923 
    924 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    925 		return (EBADF);
    926 
    927 	FILE_USE(fp);
    928 
    929 	switch (com) {
    930 #if (NWSDISPLAY > 0)
    931 	case LINUX_KDGKBMODE:
    932 		com = KDGKBMODE;
    933 		break;
    934 	case LINUX_KDSKBMODE:
    935 		com = KDSKBMODE;
    936 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    937 			SCARG(&bia, data) = (caddr_t)K_RAW;
    938 		break;
    939 	case LINUX_KIOCSOUND:
    940 		SCARG(&bia, data) =
    941 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    942 		/* fall through */
    943 	case LINUX_KDMKTONE:
    944 		com = KDMKTONE;
    945 		break;
    946 	case LINUX_KDSETMODE:
    947 		com = KDSETMODE;
    948 		break;
    949 	case LINUX_KDGETMODE:
    950 		/* KD_* values are equal to the wscons numbers */
    951 		com = WSDISPLAYIO_GMODE;
    952 		break;
    953 	case LINUX_KDENABIO:
    954 		com = KDENABIO;
    955 		break;
    956 	case LINUX_KDDISABIO:
    957 		com = KDDISABIO;
    958 		break;
    959 	case LINUX_KDGETLED:
    960 		com = KDGETLED;
    961 		break;
    962 	case LINUX_KDSETLED:
    963 		com = KDSETLED;
    964 		break;
    965 	case LINUX_VT_OPENQRY:
    966 		com = VT_OPENQRY;
    967 		break;
    968 	case LINUX_VT_GETMODE:
    969 		SCARG(&bia, com) = VT_GETMODE;
    970 		/* XXX NJWLWP */
    971 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    972 			goto out;
    973 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    974 		    sizeof (struct vt_mode))))
    975 			goto out;
    976 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    977 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    978 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    979 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    980 		    sizeof (struct vt_mode));
    981 		goto out;
    982 	case LINUX_VT_SETMODE:
    983 		com = VT_SETMODE;
    984 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    985 		    sizeof (struct vt_mode))))
    986 			goto out;
    987 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    988 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    989 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    990 		sg = stackgap_init(p, 0);
    991 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    992 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    993 			goto out;
    994 		SCARG(&bia, data) = bvtp;
    995 		break;
    996 	case LINUX_VT_DISALLOCATE:
    997 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    998 		error = 0;
    999 		goto out;
   1000 	case LINUX_VT_RELDISP:
   1001 		com = VT_RELDISP;
   1002 		break;
   1003 	case LINUX_VT_ACTIVATE:
   1004 		com = VT_ACTIVATE;
   1005 		break;
   1006 	case LINUX_VT_WAITACTIVE:
   1007 		com = VT_WAITACTIVE;
   1008 		break;
   1009 	case LINUX_VT_GETSTATE:
   1010 		com = VT_GETSTATE;
   1011 		break;
   1012 	case LINUX_KDGKBTYPE:
   1013 	    {
   1014 		static const u_int8_t kb101 = KB_101;
   1015 
   1016 		/* This is what Linux does. */
   1017 		error = copyout(&kb101, SCARG(uap, data), 1);
   1018 		goto out;
   1019 	    }
   1020 	case LINUX_KDGKBENT:
   1021 		/*
   1022 		 * The Linux KDGKBENT ioctl is different from the
   1023 		 * SYSV original. So we handle it in machdep code.
   1024 		 * XXX We should use keyboard mapping information
   1025 		 * from wsdisplay, but this would be expensive.
   1026 		 */
   1027 		if ((error = copyin(SCARG(uap, data), &kbe,
   1028 				    sizeof(struct kbentry))))
   1029 			goto out;
   1030 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1031 		    || kbe.kb_index >= NR_KEYS) {
   1032 			error = EINVAL;
   1033 			goto out;
   1034 		}
   1035 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1036 		error = copyout(&kbe, SCARG(uap, data),
   1037 				sizeof(struct kbentry));
   1038 		goto out;
   1039 #endif
   1040 	case LINUX_HDIO_GETGEO:
   1041 	case LINUX_HDIO_GETGEO_BIG:
   1042 		/*
   1043 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1044 		 * if possible (extending its # of cylinders if it's beyond
   1045 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1046 		 * the real geometry) if not found, by returning an
   1047 		 * error. See common/linux_hdio.c
   1048 		 */
   1049 		bip = fd2biosinfo(p, fp);
   1050 		ioctlf = fp->f_ops->fo_ioctl;
   1051 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1052 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1053 		if (error != 0 && error1 != 0) {
   1054 			error = error1;
   1055 			goto out;
   1056 		}
   1057 		labp = error != 0 ? &label : partp.disklab;
   1058 		start = error1 != 0 ? partp.part->p_offset : 0;
   1059 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1060 		    && bip->bi_cyl != 0) {
   1061 			heads = bip->bi_head;
   1062 			sectors = bip->bi_sec;
   1063 			cylinders = bip->bi_cyl;
   1064 			biostotal = heads * sectors * cylinders;
   1065 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1066 			    labp->d_ncylinders;
   1067 			if (realtotal > biostotal)
   1068 				cylinders = realtotal / (heads * sectors);
   1069 		} else {
   1070 			heads = labp->d_ntracks;
   1071 			cylinders = labp->d_ncylinders;
   1072 			sectors = labp->d_nsectors;
   1073 		}
   1074 		if (com == LINUX_HDIO_GETGEO) {
   1075 			hdg.start = start;
   1076 			hdg.heads = heads;
   1077 			hdg.cylinders = cylinders;
   1078 			hdg.sectors = sectors;
   1079 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1080 			goto out;
   1081 		} else {
   1082 			hdg_big.start = start;
   1083 			hdg_big.heads = heads;
   1084 			hdg_big.cylinders = cylinders;
   1085 			hdg_big.sectors = sectors;
   1086 			error = copyout(&hdg_big, SCARG(uap, data),
   1087 			    sizeof hdg_big);
   1088 			goto out;
   1089 		}
   1090 
   1091 	default:
   1092 		/*
   1093 		 * Unknown to us. If it's on a device, just pass it through
   1094 		 * using PTIOCLINUX, the device itself might be able to
   1095 		 * make some sense of it.
   1096 		 * XXX hack: if the function returns EJUSTRETURN,
   1097 		 * it has stuffed a sysctl return value in pt.data.
   1098 		 */
   1099 		ioctlf = fp->f_ops->fo_ioctl;
   1100 		pt.com = SCARG(uap, com);
   1101 		pt.data = SCARG(uap, data);
   1102 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1103 		if (error == EJUSTRETURN) {
   1104 			retval[0] = (register_t)pt.data;
   1105 			error = 0;
   1106 		}
   1107 
   1108 		if (error == ENOTTY) {
   1109 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1110 			    com));
   1111 		}
   1112 		goto out;
   1113 	}
   1114 	SCARG(&bia, com) = com;
   1115 	/* XXX NJWLWP */
   1116 	error = sys_ioctl(curlwp, &bia, retval);
   1117 out:
   1118 	FILE_UNUSE(fp ,l);
   1119 	return error;
   1120 }
   1121 
   1122 /*
   1123  * Set I/O permissions for a process. Just set the maximum level
   1124  * right away (ignoring the argument), otherwise we would have
   1125  * to rely on I/O permission maps, which are not implemented.
   1126  */
   1127 int
   1128 linux_sys_iopl(struct lwp *l, void *v __unused, register_t *retval)
   1129 {
   1130 #if 0
   1131 	struct linux_sys_iopl_args /* {
   1132 		syscallarg(int) level;
   1133 	} */ *uap = v;
   1134 #endif
   1135 	struct trapframe *fp = l->l_md.md_regs;
   1136 
   1137 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
   1138 	    &l->l_acflag) != 0)
   1139 		return EPERM;
   1140 	fp->tf_eflags |= PSL_IOPL;
   1141 	*retval = 0;
   1142 	return 0;
   1143 }
   1144 
   1145 /*
   1146  * See above. If a root process tries to set access to an I/O port,
   1147  * just let it have the whole range.
   1148  */
   1149 int
   1150 linux_sys_ioperm(l, v, retval)
   1151 	struct lwp *l;
   1152 	void *v;
   1153 	register_t *retval;
   1154 {
   1155 	struct linux_sys_ioperm_args /* {
   1156 		syscallarg(unsigned int) lo;
   1157 		syscallarg(unsigned int) hi;
   1158 		syscallarg(int) val;
   1159 	} */ *uap = v;
   1160 	struct trapframe *fp = l->l_md.md_regs;
   1161 
   1162 	if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER,
   1163 	    &l->l_acflag) != 0)
   1164 		return EPERM;
   1165 	if (SCARG(uap, val))
   1166 		fp->tf_eflags |= PSL_IOPL;
   1167 	*retval = 0;
   1168 	return 0;
   1169 }
   1170 
   1171 int
   1172 linux_usertrap(struct lwp *l __unused, vaddr_t trapaddr __unused,
   1173     void *arg __unused)
   1174 {
   1175 	return 0;
   1176 }
   1177