Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.110.2.3
      1 /*	$NetBSD: linux_machdep.c,v 1.110.2.3 2007/02/26 09:09:14 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.110.2.3 2007/02/26 09:09:14 yamt Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <machine/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
    110 #ifdef USER_LDT
    111 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 #endif
    116 
    117 #ifdef DEBUG_LINUX
    118 #define DPRINTF(a) uprintf a
    119 #else
    120 #define DPRINTF(a)
    121 #endif
    122 
    123 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    124 extern struct disklist *x86_alldisks;
    125 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    126     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    127 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct linux_sigcontext *));
    129 static int linux_restore_sigcontext __P((struct lwp *,
    130     struct linux_sigcontext *, register_t *));
    131 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    132 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    133 
    134 extern char linux_sigcode[], linux_rt_sigcode[];
    135 /*
    136  * Deal with some i386-specific things in the Linux emulation code.
    137  */
    138 
    139 void
    140 linux_setregs(l, epp, stack)
    141 	struct lwp *l;
    142 	struct exec_package *epp;
    143 	u_long stack;
    144 {
    145 	struct pcb *pcb = &l->l_addr->u_pcb;
    146 	struct trapframe *tf;
    147 
    148 #if NNPX > 0
    149 	/* If we were using the FPU, forget about it. */
    150 	if (npxproc == l)
    151 		npxdrop();
    152 #endif
    153 
    154 #ifdef USER_LDT
    155 	pmap_ldt_cleanup(l);
    156 #endif
    157 
    158 	l->l_md.md_flags &= ~MDL_USEDFPU;
    159 
    160 	if (i386_use_fxsave) {
    161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    162 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    163 	} else
    164 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    165 
    166 	tf = l->l_md.md_regs;
    167 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_edi = 0;
    172 	tf->tf_esi = 0;
    173 	tf->tf_ebp = 0;
    174 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    175 	tf->tf_edx = 0;
    176 	tf->tf_ecx = 0;
    177 	tf->tf_eax = 0;
    178 	tf->tf_eip = epp->ep_entry;
    179 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    180 	tf->tf_eflags = PSL_USERSET;
    181 	tf->tf_esp = stack;
    182 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    183 }
    184 
    185 /*
    186  * Send an interrupt to process.
    187  *
    188  * Stack is set up to allow sigcode stored
    189  * in u. to call routine, followed by kcall
    190  * to sigreturn routine below.  After sigreturn
    191  * resets the signal mask, the stack, and the
    192  * frame pointer, it returns to the user
    193  * specified pc, psl.
    194  */
    195 
    196 void
    197 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    198 {
    199 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    200 		linux_rt_sendsig(ksi, mask);
    201 	else
    202 		linux_old_sendsig(ksi, mask);
    203 }
    204 
    205 
    206 static void
    207 linux_save_ucontext(l, tf, mask, sas, uc)
    208 	struct lwp *l;
    209 	struct trapframe *tf;
    210 	const sigset_t *mask;
    211 	struct sigaltstack *sas;
    212 	struct linux_ucontext *uc;
    213 {
    214 	uc->uc_flags = 0;
    215 	uc->uc_link = NULL;
    216 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    217 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    218 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    219 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    220 }
    221 
    222 static void
    223 linux_save_sigcontext(l, tf, mask, sc)
    224 	struct lwp *l;
    225 	struct trapframe *tf;
    226 	const sigset_t *mask;
    227 	struct linux_sigcontext *sc;
    228 {
    229 	/* Save register context. */
    230 #ifdef VM86
    231 	if (tf->tf_eflags & PSL_VM) {
    232 		sc->sc_gs = tf->tf_vm86_gs;
    233 		sc->sc_fs = tf->tf_vm86_fs;
    234 		sc->sc_es = tf->tf_vm86_es;
    235 		sc->sc_ds = tf->tf_vm86_ds;
    236 		sc->sc_eflags = get_vflags(l);
    237 	} else
    238 #endif
    239 	{
    240 		sc->sc_gs = tf->tf_gs;
    241 		sc->sc_fs = tf->tf_fs;
    242 		sc->sc_es = tf->tf_es;
    243 		sc->sc_ds = tf->tf_ds;
    244 		sc->sc_eflags = tf->tf_eflags;
    245 	}
    246 	sc->sc_edi = tf->tf_edi;
    247 	sc->sc_esi = tf->tf_esi;
    248 	sc->sc_esp = tf->tf_esp;
    249 	sc->sc_ebp = tf->tf_ebp;
    250 	sc->sc_ebx = tf->tf_ebx;
    251 	sc->sc_edx = tf->tf_edx;
    252 	sc->sc_ecx = tf->tf_ecx;
    253 	sc->sc_eax = tf->tf_eax;
    254 	sc->sc_eip = tf->tf_eip;
    255 	sc->sc_cs = tf->tf_cs;
    256 	sc->sc_esp_at_signal = tf->tf_esp;
    257 	sc->sc_ss = tf->tf_ss;
    258 	sc->sc_err = tf->tf_err;
    259 	sc->sc_trapno = tf->tf_trapno;
    260 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    261 	sc->sc_387 = NULL;
    262 
    263 	/* Save signal stack. */
    264 	/* Linux doesn't save the onstack flag in sigframe */
    265 
    266 	/* Save signal mask. */
    267 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    268 }
    269 
    270 static void
    271 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    272 {
    273 	struct lwp *l = curlwp;
    274 	struct proc *p = l->l_proc;
    275 	struct trapframe *tf;
    276 	struct linux_rt_sigframe *fp, frame;
    277 	int onstack, error;
    278 	linux_siginfo_t *lsi;
    279 	int sig = ksi->ksi_signo;
    280 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    281 	struct sigaltstack *sas = &l->l_sigstk;
    282 
    283 	tf = l->l_md.md_regs;
    284 	/* Do we need to jump onto the signal stack? */
    285 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    286 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    287 
    288 
    289 	/* Allocate space for the signal handler context. */
    290 	if (onstack)
    291 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    292 		    sas->ss_size);
    293 	else
    294 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    295 	fp--;
    296 
    297 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    298 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    299 
    300 	/* Build stack frame for signal trampoline. */
    301 	frame.sf_handler = catcher;
    302 	frame.sf_sig = native_to_linux_signo[sig];
    303 	frame.sf_sip = &fp->sf_si;
    304 	frame.sf_ucp = &fp->sf_uc;
    305 
    306 	/*
    307 	 * XXX: the following code assumes that the constants for
    308 	 * siginfo are the same between linux and NetBSD.
    309 	 */
    310 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    311 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    312 	lsi->lsi_code = ksi->ksi_code;
    313 	switch (lsi->lsi_signo = frame.sf_sig) {
    314 	case LINUX_SIGILL:
    315 	case LINUX_SIGFPE:
    316 	case LINUX_SIGSEGV:
    317 	case LINUX_SIGBUS:
    318 	case LINUX_SIGTRAP:
    319 		lsi->lsi_addr = ksi->ksi_addr;
    320 		break;
    321 	case LINUX_SIGCHLD:
    322 		lsi->lsi_uid = ksi->ksi_uid;
    323 		lsi->lsi_pid = ksi->ksi_pid;
    324 		lsi->lsi_utime = ksi->ksi_utime;
    325 		lsi->lsi_stime = ksi->ksi_stime;
    326 
    327 		/* We use the same codes */
    328 		lsi->lsi_code = ksi->ksi_code;
    329 		/* XXX is that right? */
    330 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    331 		break;
    332 	case LINUX_SIGIO:
    333 		lsi->lsi_band = ksi->ksi_band;
    334 		lsi->lsi_fd = ksi->ksi_fd;
    335 		break;
    336 	default:
    337 		lsi->lsi_uid = ksi->ksi_uid;
    338 		lsi->lsi_pid = ksi->ksi_pid;
    339 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    340 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    341 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    342 		break;
    343 	}
    344 
    345 	/* Save register context. */
    346 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    347 	sendsig_reset(l, sig);
    348 
    349 	mutex_exit(&p->p_smutex);
    350 	error = copyout(&frame, fp, sizeof(frame));
    351 	mutex_enter(&p->p_smutex);
    352 
    353 	if (error != 0) {
    354 		/*
    355 		 * Process has trashed its stack; give it an illegal
    356 		 * instruction to halt it in its tracks.
    357 		 */
    358 		sigexit(l, SIGILL);
    359 		/* NOTREACHED */
    360 	}
    361 
    362 	/*
    363 	 * Build context to run handler in.
    364 	 */
    365 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    366 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    370 	    (linux_rt_sigcode - linux_sigcode);
    371 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    372 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    373 	tf->tf_esp = (int)fp;
    374 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    375 
    376 	/* Remember that we're now on the signal stack. */
    377 	if (onstack)
    378 		sas->ss_flags |= SS_ONSTACK;
    379 }
    380 
    381 static void
    382 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    383 {
    384 	struct lwp *l = curlwp;
    385 	struct proc *p = l->l_proc;
    386 	struct trapframe *tf;
    387 	struct linux_sigframe *fp, frame;
    388 	int onstack, error;
    389 	int sig = ksi->ksi_signo;
    390 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    391 	struct sigaltstack *sas = &l->l_sigstk;
    392 
    393 	tf = l->l_md.md_regs;
    394 
    395 	/* Do we need to jump onto the signal stack? */
    396 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    397 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    398 
    399 	/* Allocate space for the signal handler context. */
    400 	if (onstack)
    401 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    402 		    sas->ss_size);
    403 	else
    404 		fp = (struct linux_sigframe *)tf->tf_esp;
    405 	fp--;
    406 
    407 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    408 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    409 
    410 	/* Build stack frame for signal trampoline. */
    411 	frame.sf_handler = catcher;
    412 	frame.sf_sig = native_to_linux_signo[sig];
    413 
    414 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    415 	sendsig_reset(l, sig);
    416 
    417 	mutex_exit(&p->p_smutex);
    418 	error = copyout(&frame, fp, sizeof(frame));
    419 	mutex_enter(&p->p_smutex);
    420 
    421 	if (error != 0) {
    422 		/*
    423 		 * Process has trashed its stack; give it an illegal
    424 		 * instruction to halt it in its tracks.
    425 		 */
    426 		sigexit(l, SIGILL);
    427 		/* NOTREACHED */
    428 	}
    429 
    430 	/*
    431 	 * Build context to run handler in.
    432 	 */
    433 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    434 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    435 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    436 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    437 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    438 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    439 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    440 	tf->tf_esp = (int)fp;
    441 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    442 
    443 	/* Remember that we're now on the signal stack. */
    444 	if (onstack)
    445 		sas->ss_flags |= SS_ONSTACK;
    446 }
    447 
    448 /*
    449  * System call to cleanup state after a signal
    450  * has been taken.  Reset signal mask and
    451  * stack state from context left by sendsig (above).
    452  * Return to previous pc and psl as specified by
    453  * context left by sendsig. Check carefully to
    454  * make sure that the user has not modified the
    455  * psl to gain improper privileges or to cause
    456  * a machine fault.
    457  */
    458 int
    459 linux_sys_rt_sigreturn(l, v, retval)
    460 	struct lwp *l;
    461 	void *v;
    462 	register_t *retval;
    463 {
    464 	struct linux_sys_rt_sigreturn_args /* {
    465 		syscallarg(struct linux_ucontext *) ucp;
    466 	} */ *uap = v;
    467 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    468 	int error;
    469 
    470 	/*
    471 	 * The trampoline code hands us the context.
    472 	 * It is unsafe to keep track of it ourselves, in the event that a
    473 	 * program jumps out of a signal handler.
    474 	 */
    475 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    476 		return error;
    477 
    478 	/* XXX XAX we can do better here by using more of the ucontext */
    479 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    480 }
    481 
    482 int
    483 linux_sys_sigreturn(l, v, retval)
    484 	struct lwp *l;
    485 	void *v;
    486 	register_t *retval;
    487 {
    488 	struct linux_sys_sigreturn_args /* {
    489 		syscallarg(struct linux_sigcontext *) scp;
    490 	} */ *uap = v;
    491 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    492 	int error;
    493 
    494 	/*
    495 	 * The trampoline code hands us the context.
    496 	 * It is unsafe to keep track of it ourselves, in the event that a
    497 	 * program jumps out of a signal handler.
    498 	 */
    499 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    500 		return error;
    501 	return linux_restore_sigcontext(l, &context, retval);
    502 }
    503 
    504 static int
    505 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    506     register_t *retval)
    507 {
    508 	struct proc *p = l->l_proc;
    509 	struct sigaltstack *sas = &l->l_sigstk;
    510 	struct trapframe *tf;
    511 	sigset_t mask;
    512 	ssize_t ss_gap;
    513 	/* Restore register context. */
    514 	tf = l->l_md.md_regs;
    515 
    516 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    517 #ifdef VM86
    518 	if (scp->sc_eflags & PSL_VM) {
    519 		void syscall_vm86 __P((struct trapframe *));
    520 
    521 		tf->tf_vm86_gs = scp->sc_gs;
    522 		tf->tf_vm86_fs = scp->sc_fs;
    523 		tf->tf_vm86_es = scp->sc_es;
    524 		tf->tf_vm86_ds = scp->sc_ds;
    525 		set_vflags(l, scp->sc_eflags);
    526 		p->p_md.md_syscall = syscall_vm86;
    527 	} else
    528 #endif
    529 	{
    530 		/*
    531 		 * Check for security violations.  If we're returning to
    532 		 * protected mode, the CPU will validate the segment registers
    533 		 * automatically and generate a trap on violations.  We handle
    534 		 * the trap, rather than doing all of the checking here.
    535 		 */
    536 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    537 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    538 			return EINVAL;
    539 
    540 		tf->tf_gs = scp->sc_gs;
    541 		tf->tf_fs = scp->sc_fs;
    542 		tf->tf_es = scp->sc_es;
    543 		tf->tf_ds = scp->sc_ds;
    544 #ifdef VM86
    545 		if (tf->tf_eflags & PSL_VM)
    546 			(*p->p_emul->e_syscall_intern)(p);
    547 #endif
    548 		tf->tf_eflags = scp->sc_eflags;
    549 	}
    550 	tf->tf_edi = scp->sc_edi;
    551 	tf->tf_esi = scp->sc_esi;
    552 	tf->tf_ebp = scp->sc_ebp;
    553 	tf->tf_ebx = scp->sc_ebx;
    554 	tf->tf_edx = scp->sc_edx;
    555 	tf->tf_ecx = scp->sc_ecx;
    556 	tf->tf_eax = scp->sc_eax;
    557 	tf->tf_eip = scp->sc_eip;
    558 	tf->tf_cs = scp->sc_cs;
    559 	tf->tf_esp = scp->sc_esp_at_signal;
    560 	tf->tf_ss = scp->sc_ss;
    561 
    562 	/* Restore signal stack. */
    563 	/*
    564 	 * Linux really does it this way; it doesn't have space in sigframe
    565 	 * to save the onstack flag.
    566 	 */
    567 	mutex_enter(&p->p_smutex);
    568 	ss_gap = (ssize_t)
    569 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    570 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    571 		sas->ss_flags |= SS_ONSTACK;
    572 	else
    573 		sas->ss_flags &= ~SS_ONSTACK;
    574 
    575 	/* Restore signal mask. */
    576 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    577 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    578 	mutex_exit(&p->p_smutex);
    579 
    580 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    581 	return EJUSTRETURN;
    582 }
    583 
    584 #ifdef USER_LDT
    585 
    586 int
    587 linux_read_ldt(l, uap, retval)
    588 	struct lwp *l;
    589 	struct linux_sys_modify_ldt_args /* {
    590 		syscallarg(int) func;
    591 		syscallarg(void *) ptr;
    592 		syscallarg(size_t) bytecount;
    593 	} */ *uap;
    594 	register_t *retval;
    595 {
    596 	struct proc *p = l->l_proc;
    597 	struct i386_get_ldt_args gl;
    598 	int error;
    599 	caddr_t sg;
    600 	char *parms;
    601 
    602 	DPRINTF(("linux_read_ldt!"));
    603 	sg = stackgap_init(p, 0);
    604 
    605 	gl.start = 0;
    606 	gl.desc = SCARG(uap, ptr);
    607 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    608 
    609 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    610 
    611 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    612 		return (error);
    613 
    614 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    615 		return (error);
    616 
    617 	*retval *= sizeof(union descriptor);
    618 	return (0);
    619 }
    620 
    621 struct linux_ldt_info {
    622 	u_int entry_number;
    623 	u_long base_addr;
    624 	u_int limit;
    625 	u_int seg_32bit:1;
    626 	u_int contents:2;
    627 	u_int read_exec_only:1;
    628 	u_int limit_in_pages:1;
    629 	u_int seg_not_present:1;
    630 	u_int useable:1;
    631 };
    632 
    633 int
    634 linux_write_ldt(l, uap, retval)
    635 	struct lwp *l;
    636 	struct linux_sys_modify_ldt_args /* {
    637 		syscallarg(int) func;
    638 		syscallarg(void *) ptr;
    639 		syscallarg(size_t) bytecount;
    640 	} */ *uap;
    641 	register_t *retval;
    642 {
    643 	struct proc *p = l->l_proc;
    644 	struct linux_ldt_info ldt_info;
    645 	struct segment_descriptor sd;
    646 	struct i386_set_ldt_args sl;
    647 	int error;
    648 	caddr_t sg;
    649 	char *parms;
    650 	int oldmode = (int)retval[0];
    651 
    652 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    653 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    654 		return (EINVAL);
    655 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    656 		return error;
    657 	if (ldt_info.entry_number >= 8192)
    658 		return (EINVAL);
    659 	if (ldt_info.contents == 3) {
    660 		if (oldmode)
    661 			return (EINVAL);
    662 		if (ldt_info.seg_not_present)
    663 			return (EINVAL);
    664 	}
    665 
    666 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    667 	    (oldmode || (ldt_info.contents == 0 &&
    668 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    669 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    670 	    ldt_info.useable == 0))) {
    671 		/* this means you should zero the ldt */
    672 		(void)memset(&sd, 0, sizeof(sd));
    673 	} else {
    674 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    675 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    676 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    677 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    678 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    679 		    (!ldt_info.read_exec_only << 1);
    680 		sd.sd_dpl = SEL_UPL;
    681 		sd.sd_p = !ldt_info.seg_not_present;
    682 		sd.sd_def32 = ldt_info.seg_32bit;
    683 		sd.sd_gran = ldt_info.limit_in_pages;
    684 		if (!oldmode)
    685 			sd.sd_xx = ldt_info.useable;
    686 		else
    687 			sd.sd_xx = 0;
    688 	}
    689 	sg = stackgap_init(p, 0);
    690 	sl.start = ldt_info.entry_number;
    691 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    692 	sl.num = 1;
    693 
    694 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    695 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    696 
    697 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    698 
    699 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    700 		return (error);
    701 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    702 		return (error);
    703 
    704 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    705 		return (error);
    706 
    707 	*retval = 0;
    708 	return (0);
    709 }
    710 
    711 #endif /* USER_LDT */
    712 
    713 int
    714 linux_sys_modify_ldt(struct lwp *l, void *v,
    715     register_t *retval)
    716 {
    717 	struct linux_sys_modify_ldt_args /* {
    718 		syscallarg(int) func;
    719 		syscallarg(void *) ptr;
    720 		syscallarg(size_t) bytecount;
    721 	} */ *uap = v;
    722 
    723 	switch (SCARG(uap, func)) {
    724 #ifdef USER_LDT
    725 	case 0:
    726 		return linux_read_ldt(l, uap, retval);
    727 	case 1:
    728 		retval[0] = 1;
    729 		return linux_write_ldt(l, uap, retval);
    730 	case 2:
    731 #ifdef notyet
    732 		return (linux_read_default_ldt(l, uap, retval);
    733 #else
    734 		return (ENOSYS);
    735 #endif
    736 	case 0x11:
    737 		retval[0] = 0;
    738 		return linux_write_ldt(l, uap, retval);
    739 #endif /* USER_LDT */
    740 
    741 	default:
    742 		return (ENOSYS);
    743 	}
    744 }
    745 
    746 /*
    747  * XXX Pathetic hack to make svgalib work. This will fake the major
    748  * device number of an opened VT so that svgalib likes it. grmbl.
    749  * Should probably do it 'wrong the right way' and use a mapping
    750  * array for all major device numbers, and map linux_mknod too.
    751  */
    752 dev_t
    753 linux_fakedev(dev, raw)
    754 	dev_t dev;
    755 	int raw;
    756 {
    757 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    758 	const struct cdevsw *cd = cdevsw_lookup(dev);
    759 
    760 	if (raw) {
    761 #if (NWSDISPLAY > 0)
    762 		extern const struct cdevsw wsdisplay_cdevsw;
    763 		if (cd == &wsdisplay_cdevsw)
    764 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    765 #endif
    766 	}
    767 
    768 	if (cd == &ptc_cdevsw)
    769 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    770 	if (cd == &pts_cdevsw)
    771 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    772 
    773 	return dev;
    774 }
    775 
    776 #if (NWSDISPLAY > 0)
    777 /*
    778  * That's not complete, but enough to get an X server running.
    779  */
/* Number of entries in each keymap below. */
#define NR_KEYS 128

/* Keymap used with no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used with Shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used with AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap used with Control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of the keymaps above.  Slot 3 deliberately repeats altgr_map.
 * NOTE(review): the slot index presumably is the Linux modifier mask
 * (plain, shift, altgr, shift+altgr, ctrl) -- confirm against the
 * keymap ioctl code that consumes this table.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    854 #endif
    855 
    856 static struct biosdisk_info *
    857 fd2biosinfo(struct proc *p, struct file *fp)
    858 {
    859 	struct vnode *vp;
    860 	const char *blkname;
    861 	char diskname[16];
    862 	int i;
    863 	struct nativedisk_info *nip;
    864 	struct disklist *dl = x86_alldisks;
    865 
    866 	if (fp->f_type != DTYPE_VNODE)
    867 		return NULL;
    868 	vp = (struct vnode *)fp->f_data;
    869 
    870 	if (vp->v_type != VBLK)
    871 		return NULL;
    872 
    873 	blkname = devsw_blk2name(major(vp->v_rdev));
    874 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    875 	    DISKUNIT(vp->v_rdev));
    876 
    877 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    878 		nip = &dl->dl_nativedisks[i];
    879 		if (strcmp(diskname, nip->ni_devname))
    880 			continue;
    881 		if (nip->ni_nmatches != 0)
    882 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    883 	}
    884 
    885 	return NULL;
    886 }
    887 
    888 
    889 /*
    890  * We come here in a last attempt to satisfy a Linux ioctl() call
    891  */
    892 int
    893 linux_machdepioctl(l, v, retval)
    894 	struct lwp *l;
    895 	void *v;
    896 	register_t *retval;
    897 {
    898 	struct linux_sys_ioctl_args /* {
    899 		syscallarg(int) fd;
    900 		syscallarg(u_long) com;
    901 		syscallarg(caddr_t) data;
    902 	} */ *uap = v;
    903 	struct sys_ioctl_args bia;
    904 	u_long com;
    905 	int error, error1;
    906 #if (NWSDISPLAY > 0)
    907 	struct vt_mode lvt;
    908 	caddr_t bvtp, sg;
    909 	struct kbentry kbe;
    910 #endif
    911 	struct linux_hd_geometry hdg;
    912 	struct linux_hd_big_geometry hdg_big;
    913 	struct biosdisk_info *bip;
    914 	struct filedesc *fdp;
    915 	struct file *fp;
    916 	int fd;
    917 	struct disklabel label, *labp;
    918 	struct partinfo partp;
    919 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    920 	u_long start, biostotal, realtotal;
    921 	u_char heads, sectors;
    922 	u_int cylinders;
    923 	struct ioctl_pt pt;
    924 	struct proc *p = l->l_proc;
    925 
    926 	fd = SCARG(uap, fd);
    927 	SCARG(&bia, fd) = fd;
    928 	SCARG(&bia, data) = SCARG(uap, data);
    929 	com = SCARG(uap, com);
    930 
    931 	fdp = p->p_fd;
    932 
    933 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    934 		return (EBADF);
    935 
    936 	FILE_USE(fp);
    937 
    938 	switch (com) {
    939 #if (NWSDISPLAY > 0)
    940 	case LINUX_KDGKBMODE:
    941 		com = KDGKBMODE;
    942 		break;
    943 	case LINUX_KDSKBMODE:
    944 		com = KDSKBMODE;
    945 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    946 			SCARG(&bia, data) = (caddr_t)K_RAW;
    947 		break;
    948 	case LINUX_KIOCSOUND:
    949 		SCARG(&bia, data) =
    950 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    951 		/* fall through */
    952 	case LINUX_KDMKTONE:
    953 		com = KDMKTONE;
    954 		break;
    955 	case LINUX_KDSETMODE:
    956 		com = KDSETMODE;
    957 		break;
    958 	case LINUX_KDGETMODE:
    959 		/* KD_* values are equal to the wscons numbers */
    960 		com = WSDISPLAYIO_GMODE;
    961 		break;
    962 	case LINUX_KDENABIO:
    963 		com = KDENABIO;
    964 		break;
    965 	case LINUX_KDDISABIO:
    966 		com = KDDISABIO;
    967 		break;
    968 	case LINUX_KDGETLED:
    969 		com = KDGETLED;
    970 		break;
    971 	case LINUX_KDSETLED:
    972 		com = KDSETLED;
    973 		break;
    974 	case LINUX_VT_OPENQRY:
    975 		com = VT_OPENQRY;
    976 		break;
    977 	case LINUX_VT_GETMODE:
    978 		SCARG(&bia, com) = VT_GETMODE;
    979 		/* XXX NJWLWP */
    980 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    981 			goto out;
    982 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    983 		    sizeof (struct vt_mode))))
    984 			goto out;
    985 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    986 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    987 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    988 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    989 		    sizeof (struct vt_mode));
    990 		goto out;
    991 	case LINUX_VT_SETMODE:
    992 		com = VT_SETMODE;
    993 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    994 		    sizeof (struct vt_mode))))
    995 			goto out;
    996 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    997 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    998 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    999 		sg = stackgap_init(p, 0);
   1000 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
   1001 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
   1002 			goto out;
   1003 		SCARG(&bia, data) = bvtp;
   1004 		break;
   1005 	case LINUX_VT_DISALLOCATE:
   1006 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1007 		error = 0;
   1008 		goto out;
   1009 	case LINUX_VT_RELDISP:
   1010 		com = VT_RELDISP;
   1011 		break;
   1012 	case LINUX_VT_ACTIVATE:
   1013 		com = VT_ACTIVATE;
   1014 		break;
   1015 	case LINUX_VT_WAITACTIVE:
   1016 		com = VT_WAITACTIVE;
   1017 		break;
   1018 	case LINUX_VT_GETSTATE:
   1019 		com = VT_GETSTATE;
   1020 		break;
   1021 	case LINUX_KDGKBTYPE:
   1022 	    {
   1023 		static const u_int8_t kb101 = KB_101;
   1024 
   1025 		/* This is what Linux does. */
   1026 		error = copyout(&kb101, SCARG(uap, data), 1);
   1027 		goto out;
   1028 	    }
   1029 	case LINUX_KDGKBENT:
   1030 		/*
   1031 		 * The Linux KDGKBENT ioctl is different from the
   1032 		 * SYSV original. So we handle it in machdep code.
   1033 		 * XXX We should use keyboard mapping information
   1034 		 * from wsdisplay, but this would be expensive.
   1035 		 */
   1036 		if ((error = copyin(SCARG(uap, data), &kbe,
   1037 				    sizeof(struct kbentry))))
   1038 			goto out;
   1039 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1040 		    || kbe.kb_index >= NR_KEYS) {
   1041 			error = EINVAL;
   1042 			goto out;
   1043 		}
   1044 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1045 		error = copyout(&kbe, SCARG(uap, data),
   1046 				sizeof(struct kbentry));
   1047 		goto out;
   1048 #endif
   1049 	case LINUX_HDIO_GETGEO:
   1050 	case LINUX_HDIO_GETGEO_BIG:
   1051 		/*
   1052 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1053 		 * if possible (extending its # of cylinders if it's beyond
   1054 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1055 		 * the real geometry) if not found, by returning an
   1056 		 * error. See common/linux_hdio.c
   1057 		 */
   1058 		bip = fd2biosinfo(p, fp);
   1059 		ioctlf = fp->f_ops->fo_ioctl;
   1060 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1061 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1062 		if (error != 0 && error1 != 0) {
   1063 			error = error1;
   1064 			goto out;
   1065 		}
   1066 		labp = error != 0 ? &label : partp.disklab;
   1067 		start = error1 != 0 ? partp.part->p_offset : 0;
   1068 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1069 		    && bip->bi_cyl != 0) {
   1070 			heads = bip->bi_head;
   1071 			sectors = bip->bi_sec;
   1072 			cylinders = bip->bi_cyl;
   1073 			biostotal = heads * sectors * cylinders;
   1074 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1075 			    labp->d_ncylinders;
   1076 			if (realtotal > biostotal)
   1077 				cylinders = realtotal / (heads * sectors);
   1078 		} else {
   1079 			heads = labp->d_ntracks;
   1080 			cylinders = labp->d_ncylinders;
   1081 			sectors = labp->d_nsectors;
   1082 		}
   1083 		if (com == LINUX_HDIO_GETGEO) {
   1084 			hdg.start = start;
   1085 			hdg.heads = heads;
   1086 			hdg.cylinders = cylinders;
   1087 			hdg.sectors = sectors;
   1088 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1089 			goto out;
   1090 		} else {
   1091 			hdg_big.start = start;
   1092 			hdg_big.heads = heads;
   1093 			hdg_big.cylinders = cylinders;
   1094 			hdg_big.sectors = sectors;
   1095 			error = copyout(&hdg_big, SCARG(uap, data),
   1096 			    sizeof hdg_big);
   1097 			goto out;
   1098 		}
   1099 
   1100 	default:
   1101 		/*
   1102 		 * Unknown to us. If it's on a device, just pass it through
   1103 		 * using PTIOCLINUX, the device itself might be able to
   1104 		 * make some sense of it.
   1105 		 * XXX hack: if the function returns EJUSTRETURN,
   1106 		 * it has stuffed a sysctl return value in pt.data.
   1107 		 */
   1108 		ioctlf = fp->f_ops->fo_ioctl;
   1109 		pt.com = SCARG(uap, com);
   1110 		pt.data = SCARG(uap, data);
   1111 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1112 		if (error == EJUSTRETURN) {
   1113 			retval[0] = (register_t)pt.data;
   1114 			error = 0;
   1115 		}
   1116 
   1117 		if (error == ENOTTY) {
   1118 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1119 			    com));
   1120 		}
   1121 		goto out;
   1122 	}
   1123 	SCARG(&bia, com) = com;
   1124 	/* XXX NJWLWP */
   1125 	error = sys_ioctl(curlwp, &bia, retval);
   1126 out:
   1127 	FILE_UNUSE(fp ,l);
   1128 	return error;
   1129 }
   1130 
   1131 /*
   1132  * Set I/O permissions for a process. Just set the maximum level
   1133  * right away (ignoring the argument), otherwise we would have
   1134  * to rely on I/O permission maps, which are not implemented.
   1135  */
   1136 int
   1137 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1138 {
   1139 #if 0
   1140 	struct linux_sys_iopl_args /* {
   1141 		syscallarg(int) level;
   1142 	} */ *uap = v;
   1143 #endif
   1144 	struct trapframe *fp = l->l_md.md_regs;
   1145 
   1146 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1147 	    NULL, NULL, NULL, NULL) != 0)
   1148 		return EPERM;
   1149 	fp->tf_eflags |= PSL_IOPL;
   1150 	*retval = 0;
   1151 	return 0;
   1152 }
   1153 
   1154 /*
   1155  * See above. If a root process tries to set access to an I/O port,
   1156  * just let it have the whole range.
   1157  */
   1158 int
   1159 linux_sys_ioperm(l, v, retval)
   1160 	struct lwp *l;
   1161 	void *v;
   1162 	register_t *retval;
   1163 {
   1164 	struct linux_sys_ioperm_args /* {
   1165 		syscallarg(unsigned int) lo;
   1166 		syscallarg(unsigned int) hi;
   1167 		syscallarg(int) val;
   1168 	} */ *uap = v;
   1169 	struct trapframe *fp = l->l_md.md_regs;
   1170 
   1171 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1172 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1173 	    NULL, NULL) != 0)
   1174 		return EPERM;
   1175 	if (SCARG(uap, val))
   1176 		fp->tf_eflags |= PSL_IOPL;
   1177 	*retval = 0;
   1178 	return 0;
   1179 }
   1180 
   1181 int
   1182 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1183     void *arg)
   1184 {
   1185 	return 0;
   1186 }
   1187