Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.114.4.6
      1 /*	$NetBSD: linux_machdep.c,v 1.114.4.6 2007/01/30 13:51:32 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.114.4.6 2007/01/30 13:51:32 ad Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <machine/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
    110 #ifdef USER_LDT
    111 #include <machine/cpu.h>
    112 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    113     register_t *));
    114 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    115     register_t *));
    116 #endif
    117 
    118 #ifdef DEBUG_LINUX
    119 #define DPRINTF(a) uprintf a
    120 #else
    121 #define DPRINTF(a)
    122 #endif
    123 
    124 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    125 extern struct disklist *x86_alldisks;
    126 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    127     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    128 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    129     const sigset_t *, struct linux_sigcontext *));
    130 static int linux_restore_sigcontext __P((struct lwp *,
    131     struct linux_sigcontext *, register_t *));
    132 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    133 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 
    135 extern char linux_sigcode[], linux_rt_sigcode[];
    136 /*
    137  * Deal with some i386-specific things in the Linux emulation code.
    138  */
    139 
    140 void
    141 linux_setregs(l, epp, stack)
    142 	struct lwp *l;
    143 	struct exec_package *epp;
    144 	u_long stack;
    145 {
    146 	struct pcb *pcb = &l->l_addr->u_pcb;
    147 	struct trapframe *tf;
    148 
    149 #if NNPX > 0
    150 	/* If we were using the FPU, forget about it. */
    151 	if (npxproc == l)
    152 		npxdrop();
    153 #endif
    154 
    155 #ifdef USER_LDT
    156 	pmap_ldt_cleanup(l);
    157 #endif
    158 
    159 	l->l_md.md_flags &= ~MDL_USEDFPU;
    160 
    161 	if (i386_use_fxsave) {
    162 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    164 	} else
    165 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    166 
    167 	tf = l->l_md.md_regs;
    168 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_edi = 0;
    173 	tf->tf_esi = 0;
    174 	tf->tf_ebp = 0;
    175 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    176 	tf->tf_edx = 0;
    177 	tf->tf_ecx = 0;
    178 	tf->tf_eax = 0;
    179 	tf->tf_eip = epp->ep_entry;
    180 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    181 	tf->tf_eflags = PSL_USERSET;
    182 	tf->tf_esp = stack;
    183 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    184 }
    185 
    186 /*
    187  * Send an interrupt to process.
    188  *
    189  * Stack is set up to allow sigcode stored
    190  * in u. to call routine, followed by kcall
    191  * to sigreturn routine below.  After sigreturn
    192  * resets the signal mask, the stack, and the
    193  * frame pointer, it returns to the user
    194  * specified pc, psl.
    195  */
    196 
    197 void
    198 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    199 {
    200 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    201 		linux_rt_sendsig(ksi, mask);
    202 	else
    203 		linux_old_sendsig(ksi, mask);
    204 }
    205 
    206 
    207 static void
    208 linux_save_ucontext(l, tf, mask, sas, uc)
    209 	struct lwp *l;
    210 	struct trapframe *tf;
    211 	const sigset_t *mask;
    212 	struct sigaltstack *sas;
    213 	struct linux_ucontext *uc;
    214 {
    215 	uc->uc_flags = 0;
    216 	uc->uc_link = NULL;
    217 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    218 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    219 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    220 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    221 }
    222 
    223 static void
    224 linux_save_sigcontext(l, tf, mask, sc)
    225 	struct lwp *l;
    226 	struct trapframe *tf;
    227 	const sigset_t *mask;
    228 	struct linux_sigcontext *sc;
    229 {
    230 	/* Save register context. */
    231 #ifdef VM86
    232 	if (tf->tf_eflags & PSL_VM) {
    233 		sc->sc_gs = tf->tf_vm86_gs;
    234 		sc->sc_fs = tf->tf_vm86_fs;
    235 		sc->sc_es = tf->tf_vm86_es;
    236 		sc->sc_ds = tf->tf_vm86_ds;
    237 		sc->sc_eflags = get_vflags(l);
    238 	} else
    239 #endif
    240 	{
    241 		sc->sc_gs = tf->tf_gs;
    242 		sc->sc_fs = tf->tf_fs;
    243 		sc->sc_es = tf->tf_es;
    244 		sc->sc_ds = tf->tf_ds;
    245 		sc->sc_eflags = tf->tf_eflags;
    246 	}
    247 	sc->sc_edi = tf->tf_edi;
    248 	sc->sc_esi = tf->tf_esi;
    249 	sc->sc_esp = tf->tf_esp;
    250 	sc->sc_ebp = tf->tf_ebp;
    251 	sc->sc_ebx = tf->tf_ebx;
    252 	sc->sc_edx = tf->tf_edx;
    253 	sc->sc_ecx = tf->tf_ecx;
    254 	sc->sc_eax = tf->tf_eax;
    255 	sc->sc_eip = tf->tf_eip;
    256 	sc->sc_cs = tf->tf_cs;
    257 	sc->sc_esp_at_signal = tf->tf_esp;
    258 	sc->sc_ss = tf->tf_ss;
    259 	sc->sc_err = tf->tf_err;
    260 	sc->sc_trapno = tf->tf_trapno;
    261 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    262 	sc->sc_387 = NULL;
    263 
    264 	/* Save signal stack. */
    265 	/* Linux doesn't save the onstack flag in sigframe */
    266 
    267 	/* Save signal mask. */
    268 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    269 }
    270 
    271 static void
    272 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    273 {
    274 	struct lwp *l = curlwp;
    275 	struct proc *p = l->l_proc;
    276 	struct trapframe *tf;
    277 	struct linux_rt_sigframe *fp, frame;
    278 	int onstack, error;
    279 	linux_siginfo_t *lsi;
    280 	int sig = ksi->ksi_signo;
    281 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    282 	struct sigaltstack *sas = &l->l_sigstk;
    283 
    284 	tf = l->l_md.md_regs;
    285 	/* Do we need to jump onto the signal stack? */
    286 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    287 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    288 
    289 
    290 	/* Allocate space for the signal handler context. */
    291 	if (onstack)
    292 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    293 		    sas->ss_size);
    294 	else
    295 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    296 	fp--;
    297 
    298 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    299 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    300 
    301 	/* Build stack frame for signal trampoline. */
    302 	frame.sf_handler = catcher;
    303 	frame.sf_sig = native_to_linux_signo[sig];
    304 	frame.sf_sip = &fp->sf_si;
    305 	frame.sf_ucp = &fp->sf_uc;
    306 
    307 	/*
    308 	 * XXX: the following code assumes that the constants for
    309 	 * siginfo are the same between linux and NetBSD.
    310 	 */
    311 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    312 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    313 	lsi->lsi_code = ksi->ksi_code;
    314 	switch (lsi->lsi_signo = frame.sf_sig) {
    315 	case LINUX_SIGILL:
    316 	case LINUX_SIGFPE:
    317 	case LINUX_SIGSEGV:
    318 	case LINUX_SIGBUS:
    319 	case LINUX_SIGTRAP:
    320 		lsi->lsi_addr = ksi->ksi_addr;
    321 		break;
    322 	case LINUX_SIGCHLD:
    323 		lsi->lsi_uid = ksi->ksi_uid;
    324 		lsi->lsi_pid = ksi->ksi_pid;
    325 		lsi->lsi_utime = ksi->ksi_utime;
    326 		lsi->lsi_stime = ksi->ksi_stime;
    327 
    328 		/* We use the same codes */
    329 		lsi->lsi_code = ksi->ksi_code;
    330 		/* XXX is that right? */
    331 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    332 		break;
    333 	case LINUX_SIGIO:
    334 		lsi->lsi_band = ksi->ksi_band;
    335 		lsi->lsi_fd = ksi->ksi_fd;
    336 		break;
    337 	default:
    338 		lsi->lsi_uid = ksi->ksi_uid;
    339 		lsi->lsi_pid = ksi->ksi_pid;
    340 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    341 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    342 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    343 		break;
    344 	}
    345 
    346 	/* Save register context. */
    347 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    348 	sendsig_reset(l, sig);
    349 
    350 	mutex_exit(&p->p_smutex);
    351 	error = copyout(&frame, fp, sizeof(frame));
    352 	mutex_enter(&p->p_smutex);
    353 
    354 	if (error != 0) {
    355 		/*
    356 		 * Process has trashed its stack; give it an illegal
    357 		 * instruction to halt it in its tracks.
    358 		 */
    359 		sigexit(l, SIGILL);
    360 		/* NOTREACHED */
    361 	}
    362 
    363 	/*
    364 	 * Build context to run handler in.
    365 	 */
    366 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    370 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    371 	    (linux_rt_sigcode - linux_sigcode);
    372 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    373 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    374 	tf->tf_esp = (int)fp;
    375 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    376 
    377 	/* Remember that we're now on the signal stack. */
    378 	if (onstack)
    379 		sas->ss_flags |= SS_ONSTACK;
    380 }
    381 
    382 static void
    383 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    384 {
    385 	struct lwp *l = curlwp;
    386 	struct proc *p = l->l_proc;
    387 	struct trapframe *tf;
    388 	struct linux_sigframe *fp, frame;
    389 	int onstack, error;
    390 	int sig = ksi->ksi_signo;
    391 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    392 	struct sigaltstack *sas = &l->l_sigstk;
    393 
    394 	tf = l->l_md.md_regs;
    395 
    396 	/* Do we need to jump onto the signal stack? */
    397 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    398 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    399 
    400 	/* Allocate space for the signal handler context. */
    401 	if (onstack)
    402 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    403 		    sas->ss_size);
    404 	else
    405 		fp = (struct linux_sigframe *)tf->tf_esp;
    406 	fp--;
    407 
    408 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    409 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    410 
    411 	/* Build stack frame for signal trampoline. */
    412 	frame.sf_handler = catcher;
    413 	frame.sf_sig = native_to_linux_signo[sig];
    414 
    415 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    416 	sendsig_reset(l, sig);
    417 
    418 	mutex_exit(&p->p_smutex);
    419 	error = copyout(&frame, fp, sizeof(frame));
    420 	mutex_enter(&p->p_smutex);
    421 
    422 	if (error != 0) {
    423 		/*
    424 		 * Process has trashed its stack; give it an illegal
    425 		 * instruction to halt it in its tracks.
    426 		 */
    427 		sigexit(l, SIGILL);
    428 		/* NOTREACHED */
    429 	}
    430 
    431 	/*
    432 	 * Build context to run handler in.
    433 	 */
    434 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    435 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    436 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    437 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    438 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    439 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    440 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    441 	tf->tf_esp = (int)fp;
    442 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    443 
    444 	/* Remember that we're now on the signal stack. */
    445 	if (onstack)
    446 		sas->ss_flags |= SS_ONSTACK;
    447 }
    448 
    449 /*
    450  * System call to cleanup state after a signal
    451  * has been taken.  Reset signal mask and
    452  * stack state from context left by sendsig (above).
    453  * Return to previous pc and psl as specified by
    454  * context left by sendsig. Check carefully to
    455  * make sure that the user has not modified the
    456  * psl to gain improper privileges or to cause
    457  * a machine fault.
    458  */
    459 int
    460 linux_sys_rt_sigreturn(l, v, retval)
    461 	struct lwp *l;
    462 	void *v;
    463 	register_t *retval;
    464 {
    465 	struct linux_sys_rt_sigreturn_args /* {
    466 		syscallarg(struct linux_ucontext *) ucp;
    467 	} */ *uap = v;
    468 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    469 	int error;
    470 
    471 	/*
    472 	 * The trampoline code hands us the context.
    473 	 * It is unsafe to keep track of it ourselves, in the event that a
    474 	 * program jumps out of a signal handler.
    475 	 */
    476 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    477 		return error;
    478 
    479 	/* XXX XAX we can do better here by using more of the ucontext */
    480 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    481 }
    482 
    483 int
    484 linux_sys_sigreturn(l, v, retval)
    485 	struct lwp *l;
    486 	void *v;
    487 	register_t *retval;
    488 {
    489 	struct linux_sys_sigreturn_args /* {
    490 		syscallarg(struct linux_sigcontext *) scp;
    491 	} */ *uap = v;
    492 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    493 	int error;
    494 
    495 	/*
    496 	 * The trampoline code hands us the context.
    497 	 * It is unsafe to keep track of it ourselves, in the event that a
    498 	 * program jumps out of a signal handler.
    499 	 */
    500 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    501 		return error;
    502 	return linux_restore_sigcontext(l, &context, retval);
    503 }
    504 
    505 static int
    506 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    507     register_t *retval)
    508 {
    509 	struct proc *p = l->l_proc;
    510 	struct sigaltstack *sas = &l->l_sigstk;
    511 	struct trapframe *tf;
    512 	sigset_t mask;
    513 	ssize_t ss_gap;
    514 	/* Restore register context. */
    515 	tf = l->l_md.md_regs;
    516 
    517 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    518 #ifdef VM86
    519 	if (scp->sc_eflags & PSL_VM) {
    520 		void syscall_vm86 __P((struct trapframe *));
    521 
    522 		tf->tf_vm86_gs = scp->sc_gs;
    523 		tf->tf_vm86_fs = scp->sc_fs;
    524 		tf->tf_vm86_es = scp->sc_es;
    525 		tf->tf_vm86_ds = scp->sc_ds;
    526 		set_vflags(l, scp->sc_eflags);
    527 		p->p_md.md_syscall = syscall_vm86;
    528 	} else
    529 #endif
    530 	{
    531 		/*
    532 		 * Check for security violations.  If we're returning to
    533 		 * protected mode, the CPU will validate the segment registers
    534 		 * automatically and generate a trap on violations.  We handle
    535 		 * the trap, rather than doing all of the checking here.
    536 		 */
    537 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    538 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    539 			return EINVAL;
    540 
    541 		tf->tf_gs = scp->sc_gs;
    542 		tf->tf_fs = scp->sc_fs;
    543 		tf->tf_es = scp->sc_es;
    544 		tf->tf_ds = scp->sc_ds;
    545 #ifdef VM86
    546 		if (tf->tf_eflags & PSL_VM)
    547 			(*p->p_emul->e_syscall_intern)(p);
    548 #endif
    549 		tf->tf_eflags = scp->sc_eflags;
    550 	}
    551 	tf->tf_edi = scp->sc_edi;
    552 	tf->tf_esi = scp->sc_esi;
    553 	tf->tf_ebp = scp->sc_ebp;
    554 	tf->tf_ebx = scp->sc_ebx;
    555 	tf->tf_edx = scp->sc_edx;
    556 	tf->tf_ecx = scp->sc_ecx;
    557 	tf->tf_eax = scp->sc_eax;
    558 	tf->tf_eip = scp->sc_eip;
    559 	tf->tf_cs = scp->sc_cs;
    560 	tf->tf_esp = scp->sc_esp_at_signal;
    561 	tf->tf_ss = scp->sc_ss;
    562 
    563 	/* Restore signal stack. */
    564 	/*
    565 	 * Linux really does it this way; it doesn't have space in sigframe
    566 	 * to save the onstack flag.
    567 	 */
    568 	mutex_enter(&p->p_smutex);
    569 	ss_gap = (ssize_t)
    570 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    571 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    572 		sas->ss_flags |= SS_ONSTACK;
    573 	else
    574 		sas->ss_flags &= ~SS_ONSTACK;
    575 
    576 	/* Restore signal mask. */
    577 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    578 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    579 	mutex_exit(&p->p_smutex);
    580 
    581 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    582 	return EJUSTRETURN;
    583 }
    584 
    585 #ifdef USER_LDT
    586 
    587 int
    588 linux_read_ldt(l, uap, retval)
    589 	struct lwp *l;
    590 	struct linux_sys_modify_ldt_args /* {
    591 		syscallarg(int) func;
    592 		syscallarg(void *) ptr;
    593 		syscallarg(size_t) bytecount;
    594 	} */ *uap;
    595 	register_t *retval;
    596 {
    597 	struct proc *p = l->l_proc;
    598 	struct i386_get_ldt_args gl;
    599 	int error;
    600 	caddr_t sg;
    601 	char *parms;
    602 
    603 	DPRINTF(("linux_read_ldt!"));
    604 	sg = stackgap_init(p, 0);
    605 
    606 	gl.start = 0;
    607 	gl.desc = SCARG(uap, ptr);
    608 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    609 
    610 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    611 
    612 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    613 		return (error);
    614 
    615 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    616 		return (error);
    617 
    618 	*retval *= sizeof(union descriptor);
    619 	return (0);
    620 }
    621 
/*
 * LDT entry description that linux_write_ldt() copies in from user
 * space; the notes say how that function uses each member.
 */
struct linux_ldt_info {
	u_int entry_number;		/* LDT slot to set; must be < 8192 */
	u_long base_addr;		/* split into sd_lobase/sd_hibase */
	u_int limit;			/* split into sd_lolimit/sd_hilimit */
	u_int seg_32bit:1;		/* copied to sd_def32 */
	u_int contents:2;		/* becomes bits 2-3 of sd_type */
	u_int read_exec_only:1;		/* inverted into bit 1 of sd_type */
	u_int limit_in_pages:1;		/* copied to sd_gran */
	u_int seg_not_present:1;	/* inverted into sd_p */
	u_int useable:1;		/* copied to sd_xx unless "old mode" */
};
    633 
    634 int
    635 linux_write_ldt(l, uap, retval)
    636 	struct lwp *l;
    637 	struct linux_sys_modify_ldt_args /* {
    638 		syscallarg(int) func;
    639 		syscallarg(void *) ptr;
    640 		syscallarg(size_t) bytecount;
    641 	} */ *uap;
    642 	register_t *retval;
    643 {
    644 	struct proc *p = l->l_proc;
    645 	struct linux_ldt_info ldt_info;
    646 	struct segment_descriptor sd;
    647 	struct i386_set_ldt_args sl;
    648 	int error;
    649 	caddr_t sg;
    650 	char *parms;
    651 	int oldmode = (int)retval[0];
    652 
    653 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    654 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    655 		return (EINVAL);
    656 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    657 		return error;
    658 	if (ldt_info.entry_number >= 8192)
    659 		return (EINVAL);
    660 	if (ldt_info.contents == 3) {
    661 		if (oldmode)
    662 			return (EINVAL);
    663 		if (ldt_info.seg_not_present)
    664 			return (EINVAL);
    665 	}
    666 
    667 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    668 	    (oldmode || (ldt_info.contents == 0 &&
    669 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    670 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    671 	    ldt_info.useable == 0))) {
    672 		/* this means you should zero the ldt */
    673 		(void)memset(&sd, 0, sizeof(sd));
    674 	} else {
    675 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    676 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    677 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    678 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    679 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    680 		    (!ldt_info.read_exec_only << 1);
    681 		sd.sd_dpl = SEL_UPL;
    682 		sd.sd_p = !ldt_info.seg_not_present;
    683 		sd.sd_def32 = ldt_info.seg_32bit;
    684 		sd.sd_gran = ldt_info.limit_in_pages;
    685 		if (!oldmode)
    686 			sd.sd_xx = ldt_info.useable;
    687 		else
    688 			sd.sd_xx = 0;
    689 	}
    690 	sg = stackgap_init(p, 0);
    691 	sl.start = ldt_info.entry_number;
    692 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    693 	sl.num = 1;
    694 
    695 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    696 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    697 
    698 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    699 
    700 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    701 		return (error);
    702 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    703 		return (error);
    704 
    705 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    706 		return (error);
    707 
    708 	*retval = 0;
    709 	return (0);
    710 }
    711 
    712 #endif /* USER_LDT */
    713 
    714 int
    715 linux_sys_modify_ldt(struct lwp *l, void *v,
    716     register_t *retval)
    717 {
    718 	struct linux_sys_modify_ldt_args /* {
    719 		syscallarg(int) func;
    720 		syscallarg(void *) ptr;
    721 		syscallarg(size_t) bytecount;
    722 	} */ *uap = v;
    723 
    724 	switch (SCARG(uap, func)) {
    725 #ifdef USER_LDT
    726 	case 0:
    727 		return linux_read_ldt(l, uap, retval);
    728 	case 1:
    729 		retval[0] = 1;
    730 		return linux_write_ldt(l, uap, retval);
    731 	case 2:
    732 #ifdef notyet
    733 		return (linux_read_default_ldt(l, uap, retval);
    734 #else
    735 		return (ENOSYS);
    736 #endif
    737 	case 0x11:
    738 		retval[0] = 0;
    739 		return linux_write_ldt(l, uap, retval);
    740 #endif /* USER_LDT */
    741 
    742 	default:
    743 		return (ENOSYS);
    744 	}
    745 }
    746 
    747 /*
    748  * XXX Pathetic hack to make svgalib work. This will fake the major
    749  * device number of an opened VT so that svgalib likes it. grmbl.
    750  * Should probably do it 'wrong the right way' and use a mapping
    751  * array for all major device numbers, and map linux_mknod too.
    752  */
    753 dev_t
    754 linux_fakedev(dev, raw)
    755 	dev_t dev;
    756 	int raw;
    757 {
    758 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    759 	const struct cdevsw *cd = cdevsw_lookup(dev);
    760 
    761 	if (raw) {
    762 #if (NWSDISPLAY > 0)
    763 		extern const struct cdevsw wsdisplay_cdevsw;
    764 		if (cd == &wsdisplay_cdevsw)
    765 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    766 #endif
    767 	}
    768 
    769 	if (cd == &ptc_cdevsw)
    770 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    771 	if (cd == &pts_cdevsw)
    772 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    773 
    774 	return dev;
    775 }
    776 
    777 #if (NWSDISPLAY > 0)
/*
 * Keymap tables (plain, shift, altgr and ctrl).  They are not
 * complete, but they are enough to get an X server running.
 */
    781 #define NR_KEYS 128
    782 static const u_short plain_map[NR_KEYS] = {
    783 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    784 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    785 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    786 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    787 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    788 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    789 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    790 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    791 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    792 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    793 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    794 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    795 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    796 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    797 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    798 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    799 }, shift_map[NR_KEYS] = {
    800 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    801 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    802 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    803 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    804 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    805 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    806 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    807 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    808 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    809 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    810 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    811 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    812 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    813 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    814 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    815 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    816 }, altgr_map[NR_KEYS] = {
    817 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    818 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    819 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    820 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    821 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    822 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    823 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    824 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    825 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    826 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    827 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    828 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    829 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    830 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    831 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    832 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    833 }, ctrl_map[NR_KEYS] = {
    834 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    835 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    836 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    837 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    838 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    839 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    840 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    841 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    842 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    843 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    844 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    845 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    846 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    847 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    848 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    849 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    850 };
    851 
    852 const u_short * const linux_keytabs[] = {
    853 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    854 };
    855 #endif
    856 
    857 static struct biosdisk_info *
    858 fd2biosinfo(struct proc *p, struct file *fp)
    859 {
    860 	struct vnode *vp;
    861 	const char *blkname;
    862 	char diskname[16];
    863 	int i;
    864 	struct nativedisk_info *nip;
    865 	struct disklist *dl = x86_alldisks;
    866 
    867 	if (fp->f_type != DTYPE_VNODE)
    868 		return NULL;
    869 	vp = (struct vnode *)fp->f_data;
    870 
    871 	if (vp->v_type != VBLK)
    872 		return NULL;
    873 
    874 	blkname = devsw_blk2name(major(vp->v_rdev));
    875 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    876 	    DISKUNIT(vp->v_rdev));
    877 
    878 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    879 		nip = &dl->dl_nativedisks[i];
    880 		if (strcmp(diskname, nip->ni_devname))
    881 			continue;
    882 		if (nip->ni_nmatches != 0)
    883 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    884 	}
    885 
    886 	return NULL;
    887 }
    888 
    889 
    890 /*
    891  * We come here in a last attempt to satisfy a Linux ioctl() call
    892  */
    893 int
    894 linux_machdepioctl(l, v, retval)
    895 	struct lwp *l;
    896 	void *v;
    897 	register_t *retval;
    898 {
    899 	struct linux_sys_ioctl_args /* {
    900 		syscallarg(int) fd;
    901 		syscallarg(u_long) com;
    902 		syscallarg(caddr_t) data;
    903 	} */ *uap = v;
    904 	struct sys_ioctl_args bia;
    905 	u_long com;
    906 	int error, error1;
    907 #if (NWSDISPLAY > 0)
    908 	struct vt_mode lvt;
    909 	caddr_t bvtp, sg;
    910 	struct kbentry kbe;
    911 #endif
    912 	struct linux_hd_geometry hdg;
    913 	struct linux_hd_big_geometry hdg_big;
    914 	struct biosdisk_info *bip;
    915 	struct filedesc *fdp;
    916 	struct file *fp;
    917 	int fd;
    918 	struct disklabel label, *labp;
    919 	struct partinfo partp;
    920 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    921 	u_long start, biostotal, realtotal;
    922 	u_char heads, sectors;
    923 	u_int cylinders;
    924 	struct ioctl_pt pt;
    925 	struct proc *p = l->l_proc;
    926 
    927 	fd = SCARG(uap, fd);
    928 	SCARG(&bia, fd) = fd;
    929 	SCARG(&bia, data) = SCARG(uap, data);
    930 	com = SCARG(uap, com);
    931 
    932 	fdp = p->p_fd;
    933 
    934 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    935 		return (EBADF);
    936 
    937 	FILE_USE(fp);
    938 
    939 	switch (com) {
    940 #if (NWSDISPLAY > 0)
    941 	case LINUX_KDGKBMODE:
    942 		com = KDGKBMODE;
    943 		break;
    944 	case LINUX_KDSKBMODE:
    945 		com = KDSKBMODE;
    946 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    947 			SCARG(&bia, data) = (caddr_t)K_RAW;
    948 		break;
    949 	case LINUX_KIOCSOUND:
    950 		SCARG(&bia, data) =
    951 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    952 		/* fall through */
    953 	case LINUX_KDMKTONE:
    954 		com = KDMKTONE;
    955 		break;
    956 	case LINUX_KDSETMODE:
    957 		com = KDSETMODE;
    958 		break;
    959 	case LINUX_KDGETMODE:
    960 		/* KD_* values are equal to the wscons numbers */
    961 		com = WSDISPLAYIO_GMODE;
    962 		break;
    963 	case LINUX_KDENABIO:
    964 		com = KDENABIO;
    965 		break;
    966 	case LINUX_KDDISABIO:
    967 		com = KDDISABIO;
    968 		break;
    969 	case LINUX_KDGETLED:
    970 		com = KDGETLED;
    971 		break;
    972 	case LINUX_KDSETLED:
    973 		com = KDSETLED;
    974 		break;
    975 	case LINUX_VT_OPENQRY:
    976 		com = VT_OPENQRY;
    977 		break;
    978 	case LINUX_VT_GETMODE:
    979 		SCARG(&bia, com) = VT_GETMODE;
    980 		/* XXX NJWLWP */
    981 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    982 			goto out;
    983 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    984 		    sizeof (struct vt_mode))))
    985 			goto out;
    986 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    987 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    988 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    989 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    990 		    sizeof (struct vt_mode));
    991 		goto out;
    992 	case LINUX_VT_SETMODE:
    993 		com = VT_SETMODE;
    994 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    995 		    sizeof (struct vt_mode))))
    996 			goto out;
    997 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    998 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    999 		lvt.frsig = linux_to_native_signo[lvt.frsig];
   1000 		sg = stackgap_init(p, 0);
   1001 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
   1002 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
   1003 			goto out;
   1004 		SCARG(&bia, data) = bvtp;
   1005 		break;
   1006 	case LINUX_VT_DISALLOCATE:
   1007 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1008 		error = 0;
   1009 		goto out;
   1010 	case LINUX_VT_RELDISP:
   1011 		com = VT_RELDISP;
   1012 		break;
   1013 	case LINUX_VT_ACTIVATE:
   1014 		com = VT_ACTIVATE;
   1015 		break;
   1016 	case LINUX_VT_WAITACTIVE:
   1017 		com = VT_WAITACTIVE;
   1018 		break;
   1019 	case LINUX_VT_GETSTATE:
   1020 		com = VT_GETSTATE;
   1021 		break;
   1022 	case LINUX_KDGKBTYPE:
   1023 	    {
   1024 		static const u_int8_t kb101 = KB_101;
   1025 
   1026 		/* This is what Linux does. */
   1027 		error = copyout(&kb101, SCARG(uap, data), 1);
   1028 		goto out;
   1029 	    }
   1030 	case LINUX_KDGKBENT:
   1031 		/*
   1032 		 * The Linux KDGKBENT ioctl is different from the
   1033 		 * SYSV original. So we handle it in machdep code.
   1034 		 * XXX We should use keyboard mapping information
   1035 		 * from wsdisplay, but this would be expensive.
   1036 		 */
   1037 		if ((error = copyin(SCARG(uap, data), &kbe,
   1038 				    sizeof(struct kbentry))))
   1039 			goto out;
   1040 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1041 		    || kbe.kb_index >= NR_KEYS) {
   1042 			error = EINVAL;
   1043 			goto out;
   1044 		}
   1045 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1046 		error = copyout(&kbe, SCARG(uap, data),
   1047 				sizeof(struct kbentry));
   1048 		goto out;
   1049 #endif
   1050 	case LINUX_HDIO_GETGEO:
   1051 	case LINUX_HDIO_GETGEO_BIG:
   1052 		/*
   1053 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1054 		 * if possible (extending its # of cylinders if it's beyond
   1055 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1056 		 * the real geometry) if not found, by returning an
   1057 		 * error. See common/linux_hdio.c
   1058 		 */
   1059 		bip = fd2biosinfo(p, fp);
   1060 		ioctlf = fp->f_ops->fo_ioctl;
   1061 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1062 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1063 		if (error != 0 && error1 != 0) {
   1064 			error = error1;
   1065 			goto out;
   1066 		}
   1067 		labp = error != 0 ? &label : partp.disklab;
   1068 		start = error1 != 0 ? partp.part->p_offset : 0;
   1069 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1070 		    && bip->bi_cyl != 0) {
   1071 			heads = bip->bi_head;
   1072 			sectors = bip->bi_sec;
   1073 			cylinders = bip->bi_cyl;
   1074 			biostotal = heads * sectors * cylinders;
   1075 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1076 			    labp->d_ncylinders;
   1077 			if (realtotal > biostotal)
   1078 				cylinders = realtotal / (heads * sectors);
   1079 		} else {
   1080 			heads = labp->d_ntracks;
   1081 			cylinders = labp->d_ncylinders;
   1082 			sectors = labp->d_nsectors;
   1083 		}
   1084 		if (com == LINUX_HDIO_GETGEO) {
   1085 			hdg.start = start;
   1086 			hdg.heads = heads;
   1087 			hdg.cylinders = cylinders;
   1088 			hdg.sectors = sectors;
   1089 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1090 			goto out;
   1091 		} else {
   1092 			hdg_big.start = start;
   1093 			hdg_big.heads = heads;
   1094 			hdg_big.cylinders = cylinders;
   1095 			hdg_big.sectors = sectors;
   1096 			error = copyout(&hdg_big, SCARG(uap, data),
   1097 			    sizeof hdg_big);
   1098 			goto out;
   1099 		}
   1100 
   1101 	default:
   1102 		/*
   1103 		 * Unknown to us. If it's on a device, just pass it through
   1104 		 * using PTIOCLINUX, the device itself might be able to
   1105 		 * make some sense of it.
   1106 		 * XXX hack: if the function returns EJUSTRETURN,
   1107 		 * it has stuffed a sysctl return value in pt.data.
   1108 		 */
   1109 		ioctlf = fp->f_ops->fo_ioctl;
   1110 		pt.com = SCARG(uap, com);
   1111 		pt.data = SCARG(uap, data);
   1112 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1113 		if (error == EJUSTRETURN) {
   1114 			retval[0] = (register_t)pt.data;
   1115 			error = 0;
   1116 		}
   1117 
   1118 		if (error == ENOTTY) {
   1119 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1120 			    com));
   1121 		}
   1122 		goto out;
   1123 	}
   1124 	SCARG(&bia, com) = com;
   1125 	/* XXX NJWLWP */
   1126 	error = sys_ioctl(curlwp, &bia, retval);
   1127 out:
   1128 	FILE_UNUSE(fp ,l);
   1129 	return error;
   1130 }
   1131 
   1132 /*
   1133  * Set I/O permissions for a process. Just set the maximum level
   1134  * right away (ignoring the argument), otherwise we would have
   1135  * to rely on I/O permission maps, which are not implemented.
   1136  */
   1137 int
   1138 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1139 {
   1140 #if 0
   1141 	struct linux_sys_iopl_args /* {
   1142 		syscallarg(int) level;
   1143 	} */ *uap = v;
   1144 #endif
   1145 	struct trapframe *fp = l->l_md.md_regs;
   1146 
   1147 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1148 	    NULL, NULL, NULL, NULL) != 0)
   1149 		return EPERM;
   1150 	fp->tf_eflags |= PSL_IOPL;
   1151 	*retval = 0;
   1152 	return 0;
   1153 }
   1154 
   1155 /*
   1156  * See above. If a root process tries to set access to an I/O port,
   1157  * just let it have the whole range.
   1158  */
   1159 int
   1160 linux_sys_ioperm(l, v, retval)
   1161 	struct lwp *l;
   1162 	void *v;
   1163 	register_t *retval;
   1164 {
   1165 	struct linux_sys_ioperm_args /* {
   1166 		syscallarg(unsigned int) lo;
   1167 		syscallarg(unsigned int) hi;
   1168 		syscallarg(int) val;
   1169 	} */ *uap = v;
   1170 	struct trapframe *fp = l->l_md.md_regs;
   1171 
   1172 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1173 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1174 	    NULL, NULL) != 0)
   1175 		return EPERM;
   1176 	if (SCARG(uap, val))
   1177 		fp->tf_eflags |= PSL_IOPL;
   1178 	*retval = 0;
   1179 	return 0;
   1180 }
   1181 
   1182 int
   1183 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1184     void *arg)
   1185 {
   1186 	return 0;
   1187 }
   1188