Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.109
      1 /*	$NetBSD: linux_machdep.c,v 1.109 2005/06/24 22:57:05 manu Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.109 2005/06/24 22:57:05 manu Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 #include <compat/linux/common/linux_errno.h>
     81 
     82 #include <compat/linux/linux_syscallargs.h>
     83 
     84 #include <machine/cpu.h>
     85 #include <machine/cpufunc.h>
     86 #include <machine/psl.h>
     87 #include <machine/reg.h>
     88 #include <machine/segments.h>
     89 #include <machine/specialreg.h>
     90 #include <machine/sysarch.h>
     91 #include <machine/vm86.h>
     92 #include <machine/vmparam.h>
     93 
     94 /*
     95  * To see whether wscons is configured (for virtual console ioctl calls).
     96  */
     97 #if defined(_KERNEL_OPT)
     98 #include "wsdisplay.h"
     99 #endif
    100 #if (NWSDISPLAY > 0)
    101 #include <dev/wscons/wsconsio.h>
    102 #include <dev/wscons/wsdisplay_usl_io.h>
    103 #if defined(_KERNEL_OPT)
    104 #include "opt_xserver.h"
    105 #endif
    106 #endif
    107 
#ifdef USER_LDT
#include <machine/cpu.h>

/*
 * Workers behind linux_sys_modify_ldt(); they only exist when the
 * kernel is built with USER_LDT.
 */
int linux_read_ldt __P((struct lwp *,
    struct linux_sys_modify_ldt_args *, register_t *));
int linux_write_ldt __P((struct lwp *,
    struct linux_sys_modify_ldt_args *, register_t *));
#endif /* USER_LDT */
    115 
/*
 * Debug tracing.  DPRINTF takes a fully parenthesised uprintf() argument
 * list, e.g. DPRINTF(("x = %d\n", x)), and expands to nothing unless
 * DEBUG_LINUX is defined.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
    121 
    122 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    123 extern struct disklist *x86_alldisks;
    124 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    125     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    126 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    127     const sigset_t *, struct linux_sigcontext *));
    128 static int linux_restore_sigcontext __P((struct lwp *,
    129     struct linux_sigcontext *, register_t *));
    130 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    131 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    132 
    133 extern char linux_sigcode[], linux_rt_sigcode[];
    134 /*
    135  * Deal with some i386-specific things in the Linux emulation code.
    136  */
    137 
    138 void
    139 linux_setregs(l, epp, stack)
    140 	struct lwp *l;
    141 	struct exec_package *epp;
    142 	u_long stack;
    143 {
    144 	struct pcb *pcb = &l->l_addr->u_pcb;
    145 	struct trapframe *tf;
    146 
    147 #if NNPX > 0
    148 	/* If we were using the FPU, forget about it. */
    149 	if (npxproc == l)
    150 		npxdrop();
    151 #endif
    152 
    153 #ifdef USER_LDT
    154 	pmap_ldt_cleanup(l);
    155 #endif
    156 
    157 	l->l_md.md_flags &= ~MDL_USEDFPU;
    158 
    159 	if (i386_use_fxsave) {
    160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    162 	} else
    163 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    164 
    165 	tf = l->l_md.md_regs;
    166 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    167 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_edi = 0;
    171 	tf->tf_esi = 0;
    172 	tf->tf_ebp = 0;
    173 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    174 	tf->tf_edx = 0;
    175 	tf->tf_ecx = 0;
    176 	tf->tf_eax = 0;
    177 	tf->tf_eip = epp->ep_entry;
    178 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    179 	tf->tf_eflags = PSL_USERSET;
    180 	tf->tf_esp = stack;
    181 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    182 }
    183 
    184 /*
    185  * Send an interrupt to process.
    186  *
    187  * Stack is set up to allow sigcode stored
    188  * in u. to call routine, followed by kcall
    189  * to sigreturn routine below.  After sigreturn
    190  * resets the signal mask, the stack, and the
    191  * frame pointer, it returns to the user
    192  * specified pc, psl.
    193  */
    194 
    195 void
    196 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    197 {
    198 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    199 		linux_rt_sendsig(ksi, mask);
    200 	else
    201 		linux_old_sendsig(ksi, mask);
    202 }
    203 
    204 
    205 static void
    206 linux_save_ucontext(l, tf, mask, sas, uc)
    207 	struct lwp *l;
    208 	struct trapframe *tf;
    209 	const sigset_t *mask;
    210 	struct sigaltstack *sas;
    211 	struct linux_ucontext *uc;
    212 {
    213 	uc->uc_flags = 0;
    214 	uc->uc_link = NULL;
    215 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    216 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    217 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    218 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    219 }
    220 
    221 static void
    222 linux_save_sigcontext(l, tf, mask, sc)
    223 	struct lwp *l;
    224 	struct trapframe *tf;
    225 	const sigset_t *mask;
    226 	struct linux_sigcontext *sc;
    227 {
    228 	/* Save register context. */
    229 #ifdef VM86
    230 	if (tf->tf_eflags & PSL_VM) {
    231 		sc->sc_gs = tf->tf_vm86_gs;
    232 		sc->sc_fs = tf->tf_vm86_fs;
    233 		sc->sc_es = tf->tf_vm86_es;
    234 		sc->sc_ds = tf->tf_vm86_ds;
    235 		sc->sc_eflags = get_vflags(l);
    236 	} else
    237 #endif
    238 	{
    239 		sc->sc_gs = tf->tf_gs;
    240 		sc->sc_fs = tf->tf_fs;
    241 		sc->sc_es = tf->tf_es;
    242 		sc->sc_ds = tf->tf_ds;
    243 		sc->sc_eflags = tf->tf_eflags;
    244 	}
    245 	sc->sc_edi = tf->tf_edi;
    246 	sc->sc_esi = tf->tf_esi;
    247 	sc->sc_esp = tf->tf_esp;
    248 	sc->sc_ebp = tf->tf_ebp;
    249 	sc->sc_ebx = tf->tf_ebx;
    250 	sc->sc_edx = tf->tf_edx;
    251 	sc->sc_ecx = tf->tf_ecx;
    252 	sc->sc_eax = tf->tf_eax;
    253 	sc->sc_eip = tf->tf_eip;
    254 	sc->sc_cs = tf->tf_cs;
    255 	sc->sc_esp_at_signal = tf->tf_esp;
    256 	sc->sc_ss = tf->tf_ss;
    257 	sc->sc_err = tf->tf_err;
    258 	sc->sc_trapno = tf->tf_trapno;
    259 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    260 	sc->sc_387 = NULL;
    261 
    262 	/* Save signal stack. */
    263 	/* Linux doesn't save the onstack flag in sigframe */
    264 
    265 	/* Save signal mask. */
    266 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    267 }
    268 
    269 static void
    270 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    271 {
    272 	struct lwp *l = curlwp;
    273 	struct proc *p = l->l_proc;
    274 	struct trapframe *tf;
    275 	struct linux_rt_sigframe *fp, frame;
    276 	int onstack;
    277 	linux_siginfo_t *lsi;
    278 	int sig = ksi->ksi_signo;
    279 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    280 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    281 
    282 	tf = l->l_md.md_regs;
    283 	/* Do we need to jump onto the signal stack? */
    284 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    285 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    286 
    287 
    288 	/* Allocate space for the signal handler context. */
    289 	if (onstack)
    290 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    291 		    sas->ss_size);
    292 	else
    293 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    294 	fp--;
    295 
    296 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    297 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    298 
    299 	/* Build stack frame for signal trampoline. */
    300 	frame.sf_handler = catcher;
    301 	frame.sf_sig = native_to_linux_signo[sig];
    302 	frame.sf_sip = &fp->sf_si;
    303 	frame.sf_ucp = &fp->sf_uc;
    304 
    305 	/*
    306 	 * XXX: the following code assumes that the constants for
    307 	 * siginfo are the same between linux and NetBSD.
    308 	 */
    309 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    310 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    311 	lsi->lsi_code = ksi->ksi_code;
    312 	switch (lsi->lsi_signo = frame.sf_sig) {
    313 	case LINUX_SIGILL:
    314 	case LINUX_SIGFPE:
    315 	case LINUX_SIGSEGV:
    316 	case LINUX_SIGBUS:
    317 	case LINUX_SIGTRAP:
    318 		lsi->lsi_addr = ksi->ksi_addr;
    319 		break;
    320 	case LINUX_SIGCHLD:
    321 		lsi->lsi_uid = ksi->ksi_uid;
    322 		lsi->lsi_pid = ksi->ksi_pid;
    323 		lsi->lsi_utime = ksi->ksi_utime;
    324 		lsi->lsi_stime = ksi->ksi_stime;
    325 
    326 		if (WCOREDUMP(ksi->ksi_status)) {
    327 			lsi->lsi_code = LINUX_CLD_DUMPED;
    328 			lsi->lsi_status = _WSTATUS(ksi->ksi_status);
    329 		} else if (_WSTATUS(ksi->ksi_status)) {
    330 			lsi->lsi_code = LINUX_CLD_KILLED;
    331 			lsi->lsi_status = _WSTATUS(ksi->ksi_status);
    332 		} else {
    333 			lsi->lsi_code = LINUX_CLD_EXITED;
    334 			lsi->lsi_status = ((ksi->ksi_status & 0xff00U) >> 8);
    335 		}
    336 		break;
    337 	case LINUX_SIGIO:
    338 		lsi->lsi_band = ksi->ksi_band;
    339 		lsi->lsi_fd = ksi->ksi_fd;
    340 		break;
    341 	default:
    342 		lsi->lsi_uid = ksi->ksi_uid;
    343 		lsi->lsi_pid = ksi->ksi_pid;
    344 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    345 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    346 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    347 		break;
    348 	}
    349 
    350 	/* Save register context. */
    351 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    352 
    353 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    354 		/*
    355 		 * Process has trashed its stack; give it an illegal
    356 		 * instruction to halt it in its tracks.
    357 		 */
    358 		sigexit(l, SIGILL);
    359 		/* NOTREACHED */
    360 	}
    361 
    362 	/*
    363 	 * Build context to run handler in.
    364 	 */
    365 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    366 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    370 	    (linux_rt_sigcode - linux_sigcode);
    371 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    372 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    373 	tf->tf_esp = (int)fp;
    374 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    375 
    376 	/* Remember that we're now on the signal stack. */
    377 	if (onstack)
    378 		sas->ss_flags |= SS_ONSTACK;
    379 }
    380 
    381 static void
    382 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    383 {
    384 	struct lwp *l = curlwp;
    385 	struct proc *p = l->l_proc;
    386 	struct trapframe *tf;
    387 	struct linux_sigframe *fp, frame;
    388 	int onstack;
    389 	int sig = ksi->ksi_signo;
    390 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    391 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    392 
    393 	tf = l->l_md.md_regs;
    394 
    395 	/* Do we need to jump onto the signal stack? */
    396 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    397 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    398 
    399 	/* Allocate space for the signal handler context. */
    400 	if (onstack)
    401 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    402 		    sas->ss_size);
    403 	else
    404 		fp = (struct linux_sigframe *)tf->tf_esp;
    405 	fp--;
    406 
    407 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    408 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    409 
    410 	/* Build stack frame for signal trampoline. */
    411 	frame.sf_handler = catcher;
    412 	frame.sf_sig = native_to_linux_signo[sig];
    413 
    414 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    415 
    416 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    417 		/*
    418 		 * Process has trashed its stack; give it an illegal
    419 		 * instruction to halt it in its tracks.
    420 		 */
    421 		sigexit(l, SIGILL);
    422 		/* NOTREACHED */
    423 	}
    424 
    425 	/*
    426 	 * Build context to run handler in.
    427 	 */
    428 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    429 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    430 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    431 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    432 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    433 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    434 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    435 	tf->tf_esp = (int)fp;
    436 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    437 
    438 	/* Remember that we're now on the signal stack. */
    439 	if (onstack)
    440 		sas->ss_flags |= SS_ONSTACK;
    441 }
    442 
    443 /*
    444  * System call to cleanup state after a signal
    445  * has been taken.  Reset signal mask and
    446  * stack state from context left by sendsig (above).
    447  * Return to previous pc and psl as specified by
    448  * context left by sendsig. Check carefully to
    449  * make sure that the user has not modified the
    450  * psl to gain improper privileges or to cause
    451  * a machine fault.
    452  */
    453 int
    454 linux_sys_rt_sigreturn(l, v, retval)
    455 	struct lwp *l;
    456 	void *v;
    457 	register_t *retval;
    458 {
    459 	struct linux_sys_rt_sigreturn_args /* {
    460 		syscallarg(struct linux_ucontext *) ucp;
    461 	} */ *uap = v;
    462 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    463 	int error;
    464 
    465 	/*
    466 	 * The trampoline code hands us the context.
    467 	 * It is unsafe to keep track of it ourselves, in the event that a
    468 	 * program jumps out of a signal handler.
    469 	 */
    470 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    471 		return error;
    472 
    473 	/* XXX XAX we can do better here by using more of the ucontext */
    474 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    475 }
    476 
    477 int
    478 linux_sys_sigreturn(l, v, retval)
    479 	struct lwp *l;
    480 	void *v;
    481 	register_t *retval;
    482 {
    483 	struct linux_sys_sigreturn_args /* {
    484 		syscallarg(struct linux_sigcontext *) scp;
    485 	} */ *uap = v;
    486 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    487 	int error;
    488 
    489 	/*
    490 	 * The trampoline code hands us the context.
    491 	 * It is unsafe to keep track of it ourselves, in the event that a
    492 	 * program jumps out of a signal handler.
    493 	 */
    494 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    495 		return error;
    496 	return linux_restore_sigcontext(l, &context, retval);
    497 }
    498 
    499 static int
    500 linux_restore_sigcontext(l, scp, retval)
    501 	struct lwp *l;
    502 	struct linux_sigcontext *scp;
    503 	register_t *retval;
    504 {
    505 	struct proc *p = l->l_proc;
    506 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    507 	struct trapframe *tf;
    508 	sigset_t mask;
    509 	ssize_t ss_gap;
    510 	/* Restore register context. */
    511 	tf = l->l_md.md_regs;
    512 
    513 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    514 #ifdef VM86
    515 	if (scp->sc_eflags & PSL_VM) {
    516 		void syscall_vm86 __P((struct trapframe *));
    517 
    518 		tf->tf_vm86_gs = scp->sc_gs;
    519 		tf->tf_vm86_fs = scp->sc_fs;
    520 		tf->tf_vm86_es = scp->sc_es;
    521 		tf->tf_vm86_ds = scp->sc_ds;
    522 		set_vflags(l, scp->sc_eflags);
    523 		p->p_md.md_syscall = syscall_vm86;
    524 	} else
    525 #endif
    526 	{
    527 		/*
    528 		 * Check for security violations.  If we're returning to
    529 		 * protected mode, the CPU will validate the segment registers
    530 		 * automatically and generate a trap on violations.  We handle
    531 		 * the trap, rather than doing all of the checking here.
    532 		 */
    533 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    534 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    535 			return EINVAL;
    536 
    537 		tf->tf_gs = scp->sc_gs;
    538 		tf->tf_fs = scp->sc_fs;
    539 		tf->tf_es = scp->sc_es;
    540 		tf->tf_ds = scp->sc_ds;
    541 #ifdef VM86
    542 		if (tf->tf_eflags & PSL_VM)
    543 			(*p->p_emul->e_syscall_intern)(p);
    544 #endif
    545 		tf->tf_eflags = scp->sc_eflags;
    546 	}
    547 	tf->tf_edi = scp->sc_edi;
    548 	tf->tf_esi = scp->sc_esi;
    549 	tf->tf_ebp = scp->sc_ebp;
    550 	tf->tf_ebx = scp->sc_ebx;
    551 	tf->tf_edx = scp->sc_edx;
    552 	tf->tf_ecx = scp->sc_ecx;
    553 	tf->tf_eax = scp->sc_eax;
    554 	tf->tf_eip = scp->sc_eip;
    555 	tf->tf_cs = scp->sc_cs;
    556 	tf->tf_esp = scp->sc_esp_at_signal;
    557 	tf->tf_ss = scp->sc_ss;
    558 
    559 	/* Restore signal stack. */
    560 	/*
    561 	 * Linux really does it this way; it doesn't have space in sigframe
    562 	 * to save the onstack flag.
    563 	 */
    564 	ss_gap = (ssize_t)
    565 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    566 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    567 		sas->ss_flags |= SS_ONSTACK;
    568 	else
    569 		sas->ss_flags &= ~SS_ONSTACK;
    570 
    571 	/* Restore signal mask. */
    572 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    573 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    574 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    575 	return EJUSTRETURN;
    576 }
    577 
    578 #ifdef USER_LDT
    579 
    580 int
    581 linux_read_ldt(l, uap, retval)
    582 	struct lwp *l;
    583 	struct linux_sys_modify_ldt_args /* {
    584 		syscallarg(int) func;
    585 		syscallarg(void *) ptr;
    586 		syscallarg(size_t) bytecount;
    587 	} */ *uap;
    588 	register_t *retval;
    589 {
    590 	struct proc *p = l->l_proc;
    591 	struct i386_get_ldt_args gl;
    592 	int error;
    593 	caddr_t sg;
    594 	char *parms;
    595 
    596 	DPRINTF(("linux_read_ldt!"));
    597 	sg = stackgap_init(p, 0);
    598 
    599 	gl.start = 0;
    600 	gl.desc = SCARG(uap, ptr);
    601 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    602 
    603 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    604 
    605 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    606 		return (error);
    607 
    608 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    609 		return (error);
    610 
    611 	*retval *= sizeof(union descriptor);
    612 	return (0);
    613 }
    614 
    615 struct linux_ldt_info {
    616 	u_int entry_number;
    617 	u_long base_addr;
    618 	u_int limit;
    619 	u_int seg_32bit:1;
    620 	u_int contents:2;
    621 	u_int read_exec_only:1;
    622 	u_int limit_in_pages:1;
    623 	u_int seg_not_present:1;
    624 	u_int useable:1;
    625 };
    626 
    627 int
    628 linux_write_ldt(l, uap, retval)
    629 	struct lwp *l;
    630 	struct linux_sys_modify_ldt_args /* {
    631 		syscallarg(int) func;
    632 		syscallarg(void *) ptr;
    633 		syscallarg(size_t) bytecount;
    634 	} */ *uap;
    635 	register_t *retval;
    636 {
    637 	struct proc *p = l->l_proc;
    638 	struct linux_ldt_info ldt_info;
    639 	struct segment_descriptor sd;
    640 	struct i386_set_ldt_args sl;
    641 	int error;
    642 	caddr_t sg;
    643 	char *parms;
    644 	int oldmode = (int)retval[0];
    645 
    646 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    647 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    648 		return (EINVAL);
    649 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    650 		return error;
    651 	if (ldt_info.entry_number >= 8192)
    652 		return (EINVAL);
    653 	if (ldt_info.contents == 3) {
    654 		if (oldmode)
    655 			return (EINVAL);
    656 		if (ldt_info.seg_not_present)
    657 			return (EINVAL);
    658 	}
    659 
    660 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    661 	    (oldmode || (ldt_info.contents == 0 &&
    662 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    663 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    664 	    ldt_info.useable == 0))) {
    665 		/* this means you should zero the ldt */
    666 		(void)memset(&sd, 0, sizeof(sd));
    667 	} else {
    668 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    669 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    670 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    671 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    672 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    673 		    (!ldt_info.read_exec_only << 1);
    674 		sd.sd_dpl = SEL_UPL;
    675 		sd.sd_p = !ldt_info.seg_not_present;
    676 		sd.sd_def32 = ldt_info.seg_32bit;
    677 		sd.sd_gran = ldt_info.limit_in_pages;
    678 		if (!oldmode)
    679 			sd.sd_xx = ldt_info.useable;
    680 		else
    681 			sd.sd_xx = 0;
    682 	}
    683 	sg = stackgap_init(p, 0);
    684 	sl.start = ldt_info.entry_number;
    685 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    686 	sl.num = 1;
    687 
    688 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    689 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    690 
    691 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    692 
    693 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    694 		return (error);
    695 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    696 		return (error);
    697 
    698 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    699 		return (error);
    700 
    701 	*retval = 0;
    702 	return (0);
    703 }
    704 
    705 #endif /* USER_LDT */
    706 
    707 int
    708 linux_sys_modify_ldt(l, v, retval)
    709 	struct lwp *l;
    710 	void *v;
    711 	register_t *retval;
    712 {
    713 	struct linux_sys_modify_ldt_args /* {
    714 		syscallarg(int) func;
    715 		syscallarg(void *) ptr;
    716 		syscallarg(size_t) bytecount;
    717 	} */ *uap = v;
    718 
    719 	switch (SCARG(uap, func)) {
    720 #ifdef USER_LDT
    721 	case 0:
    722 		return linux_read_ldt(l, uap, retval);
    723 	case 1:
    724 		retval[0] = 1;
    725 		return linux_write_ldt(l, uap, retval);
    726 	case 2:
    727 #ifdef notyet
    728 		return (linux_read_default_ldt(l, uap, retval);
    729 #else
    730 		return (ENOSYS);
    731 #endif
    732 	case 0x11:
    733 		retval[0] = 0;
    734 		return linux_write_ldt(l, uap, retval);
    735 #endif /* USER_LDT */
    736 
    737 	default:
    738 		return (ENOSYS);
    739 	}
    740 }
    741 
    742 /*
    743  * XXX Pathetic hack to make svgalib work. This will fake the major
    744  * device number of an opened VT so that svgalib likes it. grmbl.
    745  * Should probably do it 'wrong the right way' and use a mapping
    746  * array for all major device numbers, and map linux_mknod too.
    747  */
    748 dev_t
    749 linux_fakedev(dev, raw)
    750 	dev_t dev;
    751 	int raw;
    752 {
    753 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    754 	const struct cdevsw *cd = cdevsw_lookup(dev);
    755 
    756 	if (raw) {
    757 #if (NWSDISPLAY > 0)
    758 		extern const struct cdevsw wsdisplay_cdevsw;
    759 		if (cd == &wsdisplay_cdevsw)
    760 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    761 #endif
    762 	}
    763 
    764 	if (cd == &ptc_cdevsw)
    765 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    766 	if (cd == &pts_cdevsw)
    767 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    768 
    769 	return dev;
    770 }
    771 
    772 #if (NWSDISPLAY > 0)
    773 /*
    774  * That's not complete, but enough to get an X server running.
    775  */
    776 #define NR_KEYS 128
    777 static const u_short plain_map[NR_KEYS] = {
    778 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    779 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    780 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    781 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    782 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    783 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    784 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    785 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    786 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    787 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    788 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    789 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    790 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    791 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    792 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    793 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    794 }, shift_map[NR_KEYS] = {
    795 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    796 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    797 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    798 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    799 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    800 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    801 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    802 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    803 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    804 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    805 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    806 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    807 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    808 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    809 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    810 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    811 }, altgr_map[NR_KEYS] = {
    812 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    813 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    814 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    815 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    816 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    817 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    818 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    819 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    820 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    821 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    822 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    823 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    824 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    825 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    826 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    827 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    828 }, ctrl_map[NR_KEYS] = {
    829 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    830 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    831 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    832 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    833 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    834 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    835 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    836 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    837 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    838 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    839 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    840 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    841 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    842 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    843 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    844 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    845 };
    846 
    847 const u_short * const linux_keytabs[] = {
    848 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    849 };
    850 #endif
    851 
    852 static struct biosdisk_info *
    853 fd2biosinfo(p, fp)
    854 	struct proc *p;
    855 	struct file *fp;
    856 {
    857 	struct vnode *vp;
    858 	const char *blkname;
    859 	char diskname[16];
    860 	int i;
    861 	struct nativedisk_info *nip;
    862 	struct disklist *dl = x86_alldisks;
    863 
    864 	if (fp->f_type != DTYPE_VNODE)
    865 		return NULL;
    866 	vp = (struct vnode *)fp->f_data;
    867 
    868 	if (vp->v_type != VBLK)
    869 		return NULL;
    870 
    871 	blkname = devsw_blk2name(major(vp->v_rdev));
    872 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    873 	    DISKUNIT(vp->v_rdev));
    874 
    875 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    876 		nip = &dl->dl_nativedisks[i];
    877 		if (strcmp(diskname, nip->ni_devname))
    878 			continue;
    879 		if (nip->ni_nmatches != 0)
    880 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    881 	}
    882 
    883 	return NULL;
    884 }
    885 
    886 
    887 /*
    888  * We come here in a last attempt to satisfy a Linux ioctl() call
    889  */
    890 int
    891 linux_machdepioctl(p, v, retval)
    892 	struct proc *p;
    893 	void *v;
    894 	register_t *retval;
    895 {
    896 	struct linux_sys_ioctl_args /* {
    897 		syscallarg(int) fd;
    898 		syscallarg(u_long) com;
    899 		syscallarg(caddr_t) data;
    900 	} */ *uap = v;
    901 	struct sys_ioctl_args bia;
    902 	u_long com;
    903 	int error, error1;
    904 #if (NWSDISPLAY > 0)
    905 	struct vt_mode lvt;
    906 	caddr_t bvtp, sg;
    907 	struct kbentry kbe;
    908 #endif
    909 	struct linux_hd_geometry hdg;
    910 	struct linux_hd_big_geometry hdg_big;
    911 	struct biosdisk_info *bip;
    912 	struct filedesc *fdp;
    913 	struct file *fp;
    914 	int fd;
    915 	struct disklabel label, *labp;
    916 	struct partinfo partp;
    917 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
    918 	u_long start, biostotal, realtotal;
    919 	u_char heads, sectors;
    920 	u_int cylinders;
    921 	struct ioctl_pt pt;
    922 
    923 	fd = SCARG(uap, fd);
    924 	SCARG(&bia, fd) = fd;
    925 	SCARG(&bia, data) = SCARG(uap, data);
    926 	com = SCARG(uap, com);
    927 
    928 	fdp = p->p_fd;
    929 
    930 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    931 		return (EBADF);
    932 
    933 	FILE_USE(fp);
    934 
    935 	switch (com) {
    936 #if (NWSDISPLAY > 0)
    937 	case LINUX_KDGKBMODE:
    938 		com = KDGKBMODE;
    939 		break;
    940 	case LINUX_KDSKBMODE:
    941 		com = KDSKBMODE;
    942 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    943 			SCARG(&bia, data) = (caddr_t)K_RAW;
    944 		break;
    945 	case LINUX_KIOCSOUND:
    946 		SCARG(&bia, data) =
    947 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    948 		/* fall through */
    949 	case LINUX_KDMKTONE:
    950 		com = KDMKTONE;
    951 		break;
    952 	case LINUX_KDSETMODE:
    953 		com = KDSETMODE;
    954 		break;
    955 	case LINUX_KDGETMODE:
    956 		/* KD_* values are equal to the wscons numbers */
    957 		com = WSDISPLAYIO_GMODE;
    958 		break;
    959 	case LINUX_KDENABIO:
    960 		com = KDENABIO;
    961 		break;
    962 	case LINUX_KDDISABIO:
    963 		com = KDDISABIO;
    964 		break;
    965 	case LINUX_KDGETLED:
    966 		com = KDGETLED;
    967 		break;
    968 	case LINUX_KDSETLED:
    969 		com = KDSETLED;
    970 		break;
    971 	case LINUX_VT_OPENQRY:
    972 		com = VT_OPENQRY;
    973 		break;
    974 	case LINUX_VT_GETMODE:
    975 		SCARG(&bia, com) = VT_GETMODE;
    976 		/* XXX NJWLWP */
    977 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    978 			goto out;
    979 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    980 		    sizeof (struct vt_mode))))
    981 			goto out;
    982 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    983 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    984 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    985 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    986 		    sizeof (struct vt_mode));
    987 		goto out;
    988 	case LINUX_VT_SETMODE:
    989 		com = VT_SETMODE;
    990 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    991 		    sizeof (struct vt_mode))))
    992 			goto out;
    993 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    994 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    995 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    996 		sg = stackgap_init(p, 0);
    997 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    998 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    999 			goto out;
   1000 		SCARG(&bia, data) = bvtp;
   1001 		break;
   1002 	case LINUX_VT_DISALLOCATE:
   1003 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1004 		error = 0;
   1005 		goto out;
   1006 	case LINUX_VT_RELDISP:
   1007 		com = VT_RELDISP;
   1008 		break;
   1009 	case LINUX_VT_ACTIVATE:
   1010 		com = VT_ACTIVATE;
   1011 		break;
   1012 	case LINUX_VT_WAITACTIVE:
   1013 		com = VT_WAITACTIVE;
   1014 		break;
   1015 	case LINUX_VT_GETSTATE:
   1016 		com = VT_GETSTATE;
   1017 		break;
   1018 	case LINUX_KDGKBTYPE:
   1019 	    {
   1020 		static const u_int8_t kb101 = KB_101;
   1021 
   1022 		/* This is what Linux does. */
   1023 		error = copyout(&kb101, SCARG(uap, data), 1);
   1024 		goto out;
   1025 	    }
   1026 	case LINUX_KDGKBENT:
   1027 		/*
   1028 		 * The Linux KDGKBENT ioctl is different from the
   1029 		 * SYSV original. So we handle it in machdep code.
   1030 		 * XXX We should use keyboard mapping information
   1031 		 * from wsdisplay, but this would be expensive.
   1032 		 */
   1033 		if ((error = copyin(SCARG(uap, data), &kbe,
   1034 				    sizeof(struct kbentry))))
   1035 			goto out;
   1036 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1037 		    || kbe.kb_index >= NR_KEYS) {
   1038 			error = EINVAL;
   1039 			goto out;
   1040 		}
   1041 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1042 		error = copyout(&kbe, SCARG(uap, data),
   1043 				sizeof(struct kbentry));
   1044 		goto out;
   1045 #endif
   1046 	case LINUX_HDIO_GETGEO:
   1047 	case LINUX_HDIO_GETGEO_BIG:
   1048 		/*
   1049 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1050 		 * if possible (extending its # of cylinders if it's beyond
   1051 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1052 		 * the real geometry) if not found, by returning an
   1053 		 * error. See common/linux_hdio.c
   1054 		 */
   1055 		bip = fd2biosinfo(p, fp);
   1056 		ioctlf = fp->f_ops->fo_ioctl;
   1057 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
   1058 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
   1059 		if (error != 0 && error1 != 0) {
   1060 			error = error1;
   1061 			goto out;
   1062 		}
   1063 		labp = error != 0 ? &label : partp.disklab;
   1064 		start = error1 != 0 ? partp.part->p_offset : 0;
   1065 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1066 		    && bip->bi_cyl != 0) {
   1067 			heads = bip->bi_head;
   1068 			sectors = bip->bi_sec;
   1069 			cylinders = bip->bi_cyl;
   1070 			biostotal = heads * sectors * cylinders;
   1071 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1072 			    labp->d_ncylinders;
   1073 			if (realtotal > biostotal)
   1074 				cylinders = realtotal / (heads * sectors);
   1075 		} else {
   1076 			heads = labp->d_ntracks;
   1077 			cylinders = labp->d_ncylinders;
   1078 			sectors = labp->d_nsectors;
   1079 		}
   1080 		if (com == LINUX_HDIO_GETGEO) {
   1081 			hdg.start = start;
   1082 			hdg.heads = heads;
   1083 			hdg.cylinders = cylinders;
   1084 			hdg.sectors = sectors;
   1085 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1086 			goto out;
   1087 		} else {
   1088 			hdg_big.start = start;
   1089 			hdg_big.heads = heads;
   1090 			hdg_big.cylinders = cylinders;
   1091 			hdg_big.sectors = sectors;
   1092 			error = copyout(&hdg_big, SCARG(uap, data),
   1093 			    sizeof hdg_big);
   1094 			goto out;
   1095 		}
   1096 
   1097 	default:
   1098 		/*
   1099 		 * Unknown to us. If it's on a device, just pass it through
   1100 		 * using PTIOCLINUX, the device itself might be able to
   1101 		 * make some sense of it.
   1102 		 * XXX hack: if the function returns EJUSTRETURN,
   1103 		 * it has stuffed a sysctl return value in pt.data.
   1104 		 */
   1105 		FILE_USE(fp);
   1106 		ioctlf = fp->f_ops->fo_ioctl;
   1107 		pt.com = SCARG(uap, com);
   1108 		pt.data = SCARG(uap, data);
   1109 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
   1110 		FILE_UNUSE(fp, p);
   1111 		if (error == EJUSTRETURN) {
   1112 			retval[0] = (register_t)pt.data;
   1113 			error = 0;
   1114 		}
   1115 
   1116 		if (error == ENOTTY)
   1117 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1118 			    com));
   1119 		goto out;
   1120 	}
   1121 	SCARG(&bia, com) = com;
   1122 	/* XXX NJWLWP */
   1123 	error = sys_ioctl(curlwp, &bia, retval);
   1124 out:
   1125 	FILE_UNUSE(fp ,p);
   1126 	return error;
   1127 }
   1128 
   1129 /*
   1130  * Set I/O permissions for a process. Just set the maximum level
   1131  * right away (ignoring the argument), otherwise we would have
   1132  * to rely on I/O permission maps, which are not implemented.
   1133  */
   1134 int
   1135 linux_sys_iopl(l, v, retval)
   1136 	struct lwp *l;
   1137 	void *v;
   1138 	register_t *retval;
   1139 {
   1140 #if 0
   1141 	struct linux_sys_iopl_args /* {
   1142 		syscallarg(int) level;
   1143 	} */ *uap = v;
   1144 #endif
   1145 	struct proc *p = l->l_proc;
   1146 	struct trapframe *fp = l->l_md.md_regs;
   1147 
   1148 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1149 		return EPERM;
   1150 	fp->tf_eflags |= PSL_IOPL;
   1151 	*retval = 0;
   1152 	return 0;
   1153 }
   1154 
   1155 /*
   1156  * See above. If a root process tries to set access to an I/O port,
   1157  * just let it have the whole range.
   1158  */
   1159 int
   1160 linux_sys_ioperm(l, v, retval)
   1161 	struct lwp *l;
   1162 	void *v;
   1163 	register_t *retval;
   1164 {
   1165 	struct linux_sys_ioperm_args /* {
   1166 		syscallarg(unsigned int) lo;
   1167 		syscallarg(unsigned int) hi;
   1168 		syscallarg(int) val;
   1169 	} */ *uap = v;
   1170 	struct proc *p = l->l_proc;
   1171 	struct trapframe *fp = l->l_md.md_regs;
   1172 
   1173 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1174 		return EPERM;
   1175 	if (SCARG(uap, val))
   1176 		fp->tf_eflags |= PSL_IOPL;
   1177 	*retval = 0;
   1178 	return 0;
   1179 }
   1180 
   1181 int
   1182 linux_usertrap(struct lwp *l, vaddr_t trapaddr, void *arg)
   1183 {
   1184 	return 0;
   1185 }
   1186