Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.114.4.2
      1 /*	$NetBSD: linux_machdep.c,v 1.114.4.2 2006/11/18 21:39:05 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.114.4.2 2006/11/18 21:39:05 ad Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <sys/wait.h>
     72 #include <sys/kauth.h>
     73 
     74 #include <miscfs/specfs/specdev.h>
     75 
     76 #include <compat/linux/common/linux_types.h>
     77 #include <compat/linux/common/linux_signal.h>
     78 #include <compat/linux/common/linux_util.h>
     79 #include <compat/linux/common/linux_ioctl.h>
     80 #include <compat/linux/common/linux_hdio.h>
     81 #include <compat/linux/common/linux_exec.h>
     82 #include <compat/linux/common/linux_machdep.h>
     83 #include <compat/linux/common/linux_errno.h>
     84 
     85 #include <compat/linux/linux_syscallargs.h>
     86 
     87 #include <machine/cpu.h>
     88 #include <machine/cpufunc.h>
     89 #include <machine/psl.h>
     90 #include <machine/reg.h>
     91 #include <machine/segments.h>
     92 #include <machine/specialreg.h>
     93 #include <machine/sysarch.h>
     94 #include <machine/vm86.h>
     95 #include <machine/vmparam.h>
     96 
     97 /*
     98  * To see whether wscons is configured (for virtual console ioctl calls).
     99  */
    100 #if defined(_KERNEL_OPT)
    101 #include "wsdisplay.h"
    102 #endif
    103 #if (NWSDISPLAY > 0)
    104 #include <dev/wscons/wsconsio.h>
    105 #include <dev/wscons/wsdisplay_usl_io.h>
    106 #if defined(_KERNEL_OPT)
    107 #include "opt_xserver.h"
    108 #endif
    109 #endif
    110 
    111 #ifdef USER_LDT
    112 #include <machine/cpu.h>
    113 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    114     register_t *));
    115 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    116     register_t *));
    117 #endif
    118 
    119 #ifdef DEBUG_LINUX
    120 #define DPRINTF(a) uprintf a
    121 #else
    122 #define DPRINTF(a)
    123 #endif
    124 
    125 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    126 extern struct disklist *x86_alldisks;
    127 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    128     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    129 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    130     const sigset_t *, struct linux_sigcontext *));
    131 static int linux_restore_sigcontext __P((struct lwp *,
    132     struct linux_sigcontext *, register_t *));
    133 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    134 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    135 
    136 extern char linux_sigcode[], linux_rt_sigcode[];
    137 /*
    138  * Deal with some i386-specific things in the Linux emulation code.
    139  */
    140 
    141 void
    142 linux_setregs(l, epp, stack)
    143 	struct lwp *l;
    144 	struct exec_package *epp;
    145 	u_long stack;
    146 {
    147 	struct pcb *pcb = &l->l_addr->u_pcb;
    148 	struct trapframe *tf;
    149 
    150 #if NNPX > 0
    151 	/* If we were using the FPU, forget about it. */
    152 	if (npxproc == l)
    153 		npxdrop();
    154 #endif
    155 
    156 #ifdef USER_LDT
    157 	pmap_ldt_cleanup(l);
    158 #endif
    159 
    160 	l->l_md.md_flags &= ~MDL_USEDFPU;
    161 
    162 	if (i386_use_fxsave) {
    163 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    164 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    165 	} else
    166 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    167 
    168 	tf = l->l_md.md_regs;
    169 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    171 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    172 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    173 	tf->tf_edi = 0;
    174 	tf->tf_esi = 0;
    175 	tf->tf_ebp = 0;
    176 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    177 	tf->tf_edx = 0;
    178 	tf->tf_ecx = 0;
    179 	tf->tf_eax = 0;
    180 	tf->tf_eip = epp->ep_entry;
    181 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    182 	tf->tf_eflags = PSL_USERSET;
    183 	tf->tf_esp = stack;
    184 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    185 }
    186 
    187 /*
    188  * Send an interrupt to process.
    189  *
    190  * Stack is set up to allow sigcode stored
    191  * in u. to call routine, followed by kcall
    192  * to sigreturn routine below.  After sigreturn
    193  * resets the signal mask, the stack, and the
    194  * frame pointer, it returns to the user
    195  * specified pc, psl.
    196  */
    197 
    198 void
    199 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    200 {
    201 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    202 		linux_rt_sendsig(ksi, mask);
    203 	else
    204 		linux_old_sendsig(ksi, mask);
    205 }
    206 
    207 
    208 static void
    209 linux_save_ucontext(l, tf, mask, sas, uc)
    210 	struct lwp *l;
    211 	struct trapframe *tf;
    212 	const sigset_t *mask;
    213 	struct sigaltstack *sas;
    214 	struct linux_ucontext *uc;
    215 {
    216 	uc->uc_flags = 0;
    217 	uc->uc_link = NULL;
    218 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    219 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    220 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    221 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    222 }
    223 
    224 static void
    225 linux_save_sigcontext(l, tf, mask, sc)
    226 	struct lwp *l;
    227 	struct trapframe *tf;
    228 	const sigset_t *mask;
    229 	struct linux_sigcontext *sc;
    230 {
    231 	/* Save register context. */
    232 #ifdef VM86
    233 	if (tf->tf_eflags & PSL_VM) {
    234 		sc->sc_gs = tf->tf_vm86_gs;
    235 		sc->sc_fs = tf->tf_vm86_fs;
    236 		sc->sc_es = tf->tf_vm86_es;
    237 		sc->sc_ds = tf->tf_vm86_ds;
    238 		sc->sc_eflags = get_vflags(l);
    239 	} else
    240 #endif
    241 	{
    242 		sc->sc_gs = tf->tf_gs;
    243 		sc->sc_fs = tf->tf_fs;
    244 		sc->sc_es = tf->tf_es;
    245 		sc->sc_ds = tf->tf_ds;
    246 		sc->sc_eflags = tf->tf_eflags;
    247 	}
    248 	sc->sc_edi = tf->tf_edi;
    249 	sc->sc_esi = tf->tf_esi;
    250 	sc->sc_esp = tf->tf_esp;
    251 	sc->sc_ebp = tf->tf_ebp;
    252 	sc->sc_ebx = tf->tf_ebx;
    253 	sc->sc_edx = tf->tf_edx;
    254 	sc->sc_ecx = tf->tf_ecx;
    255 	sc->sc_eax = tf->tf_eax;
    256 	sc->sc_eip = tf->tf_eip;
    257 	sc->sc_cs = tf->tf_cs;
    258 	sc->sc_esp_at_signal = tf->tf_esp;
    259 	sc->sc_ss = tf->tf_ss;
    260 	sc->sc_err = tf->tf_err;
    261 	sc->sc_trapno = tf->tf_trapno;
    262 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    263 	sc->sc_387 = NULL;
    264 
    265 	/* Save signal stack. */
    266 	/* Linux doesn't save the onstack flag in sigframe */
    267 
    268 	/* Save signal mask. */
    269 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    270 }
    271 
    272 static void
    273 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    274 {
    275 	struct lwp *l = curlwp;
    276 	struct proc *p = l->l_proc;
    277 	struct trapframe *tf;
    278 	struct linux_rt_sigframe *fp, frame;
    279 	int onstack, error;
    280 	linux_siginfo_t *lsi;
    281 	int sig = ksi->ksi_signo;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = l->l_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	/*
    309 	 * XXX: the following code assumes that the constants for
    310 	 * siginfo are the same between linux and NetBSD.
    311 	 */
    312 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    313 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    314 	lsi->lsi_code = ksi->ksi_code;
    315 	switch (lsi->lsi_signo = frame.sf_sig) {
    316 	case LINUX_SIGILL:
    317 	case LINUX_SIGFPE:
    318 	case LINUX_SIGSEGV:
    319 	case LINUX_SIGBUS:
    320 	case LINUX_SIGTRAP:
    321 		lsi->lsi_addr = ksi->ksi_addr;
    322 		break;
    323 	case LINUX_SIGCHLD:
    324 		lsi->lsi_uid = ksi->ksi_uid;
    325 		lsi->lsi_pid = ksi->ksi_pid;
    326 		lsi->lsi_utime = ksi->ksi_utime;
    327 		lsi->lsi_stime = ksi->ksi_stime;
    328 
    329 		/* We use the same codes */
    330 		lsi->lsi_code = ksi->ksi_code;
    331 		/* XXX is that right? */
    332 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    333 		break;
    334 	case LINUX_SIGIO:
    335 		lsi->lsi_band = ksi->ksi_band;
    336 		lsi->lsi_fd = ksi->ksi_fd;
    337 		break;
    338 	default:
    339 		lsi->lsi_uid = ksi->ksi_uid;
    340 		lsi->lsi_pid = ksi->ksi_pid;
    341 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    342 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    343 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    344 		break;
    345 	}
    346 
    347 	/* Save register context. */
    348 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    349 
    350 	mutex_exit(&p->p_smutex);
    351 	error = copyout(&frame, fp, sizeof(frame));
    352 	mutex_enter(&p->p_smutex);
    353 
    354 	if (error != 0) {
    355 		/*
    356 		 * Process has trashed its stack; give it an illegal
    357 		 * instruction to halt it in its tracks.
    358 		 */
    359 		sigexit(l, SIGILL);
    360 		/* NOTREACHED */
    361 	}
    362 
    363 	/*
    364 	 * Build context to run handler in.
    365 	 */
    366 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    367 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    368 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    369 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    370 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    371 	    (linux_rt_sigcode - linux_sigcode);
    372 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    373 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    374 	tf->tf_esp = (int)fp;
    375 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    376 
    377 	/* Remember that we're now on the signal stack. */
    378 	if (onstack)
    379 		sas->ss_flags |= SS_ONSTACK;
    380 }
    381 
    382 static void
    383 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    384 {
    385 	struct lwp *l = curlwp;
    386 	struct proc *p = l->l_proc;
    387 	struct trapframe *tf;
    388 	struct linux_sigframe *fp, frame;
    389 	int onstack;
    390 	int sig = ksi->ksi_signo;
    391 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    392 	struct sigaltstack *sas = l->l_sigstk;
    393 
    394 	tf = l->l_md.md_regs;
    395 
    396 	/* Do we need to jump onto the signal stack? */
    397 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    398 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    399 
    400 	/* Allocate space for the signal handler context. */
    401 	if (onstack)
    402 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    403 		    sas->ss_size);
    404 	else
    405 		fp = (struct linux_sigframe *)tf->tf_esp;
    406 	fp--;
    407 
    408 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    409 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    410 
    411 	/* Build stack frame for signal trampoline. */
    412 	frame.sf_handler = catcher;
    413 	frame.sf_sig = native_to_linux_signo[sig];
    414 
    415 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    416 
    417 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    418 		/*
    419 		 * Process has trashed its stack; give it an illegal
    420 		 * instruction to halt it in its tracks.
    421 		 */
    422 		sigexit(l, SIGILL);
    423 		/* NOTREACHED */
    424 	}
    425 
    426 	/*
    427 	 * Build context to run handler in.
    428 	 */
    429 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    430 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    431 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    432 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    433 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    434 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    435 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    436 	tf->tf_esp = (int)fp;
    437 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    438 
    439 	/* Remember that we're now on the signal stack. */
    440 	if (onstack)
    441 		sas->ss_flags |= SS_ONSTACK;
    442 }
    443 
    444 /*
    445  * System call to cleanup state after a signal
    446  * has been taken.  Reset signal mask and
    447  * stack state from context left by sendsig (above).
    448  * Return to previous pc and psl as specified by
    449  * context left by sendsig. Check carefully to
    450  * make sure that the user has not modified the
    451  * psl to gain improper privileges or to cause
    452  * a machine fault.
    453  */
    454 int
    455 linux_sys_rt_sigreturn(l, v, retval)
    456 	struct lwp *l;
    457 	void *v;
    458 	register_t *retval;
    459 {
    460 	struct linux_sys_rt_sigreturn_args /* {
    461 		syscallarg(struct linux_ucontext *) ucp;
    462 	} */ *uap = v;
    463 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    464 	int error;
    465 
    466 	/*
    467 	 * The trampoline code hands us the context.
    468 	 * It is unsafe to keep track of it ourselves, in the event that a
    469 	 * program jumps out of a signal handler.
    470 	 */
    471 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    472 		return error;
    473 
    474 	/* XXX XAX we can do better here by using more of the ucontext */
    475 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    476 }
    477 
    478 int
    479 linux_sys_sigreturn(l, v, retval)
    480 	struct lwp *l;
    481 	void *v;
    482 	register_t *retval;
    483 {
    484 	struct linux_sys_sigreturn_args /* {
    485 		syscallarg(struct linux_sigcontext *) scp;
    486 	} */ *uap = v;
    487 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    488 	int error;
    489 
    490 	/*
    491 	 * The trampoline code hands us the context.
    492 	 * It is unsafe to keep track of it ourselves, in the event that a
    493 	 * program jumps out of a signal handler.
    494 	 */
    495 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    496 		return error;
    497 	return linux_restore_sigcontext(l, &context, retval);
    498 }
    499 
    500 static int
    501 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    502     register_t *retval)
    503 {
    504 	struct proc *p = l->l_proc;
    505 	struct sigaltstack *sas = l->l_sigstk;
    506 	struct trapframe *tf;
    507 	sigset_t mask;
    508 	ssize_t ss_gap;
    509 	/* Restore register context. */
    510 	tf = l->l_md.md_regs;
    511 
    512 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    513 #ifdef VM86
    514 	if (scp->sc_eflags & PSL_VM) {
    515 		void syscall_vm86 __P((struct trapframe *));
    516 
    517 		tf->tf_vm86_gs = scp->sc_gs;
    518 		tf->tf_vm86_fs = scp->sc_fs;
    519 		tf->tf_vm86_es = scp->sc_es;
    520 		tf->tf_vm86_ds = scp->sc_ds;
    521 		set_vflags(l, scp->sc_eflags);
    522 		p->p_md.md_syscall = syscall_vm86;
    523 	} else
    524 #endif
    525 	{
    526 		/*
    527 		 * Check for security violations.  If we're returning to
    528 		 * protected mode, the CPU will validate the segment registers
    529 		 * automatically and generate a trap on violations.  We handle
    530 		 * the trap, rather than doing all of the checking here.
    531 		 */
    532 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    533 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    534 			return EINVAL;
    535 
    536 		tf->tf_gs = scp->sc_gs;
    537 		tf->tf_fs = scp->sc_fs;
    538 		tf->tf_es = scp->sc_es;
    539 		tf->tf_ds = scp->sc_ds;
    540 #ifdef VM86
    541 		if (tf->tf_eflags & PSL_VM)
    542 			(*p->p_emul->e_syscall_intern)(p);
    543 #endif
    544 		tf->tf_eflags = scp->sc_eflags;
    545 	}
    546 	tf->tf_edi = scp->sc_edi;
    547 	tf->tf_esi = scp->sc_esi;
    548 	tf->tf_ebp = scp->sc_ebp;
    549 	tf->tf_ebx = scp->sc_ebx;
    550 	tf->tf_edx = scp->sc_edx;
    551 	tf->tf_ecx = scp->sc_ecx;
    552 	tf->tf_eax = scp->sc_eax;
    553 	tf->tf_eip = scp->sc_eip;
    554 	tf->tf_cs = scp->sc_cs;
    555 	tf->tf_esp = scp->sc_esp_at_signal;
    556 	tf->tf_ss = scp->sc_ss;
    557 
    558 	/* Restore signal stack. */
    559 	/*
    560 	 * Linux really does it this way; it doesn't have space in sigframe
    561 	 * to save the onstack flag.
    562 	 */
    563 	ss_gap = (ssize_t)
    564 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    565 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    566 		sas->ss_flags |= SS_ONSTACK;
    567 	else
    568 		sas->ss_flags &= ~SS_ONSTACK;
    569 
    570 	/* Restore signal mask. */
    571 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    572 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    573 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    574 	return EJUSTRETURN;
    575 }
    576 
    577 #ifdef USER_LDT
    578 
    579 int
    580 linux_read_ldt(l, uap, retval)
    581 	struct lwp *l;
    582 	struct linux_sys_modify_ldt_args /* {
    583 		syscallarg(int) func;
    584 		syscallarg(void *) ptr;
    585 		syscallarg(size_t) bytecount;
    586 	} */ *uap;
    587 	register_t *retval;
    588 {
    589 	struct proc *p = l->l_proc;
    590 	struct i386_get_ldt_args gl;
    591 	int error;
    592 	caddr_t sg;
    593 	char *parms;
    594 
    595 	DPRINTF(("linux_read_ldt!"));
    596 	sg = stackgap_init(p, 0);
    597 
    598 	gl.start = 0;
    599 	gl.desc = SCARG(uap, ptr);
    600 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    601 
    602 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    603 
    604 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    605 		return (error);
    606 
    607 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    608 		return (error);
    609 
    610 	*retval *= sizeof(union descriptor);
    611 	return (0);
    612 }
    613 
    614 struct linux_ldt_info {
    615 	u_int entry_number;
    616 	u_long base_addr;
    617 	u_int limit;
    618 	u_int seg_32bit:1;
    619 	u_int contents:2;
    620 	u_int read_exec_only:1;
    621 	u_int limit_in_pages:1;
    622 	u_int seg_not_present:1;
    623 	u_int useable:1;
    624 };
    625 
    626 int
    627 linux_write_ldt(l, uap, retval)
    628 	struct lwp *l;
    629 	struct linux_sys_modify_ldt_args /* {
    630 		syscallarg(int) func;
    631 		syscallarg(void *) ptr;
    632 		syscallarg(size_t) bytecount;
    633 	} */ *uap;
    634 	register_t *retval;
    635 {
    636 	struct proc *p = l->l_proc;
    637 	struct linux_ldt_info ldt_info;
    638 	struct segment_descriptor sd;
    639 	struct i386_set_ldt_args sl;
    640 	int error;
    641 	caddr_t sg;
    642 	char *parms;
    643 	int oldmode = (int)retval[0];
    644 
    645 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    646 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    647 		return (EINVAL);
    648 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    649 		return error;
    650 	if (ldt_info.entry_number >= 8192)
    651 		return (EINVAL);
    652 	if (ldt_info.contents == 3) {
    653 		if (oldmode)
    654 			return (EINVAL);
    655 		if (ldt_info.seg_not_present)
    656 			return (EINVAL);
    657 	}
    658 
    659 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    660 	    (oldmode || (ldt_info.contents == 0 &&
    661 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    662 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    663 	    ldt_info.useable == 0))) {
    664 		/* this means you should zero the ldt */
    665 		(void)memset(&sd, 0, sizeof(sd));
    666 	} else {
    667 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    668 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    669 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    670 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    671 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    672 		    (!ldt_info.read_exec_only << 1);
    673 		sd.sd_dpl = SEL_UPL;
    674 		sd.sd_p = !ldt_info.seg_not_present;
    675 		sd.sd_def32 = ldt_info.seg_32bit;
    676 		sd.sd_gran = ldt_info.limit_in_pages;
    677 		if (!oldmode)
    678 			sd.sd_xx = ldt_info.useable;
    679 		else
    680 			sd.sd_xx = 0;
    681 	}
    682 	sg = stackgap_init(p, 0);
    683 	sl.start = ldt_info.entry_number;
    684 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    685 	sl.num = 1;
    686 
    687 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    688 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    689 
    690 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    691 
    692 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    693 		return (error);
    694 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    695 		return (error);
    696 
    697 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    698 		return (error);
    699 
    700 	*retval = 0;
    701 	return (0);
    702 }
    703 
    704 #endif /* USER_LDT */
    705 
    706 int
    707 linux_sys_modify_ldt(struct lwp *l, void *v,
    708     register_t *retval)
    709 {
    710 	struct linux_sys_modify_ldt_args /* {
    711 		syscallarg(int) func;
    712 		syscallarg(void *) ptr;
    713 		syscallarg(size_t) bytecount;
    714 	} */ *uap = v;
    715 
    716 	switch (SCARG(uap, func)) {
    717 #ifdef USER_LDT
    718 	case 0:
    719 		return linux_read_ldt(l, uap, retval);
    720 	case 1:
    721 		retval[0] = 1;
    722 		return linux_write_ldt(l, uap, retval);
    723 	case 2:
    724 #ifdef notyet
    725 		return (linux_read_default_ldt(l, uap, retval);
    726 #else
    727 		return (ENOSYS);
    728 #endif
    729 	case 0x11:
    730 		retval[0] = 0;
    731 		return linux_write_ldt(l, uap, retval);
    732 #endif /* USER_LDT */
    733 
    734 	default:
    735 		return (ENOSYS);
    736 	}
    737 }
    738 
    739 /*
    740  * XXX Pathetic hack to make svgalib work. This will fake the major
    741  * device number of an opened VT so that svgalib likes it. grmbl.
    742  * Should probably do it 'wrong the right way' and use a mapping
    743  * array for all major device numbers, and map linux_mknod too.
    744  */
    745 dev_t
    746 linux_fakedev(dev, raw)
    747 	dev_t dev;
    748 	int raw;
    749 {
    750 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    751 	const struct cdevsw *cd = cdevsw_lookup(dev);
    752 
    753 	if (raw) {
    754 #if (NWSDISPLAY > 0)
    755 		extern const struct cdevsw wsdisplay_cdevsw;
    756 		if (cd == &wsdisplay_cdevsw)
    757 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    758 #endif
    759 	}
    760 
    761 	if (cd == &ptc_cdevsw)
    762 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    763 	if (cd == &pts_cdevsw)
    764 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    765 
    766 	return dev;
    767 }
    768 
    769 #if (NWSDISPLAY > 0)
    770 /*
    771  * That's not complete, but enough to get an X server running.
    772  */
    773 #define NR_KEYS 128
    774 static const u_short plain_map[NR_KEYS] = {
    775 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    776 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    777 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    778 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    779 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    780 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    781 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    782 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    783 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    784 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    785 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    786 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    787 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    788 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    789 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    790 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    791 }, shift_map[NR_KEYS] = {
    792 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    793 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    794 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    795 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    796 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    797 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    798 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    799 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    800 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    801 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    802 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    803 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    804 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    805 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    806 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    807 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    808 }, altgr_map[NR_KEYS] = {
    809 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    810 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    811 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    812 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    813 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    814 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    815 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    816 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    817 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    818 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    819 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    820 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    821 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    822 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    823 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    824 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    825 }, ctrl_map[NR_KEYS] = {
    826 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    827 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    828 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    829 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    830 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    831 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    832 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    833 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    834 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    835 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    836 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    837 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    838 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    839 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    840 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    841 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    842 };
    843 
/*
 * Key tables that the LINUX_KDGKBENT handler in linux_machdepioctl()
 * selects from using kbe.kb_table.  Slots 2 and 3 both refer to
 * altgr_map.
 */
const u_short * const linux_keytabs[] = {
	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
};
    847 #endif
    848 
    849 static struct biosdisk_info *
    850 fd2biosinfo(struct proc *p, struct file *fp)
    851 {
    852 	struct vnode *vp;
    853 	const char *blkname;
    854 	char diskname[16];
    855 	int i;
    856 	struct nativedisk_info *nip;
    857 	struct disklist *dl = x86_alldisks;
    858 
    859 	if (fp->f_type != DTYPE_VNODE)
    860 		return NULL;
    861 	vp = (struct vnode *)fp->f_data;
    862 
    863 	if (vp->v_type != VBLK)
    864 		return NULL;
    865 
    866 	blkname = devsw_blk2name(major(vp->v_rdev));
    867 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    868 	    DISKUNIT(vp->v_rdev));
    869 
    870 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    871 		nip = &dl->dl_nativedisks[i];
    872 		if (strcmp(diskname, nip->ni_devname))
    873 			continue;
    874 		if (nip->ni_nmatches != 0)
    875 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    876 	}
    877 
    878 	return NULL;
    879 }
    880 
    881 
    882 /*
    883  * We come here in a last attempt to satisfy a Linux ioctl() call
    884  */
    885 int
    886 linux_machdepioctl(l, v, retval)
    887 	struct lwp *l;
    888 	void *v;
    889 	register_t *retval;
    890 {
    891 	struct linux_sys_ioctl_args /* {
    892 		syscallarg(int) fd;
    893 		syscallarg(u_long) com;
    894 		syscallarg(caddr_t) data;
    895 	} */ *uap = v;
    896 	struct sys_ioctl_args bia;
    897 	u_long com;
    898 	int error, error1;
    899 #if (NWSDISPLAY > 0)
    900 	struct vt_mode lvt;
    901 	caddr_t bvtp, sg;
    902 	struct kbentry kbe;
    903 #endif
    904 	struct linux_hd_geometry hdg;
    905 	struct linux_hd_big_geometry hdg_big;
    906 	struct biosdisk_info *bip;
    907 	struct filedesc *fdp;
    908 	struct file *fp;
    909 	int fd;
    910 	struct disklabel label, *labp;
    911 	struct partinfo partp;
    912 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    913 	u_long start, biostotal, realtotal;
    914 	u_char heads, sectors;
    915 	u_int cylinders;
    916 	struct ioctl_pt pt;
    917 	struct proc *p = l->l_proc;
    918 
    919 	fd = SCARG(uap, fd);
    920 	SCARG(&bia, fd) = fd;
    921 	SCARG(&bia, data) = SCARG(uap, data);
    922 	com = SCARG(uap, com);
    923 
    924 	fdp = p->p_fd;
    925 
    926 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    927 		return (EBADF);
    928 
    929 	FILE_USE(fp);
    930 
    931 	switch (com) {
    932 #if (NWSDISPLAY > 0)
    933 	case LINUX_KDGKBMODE:
    934 		com = KDGKBMODE;
    935 		break;
    936 	case LINUX_KDSKBMODE:
    937 		com = KDSKBMODE;
    938 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    939 			SCARG(&bia, data) = (caddr_t)K_RAW;
    940 		break;
    941 	case LINUX_KIOCSOUND:
    942 		SCARG(&bia, data) =
    943 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    944 		/* fall through */
    945 	case LINUX_KDMKTONE:
    946 		com = KDMKTONE;
    947 		break;
    948 	case LINUX_KDSETMODE:
    949 		com = KDSETMODE;
    950 		break;
    951 	case LINUX_KDGETMODE:
    952 		/* KD_* values are equal to the wscons numbers */
    953 		com = WSDISPLAYIO_GMODE;
    954 		break;
    955 	case LINUX_KDENABIO:
    956 		com = KDENABIO;
    957 		break;
    958 	case LINUX_KDDISABIO:
    959 		com = KDDISABIO;
    960 		break;
    961 	case LINUX_KDGETLED:
    962 		com = KDGETLED;
    963 		break;
    964 	case LINUX_KDSETLED:
    965 		com = KDSETLED;
    966 		break;
    967 	case LINUX_VT_OPENQRY:
    968 		com = VT_OPENQRY;
    969 		break;
    970 	case LINUX_VT_GETMODE:
    971 		SCARG(&bia, com) = VT_GETMODE;
    972 		/* XXX NJWLWP */
    973 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    974 			goto out;
    975 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    976 		    sizeof (struct vt_mode))))
    977 			goto out;
    978 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    979 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    980 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    981 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    982 		    sizeof (struct vt_mode));
    983 		goto out;
    984 	case LINUX_VT_SETMODE:
    985 		com = VT_SETMODE;
    986 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    987 		    sizeof (struct vt_mode))))
    988 			goto out;
    989 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    990 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    991 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    992 		sg = stackgap_init(p, 0);
    993 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    994 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    995 			goto out;
    996 		SCARG(&bia, data) = bvtp;
    997 		break;
    998 	case LINUX_VT_DISALLOCATE:
    999 		/* XXX should use WSDISPLAYIO_DELSCREEN */
   1000 		error = 0;
   1001 		goto out;
   1002 	case LINUX_VT_RELDISP:
   1003 		com = VT_RELDISP;
   1004 		break;
   1005 	case LINUX_VT_ACTIVATE:
   1006 		com = VT_ACTIVATE;
   1007 		break;
   1008 	case LINUX_VT_WAITACTIVE:
   1009 		com = VT_WAITACTIVE;
   1010 		break;
   1011 	case LINUX_VT_GETSTATE:
   1012 		com = VT_GETSTATE;
   1013 		break;
   1014 	case LINUX_KDGKBTYPE:
   1015 	    {
   1016 		static const u_int8_t kb101 = KB_101;
   1017 
   1018 		/* This is what Linux does. */
   1019 		error = copyout(&kb101, SCARG(uap, data), 1);
   1020 		goto out;
   1021 	    }
   1022 	case LINUX_KDGKBENT:
   1023 		/*
   1024 		 * The Linux KDGKBENT ioctl is different from the
   1025 		 * SYSV original. So we handle it in machdep code.
   1026 		 * XXX We should use keyboard mapping information
   1027 		 * from wsdisplay, but this would be expensive.
   1028 		 */
   1029 		if ((error = copyin(SCARG(uap, data), &kbe,
   1030 				    sizeof(struct kbentry))))
   1031 			goto out;
   1032 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1033 		    || kbe.kb_index >= NR_KEYS) {
   1034 			error = EINVAL;
   1035 			goto out;
   1036 		}
   1037 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1038 		error = copyout(&kbe, SCARG(uap, data),
   1039 				sizeof(struct kbentry));
   1040 		goto out;
   1041 #endif
   1042 	case LINUX_HDIO_GETGEO:
   1043 	case LINUX_HDIO_GETGEO_BIG:
   1044 		/*
   1045 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1046 		 * if possible (extending its # of cylinders if it's beyond
   1047 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1048 		 * the real geometry) if not found, by returning an
   1049 		 * error. See common/linux_hdio.c
   1050 		 */
   1051 		bip = fd2biosinfo(p, fp);
   1052 		ioctlf = fp->f_ops->fo_ioctl;
   1053 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, l);
   1054 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, l);
   1055 		if (error != 0 && error1 != 0) {
   1056 			error = error1;
   1057 			goto out;
   1058 		}
   1059 		labp = error != 0 ? &label : partp.disklab;
   1060 		start = error1 != 0 ? partp.part->p_offset : 0;
   1061 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1062 		    && bip->bi_cyl != 0) {
   1063 			heads = bip->bi_head;
   1064 			sectors = bip->bi_sec;
   1065 			cylinders = bip->bi_cyl;
   1066 			biostotal = heads * sectors * cylinders;
   1067 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1068 			    labp->d_ncylinders;
   1069 			if (realtotal > biostotal)
   1070 				cylinders = realtotal / (heads * sectors);
   1071 		} else {
   1072 			heads = labp->d_ntracks;
   1073 			cylinders = labp->d_ncylinders;
   1074 			sectors = labp->d_nsectors;
   1075 		}
   1076 		if (com == LINUX_HDIO_GETGEO) {
   1077 			hdg.start = start;
   1078 			hdg.heads = heads;
   1079 			hdg.cylinders = cylinders;
   1080 			hdg.sectors = sectors;
   1081 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1082 			goto out;
   1083 		} else {
   1084 			hdg_big.start = start;
   1085 			hdg_big.heads = heads;
   1086 			hdg_big.cylinders = cylinders;
   1087 			hdg_big.sectors = sectors;
   1088 			error = copyout(&hdg_big, SCARG(uap, data),
   1089 			    sizeof hdg_big);
   1090 			goto out;
   1091 		}
   1092 
   1093 	default:
   1094 		/*
   1095 		 * Unknown to us. If it's on a device, just pass it through
   1096 		 * using PTIOCLINUX, the device itself might be able to
   1097 		 * make some sense of it.
   1098 		 * XXX hack: if the function returns EJUSTRETURN,
   1099 		 * it has stuffed a sysctl return value in pt.data.
   1100 		 */
   1101 		ioctlf = fp->f_ops->fo_ioctl;
   1102 		pt.com = SCARG(uap, com);
   1103 		pt.data = SCARG(uap, data);
   1104 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, l);
   1105 		if (error == EJUSTRETURN) {
   1106 			retval[0] = (register_t)pt.data;
   1107 			error = 0;
   1108 		}
   1109 
   1110 		if (error == ENOTTY) {
   1111 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1112 			    com));
   1113 		}
   1114 		goto out;
   1115 	}
   1116 	SCARG(&bia, com) = com;
   1117 	/* XXX NJWLWP */
   1118 	error = sys_ioctl(curlwp, &bia, retval);
   1119 out:
   1120 	FILE_UNUSE(fp ,l);
   1121 	return error;
   1122 }
   1123 
   1124 /*
   1125  * Set I/O permissions for a process. Just set the maximum level
   1126  * right away (ignoring the argument), otherwise we would have
   1127  * to rely on I/O permission maps, which are not implemented.
   1128  */
   1129 int
   1130 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1131 {
   1132 #if 0
   1133 	struct linux_sys_iopl_args /* {
   1134 		syscallarg(int) level;
   1135 	} */ *uap = v;
   1136 #endif
   1137 	struct trapframe *fp = l->l_md.md_regs;
   1138 
   1139 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_X86,
   1140 	    KAUTH_REQ_MACHDEP_X86_IOPL, NULL, NULL, NULL) != 0)
   1141 		return EPERM;
   1142 	fp->tf_eflags |= PSL_IOPL;
   1143 	*retval = 0;
   1144 	return 0;
   1145 }
   1146 
   1147 /*
   1148  * See above. If a root process tries to set access to an I/O port,
   1149  * just let it have the whole range.
   1150  */
   1151 int
   1152 linux_sys_ioperm(l, v, retval)
   1153 	struct lwp *l;
   1154 	void *v;
   1155 	register_t *retval;
   1156 {
   1157 	struct linux_sys_ioperm_args /* {
   1158 		syscallarg(unsigned int) lo;
   1159 		syscallarg(unsigned int) hi;
   1160 		syscallarg(int) val;
   1161 	} */ *uap = v;
   1162 	struct trapframe *fp = l->l_md.md_regs;
   1163 
   1164 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_X86,
   1165 	    KAUTH_REQ_MACHDEP_X86_IOPERM, NULL, NULL, NULL) != 0)
   1166 		return EPERM;
   1167 	if (SCARG(uap, val))
   1168 		fp->tf_eflags |= PSL_IOPL;
   1169 	*retval = 0;
   1170 	return 0;
   1171 }
   1172 
   1173 int
   1174 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1175     void *arg)
   1176 {
   1177 	return 0;
   1178 }
   1179