Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.128
      1 /*	$NetBSD: linux_machdep.c,v 1.128 2007/06/23 15:26:16 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.128 2007/06/23 15:26:16 dsl Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <machine/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
/*
 * Debug tracing.  DPRINTF takes one fully parenthesised argument list,
 * e.g. DPRINTF(("x = %d\n", x)).  With DEBUG_LINUX defined the list is
 * handed to uprintf; otherwise the whole invocation expands to nothing
 * and its arguments are never evaluated.
 */
#if !defined(DEBUG_LINUX)
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
    115 
    116 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    117 extern struct disklist *x86_alldisks;
    118 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    120 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    121     const sigset_t *, struct linux_sigcontext *));
    122 static int linux_restore_sigcontext __P((struct lwp *,
    123     struct linux_sigcontext *, register_t *));
    124 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    125 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    126 
    127 extern char linux_sigcode[], linux_rt_sigcode[];
    128 /*
    129  * Deal with some i386-specific things in the Linux emulation code.
    130  */
    131 
    132 void
    133 linux_setregs(l, epp, stack)
    134 	struct lwp *l;
    135 	struct exec_package *epp;
    136 	u_long stack;
    137 {
    138 	struct pcb *pcb = &l->l_addr->u_pcb;
    139 	struct trapframe *tf;
    140 
    141 #if NNPX > 0
    142 	/* If we were using the FPU, forget about it. */
    143 	if (npxproc == l)
    144 		npxdrop();
    145 #endif
    146 
    147 #ifdef USER_LDT
    148 	pmap_ldt_cleanup(l);
    149 #endif
    150 
    151 	l->l_md.md_flags &= ~MDL_USEDFPU;
    152 
    153 	if (i386_use_fxsave) {
    154 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    155 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    156 	} else
    157 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    158 
    159 	tf = l->l_md.md_regs;
    160 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    162 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    163 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    164 	tf->tf_edi = 0;
    165 	tf->tf_esi = 0;
    166 	tf->tf_ebp = 0;
    167 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    168 	tf->tf_edx = 0;
    169 	tf->tf_ecx = 0;
    170 	tf->tf_eax = 0;
    171 	tf->tf_eip = epp->ep_entry;
    172 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    173 	tf->tf_eflags = PSL_USERSET;
    174 	tf->tf_esp = stack;
    175 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    176 }
    177 
    178 /*
    179  * Send an interrupt to process.
    180  *
    181  * Stack is set up to allow sigcode stored
    182  * in u. to call routine, followed by kcall
    183  * to sigreturn routine below.  After sigreturn
    184  * resets the signal mask, the stack, and the
    185  * frame pointer, it returns to the user
    186  * specified pc, psl.
    187  */
    188 
    189 void
    190 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    191 {
    192 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    193 		linux_rt_sendsig(ksi, mask);
    194 	else
    195 		linux_old_sendsig(ksi, mask);
    196 }
    197 
    198 
    199 static void
    200 linux_save_ucontext(l, tf, mask, sas, uc)
    201 	struct lwp *l;
    202 	struct trapframe *tf;
    203 	const sigset_t *mask;
    204 	struct sigaltstack *sas;
    205 	struct linux_ucontext *uc;
    206 {
    207 	uc->uc_flags = 0;
    208 	uc->uc_link = NULL;
    209 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    210 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    211 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    212 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    213 }
    214 
    215 static void
    216 linux_save_sigcontext(l, tf, mask, sc)
    217 	struct lwp *l;
    218 	struct trapframe *tf;
    219 	const sigset_t *mask;
    220 	struct linux_sigcontext *sc;
    221 {
    222 	/* Save register context. */
    223 #ifdef VM86
    224 	if (tf->tf_eflags & PSL_VM) {
    225 		sc->sc_gs = tf->tf_vm86_gs;
    226 		sc->sc_fs = tf->tf_vm86_fs;
    227 		sc->sc_es = tf->tf_vm86_es;
    228 		sc->sc_ds = tf->tf_vm86_ds;
    229 		sc->sc_eflags = get_vflags(l);
    230 	} else
    231 #endif
    232 	{
    233 		sc->sc_gs = tf->tf_gs;
    234 		sc->sc_fs = tf->tf_fs;
    235 		sc->sc_es = tf->tf_es;
    236 		sc->sc_ds = tf->tf_ds;
    237 		sc->sc_eflags = tf->tf_eflags;
    238 	}
    239 	sc->sc_edi = tf->tf_edi;
    240 	sc->sc_esi = tf->tf_esi;
    241 	sc->sc_esp = tf->tf_esp;
    242 	sc->sc_ebp = tf->tf_ebp;
    243 	sc->sc_ebx = tf->tf_ebx;
    244 	sc->sc_edx = tf->tf_edx;
    245 	sc->sc_ecx = tf->tf_ecx;
    246 	sc->sc_eax = tf->tf_eax;
    247 	sc->sc_eip = tf->tf_eip;
    248 	sc->sc_cs = tf->tf_cs;
    249 	sc->sc_esp_at_signal = tf->tf_esp;
    250 	sc->sc_ss = tf->tf_ss;
    251 	sc->sc_err = tf->tf_err;
    252 	sc->sc_trapno = tf->tf_trapno;
    253 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    254 	sc->sc_387 = NULL;
    255 
    256 	/* Save signal stack. */
    257 	/* Linux doesn't save the onstack flag in sigframe */
    258 
    259 	/* Save signal mask. */
    260 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    261 }
    262 
    263 static void
    264 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    265 {
    266 	struct lwp *l = curlwp;
    267 	struct proc *p = l->l_proc;
    268 	struct trapframe *tf;
    269 	struct linux_rt_sigframe *fp, frame;
    270 	int onstack, error;
    271 	linux_siginfo_t *lsi;
    272 	int sig = ksi->ksi_signo;
    273 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    274 	struct sigaltstack *sas = &l->l_sigstk;
    275 
    276 	tf = l->l_md.md_regs;
    277 	/* Do we need to jump onto the signal stack? */
    278 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    279 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    280 
    281 
    282 	/* Allocate space for the signal handler context. */
    283 	if (onstack)
    284 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    285 		    sas->ss_size);
    286 	else
    287 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    288 	fp--;
    289 
    290 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    291 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    292 
    293 	/* Build stack frame for signal trampoline. */
    294 	frame.sf_handler = catcher;
    295 	frame.sf_sig = native_to_linux_signo[sig];
    296 	frame.sf_sip = &fp->sf_si;
    297 	frame.sf_ucp = &fp->sf_uc;
    298 
    299 	/*
    300 	 * XXX: the following code assumes that the constants for
    301 	 * siginfo are the same between linux and NetBSD.
    302 	 */
    303 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    304 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    305 	lsi->lsi_code = ksi->ksi_code;
    306 	switch (lsi->lsi_signo = frame.sf_sig) {
    307 	case LINUX_SIGILL:
    308 	case LINUX_SIGFPE:
    309 	case LINUX_SIGSEGV:
    310 	case LINUX_SIGBUS:
    311 	case LINUX_SIGTRAP:
    312 		lsi->lsi_addr = ksi->ksi_addr;
    313 		break;
    314 	case LINUX_SIGCHLD:
    315 		lsi->lsi_uid = ksi->ksi_uid;
    316 		lsi->lsi_pid = ksi->ksi_pid;
    317 		lsi->lsi_utime = ksi->ksi_utime;
    318 		lsi->lsi_stime = ksi->ksi_stime;
    319 
    320 		/* We use the same codes */
    321 		lsi->lsi_code = ksi->ksi_code;
    322 		/* XXX is that right? */
    323 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    324 		break;
    325 	case LINUX_SIGIO:
    326 		lsi->lsi_band = ksi->ksi_band;
    327 		lsi->lsi_fd = ksi->ksi_fd;
    328 		break;
    329 	default:
    330 		lsi->lsi_uid = ksi->ksi_uid;
    331 		lsi->lsi_pid = ksi->ksi_pid;
    332 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    333 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    334 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
    335 		break;
    336 	}
    337 
    338 	/* Save register context. */
    339 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    340 	sendsig_reset(l, sig);
    341 
    342 	mutex_exit(&p->p_smutex);
    343 	error = copyout(&frame, fp, sizeof(frame));
    344 	mutex_enter(&p->p_smutex);
    345 
    346 	if (error != 0) {
    347 		/*
    348 		 * Process has trashed its stack; give it an illegal
    349 		 * instruction to halt it in its tracks.
    350 		 */
    351 		sigexit(l, SIGILL);
    352 		/* NOTREACHED */
    353 	}
    354 
    355 	/*
    356 	 * Build context to run handler in.
    357 	 */
    358 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    359 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    360 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    361 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    362 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    363 	    (linux_rt_sigcode - linux_sigcode);
    364 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    365 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    366 	tf->tf_esp = (int)fp;
    367 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    368 
    369 	/* Remember that we're now on the signal stack. */
    370 	if (onstack)
    371 		sas->ss_flags |= SS_ONSTACK;
    372 }
    373 
    374 static void
    375 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    376 {
    377 	struct lwp *l = curlwp;
    378 	struct proc *p = l->l_proc;
    379 	struct trapframe *tf;
    380 	struct linux_sigframe *fp, frame;
    381 	int onstack, error;
    382 	int sig = ksi->ksi_signo;
    383 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    384 	struct sigaltstack *sas = &l->l_sigstk;
    385 
    386 	tf = l->l_md.md_regs;
    387 
    388 	/* Do we need to jump onto the signal stack? */
    389 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    390 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    391 
    392 	/* Allocate space for the signal handler context. */
    393 	if (onstack)
    394 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    395 		    sas->ss_size);
    396 	else
    397 		fp = (struct linux_sigframe *)tf->tf_esp;
    398 	fp--;
    399 
    400 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    401 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    402 
    403 	/* Build stack frame for signal trampoline. */
    404 	frame.sf_handler = catcher;
    405 	frame.sf_sig = native_to_linux_signo[sig];
    406 
    407 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    408 	sendsig_reset(l, sig);
    409 
    410 	mutex_exit(&p->p_smutex);
    411 	error = copyout(&frame, fp, sizeof(frame));
    412 	mutex_enter(&p->p_smutex);
    413 
    414 	if (error != 0) {
    415 		/*
    416 		 * Process has trashed its stack; give it an illegal
    417 		 * instruction to halt it in its tracks.
    418 		 */
    419 		sigexit(l, SIGILL);
    420 		/* NOTREACHED */
    421 	}
    422 
    423 	/*
    424 	 * Build context to run handler in.
    425 	 */
    426 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    427 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    428 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    429 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    430 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    431 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    432 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    433 	tf->tf_esp = (int)fp;
    434 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    435 
    436 	/* Remember that we're now on the signal stack. */
    437 	if (onstack)
    438 		sas->ss_flags |= SS_ONSTACK;
    439 }
    440 
    441 /*
    442  * System call to cleanup state after a signal
    443  * has been taken.  Reset signal mask and
    444  * stack state from context left by sendsig (above).
    445  * Return to previous pc and psl as specified by
    446  * context left by sendsig. Check carefully to
    447  * make sure that the user has not modified the
    448  * psl to gain improper privileges or to cause
    449  * a machine fault.
    450  */
    451 int
    452 linux_sys_rt_sigreturn(l, v, retval)
    453 	struct lwp *l;
    454 	void *v;
    455 	register_t *retval;
    456 {
    457 	struct linux_sys_rt_sigreturn_args /* {
    458 		syscallarg(struct linux_ucontext *) ucp;
    459 	} */ *uap = v;
    460 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    461 	int error;
    462 
    463 	/*
    464 	 * The trampoline code hands us the context.
    465 	 * It is unsafe to keep track of it ourselves, in the event that a
    466 	 * program jumps out of a signal handler.
    467 	 */
    468 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    469 		return error;
    470 
    471 	/* XXX XAX we can do better here by using more of the ucontext */
    472 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    473 }
    474 
    475 int
    476 linux_sys_sigreturn(l, v, retval)
    477 	struct lwp *l;
    478 	void *v;
    479 	register_t *retval;
    480 {
    481 	struct linux_sys_sigreturn_args /* {
    482 		syscallarg(struct linux_sigcontext *) scp;
    483 	} */ *uap = v;
    484 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    485 	int error;
    486 
    487 	/*
    488 	 * The trampoline code hands us the context.
    489 	 * It is unsafe to keep track of it ourselves, in the event that a
    490 	 * program jumps out of a signal handler.
    491 	 */
    492 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    493 		return error;
    494 	return linux_restore_sigcontext(l, &context, retval);
    495 }
    496 
    497 static int
    498 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    499     register_t *retval)
    500 {
    501 	struct proc *p = l->l_proc;
    502 	struct sigaltstack *sas = &l->l_sigstk;
    503 	struct trapframe *tf;
    504 	sigset_t mask;
    505 	ssize_t ss_gap;
    506 	/* Restore register context. */
    507 	tf = l->l_md.md_regs;
    508 
    509 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    510 #ifdef VM86
    511 	if (scp->sc_eflags & PSL_VM) {
    512 		void syscall_vm86 __P((struct trapframe *));
    513 
    514 		tf->tf_vm86_gs = scp->sc_gs;
    515 		tf->tf_vm86_fs = scp->sc_fs;
    516 		tf->tf_vm86_es = scp->sc_es;
    517 		tf->tf_vm86_ds = scp->sc_ds;
    518 		set_vflags(l, scp->sc_eflags);
    519 		p->p_md.md_syscall = syscall_vm86;
    520 	} else
    521 #endif
    522 	{
    523 		/*
    524 		 * Check for security violations.  If we're returning to
    525 		 * protected mode, the CPU will validate the segment registers
    526 		 * automatically and generate a trap on violations.  We handle
    527 		 * the trap, rather than doing all of the checking here.
    528 		 */
    529 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    530 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    531 			return EINVAL;
    532 
    533 		tf->tf_gs = scp->sc_gs;
    534 		tf->tf_fs = scp->sc_fs;
    535 		tf->tf_es = scp->sc_es;
    536 		tf->tf_ds = scp->sc_ds;
    537 #ifdef VM86
    538 		if (tf->tf_eflags & PSL_VM)
    539 			(*p->p_emul->e_syscall_intern)(p);
    540 #endif
    541 		tf->tf_eflags = scp->sc_eflags;
    542 	}
    543 	tf->tf_edi = scp->sc_edi;
    544 	tf->tf_esi = scp->sc_esi;
    545 	tf->tf_ebp = scp->sc_ebp;
    546 	tf->tf_ebx = scp->sc_ebx;
    547 	tf->tf_edx = scp->sc_edx;
    548 	tf->tf_ecx = scp->sc_ecx;
    549 	tf->tf_eax = scp->sc_eax;
    550 	tf->tf_eip = scp->sc_eip;
    551 	tf->tf_cs = scp->sc_cs;
    552 	tf->tf_esp = scp->sc_esp_at_signal;
    553 	tf->tf_ss = scp->sc_ss;
    554 
    555 	/* Restore signal stack. */
    556 	/*
    557 	 * Linux really does it this way; it doesn't have space in sigframe
    558 	 * to save the onstack flag.
    559 	 */
    560 	mutex_enter(&p->p_smutex);
    561 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    562 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    563 		sas->ss_flags |= SS_ONSTACK;
    564 	else
    565 		sas->ss_flags &= ~SS_ONSTACK;
    566 
    567 	/* Restore signal mask. */
    568 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    569 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    570 	mutex_exit(&p->p_smutex);
    571 
    572 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    573 	return EJUSTRETURN;
    574 }
    575 
    576 #ifdef USER_LDT
    577 
    578 static int
    579 linux_read_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
    580     register_t *retval)
    581 {
    582 	struct x86_get_ldt_args gl;
    583 	int error;
    584 	int num_ldt;
    585 	union descriptor *ldt_buf;
    586 
    587 	/*
    588 	 * I've checked the linux code - this function is asymetric with
    589 	 * linux_write_ldt, and returns raw ldt entries.
    590 	 * NB, the code I saw zerod the spare parts of the user buffer.
    591 	 */
    592 
    593 	DPRINTF(("linux_read_ldt!"));
    594 
    595 	num_ldt = x86_get_ldt_len(l);
    596 	if (num_ldt <= 0)
    597 		return EINVAL;
    598 
    599 	gl.start = 0;
    600 	gl.desc = NULL;
    601 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    602 
    603 	if (gl.num > num_ldt)
    604 		gl.num = num_ldt;
    605 
    606 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
    607 
    608 	error = x86_get_ldt1(l, &gl, ldt_buf);
    609 	/* NB gl.num might have changed */
    610 	if (error == 0) {
    611 		*retval = gl.num * sizeof *ldt;
    612 		error = copyout(ldt_buf, SCARG(uap, ptr),
    613 		    gl.num * sizeof *ldt_buf);
    614 	}
    615 	free(ldt, M_TEMP);
    616 
    617 	return error;
    618 }
    619 
    620 struct linux_ldt_info {
    621 	u_int entry_number;
    622 	u_long base_addr;
    623 	u_int limit;
    624 	u_int seg_32bit:1;
    625 	u_int contents:2;
    626 	u_int read_exec_only:1;
    627 	u_int limit_in_pages:1;
    628 	u_int seg_not_present:1;
    629 	u_int useable:1;
    630 };
    631 
    632 static int
    633 linux_write_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
    634     int oldmode)
    635 {
    636 	struct linux_ldt_info ldt_info;
    637 	union descriptor d;
    638 	struct x86_set_ldt_args sl;
    639 	int error;
    640 
    641 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    642 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    643 		return (EINVAL);
    644 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    645 		return error;
    646 	if (ldt_info.entry_number >= 8192)
    647 		return (EINVAL);
    648 	if (ldt_info.contents == 3) {
    649 		if (oldmode)
    650 			return (EINVAL);
    651 		if (ldt_info.seg_not_present)
    652 			return (EINVAL);
    653 	}
    654 
    655 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    656 	    (oldmode || (ldt_info.contents == 0 &&
    657 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    658 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    659 	    ldt_info.useable == 0))) {
    660 		/* this means you should zero the ldt */
    661 		(void)memset(&d, 0, sizeof(d));
    662 	} else {
    663 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    664 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    665 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    666 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    667 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    668 		    (!ldt_info.read_exec_only << 1);
    669 		d.sd.sd_dpl = SEL_UPL;
    670 		d.sd.sd_p = !ldt_info.seg_not_present;
    671 		d.sd.sd_def32 = ldt_info.seg_32bit;
    672 		d.sd.sd_gran = ldt_info.limit_in_pages;
    673 		if (!oldmode)
    674 			d.sd.sd_xx = ldt_info.useable;
    675 		else
    676 			d.sd.sd_xx = 0;
    677 	}
    678 	sl.start = ldt_info.entry_number;
    679 	sl.desc = NULL;;
    680 	sl.num = 1;
    681 
    682 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    683 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    684 
    685 	return x86_set_ldt1(l, &sl, &d);
    686 }
    687 
    688 #endif /* USER_LDT */
    689 
    690 int
    691 linux_sys_modify_ldt(struct lwp *l, void *v,
    692     register_t *retval)
    693 {
    694 	struct linux_sys_modify_ldt_args /* {
    695 		syscallarg(int) func;
    696 		syscallarg(void *) ptr;
    697 		syscallarg(size_t) bytecount;
    698 	} */ *uap = v;
    699 
    700 	switch (SCARG(uap, func)) {
    701 #ifdef USER_LDT
    702 	case 0:
    703 		return linux_read_ldt(l, uap, retval);
    704 	case 1:
    705 		return linux_write_ldt(l, uap, 1);
    706 	case 2:
    707 #ifdef notyet
    708 		return (linux_read_default_ldt(l, uap, retval);
    709 #else
    710 		return (ENOSYS);
    711 #endif
    712 	case 0x11:
    713 		return linux_write_ldt(l, uap, 0);
    714 #endif /* USER_LDT */
    715 
    716 	default:
    717 		return (ENOSYS);
    718 	}
    719 }
    720 
    721 /*
    722  * XXX Pathetic hack to make svgalib work. This will fake the major
    723  * device number of an opened VT so that svgalib likes it. grmbl.
    724  * Should probably do it 'wrong the right way' and use a mapping
    725  * array for all major device numbers, and map linux_mknod too.
    726  */
    727 dev_t
    728 linux_fakedev(dev, raw)
    729 	dev_t dev;
    730 	int raw;
    731 {
    732 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    733 	const struct cdevsw *cd = cdevsw_lookup(dev);
    734 
    735 	if (raw) {
    736 #if (NWSDISPLAY > 0)
    737 		extern const struct cdevsw wsdisplay_cdevsw;
    738 		if (cd == &wsdisplay_cdevsw)
    739 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    740 #endif
    741 	}
    742 
    743 	if (cd == &ptc_cdevsw)
    744 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    745 	if (cd == &pts_cdevsw)
    746 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    747 
    748 	return dev;
    749 }
    750 
    751 #if (NWSDISPLAY > 0)
    752 /*
    753  * That's not complete, but enough to get an X server running.
    754  */
    755 #define NR_KEYS 128
    756 static const u_short plain_map[NR_KEYS] = {
    757 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    758 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    759 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    760 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    761 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    762 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    763 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    764 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    765 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    766 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    767 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    768 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    769 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    770 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    771 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    772 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    773 }, shift_map[NR_KEYS] = {
    774 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    775 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    776 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    777 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    778 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    779 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    780 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    781 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    782 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    783 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    784 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    785 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    786 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    787 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    788 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    789 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    790 }, altgr_map[NR_KEYS] = {
    791 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    792 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    793 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    794 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    795 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    796 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    797 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    798 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    799 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    800 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    801 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    802 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    803 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    804 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    805 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    806 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    807 }, ctrl_map[NR_KEYS] = {
    808 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    809 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    810 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    811 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    812 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    813 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    814 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    815 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    816 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    817 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    818 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    819 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    820 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    821 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    822 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    823 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    824 };
    825 
    826 const u_short * const linux_keytabs[] = {
    827 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    828 };
    829 #endif
    830 
    831 static struct biosdisk_info *
    832 fd2biosinfo(struct proc *p, struct file *fp)
    833 {
    834 	struct vnode *vp;
    835 	const char *blkname;
    836 	char diskname[16];
    837 	int i;
    838 	struct nativedisk_info *nip;
    839 	struct disklist *dl = x86_alldisks;
    840 
    841 	if (fp->f_type != DTYPE_VNODE)
    842 		return NULL;
    843 	vp = (struct vnode *)fp->f_data;
    844 
    845 	if (vp->v_type != VBLK)
    846 		return NULL;
    847 
    848 	blkname = devsw_blk2name(major(vp->v_rdev));
    849 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    850 	    DISKUNIT(vp->v_rdev));
    851 
    852 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    853 		nip = &dl->dl_nativedisks[i];
    854 		if (strcmp(diskname, nip->ni_devname))
    855 			continue;
    856 		if (nip->ni_nmatches != 0)
    857 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    858 	}
    859 
    860 	return NULL;
    861 }
    862 
    863 
    864 /*
    865  * We come here in a last attempt to satisfy a Linux ioctl() call
    866  */
    867 int
    868 linux_machdepioctl(l, v, retval)
    869 	struct lwp *l;
    870 	void *v;
    871 	register_t *retval;
    872 {
    873 	struct linux_sys_ioctl_args /* {
    874 		syscallarg(int) fd;
    875 		syscallarg(u_long) com;
    876 		syscallarg(void *) data;
    877 	} */ *uap = v;
    878 	struct sys_ioctl_args bia;
    879 	u_long com;
    880 	int error, error1;
    881 #if (NWSDISPLAY > 0)
    882 	struct vt_mode lvt;
    883 	void *bvtp, *sg;
    884 	struct kbentry kbe;
    885 #endif
    886 	struct linux_hd_geometry hdg;
    887 	struct linux_hd_big_geometry hdg_big;
    888 	struct biosdisk_info *bip;
    889 	struct filedesc *fdp;
    890 	struct file *fp;
    891 	int fd;
    892 	struct disklabel label, *labp;
    893 	struct partinfo partp;
    894 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    895 	u_long start, biostotal, realtotal;
    896 	u_char heads, sectors;
    897 	u_int cylinders;
    898 	struct ioctl_pt pt;
    899 	struct proc *p = l->l_proc;
    900 
    901 	fd = SCARG(uap, fd);
    902 	SCARG(&bia, fd) = fd;
    903 	SCARG(&bia, data) = SCARG(uap, data);
    904 	com = SCARG(uap, com);
    905 
    906 	fdp = p->p_fd;
    907 
    908 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    909 		return (EBADF);
    910 
    911 	FILE_USE(fp);
    912 
    913 	switch (com) {
    914 #if (NWSDISPLAY > 0)
    915 	case LINUX_KDGKBMODE:
    916 		com = KDGKBMODE;
    917 		break;
    918 	case LINUX_KDSKBMODE:
    919 		com = KDSKBMODE;
    920 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    921 			SCARG(&bia, data) = (void *)K_RAW;
    922 		break;
    923 	case LINUX_KIOCSOUND:
    924 		SCARG(&bia, data) =
    925 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    926 		/* fall through */
    927 	case LINUX_KDMKTONE:
    928 		com = KDMKTONE;
    929 		break;
    930 	case LINUX_KDSETMODE:
    931 		com = KDSETMODE;
    932 		break;
    933 	case LINUX_KDGETMODE:
    934 		/* KD_* values are equal to the wscons numbers */
    935 		com = WSDISPLAYIO_GMODE;
    936 		break;
    937 	case LINUX_KDENABIO:
    938 		com = KDENABIO;
    939 		break;
    940 	case LINUX_KDDISABIO:
    941 		com = KDDISABIO;
    942 		break;
    943 	case LINUX_KDGETLED:
    944 		com = KDGETLED;
    945 		break;
    946 	case LINUX_KDSETLED:
    947 		com = KDSETLED;
    948 		break;
    949 	case LINUX_VT_OPENQRY:
    950 		com = VT_OPENQRY;
    951 		break;
    952 	case LINUX_VT_GETMODE:
    953 		SCARG(&bia, com) = VT_GETMODE;
    954 		/* XXX NJWLWP */
    955 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    956 			goto out;
    957 		if ((error = copyin(SCARG(uap, data), (void *)&lvt,
    958 		    sizeof (struct vt_mode))))
    959 			goto out;
    960 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    961 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    962 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    963 		error = copyout((void *)&lvt, SCARG(uap, data),
    964 		    sizeof (struct vt_mode));
    965 		goto out;
    966 	case LINUX_VT_SETMODE:
    967 		com = VT_SETMODE;
    968 		if ((error = copyin(SCARG(uap, data), (void *)&lvt,
    969 		    sizeof (struct vt_mode))))
    970 			goto out;
    971 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    972 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    973 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    974 		sg = stackgap_init(p, 0);
    975 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    976 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    977 			goto out;
    978 		SCARG(&bia, data) = bvtp;
    979 		break;
    980 	case LINUX_VT_DISALLOCATE:
    981 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    982 		error = 0;
    983 		goto out;
    984 	case LINUX_VT_RELDISP:
    985 		com = VT_RELDISP;
    986 		break;
    987 	case LINUX_VT_ACTIVATE:
    988 		com = VT_ACTIVATE;
    989 		break;
    990 	case LINUX_VT_WAITACTIVE:
    991 		com = VT_WAITACTIVE;
    992 		break;
    993 	case LINUX_VT_GETSTATE:
    994 		com = VT_GETSTATE;
    995 		break;
    996 	case LINUX_KDGKBTYPE:
    997 	    {
    998 		static const u_int8_t kb101 = KB_101;
    999 
   1000 		/* This is what Linux does. */
   1001 		error = copyout(&kb101, SCARG(uap, data), 1);
   1002 		goto out;
   1003 	    }
   1004 	case LINUX_KDGKBENT:
   1005 		/*
   1006 		 * The Linux KDGKBENT ioctl is different from the
   1007 		 * SYSV original. So we handle it in machdep code.
   1008 		 * XXX We should use keyboard mapping information
   1009 		 * from wsdisplay, but this would be expensive.
   1010 		 */
   1011 		if ((error = copyin(SCARG(uap, data), &kbe,
   1012 				    sizeof(struct kbentry))))
   1013 			goto out;
   1014 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1015 		    || kbe.kb_index >= NR_KEYS) {
   1016 			error = EINVAL;
   1017 			goto out;
   1018 		}
   1019 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1020 		error = copyout(&kbe, SCARG(uap, data),
   1021 				sizeof(struct kbentry));
   1022 		goto out;
   1023 #endif
   1024 	case LINUX_HDIO_GETGEO:
   1025 	case LINUX_HDIO_GETGEO_BIG:
   1026 		/*
   1027 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1028 		 * if possible (extending its # of cylinders if it's beyond
   1029 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1030 		 * the real geometry) if not found, by returning an
   1031 		 * error. See common/linux_hdio.c
   1032 		 */
   1033 		bip = fd2biosinfo(p, fp);
   1034 		ioctlf = fp->f_ops->fo_ioctl;
   1035 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
   1036 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
   1037 		if (error != 0 && error1 != 0) {
   1038 			error = error1;
   1039 			goto out;
   1040 		}
   1041 		labp = error != 0 ? &label : partp.disklab;
   1042 		start = error1 != 0 ? partp.part->p_offset : 0;
   1043 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1044 		    && bip->bi_cyl != 0) {
   1045 			heads = bip->bi_head;
   1046 			sectors = bip->bi_sec;
   1047 			cylinders = bip->bi_cyl;
   1048 			biostotal = heads * sectors * cylinders;
   1049 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1050 			    labp->d_ncylinders;
   1051 			if (realtotal > biostotal)
   1052 				cylinders = realtotal / (heads * sectors);
   1053 		} else {
   1054 			heads = labp->d_ntracks;
   1055 			cylinders = labp->d_ncylinders;
   1056 			sectors = labp->d_nsectors;
   1057 		}
   1058 		if (com == LINUX_HDIO_GETGEO) {
   1059 			hdg.start = start;
   1060 			hdg.heads = heads;
   1061 			hdg.cylinders = cylinders;
   1062 			hdg.sectors = sectors;
   1063 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1064 			goto out;
   1065 		} else {
   1066 			hdg_big.start = start;
   1067 			hdg_big.heads = heads;
   1068 			hdg_big.cylinders = cylinders;
   1069 			hdg_big.sectors = sectors;
   1070 			error = copyout(&hdg_big, SCARG(uap, data),
   1071 			    sizeof hdg_big);
   1072 			goto out;
   1073 		}
   1074 
   1075 	default:
   1076 		/*
   1077 		 * Unknown to us. If it's on a device, just pass it through
   1078 		 * using PTIOCLINUX, the device itself might be able to
   1079 		 * make some sense of it.
   1080 		 * XXX hack: if the function returns EJUSTRETURN,
   1081 		 * it has stuffed a sysctl return value in pt.data.
   1082 		 */
   1083 		ioctlf = fp->f_ops->fo_ioctl;
   1084 		pt.com = SCARG(uap, com);
   1085 		pt.data = SCARG(uap, data);
   1086 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
   1087 		if (error == EJUSTRETURN) {
   1088 			retval[0] = (register_t)pt.data;
   1089 			error = 0;
   1090 		}
   1091 
   1092 		if (error == ENOTTY) {
   1093 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1094 			    com));
   1095 		}
   1096 		goto out;
   1097 	}
   1098 	SCARG(&bia, com) = com;
   1099 	/* XXX NJWLWP */
   1100 	error = sys_ioctl(curlwp, &bia, retval);
   1101 out:
   1102 	FILE_UNUSE(fp ,l);
   1103 	return error;
   1104 }
   1105 
   1106 /*
   1107  * Set I/O permissions for a process. Just set the maximum level
   1108  * right away (ignoring the argument), otherwise we would have
   1109  * to rely on I/O permission maps, which are not implemented.
   1110  */
   1111 int
   1112 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1113 {
   1114 #if 0
   1115 	struct linux_sys_iopl_args /* {
   1116 		syscallarg(int) level;
   1117 	} */ *uap = v;
   1118 #endif
   1119 	struct trapframe *fp = l->l_md.md_regs;
   1120 
   1121 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1122 	    NULL, NULL, NULL, NULL) != 0)
   1123 		return EPERM;
   1124 	fp->tf_eflags |= PSL_IOPL;
   1125 	*retval = 0;
   1126 	return 0;
   1127 }
   1128 
   1129 /*
   1130  * See above. If a root process tries to set access to an I/O port,
   1131  * just let it have the whole range.
   1132  */
   1133 int
   1134 linux_sys_ioperm(l, v, retval)
   1135 	struct lwp *l;
   1136 	void *v;
   1137 	register_t *retval;
   1138 {
   1139 	struct linux_sys_ioperm_args /* {
   1140 		syscallarg(unsigned int) lo;
   1141 		syscallarg(unsigned int) hi;
   1142 		syscallarg(int) val;
   1143 	} */ *uap = v;
   1144 	struct trapframe *fp = l->l_md.md_regs;
   1145 
   1146 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1147 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1148 	    NULL, NULL) != 0)
   1149 		return EPERM;
   1150 	if (SCARG(uap, val))
   1151 		fp->tf_eflags |= PSL_IOPL;
   1152 	*retval = 0;
   1153 	return 0;
   1154 }
   1155 
   1156 int
   1157 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1158     void *arg)
   1159 {
   1160 	return 0;
   1161 }
   1162 
   1163 const char *
   1164 linux_get_uname_arch(void)
   1165 {
   1166 	static char uname_arch[5] = "i386";
   1167 
   1168 	if (uname_arch[1] == '3')
   1169 		uname_arch[1] += cpu_class;
   1170 	return uname_arch;
   1171 }
   1172 
   1173 #ifdef LINUX_NPTL
/*
 * Return the new TLS descriptor for an lwp.
 * Not implemented yet: l is ignored and NULL is always returned.
 */
void *
linux_get_newtls(struct lwp *l)
{

	/* XXX: Implement me */
	return NULL;
}
   1183 
/*
 * Install a new TLS descriptor for an lwp.
 * Not implemented yet: both arguments are ignored and 0 is returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{

	/* XXX: Implement me */
	return 0;
}
   1192 #endif
   1193