Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.97
      1 /*	$NetBSD: linux_machdep.c,v 1.97 2003/09/21 17:42:23 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.97 2003/09/21 17:42:23 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
    115 #ifdef DEBUG_LINUX
    116 #define DPRINTF(a) uprintf a
    117 #else
    118 #define DPRINTF(a)
    119 #endif
    120 
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    122 extern struct disklist *i386_alldisks;
    123 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    124     sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    125 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    126     sigset_t *, struct linux_sigcontext *));
    127 static int linux_restore_sigcontext __P((struct lwp *,
    128     struct linux_sigcontext *, register_t *));
    129 static void linux_rt_sendsig __P((ksiginfo_t *, sigset_t *));
    130 static void linux_old_sendsig __P((ksiginfo_t *, sigset_t *));
    131 
    132 extern char linux_sigcode[], linux_rt_sigcode[];
    133 /*
    134  * Deal with some i386-specific things in the Linux emulation code.
    135  */
    136 
    137 void
    138 linux_setregs(l, epp, stack)
    139 	struct lwp *l;
    140 	struct exec_package *epp;
    141 	u_long stack;
    142 {
    143 	struct pcb *pcb = &l->l_addr->u_pcb;
    144 	struct trapframe *tf;
    145 
    146 #if NNPX > 0
    147 	/* If we were using the FPU, forget about it. */
    148 	if (npxproc == l)
    149 		npxdrop();
    150 #endif
    151 
    152 #ifdef USER_LDT
    153 	pmap_ldt_cleanup(l);
    154 #endif
    155 
    156 	l->l_md.md_flags &= ~MDP_USEDFPU;
    157 
    158 	if (i386_use_fxsave) {
    159 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    161 	} else
    162 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    163 
    164 	tf = l->l_md.md_regs;
    165 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    166 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    167 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_edi = 0;
    170 	tf->tf_esi = 0;
    171 	tf->tf_ebp = 0;
    172 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    173 	tf->tf_edx = 0;
    174 	tf->tf_ecx = 0;
    175 	tf->tf_eax = 0;
    176 	tf->tf_eip = epp->ep_entry;
    177 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    178 	tf->tf_eflags = PSL_USERSET;
    179 	tf->tf_esp = stack;
    180 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    181 }
    182 
    183 /*
    184  * Send an interrupt to process.
    185  *
    186  * Stack is set up to allow sigcode stored
    187  * in u. to call routine, followed by kcall
    188  * to sigreturn routine below.  After sigreturn
    189  * resets the signal mask, the stack, and the
    190  * frame pointer, it returns to the user
    191  * specified pc, psl.
    192  */
    193 
    194 void
    195 linux_sendsig(ksiginfo_t *ksi, sigset_t *mask)
    196 {
    197 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    198 		linux_rt_sendsig(ksi, mask);
    199 	else
    200 		linux_old_sendsig(ksi, mask);
    201 }
    202 
    203 
    204 static void
    205 linux_save_ucontext(l, tf, mask, sas, uc)
    206 	struct lwp *l;
    207 	struct trapframe *tf;
    208 	sigset_t *mask;
    209 	struct sigaltstack *sas;
    210 	struct linux_ucontext *uc;
    211 {
    212 	uc->uc_flags = 0;
    213 	uc->uc_link = NULL;
    214 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    215 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    216 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    217 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    218 }
    219 
    220 static void
    221 linux_save_sigcontext(l, tf, mask, sc)
    222 	struct lwp *l;
    223 	struct trapframe *tf;
    224 	sigset_t *mask;
    225 	struct linux_sigcontext *sc;
    226 {
    227 	/* Save register context. */
    228 #ifdef VM86
    229 	if (tf->tf_eflags & PSL_VM) {
    230 		sc->sc_gs = tf->tf_vm86_gs;
    231 		sc->sc_fs = tf->tf_vm86_fs;
    232 		sc->sc_es = tf->tf_vm86_es;
    233 		sc->sc_ds = tf->tf_vm86_ds;
    234 		sc->sc_eflags = get_vflags(l);
    235 	} else
    236 #endif
    237 	{
    238 		sc->sc_gs = tf->tf_gs;
    239 		sc->sc_fs = tf->tf_fs;
    240 		sc->sc_es = tf->tf_es;
    241 		sc->sc_ds = tf->tf_ds;
    242 		sc->sc_eflags = tf->tf_eflags;
    243 	}
    244 	sc->sc_edi = tf->tf_edi;
    245 	sc->sc_esi = tf->tf_esi;
    246 	sc->sc_esp = tf->tf_esp;
    247 	sc->sc_ebp = tf->tf_ebp;
    248 	sc->sc_ebx = tf->tf_ebx;
    249 	sc->sc_edx = tf->tf_edx;
    250 	sc->sc_ecx = tf->tf_ecx;
    251 	sc->sc_eax = tf->tf_eax;
    252 	sc->sc_eip = tf->tf_eip;
    253 	sc->sc_cs = tf->tf_cs;
    254 	sc->sc_esp_at_signal = tf->tf_esp;
    255 	sc->sc_ss = tf->tf_ss;
    256 	sc->sc_err = tf->tf_err;
    257 	sc->sc_trapno = tf->tf_trapno;
    258 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    259 	sc->sc_387 = NULL;
    260 
    261 	/* Save signal stack. */
    262 	/* Linux doesn't save the onstack flag in sigframe */
    263 
    264 	/* Save signal mask. */
    265 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    266 }
    267 
    268 static void
    269 linux_rt_sendsig(ksiginfo_t *ksi, sigset_t *mask)
    270 {
    271 	struct lwp *l = curlwp;
    272 	struct proc *p = l->l_proc;
    273 	struct trapframe *tf;
    274 	struct linux_rt_sigframe *fp, frame;
    275 	int onstack;
    276 	linux_siginfo_t *lsi;
    277 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    278 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    279 
    280 	tf = l->l_md.md_regs;
    281 	/* Do we need to jump onto the signal stack? */
    282 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    283 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    284 
    285 
    286 	/* Allocate space for the signal handler context. */
    287 	if (onstack)
    288 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    289 		    sas->ss_size);
    290 	else
    291 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    292 	fp--;
    293 
    294 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    295 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    296 
    297 	/* Build stack frame for signal trampoline. */
    298 	frame.sf_handler = catcher;
    299 	frame.sf_sig = native_to_linux_signo[sig];
    300 	frame.sf_sip = &fp->sf_si;
    301 	frame.sf_ucp = &fp->sf_uc;
    302 
    303 	/*
    304 	 * XXX: the following code assumes that the constants for
    305 	 * siginfo are the same between linux and NetBSD.
    306 	 */
    307 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    308 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    309 	lsi->lsi_code = ksi->ksi_code;
    310 	switch (lsi->lsi_signo = frame.sf_sig) {
    311 	case LINUX_SIGILL:
    312 	case LINUX_SIGFPE:
    313 	case LINUX_SIGSEGV:
    314 	case LINUX_SIGBUS:
    315 	case LINUX_SIGTRAP:
    316 		lsi->lsi_addr = ksi->ksi_addr;
    317 		break;
    318 	case LINUX_SIGCHLD:
    319 		lsi->lsi_uid = ksi->ksi_uid;
    320 		lsi->lsi_pid = ksi->ksi_pid;
    321 		lsi->lsi_status = ksi->ksi_status;
    322 		lsi->lsi_utime = ksi->ksi_utime;
    323 		lsi->lsi_stime = ksi->ksi_stime;
    324 		break;
    325 	case LINUX_SIGIO:
    326 		lsi->lsi_band = ksi->ksi_band;
    327 		lsi->lsi_fd = ksi->ksi_fd;
    328 		break;
    329 	default:
    330 		lsi->lsi_uid = ksi->ksi_uid;
    331 		lsi->lsi_pid = ksi->ksi_pid;
    332 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    333 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    334 			lsi->lsi_sigval.sival_ptr = ksi->ksi_sigval.sival_ptr;
    335 		break;
    336 	}
    337 
    338 	/* Save register context. */
    339 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    340 
    341 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    342 		/*
    343 		 * Process has trashed its stack; give it an illegal
    344 		 * instruction to halt it in its tracks.
    345 		 */
    346 		sigexit(l, SIGILL);
    347 		/* NOTREACHED */
    348 	}
    349 
    350 	/*
    351 	 * Build context to run handler in.
    352 	 */
    353 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    354 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    355 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    356 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    357 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    358 	    (linux_rt_sigcode - linux_sigcode);
    359 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    360 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    361 	tf->tf_esp = (int)fp;
    362 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    363 
    364 	/* Remember that we're now on the signal stack. */
    365 	if (onstack)
    366 		sas->ss_flags |= SS_ONSTACK;
    367 }
    368 
    369 static void
    370 linux_old_sendsig(ksiginfo_t *ksi, sigset_t *mask)
    371 {
    372 	struct lwp *l = curlwp;
    373 	struct proc *p = l->l_proc;
    374 	struct trapframe *tf;
    375 	struct linux_sigframe *fp, frame;
    376 	int onstack;
    377 	int sig = ksi->ksi_signo;
    378 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    379 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    380 
    381 	tf = l->l_md.md_regs;
    382 
    383 	/* Do we need to jump onto the signal stack? */
    384 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    385 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    386 
    387 	/* Allocate space for the signal handler context. */
    388 	if (onstack)
    389 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    390 		    sas->ss_size);
    391 	else
    392 		fp = (struct linux_sigframe *)tf->tf_esp;
    393 	fp--;
    394 
    395 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    396 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    397 
    398 	/* Build stack frame for signal trampoline. */
    399 	frame.sf_handler = catcher;
    400 	frame.sf_sig = native_to_linux_signo[sig];
    401 
    402 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    403 
    404 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    405 		/*
    406 		 * Process has trashed its stack; give it an illegal
    407 		 * instruction to halt it in its tracks.
    408 		 */
    409 		sigexit(l, SIGILL);
    410 		/* NOTREACHED */
    411 	}
    412 
    413 	/*
    414 	 * Build context to run handler in.
    415 	 */
    416 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    417 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    418 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    419 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    420 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    421 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    422 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    423 	tf->tf_esp = (int)fp;
    424 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    425 
    426 	/* Remember that we're now on the signal stack. */
    427 	if (onstack)
    428 		sas->ss_flags |= SS_ONSTACK;
    429 }
    430 
    431 /*
    432  * System call to cleanup state after a signal
    433  * has been taken.  Reset signal mask and
    434  * stack state from context left by sendsig (above).
    435  * Return to previous pc and psl as specified by
    436  * context left by sendsig. Check carefully to
    437  * make sure that the user has not modified the
    438  * psl to gain improper privileges or to cause
    439  * a machine fault.
    440  */
    441 int
    442 linux_sys_rt_sigreturn(l, v, retval)
    443 	struct lwp *l;
    444 	void *v;
    445 	register_t *retval;
    446 {
    447 	struct linux_sys_rt_sigreturn_args /* {
    448 		syscallarg(struct linux_ucontext *) ucp;
    449 	} */ *uap = v;
    450 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    451 	int error;
    452 
    453 	/*
    454 	 * The trampoline code hands us the context.
    455 	 * It is unsafe to keep track of it ourselves, in the event that a
    456 	 * program jumps out of a signal handler.
    457 	 */
    458 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    459 		return error;
    460 
    461 	/* XXX XAX we can do better here by using more of the ucontext */
    462 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    463 }
    464 
    465 int
    466 linux_sys_sigreturn(l, v, retval)
    467 	struct lwp *l;
    468 	void *v;
    469 	register_t *retval;
    470 {
    471 	struct linux_sys_sigreturn_args /* {
    472 		syscallarg(struct linux_sigcontext *) scp;
    473 	} */ *uap = v;
    474 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    475 	int error;
    476 
    477 	/*
    478 	 * The trampoline code hands us the context.
    479 	 * It is unsafe to keep track of it ourselves, in the event that a
    480 	 * program jumps out of a signal handler.
    481 	 */
    482 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    483 		return error;
    484 	return linux_restore_sigcontext(l, &context, retval);
    485 }
    486 
    487 static int
    488 linux_restore_sigcontext(l, scp, retval)
    489 	struct lwp *l;
    490 	struct linux_sigcontext *scp;
    491 	register_t *retval;
    492 {
    493 	struct proc *p = l->l_proc;
    494 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    495 	struct trapframe *tf;
    496 	sigset_t mask;
    497 	ssize_t ss_gap;
    498 	/* Restore register context. */
    499 	tf = l->l_md.md_regs;
    500 
    501 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    502 #ifdef VM86
    503 	if (scp->sc_eflags & PSL_VM) {
    504 		void syscall_vm86 __P((struct trapframe *));
    505 
    506 		tf->tf_vm86_gs = scp->sc_gs;
    507 		tf->tf_vm86_fs = scp->sc_fs;
    508 		tf->tf_vm86_es = scp->sc_es;
    509 		tf->tf_vm86_ds = scp->sc_ds;
    510 		set_vflags(l, scp->sc_eflags);
    511 		p->p_md.md_syscall = syscall_vm86;
    512 	} else
    513 #endif
    514 	{
    515 		/*
    516 		 * Check for security violations.  If we're returning to
    517 		 * protected mode, the CPU will validate the segment registers
    518 		 * automatically and generate a trap on violations.  We handle
    519 		 * the trap, rather than doing all of the checking here.
    520 		 */
    521 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    522 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    523 			return EINVAL;
    524 
    525 		tf->tf_gs = scp->sc_gs;
    526 		tf->tf_fs = scp->sc_fs;
    527 		tf->tf_es = scp->sc_es;
    528 		tf->tf_ds = scp->sc_ds;
    529 #ifdef VM86
    530 		if (tf->tf_eflags & PSL_VM)
    531 			(*p->p_emul->e_syscall_intern)(p);
    532 #endif
    533 		tf->tf_eflags = scp->sc_eflags;
    534 	}
    535 	tf->tf_edi = scp->sc_edi;
    536 	tf->tf_esi = scp->sc_esi;
    537 	tf->tf_ebp = scp->sc_ebp;
    538 	tf->tf_ebx = scp->sc_ebx;
    539 	tf->tf_edx = scp->sc_edx;
    540 	tf->tf_ecx = scp->sc_ecx;
    541 	tf->tf_eax = scp->sc_eax;
    542 	tf->tf_eip = scp->sc_eip;
    543 	tf->tf_cs = scp->sc_cs;
    544 	tf->tf_esp = scp->sc_esp_at_signal;
    545 	tf->tf_ss = scp->sc_ss;
    546 
    547 	/* Restore signal stack. */
    548 	/*
    549 	 * Linux really does it this way; it doesn't have space in sigframe
    550 	 * to save the onstack flag.
    551 	 */
    552 	ss_gap = (ssize_t)
    553 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    554 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    555 		sas->ss_flags |= SS_ONSTACK;
    556 	else
    557 		sas->ss_flags &= ~SS_ONSTACK;
    558 
    559 	/* Restore signal mask. */
    560 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    561 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    562 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    563 	return EJUSTRETURN;
    564 }
    565 
    566 #ifdef USER_LDT
    567 
    568 int
    569 linux_read_ldt(l, uap, retval)
    570 	struct lwp *l;
    571 	struct linux_sys_modify_ldt_args /* {
    572 		syscallarg(int) func;
    573 		syscallarg(void *) ptr;
    574 		syscallarg(size_t) bytecount;
    575 	} */ *uap;
    576 	register_t *retval;
    577 {
    578 	struct proc *p = l->l_proc;
    579 	struct i386_get_ldt_args gl;
    580 	int error;
    581 	caddr_t sg;
    582 	char *parms;
    583 
    584 	DPRINTF(("linux_read_ldt!"));
    585 	sg = stackgap_init(p, 0);
    586 
    587 	gl.start = 0;
    588 	gl.desc = SCARG(uap, ptr);
    589 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    590 
    591 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    592 
    593 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    594 		return (error);
    595 
    596 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    597 		return (error);
    598 
    599 	*retval *= sizeof(union descriptor);
    600 	return (0);
    601 }
    602 
    603 struct linux_ldt_info {
    604 	u_int entry_number;
    605 	u_long base_addr;
    606 	u_int limit;
    607 	u_int seg_32bit:1;
    608 	u_int contents:2;
    609 	u_int read_exec_only:1;
    610 	u_int limit_in_pages:1;
    611 	u_int seg_not_present:1;
    612 	u_int useable:1;
    613 };
    614 
    615 int
    616 linux_write_ldt(l, uap, retval)
    617 	struct lwp *l;
    618 	struct linux_sys_modify_ldt_args /* {
    619 		syscallarg(int) func;
    620 		syscallarg(void *) ptr;
    621 		syscallarg(size_t) bytecount;
    622 	} */ *uap;
    623 	register_t *retval;
    624 {
    625 	struct proc *p = l->l_proc;
    626 	struct linux_ldt_info ldt_info;
    627 	struct segment_descriptor sd;
    628 	struct i386_set_ldt_args sl;
    629 	int error;
    630 	caddr_t sg;
    631 	char *parms;
    632 	int oldmode = (int)retval[0];
    633 
    634 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    635 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    636 		return (EINVAL);
    637 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    638 		return error;
    639 	if (ldt_info.entry_number >= 8192)
    640 		return (EINVAL);
    641 	if (ldt_info.contents == 3) {
    642 		if (oldmode)
    643 			return (EINVAL);
    644 		if (ldt_info.seg_not_present)
    645 			return (EINVAL);
    646 	}
    647 
    648 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    649 	    (oldmode || (ldt_info.contents == 0 &&
    650 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    651 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    652 	    ldt_info.useable == 0))) {
    653 		/* this means you should zero the ldt */
    654 		(void)memset(&sd, 0, sizeof(sd));
    655 	} else {
    656 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    657 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    658 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    659 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    660 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    661 		    (!ldt_info.read_exec_only << 1);
    662 		sd.sd_dpl = SEL_UPL;
    663 		sd.sd_p = !ldt_info.seg_not_present;
    664 		sd.sd_def32 = ldt_info.seg_32bit;
    665 		sd.sd_gran = ldt_info.limit_in_pages;
    666 		if (!oldmode)
    667 			sd.sd_xx = ldt_info.useable;
    668 		else
    669 			sd.sd_xx = 0;
    670 	}
    671 	sg = stackgap_init(p, 0);
    672 	sl.start = ldt_info.entry_number;
    673 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    674 	sl.num = 1;
    675 
    676 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    677 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    678 
    679 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    680 
    681 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    682 		return (error);
    683 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    684 		return (error);
    685 
    686 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    687 		return (error);
    688 
    689 	*retval = 0;
    690 	return (0);
    691 }
    692 
    693 #endif /* USER_LDT */
    694 
    695 int
    696 linux_sys_modify_ldt(l, v, retval)
    697 	struct lwp *l;
    698 	void *v;
    699 	register_t *retval;
    700 {
    701 	struct linux_sys_modify_ldt_args /* {
    702 		syscallarg(int) func;
    703 		syscallarg(void *) ptr;
    704 		syscallarg(size_t) bytecount;
    705 	} */ *uap = v;
    706 
    707 	switch (SCARG(uap, func)) {
    708 #ifdef USER_LDT
    709 	case 0:
    710 		return linux_read_ldt(l, uap, retval);
    711 	case 1:
    712 		retval[0] = 1;
    713 		return linux_write_ldt(l, uap, retval);
    714 	case 2:
    715 #ifdef notyet
    716 		return (linux_read_default_ldt(l, uap, retval);
    717 #else
    718 		return (ENOSYS);
    719 #endif
    720 	case 0x11:
    721 		retval[0] = 0;
    722 		return linux_write_ldt(l, uap, retval);
    723 #endif /* USER_LDT */
    724 
    725 	default:
    726 		return (ENOSYS);
    727 	}
    728 }
    729 
/*
 * XXX Hack to keep svgalib happy: report an opened wsdisplay VT under the
 * Linux console major number, with the minor number shifted up by one.
 * Only done when `raw' is set and wscons is configured; every other
 * device number is returned unchanged.
 *
 * A cleaner fix would be a mapping table covering all major device
 * numbers, with linux_mknod mapped as well.
 */
dev_t
linux_fakedev(dev_t dev, int raw)
{
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;

	if (raw && cdevsw_lookup(dev) == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
    751 
    752 #if (NWSDISPLAY > 0)
    753 /*
    754  * That's not complete, but enough to get an X server running.
    755  */
#define NR_KEYS 128

/* Keymap with no modifier pressed. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap with Shift pressed. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap with AltGr pressed. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Keymap with Control pressed. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Exported table of the keymaps above.  Slots 2 and 3 both refer to
 * altgr_map.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
    830 #endif
    831 
    832 static struct biosdisk_info *
    833 fd2biosinfo(p, fp)
    834 	struct proc *p;
    835 	struct file *fp;
    836 {
    837 	struct vnode *vp;
    838 	const char *blkname;
    839 	char diskname[16];
    840 	int i;
    841 	struct nativedisk_info *nip;
    842 	struct disklist *dl = i386_alldisks;
    843 
    844 	if (fp->f_type != DTYPE_VNODE)
    845 		return NULL;
    846 	vp = (struct vnode *)fp->f_data;
    847 
    848 	if (vp->v_type != VBLK)
    849 		return NULL;
    850 
    851 	blkname = devsw_blk2name(major(vp->v_rdev));
    852 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    853 	    DISKUNIT(vp->v_rdev));
    854 
    855 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    856 		nip = &dl->dl_nativedisks[i];
    857 		if (strcmp(diskname, nip->ni_devname))
    858 			continue;
    859 		if (nip->ni_nmatches != 0)
    860 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    861 	}
    862 
    863 	return NULL;
    864 }
    865 
    866 
    867 /*
    868  * We come here in a last attempt to satisfy a Linux ioctl() call
    869  */
    870 int
    871 linux_machdepioctl(p, v, retval)
    872 	struct proc *p;
    873 	void *v;
    874 	register_t *retval;
    875 {
    876 	struct linux_sys_ioctl_args /* {
    877 		syscallarg(int) fd;
    878 		syscallarg(u_long) com;
    879 		syscallarg(caddr_t) data;
    880 	} */ *uap = v;
    881 	struct sys_ioctl_args bia;
    882 	u_long com;
    883 	int error, error1;
    884 #if (NWSDISPLAY > 0)
    885 	struct vt_mode lvt;
    886 	caddr_t bvtp, sg;
    887 	struct kbentry kbe;
    888 #endif
    889 	struct linux_hd_geometry hdg;
    890 	struct linux_hd_big_geometry hdg_big;
    891 	struct biosdisk_info *bip;
    892 	struct filedesc *fdp;
    893 	struct file *fp;
    894 	int fd;
    895 	struct disklabel label, *labp;
    896 	struct partinfo partp;
    897 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
    898 	u_long start, biostotal, realtotal;
    899 	u_char heads, sectors;
    900 	u_int cylinders;
    901 	struct ioctl_pt pt;
    902 
    903 	fd = SCARG(uap, fd);
    904 	SCARG(&bia, fd) = fd;
    905 	SCARG(&bia, data) = SCARG(uap, data);
    906 	com = SCARG(uap, com);
    907 
    908 	fdp = p->p_fd;
    909 
    910 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    911 		return (EBADF);
    912 
    913 	FILE_USE(fp);
    914 
    915 	switch (com) {
    916 #if (NWSDISPLAY > 0)
    917 	case LINUX_KDGKBMODE:
    918 		com = KDGKBMODE;
    919 		break;
    920 	case LINUX_KDSKBMODE:
    921 		com = KDSKBMODE;
    922 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    923 			SCARG(&bia, data) = (caddr_t)K_RAW;
    924 		break;
    925 	case LINUX_KIOCSOUND:
    926 		SCARG(&bia, data) =
    927 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    928 		/* fall through */
    929 	case LINUX_KDMKTONE:
    930 		com = KDMKTONE;
    931 		break;
    932 	case LINUX_KDSETMODE:
    933 		com = KDSETMODE;
    934 		break;
    935 	case LINUX_KDGETMODE:
    936 		/* KD_* values are equal to the wscons numbers */
    937 		com = WSDISPLAYIO_GMODE;
    938 		break;
    939 	case LINUX_KDENABIO:
    940 		com = KDENABIO;
    941 		break;
    942 	case LINUX_KDDISABIO:
    943 		com = KDDISABIO;
    944 		break;
    945 	case LINUX_KDGETLED:
    946 		com = KDGETLED;
    947 		break;
    948 	case LINUX_KDSETLED:
    949 		com = KDSETLED;
    950 		break;
    951 	case LINUX_VT_OPENQRY:
    952 		com = VT_OPENQRY;
    953 		break;
    954 	case LINUX_VT_GETMODE:
    955 		SCARG(&bia, com) = VT_GETMODE;
    956 		/* XXX NJWLWP */
    957 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    958 			goto out;
    959 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    960 		    sizeof (struct vt_mode))))
    961 			goto out;
    962 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    963 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    964 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    965 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    966 		    sizeof (struct vt_mode));
    967 		goto out;
    968 	case LINUX_VT_SETMODE:
    969 		com = VT_SETMODE;
    970 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    971 		    sizeof (struct vt_mode))))
    972 			goto out;
    973 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    974 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    975 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    976 		sg = stackgap_init(p, 0);
    977 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    978 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    979 			goto out;
    980 		SCARG(&bia, data) = bvtp;
    981 		break;
    982 	case LINUX_VT_DISALLOCATE:
    983 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    984 		error = 0;
    985 		goto out;
    986 	case LINUX_VT_RELDISP:
    987 		com = VT_RELDISP;
    988 		break;
    989 	case LINUX_VT_ACTIVATE:
    990 		com = VT_ACTIVATE;
    991 		break;
    992 	case LINUX_VT_WAITACTIVE:
    993 		com = VT_WAITACTIVE;
    994 		break;
    995 	case LINUX_VT_GETSTATE:
    996 		com = VT_GETSTATE;
    997 		break;
    998 	case LINUX_KDGKBTYPE:
    999 	    {
   1000 		static const u_int8_t kb101 = KB_101;
   1001 
   1002 		/* This is what Linux does. */
   1003 		error = copyout(&kb101, SCARG(uap, data), 1);
   1004 		goto out;
   1005 	    }
   1006 	case LINUX_KDGKBENT:
   1007 		/*
   1008 		 * The Linux KDGKBENT ioctl is different from the
   1009 		 * SYSV original. So we handle it in machdep code.
   1010 		 * XXX We should use keyboard mapping information
   1011 		 * from wsdisplay, but this would be expensive.
   1012 		 */
   1013 		if ((error = copyin(SCARG(uap, data), &kbe,
   1014 				    sizeof(struct kbentry))))
   1015 			goto out;
   1016 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1017 		    || kbe.kb_index >= NR_KEYS) {
   1018 			error = EINVAL;
   1019 			goto out;
   1020 		}
   1021 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1022 		error = copyout(&kbe, SCARG(uap, data),
   1023 				sizeof(struct kbentry));
   1024 		goto out;
   1025 #endif
   1026 	case LINUX_HDIO_GETGEO:
   1027 	case LINUX_HDIO_GETGEO_BIG:
   1028 		/*
   1029 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1030 		 * if possible (extending its # of cylinders if it's beyond
   1031 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1032 		 * the real geometry) if not found, by returning an
   1033 		 * error. See common/linux_hdio.c
   1034 		 */
   1035 		bip = fd2biosinfo(p, fp);
   1036 		ioctlf = fp->f_ops->fo_ioctl;
   1037 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
   1038 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
   1039 		if (error != 0 && error1 != 0) {
   1040 			error = error1;
   1041 			goto out;
   1042 		}
   1043 		labp = error != 0 ? &label : partp.disklab;
   1044 		start = error1 != 0 ? partp.part->p_offset : 0;
   1045 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1046 		    && bip->bi_cyl != 0) {
   1047 			heads = bip->bi_head;
   1048 			sectors = bip->bi_sec;
   1049 			cylinders = bip->bi_cyl;
   1050 			biostotal = heads * sectors * cylinders;
   1051 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1052 			    labp->d_ncylinders;
   1053 			if (realtotal > biostotal)
   1054 				cylinders = realtotal / (heads * sectors);
   1055 		} else {
   1056 			heads = labp->d_ntracks;
   1057 			cylinders = labp->d_ncylinders;
   1058 			sectors = labp->d_nsectors;
   1059 		}
   1060 		if (com == LINUX_HDIO_GETGEO) {
   1061 			hdg.start = start;
   1062 			hdg.heads = heads;
   1063 			hdg.cylinders = cylinders;
   1064 			hdg.sectors = sectors;
   1065 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1066 			goto out;
   1067 		} else {
   1068 			hdg_big.start = start;
   1069 			hdg_big.heads = heads;
   1070 			hdg_big.cylinders = cylinders;
   1071 			hdg_big.sectors = sectors;
   1072 			error = copyout(&hdg_big, SCARG(uap, data),
   1073 			    sizeof hdg_big);
   1074 			goto out;
   1075 		}
   1076 
   1077 	default:
   1078 		/*
   1079 		 * Unknown to us. If it's on a device, just pass it through
   1080 		 * using PTIOCLINUX, the device itself might be able to
   1081 		 * make some sense of it.
   1082 		 * XXX hack: if the function returns EJUSTRETURN,
   1083 		 * it has stuffed a sysctl return value in pt.data.
   1084 		 */
   1085 		FILE_USE(fp);
   1086 		ioctlf = fp->f_ops->fo_ioctl;
   1087 		pt.com = SCARG(uap, com);
   1088 		pt.data = SCARG(uap, data);
   1089 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
   1090 		FILE_UNUSE(fp, p);
   1091 		if (error == EJUSTRETURN) {
   1092 			retval[0] = (register_t)pt.data;
   1093 			error = 0;
   1094 		}
   1095 
   1096 		if (error == ENOTTY)
   1097 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1098 			    com));
   1099 		goto out;
   1100 	}
   1101 	SCARG(&bia, com) = com;
   1102 	/* XXX NJWLWP */
   1103 	error = sys_ioctl(curlwp, &bia, retval);
   1104 out:
   1105 	FILE_UNUSE(fp ,p);
   1106 	return error;
   1107 }
   1108 
   1109 /*
   1110  * Set I/O permissions for a process. Just set the maximum level
   1111  * right away (ignoring the argument), otherwise we would have
   1112  * to rely on I/O permission maps, which are not implemented.
   1113  */
   1114 int
   1115 linux_sys_iopl(l, v, retval)
   1116 	struct lwp *l;
   1117 	void *v;
   1118 	register_t *retval;
   1119 {
   1120 #if 0
   1121 	struct linux_sys_iopl_args /* {
   1122 		syscallarg(int) level;
   1123 	} */ *uap = v;
   1124 #endif
   1125 	struct proc *p = l->l_proc;
   1126 	struct trapframe *fp = l->l_md.md_regs;
   1127 
   1128 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1129 		return EPERM;
   1130 	fp->tf_eflags |= PSL_IOPL;
   1131 	*retval = 0;
   1132 	return 0;
   1133 }
   1134 
   1135 /*
   1136  * See above. If a root process tries to set access to an I/O port,
   1137  * just let it have the whole range.
   1138  */
   1139 int
   1140 linux_sys_ioperm(l, v, retval)
   1141 	struct lwp *l;
   1142 	void *v;
   1143 	register_t *retval;
   1144 {
   1145 	struct linux_sys_ioperm_args /* {
   1146 		syscallarg(unsigned int) lo;
   1147 		syscallarg(unsigned int) hi;
   1148 		syscallarg(int) val;
   1149 	} */ *uap = v;
   1150 	struct proc *p = l->l_proc;
   1151 	struct trapframe *fp = l->l_md.md_regs;
   1152 
   1153 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1154 		return EPERM;
   1155 	if (SCARG(uap, val))
   1156 		fp->tf_eflags |= PSL_IOPL;
   1157 	*retval = 0;
   1158 	return 0;
   1159 }
   1160 
   1161 int
   1162 linux_exec_setup_stack(struct proc *p, struct exec_package *epp)
   1163 {
   1164 	u_long max_stack_size;
   1165 	u_long access_linear_min, access_size;
   1166 	u_long noaccess_linear_min, noaccess_size;
   1167 
   1168 #ifndef	USRSTACK32
   1169 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
   1170 #endif
   1171 
   1172 	if (epp->ep_flags & EXEC_32) {
   1173 		epp->ep_minsaddr = USRSTACK32;
   1174 		max_stack_size = MAXSSIZ;
   1175 	} else {
   1176 		epp->ep_minsaddr = USRSTACK;
   1177 		max_stack_size = MAXSSIZ;
   1178 	}
   1179 
   1180 	if (epp->ep_minsaddr > LINUX_USRSTACK)
   1181 		epp->ep_minsaddr = LINUX_USRSTACK;
   1182 #ifdef DEBUG_LINUX
   1183 	else {
   1184 		/*
   1185 		 * Someone needs to make KERNBASE and TEXTADDR
   1186 		 * java versions < 1.4.2 need the stack to be
   1187 		 * at 0xC0000000
   1188 		 */
   1189 		uprintf("Cannot setup stack to 0xC0000000, "
   1190 		    "java will not work properly\n");
   1191 	}
   1192 #endif
   1193 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
   1194 		max_stack_size);
   1195 	epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;
   1196 
   1197 	/*
   1198 	 * set up commands for stack.  note that this takes *two*, one to
   1199 	 * map the part of the stack which we can access, and one to map
   1200 	 * the part which we can't.
   1201 	 *
   1202 	 * arguably, it could be made into one, but that would require the
   1203 	 * addition of another mapping proc, which is unnecessary
   1204 	 */
   1205 	access_size = epp->ep_ssize;
   1206 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
   1207 	noaccess_size = max_stack_size - access_size;
   1208 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
   1209 	    access_size), noaccess_size);
   1210 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
   1211 	    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
   1212 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
   1213 	    access_linear_min, NULLVP, 0,
   1214 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
   1215 
   1216 	return 0;
   1217 }
   1218