Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.93
      1 /*	$NetBSD: linux_machdep.c,v 1.93 2003/08/09 14:16:44 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.93 2003/08/09 14:16:44 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
/*
 * Debug tracing.  When DEBUG_LINUX is defined, DPRINTF((fmt, ...)) expands
 * to a uprintf call; otherwise it expands to nothing, so its argument is
 * never evaluated.  The argument must carry its own parentheses.
 */
#ifndef DEBUG_LINUX
#define DPRINTF(a)
#else
#define DPRINTF(a) uprintf a
#endif
    120 
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    122 extern struct disklist *i386_alldisks;
    123 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    124     sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    125 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    126     sigset_t *, struct linux_sigcontext *));
    127 static int linux_restore_sigcontext __P((struct lwp *,
    128     struct linux_sigcontext *, register_t *));
    129 static void linux_rt_sendsig __P((int, sigset_t *, u_long));
    130 static void linux_old_sendsig __P((int, sigset_t *, u_long));
    131 
    132 extern char linux_sigcode[], linux_rt_sigcode[];
    133 /*
    134  * Deal with some i386-specific things in the Linux emulation code.
    135  */
    136 
    137 void
    138 linux_setregs(l, epp, stack)
    139 	struct lwp *l;
    140 	struct exec_package *epp;
    141 	u_long stack;
    142 {
    143 	struct pcb *pcb = &l->l_addr->u_pcb;
    144 	struct trapframe *tf;
    145 
    146 #if NNPX > 0
    147 	/* If we were using the FPU, forget about it. */
    148 	if (npxproc == l)
    149 		npxdrop();
    150 #endif
    151 
    152 #ifdef USER_LDT
    153 	pmap_ldt_cleanup(l);
    154 #endif
    155 
    156 	l->l_md.md_flags &= ~MDP_USEDFPU;
    157 
    158 	if (i386_use_fxsave) {
    159 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    161 	} else
    162 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    163 
    164 	tf = l->l_md.md_regs;
    165 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    166 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    167 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_edi = 0;
    170 	tf->tf_esi = 0;
    171 	tf->tf_ebp = 0;
    172 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    173 	tf->tf_edx = 0;
    174 	tf->tf_ecx = 0;
    175 	tf->tf_eax = 0;
    176 	tf->tf_eip = epp->ep_entry;
    177 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    178 	tf->tf_eflags = PSL_USERSET;
    179 	tf->tf_esp = stack;
    180 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    181 }
    182 
    183 /*
    184  * Send an interrupt to process.
    185  *
    186  * Stack is set up to allow sigcode stored
    187  * in u. to call routine, followed by kcall
    188  * to sigreturn routine below.  After sigreturn
    189  * resets the signal mask, the stack, and the
    190  * frame pointer, it returns to the user
    191  * specified pc, psl.
    192  */
    193 
    194 void
    195 linux_sendsig(sig, mask, code)
    196 	int sig;
    197 	sigset_t *mask;
    198 	u_long code;
    199 {
    200 	if (SIGACTION(curproc, sig).sa_flags & SA_SIGINFO)
    201 		linux_rt_sendsig(sig, mask, code);
    202 	else
    203 		linux_old_sendsig(sig, mask, code);
    204 }
    205 
    206 
    207 static void
    208 linux_save_ucontext(l, tf, mask, sas, uc)
    209 	struct lwp *l;
    210 	struct trapframe *tf;
    211 	sigset_t *mask;
    212 	struct sigaltstack *sas;
    213 	struct linux_ucontext *uc;
    214 {
    215 	uc->uc_flags = 0;
    216 	uc->uc_link = NULL;
    217 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    218 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    219 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    220 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    221 }
    222 
    223 static void
    224 linux_save_sigcontext(l, tf, mask, sc)
    225 	struct lwp *l;
    226 	struct trapframe *tf;
    227 	sigset_t *mask;
    228 	struct linux_sigcontext *sc;
    229 {
    230 	/* Save register context. */
    231 #ifdef VM86
    232 	if (tf->tf_eflags & PSL_VM) {
    233 		sc->sc_gs = tf->tf_vm86_gs;
    234 		sc->sc_fs = tf->tf_vm86_fs;
    235 		sc->sc_es = tf->tf_vm86_es;
    236 		sc->sc_ds = tf->tf_vm86_ds;
    237 		sc->sc_eflags = get_vflags(l);
    238 	} else
    239 #endif
    240 	{
    241 		sc->sc_gs = tf->tf_gs;
    242 		sc->sc_fs = tf->tf_fs;
    243 		sc->sc_es = tf->tf_es;
    244 		sc->sc_ds = tf->tf_ds;
    245 		sc->sc_eflags = tf->tf_eflags;
    246 	}
    247 	sc->sc_edi = tf->tf_edi;
    248 	sc->sc_esi = tf->tf_esi;
    249 	sc->sc_esp = tf->tf_esp;
    250 	sc->sc_ebp = tf->tf_ebp;
    251 	sc->sc_ebx = tf->tf_ebx;
    252 	sc->sc_edx = tf->tf_edx;
    253 	sc->sc_ecx = tf->tf_ecx;
    254 	sc->sc_eax = tf->tf_eax;
    255 	sc->sc_eip = tf->tf_eip;
    256 	sc->sc_cs = tf->tf_cs;
    257 	sc->sc_esp_at_signal = tf->tf_esp;
    258 	sc->sc_ss = tf->tf_ss;
    259 	sc->sc_err = tf->tf_err;
    260 	sc->sc_trapno = tf->tf_trapno;
    261 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    262 	sc->sc_387 = NULL;
    263 
    264 	/* Save signal stack. */
    265 	/* Linux doesn't save the onstack flag in sigframe */
    266 
    267 	/* Save signal mask. */
    268 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    269 }
    270 
    271 static void
    272 linux_rt_sendsig(sig, mask, code)
    273 	int sig;
    274 	sigset_t *mask;
    275 	u_long code;
    276 {
    277 	struct lwp *l = curlwp;
    278 	struct proc *p = l->l_proc;
    279 	struct trapframe *tf;
    280 	struct linux_rt_sigframe *fp, frame;
    281 	int onstack;
    282 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    283 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    284 
    285 	tf = l->l_md.md_regs;
    286 	/* Do we need to jump onto the signal stack? */
    287 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    288 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    289 
    290 
    291 	/* Allocate space for the signal handler context. */
    292 	if (onstack)
    293 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    294 		    sas->ss_size);
    295 	else
    296 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    297 	fp--;
    298 
    299 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    300 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    301 
    302 	/* Build stack frame for signal trampoline. */
    303 	frame.sf_handler = catcher;
    304 	frame.sf_sig = native_to_linux_signo[sig];
    305 	frame.sf_sip = &fp->sf_si;
    306 	frame.sf_ucp = &fp->sf_uc;
    307 
    308 	(void)memset(&frame.sf_si, 0, sizeof(frame.sf_si));
    309 	/*
    310 	 * XXX: We'll fake bit of it here, all of the following
    311 	 * info is a bit bogus, because we don't have the
    312 	 * right info passed to us from the trap.
    313 	 */
    314 	switch (frame.sf_si.lsi_signo = frame.sf_sig) {
    315 	case LINUX_SIGSEGV:
    316 		frame.sf_si.lsi_code = LINUX_SEGV_MAPERR;
    317 		break;
    318 	case LINUX_SIGBUS:
    319 		frame.sf_si.lsi_code = LINUX_BUS_ADRERR;
    320 		break;
    321 	case LINUX_SIGTRAP:
    322 		frame.sf_si.lsi_code = LINUX_TRAP_BRKPT;
    323 		break;
    324 	case LINUX_SIGCHLD:
    325 	case LINUX_SIGIO:
    326 	default:
    327 		frame.sf_si.lsi_signo = 0;
    328 		break;
    329 	}
    330 
    331 	/* Save register context. */
    332 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    333 
    334 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    335 		/*
    336 		 * Process has trashed its stack; give it an illegal
    337 		 * instruction to halt it in its tracks.
    338 		 */
    339 		sigexit(l, SIGILL);
    340 		/* NOTREACHED */
    341 	}
    342 
    343 	/*
    344 	 * Build context to run handler in.
    345 	 */
    346 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    347 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    348 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    349 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    350 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    351 	    (linux_rt_sigcode - linux_sigcode);
    352 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    353 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    354 	tf->tf_esp = (int)fp;
    355 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    356 
    357 	/* Remember that we're now on the signal stack. */
    358 	if (onstack)
    359 		sas->ss_flags |= SS_ONSTACK;
    360 }
    361 
    362 static void
    363 linux_old_sendsig(sig, mask, code)
    364 	int sig;
    365 	sigset_t *mask;
    366 	u_long code;
    367 {
    368 	struct lwp *l = curlwp;
    369 	struct proc *p = l->l_proc;
    370 	struct trapframe *tf;
    371 	struct linux_sigframe *fp, frame;
    372 	int onstack;
    373 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    374 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    375 
    376 	tf = l->l_md.md_regs;
    377 
    378 	/* Do we need to jump onto the signal stack? */
    379 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    380 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    381 
    382 	/* Allocate space for the signal handler context. */
    383 	if (onstack)
    384 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    385 		    sas->ss_size);
    386 	else
    387 		fp = (struct linux_sigframe *)tf->tf_esp;
    388 	fp--;
    389 
    390 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    391 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    392 
    393 	/* Build stack frame for signal trampoline. */
    394 	frame.sf_handler = catcher;
    395 	frame.sf_sig = native_to_linux_signo[sig];
    396 
    397 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    398 
    399 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    400 		/*
    401 		 * Process has trashed its stack; give it an illegal
    402 		 * instruction to halt it in its tracks.
    403 		 */
    404 		sigexit(l, SIGILL);
    405 		/* NOTREACHED */
    406 	}
    407 
    408 	/*
    409 	 * Build context to run handler in.
    410 	 */
    411 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    412 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    413 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    414 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    415 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    416 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    417 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    418 	tf->tf_esp = (int)fp;
    419 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    420 
    421 	/* Remember that we're now on the signal stack. */
    422 	if (onstack)
    423 		sas->ss_flags |= SS_ONSTACK;
    424 }
    425 
    426 /*
    427  * System call to cleanup state after a signal
    428  * has been taken.  Reset signal mask and
    429  * stack state from context left by sendsig (above).
    430  * Return to previous pc and psl as specified by
    431  * context left by sendsig. Check carefully to
    432  * make sure that the user has not modified the
    433  * psl to gain improper privileges or to cause
    434  * a machine fault.
    435  */
    436 int
    437 linux_sys_rt_sigreturn(l, v, retval)
    438 	struct lwp *l;
    439 	void *v;
    440 	register_t *retval;
    441 {
    442 	struct linux_sys_rt_sigreturn_args /* {
    443 		syscallarg(struct linux_ucontext *) ucp;
    444 	} */ *uap = v;
    445 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    446 	int error;
    447 
    448 	/*
    449 	 * The trampoline code hands us the context.
    450 	 * It is unsafe to keep track of it ourselves, in the event that a
    451 	 * program jumps out of a signal handler.
    452 	 */
    453 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    454 		return error;
    455 
    456 	/* XXX XAX we can do better here by using more of the ucontext */
    457 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    458 }
    459 
    460 int
    461 linux_sys_sigreturn(l, v, retval)
    462 	struct lwp *l;
    463 	void *v;
    464 	register_t *retval;
    465 {
    466 	struct linux_sys_sigreturn_args /* {
    467 		syscallarg(struct linux_sigcontext *) scp;
    468 	} */ *uap = v;
    469 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    470 	int error;
    471 
    472 	/*
    473 	 * The trampoline code hands us the context.
    474 	 * It is unsafe to keep track of it ourselves, in the event that a
    475 	 * program jumps out of a signal handler.
    476 	 */
    477 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    478 		return error;
    479 	return linux_restore_sigcontext(l, &context, retval);
    480 }
    481 
    482 static int
    483 linux_restore_sigcontext(l, scp, retval)
    484 	struct lwp *l;
    485 	struct linux_sigcontext *scp;
    486 	register_t *retval;
    487 {
    488 	struct proc *p = l->l_proc;
    489 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    490 	struct trapframe *tf;
    491 	sigset_t mask;
    492 	ssize_t ss_gap;
    493 	/* Restore register context. */
    494 	tf = l->l_md.md_regs;
    495 
    496 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    497 #ifdef VM86
    498 	if (scp->sc_eflags & PSL_VM) {
    499 		void syscall_vm86 __P((struct trapframe));
    500 
    501 		tf->tf_vm86_gs = scp->sc_gs;
    502 		tf->tf_vm86_fs = scp->sc_fs;
    503 		tf->tf_vm86_es = scp->sc_es;
    504 		tf->tf_vm86_ds = scp->sc_ds;
    505 		set_vflags(l, scp->sc_eflags);
    506 		p->p_md.md_syscall = syscall_vm86;
    507 	} else
    508 #endif
    509 	{
    510 		/*
    511 		 * Check for security violations.  If we're returning to
    512 		 * protected mode, the CPU will validate the segment registers
    513 		 * automatically and generate a trap on violations.  We handle
    514 		 * the trap, rather than doing all of the checking here.
    515 		 */
    516 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    517 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    518 			return EINVAL;
    519 
    520 		tf->tf_gs = scp->sc_gs;
    521 		tf->tf_fs = scp->sc_fs;
    522 		tf->tf_es = scp->sc_es;
    523 		tf->tf_ds = scp->sc_ds;
    524 #ifdef VM86
    525 		if (tf->tf_eflags & PSL_VM)
    526 			(*p->p_emul->e_syscall_intern)(p);
    527 #endif
    528 		tf->tf_eflags = scp->sc_eflags;
    529 	}
    530 	tf->tf_edi = scp->sc_edi;
    531 	tf->tf_esi = scp->sc_esi;
    532 	tf->tf_ebp = scp->sc_ebp;
    533 	tf->tf_ebx = scp->sc_ebx;
    534 	tf->tf_edx = scp->sc_edx;
    535 	tf->tf_ecx = scp->sc_ecx;
    536 	tf->tf_eax = scp->sc_eax;
    537 	tf->tf_eip = scp->sc_eip;
    538 	tf->tf_cs = scp->sc_cs;
    539 	tf->tf_esp = scp->sc_esp_at_signal;
    540 	tf->tf_ss = scp->sc_ss;
    541 
    542 	/* Restore signal stack. */
    543 	/*
    544 	 * Linux really does it this way; it doesn't have space in sigframe
    545 	 * to save the onstack flag.
    546 	 */
    547 	ss_gap = (ssize_t)
    548 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    549 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    550 		sas->ss_flags |= SS_ONSTACK;
    551 	else
    552 		sas->ss_flags &= ~SS_ONSTACK;
    553 
    554 	/* Restore signal mask. */
    555 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    556 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    557 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    558 	return EJUSTRETURN;
    559 }
    560 
    561 #ifdef USER_LDT
    562 
    563 int
    564 linux_read_ldt(l, uap, retval)
    565 	struct lwp *l;
    566 	struct linux_sys_modify_ldt_args /* {
    567 		syscallarg(int) func;
    568 		syscallarg(void *) ptr;
    569 		syscallarg(size_t) bytecount;
    570 	} */ *uap;
    571 	register_t *retval;
    572 {
    573 	struct proc *p = l->l_proc;
    574 	struct i386_get_ldt_args gl;
    575 	int error;
    576 	caddr_t sg;
    577 	char *parms;
    578 
    579 	DPRINTF(("linux_read_ldt!"));
    580 	sg = stackgap_init(p, 0);
    581 
    582 	gl.start = 0;
    583 	gl.desc = SCARG(uap, ptr);
    584 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    585 
    586 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    587 
    588 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    589 		return (error);
    590 
    591 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    592 		return (error);
    593 
    594 	*retval *= sizeof(union descriptor);
    595 	return (0);
    596 }
    597 
    598 struct linux_ldt_info {
    599 	u_int entry_number;
    600 	u_long base_addr;
    601 	u_int limit;
    602 	u_int seg_32bit:1;
    603 	u_int contents:2;
    604 	u_int read_exec_only:1;
    605 	u_int limit_in_pages:1;
    606 	u_int seg_not_present:1;
    607 	u_int useable:1;
    608 };
    609 
    610 int
    611 linux_write_ldt(l, uap, retval)
    612 	struct lwp *l;
    613 	struct linux_sys_modify_ldt_args /* {
    614 		syscallarg(int) func;
    615 		syscallarg(void *) ptr;
    616 		syscallarg(size_t) bytecount;
    617 	} */ *uap;
    618 	register_t *retval;
    619 {
    620 	struct proc *p = l->l_proc;
    621 	struct linux_ldt_info ldt_info;
    622 	struct segment_descriptor sd;
    623 	struct i386_set_ldt_args sl;
    624 	int error;
    625 	caddr_t sg;
    626 	char *parms;
    627 	int oldmode = (int)retval[0];
    628 
    629 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    630 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    631 		return (EINVAL);
    632 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    633 		return error;
    634 	if (ldt_info.entry_number >= 8192)
    635 		return (EINVAL);
    636 	if (ldt_info.contents == 3) {
    637 		if (oldmode)
    638 			return (EINVAL);
    639 		if (ldt_info.seg_not_present)
    640 			return (EINVAL);
    641 	}
    642 
    643 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    644 	    (oldmode || (ldt_info.contents == 0 &&
    645 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    646 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    647 	    ldt_info.useable == 0))) {
    648 		/* this means you should zero the ldt */
    649 		(void)memset(&sd, 0, sizeof(sd));
    650 	} else {
    651 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    652 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    653 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    654 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    655 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    656 		    (!ldt_info.read_exec_only << 1);
    657 		sd.sd_dpl = SEL_UPL;
    658 		sd.sd_p = !ldt_info.seg_not_present;
    659 		sd.sd_def32 = ldt_info.seg_32bit;
    660 		sd.sd_gran = ldt_info.limit_in_pages;
    661 		if (!oldmode)
    662 			sd.sd_xx = ldt_info.useable;
    663 		else
    664 			sd.sd_xx = 0;
    665 	}
    666 	sg = stackgap_init(p, 0);
    667 	sl.start = ldt_info.entry_number;
    668 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    669 	sl.num = 1;
    670 
    671 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    672 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    673 
    674 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    675 
    676 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    677 		return (error);
    678 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    679 		return (error);
    680 
    681 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    682 		return (error);
    683 
    684 	*retval = 0;
    685 	return (0);
    686 }
    687 
    688 #endif /* USER_LDT */
    689 
    690 int
    691 linux_sys_modify_ldt(l, v, retval)
    692 	struct lwp *l;
    693 	void *v;
    694 	register_t *retval;
    695 {
    696 	struct linux_sys_modify_ldt_args /* {
    697 		syscallarg(int) func;
    698 		syscallarg(void *) ptr;
    699 		syscallarg(size_t) bytecount;
    700 	} */ *uap = v;
    701 
    702 	switch (SCARG(uap, func)) {
    703 #ifdef USER_LDT
    704 	case 0:
    705 		return linux_read_ldt(l, uap, retval);
    706 	case 1:
    707 		retval[0] = 1;
    708 		return linux_write_ldt(l, uap, retval);
    709 	case 2:
    710 #ifdef notyet
    711 		return (linux_read_default_ldt(l, uap, retval);
    712 #else
    713 		return (ENOSYS);
    714 #endif
    715 	case 0x11:
    716 		retval[0] = 0;
    717 		return linux_write_ldt(l, uap, retval);
    718 #endif /* USER_LDT */
    719 
    720 	default:
    721 		return (ENOSYS);
    722 	}
    723 }
    724 
/*
 * XXX Pathetic hack to make svgalib work.
 *
 * When "raw" is set and "dev" is served by the wsdisplay character
 * driver, report it under LINUX_CONS_MAJOR with the minor number bumped
 * by one, so that svgalib accepts an opened VT.  Every other device
 * number, and every call with raw == 0 or on a kernel without wsdisplay,
 * is returned unchanged.
 *
 * Should probably do it 'wrong the right way' and use a mapping array for
 * all major device numbers, and map linux_mknod too.
 */
dev_t
linux_fakedev(dev, raw)
	dev_t dev;
	int raw;
{
#if (NWSDISPLAY > 0)
	extern const struct cdevsw wsdisplay_cdevsw;

	if (raw && cdevsw_lookup(dev) == &wsdisplay_cdevsw)
		return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
#endif

	return dev;
}
    746 
    747 #if (NWSDISPLAY > 0)
    748 /*
    749  * That's not complete, but enough to get an X server running.
    750  */
    751 #define NR_KEYS 128
    752 static const u_short plain_map[NR_KEYS] = {
    753 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    754 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    755 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    756 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    757 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    758 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    759 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    760 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    761 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    762 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    763 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    764 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    765 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    766 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    767 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    768 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    769 }, shift_map[NR_KEYS] = {
    770 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    771 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    772 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    773 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    774 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    775 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    776 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    777 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    778 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    779 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    780 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    781 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    782 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    783 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    784 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    785 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    786 }, altgr_map[NR_KEYS] = {
    787 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    788 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    789 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    790 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    791 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    792 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    793 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    794 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    795 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    796 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    797 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    798 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    799 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    800 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    801 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    802 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    803 }, ctrl_map[NR_KEYS] = {
    804 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    805 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    806 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    807 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    808 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    809 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    810 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    811 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    812 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    813 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    814 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    815 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    816 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    817 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    818 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    819 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    820 };
    821 
    822 const u_short * const linux_keytabs[] = {
    823 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    824 };
    825 #endif
    826 
    827 static struct biosdisk_info *
    828 fd2biosinfo(p, fp)
    829 	struct proc *p;
    830 	struct file *fp;
    831 {
    832 	struct vnode *vp;
    833 	const char *blkname;
    834 	char diskname[16];
    835 	int i;
    836 	struct nativedisk_info *nip;
    837 	struct disklist *dl = i386_alldisks;
    838 
    839 	if (fp->f_type != DTYPE_VNODE)
    840 		return NULL;
    841 	vp = (struct vnode *)fp->f_data;
    842 
    843 	if (vp->v_type != VBLK)
    844 		return NULL;
    845 
    846 	blkname = devsw_blk2name(major(vp->v_rdev));
    847 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    848 	    DISKUNIT(vp->v_rdev));
    849 
    850 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    851 		nip = &dl->dl_nativedisks[i];
    852 		if (strcmp(diskname, nip->ni_devname))
    853 			continue;
    854 		if (nip->ni_nmatches != 0)
    855 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    856 	}
    857 
    858 	return NULL;
    859 }
    860 
    861 
    862 /*
    863  * We come here in a last attempt to satisfy a Linux ioctl() call
    864  */
    865 int
    866 linux_machdepioctl(p, v, retval)
    867 	struct proc *p;
    868 	void *v;
    869 	register_t *retval;
    870 {
    871 	struct linux_sys_ioctl_args /* {
    872 		syscallarg(int) fd;
    873 		syscallarg(u_long) com;
    874 		syscallarg(caddr_t) data;
    875 	} */ *uap = v;
    876 	struct sys_ioctl_args bia;
    877 	u_long com;
    878 	int error, error1;
    879 #if (NWSDISPLAY > 0)
    880 	struct vt_mode lvt;
    881 	caddr_t bvtp, sg;
    882 	struct kbentry kbe;
    883 #endif
    884 	struct linux_hd_geometry hdg;
    885 	struct linux_hd_big_geometry hdg_big;
    886 	struct biosdisk_info *bip;
    887 	struct filedesc *fdp;
    888 	struct file *fp;
    889 	int fd;
    890 	struct disklabel label, *labp;
    891 	struct partinfo partp;
    892 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
    893 	u_long start, biostotal, realtotal;
    894 	u_char heads, sectors;
    895 	u_int cylinders;
    896 	struct ioctl_pt pt;
    897 
    898 	fd = SCARG(uap, fd);
    899 	SCARG(&bia, fd) = fd;
    900 	SCARG(&bia, data) = SCARG(uap, data);
    901 	com = SCARG(uap, com);
    902 
    903 	fdp = p->p_fd;
    904 
    905 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    906 		return (EBADF);
    907 
    908 	FILE_USE(fp);
    909 
    910 	switch (com) {
    911 #if (NWSDISPLAY > 0)
    912 	case LINUX_KDGKBMODE:
    913 		com = KDGKBMODE;
    914 		break;
    915 	case LINUX_KDSKBMODE:
    916 		com = KDSKBMODE;
    917 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    918 			SCARG(&bia, data) = (caddr_t)K_RAW;
    919 		break;
    920 	case LINUX_KIOCSOUND:
    921 		SCARG(&bia, data) =
    922 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    923 		/* fall through */
    924 	case LINUX_KDMKTONE:
    925 		com = KDMKTONE;
    926 		break;
    927 	case LINUX_KDSETMODE:
    928 		com = KDSETMODE;
    929 		break;
    930 	case LINUX_KDGETMODE:
    931 		/* KD_* values are equal to the wscons numbers */
    932 		com = WSDISPLAYIO_GMODE;
    933 		break;
    934 	case LINUX_KDENABIO:
    935 		com = KDENABIO;
    936 		break;
    937 	case LINUX_KDDISABIO:
    938 		com = KDDISABIO;
    939 		break;
    940 	case LINUX_KDGETLED:
    941 		com = KDGETLED;
    942 		break;
    943 	case LINUX_KDSETLED:
    944 		com = KDSETLED;
    945 		break;
    946 	case LINUX_VT_OPENQRY:
    947 		com = VT_OPENQRY;
    948 		break;
    949 	case LINUX_VT_GETMODE:
    950 		SCARG(&bia, com) = VT_GETMODE;
    951 		/* XXX NJWLWP */
    952 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    953 			goto out;
    954 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    955 		    sizeof (struct vt_mode))))
    956 			goto out;
    957 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    958 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    959 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    960 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    961 		    sizeof (struct vt_mode));
    962 		goto out;
    963 	case LINUX_VT_SETMODE:
    964 		com = VT_SETMODE;
    965 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    966 		    sizeof (struct vt_mode))))
    967 			goto out;
    968 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    969 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    970 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    971 		sg = stackgap_init(p, 0);
    972 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    973 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    974 			goto out;
    975 		SCARG(&bia, data) = bvtp;
    976 		break;
    977 	case LINUX_VT_DISALLOCATE:
    978 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    979 		error = 0;
    980 		goto out;
    981 	case LINUX_VT_RELDISP:
    982 		com = VT_RELDISP;
    983 		break;
    984 	case LINUX_VT_ACTIVATE:
    985 		com = VT_ACTIVATE;
    986 		break;
    987 	case LINUX_VT_WAITACTIVE:
    988 		com = VT_WAITACTIVE;
    989 		break;
    990 	case LINUX_VT_GETSTATE:
    991 		com = VT_GETSTATE;
    992 		break;
    993 	case LINUX_KDGKBTYPE:
    994 	    {
    995 		static const u_int8_t kb101 = KB_101;
    996 
    997 		/* This is what Linux does. */
    998 		error = copyout(&kb101, SCARG(uap, data), 1);
    999 		goto out;
   1000 	    }
   1001 	case LINUX_KDGKBENT:
   1002 		/*
   1003 		 * The Linux KDGKBENT ioctl is different from the
   1004 		 * SYSV original. So we handle it in machdep code.
   1005 		 * XXX We should use keyboard mapping information
   1006 		 * from wsdisplay, but this would be expensive.
   1007 		 */
   1008 		if ((error = copyin(SCARG(uap, data), &kbe,
   1009 				    sizeof(struct kbentry))))
   1010 			goto out;
   1011 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1012 		    || kbe.kb_index >= NR_KEYS) {
   1013 			error = EINVAL;
   1014 			goto out;
   1015 		}
   1016 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1017 		error = copyout(&kbe, SCARG(uap, data),
   1018 				sizeof(struct kbentry));
   1019 		goto out;
   1020 #endif
   1021 	case LINUX_HDIO_GETGEO:
   1022 	case LINUX_HDIO_GETGEO_BIG:
   1023 		/*
   1024 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1025 		 * if possible (extending its # of cylinders if it's beyond
   1026 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1027 		 * the real geometry) if not found, by returning an
   1028 		 * error. See common/linux_hdio.c
   1029 		 */
   1030 		bip = fd2biosinfo(p, fp);
   1031 		ioctlf = fp->f_ops->fo_ioctl;
   1032 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
   1033 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
   1034 		if (error != 0 && error1 != 0) {
   1035 			error = error1;
   1036 			goto out;
   1037 		}
   1038 		labp = error != 0 ? &label : partp.disklab;
   1039 		start = error1 != 0 ? partp.part->p_offset : 0;
   1040 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1041 		    && bip->bi_cyl != 0) {
   1042 			heads = bip->bi_head;
   1043 			sectors = bip->bi_sec;
   1044 			cylinders = bip->bi_cyl;
   1045 			biostotal = heads * sectors * cylinders;
   1046 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1047 			    labp->d_ncylinders;
   1048 			if (realtotal > biostotal)
   1049 				cylinders = realtotal / (heads * sectors);
   1050 		} else {
   1051 			heads = labp->d_ntracks;
   1052 			cylinders = labp->d_ncylinders;
   1053 			sectors = labp->d_nsectors;
   1054 		}
   1055 		if (com == LINUX_HDIO_GETGEO) {
   1056 			hdg.start = start;
   1057 			hdg.heads = heads;
   1058 			hdg.cylinders = cylinders;
   1059 			hdg.sectors = sectors;
   1060 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1061 			goto out;
   1062 		} else {
   1063 			hdg_big.start = start;
   1064 			hdg_big.heads = heads;
   1065 			hdg_big.cylinders = cylinders;
   1066 			hdg_big.sectors = sectors;
   1067 			error = copyout(&hdg_big, SCARG(uap, data),
   1068 			    sizeof hdg_big);
   1069 			goto out;
   1070 		}
   1071 
   1072 	default:
   1073 		/*
   1074 		 * Unknown to us. If it's on a device, just pass it through
   1075 		 * using PTIOCLINUX, the device itself might be able to
   1076 		 * make some sense of it.
   1077 		 * XXX hack: if the function returns EJUSTRETURN,
   1078 		 * it has stuffed a sysctl return value in pt.data.
   1079 		 */
   1080 		FILE_USE(fp);
   1081 		ioctlf = fp->f_ops->fo_ioctl;
   1082 		pt.com = SCARG(uap, com);
   1083 		pt.data = SCARG(uap, data);
   1084 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
   1085 		FILE_UNUSE(fp, p);
   1086 		if (error == EJUSTRETURN) {
   1087 			retval[0] = (register_t)pt.data;
   1088 			error = 0;
   1089 		}
   1090 
   1091 		if (error == ENOTTY)
   1092 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1093 			    com));
   1094 		goto out;
   1095 	}
   1096 	SCARG(&bia, com) = com;
   1097 	/* XXX NJWLWP */
   1098 	error = sys_ioctl(curlwp, &bia, retval);
   1099 out:
   1100 	FILE_UNUSE(fp ,p);
   1101 	return error;
   1102 }
   1103 
   1104 /*
   1105  * Set I/O permissions for a process. Just set the maximum level
   1106  * right away (ignoring the argument), otherwise we would have
   1107  * to rely on I/O permission maps, which are not implemented.
   1108  */
   1109 int
   1110 linux_sys_iopl(l, v, retval)
   1111 	struct lwp *l;
   1112 	void *v;
   1113 	register_t *retval;
   1114 {
   1115 #if 0
   1116 	struct linux_sys_iopl_args /* {
   1117 		syscallarg(int) level;
   1118 	} */ *uap = v;
   1119 #endif
   1120 	struct proc *p = l->l_proc;
   1121 	struct trapframe *fp = l->l_md.md_regs;
   1122 
   1123 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1124 		return EPERM;
   1125 	fp->tf_eflags |= PSL_IOPL;
   1126 	*retval = 0;
   1127 	return 0;
   1128 }
   1129 
   1130 /*
   1131  * See above. If a root process tries to set access to an I/O port,
   1132  * just let it have the whole range.
   1133  */
   1134 int
   1135 linux_sys_ioperm(l, v, retval)
   1136 	struct lwp *l;
   1137 	void *v;
   1138 	register_t *retval;
   1139 {
   1140 	struct linux_sys_ioperm_args /* {
   1141 		syscallarg(unsigned int) lo;
   1142 		syscallarg(unsigned int) hi;
   1143 		syscallarg(int) val;
   1144 	} */ *uap = v;
   1145 	struct proc *p = l->l_proc;
   1146 	struct trapframe *fp = l->l_md.md_regs;
   1147 
   1148 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1149 		return EPERM;
   1150 	if (SCARG(uap, val))
   1151 		fp->tf_eflags |= PSL_IOPL;
   1152 	*retval = 0;
   1153 	return 0;
   1154 }
   1155 
   1156 int
   1157 linux_exec_setup_stack(struct proc *p, struct exec_package *epp)
   1158 {
   1159 	u_long max_stack_size;
   1160 	u_long access_linear_min, access_size;
   1161 	u_long noaccess_linear_min, noaccess_size;
   1162 
   1163 #ifndef	USRSTACK32
   1164 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
   1165 #endif
   1166 
   1167 	if (epp->ep_flags & EXEC_32) {
   1168 		epp->ep_minsaddr = USRSTACK32;
   1169 		max_stack_size = MAXSSIZ;
   1170 	} else {
   1171 		epp->ep_minsaddr = USRSTACK;
   1172 		max_stack_size = MAXSSIZ;
   1173 	}
   1174 
   1175 	if (epp->ep_minsaddr > LINUX_USRSTACK)
   1176 		epp->ep_minsaddr = LINUX_USRSTACK;
   1177 #ifdef DEBUG_LINUX
   1178 	else {
   1179 		/*
   1180 		 * Someone needs to make KERNBASE and TEXTADDR
   1181 		 * java versions < 1.4.2 need the stack to be
   1182 		 * at 0xC0000000
   1183 		 */
   1184 		uprintf("Cannot setup stack to 0xC0000000, "
   1185 		    "java will not work properly\n");
   1186 	}
   1187 #endif
   1188 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
   1189 		max_stack_size);
   1190 	epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;
   1191 
   1192 	/*
   1193 	 * set up commands for stack.  note that this takes *two*, one to
   1194 	 * map the part of the stack which we can access, and one to map
   1195 	 * the part which we can't.
   1196 	 *
   1197 	 * arguably, it could be made into one, but that would require the
   1198 	 * addition of another mapping proc, which is unnecessary
   1199 	 */
   1200 	access_size = epp->ep_ssize;
   1201 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
   1202 	noaccess_size = max_stack_size - access_size;
   1203 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
   1204 	    access_size), noaccess_size);
   1205 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
   1206 	    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
   1207 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
   1208 	    access_linear_min, NULLVP, 0,
   1209 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
   1210 
   1211 	return 0;
   1212 }
   1213