Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.96
      1 /*	$NetBSD: linux_machdep.c,v 1.96 2003/09/06 22:09:21 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.96 2003/09/06 22:09:21 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 
     81 #include <compat/linux/linux_syscallargs.h>
     82 
     83 #include <machine/cpu.h>
     84 #include <machine/cpufunc.h>
     85 #include <machine/psl.h>
     86 #include <machine/reg.h>
     87 #include <machine/segments.h>
     88 #include <machine/specialreg.h>
     89 #include <machine/sysarch.h>
     90 #include <machine/vm86.h>
     91 #include <machine/vmparam.h>
     92 
     93 /*
     94  * To see whether wscons is configured (for virtual console ioctl calls).
     95  */
     96 #if defined(_KERNEL_OPT)
     97 #include "wsdisplay.h"
     98 #endif
     99 #if (NWSDISPLAY > 0)
    100 #include <dev/wscons/wsconsio.h>
    101 #include <dev/wscons/wsdisplay_usl_io.h>
    102 #if defined(_KERNEL_OPT)
    103 #include "opt_xserver.h"
    104 #endif
    105 #endif
    106 
    107 #ifdef USER_LDT
        /*
         * Helpers behind linux_sys_modify_ldt(); only built when the kernel
         * is configured with USER_LDT.  Note that <machine/cpu.h> is already
         * included unconditionally further up, so the include below is
         * redundant (harmless, but could be dropped).
         */
    108 #include <machine/cpu.h>
    109 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    110     register_t *));
    111 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    112     register_t *));
    113 #endif
    114 
        /*
         * DPRINTF takes a single, fully parenthesised printf argument list,
         * i.e. DPRINTF(("fmt", args)).  With DEBUG_LINUX it expands to a
         * uprintf call; otherwise it expands to nothing.
         */
    115 #ifdef DEBUG_LINUX
    116 #define DPRINTF(a) uprintf a
    117 #else
    118 #define DPRINTF(a)
    119 #endif
    120 
        /* File-local helpers defined later in this file. */
    121 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
        /* Defined elsewhere; walked by fd2biosinfo() to match disks by name. */
    122 extern struct disklist *i386_alldisks;
    123 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    124     sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    125 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    126     sigset_t *, struct linux_sigcontext *));
    127 static int linux_restore_sigcontext __P((struct lwp *,
    128     struct linux_sigcontext *, register_t *));
    129 static void linux_rt_sendsig __P((int, sigset_t *, u_long));
    130 static void linux_old_sendsig __P((int, sigset_t *, u_long));
    131 
        /*
         * Signal trampolines, defined elsewhere.  linux_rt_sendsig() uses the
         * distance between the two symbols as the offset of the rt trampoline
         * from the process' ps_sigcode address.
         */
    132 extern char linux_sigcode[], linux_rt_sigcode[];
    133 /*
    134  * Deal with some i386-specific things in the Linux emulation code.
    135  */
    136 
    137 void
    138 linux_setregs(l, epp, stack)
    139 	struct lwp *l;
    140 	struct exec_package *epp;
    141 	u_long stack;
    142 {
    143 	struct pcb *pcb = &l->l_addr->u_pcb;
    144 	struct trapframe *tf;
    145 
    146 #if NNPX > 0
    147 	/* If we were using the FPU, forget about it. */
    148 	if (npxproc == l)
    149 		npxdrop();
    150 #endif
    151 
    152 #ifdef USER_LDT
    153 	pmap_ldt_cleanup(l);
    154 #endif
    155 
    156 	l->l_md.md_flags &= ~MDP_USEDFPU;
    157 
    158 	if (i386_use_fxsave) {
    159 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    161 	} else
    162 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    163 
    164 	tf = l->l_md.md_regs;
    165 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    166 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    167 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_edi = 0;
    170 	tf->tf_esi = 0;
    171 	tf->tf_ebp = 0;
    172 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    173 	tf->tf_edx = 0;
    174 	tf->tf_ecx = 0;
    175 	tf->tf_eax = 0;
    176 	tf->tf_eip = epp->ep_entry;
    177 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    178 	tf->tf_eflags = PSL_USERSET;
    179 	tf->tf_esp = stack;
    180 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    181 }
    182 
    183 /*
    184  * Send an interrupt to process.
    185  *
    186  * Stack is set up to allow sigcode stored
    187  * in u. to call routine, followed by kcall
    188  * to sigreturn routine below.  After sigreturn
    189  * resets the signal mask, the stack, and the
    190  * frame pointer, it returns to the user
    191  * specified pc, psl.
    192  */
    193 
    194 void
    195 linux_sendsig(ksiginfo_t *ksi, sigset_t *mask)
    196 {
    197 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    198 		linux_rt_sendsig(ksi->ksi_signo, mask, ksi->ksi_trap);
    199 	else
    200 		linux_old_sendsig(ksi->ksi_signo, mask, ksi->ksi_trap);
    201 }
    202 
    203 
    204 static void
    205 linux_save_ucontext(l, tf, mask, sas, uc)
    206 	struct lwp *l;
    207 	struct trapframe *tf;
    208 	sigset_t *mask;
    209 	struct sigaltstack *sas;
    210 	struct linux_ucontext *uc;
    211 {
    212 	uc->uc_flags = 0;
    213 	uc->uc_link = NULL;
    214 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    215 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    216 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    217 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    218 }
    219 
    220 static void
    221 linux_save_sigcontext(l, tf, mask, sc)
    222 	struct lwp *l;
    223 	struct trapframe *tf;
    224 	sigset_t *mask;
    225 	struct linux_sigcontext *sc;
    226 {
    227 	/* Save register context. */
    228 #ifdef VM86
    229 	if (tf->tf_eflags & PSL_VM) {
    230 		sc->sc_gs = tf->tf_vm86_gs;
    231 		sc->sc_fs = tf->tf_vm86_fs;
    232 		sc->sc_es = tf->tf_vm86_es;
    233 		sc->sc_ds = tf->tf_vm86_ds;
    234 		sc->sc_eflags = get_vflags(l);
    235 	} else
    236 #endif
    237 	{
    238 		sc->sc_gs = tf->tf_gs;
    239 		sc->sc_fs = tf->tf_fs;
    240 		sc->sc_es = tf->tf_es;
    241 		sc->sc_ds = tf->tf_ds;
    242 		sc->sc_eflags = tf->tf_eflags;
    243 	}
    244 	sc->sc_edi = tf->tf_edi;
    245 	sc->sc_esi = tf->tf_esi;
    246 	sc->sc_esp = tf->tf_esp;
    247 	sc->sc_ebp = tf->tf_ebp;
    248 	sc->sc_ebx = tf->tf_ebx;
    249 	sc->sc_edx = tf->tf_edx;
    250 	sc->sc_ecx = tf->tf_ecx;
    251 	sc->sc_eax = tf->tf_eax;
    252 	sc->sc_eip = tf->tf_eip;
    253 	sc->sc_cs = tf->tf_cs;
    254 	sc->sc_esp_at_signal = tf->tf_esp;
    255 	sc->sc_ss = tf->tf_ss;
    256 	sc->sc_err = tf->tf_err;
    257 	sc->sc_trapno = tf->tf_trapno;
    258 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    259 	sc->sc_387 = NULL;
    260 
    261 	/* Save signal stack. */
    262 	/* Linux doesn't save the onstack flag in sigframe */
    263 
    264 	/* Save signal mask. */
    265 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    266 }
    267 
    268 static void
    269 linux_rt_sendsig(sig, mask, code)
    270 	int sig;
    271 	sigset_t *mask;
    272 	u_long code;
    273 {
    274 	struct lwp *l = curlwp;
    275 	struct proc *p = l->l_proc;
    276 	struct trapframe *tf;
    277 	struct linux_rt_sigframe *fp, frame;
    278 	int onstack;
    279 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    280 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    281 
    282 	tf = l->l_md.md_regs;
    283 	/* Do we need to jump onto the signal stack? */
    284 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    285 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    286 
    287 
    288 	/* Allocate space for the signal handler context. */
    289 	if (onstack)
    290 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    291 		    sas->ss_size);
    292 	else
    293 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    294 	fp--;
    295 
    296 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    297 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    298 
    299 	/* Build stack frame for signal trampoline. */
    300 	frame.sf_handler = catcher;
    301 	frame.sf_sig = native_to_linux_signo[sig];
    302 	frame.sf_sip = &fp->sf_si;
    303 	frame.sf_ucp = &fp->sf_uc;
    304 
    305 	(void)memset(&frame.sf_si, 0, sizeof(frame.sf_si));
    306 	/*
    307 	 * XXX: We'll fake bit of it here, all of the following
    308 	 * info is a bit bogus, because we don't have the
    309 	 * right info passed to us from the trap.
    310 	 */
    311 	switch (frame.sf_si.lsi_signo = frame.sf_sig) {
    312 	case LINUX_SIGSEGV:
    313 		frame.sf_si.lsi_code = LINUX_SEGV_MAPERR;
    314 		break;
    315 	case LINUX_SIGBUS:
    316 		frame.sf_si.lsi_code = LINUX_BUS_ADRERR;
    317 		break;
    318 	case LINUX_SIGTRAP:
    319 		frame.sf_si.lsi_code = LINUX_TRAP_BRKPT;
    320 		break;
    321 	case LINUX_SIGCHLD:
    322 	case LINUX_SIGIO:
    323 	default:
    324 		frame.sf_si.lsi_signo = 0;
    325 		break;
    326 	}
    327 
    328 	/* Save register context. */
    329 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    330 
    331 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    332 		/*
    333 		 * Process has trashed its stack; give it an illegal
    334 		 * instruction to halt it in its tracks.
    335 		 */
    336 		sigexit(l, SIGILL);
    337 		/* NOTREACHED */
    338 	}
    339 
    340 	/*
    341 	 * Build context to run handler in.
    342 	 */
    343 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    344 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    345 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    346 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    347 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    348 	    (linux_rt_sigcode - linux_sigcode);
    349 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    350 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    351 	tf->tf_esp = (int)fp;
    352 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    353 
    354 	/* Remember that we're now on the signal stack. */
    355 	if (onstack)
    356 		sas->ss_flags |= SS_ONSTACK;
    357 }
    358 
    359 static void
    360 linux_old_sendsig(sig, mask, code)
    361 	int sig;
    362 	sigset_t *mask;
    363 	u_long code;
    364 {
    365 	struct lwp *l = curlwp;
    366 	struct proc *p = l->l_proc;
    367 	struct trapframe *tf;
    368 	struct linux_sigframe *fp, frame;
    369 	int onstack;
    370 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    371 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    372 
    373 	tf = l->l_md.md_regs;
    374 
    375 	/* Do we need to jump onto the signal stack? */
    376 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    377 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    378 
    379 	/* Allocate space for the signal handler context. */
    380 	if (onstack)
    381 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    382 		    sas->ss_size);
    383 	else
    384 		fp = (struct linux_sigframe *)tf->tf_esp;
    385 	fp--;
    386 
    387 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    388 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    389 
    390 	/* Build stack frame for signal trampoline. */
    391 	frame.sf_handler = catcher;
    392 	frame.sf_sig = native_to_linux_signo[sig];
    393 
    394 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    395 
    396 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    397 		/*
    398 		 * Process has trashed its stack; give it an illegal
    399 		 * instruction to halt it in its tracks.
    400 		 */
    401 		sigexit(l, SIGILL);
    402 		/* NOTREACHED */
    403 	}
    404 
    405 	/*
    406 	 * Build context to run handler in.
    407 	 */
    408 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    409 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    410 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    411 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    412 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    413 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    414 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    415 	tf->tf_esp = (int)fp;
    416 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    417 
    418 	/* Remember that we're now on the signal stack. */
    419 	if (onstack)
    420 		sas->ss_flags |= SS_ONSTACK;
    421 }
    422 
    423 /*
    424  * System call to cleanup state after a signal
    425  * has been taken.  Reset signal mask and
    426  * stack state from context left by sendsig (above).
    427  * Return to previous pc and psl as specified by
    428  * context left by sendsig. Check carefully to
    429  * make sure that the user has not modified the
    430  * psl to gain improper privileges or to cause
    431  * a machine fault.
    432  */
    433 int
    434 linux_sys_rt_sigreturn(l, v, retval)
    435 	struct lwp *l;
    436 	void *v;
    437 	register_t *retval;
    438 {
    439 	struct linux_sys_rt_sigreturn_args /* {
    440 		syscallarg(struct linux_ucontext *) ucp;
    441 	} */ *uap = v;
    442 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    443 	int error;
    444 
    445 	/*
    446 	 * The trampoline code hands us the context.
    447 	 * It is unsafe to keep track of it ourselves, in the event that a
    448 	 * program jumps out of a signal handler.
    449 	 */
    450 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    451 		return error;
    452 
    453 	/* XXX XAX we can do better here by using more of the ucontext */
    454 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    455 }
    456 
    457 int
    458 linux_sys_sigreturn(l, v, retval)
    459 	struct lwp *l;
    460 	void *v;
    461 	register_t *retval;
    462 {
    463 	struct linux_sys_sigreturn_args /* {
    464 		syscallarg(struct linux_sigcontext *) scp;
    465 	} */ *uap = v;
    466 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    467 	int error;
    468 
    469 	/*
    470 	 * The trampoline code hands us the context.
    471 	 * It is unsafe to keep track of it ourselves, in the event that a
    472 	 * program jumps out of a signal handler.
    473 	 */
    474 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    475 		return error;
    476 	return linux_restore_sigcontext(l, &context, retval);
    477 }
    478 
    479 static int
    480 linux_restore_sigcontext(l, scp, retval)
    481 	struct lwp *l;
    482 	struct linux_sigcontext *scp;
    483 	register_t *retval;
    484 {
    485 	struct proc *p = l->l_proc;
    486 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    487 	struct trapframe *tf;
    488 	sigset_t mask;
    489 	ssize_t ss_gap;
    490 	/* Restore register context. */
    491 	tf = l->l_md.md_regs;
    492 
    493 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    494 #ifdef VM86
    495 	if (scp->sc_eflags & PSL_VM) {
    496 		void syscall_vm86 __P((struct trapframe *));
    497 
    498 		tf->tf_vm86_gs = scp->sc_gs;
    499 		tf->tf_vm86_fs = scp->sc_fs;
    500 		tf->tf_vm86_es = scp->sc_es;
    501 		tf->tf_vm86_ds = scp->sc_ds;
    502 		set_vflags(l, scp->sc_eflags);
    503 		p->p_md.md_syscall = syscall_vm86;
    504 	} else
    505 #endif
    506 	{
    507 		/*
    508 		 * Check for security violations.  If we're returning to
    509 		 * protected mode, the CPU will validate the segment registers
    510 		 * automatically and generate a trap on violations.  We handle
    511 		 * the trap, rather than doing all of the checking here.
    512 		 */
    513 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    514 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    515 			return EINVAL;
    516 
    517 		tf->tf_gs = scp->sc_gs;
    518 		tf->tf_fs = scp->sc_fs;
    519 		tf->tf_es = scp->sc_es;
    520 		tf->tf_ds = scp->sc_ds;
    521 #ifdef VM86
    522 		if (tf->tf_eflags & PSL_VM)
    523 			(*p->p_emul->e_syscall_intern)(p);
    524 #endif
    525 		tf->tf_eflags = scp->sc_eflags;
    526 	}
    527 	tf->tf_edi = scp->sc_edi;
    528 	tf->tf_esi = scp->sc_esi;
    529 	tf->tf_ebp = scp->sc_ebp;
    530 	tf->tf_ebx = scp->sc_ebx;
    531 	tf->tf_edx = scp->sc_edx;
    532 	tf->tf_ecx = scp->sc_ecx;
    533 	tf->tf_eax = scp->sc_eax;
    534 	tf->tf_eip = scp->sc_eip;
    535 	tf->tf_cs = scp->sc_cs;
    536 	tf->tf_esp = scp->sc_esp_at_signal;
    537 	tf->tf_ss = scp->sc_ss;
    538 
    539 	/* Restore signal stack. */
    540 	/*
    541 	 * Linux really does it this way; it doesn't have space in sigframe
    542 	 * to save the onstack flag.
    543 	 */
    544 	ss_gap = (ssize_t)
    545 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    546 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    547 		sas->ss_flags |= SS_ONSTACK;
    548 	else
    549 		sas->ss_flags &= ~SS_ONSTACK;
    550 
    551 	/* Restore signal mask. */
    552 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    553 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    554 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    555 	return EJUSTRETURN;
    556 }
    557 
    558 #ifdef USER_LDT
    559 
    560 int
    561 linux_read_ldt(l, uap, retval)
    562 	struct lwp *l;
    563 	struct linux_sys_modify_ldt_args /* {
    564 		syscallarg(int) func;
    565 		syscallarg(void *) ptr;
    566 		syscallarg(size_t) bytecount;
    567 	} */ *uap;
    568 	register_t *retval;
    569 {
    570 	struct proc *p = l->l_proc;
    571 	struct i386_get_ldt_args gl;
    572 	int error;
    573 	caddr_t sg;
    574 	char *parms;
    575 
    576 	DPRINTF(("linux_read_ldt!"));
    577 	sg = stackgap_init(p, 0);
    578 
    579 	gl.start = 0;
    580 	gl.desc = SCARG(uap, ptr);
    581 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    582 
    583 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    584 
    585 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    586 		return (error);
    587 
    588 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    589 		return (error);
    590 
    591 	*retval *= sizeof(union descriptor);
    592 	return (0);
    593 }
    594 
    595 struct linux_ldt_info {
    596 	u_int entry_number;
    597 	u_long base_addr;
    598 	u_int limit;
    599 	u_int seg_32bit:1;
    600 	u_int contents:2;
    601 	u_int read_exec_only:1;
    602 	u_int limit_in_pages:1;
    603 	u_int seg_not_present:1;
    604 	u_int useable:1;
    605 };
    606 
    607 int
    608 linux_write_ldt(l, uap, retval)
    609 	struct lwp *l;
    610 	struct linux_sys_modify_ldt_args /* {
    611 		syscallarg(int) func;
    612 		syscallarg(void *) ptr;
    613 		syscallarg(size_t) bytecount;
    614 	} */ *uap;
    615 	register_t *retval;
    616 {
    617 	struct proc *p = l->l_proc;
    618 	struct linux_ldt_info ldt_info;
    619 	struct segment_descriptor sd;
    620 	struct i386_set_ldt_args sl;
    621 	int error;
    622 	caddr_t sg;
    623 	char *parms;
    624 	int oldmode = (int)retval[0];
    625 
    626 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    627 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    628 		return (EINVAL);
    629 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    630 		return error;
    631 	if (ldt_info.entry_number >= 8192)
    632 		return (EINVAL);
    633 	if (ldt_info.contents == 3) {
    634 		if (oldmode)
    635 			return (EINVAL);
    636 		if (ldt_info.seg_not_present)
    637 			return (EINVAL);
    638 	}
    639 
    640 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    641 	    (oldmode || (ldt_info.contents == 0 &&
    642 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    643 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    644 	    ldt_info.useable == 0))) {
    645 		/* this means you should zero the ldt */
    646 		(void)memset(&sd, 0, sizeof(sd));
    647 	} else {
    648 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    649 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    650 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    651 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    652 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    653 		    (!ldt_info.read_exec_only << 1);
    654 		sd.sd_dpl = SEL_UPL;
    655 		sd.sd_p = !ldt_info.seg_not_present;
    656 		sd.sd_def32 = ldt_info.seg_32bit;
    657 		sd.sd_gran = ldt_info.limit_in_pages;
    658 		if (!oldmode)
    659 			sd.sd_xx = ldt_info.useable;
    660 		else
    661 			sd.sd_xx = 0;
    662 	}
    663 	sg = stackgap_init(p, 0);
    664 	sl.start = ldt_info.entry_number;
    665 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    666 	sl.num = 1;
    667 
    668 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    669 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    670 
    671 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    672 
    673 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    674 		return (error);
    675 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    676 		return (error);
    677 
    678 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    679 		return (error);
    680 
    681 	*retval = 0;
    682 	return (0);
    683 }
    684 
    685 #endif /* USER_LDT */
    686 
    687 int
    688 linux_sys_modify_ldt(l, v, retval)
    689 	struct lwp *l;
    690 	void *v;
    691 	register_t *retval;
    692 {
    693 	struct linux_sys_modify_ldt_args /* {
    694 		syscallarg(int) func;
    695 		syscallarg(void *) ptr;
    696 		syscallarg(size_t) bytecount;
    697 	} */ *uap = v;
    698 
    699 	switch (SCARG(uap, func)) {
    700 #ifdef USER_LDT
    701 	case 0:
    702 		return linux_read_ldt(l, uap, retval);
    703 	case 1:
    704 		retval[0] = 1;
    705 		return linux_write_ldt(l, uap, retval);
    706 	case 2:
    707 #ifdef notyet
    708 		return (linux_read_default_ldt(l, uap, retval);
    709 #else
    710 		return (ENOSYS);
    711 #endif
    712 	case 0x11:
    713 		retval[0] = 0;
    714 		return linux_write_ldt(l, uap, retval);
    715 #endif /* USER_LDT */
    716 
    717 	default:
    718 		return (ENOSYS);
    719 	}
    720 }
    721 
/*
 * linux_fakedev: translate a device number into the one Linux expects.
 *
 *	dev	native device number
 *	raw	non-zero to enable the translation
 *
 * XXX This is a hack to keep svgalib happy: when raw is set and dev
 * belongs to the wsdisplay driver, the major number is replaced with
 * the Linux console major and the minor is bumped by one.  Every other
 * device number is returned unchanged.  The right way to do it "wrong"
 * would be a mapping array covering all major device numbers, applied
 * to linux_mknod as well.
 */
dev_t
linux_fakedev(dev_t dev, int raw)
{
	if (!raw)
		return dev;

#if (NWSDISPLAY > 0)
	{
		extern const struct cdevsw wsdisplay_cdevsw;

		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
	}
#endif

	return dev;
}
    743 
    744 #if (NWSDISPLAY > 0)
    745 /*
    746  * That's not complete, but enough to get an X server running.
         *
         * Four keymap tables of NR_KEYS 16-bit entries each (plain, shift,
         * altgr and ctrl), all defined in a single declaration, followed by
         * linux_keytabs[], which exports them in a fixed order.
         *
         * NOTE(review): the entries are presumably Linux keymap values
         * (type in the high byte, value in the low byte), indexed by
         * keycode -- confirm against the consumer of linux_keytabs.
    747  */
    748 #define NR_KEYS 128
    749 static const u_short plain_map[NR_KEYS] = {
    750 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    751 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    752 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    753 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    754 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    755 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    756 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    757 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    758 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    759 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    760 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    761 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    762 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    763 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    764 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    765 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    766 }, shift_map[NR_KEYS] = {
    767 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    768 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    769 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    770 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    771 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    772 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    773 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    774 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    775 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    776 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    777 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    778 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    779 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    780 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    781 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    782 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    783 }, altgr_map[NR_KEYS] = {
    784 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    785 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    786 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    787 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    788 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    789 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    790 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    791 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    792 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    793 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    794 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    795 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    796 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    797 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    798 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    799 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    800 }, ctrl_map[NR_KEYS] = {
    801 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    802 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    803 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    804 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    805 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    806 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    807 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    808 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    809 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    810 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    811 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    812 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    813 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    814 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    815 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    816 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    817 };
    818 
        /*
         * Exported table of keymaps.  altgr_map is listed twice, at indices
         * 2 and 3.
         *
         * NOTE(review): the index presumably corresponds to a Linux modifier
         * combination (plain, shift, altgr, shift+altgr, ctrl), with the
         * shift+altgr slot reusing the altgr table -- confirm.
         */
    819 const u_short * const linux_keytabs[] = {
    820 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    821 };
    822 #endif
    823 
    824 static struct biosdisk_info *
    825 fd2biosinfo(p, fp)
    826 	struct proc *p;
    827 	struct file *fp;
    828 {
    829 	struct vnode *vp;
    830 	const char *blkname;
    831 	char diskname[16];
    832 	int i;
    833 	struct nativedisk_info *nip;
    834 	struct disklist *dl = i386_alldisks;
    835 
    836 	if (fp->f_type != DTYPE_VNODE)
    837 		return NULL;
    838 	vp = (struct vnode *)fp->f_data;
    839 
    840 	if (vp->v_type != VBLK)
    841 		return NULL;
    842 
    843 	blkname = devsw_blk2name(major(vp->v_rdev));
    844 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    845 	    DISKUNIT(vp->v_rdev));
    846 
    847 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    848 		nip = &dl->dl_nativedisks[i];
    849 		if (strcmp(diskname, nip->ni_devname))
    850 			continue;
    851 		if (nip->ni_nmatches != 0)
    852 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    853 	}
    854 
    855 	return NULL;
    856 }
    857 
    858 
    859 /*
    860  * We come here in a last attempt to satisfy a Linux ioctl() call
    861  */
    862 int
    863 linux_machdepioctl(p, v, retval)
    864 	struct proc *p;
    865 	void *v;
    866 	register_t *retval;
    867 {
    868 	struct linux_sys_ioctl_args /* {
    869 		syscallarg(int) fd;
    870 		syscallarg(u_long) com;
    871 		syscallarg(caddr_t) data;
    872 	} */ *uap = v;
    873 	struct sys_ioctl_args bia;
    874 	u_long com;
    875 	int error, error1;
    876 #if (NWSDISPLAY > 0)
    877 	struct vt_mode lvt;
    878 	caddr_t bvtp, sg;
    879 	struct kbentry kbe;
    880 #endif
    881 	struct linux_hd_geometry hdg;
    882 	struct linux_hd_big_geometry hdg_big;
    883 	struct biosdisk_info *bip;
    884 	struct filedesc *fdp;
    885 	struct file *fp;
    886 	int fd;
    887 	struct disklabel label, *labp;
    888 	struct partinfo partp;
    889 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
    890 	u_long start, biostotal, realtotal;
    891 	u_char heads, sectors;
    892 	u_int cylinders;
    893 	struct ioctl_pt pt;
    894 
    895 	fd = SCARG(uap, fd);
    896 	SCARG(&bia, fd) = fd;
    897 	SCARG(&bia, data) = SCARG(uap, data);
    898 	com = SCARG(uap, com);
    899 
    900 	fdp = p->p_fd;
    901 
    902 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    903 		return (EBADF);
    904 
    905 	FILE_USE(fp);
    906 
    907 	switch (com) {
    908 #if (NWSDISPLAY > 0)
    909 	case LINUX_KDGKBMODE:
    910 		com = KDGKBMODE;
    911 		break;
    912 	case LINUX_KDSKBMODE:
    913 		com = KDSKBMODE;
    914 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    915 			SCARG(&bia, data) = (caddr_t)K_RAW;
    916 		break;
    917 	case LINUX_KIOCSOUND:
    918 		SCARG(&bia, data) =
    919 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    920 		/* fall through */
    921 	case LINUX_KDMKTONE:
    922 		com = KDMKTONE;
    923 		break;
    924 	case LINUX_KDSETMODE:
    925 		com = KDSETMODE;
    926 		break;
    927 	case LINUX_KDGETMODE:
    928 		/* KD_* values are equal to the wscons numbers */
    929 		com = WSDISPLAYIO_GMODE;
    930 		break;
    931 	case LINUX_KDENABIO:
    932 		com = KDENABIO;
    933 		break;
    934 	case LINUX_KDDISABIO:
    935 		com = KDDISABIO;
    936 		break;
    937 	case LINUX_KDGETLED:
    938 		com = KDGETLED;
    939 		break;
    940 	case LINUX_KDSETLED:
    941 		com = KDSETLED;
    942 		break;
    943 	case LINUX_VT_OPENQRY:
    944 		com = VT_OPENQRY;
    945 		break;
    946 	case LINUX_VT_GETMODE:
    947 		SCARG(&bia, com) = VT_GETMODE;
    948 		/* XXX NJWLWP */
    949 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    950 			goto out;
    951 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    952 		    sizeof (struct vt_mode))))
    953 			goto out;
    954 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    955 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    956 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    957 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    958 		    sizeof (struct vt_mode));
    959 		goto out;
    960 	case LINUX_VT_SETMODE:
    961 		com = VT_SETMODE;
    962 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    963 		    sizeof (struct vt_mode))))
    964 			goto out;
    965 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    966 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    967 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    968 		sg = stackgap_init(p, 0);
    969 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    970 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    971 			goto out;
    972 		SCARG(&bia, data) = bvtp;
    973 		break;
    974 	case LINUX_VT_DISALLOCATE:
    975 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    976 		error = 0;
    977 		goto out;
    978 	case LINUX_VT_RELDISP:
    979 		com = VT_RELDISP;
    980 		break;
    981 	case LINUX_VT_ACTIVATE:
    982 		com = VT_ACTIVATE;
    983 		break;
    984 	case LINUX_VT_WAITACTIVE:
    985 		com = VT_WAITACTIVE;
    986 		break;
    987 	case LINUX_VT_GETSTATE:
    988 		com = VT_GETSTATE;
    989 		break;
    990 	case LINUX_KDGKBTYPE:
    991 	    {
    992 		static const u_int8_t kb101 = KB_101;
    993 
    994 		/* This is what Linux does. */
    995 		error = copyout(&kb101, SCARG(uap, data), 1);
    996 		goto out;
    997 	    }
    998 	case LINUX_KDGKBENT:
    999 		/*
   1000 		 * The Linux KDGKBENT ioctl is different from the
   1001 		 * SYSV original. So we handle it in machdep code.
   1002 		 * XXX We should use keyboard mapping information
   1003 		 * from wsdisplay, but this would be expensive.
   1004 		 */
   1005 		if ((error = copyin(SCARG(uap, data), &kbe,
   1006 				    sizeof(struct kbentry))))
   1007 			goto out;
   1008 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1009 		    || kbe.kb_index >= NR_KEYS) {
   1010 			error = EINVAL;
   1011 			goto out;
   1012 		}
   1013 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1014 		error = copyout(&kbe, SCARG(uap, data),
   1015 				sizeof(struct kbentry));
   1016 		goto out;
   1017 #endif
   1018 	case LINUX_HDIO_GETGEO:
   1019 	case LINUX_HDIO_GETGEO_BIG:
   1020 		/*
   1021 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1022 		 * if possible (extending its # of cylinders if it's beyond
   1023 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1024 		 * the real geometry) if not found, by returning an
   1025 		 * error. See common/linux_hdio.c
   1026 		 */
   1027 		bip = fd2biosinfo(p, fp);
   1028 		ioctlf = fp->f_ops->fo_ioctl;
   1029 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
   1030 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
   1031 		if (error != 0 && error1 != 0) {
   1032 			error = error1;
   1033 			goto out;
   1034 		}
   1035 		labp = error != 0 ? &label : partp.disklab;
   1036 		start = error1 != 0 ? partp.part->p_offset : 0;
   1037 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1038 		    && bip->bi_cyl != 0) {
   1039 			heads = bip->bi_head;
   1040 			sectors = bip->bi_sec;
   1041 			cylinders = bip->bi_cyl;
   1042 			biostotal = heads * sectors * cylinders;
   1043 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1044 			    labp->d_ncylinders;
   1045 			if (realtotal > biostotal)
   1046 				cylinders = realtotal / (heads * sectors);
   1047 		} else {
   1048 			heads = labp->d_ntracks;
   1049 			cylinders = labp->d_ncylinders;
   1050 			sectors = labp->d_nsectors;
   1051 		}
   1052 		if (com == LINUX_HDIO_GETGEO) {
   1053 			hdg.start = start;
   1054 			hdg.heads = heads;
   1055 			hdg.cylinders = cylinders;
   1056 			hdg.sectors = sectors;
   1057 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1058 			goto out;
   1059 		} else {
   1060 			hdg_big.start = start;
   1061 			hdg_big.heads = heads;
   1062 			hdg_big.cylinders = cylinders;
   1063 			hdg_big.sectors = sectors;
   1064 			error = copyout(&hdg_big, SCARG(uap, data),
   1065 			    sizeof hdg_big);
   1066 			goto out;
   1067 		}
   1068 
   1069 	default:
   1070 		/*
   1071 		 * Unknown to us. If it's on a device, just pass it through
   1072 		 * using PTIOCLINUX, the device itself might be able to
   1073 		 * make some sense of it.
   1074 		 * XXX hack: if the function returns EJUSTRETURN,
   1075 		 * it has stuffed a sysctl return value in pt.data.
   1076 		 */
   1077 		FILE_USE(fp);
   1078 		ioctlf = fp->f_ops->fo_ioctl;
   1079 		pt.com = SCARG(uap, com);
   1080 		pt.data = SCARG(uap, data);
   1081 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
   1082 		FILE_UNUSE(fp, p);
   1083 		if (error == EJUSTRETURN) {
   1084 			retval[0] = (register_t)pt.data;
   1085 			error = 0;
   1086 		}
   1087 
   1088 		if (error == ENOTTY)
   1089 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1090 			    com));
   1091 		goto out;
   1092 	}
   1093 	SCARG(&bia, com) = com;
   1094 	/* XXX NJWLWP */
   1095 	error = sys_ioctl(curlwp, &bia, retval);
   1096 out:
   1097 	FILE_UNUSE(fp ,p);
   1098 	return error;
   1099 }
   1100 
   1101 /*
   1102  * Set I/O permissions for a process. Just set the maximum level
   1103  * right away (ignoring the argument), otherwise we would have
   1104  * to rely on I/O permission maps, which are not implemented.
   1105  */
   1106 int
   1107 linux_sys_iopl(l, v, retval)
   1108 	struct lwp *l;
   1109 	void *v;
   1110 	register_t *retval;
   1111 {
   1112 #if 0
   1113 	struct linux_sys_iopl_args /* {
   1114 		syscallarg(int) level;
   1115 	} */ *uap = v;
   1116 #endif
   1117 	struct proc *p = l->l_proc;
   1118 	struct trapframe *fp = l->l_md.md_regs;
   1119 
   1120 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1121 		return EPERM;
   1122 	fp->tf_eflags |= PSL_IOPL;
   1123 	*retval = 0;
   1124 	return 0;
   1125 }
   1126 
   1127 /*
   1128  * See above. If a root process tries to set access to an I/O port,
   1129  * just let it have the whole range.
   1130  */
   1131 int
   1132 linux_sys_ioperm(l, v, retval)
   1133 	struct lwp *l;
   1134 	void *v;
   1135 	register_t *retval;
   1136 {
   1137 	struct linux_sys_ioperm_args /* {
   1138 		syscallarg(unsigned int) lo;
   1139 		syscallarg(unsigned int) hi;
   1140 		syscallarg(int) val;
   1141 	} */ *uap = v;
   1142 	struct proc *p = l->l_proc;
   1143 	struct trapframe *fp = l->l_md.md_regs;
   1144 
   1145 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1146 		return EPERM;
   1147 	if (SCARG(uap, val))
   1148 		fp->tf_eflags |= PSL_IOPL;
   1149 	*retval = 0;
   1150 	return 0;
   1151 }
   1152 
   1153 int
   1154 linux_exec_setup_stack(struct proc *p, struct exec_package *epp)
   1155 {
   1156 	u_long max_stack_size;
   1157 	u_long access_linear_min, access_size;
   1158 	u_long noaccess_linear_min, noaccess_size;
   1159 
   1160 #ifndef	USRSTACK32
   1161 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
   1162 #endif
   1163 
   1164 	if (epp->ep_flags & EXEC_32) {
   1165 		epp->ep_minsaddr = USRSTACK32;
   1166 		max_stack_size = MAXSSIZ;
   1167 	} else {
   1168 		epp->ep_minsaddr = USRSTACK;
   1169 		max_stack_size = MAXSSIZ;
   1170 	}
   1171 
   1172 	if (epp->ep_minsaddr > LINUX_USRSTACK)
   1173 		epp->ep_minsaddr = LINUX_USRSTACK;
   1174 #ifdef DEBUG_LINUX
   1175 	else {
   1176 		/*
   1177 		 * Someone needs to make KERNBASE and TEXTADDR
   1178 		 * java versions < 1.4.2 need the stack to be
   1179 		 * at 0xC0000000
   1180 		 */
   1181 		uprintf("Cannot setup stack to 0xC0000000, "
   1182 		    "java will not work properly\n");
   1183 	}
   1184 #endif
   1185 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
   1186 		max_stack_size);
   1187 	epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;
   1188 
   1189 	/*
   1190 	 * set up commands for stack.  note that this takes *two*, one to
   1191 	 * map the part of the stack which we can access, and one to map
   1192 	 * the part which we can't.
   1193 	 *
   1194 	 * arguably, it could be made into one, but that would require the
   1195 	 * addition of another mapping proc, which is unnecessary
   1196 	 */
   1197 	access_size = epp->ep_ssize;
   1198 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
   1199 	noaccess_size = max_stack_size - access_size;
   1200 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
   1201 	    access_size), noaccess_size);
   1202 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
   1203 	    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
   1204 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
   1205 	    access_linear_min, NULLVP, 0,
   1206 	    VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
   1207 
   1208 	return 0;
   1209 }
   1210