Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.100
      1 /*	$NetBSD: linux_machdep.c,v 1.100 2003/10/06 03:45:40 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.100 2003/10/06 03:45:40 christos Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/sa.h>
     66 #include <sys/syscallargs.h>
     67 #include <sys/filedesc.h>
     68 #include <sys/exec_elf.h>
     69 #include <sys/disklabel.h>
     70 #include <sys/ioctl.h>
     71 #include <miscfs/specfs/specdev.h>
     72 
     73 #include <compat/linux/common/linux_types.h>
     74 #include <compat/linux/common/linux_signal.h>
     75 #include <compat/linux/common/linux_util.h>
     76 #include <compat/linux/common/linux_ioctl.h>
     77 #include <compat/linux/common/linux_hdio.h>
     78 #include <compat/linux/common/linux_exec.h>
     79 #include <compat/linux/common/linux_machdep.h>
     80 #include <compat/linux/common/linux_errno.h>
     81 
     82 #include <compat/linux/linux_syscallargs.h>
     83 
     84 #include <machine/cpu.h>
     85 #include <machine/cpufunc.h>
     86 #include <machine/psl.h>
     87 #include <machine/reg.h>
     88 #include <machine/segments.h>
     89 #include <machine/specialreg.h>
     90 #include <machine/sysarch.h>
     91 #include <machine/vm86.h>
     92 #include <machine/vmparam.h>
     93 
     94 /*
     95  * To see whether wscons is configured (for virtual console ioctl calls).
     96  */
     97 #if defined(_KERNEL_OPT)
     98 #include "wsdisplay.h"
     99 #endif
    100 #if (NWSDISPLAY > 0)
    101 #include <dev/wscons/wsconsio.h>
    102 #include <dev/wscons/wsdisplay_usl_io.h>
    103 #if defined(_KERNEL_OPT)
    104 #include "opt_xserver.h"
    105 #endif
    106 #endif
    107 
    108 #ifdef USER_LDT
    109 #include <machine/cpu.h>
    110 int linux_read_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    111     register_t *));
    112 int linux_write_ldt __P((struct lwp *, struct linux_sys_modify_ldt_args *,
    113     register_t *));
    114 #endif
    115 
    116 #ifdef DEBUG_LINUX
    117 #define DPRINTF(a) uprintf a
    118 #else
    119 #define DPRINTF(a)
    120 #endif
    121 
    122 static struct biosdisk_info *fd2biosinfo __P((struct proc *, struct file *));
    123 extern struct disklist *i386_alldisks;
    124 static void linux_save_ucontext __P((struct lwp *, struct trapframe *,
    125     const sigset_t *, struct sigaltstack *, struct linux_ucontext *));
    126 static void linux_save_sigcontext __P((struct lwp *, struct trapframe *,
    127     const sigset_t *, struct linux_sigcontext *));
    128 static int linux_restore_sigcontext __P((struct lwp *,
    129     struct linux_sigcontext *, register_t *));
    130 static void linux_rt_sendsig __P((const ksiginfo_t *, const sigset_t *));
    131 static void linux_old_sendsig __P((const ksiginfo_t *, const sigset_t *));
    132 
    133 extern char linux_sigcode[], linux_rt_sigcode[];
    134 /*
    135  * Deal with some i386-specific things in the Linux emulation code.
    136  */
    137 
    138 void
    139 linux_setregs(l, epp, stack)
    140 	struct lwp *l;
    141 	struct exec_package *epp;
    142 	u_long stack;
    143 {
    144 	struct pcb *pcb = &l->l_addr->u_pcb;
    145 	struct trapframe *tf;
    146 
    147 #if NNPX > 0
    148 	/* If we were using the FPU, forget about it. */
    149 	if (npxproc == l)
    150 		npxdrop();
    151 #endif
    152 
    153 #ifdef USER_LDT
    154 	pmap_ldt_cleanup(l);
    155 #endif
    156 
    157 	l->l_md.md_flags &= ~MDP_USEDFPU;
    158 
    159 	if (i386_use_fxsave) {
    160 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    161 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    162 	} else
    163 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    164 
    165 	tf = l->l_md.md_regs;
    166 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    167 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    168 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    169 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    170 	tf->tf_edi = 0;
    171 	tf->tf_esi = 0;
    172 	tf->tf_ebp = 0;
    173 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    174 	tf->tf_edx = 0;
    175 	tf->tf_ecx = 0;
    176 	tf->tf_eax = 0;
    177 	tf->tf_eip = epp->ep_entry;
    178 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    179 	tf->tf_eflags = PSL_USERSET;
    180 	tf->tf_esp = stack;
    181 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    182 }
    183 
    184 /*
    185  * Send an interrupt to process.
    186  *
    187  * Stack is set up to allow sigcode stored
    188  * in u. to call routine, followed by kcall
    189  * to sigreturn routine below.  After sigreturn
    190  * resets the signal mask, the stack, and the
    191  * frame pointer, it returns to the user
    192  * specified pc, psl.
    193  */
    194 
    195 void
    196 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    197 {
    198 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    199 		linux_rt_sendsig(ksi, mask);
    200 	else
    201 		linux_old_sendsig(ksi, mask);
    202 }
    203 
    204 
    205 static void
    206 linux_save_ucontext(l, tf, mask, sas, uc)
    207 	struct lwp *l;
    208 	struct trapframe *tf;
    209 	const sigset_t *mask;
    210 	struct sigaltstack *sas;
    211 	struct linux_ucontext *uc;
    212 {
    213 	uc->uc_flags = 0;
    214 	uc->uc_link = NULL;
    215 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    216 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    217 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    218 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    219 }
    220 
    221 static void
    222 linux_save_sigcontext(l, tf, mask, sc)
    223 	struct lwp *l;
    224 	struct trapframe *tf;
    225 	const sigset_t *mask;
    226 	struct linux_sigcontext *sc;
    227 {
    228 	/* Save register context. */
    229 #ifdef VM86
    230 	if (tf->tf_eflags & PSL_VM) {
    231 		sc->sc_gs = tf->tf_vm86_gs;
    232 		sc->sc_fs = tf->tf_vm86_fs;
    233 		sc->sc_es = tf->tf_vm86_es;
    234 		sc->sc_ds = tf->tf_vm86_ds;
    235 		sc->sc_eflags = get_vflags(l);
    236 	} else
    237 #endif
    238 	{
    239 		sc->sc_gs = tf->tf_gs;
    240 		sc->sc_fs = tf->tf_fs;
    241 		sc->sc_es = tf->tf_es;
    242 		sc->sc_ds = tf->tf_ds;
    243 		sc->sc_eflags = tf->tf_eflags;
    244 	}
    245 	sc->sc_edi = tf->tf_edi;
    246 	sc->sc_esi = tf->tf_esi;
    247 	sc->sc_esp = tf->tf_esp;
    248 	sc->sc_ebp = tf->tf_ebp;
    249 	sc->sc_ebx = tf->tf_ebx;
    250 	sc->sc_edx = tf->tf_edx;
    251 	sc->sc_ecx = tf->tf_ecx;
    252 	sc->sc_eax = tf->tf_eax;
    253 	sc->sc_eip = tf->tf_eip;
    254 	sc->sc_cs = tf->tf_cs;
    255 	sc->sc_esp_at_signal = tf->tf_esp;
    256 	sc->sc_ss = tf->tf_ss;
    257 	sc->sc_err = tf->tf_err;
    258 	sc->sc_trapno = tf->tf_trapno;
    259 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    260 	sc->sc_387 = NULL;
    261 
    262 	/* Save signal stack. */
    263 	/* Linux doesn't save the onstack flag in sigframe */
    264 
    265 	/* Save signal mask. */
    266 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    267 }
    268 
    269 static void
    270 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    271 {
    272 	struct lwp *l = curlwp;
    273 	struct proc *p = l->l_proc;
    274 	struct trapframe *tf;
    275 	struct linux_rt_sigframe *fp, frame;
    276 	int onstack;
    277 	linux_siginfo_t *lsi;
    278 	int sig = ksi->ksi_signo;
    279 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    280 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    281 
    282 	tf = l->l_md.md_regs;
    283 	/* Do we need to jump onto the signal stack? */
    284 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    285 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    286 
    287 
    288 	/* Allocate space for the signal handler context. */
    289 	if (onstack)
    290 		fp = (struct linux_rt_sigframe *)((caddr_t)sas->ss_sp +
    291 		    sas->ss_size);
    292 	else
    293 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    294 	fp--;
    295 
    296 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    297 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    298 
    299 	/* Build stack frame for signal trampoline. */
    300 	frame.sf_handler = catcher;
    301 	frame.sf_sig = native_to_linux_signo[sig];
    302 	frame.sf_sip = &fp->sf_si;
    303 	frame.sf_ucp = &fp->sf_uc;
    304 
    305 	/*
    306 	 * XXX: the following code assumes that the constants for
    307 	 * siginfo are the same between linux and NetBSD.
    308 	 */
    309 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    310 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    311 	lsi->lsi_code = ksi->ksi_code;
    312 	switch (lsi->lsi_signo = frame.sf_sig) {
    313 	case LINUX_SIGILL:
    314 	case LINUX_SIGFPE:
    315 	case LINUX_SIGSEGV:
    316 	case LINUX_SIGBUS:
    317 	case LINUX_SIGTRAP:
    318 		lsi->lsi_addr = ksi->ksi_addr;
    319 		break;
    320 	case LINUX_SIGCHLD:
    321 		lsi->lsi_uid = ksi->ksi_uid;
    322 		lsi->lsi_pid = ksi->ksi_pid;
    323 		lsi->lsi_status = ksi->ksi_status;
    324 		lsi->lsi_utime = ksi->ksi_utime;
    325 		lsi->lsi_stime = ksi->ksi_stime;
    326 		break;
    327 	case LINUX_SIGIO:
    328 		lsi->lsi_band = ksi->ksi_band;
    329 		lsi->lsi_fd = ksi->ksi_fd;
    330 		break;
    331 	default:
    332 		lsi->lsi_uid = ksi->ksi_uid;
    333 		lsi->lsi_pid = ksi->ksi_pid;
    334 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    335 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    336 			lsi->lsi_value.sival_ptr = ksi->ksi_sigval.sival_ptr;
    337 		break;
    338 	}
    339 
    340 	/* Save register context. */
    341 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    342 
    343 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    344 		/*
    345 		 * Process has trashed its stack; give it an illegal
    346 		 * instruction to halt it in its tracks.
    347 		 */
    348 		sigexit(l, SIGILL);
    349 		/* NOTREACHED */
    350 	}
    351 
    352 	/*
    353 	 * Build context to run handler in.
    354 	 */
    355 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    356 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    357 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    358 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    359 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    360 	    (linux_rt_sigcode - linux_sigcode);
    361 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    362 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    363 	tf->tf_esp = (int)fp;
    364 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    365 
    366 	/* Remember that we're now on the signal stack. */
    367 	if (onstack)
    368 		sas->ss_flags |= SS_ONSTACK;
    369 }
    370 
    371 static void
    372 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    373 {
    374 	struct lwp *l = curlwp;
    375 	struct proc *p = l->l_proc;
    376 	struct trapframe *tf;
    377 	struct linux_sigframe *fp, frame;
    378 	int onstack;
    379 	int sig = ksi->ksi_signo;
    380 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    381 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    382 
    383 	tf = l->l_md.md_regs;
    384 
    385 	/* Do we need to jump onto the signal stack? */
    386 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    387 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    388 
    389 	/* Allocate space for the signal handler context. */
    390 	if (onstack)
    391 		fp = (struct linux_sigframe *) ((caddr_t)sas->ss_sp +
    392 		    sas->ss_size);
    393 	else
    394 		fp = (struct linux_sigframe *)tf->tf_esp;
    395 	fp--;
    396 
    397 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    398 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    399 
    400 	/* Build stack frame for signal trampoline. */
    401 	frame.sf_handler = catcher;
    402 	frame.sf_sig = native_to_linux_signo[sig];
    403 
    404 /*###404 [cc] warning: passing arg 3 of `linux_save_sigcontext' discards qualifiers from pointer target type%%%*/
    405 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    406 
    407 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
    408 		/*
    409 		 * Process has trashed its stack; give it an illegal
    410 		 * instruction to halt it in its tracks.
    411 		 */
    412 		sigexit(l, SIGILL);
    413 		/* NOTREACHED */
    414 	}
    415 
    416 	/*
    417 	 * Build context to run handler in.
    418 	 */
    419 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    420 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    421 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    422 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    423 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    424 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    425 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    426 	tf->tf_esp = (int)fp;
    427 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    428 
    429 	/* Remember that we're now on the signal stack. */
    430 	if (onstack)
    431 		sas->ss_flags |= SS_ONSTACK;
    432 }
    433 
    434 /*
    435  * System call to cleanup state after a signal
    436  * has been taken.  Reset signal mask and
    437  * stack state from context left by sendsig (above).
    438  * Return to previous pc and psl as specified by
    439  * context left by sendsig. Check carefully to
    440  * make sure that the user has not modified the
    441  * psl to gain improper privileges or to cause
    442  * a machine fault.
    443  */
    444 int
    445 linux_sys_rt_sigreturn(l, v, retval)
    446 	struct lwp *l;
    447 	void *v;
    448 	register_t *retval;
    449 {
    450 	struct linux_sys_rt_sigreturn_args /* {
    451 		syscallarg(struct linux_ucontext *) ucp;
    452 	} */ *uap = v;
    453 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    454 	int error;
    455 
    456 	/*
    457 	 * The trampoline code hands us the context.
    458 	 * It is unsafe to keep track of it ourselves, in the event that a
    459 	 * program jumps out of a signal handler.
    460 	 */
    461 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    462 		return error;
    463 
    464 	/* XXX XAX we can do better here by using more of the ucontext */
    465 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    466 }
    467 
    468 int
    469 linux_sys_sigreturn(l, v, retval)
    470 	struct lwp *l;
    471 	void *v;
    472 	register_t *retval;
    473 {
    474 	struct linux_sys_sigreturn_args /* {
    475 		syscallarg(struct linux_sigcontext *) scp;
    476 	} */ *uap = v;
    477 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    478 	int error;
    479 
    480 	/*
    481 	 * The trampoline code hands us the context.
    482 	 * It is unsafe to keep track of it ourselves, in the event that a
    483 	 * program jumps out of a signal handler.
    484 	 */
    485 	if ((error = copyin((caddr_t)scp, &context, sizeof(*scp))) != 0)
    486 		return error;
    487 	return linux_restore_sigcontext(l, &context, retval);
    488 }
    489 
    490 static int
    491 linux_restore_sigcontext(l, scp, retval)
    492 	struct lwp *l;
    493 	struct linux_sigcontext *scp;
    494 	register_t *retval;
    495 {
    496 	struct proc *p = l->l_proc;
    497 	struct sigaltstack *sas = &p->p_sigctx.ps_sigstk;
    498 	struct trapframe *tf;
    499 	sigset_t mask;
    500 	ssize_t ss_gap;
    501 	/* Restore register context. */
    502 	tf = l->l_md.md_regs;
    503 
    504 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    505 #ifdef VM86
    506 	if (scp->sc_eflags & PSL_VM) {
    507 		void syscall_vm86 __P((struct trapframe *));
    508 
    509 		tf->tf_vm86_gs = scp->sc_gs;
    510 		tf->tf_vm86_fs = scp->sc_fs;
    511 		tf->tf_vm86_es = scp->sc_es;
    512 		tf->tf_vm86_ds = scp->sc_ds;
    513 		set_vflags(l, scp->sc_eflags);
    514 		p->p_md.md_syscall = syscall_vm86;
    515 	} else
    516 #endif
    517 	{
    518 		/*
    519 		 * Check for security violations.  If we're returning to
    520 		 * protected mode, the CPU will validate the segment registers
    521 		 * automatically and generate a trap on violations.  We handle
    522 		 * the trap, rather than doing all of the checking here.
    523 		 */
    524 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    525 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    526 			return EINVAL;
    527 
    528 		tf->tf_gs = scp->sc_gs;
    529 		tf->tf_fs = scp->sc_fs;
    530 		tf->tf_es = scp->sc_es;
    531 		tf->tf_ds = scp->sc_ds;
    532 #ifdef VM86
    533 		if (tf->tf_eflags & PSL_VM)
    534 			(*p->p_emul->e_syscall_intern)(p);
    535 #endif
    536 		tf->tf_eflags = scp->sc_eflags;
    537 	}
    538 	tf->tf_edi = scp->sc_edi;
    539 	tf->tf_esi = scp->sc_esi;
    540 	tf->tf_ebp = scp->sc_ebp;
    541 	tf->tf_ebx = scp->sc_ebx;
    542 	tf->tf_edx = scp->sc_edx;
    543 	tf->tf_ecx = scp->sc_ecx;
    544 	tf->tf_eax = scp->sc_eax;
    545 	tf->tf_eip = scp->sc_eip;
    546 	tf->tf_cs = scp->sc_cs;
    547 	tf->tf_esp = scp->sc_esp_at_signal;
    548 	tf->tf_ss = scp->sc_ss;
    549 
    550 	/* Restore signal stack. */
    551 	/*
    552 	 * Linux really does it this way; it doesn't have space in sigframe
    553 	 * to save the onstack flag.
    554 	 */
    555 	ss_gap = (ssize_t)
    556 	    ((caddr_t) scp->sc_esp_at_signal - (caddr_t) sas->ss_sp);
    557 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    558 		sas->ss_flags |= SS_ONSTACK;
    559 	else
    560 		sas->ss_flags &= ~SS_ONSTACK;
    561 
    562 	/* Restore signal mask. */
    563 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    564 	(void) sigprocmask1(p, SIG_SETMASK, &mask, 0);
    565 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    566 	return EJUSTRETURN;
    567 }
    568 
    569 #ifdef USER_LDT
    570 
    571 int
    572 linux_read_ldt(l, uap, retval)
    573 	struct lwp *l;
    574 	struct linux_sys_modify_ldt_args /* {
    575 		syscallarg(int) func;
    576 		syscallarg(void *) ptr;
    577 		syscallarg(size_t) bytecount;
    578 	} */ *uap;
    579 	register_t *retval;
    580 {
    581 	struct proc *p = l->l_proc;
    582 	struct i386_get_ldt_args gl;
    583 	int error;
    584 	caddr_t sg;
    585 	char *parms;
    586 
    587 	DPRINTF(("linux_read_ldt!"));
    588 	sg = stackgap_init(p, 0);
    589 
    590 	gl.start = 0;
    591 	gl.desc = SCARG(uap, ptr);
    592 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    593 
    594 	parms = stackgap_alloc(p, &sg, sizeof(gl));
    595 
    596 	if ((error = copyout(&gl, parms, sizeof(gl))) != 0)
    597 		return (error);
    598 
    599 	if ((error = i386_get_ldt(l, parms, retval)) != 0)
    600 		return (error);
    601 
    602 	*retval *= sizeof(union descriptor);
    603 	return (0);
    604 }
    605 
    606 struct linux_ldt_info {
    607 	u_int entry_number;
    608 	u_long base_addr;
    609 	u_int limit;
    610 	u_int seg_32bit:1;
    611 	u_int contents:2;
    612 	u_int read_exec_only:1;
    613 	u_int limit_in_pages:1;
    614 	u_int seg_not_present:1;
    615 	u_int useable:1;
    616 };
    617 
    618 int
    619 linux_write_ldt(l, uap, retval)
    620 	struct lwp *l;
    621 	struct linux_sys_modify_ldt_args /* {
    622 		syscallarg(int) func;
    623 		syscallarg(void *) ptr;
    624 		syscallarg(size_t) bytecount;
    625 	} */ *uap;
    626 	register_t *retval;
    627 {
    628 	struct proc *p = l->l_proc;
    629 	struct linux_ldt_info ldt_info;
    630 	struct segment_descriptor sd;
    631 	struct i386_set_ldt_args sl;
    632 	int error;
    633 	caddr_t sg;
    634 	char *parms;
    635 	int oldmode = (int)retval[0];
    636 
    637 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    638 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    639 		return (EINVAL);
    640 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    641 		return error;
    642 	if (ldt_info.entry_number >= 8192)
    643 		return (EINVAL);
    644 	if (ldt_info.contents == 3) {
    645 		if (oldmode)
    646 			return (EINVAL);
    647 		if (ldt_info.seg_not_present)
    648 			return (EINVAL);
    649 	}
    650 
    651 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    652 	    (oldmode || (ldt_info.contents == 0 &&
    653 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    654 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    655 	    ldt_info.useable == 0))) {
    656 		/* this means you should zero the ldt */
    657 		(void)memset(&sd, 0, sizeof(sd));
    658 	} else {
    659 		sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    660 		sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    661 		sd.sd_lolimit = ldt_info.limit & 0xffff;
    662 		sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    663 		sd.sd_type = 16 | (ldt_info.contents << 2) |
    664 		    (!ldt_info.read_exec_only << 1);
    665 		sd.sd_dpl = SEL_UPL;
    666 		sd.sd_p = !ldt_info.seg_not_present;
    667 		sd.sd_def32 = ldt_info.seg_32bit;
    668 		sd.sd_gran = ldt_info.limit_in_pages;
    669 		if (!oldmode)
    670 			sd.sd_xx = ldt_info.useable;
    671 		else
    672 			sd.sd_xx = 0;
    673 	}
    674 	sg = stackgap_init(p, 0);
    675 	sl.start = ldt_info.entry_number;
    676 	sl.desc = stackgap_alloc(p, &sg, sizeof(sd));
    677 	sl.num = 1;
    678 
    679 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    680 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    681 
    682 	parms = stackgap_alloc(p, &sg, sizeof(sl));
    683 
    684 	if ((error = copyout(&sd, sl.desc, sizeof(sd))) != 0)
    685 		return (error);
    686 	if ((error = copyout(&sl, parms, sizeof(sl))) != 0)
    687 		return (error);
    688 
    689 	if ((error = i386_set_ldt(l, parms, retval)) != 0)
    690 		return (error);
    691 
    692 	*retval = 0;
    693 	return (0);
    694 }
    695 
    696 #endif /* USER_LDT */
    697 
    698 int
    699 linux_sys_modify_ldt(l, v, retval)
    700 	struct lwp *l;
    701 	void *v;
    702 	register_t *retval;
    703 {
    704 	struct linux_sys_modify_ldt_args /* {
    705 		syscallarg(int) func;
    706 		syscallarg(void *) ptr;
    707 		syscallarg(size_t) bytecount;
    708 	} */ *uap = v;
    709 
    710 	switch (SCARG(uap, func)) {
    711 #ifdef USER_LDT
    712 	case 0:
    713 		return linux_read_ldt(l, uap, retval);
    714 	case 1:
    715 		retval[0] = 1;
    716 		return linux_write_ldt(l, uap, retval);
    717 	case 2:
    718 #ifdef notyet
    719 		return (linux_read_default_ldt(l, uap, retval);
    720 #else
    721 		return (ENOSYS);
    722 #endif
    723 	case 0x11:
    724 		retval[0] = 0;
    725 		return linux_write_ldt(l, uap, retval);
    726 #endif /* USER_LDT */
    727 
    728 	default:
    729 		return (ENOSYS);
    730 	}
    731 }
    732 
    733 /*
    734  * XXX Pathetic hack to make svgalib work. This will fake the major
    735  * device number of an opened VT so that svgalib likes it. grmbl.
    736  * Should probably do it 'wrong the right way' and use a mapping
    737  * array for all major device numbers, and map linux_mknod too.
    738  */
    739 dev_t
    740 linux_fakedev(dev, raw)
    741 	dev_t dev;
    742 	int raw;
    743 {
    744 	if (raw) {
    745 #if (NWSDISPLAY > 0)
    746 		extern const struct cdevsw wsdisplay_cdevsw;
    747 		if (cdevsw_lookup(dev) == &wsdisplay_cdevsw)
    748 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    749 #endif
    750 	}
    751 
    752 	return dev;
    753 }
    754 
#if (NWSDISPLAY > 0)
/*
 * Keyboard translation tables exported through linux_keytabs.
 * That's not complete, but enough to get an X server running.
 *
 * Each table has NR_KEYS entries of u_short.
 */
#define NR_KEYS 128

/* Table 0 of linux_keytabs: no modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 1 of linux_keytabs: shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Tables 2 and 3 of linux_keytabs: AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table 4 of linux_keytabs: control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Table index -> keymap; altgr_map deliberately appears twice. */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
    834 
    835 static struct biosdisk_info *
    836 fd2biosinfo(p, fp)
    837 	struct proc *p;
    838 	struct file *fp;
    839 {
    840 	struct vnode *vp;
    841 	const char *blkname;
    842 	char diskname[16];
    843 	int i;
    844 	struct nativedisk_info *nip;
    845 	struct disklist *dl = i386_alldisks;
    846 
    847 	if (fp->f_type != DTYPE_VNODE)
    848 		return NULL;
    849 	vp = (struct vnode *)fp->f_data;
    850 
    851 	if (vp->v_type != VBLK)
    852 		return NULL;
    853 
    854 	blkname = devsw_blk2name(major(vp->v_rdev));
    855 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    856 	    DISKUNIT(vp->v_rdev));
    857 
    858 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    859 		nip = &dl->dl_nativedisks[i];
    860 		if (strcmp(diskname, nip->ni_devname))
    861 			continue;
    862 		if (nip->ni_nmatches != 0)
    863 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    864 	}
    865 
    866 	return NULL;
    867 }
    868 
    869 
    870 /*
    871  * We come here in a last attempt to satisfy a Linux ioctl() call
    872  */
    873 int
    874 linux_machdepioctl(p, v, retval)
    875 	struct proc *p;
    876 	void *v;
    877 	register_t *retval;
    878 {
    879 	struct linux_sys_ioctl_args /* {
    880 		syscallarg(int) fd;
    881 		syscallarg(u_long) com;
    882 		syscallarg(caddr_t) data;
    883 	} */ *uap = v;
    884 	struct sys_ioctl_args bia;
    885 	u_long com;
    886 	int error, error1;
    887 #if (NWSDISPLAY > 0)
    888 	struct vt_mode lvt;
    889 	caddr_t bvtp, sg;
    890 	struct kbentry kbe;
    891 #endif
    892 	struct linux_hd_geometry hdg;
    893 	struct linux_hd_big_geometry hdg_big;
    894 	struct biosdisk_info *bip;
    895 	struct filedesc *fdp;
    896 	struct file *fp;
    897 	int fd;
    898 	struct disklabel label, *labp;
    899 	struct partinfo partp;
    900 	int (*ioctlf)(struct file *, u_long, void *, struct proc *);
    901 	u_long start, biostotal, realtotal;
    902 	u_char heads, sectors;
    903 	u_int cylinders;
    904 	struct ioctl_pt pt;
    905 
    906 	fd = SCARG(uap, fd);
    907 	SCARG(&bia, fd) = fd;
    908 	SCARG(&bia, data) = SCARG(uap, data);
    909 	com = SCARG(uap, com);
    910 
    911 	fdp = p->p_fd;
    912 
    913 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    914 		return (EBADF);
    915 
    916 	FILE_USE(fp);
    917 
    918 	switch (com) {
    919 #if (NWSDISPLAY > 0)
    920 	case LINUX_KDGKBMODE:
    921 		com = KDGKBMODE;
    922 		break;
    923 	case LINUX_KDSKBMODE:
    924 		com = KDSKBMODE;
    925 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    926 			SCARG(&bia, data) = (caddr_t)K_RAW;
    927 		break;
    928 	case LINUX_KIOCSOUND:
    929 		SCARG(&bia, data) =
    930 		    (caddr_t)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    931 		/* fall through */
    932 	case LINUX_KDMKTONE:
    933 		com = KDMKTONE;
    934 		break;
    935 	case LINUX_KDSETMODE:
    936 		com = KDSETMODE;
    937 		break;
    938 	case LINUX_KDGETMODE:
    939 		/* KD_* values are equal to the wscons numbers */
    940 		com = WSDISPLAYIO_GMODE;
    941 		break;
    942 	case LINUX_KDENABIO:
    943 		com = KDENABIO;
    944 		break;
    945 	case LINUX_KDDISABIO:
    946 		com = KDDISABIO;
    947 		break;
    948 	case LINUX_KDGETLED:
    949 		com = KDGETLED;
    950 		break;
    951 	case LINUX_KDSETLED:
    952 		com = KDSETLED;
    953 		break;
    954 	case LINUX_VT_OPENQRY:
    955 		com = VT_OPENQRY;
    956 		break;
    957 	case LINUX_VT_GETMODE:
    958 		SCARG(&bia, com) = VT_GETMODE;
    959 		/* XXX NJWLWP */
    960 		if ((error = sys_ioctl(curlwp, &bia, retval)))
    961 			goto out;
    962 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    963 		    sizeof (struct vt_mode))))
    964 			goto out;
    965 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    966 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    967 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    968 		error = copyout((caddr_t)&lvt, SCARG(uap, data),
    969 		    sizeof (struct vt_mode));
    970 		goto out;
    971 	case LINUX_VT_SETMODE:
    972 		com = VT_SETMODE;
    973 		if ((error = copyin(SCARG(uap, data), (caddr_t)&lvt,
    974 		    sizeof (struct vt_mode))))
    975 			goto out;
    976 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    977 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    978 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    979 		sg = stackgap_init(p, 0);
    980 		bvtp = stackgap_alloc(p, &sg, sizeof (struct vt_mode));
    981 		if ((error = copyout(&lvt, bvtp, sizeof (struct vt_mode))))
    982 			goto out;
    983 		SCARG(&bia, data) = bvtp;
    984 		break;
    985 	case LINUX_VT_DISALLOCATE:
    986 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    987 		error = 0;
    988 		goto out;
    989 	case LINUX_VT_RELDISP:
    990 		com = VT_RELDISP;
    991 		break;
    992 	case LINUX_VT_ACTIVATE:
    993 		com = VT_ACTIVATE;
    994 		break;
    995 	case LINUX_VT_WAITACTIVE:
    996 		com = VT_WAITACTIVE;
    997 		break;
    998 	case LINUX_VT_GETSTATE:
    999 		com = VT_GETSTATE;
   1000 		break;
   1001 	case LINUX_KDGKBTYPE:
   1002 	    {
   1003 		static const u_int8_t kb101 = KB_101;
   1004 
   1005 		/* This is what Linux does. */
   1006 		error = copyout(&kb101, SCARG(uap, data), 1);
   1007 		goto out;
   1008 	    }
   1009 	case LINUX_KDGKBENT:
   1010 		/*
   1011 		 * The Linux KDGKBENT ioctl is different from the
   1012 		 * SYSV original. So we handle it in machdep code.
   1013 		 * XXX We should use keyboard mapping information
   1014 		 * from wsdisplay, but this would be expensive.
   1015 		 */
   1016 		if ((error = copyin(SCARG(uap, data), &kbe,
   1017 				    sizeof(struct kbentry))))
   1018 			goto out;
   1019 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
   1020 		    || kbe.kb_index >= NR_KEYS) {
   1021 			error = EINVAL;
   1022 			goto out;
   1023 		}
   1024 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
   1025 		error = copyout(&kbe, SCARG(uap, data),
   1026 				sizeof(struct kbentry));
   1027 		goto out;
   1028 #endif
   1029 	case LINUX_HDIO_GETGEO:
   1030 	case LINUX_HDIO_GETGEO_BIG:
   1031 		/*
   1032 		 * Try to mimic Linux behaviour: return the BIOS geometry
   1033 		 * if possible (extending its # of cylinders if it's beyond
   1034 		 * the 1023 limit), fall back to the MI geometry (i.e.
   1035 		 * the real geometry) if not found, by returning an
   1036 		 * error. See common/linux_hdio.c
   1037 		 */
   1038 		bip = fd2biosinfo(p, fp);
   1039 		ioctlf = fp->f_ops->fo_ioctl;
   1040 		error = ioctlf(fp, DIOCGDEFLABEL, (caddr_t)&label, p);
   1041 		error1 = ioctlf(fp, DIOCGPART, (caddr_t)&partp, p);
   1042 		if (error != 0 && error1 != 0) {
   1043 			error = error1;
   1044 			goto out;
   1045 		}
   1046 		labp = error != 0 ? &label : partp.disklab;
   1047 		start = error1 != 0 ? partp.part->p_offset : 0;
   1048 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1049 		    && bip->bi_cyl != 0) {
   1050 			heads = bip->bi_head;
   1051 			sectors = bip->bi_sec;
   1052 			cylinders = bip->bi_cyl;
   1053 			biostotal = heads * sectors * cylinders;
   1054 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1055 			    labp->d_ncylinders;
   1056 			if (realtotal > biostotal)
   1057 				cylinders = realtotal / (heads * sectors);
   1058 		} else {
   1059 			heads = labp->d_ntracks;
   1060 			cylinders = labp->d_ncylinders;
   1061 			sectors = labp->d_nsectors;
   1062 		}
   1063 		if (com == LINUX_HDIO_GETGEO) {
   1064 			hdg.start = start;
   1065 			hdg.heads = heads;
   1066 			hdg.cylinders = cylinders;
   1067 			hdg.sectors = sectors;
   1068 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1069 			goto out;
   1070 		} else {
   1071 			hdg_big.start = start;
   1072 			hdg_big.heads = heads;
   1073 			hdg_big.cylinders = cylinders;
   1074 			hdg_big.sectors = sectors;
   1075 			error = copyout(&hdg_big, SCARG(uap, data),
   1076 			    sizeof hdg_big);
   1077 			goto out;
   1078 		}
   1079 
   1080 	default:
   1081 		/*
   1082 		 * Unknown to us. If it's on a device, just pass it through
   1083 		 * using PTIOCLINUX, the device itself might be able to
   1084 		 * make some sense of it.
   1085 		 * XXX hack: if the function returns EJUSTRETURN,
   1086 		 * it has stuffed a sysctl return value in pt.data.
   1087 		 */
   1088 		FILE_USE(fp);
   1089 		ioctlf = fp->f_ops->fo_ioctl;
   1090 		pt.com = SCARG(uap, com);
   1091 		pt.data = SCARG(uap, data);
   1092 		error = ioctlf(fp, PTIOCLINUX, (caddr_t)&pt, p);
   1093 		FILE_UNUSE(fp, p);
   1094 		if (error == EJUSTRETURN) {
   1095 			retval[0] = (register_t)pt.data;
   1096 			error = 0;
   1097 		}
   1098 
   1099 		if (error == ENOTTY)
   1100 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1101 			    com));
   1102 		goto out;
   1103 	}
   1104 	SCARG(&bia, com) = com;
   1105 	/* XXX NJWLWP */
   1106 	error = sys_ioctl(curlwp, &bia, retval);
   1107 out:
   1108 	FILE_UNUSE(fp ,p);
   1109 	return error;
   1110 }
   1111 
   1112 /*
   1113  * Set I/O permissions for a process. Just set the maximum level
   1114  * right away (ignoring the argument), otherwise we would have
   1115  * to rely on I/O permission maps, which are not implemented.
   1116  */
   1117 int
   1118 linux_sys_iopl(l, v, retval)
   1119 	struct lwp *l;
   1120 	void *v;
   1121 	register_t *retval;
   1122 {
   1123 #if 0
   1124 	struct linux_sys_iopl_args /* {
   1125 		syscallarg(int) level;
   1126 	} */ *uap = v;
   1127 #endif
   1128 	struct proc *p = l->l_proc;
   1129 	struct trapframe *fp = l->l_md.md_regs;
   1130 
   1131 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1132 		return EPERM;
   1133 	fp->tf_eflags |= PSL_IOPL;
   1134 	*retval = 0;
   1135 	return 0;
   1136 }
   1137 
   1138 /*
   1139  * See above. If a root process tries to set access to an I/O port,
   1140  * just let it have the whole range.
   1141  */
   1142 int
   1143 linux_sys_ioperm(l, v, retval)
   1144 	struct lwp *l;
   1145 	void *v;
   1146 	register_t *retval;
   1147 {
   1148 	struct linux_sys_ioperm_args /* {
   1149 		syscallarg(unsigned int) lo;
   1150 		syscallarg(unsigned int) hi;
   1151 		syscallarg(int) val;
   1152 	} */ *uap = v;
   1153 	struct proc *p = l->l_proc;
   1154 	struct trapframe *fp = l->l_md.md_regs;
   1155 
   1156 	if (suser(p->p_ucred, &p->p_acflag) != 0)
   1157 		return EPERM;
   1158 	if (SCARG(uap, val))
   1159 		fp->tf_eflags |= PSL_IOPL;
   1160 	*retval = 0;
   1161 	return 0;
   1162 }
   1163 
   1164 int
   1165 linux_exec_setup_stack(struct proc *p, struct exec_package *epp)
   1166 {
   1167 	u_long max_stack_size;
   1168 	u_long access_linear_min, access_size;
   1169 	u_long noaccess_linear_min, noaccess_size;
   1170 
   1171 #ifndef	USRSTACK32
   1172 #define USRSTACK32	(0x00000000ffffffffL&~PGOFSET)
   1173 #endif
   1174 
   1175 	if (epp->ep_flags & EXEC_32) {
   1176 		epp->ep_minsaddr = USRSTACK32;
   1177 		max_stack_size = MAXSSIZ;
   1178 	} else {
   1179 		epp->ep_minsaddr = USRSTACK;
   1180 		max_stack_size = MAXSSIZ;
   1181 	}
   1182 
   1183 	if (epp->ep_minsaddr > LINUX_USRSTACK)
   1184 		epp->ep_minsaddr = LINUX_USRSTACK;
   1185 #ifdef DEBUG_LINUX
   1186 	else {
   1187 		/*
   1188 		 * Someone needs to make KERNBASE and TEXTADDR
   1189 		 * java versions < 1.4.2 need the stack to be
   1190 		 * at 0xC0000000
   1191 		 */
   1192 		uprintf("Cannot setup stack to 0xC0000000, "
   1193 		    "java will not work properly\n");
   1194 	}
   1195 #endif
   1196 	epp->ep_maxsaddr = (u_long)STACK_GROW(epp->ep_minsaddr,
   1197 		max_stack_size);
   1198 	epp->ep_ssize = p->p_rlimit[RLIMIT_STACK].rlim_cur;
   1199 
   1200 	/*
   1201 	 * set up commands for stack.  note that this takes *two*, one to
   1202 	 * map the part of the stack which we can access, and one to map
   1203 	 * the part which we can't.
   1204 	 *
   1205 	 * arguably, it could be made into one, but that would require the
   1206 	 * addition of another mapping proc, which is unnecessary
   1207 	 */
   1208 	access_size = epp->ep_ssize;
   1209 	access_linear_min = (u_long)STACK_ALLOC(epp->ep_minsaddr, access_size);
   1210 	noaccess_size = max_stack_size - access_size;
   1211 	noaccess_linear_min = (u_long)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
   1212 	    access_size), noaccess_size);
   1213 	if (noaccess_size > 0) {
   1214 		NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, noaccess_size,
   1215 		    noaccess_linear_min, NULLVP, 0, VM_PROT_NONE);
   1216 	}
   1217 	KASSERT(access_size > 0);
   1218 	NEW_VMCMD(&epp->ep_vmcmds, vmcmd_map_zero, access_size,
   1219 	    access_linear_min, NULLVP, 0, VM_PROT_READ | VM_PROT_WRITE);
   1220 
   1221 	return 0;
   1222 }
   1223