Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.132
      1 /*	$NetBSD: linux_machdep.c,v 1.132 2007/12/08 18:36:05 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.132 2007/12/08 18:36:05 dsl Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <sys/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
    110 #ifdef DEBUG_LINUX
    111 #define DPRINTF(a) uprintf a
    112 #else
    113 #define DPRINTF(a)
    114 #endif
    115 
    116 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
    117 extern struct disklist *x86_alldisks;
    118 static void linux_save_ucontext(struct lwp *, struct trapframe *,
    119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
    120 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    121     const sigset_t *, struct linux_sigcontext *);
    122 static int linux_restore_sigcontext(struct lwp *,
    123     struct linux_sigcontext *, register_t *);
    124 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
    125 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
    126 
    127 extern char linux_sigcode[], linux_rt_sigcode[];
    128 /*
    129  * Deal with some i386-specific things in the Linux emulation code.
    130  */
    131 
    132 void
    133 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
    134 {
    135 	struct pcb *pcb = &l->l_addr->u_pcb;
    136 	struct trapframe *tf;
    137 
    138 #if NNPX > 0
    139 	/* If we were using the FPU, forget about it. */
    140 	if (npxproc == l)
    141 		npxdrop();
    142 #endif
    143 
    144 #ifdef USER_LDT
    145 	pmap_ldt_cleanup(l);
    146 #endif
    147 
    148 	l->l_md.md_flags &= ~MDL_USEDFPU;
    149 
    150 	if (i386_use_fxsave) {
    151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    153 	} else
    154 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    155 
    156 	tf = l->l_md.md_regs;
    157 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_edi = 0;
    162 	tf->tf_esi = 0;
    163 	tf->tf_ebp = 0;
    164 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    165 	tf->tf_edx = 0;
    166 	tf->tf_ecx = 0;
    167 	tf->tf_eax = 0;
    168 	tf->tf_eip = epp->ep_entry;
    169 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    170 	tf->tf_eflags = PSL_USERSET;
    171 	tf->tf_esp = stack;
    172 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    173 }
    174 
    175 /*
    176  * Send an interrupt to process.
    177  *
    178  * Stack is set up to allow sigcode stored
    179  * in u. to call routine, followed by kcall
    180  * to sigreturn routine below.  After sigreturn
    181  * resets the signal mask, the stack, and the
    182  * frame pointer, it returns to the user
    183  * specified pc, psl.
    184  */
    185 
    186 void
    187 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    188 {
    189 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    190 		linux_rt_sendsig(ksi, mask);
    191 	else
    192 		linux_old_sendsig(ksi, mask);
    193 }
    194 
    195 
    196 static void
    197 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    198 {
    199 	uc->uc_flags = 0;
    200 	uc->uc_link = NULL;
    201 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    202 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    203 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    204 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    205 }
    206 
    207 static void
    208 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
    209 {
    210 	/* Save register context. */
    211 #ifdef VM86
    212 	if (tf->tf_eflags & PSL_VM) {
    213 		sc->sc_gs = tf->tf_vm86_gs;
    214 		sc->sc_fs = tf->tf_vm86_fs;
    215 		sc->sc_es = tf->tf_vm86_es;
    216 		sc->sc_ds = tf->tf_vm86_ds;
    217 		sc->sc_eflags = get_vflags(l);
    218 	} else
    219 #endif
    220 	{
    221 		sc->sc_gs = tf->tf_gs;
    222 		sc->sc_fs = tf->tf_fs;
    223 		sc->sc_es = tf->tf_es;
    224 		sc->sc_ds = tf->tf_ds;
    225 		sc->sc_eflags = tf->tf_eflags;
    226 	}
    227 	sc->sc_edi = tf->tf_edi;
    228 	sc->sc_esi = tf->tf_esi;
    229 	sc->sc_esp = tf->tf_esp;
    230 	sc->sc_ebp = tf->tf_ebp;
    231 	sc->sc_ebx = tf->tf_ebx;
    232 	sc->sc_edx = tf->tf_edx;
    233 	sc->sc_ecx = tf->tf_ecx;
    234 	sc->sc_eax = tf->tf_eax;
    235 	sc->sc_eip = tf->tf_eip;
    236 	sc->sc_cs = tf->tf_cs;
    237 	sc->sc_esp_at_signal = tf->tf_esp;
    238 	sc->sc_ss = tf->tf_ss;
    239 	sc->sc_err = tf->tf_err;
    240 	sc->sc_trapno = tf->tf_trapno;
    241 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    242 	sc->sc_387 = NULL;
    243 
    244 	/* Save signal stack. */
    245 	/* Linux doesn't save the onstack flag in sigframe */
    246 
    247 	/* Save signal mask. */
    248 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    249 }
    250 
    251 static void
    252 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    253 {
    254 	struct lwp *l = curlwp;
    255 	struct proc *p = l->l_proc;
    256 	struct trapframe *tf;
    257 	struct linux_rt_sigframe *fp, frame;
    258 	int onstack, error;
    259 	linux_siginfo_t *lsi;
    260 	int sig = ksi->ksi_signo;
    261 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    262 	struct sigaltstack *sas = &l->l_sigstk;
    263 
    264 	tf = l->l_md.md_regs;
    265 	/* Do we need to jump onto the signal stack? */
    266 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    267 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    268 
    269 
    270 	/* Allocate space for the signal handler context. */
    271 	if (onstack)
    272 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    273 		    sas->ss_size);
    274 	else
    275 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    276 	fp--;
    277 
    278 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    279 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    280 
    281 	/* Build stack frame for signal trampoline. */
    282 	frame.sf_handler = catcher;
    283 	frame.sf_sig = native_to_linux_signo[sig];
    284 	frame.sf_sip = &fp->sf_si;
    285 	frame.sf_ucp = &fp->sf_uc;
    286 
    287 	/*
    288 	 * XXX: the following code assumes that the constants for
    289 	 * siginfo are the same between linux and NetBSD.
    290 	 */
    291 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    292 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    293 	lsi->lsi_code = ksi->ksi_code;
    294 	switch (lsi->lsi_signo = frame.sf_sig) {
    295 	case LINUX_SIGILL:
    296 	case LINUX_SIGFPE:
    297 	case LINUX_SIGSEGV:
    298 	case LINUX_SIGBUS:
    299 	case LINUX_SIGTRAP:
    300 		lsi->lsi_addr = ksi->ksi_addr;
    301 		break;
    302 	case LINUX_SIGCHLD:
    303 		lsi->lsi_uid = ksi->ksi_uid;
    304 		lsi->lsi_pid = ksi->ksi_pid;
    305 		lsi->lsi_utime = ksi->ksi_utime;
    306 		lsi->lsi_stime = ksi->ksi_stime;
    307 
    308 		/* We use the same codes */
    309 		lsi->lsi_code = ksi->ksi_code;
    310 		/* XXX is that right? */
    311 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    312 		break;
    313 	case LINUX_SIGIO:
    314 		lsi->lsi_band = ksi->ksi_band;
    315 		lsi->lsi_fd = ksi->ksi_fd;
    316 		break;
    317 	default:
    318 		lsi->lsi_uid = ksi->ksi_uid;
    319 		lsi->lsi_pid = ksi->ksi_pid;
    320 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    321 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    322 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
    323 		break;
    324 	}
    325 
    326 	/* Save register context. */
    327 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    328 	sendsig_reset(l, sig);
    329 
    330 	mutex_exit(&p->p_smutex);
    331 	error = copyout(&frame, fp, sizeof(frame));
    332 	mutex_enter(&p->p_smutex);
    333 
    334 	if (error != 0) {
    335 		/*
    336 		 * Process has trashed its stack; give it an illegal
    337 		 * instruction to halt it in its tracks.
    338 		 */
    339 		sigexit(l, SIGILL);
    340 		/* NOTREACHED */
    341 	}
    342 
    343 	/*
    344 	 * Build context to run handler in.
    345 	 */
    346 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    347 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    348 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    349 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    350 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    351 	    (linux_rt_sigcode - linux_sigcode);
    352 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    353 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    354 	tf->tf_esp = (int)fp;
    355 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    356 
    357 	/* Remember that we're now on the signal stack. */
    358 	if (onstack)
    359 		sas->ss_flags |= SS_ONSTACK;
    360 }
    361 
    362 static void
    363 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    364 {
    365 	struct lwp *l = curlwp;
    366 	struct proc *p = l->l_proc;
    367 	struct trapframe *tf;
    368 	struct linux_sigframe *fp, frame;
    369 	int onstack, error;
    370 	int sig = ksi->ksi_signo;
    371 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    372 	struct sigaltstack *sas = &l->l_sigstk;
    373 
    374 	tf = l->l_md.md_regs;
    375 
    376 	/* Do we need to jump onto the signal stack? */
    377 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    378 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    379 
    380 	/* Allocate space for the signal handler context. */
    381 	if (onstack)
    382 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    383 		    sas->ss_size);
    384 	else
    385 		fp = (struct linux_sigframe *)tf->tf_esp;
    386 	fp--;
    387 
    388 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    389 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    390 
    391 	/* Build stack frame for signal trampoline. */
    392 	frame.sf_handler = catcher;
    393 	frame.sf_sig = native_to_linux_signo[sig];
    394 
    395 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    396 	sendsig_reset(l, sig);
    397 
    398 	mutex_exit(&p->p_smutex);
    399 	error = copyout(&frame, fp, sizeof(frame));
    400 	mutex_enter(&p->p_smutex);
    401 
    402 	if (error != 0) {
    403 		/*
    404 		 * Process has trashed its stack; give it an illegal
    405 		 * instruction to halt it in its tracks.
    406 		 */
    407 		sigexit(l, SIGILL);
    408 		/* NOTREACHED */
    409 	}
    410 
    411 	/*
    412 	 * Build context to run handler in.
    413 	 */
    414 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    415 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    416 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    417 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    418 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    419 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    420 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    421 	tf->tf_esp = (int)fp;
    422 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    423 
    424 	/* Remember that we're now on the signal stack. */
    425 	if (onstack)
    426 		sas->ss_flags |= SS_ONSTACK;
    427 }
    428 
    429 /*
    430  * System call to cleanup state after a signal
    431  * has been taken.  Reset signal mask and
    432  * stack state from context left by sendsig (above).
    433  * Return to previous pc and psl as specified by
    434  * context left by sendsig. Check carefully to
    435  * make sure that the user has not modified the
    436  * psl to gain improper privileges or to cause
    437  * a machine fault.
    438  */
    439 int
    440 linux_sys_rt_sigreturn(struct lwp *l, void *v, register_t *retval)
    441 {
    442 	struct linux_sys_rt_sigreturn_args /* {
    443 		syscallarg(struct linux_ucontext *) ucp;
    444 	} */ *uap = v;
    445 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    446 	int error;
    447 
    448 	/*
    449 	 * The trampoline code hands us the context.
    450 	 * It is unsafe to keep track of it ourselves, in the event that a
    451 	 * program jumps out of a signal handler.
    452 	 */
    453 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    454 		return error;
    455 
    456 	/* XXX XAX we can do better here by using more of the ucontext */
    457 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    458 }
    459 
    460 int
    461 linux_sys_sigreturn(struct lwp *l, void *v, register_t *retval)
    462 {
    463 	struct linux_sys_sigreturn_args /* {
    464 		syscallarg(struct linux_sigcontext *) scp;
    465 	} */ *uap = v;
    466 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    467 	int error;
    468 
    469 	/*
    470 	 * The trampoline code hands us the context.
    471 	 * It is unsafe to keep track of it ourselves, in the event that a
    472 	 * program jumps out of a signal handler.
    473 	 */
    474 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    475 		return error;
    476 	return linux_restore_sigcontext(l, &context, retval);
    477 }
    478 
    479 static int
    480 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    481     register_t *retval)
    482 {
    483 	struct proc *p = l->l_proc;
    484 	struct sigaltstack *sas = &l->l_sigstk;
    485 	struct trapframe *tf;
    486 	sigset_t mask;
    487 	ssize_t ss_gap;
    488 	/* Restore register context. */
    489 	tf = l->l_md.md_regs;
    490 
    491 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    492 #ifdef VM86
    493 	if (scp->sc_eflags & PSL_VM) {
    494 		void syscall_vm86(struct trapframe *);
    495 
    496 		tf->tf_vm86_gs = scp->sc_gs;
    497 		tf->tf_vm86_fs = scp->sc_fs;
    498 		tf->tf_vm86_es = scp->sc_es;
    499 		tf->tf_vm86_ds = scp->sc_ds;
    500 		set_vflags(l, scp->sc_eflags);
    501 		p->p_md.md_syscall = syscall_vm86;
    502 	} else
    503 #endif
    504 	{
    505 		/*
    506 		 * Check for security violations.  If we're returning to
    507 		 * protected mode, the CPU will validate the segment registers
    508 		 * automatically and generate a trap on violations.  We handle
    509 		 * the trap, rather than doing all of the checking here.
    510 		 */
    511 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    512 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    513 			return EINVAL;
    514 
    515 		tf->tf_gs = scp->sc_gs;
    516 		tf->tf_fs = scp->sc_fs;
    517 		tf->tf_es = scp->sc_es;
    518 		tf->tf_ds = scp->sc_ds;
    519 #ifdef VM86
    520 		if (tf->tf_eflags & PSL_VM)
    521 			(*p->p_emul->e_syscall_intern)(p);
    522 #endif
    523 		tf->tf_eflags = scp->sc_eflags;
    524 	}
    525 	tf->tf_edi = scp->sc_edi;
    526 	tf->tf_esi = scp->sc_esi;
    527 	tf->tf_ebp = scp->sc_ebp;
    528 	tf->tf_ebx = scp->sc_ebx;
    529 	tf->tf_edx = scp->sc_edx;
    530 	tf->tf_ecx = scp->sc_ecx;
    531 	tf->tf_eax = scp->sc_eax;
    532 	tf->tf_eip = scp->sc_eip;
    533 	tf->tf_cs = scp->sc_cs;
    534 	tf->tf_esp = scp->sc_esp_at_signal;
    535 	tf->tf_ss = scp->sc_ss;
    536 
    537 	/* Restore signal stack. */
    538 	/*
    539 	 * Linux really does it this way; it doesn't have space in sigframe
    540 	 * to save the onstack flag.
    541 	 */
    542 	mutex_enter(&p->p_smutex);
    543 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    544 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    545 		sas->ss_flags |= SS_ONSTACK;
    546 	else
    547 		sas->ss_flags &= ~SS_ONSTACK;
    548 
    549 	/* Restore signal mask. */
    550 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    551 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    552 	mutex_exit(&p->p_smutex);
    553 
    554 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    555 	return EJUSTRETURN;
    556 }
    557 
    558 #ifdef USER_LDT
    559 
    560 static int
    561 linux_read_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
    562     register_t *retval)
    563 {
    564 	struct x86_get_ldt_args gl;
    565 	int error;
    566 	int num_ldt;
    567 	union descriptor *ldt_buf;
    568 
    569 	/*
    570 	 * I've checked the linux code - this function is asymetric with
    571 	 * linux_write_ldt, and returns raw ldt entries.
    572 	 * NB, the code I saw zerod the spare parts of the user buffer.
    573 	 */
    574 
    575 	DPRINTF(("linux_read_ldt!"));
    576 
    577 	num_ldt = x86_get_ldt_len(l);
    578 	if (num_ldt <= 0)
    579 		return EINVAL;
    580 
    581 	gl.start = 0;
    582 	gl.desc = NULL;
    583 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    584 
    585 	if (gl.num > num_ldt)
    586 		gl.num = num_ldt;
    587 
    588 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
    589 
    590 	error = x86_get_ldt1(l, &gl, ldt_buf);
    591 	/* NB gl.num might have changed */
    592 	if (error == 0) {
    593 		*retval = gl.num * sizeof *ldt;
    594 		error = copyout(ldt_buf, SCARG(uap, ptr),
    595 		    gl.num * sizeof *ldt_buf);
    596 	}
    597 	free(ldt, M_TEMP);
    598 
    599 	return error;
    600 }
    601 
    602 struct linux_ldt_info {
    603 	u_int entry_number;
    604 	u_long base_addr;
    605 	u_int limit;
    606 	u_int seg_32bit:1;
    607 	u_int contents:2;
    608 	u_int read_exec_only:1;
    609 	u_int limit_in_pages:1;
    610 	u_int seg_not_present:1;
    611 	u_int useable:1;
    612 };
    613 
    614 static int
    615 linux_write_ldt(struct lwp *l, struct linux_sys_modify_ldt_args *uap,
    616     int oldmode)
    617 {
    618 	struct linux_ldt_info ldt_info;
    619 	union descriptor d;
    620 	struct x86_set_ldt_args sl;
    621 	int error;
    622 
    623 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    624 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    625 		return (EINVAL);
    626 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    627 		return error;
    628 	if (ldt_info.entry_number >= 8192)
    629 		return (EINVAL);
    630 	if (ldt_info.contents == 3) {
    631 		if (oldmode)
    632 			return (EINVAL);
    633 		if (ldt_info.seg_not_present)
    634 			return (EINVAL);
    635 	}
    636 
    637 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    638 	    (oldmode || (ldt_info.contents == 0 &&
    639 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    640 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    641 	    ldt_info.useable == 0))) {
    642 		/* this means you should zero the ldt */
    643 		(void)memset(&d, 0, sizeof(d));
    644 	} else {
    645 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    646 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    647 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    648 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    649 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    650 		    (!ldt_info.read_exec_only << 1);
    651 		d.sd.sd_dpl = SEL_UPL;
    652 		d.sd.sd_p = !ldt_info.seg_not_present;
    653 		d.sd.sd_def32 = ldt_info.seg_32bit;
    654 		d.sd.sd_gran = ldt_info.limit_in_pages;
    655 		if (!oldmode)
    656 			d.sd.sd_xx = ldt_info.useable;
    657 		else
    658 			d.sd.sd_xx = 0;
    659 	}
    660 	sl.start = ldt_info.entry_number;
    661 	sl.desc = NULL;;
    662 	sl.num = 1;
    663 
    664 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    665 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    666 
    667 	return x86_set_ldt1(l, &sl, &d);
    668 }
    669 
    670 #endif /* USER_LDT */
    671 
    672 int
    673 linux_sys_modify_ldt(struct lwp *l, void *v,
    674     register_t *retval)
    675 {
    676 	struct linux_sys_modify_ldt_args /* {
    677 		syscallarg(int) func;
    678 		syscallarg(void *) ptr;
    679 		syscallarg(size_t) bytecount;
    680 	} */ *uap = v;
    681 
    682 	switch (SCARG(uap, func)) {
    683 #ifdef USER_LDT
    684 	case 0:
    685 		return linux_read_ldt(l, uap, retval);
    686 	case 1:
    687 		return linux_write_ldt(l, uap, 1);
    688 	case 2:
    689 #ifdef notyet
    690 		return (linux_read_default_ldt(l, uap, retval);
    691 #else
    692 		return (ENOSYS);
    693 #endif
    694 	case 0x11:
    695 		return linux_write_ldt(l, uap, 0);
    696 #endif /* USER_LDT */
    697 
    698 	default:
    699 		return (ENOSYS);
    700 	}
    701 }
    702 
    703 /*
    704  * XXX Pathetic hack to make svgalib work. This will fake the major
    705  * device number of an opened VT so that svgalib likes it. grmbl.
    706  * Should probably do it 'wrong the right way' and use a mapping
    707  * array for all major device numbers, and map linux_mknod too.
    708  */
    709 dev_t
    710 linux_fakedev(dev_t dev, int raw)
    711 {
    712 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    713 	const struct cdevsw *cd = cdevsw_lookup(dev);
    714 
    715 	if (raw) {
    716 #if (NWSDISPLAY > 0)
    717 		extern const struct cdevsw wsdisplay_cdevsw;
    718 		if (cd == &wsdisplay_cdevsw)
    719 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    720 #endif
    721 	}
    722 
    723 	if (cd == &ptc_cdevsw)
    724 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    725 	if (cd == &pts_cdevsw)
    726 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    727 
    728 	return dev;
    729 }
    730 
#if (NWSDISPLAY > 0)
/*
 * Keyboard translation tables, NR_KEYS entries each, one table per
 * modifier state.
 * That's not complete, but enough to get an X server running.
 */
#define NR_KEYS 128

/* No modifier. */
static const u_short plain_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Shift. */
static const u_short shift_map[NR_KEYS] = {
	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* AltGr. */
static const u_short altgr_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/* Control. */
static const u_short ctrl_map[NR_KEYS] = {
	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
};

/*
 * Table of tables handed out to the rest of the emulation.
 * NOTE(review): altgr_map fills two consecutive slots; presumably the
 * second stands in for a shift+altgr table that does not exist here --
 * confirm against the consumer's indexing.
 */
const u_short * const linux_keytabs[] = {
	plain_map,
	shift_map,
	altgr_map,
	altgr_map,
	ctrl_map
};
#endif
    810 
    811 static struct biosdisk_info *
    812 fd2biosinfo(struct proc *p, struct file *fp)
    813 {
    814 	struct vnode *vp;
    815 	const char *blkname;
    816 	char diskname[16];
    817 	int i;
    818 	struct nativedisk_info *nip;
    819 	struct disklist *dl = x86_alldisks;
    820 
    821 	if (fp->f_type != DTYPE_VNODE)
    822 		return NULL;
    823 	vp = (struct vnode *)fp->f_data;
    824 
    825 	if (vp->v_type != VBLK)
    826 		return NULL;
    827 
    828 	blkname = devsw_blk2name(major(vp->v_rdev));
    829 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    830 	    DISKUNIT(vp->v_rdev));
    831 
    832 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    833 		nip = &dl->dl_nativedisks[i];
    834 		if (strcmp(diskname, nip->ni_devname))
    835 			continue;
    836 		if (nip->ni_nmatches != 0)
    837 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    838 	}
    839 
    840 	return NULL;
    841 }
    842 
    843 
    844 /*
    845  * We come here in a last attempt to satisfy a Linux ioctl() call
    846  */
    847 int
    848 linux_machdepioctl(struct lwp *l, void *v, register_t *retval)
    849 {
    850 	struct linux_sys_ioctl_args /* {
    851 		syscallarg(int) fd;
    852 		syscallarg(u_long) com;
    853 		syscallarg(void *) data;
    854 	} */ *uap = v;
    855 	struct sys_ioctl_args bia;
    856 	u_long com;
    857 	int error, error1;
    858 #if (NWSDISPLAY > 0)
    859 	struct vt_mode lvt;
    860 	struct kbentry kbe;
    861 #endif
    862 	struct linux_hd_geometry hdg;
    863 	struct linux_hd_big_geometry hdg_big;
    864 	struct biosdisk_info *bip;
    865 	struct filedesc *fdp;
    866 	struct file *fp;
    867 	int fd;
    868 	struct disklabel label, *labp;
    869 	struct partinfo partp;
    870 	int (*ioctlf)(struct file *, u_long, void *, struct lwp *);
    871 	u_long start, biostotal, realtotal;
    872 	u_char heads, sectors;
    873 	u_int cylinders;
    874 	struct ioctl_pt pt;
    875 	struct proc *p = l->l_proc;
    876 
    877 	fd = SCARG(uap, fd);
    878 	SCARG(&bia, fd) = fd;
    879 	SCARG(&bia, data) = SCARG(uap, data);
    880 	com = SCARG(uap, com);
    881 
    882 	fdp = p->p_fd;
    883 
    884 	if ((fp = fd_getfile(fdp, fd)) == NULL)
    885 		return (EBADF);
    886 
    887 	FILE_USE(fp);
    888 
    889 	switch (com) {
    890 #if (NWSDISPLAY > 0)
    891 	case LINUX_KDGKBMODE:
    892 		com = KDGKBMODE;
    893 		break;
    894 	case LINUX_KDSKBMODE:
    895 		com = KDSKBMODE;
    896 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    897 			SCARG(&bia, data) = (void *)K_RAW;
    898 		break;
    899 	case LINUX_KIOCSOUND:
    900 		SCARG(&bia, data) =
    901 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    902 		/* fall through */
    903 	case LINUX_KDMKTONE:
    904 		com = KDMKTONE;
    905 		break;
    906 	case LINUX_KDSETMODE:
    907 		com = KDSETMODE;
    908 		break;
    909 	case LINUX_KDGETMODE:
    910 		/* KD_* values are equal to the wscons numbers */
    911 		com = WSDISPLAYIO_GMODE;
    912 		break;
    913 	case LINUX_KDENABIO:
    914 		com = KDENABIO;
    915 		break;
    916 	case LINUX_KDDISABIO:
    917 		com = KDDISABIO;
    918 		break;
    919 	case LINUX_KDGETLED:
    920 		com = KDGETLED;
    921 		break;
    922 	case LINUX_KDSETLED:
    923 		com = KDSETLED;
    924 		break;
    925 	case LINUX_VT_OPENQRY:
    926 		com = VT_OPENQRY;
    927 		break;
    928 	case LINUX_VT_GETMODE:
    929 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt, l);
    930 		if (error != 0)
    931 			goto out;
    932 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    933 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    934 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    935 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    936 		goto out;
    937 	case LINUX_VT_SETMODE:
    938 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    939 		if (error != 0)
    940 			goto out;
    941 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    942 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    943 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    944 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt, l);
    945 		goto out;
    946 	case LINUX_VT_DISALLOCATE:
    947 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    948 		error = 0;
    949 		goto out;
    950 	case LINUX_VT_RELDISP:
    951 		com = VT_RELDISP;
    952 		break;
    953 	case LINUX_VT_ACTIVATE:
    954 		com = VT_ACTIVATE;
    955 		break;
    956 	case LINUX_VT_WAITACTIVE:
    957 		com = VT_WAITACTIVE;
    958 		break;
    959 	case LINUX_VT_GETSTATE:
    960 		com = VT_GETSTATE;
    961 		break;
    962 	case LINUX_KDGKBTYPE:
    963 	    {
    964 		static const u_int8_t kb101 = KB_101;
    965 
    966 		/* This is what Linux does. */
    967 		error = copyout(&kb101, SCARG(uap, data), 1);
    968 		goto out;
    969 	    }
    970 	case LINUX_KDGKBENT:
    971 		/*
    972 		 * The Linux KDGKBENT ioctl is different from the
    973 		 * SYSV original. So we handle it in machdep code.
    974 		 * XXX We should use keyboard mapping information
    975 		 * from wsdisplay, but this would be expensive.
    976 		 */
    977 		if ((error = copyin(SCARG(uap, data), &kbe,
    978 				    sizeof(struct kbentry))))
    979 			goto out;
    980 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    981 		    || kbe.kb_index >= NR_KEYS) {
    982 			error = EINVAL;
    983 			goto out;
    984 		}
    985 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    986 		error = copyout(&kbe, SCARG(uap, data),
    987 				sizeof(struct kbentry));
    988 		goto out;
    989 #endif
    990 	case LINUX_HDIO_GETGEO:
    991 	case LINUX_HDIO_GETGEO_BIG:
    992 		/*
    993 		 * Try to mimic Linux behaviour: return the BIOS geometry
    994 		 * if possible (extending its # of cylinders if it's beyond
    995 		 * the 1023 limit), fall back to the MI geometry (i.e.
    996 		 * the real geometry) if not found, by returning an
    997 		 * error. See common/linux_hdio.c
    998 		 */
    999 		bip = fd2biosinfo(p, fp);
   1000 		ioctlf = fp->f_ops->fo_ioctl;
   1001 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label, l);
   1002 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp, l);
   1003 		if (error != 0 && error1 != 0) {
   1004 			error = error1;
   1005 			goto out;
   1006 		}
   1007 		labp = error != 0 ? &label : partp.disklab;
   1008 		start = error1 != 0 ? partp.part->p_offset : 0;
   1009 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1010 		    && bip->bi_cyl != 0) {
   1011 			heads = bip->bi_head;
   1012 			sectors = bip->bi_sec;
   1013 			cylinders = bip->bi_cyl;
   1014 			biostotal = heads * sectors * cylinders;
   1015 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1016 			    labp->d_ncylinders;
   1017 			if (realtotal > biostotal)
   1018 				cylinders = realtotal / (heads * sectors);
   1019 		} else {
   1020 			heads = labp->d_ntracks;
   1021 			cylinders = labp->d_ncylinders;
   1022 			sectors = labp->d_nsectors;
   1023 		}
   1024 		if (com == LINUX_HDIO_GETGEO) {
   1025 			hdg.start = start;
   1026 			hdg.heads = heads;
   1027 			hdg.cylinders = cylinders;
   1028 			hdg.sectors = sectors;
   1029 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1030 			goto out;
   1031 		} else {
   1032 			hdg_big.start = start;
   1033 			hdg_big.heads = heads;
   1034 			hdg_big.cylinders = cylinders;
   1035 			hdg_big.sectors = sectors;
   1036 			error = copyout(&hdg_big, SCARG(uap, data),
   1037 			    sizeof hdg_big);
   1038 			goto out;
   1039 		}
   1040 
   1041 	default:
   1042 		/*
   1043 		 * Unknown to us. If it's on a device, just pass it through
   1044 		 * using PTIOCLINUX, the device itself might be able to
   1045 		 * make some sense of it.
   1046 		 * XXX hack: if the function returns EJUSTRETURN,
   1047 		 * it has stuffed a sysctl return value in pt.data.
   1048 		 */
   1049 		ioctlf = fp->f_ops->fo_ioctl;
   1050 		pt.com = SCARG(uap, com);
   1051 		pt.data = SCARG(uap, data);
   1052 		error = ioctlf(fp, PTIOCLINUX, (void *)&pt, l);
   1053 		if (error == EJUSTRETURN) {
   1054 			retval[0] = (register_t)pt.data;
   1055 			error = 0;
   1056 		}
   1057 
   1058 		if (error == ENOTTY) {
   1059 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1060 			    com));
   1061 		}
   1062 		goto out;
   1063 	}
   1064 	SCARG(&bia, com) = com;
   1065 	/* XXX NJWLWP */
   1066 	error = sys_ioctl(curlwp, &bia, retval);
   1067 out:
   1068 	FILE_UNUSE(fp ,l);
   1069 	return error;
   1070 }
   1071 
   1072 /*
   1073  * Set I/O permissions for a process. Just set the maximum level
   1074  * right away (ignoring the argument), otherwise we would have
   1075  * to rely on I/O permission maps, which are not implemented.
   1076  */
   1077 int
   1078 linux_sys_iopl(struct lwp *l, void *v, register_t *retval)
   1079 {
   1080 #if 0
   1081 	struct linux_sys_iopl_args /* {
   1082 		syscallarg(int) level;
   1083 	} */ *uap = v;
   1084 #endif
   1085 	struct trapframe *fp = l->l_md.md_regs;
   1086 
   1087 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1088 	    NULL, NULL, NULL, NULL) != 0)
   1089 		return EPERM;
   1090 	fp->tf_eflags |= PSL_IOPL;
   1091 	*retval = 0;
   1092 	return 0;
   1093 }
   1094 
   1095 /*
   1096  * See above. If a root process tries to set access to an I/O port,
   1097  * just let it have the whole range.
   1098  */
   1099 int
   1100 linux_sys_ioperm(struct lwp *l, void *v, register_t *retval)
   1101 {
   1102 	struct linux_sys_ioperm_args /* {
   1103 		syscallarg(unsigned int) lo;
   1104 		syscallarg(unsigned int) hi;
   1105 		syscallarg(int) val;
   1106 	} */ *uap = v;
   1107 	struct trapframe *fp = l->l_md.md_regs;
   1108 
   1109 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1110 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1111 	    NULL, NULL) != 0)
   1112 		return EPERM;
   1113 	if (SCARG(uap, val))
   1114 		fp->tf_eflags |= PSL_IOPL;
   1115 	*retval = 0;
   1116 	return 0;
   1117 }
   1118 
   1119 int
   1120 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1121     void *arg)
   1122 {
   1123 	return 0;
   1124 }
   1125 
   1126 const char *
   1127 linux_get_uname_arch(void)
   1128 {
   1129 	static char uname_arch[5] = "i386";
   1130 
   1131 	if (uname_arch[1] == '3')
   1132 		uname_arch[1] += cpu_class;
   1133 	return uname_arch;
   1134 }
   1135 
   1136 #ifdef LINUX_NPTL
/*
 * Placeholder for fetching the new TLS value of an lwp (only built
 * with LINUX_NPTL).  Not implemented yet: "l" is not examined and
 * NULL is always returned.
 */
void *
linux_get_newtls(struct lwp *l)
{

	/* XXX: Implement me */
	return NULL;
}
   1145 
/*
 * Placeholder for installing a new TLS value on an lwp (only built
 * with LINUX_NPTL).  Not implemented yet: both arguments are ignored
 * and 0 is always returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{
	(void)l;
	(void)tls;

	/* XXX: Implement me */
	return 0;
}
   1152 #endif
   1153