Home | History | Annotate | Line # | Download | only in i386
linux_machdep.c revision 1.134
      1 /*	$NetBSD: linux_machdep.c,v 1.134 2008/03/21 21:54:58 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1995, 2000, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Frank van der Linden.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 __KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.134 2008/03/21 21:54:58 ad Exp $");
     41 
     42 #if defined(_KERNEL_OPT)
     43 #include "opt_vm86.h"
     44 #include "opt_user_ldt.h"
     45 #endif
     46 
     47 #include <sys/param.h>
     48 #include <sys/systm.h>
     49 #include <sys/signalvar.h>
     50 #include <sys/kernel.h>
     51 #include <sys/proc.h>
     52 #include <sys/user.h>
     53 #include <sys/buf.h>
     54 #include <sys/reboot.h>
     55 #include <sys/conf.h>
     56 #include <sys/exec.h>
     57 #include <sys/file.h>
     58 #include <sys/callout.h>
     59 #include <sys/malloc.h>
     60 #include <sys/mbuf.h>
     61 #include <sys/msgbuf.h>
     62 #include <sys/mount.h>
     63 #include <sys/vnode.h>
     64 #include <sys/device.h>
     65 #include <sys/syscallargs.h>
     66 #include <sys/filedesc.h>
     67 #include <sys/exec_elf.h>
     68 #include <sys/disklabel.h>
     69 #include <sys/ioctl.h>
     70 #include <sys/wait.h>
     71 #include <sys/kauth.h>
     72 
     73 #include <miscfs/specfs/specdev.h>
     74 
     75 #include <compat/linux/common/linux_types.h>
     76 #include <compat/linux/common/linux_signal.h>
     77 #include <compat/linux/common/linux_util.h>
     78 #include <compat/linux/common/linux_ioctl.h>
     79 #include <compat/linux/common/linux_hdio.h>
     80 #include <compat/linux/common/linux_exec.h>
     81 #include <compat/linux/common/linux_machdep.h>
     82 #include <compat/linux/common/linux_errno.h>
     83 
     84 #include <compat/linux/linux_syscallargs.h>
     85 
     86 #include <sys/cpu.h>
     87 #include <machine/cpufunc.h>
     88 #include <machine/psl.h>
     89 #include <machine/reg.h>
     90 #include <machine/segments.h>
     91 #include <machine/specialreg.h>
     92 #include <machine/sysarch.h>
     93 #include <machine/vm86.h>
     94 #include <machine/vmparam.h>
     95 
     96 /*
     97  * To see whether wscons is configured (for virtual console ioctl calls).
     98  */
     99 #if defined(_KERNEL_OPT)
    100 #include "wsdisplay.h"
    101 #endif
    102 #if (NWSDISPLAY > 0)
    103 #include <dev/wscons/wsconsio.h>
    104 #include <dev/wscons/wsdisplay_usl_io.h>
    105 #if defined(_KERNEL_OPT)
    106 #include "opt_xserver.h"
    107 #endif
    108 #endif
    109 
    110 #ifdef DEBUG_LINUX
    111 #define DPRINTF(a) uprintf a
    112 #else
    113 #define DPRINTF(a)
    114 #endif
    115 
    116 static struct biosdisk_info *fd2biosinfo(struct proc *, struct file *);
    117 extern struct disklist *x86_alldisks;
    118 static void linux_save_ucontext(struct lwp *, struct trapframe *,
    119     const sigset_t *, struct sigaltstack *, struct linux_ucontext *);
    120 static void linux_save_sigcontext(struct lwp *, struct trapframe *,
    121     const sigset_t *, struct linux_sigcontext *);
    122 static int linux_restore_sigcontext(struct lwp *,
    123     struct linux_sigcontext *, register_t *);
    124 static void linux_rt_sendsig(const ksiginfo_t *, const sigset_t *);
    125 static void linux_old_sendsig(const ksiginfo_t *, const sigset_t *);
    126 
    127 extern char linux_sigcode[], linux_rt_sigcode[];
    128 /*
    129  * Deal with some i386-specific things in the Linux emulation code.
    130  */
    131 
    132 void
    133 linux_setregs(struct lwp *l, struct exec_package *epp, u_long stack)
    134 {
    135 	struct pcb *pcb = &l->l_addr->u_pcb;
    136 	struct trapframe *tf;
    137 
    138 #if NNPX > 0
    139 	/* If we were using the FPU, forget about it. */
    140 	if (npxproc == l)
    141 		npxdrop();
    142 #endif
    143 
    144 #ifdef USER_LDT
    145 	pmap_ldt_cleanup(l);
    146 #endif
    147 
    148 	l->l_md.md_flags &= ~MDL_USEDFPU;
    149 
    150 	if (i386_use_fxsave) {
    151 		pcb->pcb_savefpu.sv_xmm.sv_env.en_cw = __Linux_NPXCW__;
    152 		pcb->pcb_savefpu.sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
    153 	} else
    154 		pcb->pcb_savefpu.sv_87.sv_env.en_cw = __Linux_NPXCW__;
    155 
    156 	tf = l->l_md.md_regs;
    157 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    158 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    159 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    160 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    161 	tf->tf_edi = 0;
    162 	tf->tf_esi = 0;
    163 	tf->tf_ebp = 0;
    164 	tf->tf_ebx = (int)l->l_proc->p_psstr;
    165 	tf->tf_edx = 0;
    166 	tf->tf_ecx = 0;
    167 	tf->tf_eax = 0;
    168 	tf->tf_eip = epp->ep_entry;
    169 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    170 	tf->tf_eflags = PSL_USERSET;
    171 	tf->tf_esp = stack;
    172 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    173 }
    174 
    175 /*
    176  * Send an interrupt to process.
    177  *
    178  * Stack is set up to allow sigcode stored
    179  * in u. to call routine, followed by kcall
    180  * to sigreturn routine below.  After sigreturn
    181  * resets the signal mask, the stack, and the
    182  * frame pointer, it returns to the user
    183  * specified pc, psl.
    184  */
    185 
    186 void
    187 linux_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    188 {
    189 	if (SIGACTION(curproc, ksi->ksi_signo).sa_flags & SA_SIGINFO)
    190 		linux_rt_sendsig(ksi, mask);
    191 	else
    192 		linux_old_sendsig(ksi, mask);
    193 }
    194 
    195 
    196 static void
    197 linux_save_ucontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct sigaltstack *sas, struct linux_ucontext *uc)
    198 {
    199 	uc->uc_flags = 0;
    200 	uc->uc_link = NULL;
    201 	native_to_linux_sigaltstack(&uc->uc_stack, sas);
    202 	linux_save_sigcontext(l, tf, mask, &uc->uc_mcontext);
    203 	native_to_linux_sigset(&uc->uc_sigmask, mask);
    204 	(void)memset(&uc->uc_fpregs_mem, 0, sizeof(uc->uc_fpregs_mem));
    205 }
    206 
    207 static void
    208 linux_save_sigcontext(struct lwp *l, struct trapframe *tf, const sigset_t *mask, struct linux_sigcontext *sc)
    209 {
    210 	/* Save register context. */
    211 #ifdef VM86
    212 	if (tf->tf_eflags & PSL_VM) {
    213 		sc->sc_gs = tf->tf_vm86_gs;
    214 		sc->sc_fs = tf->tf_vm86_fs;
    215 		sc->sc_es = tf->tf_vm86_es;
    216 		sc->sc_ds = tf->tf_vm86_ds;
    217 		sc->sc_eflags = get_vflags(l);
    218 	} else
    219 #endif
    220 	{
    221 		sc->sc_gs = tf->tf_gs;
    222 		sc->sc_fs = tf->tf_fs;
    223 		sc->sc_es = tf->tf_es;
    224 		sc->sc_ds = tf->tf_ds;
    225 		sc->sc_eflags = tf->tf_eflags;
    226 	}
    227 	sc->sc_edi = tf->tf_edi;
    228 	sc->sc_esi = tf->tf_esi;
    229 	sc->sc_esp = tf->tf_esp;
    230 	sc->sc_ebp = tf->tf_ebp;
    231 	sc->sc_ebx = tf->tf_ebx;
    232 	sc->sc_edx = tf->tf_edx;
    233 	sc->sc_ecx = tf->tf_ecx;
    234 	sc->sc_eax = tf->tf_eax;
    235 	sc->sc_eip = tf->tf_eip;
    236 	sc->sc_cs = tf->tf_cs;
    237 	sc->sc_esp_at_signal = tf->tf_esp;
    238 	sc->sc_ss = tf->tf_ss;
    239 	sc->sc_err = tf->tf_err;
    240 	sc->sc_trapno = tf->tf_trapno;
    241 	sc->sc_cr2 = l->l_addr->u_pcb.pcb_cr2;
    242 	sc->sc_387 = NULL;
    243 
    244 	/* Save signal stack. */
    245 	/* Linux doesn't save the onstack flag in sigframe */
    246 
    247 	/* Save signal mask. */
    248 	native_to_linux_old_sigset(&sc->sc_mask, mask);
    249 }
    250 
    251 static void
    252 linux_rt_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    253 {
    254 	struct lwp *l = curlwp;
    255 	struct proc *p = l->l_proc;
    256 	struct trapframe *tf;
    257 	struct linux_rt_sigframe *fp, frame;
    258 	int onstack, error;
    259 	linux_siginfo_t *lsi;
    260 	int sig = ksi->ksi_signo;
    261 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    262 	struct sigaltstack *sas = &l->l_sigstk;
    263 
    264 	tf = l->l_md.md_regs;
    265 	/* Do we need to jump onto the signal stack? */
    266 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    267 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    268 
    269 
    270 	/* Allocate space for the signal handler context. */
    271 	if (onstack)
    272 		fp = (struct linux_rt_sigframe *)((char *)sas->ss_sp +
    273 		    sas->ss_size);
    274 	else
    275 		fp = (struct linux_rt_sigframe *)tf->tf_esp;
    276 	fp--;
    277 
    278 	DPRINTF(("rt: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    279 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    280 
    281 	/* Build stack frame for signal trampoline. */
    282 	frame.sf_handler = catcher;
    283 	frame.sf_sig = native_to_linux_signo[sig];
    284 	frame.sf_sip = &fp->sf_si;
    285 	frame.sf_ucp = &fp->sf_uc;
    286 
    287 	/*
    288 	 * XXX: the following code assumes that the constants for
    289 	 * siginfo are the same between linux and NetBSD.
    290 	 */
    291 	(void)memset(lsi = &frame.sf_si, 0, sizeof(frame.sf_si));
    292 	lsi->lsi_errno = native_to_linux_errno[ksi->ksi_errno];
    293 	lsi->lsi_code = ksi->ksi_code;
    294 	switch (lsi->lsi_signo = frame.sf_sig) {
    295 	case LINUX_SIGILL:
    296 	case LINUX_SIGFPE:
    297 	case LINUX_SIGSEGV:
    298 	case LINUX_SIGBUS:
    299 	case LINUX_SIGTRAP:
    300 		lsi->lsi_addr = ksi->ksi_addr;
    301 		break;
    302 	case LINUX_SIGCHLD:
    303 		lsi->lsi_uid = ksi->ksi_uid;
    304 		lsi->lsi_pid = ksi->ksi_pid;
    305 		lsi->lsi_utime = ksi->ksi_utime;
    306 		lsi->lsi_stime = ksi->ksi_stime;
    307 
    308 		/* We use the same codes */
    309 		lsi->lsi_code = ksi->ksi_code;
    310 		/* XXX is that right? */
    311 		lsi->lsi_status = WEXITSTATUS(ksi->ksi_status);
    312 		break;
    313 	case LINUX_SIGIO:
    314 		lsi->lsi_band = ksi->ksi_band;
    315 		lsi->lsi_fd = ksi->ksi_fd;
    316 		break;
    317 	default:
    318 		lsi->lsi_uid = ksi->ksi_uid;
    319 		lsi->lsi_pid = ksi->ksi_pid;
    320 		if (lsi->lsi_signo == LINUX_SIGALRM ||
    321 		    lsi->lsi_signo >= LINUX_SIGRTMIN)
    322 			lsi->lsi_value.sival_ptr = ksi->ksi_value.sival_ptr;
    323 		break;
    324 	}
    325 
    326 	/* Save register context. */
    327 	linux_save_ucontext(l, tf, mask, sas, &frame.sf_uc);
    328 	sendsig_reset(l, sig);
    329 
    330 	mutex_exit(&p->p_smutex);
    331 	error = copyout(&frame, fp, sizeof(frame));
    332 	mutex_enter(&p->p_smutex);
    333 
    334 	if (error != 0) {
    335 		/*
    336 		 * Process has trashed its stack; give it an illegal
    337 		 * instruction to halt it in its tracks.
    338 		 */
    339 		sigexit(l, SIGILL);
    340 		/* NOTREACHED */
    341 	}
    342 
    343 	/*
    344 	 * Build context to run handler in.
    345 	 */
    346 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    347 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    348 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    349 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    350 	tf->tf_eip = ((int)p->p_sigctx.ps_sigcode) +
    351 	    (linux_rt_sigcode - linux_sigcode);
    352 	tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
    353 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    354 	tf->tf_esp = (int)fp;
    355 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    356 
    357 	/* Remember that we're now on the signal stack. */
    358 	if (onstack)
    359 		sas->ss_flags |= SS_ONSTACK;
    360 }
    361 
    362 static void
    363 linux_old_sendsig(const ksiginfo_t *ksi, const sigset_t *mask)
    364 {
    365 	struct lwp *l = curlwp;
    366 	struct proc *p = l->l_proc;
    367 	struct trapframe *tf;
    368 	struct linux_sigframe *fp, frame;
    369 	int onstack, error;
    370 	int sig = ksi->ksi_signo;
    371 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    372 	struct sigaltstack *sas = &l->l_sigstk;
    373 
    374 	tf = l->l_md.md_regs;
    375 
    376 	/* Do we need to jump onto the signal stack? */
    377 	onstack = (sas->ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
    378 	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    379 
    380 	/* Allocate space for the signal handler context. */
    381 	if (onstack)
    382 		fp = (struct linux_sigframe *) ((char *)sas->ss_sp +
    383 		    sas->ss_size);
    384 	else
    385 		fp = (struct linux_sigframe *)tf->tf_esp;
    386 	fp--;
    387 
    388 	DPRINTF(("old: onstack = %d, fp = %p sig = %d eip = 0x%x cr2 = 0x%x\n",
    389 	    onstack, fp, sig, tf->tf_eip, l->l_addr->u_pcb.pcb_cr2));
    390 
    391 	/* Build stack frame for signal trampoline. */
    392 	frame.sf_handler = catcher;
    393 	frame.sf_sig = native_to_linux_signo[sig];
    394 
    395 	linux_save_sigcontext(l, tf, mask, &frame.sf_sc);
    396 	sendsig_reset(l, sig);
    397 
    398 	mutex_exit(&p->p_smutex);
    399 	error = copyout(&frame, fp, sizeof(frame));
    400 	mutex_enter(&p->p_smutex);
    401 
    402 	if (error != 0) {
    403 		/*
    404 		 * Process has trashed its stack; give it an illegal
    405 		 * instruction to halt it in its tracks.
    406 		 */
    407 		sigexit(l, SIGILL);
    408 		/* NOTREACHED */
    409 	}
    410 
    411 	/*
    412 	 * Build context to run handler in.
    413 	 */
    414 	tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
    415 	tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
    416 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    417 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    418 	tf->tf_eip = (int)p->p_sigctx.ps_sigcode;
    419 	tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
    420 	tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC);
    421 	tf->tf_esp = (int)fp;
    422 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    423 
    424 	/* Remember that we're now on the signal stack. */
    425 	if (onstack)
    426 		sas->ss_flags |= SS_ONSTACK;
    427 }
    428 
    429 /*
    430  * System call to cleanup state after a signal
    431  * has been taken.  Reset signal mask and
    432  * stack state from context left by sendsig (above).
    433  * Return to previous pc and psl as specified by
    434  * context left by sendsig. Check carefully to
    435  * make sure that the user has not modified the
    436  * psl to gain improper privileges or to cause
    437  * a machine fault.
    438  */
    439 int
    440 linux_sys_rt_sigreturn(struct lwp *l, const struct linux_sys_rt_sigreturn_args *uap, register_t *retval)
    441 {
    442 	/* {
    443 		syscallarg(struct linux_ucontext *) ucp;
    444 	} */
    445 	struct linux_ucontext context, *ucp = SCARG(uap, ucp);
    446 	int error;
    447 
    448 	/*
    449 	 * The trampoline code hands us the context.
    450 	 * It is unsafe to keep track of it ourselves, in the event that a
    451 	 * program jumps out of a signal handler.
    452 	 */
    453 	if ((error = copyin(ucp, &context, sizeof(*ucp))) != 0)
    454 		return error;
    455 
    456 	/* XXX XAX we can do better here by using more of the ucontext */
    457 	return linux_restore_sigcontext(l, &context.uc_mcontext, retval);
    458 }
    459 
    460 int
    461 linux_sys_sigreturn(struct lwp *l, const struct linux_sys_sigreturn_args *uap, register_t *retval)
    462 {
    463 	/* {
    464 		syscallarg(struct linux_sigcontext *) scp;
    465 	} */
    466 	struct linux_sigcontext context, *scp = SCARG(uap, scp);
    467 	int error;
    468 
    469 	/*
    470 	 * The trampoline code hands us the context.
    471 	 * It is unsafe to keep track of it ourselves, in the event that a
    472 	 * program jumps out of a signal handler.
    473 	 */
    474 	if ((error = copyin((void *)scp, &context, sizeof(*scp))) != 0)
    475 		return error;
    476 	return linux_restore_sigcontext(l, &context, retval);
    477 }
    478 
    479 static int
    480 linux_restore_sigcontext(struct lwp *l, struct linux_sigcontext *scp,
    481     register_t *retval)
    482 {
    483 	struct proc *p = l->l_proc;
    484 	struct sigaltstack *sas = &l->l_sigstk;
    485 	struct trapframe *tf;
    486 	sigset_t mask;
    487 	ssize_t ss_gap;
    488 	/* Restore register context. */
    489 	tf = l->l_md.md_regs;
    490 
    491 	DPRINTF(("sigreturn enter esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    492 #ifdef VM86
    493 	if (scp->sc_eflags & PSL_VM) {
    494 		void syscall_vm86(struct trapframe *);
    495 
    496 		tf->tf_vm86_gs = scp->sc_gs;
    497 		tf->tf_vm86_fs = scp->sc_fs;
    498 		tf->tf_vm86_es = scp->sc_es;
    499 		tf->tf_vm86_ds = scp->sc_ds;
    500 		set_vflags(l, scp->sc_eflags);
    501 		p->p_md.md_syscall = syscall_vm86;
    502 	} else
    503 #endif
    504 	{
    505 		/*
    506 		 * Check for security violations.  If we're returning to
    507 		 * protected mode, the CPU will validate the segment registers
    508 		 * automatically and generate a trap on violations.  We handle
    509 		 * the trap, rather than doing all of the checking here.
    510 		 */
    511 		if (((scp->sc_eflags ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 ||
    512 		    !USERMODE(scp->sc_cs, scp->sc_eflags))
    513 			return EINVAL;
    514 
    515 		tf->tf_gs = scp->sc_gs;
    516 		tf->tf_fs = scp->sc_fs;
    517 		tf->tf_es = scp->sc_es;
    518 		tf->tf_ds = scp->sc_ds;
    519 #ifdef VM86
    520 		if (tf->tf_eflags & PSL_VM)
    521 			(*p->p_emul->e_syscall_intern)(p);
    522 #endif
    523 		tf->tf_eflags = scp->sc_eflags;
    524 	}
    525 	tf->tf_edi = scp->sc_edi;
    526 	tf->tf_esi = scp->sc_esi;
    527 	tf->tf_ebp = scp->sc_ebp;
    528 	tf->tf_ebx = scp->sc_ebx;
    529 	tf->tf_edx = scp->sc_edx;
    530 	tf->tf_ecx = scp->sc_ecx;
    531 	tf->tf_eax = scp->sc_eax;
    532 	tf->tf_eip = scp->sc_eip;
    533 	tf->tf_cs = scp->sc_cs;
    534 	tf->tf_esp = scp->sc_esp_at_signal;
    535 	tf->tf_ss = scp->sc_ss;
    536 
    537 	/* Restore signal stack. */
    538 	/*
    539 	 * Linux really does it this way; it doesn't have space in sigframe
    540 	 * to save the onstack flag.
    541 	 */
    542 	mutex_enter(&p->p_smutex);
    543 	ss_gap = (ssize_t)((char *)scp->sc_esp_at_signal - (char *)sas->ss_sp);
    544 	if (ss_gap >= 0 && ss_gap < sas->ss_size)
    545 		sas->ss_flags |= SS_ONSTACK;
    546 	else
    547 		sas->ss_flags &= ~SS_ONSTACK;
    548 
    549 	/* Restore signal mask. */
    550 	linux_old_to_native_sigset(&mask, &scp->sc_mask);
    551 	(void) sigprocmask1(l, SIG_SETMASK, &mask, 0);
    552 	mutex_exit(&p->p_smutex);
    553 
    554 	DPRINTF(("sigreturn exit esp=%x eip=%x\n", tf->tf_esp, tf->tf_eip));
    555 	return EJUSTRETURN;
    556 }
    557 
    558 #ifdef USER_LDT
    559 
    560 static int
    561 linux_read_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    562     register_t *retval)
    563 {
    564 	struct x86_get_ldt_args gl;
    565 	int error;
    566 	int num_ldt;
    567 	union descriptor *ldt_buf;
    568 
    569 	/*
    570 	 * I've checked the linux code - this function is asymetric with
    571 	 * linux_write_ldt, and returns raw ldt entries.
    572 	 * NB, the code I saw zerod the spare parts of the user buffer.
    573 	 */
    574 
    575 	DPRINTF(("linux_read_ldt!"));
    576 
    577 	num_ldt = x86_get_ldt_len(l);
    578 	if (num_ldt <= 0)
    579 		return EINVAL;
    580 
    581 	gl.start = 0;
    582 	gl.desc = NULL;
    583 	gl.num = SCARG(uap, bytecount) / sizeof(union descriptor);
    584 
    585 	if (gl.num > num_ldt)
    586 		gl.num = num_ldt;
    587 
    588 	ldt_buf = malloc(gl.num * sizeof *ldt, M_TEMP, M_WAITOK);
    589 
    590 	error = x86_get_ldt1(l, &gl, ldt_buf);
    591 	/* NB gl.num might have changed */
    592 	if (error == 0) {
    593 		*retval = gl.num * sizeof *ldt;
    594 		error = copyout(ldt_buf, SCARG(uap, ptr),
    595 		    gl.num * sizeof *ldt_buf);
    596 	}
    597 	free(ldt, M_TEMP);
    598 
    599 	return error;
    600 }
    601 
    602 struct linux_ldt_info {
    603 	u_int entry_number;
    604 	u_long base_addr;
    605 	u_int limit;
    606 	u_int seg_32bit:1;
    607 	u_int contents:2;
    608 	u_int read_exec_only:1;
    609 	u_int limit_in_pages:1;
    610 	u_int seg_not_present:1;
    611 	u_int useable:1;
    612 };
    613 
    614 static int
    615 linux_write_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap,
    616     int oldmode)
    617 {
    618 	struct linux_ldt_info ldt_info;
    619 	union descriptor d;
    620 	struct x86_set_ldt_args sl;
    621 	int error;
    622 
    623 	DPRINTF(("linux_write_ldt %d\n", oldmode));
    624 	if (SCARG(uap, bytecount) != sizeof(ldt_info))
    625 		return (EINVAL);
    626 	if ((error = copyin(SCARG(uap, ptr), &ldt_info, sizeof(ldt_info))) != 0)
    627 		return error;
    628 	if (ldt_info.entry_number >= 8192)
    629 		return (EINVAL);
    630 	if (ldt_info.contents == 3) {
    631 		if (oldmode)
    632 			return (EINVAL);
    633 		if (ldt_info.seg_not_present)
    634 			return (EINVAL);
    635 	}
    636 
    637 	if (ldt_info.base_addr == 0 && ldt_info.limit == 0 &&
    638 	    (oldmode || (ldt_info.contents == 0 &&
    639 	    ldt_info.read_exec_only == 1 && ldt_info.seg_32bit == 0 &&
    640 	    ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 &&
    641 	    ldt_info.useable == 0))) {
    642 		/* this means you should zero the ldt */
    643 		(void)memset(&d, 0, sizeof(d));
    644 	} else {
    645 		d.sd.sd_lobase = ldt_info.base_addr & 0xffffff;
    646 		d.sd.sd_hibase = (ldt_info.base_addr >> 24) & 0xff;
    647 		d.sd.sd_lolimit = ldt_info.limit & 0xffff;
    648 		d.sd.sd_hilimit = (ldt_info.limit >> 16) & 0xf;
    649 		d.sd.sd_type = 16 | (ldt_info.contents << 2) |
    650 		    (!ldt_info.read_exec_only << 1);
    651 		d.sd.sd_dpl = SEL_UPL;
    652 		d.sd.sd_p = !ldt_info.seg_not_present;
    653 		d.sd.sd_def32 = ldt_info.seg_32bit;
    654 		d.sd.sd_gran = ldt_info.limit_in_pages;
    655 		if (!oldmode)
    656 			d.sd.sd_xx = ldt_info.useable;
    657 		else
    658 			d.sd.sd_xx = 0;
    659 	}
    660 	sl.start = ldt_info.entry_number;
    661 	sl.desc = NULL;;
    662 	sl.num = 1;
    663 
    664 	DPRINTF(("linux_write_ldt: idx=%d, base=0x%lx, limit=0x%x\n",
    665 	    ldt_info.entry_number, ldt_info.base_addr, ldt_info.limit));
    666 
    667 	return x86_set_ldt1(l, &sl, &d);
    668 }
    669 
    670 #endif /* USER_LDT */
    671 
    672 int
    673 linux_sys_modify_ldt(struct lwp *l, const struct linux_sys_modify_ldt_args *uap, register_t *retval)
    674 {
    675 	/* {
    676 		syscallarg(int) func;
    677 		syscallarg(void *) ptr;
    678 		syscallarg(size_t) bytecount;
    679 	} */
    680 
    681 	switch (SCARG(uap, func)) {
    682 #ifdef USER_LDT
    683 	case 0:
    684 		return linux_read_ldt(l, (const void *)uap, retval);
    685 	case 1:
    686 		return linux_write_ldt(l, (const void *)uap, 1);
    687 	case 2:
    688 #ifdef notyet
    689 		return (linux_read_default_ldt(l, (const void *)uap, retval);
    690 #else
    691 		return (ENOSYS);
    692 #endif
    693 	case 0x11:
    694 		return linux_write_ldt(l, (const void *)uap, 0);
    695 #endif /* USER_LDT */
    696 
    697 	default:
    698 		return (ENOSYS);
    699 	}
    700 }
    701 
    702 /*
    703  * XXX Pathetic hack to make svgalib work. This will fake the major
    704  * device number of an opened VT so that svgalib likes it. grmbl.
    705  * Should probably do it 'wrong the right way' and use a mapping
    706  * array for all major device numbers, and map linux_mknod too.
    707  */
    708 dev_t
    709 linux_fakedev(dev_t dev, int raw)
    710 {
    711 	extern const struct cdevsw ptc_cdevsw, pts_cdevsw;
    712 	const struct cdevsw *cd = cdevsw_lookup(dev);
    713 
    714 	if (raw) {
    715 #if (NWSDISPLAY > 0)
    716 		extern const struct cdevsw wsdisplay_cdevsw;
    717 		if (cd == &wsdisplay_cdevsw)
    718 			return makedev(LINUX_CONS_MAJOR, (minor(dev) + 1));
    719 #endif
    720 	}
    721 
    722 	if (cd == &ptc_cdevsw)
    723 		return makedev(LINUX_PTC_MAJOR, minor(dev));
    724 	if (cd == &pts_cdevsw)
    725 		return makedev(LINUX_PTS_MAJOR, minor(dev));
    726 
    727 	return dev;
    728 }
    729 
    730 #if (NWSDISPLAY > 0)
    731 /*
    732  * That's not complete, but enough to get an X server running.
    733  */
    734 #define NR_KEYS 128
    735 static const u_short plain_map[NR_KEYS] = {
    736 	0x0200,	0x001b,	0x0031,	0x0032,	0x0033,	0x0034,	0x0035,	0x0036,
    737 	0x0037,	0x0038,	0x0039,	0x0030,	0x002d,	0x003d,	0x007f,	0x0009,
    738 	0x0b71,	0x0b77,	0x0b65,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    739 	0x0b6f,	0x0b70,	0x005b,	0x005d,	0x0201,	0x0702,	0x0b61,	0x0b73,
    740 	0x0b64,	0x0b66,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x003b,
    741 	0x0027,	0x0060,	0x0700,	0x005c,	0x0b7a,	0x0b78,	0x0b63,	0x0b76,
    742 	0x0b62,	0x0b6e,	0x0b6d,	0x002c,	0x002e,	0x002f,	0x0700,	0x030c,
    743 	0x0703,	0x0020,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    744 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0209,	0x0307,
    745 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    746 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003c,	0x010a,
    747 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    748 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    749 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    750 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    751 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    752 }, shift_map[NR_KEYS] = {
    753 	0x0200,	0x001b,	0x0021,	0x0040,	0x0023,	0x0024,	0x0025,	0x005e,
    754 	0x0026,	0x002a,	0x0028,	0x0029,	0x005f,	0x002b,	0x007f,	0x0009,
    755 	0x0b51,	0x0b57,	0x0b45,	0x0b52,	0x0b54,	0x0b59,	0x0b55,	0x0b49,
    756 	0x0b4f,	0x0b50,	0x007b,	0x007d,	0x0201,	0x0702,	0x0b41,	0x0b53,
    757 	0x0b44,	0x0b46,	0x0b47,	0x0b48,	0x0b4a,	0x0b4b,	0x0b4c,	0x003a,
    758 	0x0022,	0x007e,	0x0700,	0x007c,	0x0b5a,	0x0b58,	0x0b43,	0x0b56,
    759 	0x0b42,	0x0b4e,	0x0b4d,	0x003c,	0x003e,	0x003f,	0x0700,	0x030c,
    760 	0x0703,	0x0020,	0x0207,	0x010a,	0x010b,	0x010c,	0x010d,	0x010e,
    761 	0x010f,	0x0110,	0x0111,	0x0112,	0x0113,	0x0213,	0x0203,	0x0307,
    762 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    763 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x003e,	0x010a,
    764 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    765 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    766 	0x020b,	0x0601,	0x0602,	0x0117,	0x0600,	0x020a,	0x0115,	0x0116,
    767 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    768 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    769 }, altgr_map[NR_KEYS] = {
    770 	0x0200,	0x0200,	0x0200,	0x0040,	0x0200,	0x0024,	0x0200,	0x0200,
    771 	0x007b,	0x005b,	0x005d,	0x007d,	0x005c,	0x0200,	0x0200,	0x0200,
    772 	0x0b71,	0x0b77,	0x0918,	0x0b72,	0x0b74,	0x0b79,	0x0b75,	0x0b69,
    773 	0x0b6f,	0x0b70,	0x0200,	0x007e,	0x0201,	0x0702,	0x0914,	0x0b73,
    774 	0x0917,	0x0919,	0x0b67,	0x0b68,	0x0b6a,	0x0b6b,	0x0b6c,	0x0200,
    775 	0x0200,	0x0200,	0x0700,	0x0200,	0x0b7a,	0x0b78,	0x0916,	0x0b76,
    776 	0x0915,	0x0b6e,	0x0b6d,	0x0200,	0x0200,	0x0200,	0x0700,	0x030c,
    777 	0x0703,	0x0200,	0x0207,	0x050c,	0x050d,	0x050e,	0x050f,	0x0510,
    778 	0x0511,	0x0512,	0x0513,	0x0514,	0x0515,	0x0208,	0x0202,	0x0911,
    779 	0x0912,	0x0913,	0x030b,	0x090e,	0x090f,	0x0910,	0x030a,	0x090b,
    780 	0x090c,	0x090d,	0x090a,	0x0310,	0x0206,	0x0200,	0x007c,	0x0516,
    781 	0x0517,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    782 	0x030e,	0x0702,	0x030d,	0x0200,	0x0701,	0x0205,	0x0114,	0x0603,
    783 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    784 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    785 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    786 }, ctrl_map[NR_KEYS] = {
    787 	0x0200,	0x0200,	0x0200,	0x0000,	0x001b,	0x001c,	0x001d,	0x001e,
    788 	0x001f,	0x007f,	0x0200,	0x0200,	0x001f,	0x0200,	0x0008,	0x0200,
    789 	0x0011,	0x0017,	0x0005,	0x0012,	0x0014,	0x0019,	0x0015,	0x0009,
    790 	0x000f,	0x0010,	0x001b,	0x001d,	0x0201,	0x0702,	0x0001,	0x0013,
    791 	0x0004,	0x0006,	0x0007,	0x0008,	0x000a,	0x000b,	0x000c,	0x0200,
    792 	0x0007,	0x0000,	0x0700,	0x001c,	0x001a,	0x0018,	0x0003,	0x0016,
    793 	0x0002,	0x000e,	0x000d,	0x0200,	0x020e,	0x007f,	0x0700,	0x030c,
    794 	0x0703,	0x0000,	0x0207,	0x0100,	0x0101,	0x0102,	0x0103,	0x0104,
    795 	0x0105,	0x0106,	0x0107,	0x0108,	0x0109,	0x0208,	0x0204,	0x0307,
    796 	0x0308,	0x0309,	0x030b,	0x0304,	0x0305,	0x0306,	0x030a,	0x0301,
    797 	0x0302,	0x0303,	0x0300,	0x0310,	0x0206,	0x0200,	0x0200,	0x010a,
    798 	0x010b,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    799 	0x030e,	0x0702,	0x030d,	0x001c,	0x0701,	0x0205,	0x0114,	0x0603,
    800 	0x0118,	0x0601,	0x0602,	0x0117,	0x0600,	0x0119,	0x0115,	0x0116,
    801 	0x011a,	0x010c,	0x010d,	0x011b,	0x011c,	0x0110,	0x0311,	0x011d,
    802 	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,	0x0200,
    803 };
    804 
    805 const u_short * const linux_keytabs[] = {
    806 	plain_map, shift_map, altgr_map, altgr_map, ctrl_map
    807 };
    808 #endif
    809 
    810 static struct biosdisk_info *
    811 fd2biosinfo(struct proc *p, struct file *fp)
    812 {
    813 	struct vnode *vp;
    814 	const char *blkname;
    815 	char diskname[16];
    816 	int i;
    817 	struct nativedisk_info *nip;
    818 	struct disklist *dl = x86_alldisks;
    819 
    820 	if (fp->f_type != DTYPE_VNODE)
    821 		return NULL;
    822 	vp = (struct vnode *)fp->f_data;
    823 
    824 	if (vp->v_type != VBLK)
    825 		return NULL;
    826 
    827 	blkname = devsw_blk2name(major(vp->v_rdev));
    828 	snprintf(diskname, sizeof diskname, "%s%u", blkname,
    829 	    DISKUNIT(vp->v_rdev));
    830 
    831 	for (i = 0; i < dl->dl_nnativedisks; i++) {
    832 		nip = &dl->dl_nativedisks[i];
    833 		if (strcmp(diskname, nip->ni_devname))
    834 			continue;
    835 		if (nip->ni_nmatches != 0)
    836 			return &dl->dl_biosdisks[nip->ni_biosmatches[0]];
    837 	}
    838 
    839 	return NULL;
    840 }
    841 
    842 
    843 /*
    844  * We come here in a last attempt to satisfy a Linux ioctl() call
    845  */
    846 int
    847 linux_machdepioctl(struct lwp *l, const struct linux_sys_ioctl_args *uap, register_t *retval)
    848 {
    849 	/* {
    850 		syscallarg(int) fd;
    851 		syscallarg(u_long) com;
    852 		syscallarg(void *) data;
    853 	} */
    854 	struct sys_ioctl_args bia;
    855 	u_long com;
    856 	int error, error1;
    857 #if (NWSDISPLAY > 0)
    858 	struct vt_mode lvt;
    859 	struct kbentry kbe;
    860 #endif
    861 	struct linux_hd_geometry hdg;
    862 	struct linux_hd_big_geometry hdg_big;
    863 	struct biosdisk_info *bip;
    864 	file_t *fp;
    865 	int fd;
    866 	struct disklabel label, *labp;
    867 	struct partinfo partp;
    868 	int (*ioctlf)(struct file *, u_long, void *);
    869 	u_long start, biostotal, realtotal;
    870 	u_char heads, sectors;
    871 	u_int cylinders;
    872 	struct ioctl_pt pt;
    873 
    874 	fd = SCARG(uap, fd);
    875 	SCARG(&bia, fd) = fd;
    876 	SCARG(&bia, data) = SCARG(uap, data);
    877 	com = SCARG(uap, com);
    878 
    879 	if ((fp = fd_getfile(fd)) == NULL)
    880 		return (EBADF);
    881 
    882 	switch (com) {
    883 #if (NWSDISPLAY > 0)
    884 	case LINUX_KDGKBMODE:
    885 		com = KDGKBMODE;
    886 		break;
    887 	case LINUX_KDSKBMODE:
    888 		com = KDSKBMODE;
    889 		if ((unsigned)SCARG(uap, data) == LINUX_K_MEDIUMRAW)
    890 			SCARG(&bia, data) = (void *)K_RAW;
    891 		break;
    892 	case LINUX_KIOCSOUND:
    893 		SCARG(&bia, data) =
    894 		    (void *)(((unsigned long)SCARG(&bia, data)) & 0xffff);
    895 		/* fall through */
    896 	case LINUX_KDMKTONE:
    897 		com = KDMKTONE;
    898 		break;
    899 	case LINUX_KDSETMODE:
    900 		com = KDSETMODE;
    901 		break;
    902 	case LINUX_KDGETMODE:
    903 		/* KD_* values are equal to the wscons numbers */
    904 		com = WSDISPLAYIO_GMODE;
    905 		break;
    906 	case LINUX_KDENABIO:
    907 		com = KDENABIO;
    908 		break;
    909 	case LINUX_KDDISABIO:
    910 		com = KDDISABIO;
    911 		break;
    912 	case LINUX_KDGETLED:
    913 		com = KDGETLED;
    914 		break;
    915 	case LINUX_KDSETLED:
    916 		com = KDSETLED;
    917 		break;
    918 	case LINUX_VT_OPENQRY:
    919 		com = VT_OPENQRY;
    920 		break;
    921 	case LINUX_VT_GETMODE:
    922 		error = fp->f_ops->fo_ioctl(fp, VT_GETMODE, &lvt);
    923 		if (error != 0)
    924 			goto out;
    925 		lvt.relsig = native_to_linux_signo[lvt.relsig];
    926 		lvt.acqsig = native_to_linux_signo[lvt.acqsig];
    927 		lvt.frsig = native_to_linux_signo[lvt.frsig];
    928 		error = copyout(&lvt, SCARG(uap, data), sizeof (lvt));
    929 		goto out;
    930 	case LINUX_VT_SETMODE:
    931 		error = copyin(SCARG(uap, data), &lvt, sizeof (lvt));
    932 		if (error != 0)
    933 			goto out;
    934 		lvt.relsig = linux_to_native_signo[lvt.relsig];
    935 		lvt.acqsig = linux_to_native_signo[lvt.acqsig];
    936 		lvt.frsig = linux_to_native_signo[lvt.frsig];
    937 		error = fp->f_ops->fo_ioctl(fp, VT_SETMODE, &lvt);
    938 		goto out;
    939 	case LINUX_VT_DISALLOCATE:
    940 		/* XXX should use WSDISPLAYIO_DELSCREEN */
    941 		error = 0;
    942 		goto out;
    943 	case LINUX_VT_RELDISP:
    944 		com = VT_RELDISP;
    945 		break;
    946 	case LINUX_VT_ACTIVATE:
    947 		com = VT_ACTIVATE;
    948 		break;
    949 	case LINUX_VT_WAITACTIVE:
    950 		com = VT_WAITACTIVE;
    951 		break;
    952 	case LINUX_VT_GETSTATE:
    953 		com = VT_GETSTATE;
    954 		break;
    955 	case LINUX_KDGKBTYPE:
    956 	    {
    957 		static const u_int8_t kb101 = KB_101;
    958 
    959 		/* This is what Linux does. */
    960 		error = copyout(&kb101, SCARG(uap, data), 1);
    961 		goto out;
    962 	    }
    963 	case LINUX_KDGKBENT:
    964 		/*
    965 		 * The Linux KDGKBENT ioctl is different from the
    966 		 * SYSV original. So we handle it in machdep code.
    967 		 * XXX We should use keyboard mapping information
    968 		 * from wsdisplay, but this would be expensive.
    969 		 */
    970 		if ((error = copyin(SCARG(uap, data), &kbe,
    971 				    sizeof(struct kbentry))))
    972 			goto out;
    973 		if (kbe.kb_table >= sizeof(linux_keytabs) / sizeof(u_short *)
    974 		    || kbe.kb_index >= NR_KEYS) {
    975 			error = EINVAL;
    976 			goto out;
    977 		}
    978 		kbe.kb_value = linux_keytabs[kbe.kb_table][kbe.kb_index];
    979 		error = copyout(&kbe, SCARG(uap, data),
    980 				sizeof(struct kbentry));
    981 		goto out;
    982 #endif
    983 	case LINUX_HDIO_GETGEO:
    984 	case LINUX_HDIO_GETGEO_BIG:
    985 		/*
    986 		 * Try to mimic Linux behaviour: return the BIOS geometry
    987 		 * if possible (extending its # of cylinders if it's beyond
    988 		 * the 1023 limit), fall back to the MI geometry (i.e.
    989 		 * the real geometry) if not found, by returning an
    990 		 * error. See common/linux_hdio.c
    991 		 */
    992 		bip = fd2biosinfo(curproc, fp);
    993 		ioctlf = fp->f_ops->fo_ioctl;
    994 		error = ioctlf(fp, DIOCGDEFLABEL, (void *)&label);
    995 		error1 = ioctlf(fp, DIOCGPART, (void *)&partp);
    996 		if (error != 0 && error1 != 0) {
    997 			error = error1;
    998 			goto out;
    999 		}
   1000 		labp = error != 0 ? &label : partp.disklab;
   1001 		start = error1 != 0 ? partp.part->p_offset : 0;
   1002 		if (bip != NULL && bip->bi_head != 0 && bip->bi_sec != 0
   1003 		    && bip->bi_cyl != 0) {
   1004 			heads = bip->bi_head;
   1005 			sectors = bip->bi_sec;
   1006 			cylinders = bip->bi_cyl;
   1007 			biostotal = heads * sectors * cylinders;
   1008 			realtotal = labp->d_ntracks * labp->d_nsectors *
   1009 			    labp->d_ncylinders;
   1010 			if (realtotal > biostotal)
   1011 				cylinders = realtotal / (heads * sectors);
   1012 		} else {
   1013 			heads = labp->d_ntracks;
   1014 			cylinders = labp->d_ncylinders;
   1015 			sectors = labp->d_nsectors;
   1016 		}
   1017 		if (com == LINUX_HDIO_GETGEO) {
   1018 			hdg.start = start;
   1019 			hdg.heads = heads;
   1020 			hdg.cylinders = cylinders;
   1021 			hdg.sectors = sectors;
   1022 			error = copyout(&hdg, SCARG(uap, data), sizeof hdg);
   1023 			goto out;
   1024 		} else {
   1025 			hdg_big.start = start;
   1026 			hdg_big.heads = heads;
   1027 			hdg_big.cylinders = cylinders;
   1028 			hdg_big.sectors = sectors;
   1029 			error = copyout(&hdg_big, SCARG(uap, data),
   1030 			    sizeof hdg_big);
   1031 			goto out;
   1032 		}
   1033 
   1034 	default:
   1035 		/*
   1036 		 * Unknown to us. If it's on a device, just pass it through
   1037 		 * using PTIOCLINUX, the device itself might be able to
   1038 		 * make some sense of it.
   1039 		 * XXX hack: if the function returns EJUSTRETURN,
   1040 		 * it has stuffed a sysctl return value in pt.data.
   1041 		 */
   1042 		ioctlf = fp->f_ops->fo_ioctl;
   1043 		pt.com = SCARG(uap, com);
   1044 		pt.data = SCARG(uap, data);
   1045 		error = ioctlf(fp, PTIOCLINUX, &pt);
   1046 		if (error == EJUSTRETURN) {
   1047 			retval[0] = (register_t)pt.data;
   1048 			error = 0;
   1049 		}
   1050 
   1051 		if (error == ENOTTY) {
   1052 			DPRINTF(("linux_machdepioctl: invalid ioctl %08lx\n",
   1053 			    com));
   1054 		}
   1055 		goto out;
   1056 	}
   1057 	SCARG(&bia, com) = com;
   1058 	error = sys_ioctl(curlwp, &bia, retval);
   1059 out:
   1060 	fd_putfile(fd);
   1061 	return error;
   1062 }
   1063 
   1064 /*
   1065  * Set I/O permissions for a process. Just set the maximum level
   1066  * right away (ignoring the argument), otherwise we would have
   1067  * to rely on I/O permission maps, which are not implemented.
   1068  */
   1069 int
   1070 linux_sys_iopl(struct lwp *l, const struct linux_sys_iopl_args *uap, register_t *retval)
   1071 {
   1072 	/* {
   1073 		syscallarg(int) level;
   1074 	} */
   1075 	struct trapframe *fp = l->l_md.md_regs;
   1076 
   1077 	if (kauth_authorize_machdep(l->l_cred, KAUTH_MACHDEP_IOPL,
   1078 	    NULL, NULL, NULL, NULL) != 0)
   1079 		return EPERM;
   1080 	fp->tf_eflags |= PSL_IOPL;
   1081 	*retval = 0;
   1082 	return 0;
   1083 }
   1084 
   1085 /*
   1086  * See above. If a root process tries to set access to an I/O port,
   1087  * just let it have the whole range.
   1088  */
   1089 int
   1090 linux_sys_ioperm(struct lwp *l, const struct linux_sys_ioperm_args *uap, register_t *retval)
   1091 {
   1092 	/* {
   1093 		syscallarg(unsigned int) lo;
   1094 		syscallarg(unsigned int) hi;
   1095 		syscallarg(int) val;
   1096 	} */
   1097 	struct trapframe *fp = l->l_md.md_regs;
   1098 
   1099 	if (kauth_authorize_machdep(l->l_cred, SCARG(uap, val) ?
   1100 	    KAUTH_MACHDEP_IOPERM_SET : KAUTH_MACHDEP_IOPERM_GET, NULL, NULL,
   1101 	    NULL, NULL) != 0)
   1102 		return EPERM;
   1103 	if (SCARG(uap, val))
   1104 		fp->tf_eflags |= PSL_IOPL;
   1105 	*retval = 0;
   1106 	return 0;
   1107 }
   1108 
   1109 int
   1110 linux_usertrap(struct lwp *l, vaddr_t trapaddr,
   1111     void *arg)
   1112 {
   1113 	return 0;
   1114 }
   1115 
   1116 const char *
   1117 linux_get_uname_arch(void)
   1118 {
   1119 	static char uname_arch[5] = "i386";
   1120 
   1121 	if (uname_arch[1] == '3')
   1122 		uname_arch[1] += cpu_class;
   1123 	return uname_arch;
   1124 }
   1125 
   1126 #ifdef LINUX_NPTL
/*
 * Return the new TLS pointer for an lwp.
 * Not implemented yet: always returns NULL.
 */
void *
linux_get_newtls(struct lwp *l)
{

	/* XXX: Implement me */
	return NULL;
}
   1135 
/*
 * Install a new TLS pointer for an lwp.
 * Not implemented yet: the arguments are ignored and 0 is returned.
 */
int
linux_set_newtls(struct lwp *l, void *tls)
{

	/* XXX: Implement me */
	return 0;
}
   1142 #endif
   1143